Source code for superblockify.partitioning.utils

"""Utility functions for Partitioners."""

from os import remove
from os.path import exists, join
from uuid import uuid4

from geopandas import GeoDataFrame
from networkx import set_edge_attributes, strongly_connected_components
from numpy import generic
from osmnx import graph_to_gdfs
from osmnx.convert import to_undirected
from pandas import DataFrame
from shapely import Point, Geometry
from shapely.ops import substring

from .representative import find_representative_node_id
from ..config import logger
from ..graph_stats import get_population_area, basic_graph_stats


[docs] def save_to_gpkg( partitioner, save_path=None, ltn_boundary=False, ): """Save the partitioner's graph and Superblocks to a geodatapackage. The name of the components (/partitions) is saved into a "classification" edge attribute. The sparse graph is saved with the value "SPARSE" into the "classification" edge attribute. Parameters ---------- partitioner : superblockify.partitioning.BasePartitioner The partitioner to save. save_path : str, optional The path to save the geodatapackage to. If None, it will be saved to the partitioners folder at (part.results_dir, part.name + ".gpkg") ltn_boundary : bool, optional If True, the boundary of the Superblocks will be saved as a polygon into the `cell` attribute of the Superblocks layer. For this, the tessellation needs to be calculated. Raises ------ ValueError If the partitioner has no `components` or `partitions` attribute. ValueError If the partitioner has no sparsified subgraph. Notes ----- The geopackage will contain the following layers: - nodes - `representative_node_name` - `missing_nodes`: if node is not in any component/partition or sparsified graph - y, x, lat, lon, geometry - edges - `classification` - `osmid`, `highway`, `length`, `geometry` - (bearing: the bearing of the edge, bearing_90: mod(bearing, 90)) - (residential: 1 if edge['highway'] is or contains 'residential', None otherwise) - ltns - `classification` - `cell`: if `ltn_boundary` is True - else `representative_node_point` - `population_density` (in people/m^2) - `area` (in m^2) - `population` (in people) - see :func:`osmnx.stats.basic_stats` for more - graph_meta - `boundary` (by OSM relation) - `boundary_crs` - `area` (in m^2) - `street_orientation_order` - `circuity_avg` - see :func:`osmnx.stats.basic_stats` for more """ if partitioner.sparsified is None: raise ValueError("Partitioner has no sparsified subgraph.") if not isinstance(partitioner.sparsified, type(partitioner.graph)): raise ValueError( f"Partitioner's sparsified subgraph is of type " f"{type(partitioner.sparsified)}, but should be of type " f"{type(partitioner.graph)}. This is not supported." ) # if partitioner.components and partitioner.partitions are None if not partitioner.components and not partitioner.partitions: raise ValueError("Partitioner has no components nor partitions attribute.") filepath = ( save_path if save_path is not None else join(partitioner.results_dir, partitioner.name + ".gpkg") ) parts = None if partitioner.components: parts = partitioner.components logger.info( "Using components attribute to save Superblocks to geodatapackage %s", filepath, ) elif partitioner.partitions: parts = partitioner.partitions logger.info( "Using partitions attribute to save Superblocks to geodatapackage %s", filepath, ) if not isinstance(parts, list): raise ValueError( f"Partitioner's components/partitions attribute is of type {type(parts)}, " "but should be a list of dicts where each dict has a 'subgraph' and " "'name' key." ) # if parts are not None and type is not list of dicts with "subgraph" and "name" if not all( isinstance(comp, dict) and "subgraph" in comp and "name" in comp for comp in parts ): raise ValueError( f"Partitioner's components/partitions attribute is of type {type(parts)}, " "but should be a list of dicts where each dict has a 'subgraph' and " "'name' key." ) # Bake the Superblocks into the graph for _, part in enumerate(parts): # As part["subgraph"] is connected to partitioner.graph, we can just # change the edge attribute in the whole subgraph, applying the Superblock set_edge_attributes(part["subgraph"], part["name"], "classification") # Sparsified edges are saved with the value "SPARSE" into the "classification" # edge attribute set_edge_attributes(partitioner.sparsified, "SPARSE", "classification") nodes, edges = graph_to_gdfs(partitioner.graph, nodes=True, fill_edge_geometry=True) # Components/Partitions ltns = _get_ltns(partitioner, nodes, ltn_boundary) # Graph meta graph_meta = _get_graph_meta(partitioner) # list attributes in nodes and edges logger.info("Node attributes: %s", nodes.columns) logger.info("Edge attributes: %s", edges.columns) logger.info("Superblock attributes: %s", ltns.columns) logger.info("Graph meta attributes: %s", graph_meta.columns) # Remove certain attributes # nodes = nodes.drop(columns=[]) edges = edges.drop( columns=[attr for attr in edges.columns if attr in ["component_name"]] ) # For attributes that are lists or other objects, we need to convert them to strings for gdfs in [edges, ltns, graph_meta]: for col in gdfs.columns: if gdfs[col].dtype == "object": logger.debug("Converting column %s of type %s to str.", col, type(col)) gdfs[col] = gdfs[col].astype(str) # Save nodes and edges to separate layers/geodataframes of the same geodatapackage # if file already exists, remove it if exists(filepath): remove(filepath) nodes.to_file(filepath, layer="nodes", index=False, mode="w") logger.info("Saved %d nodes to %s", len(nodes), filepath) edges.to_file(filepath, layer="edges", index=False, mode="w") logger.info("Saved %d edges to %s", len(edges), filepath) ltns.to_file(filepath, layer="ltns", index=False, mode="w") logger.info("Saved %d Superblocks to %s", len(ltns), filepath) graph_meta.to_file(filepath, layer="graph_meta", index=False, mode="w") logger.info("Saved graph meta to %s", filepath)
def _get_ltns(partitioner, nodes, ltn_boundary): """Prepare ltn geodataframe for the geopackage export. If `ltn_boundary` is True, the ltns are the cells of the tessellation. If `ltn_boundary` is False, the ltns are the representative nodes of the components/partitions. Parameters ---------- partitioner : superblockify.partitioning.BasePartitioner The partitioner to get the ltns from. nodes : geopandas.GeoDataFrame The nodes of the graph. ltn_boundary : bool Whether to use the cells of the tessellation as ltns or the representative nodes of the components/partitions. Returns ------- ltns : geopandas.GeoDataFrame The ltns as a geodataframe. """ if ltn_boundary: partitioner.add_component_tessellation() else: # add node geometry for representative node from nodes layer for _, part in enumerate(partitioner.get_ltns()): part["representative_node_point"] = nodes.loc[ part["representative_node_id"], "geometry" ] ltns = GeoDataFrame.from_dict( partitioner.get_ltns(), geometry="cell" if ltn_boundary else "representative_node_point", orient="columns", crs=partitioner.graph.graph["crs"], ) ltns.rename(columns={"name": "classification"}, inplace=True) ltns.drop( columns=[attr for attr in ltns.columns if attr in ["subgraph"]], inplace=True ) return ltns def _get_graph_meta(partitioner): """Prepare metadata for the graph for the geopackage export. Parameters ---------- partitioner : superblockify.partitioning.BasePartitioner The partitioner to get the graph metadata from. Returns ------- graph_meta : geopandas.GeoDataFrame One line gdf with the graph boundary and all graph attributes. """ return GeoDataFrame.from_dict( partitioner.graph.graph, orient="columns", geometry="boundary", crs=partitioner.graph.graph["crs"], )
[docs] def show_highway_stats(graph): """Show the number of edges for each highway type in the graph. Also show the type proportions of the highway attributes. Parameters ---------- graph : networkx.classes.multidigraph.MultiDiGraph Graph, where the edges have a "highway" attribute. Notes ----- The highway is usually a string, but can also be a list of strings. If the proportion of edges with a highway attribute of type 'str' is below 98%, a warning is logged. """ edges = graph_to_gdfs(graph, nodes=False, fill_edge_geometry=False) # highway counts highway_counts = edges.highway.value_counts().to_frame("count") highway_counts["proportion"] = highway_counts["count"] / len(edges) logger.info( "Highway counts (type, count, proportion): \n%s", highway_counts.to_string() ) # dtypes of the highway attributes dtype_counts = edges.highway.apply(type).value_counts().to_frame("count") dtype_counts["proportion"] = dtype_counts["count"] / len(edges) logger.debug( "Dtype counts (type, count, proportion): \n%s", dtype_counts.to_string() ) # Warning if 'str' is underrepresented if dtype_counts.loc[dtype_counts.index == str, "proportion"].values < 0.98: logger.warning( "The dtype of the 'highway' attribute is not 'str' in %d%% of the edges.", (1 - dtype_counts.loc[dtype_counts.index == str, "proportion"]) * 100, )
[docs] def show_graph_stats(graph): """Show selected graph statistics. Parameters ---------- graph : networkx.classes.multidigraph.MultiDiGraph Graph to show statistics for. Notes ----- Graph must have basic graph stats added by means of :func:`superblockify.graph_stats.basic_graph_stats`. """ logger.info( "Graph stats: \n%s", DataFrame.from_dict( { "Number of nodes": graph.graph["n"], "Number of edges": graph.graph["m"], "Average degree": graph.graph["k_avg"], "Circuity average": graph.graph["circuity_avg"], "Street orientation order": graph.graph["street_orientation_order"], "Date created": graph.graph["created_date"], "Projection": graph.graph["crs"], "Area by OSM boundary (m²)": graph.graph["area"], }, orient="index", ).to_string(), )
[docs] def remove_dead_ends_directed(graph): """Remove all dead ends from the directed graph. Comes down to removing all nodes that are not in the largest strongly connected component. Parameters ---------- graph : networkx.classes.multidigraph.MultiDiGraph Graph to remove dead ends from. Raises ------ ValueError If the graph is not directed. Notes ----- The graph is modified in place. """ if not graph.is_directed(): raise ValueError("Graph must be directed.") num_nodes = graph.number_of_nodes() # Get the largest strongly connected component scc = max(strongly_connected_components(graph), key=len) # Remove all nodes that are not in the largest strongly connected component for node in list(graph.nodes): if node not in scc: graph.remove_node(node) if graph.number_of_nodes() < num_nodes: logger.debug("Removed %d dead ends.", num_nodes - len(graph.nodes))
[docs] def split_up_isolated_edges_directed(graph, sparsified): """Split up all edges in the directed graph that are isolated disregarding the sparsified subgraph. Isolated edges are edges that are not connected to any other edge in the graph. Also, two parallel edges are considered isolated together if their geometries equal. These nodes are inserted at a middle point of the edge(s) geometry, the geometry split up. Attributes `population` and `area` split, but the further attributes are kept the same. Parameters ---------- graph : networkx.classes.multidigraph.MultiDiGraph Graph to split up edges in. sparsified : networkx.classes.multidigraph.MultiDiGraph View of graph, edges taken away from graph to expose isolated edges. Raises ------ ValueError If the graphs are not directed. NotImplementedError Parallel edges of a degree higher than 2 are not supported. Notes ----- The graph is modified in place. The function generates the new node ids `get_new_node_id`. """ # pylint: disable=too-many-locals if not graph.is_directed(): raise ValueError("Graph must be directed.") if not sparsified.is_directed(): raise ValueError("Sparsified graph must be directed.") # Find difference, edgewise, between the graph and the sparsified subgraph rest = graph.edge_subgraph( [ (u, v, k) for u, v, k in graph.edges(keys=True, data=False) if (u, v, k) not in sparsified.edges(keys=True) ] ) rest_un = to_undirected(rest) # for u, v, k, d in rest.edges(keys=True, data=True): for u_isol, v_isol in [ (u, v) for u, v in rest_un.edges() if rest_un.degree(u) == 1 and rest_un.degree(v) == 1 ]: # For one-way-street, only one edge if rest.degree(u_isol) == 1 and rest.degree(v_isol) == 1: edges = [ ( *( # single edge with an unknown direction and key list(rest.edges(v_isol, keys=True, data=True)) + list(rest.edges(u_isol, keys=True, data=True)) )[0], u_isol, ) ] # For two-way-street, two edges elif rest.degree(u_isol) == 2 and rest.degree(v_isol) == 2: edges = [(*list(rest.edges(v_isol, keys=True, data=True))[0], u_isol)] edges += [(*list(rest.edges(u_isol, keys=True, data=True))[0], v_isol)] else: edges = [ (*list(rest.edges(v_isol, keys=True, data=True))[deg_v_isol], u_isol) for deg_v_isol in range(rest.out_degree(v_isol)) ] edges += [ (*list(rest.edges(u_isol, keys=True, data=True))[deg_u_isol], v_isol) for deg_u_isol in range(rest.out_degree(u_isol)) ] # Split up at the middle point if "geometry" not in edges[0][3]: geom = None # interpolate between the nodes middle = Point( (rest.nodes[u_isol]["x"] + rest.nodes[v_isol]["x"]) / 2, (rest.nodes[u_isol]["y"] + rest.nodes[v_isol]["y"]) / 2, ) else: geom = edges[0][3].get("geometry", None) middle = geom.interpolate(geom.length / 2) # Add a node at the middle point middle_id = get_new_node_id(graph) graph.add_node(middle_id, x=middle.x, y=middle.y, split=True) # Add the edges for u_parallel, v_parallel, k, data_p, start_node in edges: data_p["length"] = None if "length" not in data_p else data_p["length"] / 2 # `population` and `area` need to be split up, `cell_id` assigned separately # This is what the start_node is for, # if the inserted node is the start_node, the cell_id is kept, # otherwise it is inverted (usually cell_ids are positive) if "population" in data_p: data_p["population"] = data_p["population"] / 2 data_p["area"] = data_p["area"] / 2 cell_id = data_p.pop("cell_id", None) if geom is None: graph.add_edge( u_parallel, middle_id, k, **data_p, cell_id=( cell_id if cell_id is None else cell_id if u_parallel == start_node else -cell_id ), ) graph.add_edge( middle_id, v_parallel, k, **data_p, cell_id=( cell_id if cell_id is None else cell_id if v_parallel == start_node else -cell_id ), ) else: data_p.pop("geometry", None) graph.add_edge( u_parallel, middle_id, k, **data_p, geometry=substring(geom, 0, geom.project(middle)), cell_id=( cell_id if cell_id is None else cell_id if u_parallel == start_node else -cell_id ), ) graph.add_edge( middle_id, v_parallel, k, **data_p, geometry=substring(geom, geom.project(middle), geom.length), cell_id=( cell_id if cell_id is None else cell_id if v_parallel == start_node else -cell_id ), ) # Remove the original edge graph.remove_edge(u_parallel, v_parallel, k)
[docs] def get_new_node_id(graph): """Get a new node id that is not yet in the graph. Parameters ---------- graph : networkx.classes.multidigraph.MultiDiGraph Graph to get the new node id for. Returns ------- int New node id. Notes ----- The node id is generated by the function `uuid4().int`. We only accept ids > int64.max because the node ids should be differentiable from the osm node ids. """ node_id = uuid4().int while node_id in graph.nodes or node_id < 2**63: node_id = uuid4().int return node_id
[docs] def get_key_figures(partitioner): """Get key figures of the partitioner. Contains the name, city_name, graph_stats, metrics, component stats, attribute_label, and attribute_dtype. Parameters ---------- partitioner : BasePartitioner Partitioner to get the key figures for. Returns ------- dict Key figures of the partitioner. See code for structure. """ return _make_yaml_compatible( { "name": partitioner.name, "city_name": partitioner.city_name, "attribute_label": partitioner.attribute_label, "attribute_dtype": partitioner.attribute_dtype, "graph_stats": { key: value for key, value in partitioner.graph.graph.items() if not isinstance(value, Geometry) # discard OSM graph boundary }, "components": [ # everything of the components/partitions, except the subgraph {key: value for key, value in comp.items() if key != "subgraph"} for comp in partitioner.get_ltns() ], "metric": { "unit": partitioner.metric.unit, "coverage": partitioner.metric.coverage, "directness": partitioner.metric.directness, "global_efficiency": partitioner.metric.global_efficiency, "high_bc_clustering": partitioner.metric.high_bc_clustering, "high_bc_anisotropy": partitioner.metric.high_bc_anisotropy, }, } )
def _make_yaml_compatible(input_val): """Make the dict compatible with the yaml format. Represent only with python types, recursively. Parameters ---------- dict : dict Dictionary to make compatible. Returns ------- dict Copy of the dict with only python types. """ # Recursively call the function if the value is a dict if isinstance(input_val, dict): new_dict = {} for key, value in input_val.items(): new_dict[key] = _make_yaml_compatible(value) return new_dict # Recursively call the function if the value is a list or tuple if isinstance(input_val, (list, tuple)): return [_make_yaml_compatible(value) for value in input_val] # If int, float, str, bool, re-cast it to the same type if isinstance(input_val, bool): output_val = bool(input_val) elif isinstance(input_val, int): output_val = int(input_val) elif isinstance(input_val, float): output_val = float(input_val) elif isinstance(input_val, str): output_val = str(input_val) # Numpy types # find out if it is a numpy scalar elif isinstance(input_val, generic): output_val = input_val.item() else: output_val = str(input_val) return output_val
[docs] def reduce_graph(graph, max_nodes): """Reduce the graph to have at most `max_nodes` nodes. Done by taking the ego graph with not more than `max_nodes` nodes, starting from a representative central node. As in :func:`superblockify.partitioning.representative.set_representative_nodes`. """ if max_nodes is None or graph.number_of_nodes() <= max_nodes: logger.debug( "No reduction necessary (%s <= %s)", graph.number_of_nodes(), max_nodes ) return graph logger.debug( "Reducing graph from %s to %s nodes", graph.number_of_nodes(), max_nodes ) # Find representative node rep_node_id = find_representative_node_id(graph) # Get ego graph - breadth first search (BFS) included = dict(_bfs_egograph(graph, rep_node_id, max_nodes)) reduced = graph.subgraph(included).copy() # Update basic graph stats ( reduced.graph["reduced_population"], reduced.graph["reduced_area"], ) = get_population_area(reduced) try: reduced.graph["reduced_population_density"] = ( reduced.graph["reduced_population"] / reduced.graph["reduced_area"] ) except ZeroDivisionError: reduced.graph["reduced_population_density"] = 0 # Add basic_stats to the graph - prepend "reduced_" to the keys if reduced.number_of_edges() > 0: reduced.graph.update( { "reduced_" + key: value for key, value in basic_graph_stats( reduced, area=reduced.graph["reduced_area"] ).items() } ) logger.debug("Reduced graph has %s nodes", reduced.number_of_nodes()) return reduced
def _bfs_egograph(graph, rep_node_id, max_nodes): """Breadth-first search (BFS) to get the ego graph of the representative node. Parameters ---------- graph : networkx.classes.multidigraph.MultiDiGraph Graph to get the ego graph for. rep_node_id : int Node id of the seed node. max_nodes : int Maximum number of nodes in the ego graph. Yields ------ (node_id, level) : tuple Node id and level of the node in the BFS. Notes ----- Modified from :func:`networkx.algorithms.shortest_paths.unweighted._single_shortest_path_length`. """ seen = set([rep_node_id]) nextlevel = [rep_node_id] level = 0 n_total = len(graph.adj) # Yield the seed node yield (rep_node_id, level) # Yield the nodes in the BFS, as long as the sum of the next level and the seen # nodes are less than the maximum number of nodes while ( nextlevel and len(seen) + len([n_next for leaf in nextlevel for n_next in graph.adj[leaf]]) <= max_nodes ): level += 1 thislevel = nextlevel nextlevel = [] for leaf in thislevel: for n_next in graph.adj[leaf]: if n_next not in seen: seen.add(n_next) nextlevel.append(n_next) # Yield the node id and level yield (n_next, level) if len(seen) == n_total: return # pragma: no cover