Source code for superblockify.utils

"""Utility functions for superblockify."""

from ast import literal_eval
from itertools import chain
from os.path import getsize
from re import match

import osmnx as ox
from networkx import Graph, is_isomorphic, set_node_attributes
from numba import njit, int64, int32, prange
from numpy import (
    zeros,
    array,
    fill_diagonal,
    ndarray,
    array_equal,
    empty,
    int64 as np_int64,
    sign,
    inf,
    isinf,
    nan,
)
from osmnx._errors import CacheOnlyInterruptError
from osmnx.stats import count_streets_per_node
from shapely import wkt

from .partitioning.utils import reduce_graph
from .config import logger, Config
from .graph_stats import basic_graph_stats
from .population.approximation import add_edge_population


[docs] def extract_attributes(graph, edge_attributes, node_attributes): """Extract only the specified attributes from a graph. Parameters ---------- graph : networkx.MultiDiGraph The graph to extract attributes from. edge_attributes : set The edge attributes to keep. node_attributes : set The node attributes to keep. Returns ------- networkx.MultiDiGraph The graph with only the specified attributes. """ # Get all unique edge attributes all_attributes = set( chain.from_iterable(d.keys() for *_, d in graph.edges(data=True)) ) excess_attributes = all_attributes - edge_attributes # Delete all excess attributes for _, _, attr_dict in graph.edges(data=True): for att in excess_attributes: attr_dict.pop(att, None) # Get all unique node attributes all_attributes = set( chain.from_iterable(d.keys() for *_, d in graph.nodes(data=True)) ) excess_attributes = all_attributes - node_attributes # Delete all excess attributes for _, attr_dict in graph.nodes(data=True): for att in excess_attributes: attr_dict.pop(att, None) return graph
[docs] def load_graph_from_place( save_as, search_string, add_population=False, only_cache=False, max_nodes=Config.MAX_NODES, **gfp_kwargs, ): """Load a graph from a place and save it to a file. The method filters the attributes of the graph to only include the ones that are needed for the package. Parameters ---------- save_as : str The name of the file to save the graph to. search_string : str or list of str The place to load the graph from. Find the place name on OpenStreetMap. https://nominatim.openstreetmap.org/ Can otherwise be OSM relation ID or a list of those. They have the format "R1234567". Not mixed with place names. add_population : bool, optional Whether to add population data to the graph. Default is False. only_cache : bool, optional Whether to only load the graph from cache. Default is False. max_nodes : int, optional Maximum number of nodes in the graph. If the graph has more nodes, it will be reduced to `max_nodes`, by taking the ego graph of a representative, central node. If None, the graph will not be reduced. Default is set in :mod:`superblockify.config`. **gfp_kwargs Keyword arguments to pass to osmnx.graph_from_place. Returns ------- networkx.MultiDiGraph or None The graph loaded from the place. If only_cache is True, returns None. """ # check the format of the search string, match to regex R\d+, str or list of str # use re.match(r"R\d+", search_string) or to all elements in list mult_polygon = ox.geocode_to_gdf( search_string, by_osmid=isinstance(search_string, str) and match(r"R\d+", search_string) or isinstance(search_string, list) and all(isinstance(s, str) and match(r"R\d+", s) for s in search_string), ) # geopandas.GeoDataFrame, every column is polygon # make shapely.geometry.MultiPolygon from all polygons # Project to WGS84 to query OSM mult_polygon = ox.projection.project_gdf(mult_polygon, to_crs="epsg:4326") logger.debug("Reprojected boundary to WGS84 - Downloading graph") # Get graph - automatically adds distances before simplifying ox.settings.cache_only_mode = only_cache try: graph = ox.graph_from_polygon(mult_polygon.geometry.union_all(), **gfp_kwargs) except CacheOnlyInterruptError: # pragma: no cover logger.debug("Only loaded graph from cache") return None logger.debug("Downloaded graph - Preprocessing") # Add edge bearings graph = ox.add_edge_bearings(graph) # precision=2) # the precision >1 is # important for binning when using the deprecated BearingPartitioner # Project to local UTM - coordinates can be used as graph = ox.project_graph(graph) graph = ox.add_edge_speeds(graph) # adds attribute "maxspeed" graph = ox.add_edge_travel_times(graph) # adds attribute "travel_time" # count the number of streets per node / degree street_count = count_streets_per_node(graph) set_node_attributes(graph, values=street_count, name="street_count") graph = extract_attributes( graph, edge_attributes={ "geometry", "osmid", "length", "highway", "speed_kph", "travel_time", "bearing", }, node_attributes={"y", "x", "lat", "lon", "osmid", "street_count"}, ) # Add edge population and area if add_population: add_edge_population(graph) # Add boundary as union of all polygons as attribute - in UTM crs of centroid mult_polygon = ox.projection.project_gdf(mult_polygon) graph.graph["boundary_crs"] = mult_polygon.crs graph.graph["boundary"] = mult_polygon.geometry.union_all() graph.graph["area"] = graph.graph["boundary"].area # update graph attributes with basic graph stats graph.graph.update(basic_graph_stats(graph, area=graph.graph["area"])) # Save graph if max_nodes is not None and graph.number_of_nodes() > max_nodes: logger.debug("Reducing graph to %s nodes", max_nodes) graph = reduce_graph(graph, max_nodes=max_nodes) logger.debug("Preprocessing done - Saving graph") ox.save_graphml(graph, filepath=save_as) logger.debug("Saved graph (%s MB) to %s", getsize(save_as) >> 20, save_as) return graph
[docs] def compare_components_and_partitions(list1, list2): """Compare two lists of dictionaries. The lists are equal if they have the same length and the dictionaries in the lists are equal. If a value of a dictionary is a networkx.Graph, it compares the graphs with networkx.graph_equal(). Parameters ---------- list1 : list of dict The first list of dictionaries to compare. list2 : list of dict The second list of dictionaries to compare. Returns ------- bool True if the lists are equal, False otherwise. """ if len(list1) != len(list2): return False for element1, element2 in zip(list1, list2): if element1.keys() != element2.keys(): return False for key in element1.keys(): if all(isinstance(x, Graph) for x in [element1[key], element2[key]]): # Check if Graphs are isomorphic, as the attributes might differ if not is_isomorphic(element1[key], element2[key]): return False elif element1[key] != element2[key]: return False return True
[docs] def has_pairwise_overlap(lists): """Return a boolean array indicating overlap between pairs of lists. Uses numpy arrays and vector broadcasting to speed up the calculation. For short lists using set operations is faster. Parameters ---------- lists : list of lists The lists to check for pairwise overlap. Lists can be of different length. Raises ------ ValueError If lists is not a list of lists. ValueError If lists is empty. Returns ------- has_overlap : ndarray A boolean array indicating whether there is overlap between each pair of lists. has_overlap[i, j] is True if there is overlap between list i and list j, and False otherwise. """ if not isinstance(lists, list) or not all(isinstance(lst, list) for lst in lists): raise ValueError("The input must be a list of lists.") if not lists: raise ValueError("The input must not be the empty list.") # Convert lists to sets sets = [set(lst) for lst in lists] # Compute the pairwise intersection of the sets intersections = zeros((len(sets), len(sets))) for i, _ in enumerate(sets): for j, _ in enumerate(sets): intersections[i, j] = len(sets[i] & sets[j]) intersections[j, i] = intersections[i, j] # Compute the pairwise union of the sets unions = array([len(s) for s in sets]).reshape(-1, 1) + len(lists) - 1 # Compute whether there is overlap between each pair of sets has_overlap = intersections > 0 overlaps = intersections / unions fill_diagonal(overlaps, 0) has_overlap |= overlaps > 0 return has_overlap
[docs] def compare_dicts(dict1, dict2): """Compare two dictionaries recursively. Function to recursively compare nested dicts that might contain numpy arrays. Parameters ---------- dict1 : dict The first dictionary to compare. dict2 : dict The second dictionary to compare. Returns ------- bool True if the dictionaries are equal, False otherwise. """ if type(dict1).__name__ != type(dict2).__name__: return False if isinstance(dict1, dict): if dict1.keys() != dict2.keys(): return False for key in dict1.keys(): if not compare_dicts(dict1[key], dict2[key]): return False return True if isinstance(dict1, ndarray): return array_equal(dict1, dict2) return dict1 == dict2
@njit(int64(int32, int32, int64)) def __edge_to_1d(edge_u, edge_v, max_len): # pragma: no cover """Convert edge to 1D representation. Parameters ---------- edge_u : int First node index edge_v : int Second node index max_len : int Maximum length of the node indices Returns ------- int 1D representation of the edge """ return edge_u * 10**max_len + edge_v @njit(int64[:](int32[:], int32[:], int64), parallel=True) def __edges_to_1d(edge_u, edge_v, max_len): # pragma: no cover """Convert edges to 1D representation. Parameters ---------- edge_u : np.ndarray First node indices edge_v : np.ndarray Second node indices max_len : int Maximum length of the node indices Returns ------- ndarray 1D representation of the edges """ edges = empty(len(edge_u), dtype=np_int64) for i in prange(len(edge_u)): # pylint: disable=not-an-iterable edges[i] = __edge_to_1d(edge_u[i], edge_v[i], max_len) return edges
[docs] def load_graphml_dtypes(filepath=None, attribute_label=None, attribute_dtype=None): """Load a graphml file with custom dtypes. Parameters ---------- filepath : str Path to the graphml file. attribute_label : str, optional The attribute label to convert to the specified dtype. attribute_dtype : type, optional The dtype to convert the attribute to. Returns ------- networkx.MultiDiGraph The graph. """ node_dtypes = { "y": float, "x": float, } edge_dtypes = { "bearing": float, "length": float, "speed_kph": float, "travel_time": float, "travel_time_restricted": float, "rel_increase": float, "population": float, "area": float, "cell_id": int, "edge_betweenness_normal": float, "edge_betweenness_length": float, "edge_betweenness_linear": float, "edge_betweenness_normal_restricted": float, "edge_betweenness_length_restricted": float, "edge_betweenness_linear_restricted": float, } graph_dtypes = { "simplified": bool, "edge_population": bool, "boundary": wkt.loads, "area": float, "n": int, "m": int, "k_avg": float, "edge_length_total": float, "edge_length_avg": float, "streets_per_node_avg": float, "streets_per_node_counts": literal_eval, "streets_per_node_proportions": literal_eval, "intersection_count": int, "street_length_total": float, "street_segment_count": int, "street_length_avg": float, "circuity_avg": float, "self_loop_proportion": float, "node_density_km": float, "intersection_density_km": float, "edge_density_km": float, "street_density_km": float, "street_orientation_order": float, } # Add the same graph_dtypes, but with the `reduced_` prefix graph_dtypes.update( {f"reduced_{key}": value for key, value in graph_dtypes.items()} ) if attribute_label is not None and attribute_dtype is not None: edge_dtypes[attribute_label] = attribute_dtype graph = ox.load_graphml( filepath=filepath, node_dtypes=node_dtypes, edge_dtypes=edge_dtypes, graph_dtypes=graph_dtypes, ) return graph
[docs] def percentual_increase(val_1, val_2): """Compute the percentual increase between two values. 3 -> 4 = 33.33% = 1/3 4 -> 3 = -25.00% = -1/4 Parameters ---------- val_1 : float The first value. val_2 : float The second value. Returns ------- float The relative difference between the two values. Notes ----- If both values are zero, the result is zero. If one value is zero, the result is +-infinity. """ if val_1 == val_2: return 0 if val_1 == 0 or val_2 == 0: return inf * (sign(val_2 - val_1) if val_2 != 0 else -1) if isinf(val_1) and isinf(val_2): return nan if isinf(val_1): return -inf if isinf(val_2): return inf * sign(val_1) * (sign(val_2) if val_2 != 0 else 1) return (val_2 / val_1) - 1