Source code for superblockify.partitioning.approaches.betweenness

"""Approach based on high betweenness centrality of nodes and edges."""

from numpy import array

from .attribute import AttributePartitioner
from ...attribute import new_edge_attribute_by_function
from ...config import logger


[docs] class BetweennessPartitioner(AttributePartitioner): """Partitioner using betweenness centrality of nodes and edges. Set sparsified graph from edges or nodes with high betweenness centrality. """
[docs] def write_attribute( self, percentile=85.0, scaling="normal", max_range=None, **kwargs ): """Determine edges with high betweenness centrality for sparsified graph. Edges with high betweenness centrality are used to construct the sparsified graph. The high percentile is determined through ranking all edges by their betweenness centrality and taking the top percentile. The percentile is determined by the `percentile` parameter. Parameters ---------- percentile : float, optional The percentile to use for determining the high betweenness centrality edges, by default 90.0 scaling : str, optional The type of betweenness to use, can be `normal`, `length`, or `linear`, by default `normal` max_range : int, optional The range to use for calculating the betweenness centrality, by default None, which uses the whole graph. Its unit is meters. **kwargs Additional keyword arguments. `calculate_metrics_before` takes the `make_plots` parameter. Raises ------ ValueError If `scaling` is not `normal`, `length`, or `linear`. ValueError If `percentile` is not between, 0.0 and 100.0. """ self.attribute_label = "betweenness_percentile" self.attribute_dtype = int logger.debug("Writing edge betweenness attribute to graph.") if not isinstance(percentile, (float, int)) or not 0.0 < percentile < 100.0: raise ValueError( f"Percentile must be between, 0.0 and 100.0, but is {percentile}." ) if scaling not in ["normal", "length", "linear"]: raise ValueError( f"Scaling must be 'normal', 'length', or 'linear', but is {scaling}." ) self.calculate_metrics_before( make_plots=kwargs.get("make_plots", False), betweenness_range=max_range ) # determine a threshold for betweenness from ranking attr_list = array( [ val for _, _, val in self.graph.edges( data=f"edge_betweenness_{scaling}" + ("_range_limited" if max_range else "") ) ] ) attr_list.sort() threshold = attr_list[int(len(attr_list) * percentile / 100.0)] # Threshold is not allowed to be below the minimal and above the maximal value if threshold <= attr_list[0]: ixd_2nd_smallest = 1 while attr_list[ixd_2nd_smallest] == attr_list[0]: ixd_2nd_smallest += 1 # set threshold to second-smallest value threshold = attr_list[ixd_2nd_smallest] # at least one node/edge is outside elif threshold >= attr_list[-1]: # improbable case due to betw. distribution ixd_2nd_largest = -2 while attr_list[ixd_2nd_largest] == attr_list[-1]: ixd_2nd_largest -= 1 # set threshold to second-largest value threshold = attr_list[ixd_2nd_largest] # at least one node/edge is inside # write boolean attribute to graph new_edge_attribute_by_function( self.graph, lambda x: 0 if x < threshold else 1, source_attribute=f"edge_betweenness_{scaling}", destination_attribute=self.attribute_label, )