Source code for superblockify.partitioning.approaches.betweenness

"""Approach based on high betweenness centrality of nodes and edges."""

from numpy import array

from .attribute import AttributePartitioner
from ...attribute import new_edge_attribute_by_function
from ...config import logger



[docs]
class BetweennessPartitioner(AttributePartitioner):
    """Partitioner using betweenness centrality of nodes and edges.

    Set sparsified graph from edges or nodes with high betweenness centrality.
    """


[docs]
    def write_attribute(
        self, percentile=85.0, scaling="normal", max_range=None, **kwargs
    ):
        """Determine edges with high betweenness centrality for sparsified graph.

        Edges with high betweenness centrality are used to construct the sparsified
        graph.

        The high percentile is determined through ranking all edges by their
        betweenness centrality and taking the top percentile. The percentile is
        determined by the `percentile` parameter.

        Parameters
        ----------
        percentile : float, optional
            The percentile to use for determining the high betweenness centrality
            edges, by default 90.0
        scaling : str, optional
            The type of betweenness to use, can be `normal`, `length`, or `linear`,
            by default `normal`
        max_range : int, optional
            The range to use for calculating the betweenness centrality, by default
            None, which uses the whole graph. Its unit is meters.
        **kwargs
            Additional keyword arguments. `calculate_metrics_before` takes the
            `make_plots` parameter.


        Raises
        ------
        ValueError
            If `scaling` is not `normal`, `length`, or `linear`.
        ValueError
            If `percentile` is not between, 0.0 and 100.0.
        """
        self.attribute_label = "betweenness_percentile"
        self.attribute_dtype = int

        logger.debug("Writing edge betweenness attribute to graph.")
        if not isinstance(percentile, (float, int)) or not 0.0 < percentile < 100.0:
            raise ValueError(
                f"Percentile must be between, 0.0 and 100.0, but is {percentile}."
            )
        if scaling not in ["normal", "length", "linear"]:
            raise ValueError(
                f"Scaling must be 'normal', 'length', or 'linear', but is {scaling}."
            )

        self.calculate_metrics_before(
            make_plots=kwargs.get("make_plots", False), betweenness_range=max_range
        )

        # determine a threshold for betweenness from ranking
        attr_list = array(
            [
                val
                for _, _, val in self.graph.edges(
                    data=f"edge_betweenness_{scaling}"
                    + ("_range_limited" if max_range else "")
                )
            ]
        )

        attr_list.sort()
        threshold = attr_list[int(len(attr_list) * percentile / 100.0)]

        # Threshold is not allowed to be below the minimal and above the maximal value
        if threshold <= attr_list[0]:
            ixd_2nd_smallest = 1
            while attr_list[ixd_2nd_smallest] == attr_list[0]:
                ixd_2nd_smallest += 1
            # set threshold to second-smallest value
            threshold = attr_list[ixd_2nd_smallest]
            # at least one node/edge is outside
        elif threshold >= attr_list[-1]:  # improbable case due to betw. distribution
            ixd_2nd_largest = -2
            while attr_list[ixd_2nd_largest] == attr_list[-1]:
                ixd_2nd_largest -= 1
            # set threshold to second-largest value
            threshold = attr_list[ixd_2nd_largest]
            # at least one node/edge is inside

        # write boolean attribute to graph
        new_edge_attribute_by_function(
            self.graph,
            lambda x: 0 if x < threshold else 1,
            source_attribute=f"edge_betweenness_{scaling}",
            destination_attribute=self.attribute_label,
        )