# yichael/xhs-note-crawling
r"""Computation of graph non-randomness."""

import math

import networkx as nx
from networkx.utils import not_implemented_for

__all__ = ["non_randomness"]


@not_implemented_for("directed")
@not_implemented_for("multigraph")
@nx._dispatchable(edge_attrs="weight")
def non_randomness(G, k=None, weight="weight"):
    """Compute the non-randomness of a graph.

    The first value $R_G$ is the sum of non-randomness values of all
    edges within the graph (where the non-randomness of an edge tends to be
    small when the two nodes linked by that edge are from two different
    communities).

    The second value $R_G^*$ is a relative measure that indicates
    to what extent `G` is different from a random graph in terms
    of probability. The closer it is to 0, the higher the likelihood
    the graph was generated by an Erdős--Rényi model.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be undirected, connected, and without self-loops.

    k : int or None, optional (default=None)
        The number of communities in `G`.
        If `k` is not set, the function uses a default community detection
        algorithm (:func:`~networkx.algorithms.community.label_propagation_communities`)
        to set it.

    weight : string or None, optional (default="weight")
        The name of an edge attribute that holds the numerical value used
        as a weight. If `None`, then each edge has weight 1, i.e., the graph is
        binary.

    Returns
    -------
    (float, float) tuple
        The first value is $R_G$, the non-randomness of the graph,
        the second is $R_G^*$, the relative non-randomness
        w.r.t. the Erdős--Rényi model.

    Raises
    ------
    NetworkXNotImplemented
        If the input graph is directed or a multigraph.

    NetworkXException
        If the input graph is not connected.

    NetworkXError
        If the input graph contains self-loops or has no edges.

    ValueError
        If `k` is not in $\\{1, \\dots, n-1\\}$, where $n$ is the number of nodes,
        or if `k` is such that the computed edge probability
        $p = \\frac{2km}{n(n-k)}$ does not satisfy $0 < p < 1$.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> nr, nr_rd = nx.non_randomness(G, 2)
    >>> nr, nr_rd = nx.non_randomness(G, 2, "weight")

    When the number of communities `k` is not specified,
    :func:`~networkx.algorithms.community.label_propagation_communities`
    is used to compute it.
    This algorithm can give different results depending on
    the order of nodes and edges in the graph.
    For example, while the following graphs are identical,
    computing the non-randomness of each of them yields different results:

    >>> G1, G2 = nx.Graph(), nx.Graph()
    >>> G1.add_edges_from([(0, 1), (1, 2), (1, 3), (3, 4)])
    >>> G2.add_edges_from([(0, 1), (1, 3), (1, 2), (3, 4)])
    >>> [round(r, 6) for r in nx.non_randomness(G1)]
    [1.847759, -0.508416]
    >>> [round(r, 6) for r in nx.non_randomness(G2)]
    Traceback (most recent call last):
     ...
    ValueError: invalid number of communities for graph with 5 nodes and 4 edges: 2

    This is because the community detection algorithm finds
    1 community in `G1` and 2 communities in `G2`.
    This can be resolved by specifying the number of communities `k`:

    >>> [round(r, 6) for r in nx.non_randomness(G2, k=1)]
    [1.847759, -0.508416]

    Notes
    -----
    $R_G$ is computed as the sum of the `k` largest eigenvalues of the
    adjacency matrix of `G`. Because `G` is undirected the matrix is
    symmetric, so its eigenvalues are real and are obtained in sorted order.

    If a `weight` argument is passed, this algorithm will use the eigenvalues
    of the weighted adjacency matrix instead.

    The output of this function corresponds to (4.4) and (4.5) in [1]_.
    A lower value of $R^*_G$ indicates a more random graph;
    one can think of $1 - \\Phi(R_G^*)$ as the similarity
    between the graph and a random graph,
    where $\\Phi(x)$ is the cumulative distribution function
    of the standard normal distribution.

    Theorem 2 in [2]_ states that for any graph $G$
    with $n$ nodes, $m$ edges, and $k$ communities,
    its non-randomness is bounded below by the non-randomness of an
    $r$-regular graph (a graph where each node has degree $r$),
    and bounded above by the non-randomness of an $l$-complete graph
    (a graph where each community is a clique of $l$ nodes).

    References
    ----------
    .. [1] Xiaowei Ying and Xintao Wu,
           On Randomness Measures for Social Networks,
           SIAM International Conference on Data Mining. 2009
           https://doi.org/10.1137/1.9781611972795.61
    .. [2] Ying, Xiaowei & Wu, Leting & Wu, Xintao. (2012).
           A Spectrum-Based Framework for Quantifying Randomness of Social Networks.
           IEEE Transactions on Knowledge and Data Engineering 23(12):1842--1856.
           https://dl.acm.org/doi/abs/10.1109/TKDE.2010.218
    """
    import numpy as np

    # corner case: graph has no edges
    if nx.is_empty(G):
        raise nx.NetworkXError("non_randomness not applicable to empty graphs")
    if not nx.is_connected(G):
        raise nx.NetworkXException("Non connected graph.")
    # Stop at the first self-loop instead of materializing all of them.
    if any(True for _ in nx.selfloop_edges(G)):
        raise nx.NetworkXError("Graph must not contain self-loops")

    n = G.number_of_nodes()
    m = G.number_of_edges()

    if k is None:
        k = len(tuple(nx.community.label_propagation_communities(G)))
    # `p` is only evaluated once `1 <= k < n` holds, which keeps the
    # denominator `n * (n - k)` strictly positive.
    if not 1 <= k < n or not 0 < (p := (2 * k * m) / (n * (n - k))) < 1:
        err = (
            f"invalid number of communities for graph with {n} nodes and {m} edges: {k}"
        )
        raise ValueError(err)

    # eq. 4.4: sum of the k largest eigenvalues.
    # The adjacency matrix of an undirected graph is symmetric, so `eigvalsh`
    # applies; it returns real eigenvalues in ascending order, which makes the
    # last `k` entries the largest ones. (`eigvals` gives no ordering
    # guarantee, so slicing its output would pick arbitrary eigenvalues.)
    eigenvalues = np.linalg.eigvalsh(nx.to_numpy_array(G, weight=weight))
    nr = float(np.sum(eigenvalues[-k:]))

    # eq. 4.5
    nr_rd = (nr - ((n - 2 * k) * p + k)) / math.sqrt(2 * k * p * (1 - p))

    return nr, nr_rd