non_randomness.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. r"""Computation of graph non-randomness."""
  2. import math
  3. import networkx as nx
  4. from networkx.utils import not_implemented_for
  5. __all__ = ["non_randomness"]
  6. @not_implemented_for("directed")
  7. @not_implemented_for("multigraph")
  8. @nx._dispatchable(edge_attrs="weight")
  9. def non_randomness(G, k=None, weight="weight"):
  10. """Compute the non-randomness of a graph.
  11. The first value $R_G$ is the sum of non-randomness values of all
  12. edges within the graph (where the non-randomness of an edge tends to be
  13. small when the two nodes linked by that edge are from two different
  14. communities).
  15. The second value $R_G^*$ is a relative measure that indicates
  16. to what extent `G` is different from a random graph in terms
  17. of probability. The closer it is to 0, the higher the likelihood
  18. the graph was generated by an Erdős--Rényi model.
  19. Parameters
  20. ----------
  21. G : NetworkX graph
  22. Graph must be undirected, connected, and without self-loops.
  23. k : int or None, optional (default=None)
  24. The number of communities in `G`.
  25. If `k` is not set, the function uses a default community detection
  26. algorithm (:func:`~networkx.algorithms.community.label_propagation_communities`)
  27. to set it.
  28. weight : string or None, optional (default="weight")
  29. The name of an edge attribute that holds the numerical value used
  30. as a weight. If `None`, then each edge has weight 1, i.e., the graph is
  31. binary.
  32. Returns
  33. -------
  34. (float, float) tuple
  35. The first value is $R_G$, the non-randomness of the graph,
  36. the second is $R_G^*$, the relative non-randomness
  37. w.r.t. the Erdős--Rényi model.
  38. Raises
  39. ------
  40. NetworkXNotImplemented
  41. If the input graph is directed or a multigraph.
  42. NetworkXException
  43. If the input graph is not connected.
  44. NetworkXError
  45. If the input graph contains self-loops or has no edges.
  46. ValueError
  47. If `k` is not in $\\{1, \\dots, n-1\\}$, where $n$ is the number of nodes,
  48. or if `k` is such that the computed edge probability
  49. $p = \\frac{2km}{n(n-k)}$ does not satisfy $0 < p < 1$.
  50. Examples
  51. --------
  52. >>> G = nx.karate_club_graph()
  53. >>> nr, nr_rd = nx.non_randomness(G, 2)
  54. >>> nr, nr_rd = nx.non_randomness(G, 2, "weight")
  55. When the number of communities `k` is not specified,
  56. :func:`~networkx.algorithms.community.label_propagation_communities`
  57. is used to compute it.
  58. This algorithm can give different results depending on
  59. the order of nodes and edges in the graph.
  60. For example, while the following graphs are identical,
  61. computing the non-randomness of each of them yields different results:
  62. >>> G1, G2 = nx.Graph(), nx.Graph()
  63. >>> G1.add_edges_from([(0, 1), (1, 2), (1, 3), (3, 4)])
  64. >>> G2.add_edges_from([(0, 1), (1, 3), (1, 2), (3, 4)])
  65. >>> [round(r, 6) for r in nx.non_randomness(G1)]
  66. [-1.847759, -5.842437]
  67. >>> [round(r, 6) for r in nx.non_randomness(G2)]
  68. Traceback (most recent call last):
  69. ...
  70. ValueError: invalid number of communities for graph with 5 nodes and 4 edges: 2
  71. This is because the community detection algorithm finds
  72. 1 community in `G1` and 2 communities in `G2`.
  73. This can be resolved by specifying the number of communities `k`:
  74. >>> [round(r, 6) for r in nx.non_randomness(G2, k=1)]
  75. [-1.847759, -5.842437]
  76. Notes
  77. -----
  78. If a `weight` argument is passed, this algorithm will use the eigenvalues
  79. of the weighted adjacency matrix instead.
  80. The output of this function corresponds to (4.4) and (4.5) in [1]_.
  81. A lower value of $R^*_G$ indicates a more random graph;
  82. one can think of $1 - \\Phi(R_G^*)$ as the similarity
  83. between the graph and a random graph,
  84. where $\\Phi(x)$ is the cumulative distribution function
  85. of the standard normal distribution.
  86. Theorem 2 in [2]_ states that for any graph $G$
  87. with $n$ nodes, $m$ edges, and $k$ communities,
  88. its non-randomness is bounded below by the non-randomness of an
  89. $r$-regular graph (a graph where each node has degree $r$),
  90. and bounded above by the non-randomness of an $l$-complete graph
  91. (a graph where each community is a clique of $l$ nodes).
  92. References
  93. ----------
  94. .. [1] Xiaowei Ying and Xintao Wu,
  95. On Randomness Measures for Social Networks,
  96. SIAM International Conference on Data Mining. 2009
  97. https://doi.org/10.1137/1.9781611972795.61
  98. .. [2] Ying, Xiaowei & Wu, Leting & Wu, Xintao. (2012).
  99. A Spectrum-Based Framework for Quantifying Randomness of Social Networks.
  100. IEEE Transactions on Knowledge and Data Engineering 23(12):1842--1856.
  101. https://dl.acm.org/doi/abs/10.1109/TKDE.2010.218
  102. """
  103. import numpy as np
  104. # corner case: graph has no edges
  105. if nx.is_empty(G):
  106. raise nx.NetworkXError("non_randomness not applicable to empty graphs")
  107. if not nx.is_connected(G):
  108. raise nx.NetworkXException("Non connected graph.")
  109. if len(list(nx.selfloop_edges(G))) > 0:
  110. raise nx.NetworkXError("Graph must not contain self-loops")
  111. n = G.number_of_nodes()
  112. m = G.number_of_edges()
  113. if k is None:
  114. k = len(tuple(nx.community.label_propagation_communities(G)))
  115. if not 1 <= k < n or not 0 < (p := (2 * k * m) / (n * (n - k))) < 1:
  116. err = (
  117. f"invalid number of communities for graph with {n} nodes and {m} edges: {k}"
  118. )
  119. raise ValueError(err)
  120. # eq. 4.4
  121. eigenvalues = np.linalg.eigvals(nx.to_numpy_array(G, weight=weight))
  122. nr = float(np.real(np.sum(eigenvalues[:k])))
  123. # eq. 4.5
  124. nr_rd = (nr - ((n - 2 * k) * p + k)) / math.sqrt(2 * k * p * (1 - p))
  125. return nr, nr_rd