quality.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. """Functions for measuring the quality of a partition (into
  2. communities).
  3. """
  4. from itertools import combinations
  5. import networkx as nx
  6. from networkx import NetworkXError
  7. from networkx.algorithms.community.community_utils import is_partition
  8. from networkx.utils.decorators import argmap
  9. __all__ = ["modularity", "partition_quality"]
  10. class NotAPartition(NetworkXError):
  11. """Raised if a given collection is not a partition."""
  12. def __init__(self, G, collection):
  13. msg = f"{collection} is not a valid partition of the graph {G}"
  14. super().__init__(msg)
  15. def _require_partition(G, partition):
  16. """Decorator to check that a valid partition is input to a function
  17. Raises :exc:`networkx.NetworkXError` if the partition is not valid.
  18. This decorator should be used on functions whose first two arguments
  19. are a graph and a partition of the nodes of that graph (in that
  20. order)::
  21. >>> @require_partition
  22. ... def foo(G, partition):
  23. ... print("partition is valid!")
  24. ...
  25. >>> G = nx.complete_graph(5)
  26. >>> partition = [{0, 1}, {2, 3}, {4}]
  27. >>> foo(G, partition)
  28. partition is valid!
  29. >>> partition = [{0}, {2, 3}, {4}]
  30. >>> foo(G, partition)
  31. Traceback (most recent call last):
  32. ...
  33. networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
  34. >>> partition = [{0, 1}, {1, 2, 3}, {4}]
  35. >>> foo(G, partition)
  36. Traceback (most recent call last):
  37. ...
  38. networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
  39. """
  40. if is_partition(G, partition):
  41. return G, partition
  42. raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G")
  43. require_partition = argmap(_require_partition, (0, 1))
  44. @nx._dispatchable
  45. def intra_community_edges(G, partition):
  46. """Returns the number of intra-community edges for a partition of `G`.
  47. Parameters
  48. ----------
  49. G : NetworkX graph.
  50. partition : iterable of sets of nodes
  51. This must be a partition of the nodes of `G`.
  52. The "intra-community edges" are those edges joining a pair of nodes
  53. in the same block of the partition.
  54. """
  55. return sum(G.subgraph(block).size() for block in partition)
  56. @nx._dispatchable
  57. def inter_community_edges(G, partition):
  58. """Returns the number of inter-community edges for a partition of `G`.
  59. according to the given
  60. partition of the nodes of `G`.
  61. Parameters
  62. ----------
  63. G : NetworkX graph.
  64. partition : iterable of sets of nodes
  65. This must be a partition of the nodes of `G`.
  66. The *inter-community edges* are those edges joining a pair of nodes
  67. in different blocks of the partition.
  68. Implementation note: this function creates an intermediate graph
  69. that may require the same amount of memory as that of `G`.
  70. """
  71. # Alternate implementation that does not require constructing a new
  72. # graph object (but does require constructing an affiliation
  73. # dictionary):
  74. #
  75. # aff = dict(chain.from_iterable(((v, block) for v in block)
  76. # for block in partition))
  77. # return sum(1 for u, v in G.edges() if aff[u] != aff[v])
  78. #
  79. MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
  80. return nx.quotient_graph(G, partition, create_using=MG).size()
  81. @nx._dispatchable
  82. def inter_community_non_edges(G, partition):
  83. """Returns the number of inter-community non-edges according to the
  84. given partition of the nodes of `G`.
  85. Parameters
  86. ----------
  87. G : NetworkX graph.
  88. partition : iterable of sets of nodes
  89. This must be a partition of the nodes of `G`.
  90. A *non-edge* is a pair of nodes (undirected if `G` is undirected)
  91. that are not adjacent in `G`. The *inter-community non-edges* are
  92. those non-edges on a pair of nodes in different blocks of the
  93. partition.
  94. Implementation note: this function creates two intermediate graphs,
  95. which may require up to twice the amount of memory as required to
  96. store `G`.
  97. """
  98. # Alternate implementation that does not require constructing two
  99. # new graph objects (but does require constructing an affiliation
  100. # dictionary):
  101. #
  102. # aff = dict(chain.from_iterable(((v, block) for v in block)
  103. # for block in partition))
  104. # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v])
  105. #
  106. return inter_community_edges(nx.complement(G), partition)
  107. @nx._dispatchable(edge_attrs="weight")
  108. def modularity(G, communities, weight="weight", resolution=1):
  109. r"""Returns the modularity of the given partition of the graph.
  110. Modularity is defined in [1]_ as
  111. .. math::
  112. Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
  113. \delta(c_i,c_j)
  114. where $m$ is the number of edges (or sum of all edge weights as in [5]_),
  115. $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$,
  116. $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and
  117. $j$ are in the same community else 0.
  118. According to [2]_ (and verified by some algebra) this can be reduced to
  119. .. math::
  120. Q = \sum_{c=1}^{n}
  121. \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]
  122. where the sum iterates over all communities $c$, $m$ is the number of edges,
  123. $L_c$ is the number of intra-community links for community $c$,
  124. $k_c$ is the sum of degrees of the nodes in community $c$,
  125. and $\gamma$ is the resolution parameter.
  126. The resolution parameter sets an arbitrary tradeoff between intra-group
  127. edges and inter-group edges. More complex grouping patterns can be
  128. discovered by analyzing the same network with multiple values of gamma
  129. and then combining the results [3]_. That said, it is very common to
  130. simply use gamma=1. More on the choice of gamma is in [4]_.
  131. The second formula is the one actually used in calculation of the modularity.
  132. For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.
  133. Parameters
  134. ----------
  135. G : NetworkX Graph
  136. communities : list or iterable of set of nodes
  137. These node sets must represent a partition of G's nodes.
  138. weight : string or None, optional (default="weight")
  139. The edge attribute that holds the numerical value used
  140. as a weight. If None or an edge does not have that attribute,
  141. then that edge has weight 1.
  142. resolution : float (default=1)
  143. If resolution is less than 1, modularity favors larger communities.
  144. Greater than 1 favors smaller communities.
  145. Returns
  146. -------
  147. Q : float
  148. The modularity of the partition.
  149. Raises
  150. ------
  151. NotAPartition
  152. If `communities` is not a partition of the nodes of `G`.
  153. Examples
  154. --------
  155. >>> G = nx.barbell_graph(3, 0)
  156. >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}])
  157. 0.35714285714285715
  158. >>> nx.community.modularity(G, nx.community.label_propagation_communities(G))
  159. 0.35714285714285715
  160. References
  161. ----------
  162. .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
  163. Oxford University Press, 2011.
  164. .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
  165. "Finding community structure in very large networks."
  166. Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
  167. .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
  168. Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
  169. .. [4] M. E. J. Newman, "Equivalence between modularity optimization and
  170. maximum likelihood methods for community detection"
  171. Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315
  172. .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large
  173. networks" J. Stat. Mech 10008, 1-12 (2008).
  174. https://doi.org/10.1088/1742-5468/2008/10/P10008
  175. """
  176. if not isinstance(communities, list):
  177. communities = list(communities)
  178. if not is_partition(G, communities):
  179. raise NotAPartition(G, communities)
  180. directed = G.is_directed()
  181. if directed:
  182. out_degree = dict(G.out_degree(weight=weight))
  183. in_degree = dict(G.in_degree(weight=weight))
  184. m = sum(out_degree.values())
  185. norm = 1 / m**2
  186. else:
  187. out_degree = in_degree = dict(G.degree(weight=weight))
  188. deg_sum = sum(out_degree.values())
  189. m = deg_sum / 2
  190. norm = 1 / deg_sum**2
  191. def community_contribution(community):
  192. comm = set(community)
  193. L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)
  194. out_degree_sum = sum(out_degree[u] for u in comm)
  195. in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
  196. return L_c / m - resolution * out_degree_sum * in_degree_sum * norm
  197. return sum(map(community_contribution, communities))
  198. @require_partition
  199. @nx._dispatchable
  200. def partition_quality(G, partition):
  201. """Returns the coverage and performance of a partition of G.
  202. The *coverage* of a partition is the ratio of the number of
  203. intra-community edges to the total number of edges in the graph.
  204. The *performance* of a partition is the number of
  205. intra-community edges plus inter-community non-edges divided by the total
  206. number of potential edges.
  207. This algorithm has complexity $O(C^2 + L)$ where C is the number of
  208. communities and L is the number of links.
  209. Parameters
  210. ----------
  211. G : NetworkX graph
  212. partition : sequence
  213. Partition of the nodes of `G`, represented as a sequence of
  214. sets of nodes (blocks). Each block of the partition represents a
  215. community.
  216. Returns
  217. -------
  218. (float, float)
  219. The (coverage, performance) tuple of the partition, as defined above.
  220. Raises
  221. ------
  222. NetworkXError
  223. If `partition` is not a valid partition of the nodes of `G`.
  224. Notes
  225. -----
  226. If `G` is a multigraph;
  227. - for coverage, the multiplicity of edges is counted
  228. - for performance, the result is -1 (total number of possible edges is not defined)
  229. References
  230. ----------
  231. .. [1] Santo Fortunato.
  232. "Community Detection in Graphs".
  233. *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174
  234. <https://arxiv.org/abs/0906.0612>
  235. """
  236. node_community = {}
  237. for i, community in enumerate(partition):
  238. for node in community:
  239. node_community[node] = i
  240. # `performance` is not defined for multigraphs
  241. if not G.is_multigraph():
  242. # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges`
  243. possible_inter_community_edges = sum(
  244. len(p1) * len(p2) for p1, p2 in combinations(partition, 2)
  245. )
  246. if G.is_directed():
  247. possible_inter_community_edges *= 2
  248. else:
  249. possible_inter_community_edges = 0
  250. # Compute the number of edges in the complete graph -- `n` nodes,
  251. # directed or undirected, depending on `G`
  252. n = len(G)
  253. total_pairs = n * (n - 1)
  254. if not G.is_directed():
  255. total_pairs //= 2
  256. intra_community_edges = 0
  257. inter_community_non_edges = possible_inter_community_edges
  258. # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges`
  259. for e in G.edges():
  260. if node_community[e[0]] == node_community[e[1]]:
  261. intra_community_edges += 1
  262. else:
  263. inter_community_non_edges -= 1
  264. coverage = intra_community_edges / len(G.edges)
  265. if G.is_multigraph():
  266. performance = -1.0
  267. else:
  268. performance = (intra_community_edges + inter_community_non_edges) / total_pairs
  269. return coverage, performance