local.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. """
  2. Local Community Detection Algorithms
  3. Local Community Detection (LCD) aims to detected one or a few communities
  4. starting from certain source nodes in the network. This differs from Global
  5. Community Detection (GCD), which aims to partition an entire network into
  6. communities.
  7. LCD is often useful when only a portion of the graph is known or the
  8. graph is large enough that GCD is infeasable
  9. [1]_ Gives a good introduction and overview of LCD
  10. References
  11. ----------
  12. .. [1] Baltsou, Georgia, Konstantinos Christopoulos, and Konstantinos Tsichlas.
  13. Local community detection: A survey. IEEE Access 10 (2022): 110701-110726.
  14. https://doi.org/10.1109/ACCESS.2022.3213980
  15. """
  16. __all__ = ["greedy_source_expansion"]
  17. def _clauset_greedy_source_expansion(G, *, source, cutoff=None):
  18. if cutoff is None:
  19. cutoff = float("inf")
  20. C = {source}
  21. B = {source}
  22. U = G[source].keys() - C
  23. T = {frozenset([node, nbr]) for node in B for nbr in G.neighbors(node)}
  24. I = {edge for edge in T if all(node in C for node in edge)}
  25. R_value = 0
  26. while len(C) < cutoff:
  27. if not U:
  28. break
  29. max_R = 0
  30. best_node = None
  31. best_node_B = best_node_T = best_node_I = set()
  32. for v in U:
  33. R_tmp, B_tmp, T_tmp, I_tmp = _calculate_local_modularity_for_candidate(
  34. G, v, C, B, T, I
  35. )
  36. if R_tmp > max_R:
  37. max_R = R_tmp
  38. best_node = v
  39. best_node_B = B_tmp
  40. best_node_T = T_tmp
  41. best_node_I = I_tmp
  42. C = C | {best_node}
  43. U.update(G[best_node].keys() - C)
  44. U.remove(best_node)
  45. B = best_node_B
  46. T = best_node_T
  47. I = best_node_I
  48. if max_R < R_value:
  49. break
  50. R_value = max_R
  51. return C
  52. def _calculate_local_modularity_for_candidate(G, v, C, B, T, I):
  53. """
  54. Compute the local modularity R and updated variables when adding node v to the community.
  55. Parameters
  56. ----------
  57. G : NetworkX graph
  58. The input graph.
  59. v : node
  60. The candidate node to add to the community.
  61. C : set
  62. The current set of community nodes.
  63. B : set
  64. The current set of boundary nodes.
  65. T : set of frozenset
  66. The current set of boundary edges.
  67. I : set of frozenset
  68. The current set of internal boundary edges.
  69. Returns
  70. -------
  71. R_tmp : float
  72. The local modularity after adding node v.
  73. B_tmp : set
  74. The updated set of boundary nodes.
  75. T_tmp : set of frozenset
  76. The updated set of boundary edges.
  77. I_tmp : set of frozenset
  78. The updated set of internal boundary edges.
  79. """
  80. C_tmp = C | {v}
  81. B_tmp = B.copy()
  82. T_tmp = T.copy()
  83. I_tmp = I.copy()
  84. removed_B_nodes = set()
  85. # Update boundary nodes and edges
  86. for nbr in G[v]:
  87. if nbr not in C_tmp:
  88. # v has nbrs not in the community, so it remains a boundary node
  89. B_tmp.add(v)
  90. # Add edge between v and nbr to boundary edges
  91. T_tmp.add(frozenset([v, nbr]))
  92. if nbr in B:
  93. # Check if nbr should be removed from boundary nodes
  94. # Go through nbrs nbrs to see if it is still a boundary node
  95. nbr_still_in_B = any(nbr_nbr not in C_tmp for nbr_nbr in G[nbr])
  96. if not nbr_still_in_B:
  97. B_tmp.remove(nbr)
  98. removed_B_nodes.add(nbr)
  99. if nbr in C_tmp:
  100. # Add edge between v and nbr to internal edges
  101. I_tmp.add(frozenset([v, nbr]))
  102. # Remove edges no longer in the boundary
  103. for removed_node in removed_B_nodes:
  104. for removed_node_nbr in G[removed_node]:
  105. if removed_node_nbr not in B_tmp:
  106. T_tmp.discard(frozenset([removed_node_nbr, removed_node]))
  107. I_tmp.discard(frozenset([removed_node_nbr, removed_node]))
  108. R_tmp = len(I_tmp) / len(T_tmp) if len(T_tmp) > 0 else 1
  109. return R_tmp, B_tmp, T_tmp, I_tmp
  110. ALGORITHMS = {
  111. "clauset": _clauset_greedy_source_expansion,
  112. }
  113. def greedy_source_expansion(G, *, source, cutoff=None, method="clauset"):
  114. r"""Find the local community around a source node.
  115. Find the local community around a source node using Greedy Source
  116. Expansion. Greedy Source Expansion generally identifies a local community
  117. starting from the source node and expands it based on the criteria of the
  118. chosen algorithm.
  119. The algorithm is specified with the `method` keyword argument.
  120. * `"clauset"` [1]_ uses local modularity gain to determine local communities.
  121. The algorithm adds nbring nodes that maximize local modularity to the
  122. community iteratively, stopping when no additional nodes improve the modularity
  123. or when a predefined cutoff is reached.
  124. Local modularity measures the density of edges within a community relative
  125. to the total graph. By focusing on local modularity, the algorithm efficiently
  126. uncovers communities around a specific node without requiring global
  127. optimization over the entire graph.
  128. The algorithm assumes that the graph $G$ consists of a known community $C$ and
  129. an unknown set of nodes $U$, which are adjacent to $C$ . The boundary of the
  130. community $B$, consists of nodes in $C$ that have at least one nbr in $U$.
  131. Mathematically, the local modularity is expressed as:
  132. .. math::
  133. R = \frac{I}{T}
  134. where $T$ is the number of edges with one or more endpoints in $B$, and $I$ is the
  135. number of those edges with neither endpoint in $U$.
  136. Parameters
  137. ----------
  138. G : NetworkX graph
  139. The input graph.
  140. source : node
  141. The source node from which the community expansion begins.
  142. cutoff : int, optional (default=None)
  143. The maximum number of nodes to include in the community. If None, the algorithm
  144. expands until no further modularity gain can be made.
  145. method : string, optional (default='clauset')
  146. The algorithm to use to carry out greedy source expansion.
  147. Supported options: 'clauset'. Other inputs produce a ValueError
  148. Returns
  149. -------
  150. set
  151. A set of nodes representing the local community around the source node.
  152. Examples
  153. --------
  154. >>> G = nx.karate_club_graph()
  155. >>> nx.community.greedy_source_expansion(G, source=16)
  156. {16, 0, 4, 5, 6, 10}
  157. Notes
  158. -----
  159. This algorithm is designed for detecting local communities around a specific node,
  160. which is useful for large networks where global community detection is computationally
  161. expensive.
  162. The result of the algorithm may vary based on the structure of the graph, the choice of
  163. the source node, and the presence of ties between nodes during the greedy expansion process.
  164. References
  165. ----------
  166. .. [1] Clauset, Aaron. Finding local community structure in networks.
  167. Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 72, no. 2 (2005): 026132.
  168. https://arxiv.org/pdf/physics/0503036
  169. """
  170. try:
  171. algo = ALGORITHMS[method]
  172. except KeyError as e:
  173. raise ValueError(f"{method} is not a valid choice for an algorithm.") from e
  174. return algo(G, source=source, cutoff=cutoff)