divisive.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import functools
  2. import networkx as nx
  3. __all__ = [
  4. "edge_betweenness_partition",
  5. "edge_current_flow_betweenness_partition",
  6. ]
  7. @nx._dispatchable(edge_attrs="weight")
  8. def edge_betweenness_partition(G, number_of_sets, *, weight=None):
  9. """Partition created by iteratively removing the highest edge betweenness edge.
  10. This algorithm works by calculating the edge betweenness for all
  11. edges and removing the edge with the highest value. It is then
  12. determined whether the graph has been broken into at least
  13. `number_of_sets` connected components.
  14. If not the process is repeated.
  15. Parameters
  16. ----------
  17. G : NetworkX Graph, DiGraph or MultiGraph
  18. Graph to be partitioned
  19. number_of_sets : int
  20. Number of sets in the desired partition of the graph
  21. weight : key, optional, default=None
  22. The key to use if using weights for edge betweenness calculation
  23. Returns
  24. -------
  25. C : list of sets
  26. Partition of the nodes of G
  27. Raises
  28. ------
  29. NetworkXError
  30. If number_of_sets is <= 0 or if number_of_sets > len(G)
  31. Examples
  32. --------
  33. >>> G = nx.karate_club_graph()
  34. >>> part = nx.community.edge_betweenness_partition(G, 2)
  35. >>> {0, 1, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21} in part
  36. True
  37. >>> {
  38. ... 2,
  39. ... 8,
  40. ... 9,
  41. ... 14,
  42. ... 15,
  43. ... 18,
  44. ... 20,
  45. ... 22,
  46. ... 23,
  47. ... 24,
  48. ... 25,
  49. ... 26,
  50. ... 27,
  51. ... 28,
  52. ... 29,
  53. ... 30,
  54. ... 31,
  55. ... 32,
  56. ... 33,
  57. ... } in part
  58. True
  59. See Also
  60. --------
  61. edge_current_flow_betweenness_partition
  62. Notes
  63. -----
  64. This algorithm is fairly slow, as both the calculation of connected
  65. components and edge betweenness relies on all pairs shortest
  66. path algorithms. They could potentially be combined to cut down
  67. on overall computation time.
  68. References
  69. ----------
  70. .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
  71. Volume 486, Issue 3-5 p. 75-174
  72. http://arxiv.org/abs/0906.0612
  73. """
  74. if number_of_sets <= 0:
  75. raise nx.NetworkXError("number_of_sets must be >0")
  76. if number_of_sets == 1:
  77. return [set(G)]
  78. if number_of_sets == len(G):
  79. return [{n} for n in G]
  80. if number_of_sets > len(G):
  81. raise nx.NetworkXError("number_of_sets must be <= len(G)")
  82. H = G.copy()
  83. partition = list(nx.connected_components(H))
  84. while len(partition) < number_of_sets:
  85. ranking = nx.edge_betweenness_centrality(H, weight=weight)
  86. edge = max(ranking, key=ranking.get)
  87. H.remove_edge(*edge)
  88. partition = list(nx.connected_components(H))
  89. return partition
  90. @nx._dispatchable(edge_attrs="weight")
  91. def edge_current_flow_betweenness_partition(G, number_of_sets, *, weight=None):
  92. """Partition created by removing the highest edge current flow betweenness edge.
  93. This algorithm works by calculating the edge current flow
  94. betweenness for all edges and removing the edge with the
  95. highest value. It is then determined whether the graph has
  96. been broken into at least `number_of_sets` connected
  97. components. If not the process is repeated.
  98. Parameters
  99. ----------
  100. G : NetworkX Graph, DiGraph or MultiGraph
  101. Graph to be partitioned
  102. number_of_sets : int
  103. Number of sets in the desired partition of the graph
  104. weight : key, optional (default=None)
  105. The edge attribute key to use as weights for
  106. edge current flow betweenness calculations
  107. Returns
  108. -------
  109. C : list of sets
  110. Partition of G
  111. Raises
  112. ------
  113. NetworkXError
  114. If number_of_sets is <= 0 or number_of_sets > len(G)
  115. Examples
  116. --------
  117. >>> G = nx.karate_club_graph()
  118. >>> part = nx.community.edge_current_flow_betweenness_partition(G, 2)
  119. >>> {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 16, 17, 19, 21} in part
  120. True
  121. >>> {8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33} in part
  122. True
  123. See Also
  124. --------
  125. edge_betweenness_partition
  126. Notes
  127. -----
  128. This algorithm is extremely slow, as the recalculation of the edge
  129. current flow betweenness is extremely slow.
  130. References
  131. ----------
  132. .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
  133. Volume 486, Issue 3-5 p. 75-174
  134. http://arxiv.org/abs/0906.0612
  135. """
  136. if number_of_sets <= 0:
  137. raise nx.NetworkXError("number_of_sets must be >0")
  138. elif number_of_sets == 1:
  139. return [set(G)]
  140. elif number_of_sets == len(G):
  141. return [{n} for n in G]
  142. elif number_of_sets > len(G):
  143. raise nx.NetworkXError("number_of_sets must be <= len(G)")
  144. rank = functools.partial(
  145. nx.edge_current_flow_betweenness_centrality, normalized=False, weight=weight
  146. )
  147. # current flow requires a connected network so we track the components explicitly
  148. H = G.copy()
  149. partition = list(nx.connected_components(H))
  150. if len(partition) > 1:
  151. Hcc_subgraphs = [H.subgraph(cc).copy() for cc in partition]
  152. else:
  153. Hcc_subgraphs = [H]
  154. ranking = {}
  155. for Hcc in Hcc_subgraphs:
  156. ranking.update(rank(Hcc))
  157. while len(partition) < number_of_sets:
  158. edge = max(ranking, key=ranking.get)
  159. for cc, Hcc in zip(partition, Hcc_subgraphs):
  160. if edge[0] in cc:
  161. Hcc.remove_edge(*edge)
  162. del ranking[edge]
  163. splitcc_list = list(nx.connected_components(Hcc))
  164. if len(splitcc_list) > 1:
  165. # there are 2 connected components. split off smaller one
  166. cc_new = min(splitcc_list, key=len)
  167. Hcc_new = Hcc.subgraph(cc_new).copy()
  168. # update edge rankings for Hcc_new
  169. newranks = rank(Hcc_new)
  170. for e, r in newranks.items():
  171. ranking[e if e in ranking else e[::-1]] = r
  172. # append new cc and Hcc to their lists.
  173. partition.append(cc_new)
  174. Hcc_subgraphs.append(Hcc_new)
  175. # leave existing cc and Hcc in their lists, but shrink them
  176. Hcc.remove_nodes_from(cc_new)
  177. cc.difference_update(cc_new)
  178. # update edge rankings for Hcc whether it was split or not
  179. newranks = rank(Hcc)
  180. for e, r in newranks.items():
  181. ranking[e if e in ranking else e[::-1]] = r
  182. break
  183. return partition