kcomponents.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. """
  2. Moody and White algorithm for k-components
  3. """
  4. from collections import defaultdict
  5. from itertools import combinations
  6. from operator import itemgetter
  7. import networkx as nx
  8. # Define the default maximum flow function.
  9. from networkx.algorithms.flow import edmonds_karp
  10. from networkx.utils import not_implemented_for
  11. default_flow_func = edmonds_karp
  12. __all__ = ["k_components"]
  13. @not_implemented_for("directed")
  14. @nx._dispatchable
  15. def k_components(G, flow_func=None):
  16. r"""Returns the k-component structure of a graph G.
  17. A `k`-component is a maximal subgraph of a graph G that has, at least,
  18. node connectivity `k`: we need to remove at least `k` nodes to break it
  19. into more components. `k`-components have an inherent hierarchical
  20. structure because they are nested in terms of connectivity: a connected
  21. graph can contain several 2-components, each of which can contain
  22. one or more 3-components, and so forth.
  23. Parameters
  24. ----------
  25. G : NetworkX graph
  26. flow_func : function
  27. Function to perform the underlying flow computations. Default value
  28. :meth:`edmonds_karp`. This function performs better in sparse graphs with
  29. right tailed degree distributions. :meth:`shortest_augmenting_path` will
  30. perform better in denser graphs.
  31. Returns
  32. -------
  33. k_components : dict
  34. Dictionary with all connectivity levels `k` in the input Graph as keys
  35. and a list of sets of nodes that form a k-component of level `k` as
  36. values.
  37. Raises
  38. ------
  39. NetworkXNotImplemented
  40. If the input graph is directed.
  41. Examples
  42. --------
  43. >>> # Petersen graph has 10 nodes and it is triconnected, thus all
  44. >>> # nodes are in a single component on all three connectivity levels
  45. >>> G = nx.petersen_graph()
  46. >>> k_components = nx.k_components(G)
  47. Notes
  48. -----
  49. Moody and White [1]_ (appendix A) provide an algorithm for identifying
  50. k-components in a graph, which is based on Kanevsky's algorithm [2]_
  51. for finding all minimum-size node cut-sets of a graph (implemented in
  52. :meth:`all_node_cuts` function):
  53. 1. Compute node connectivity, k, of the input graph G.
  54. 2. Identify all k-cutsets at the current level of connectivity using
  55. Kanevsky's algorithm.
  56. 3. Generate new graph components based on the removal of
  57. these cutsets. Nodes in a cutset belong to both sides
  58. of the induced cut.
  59. 4. If the graph is neither complete nor trivial, return to 1;
  60. else end.
  61. This implementation also uses some heuristics (see [3]_ for details)
  62. to speed up the computation.
  63. See also
  64. --------
  65. node_connectivity
  66. all_node_cuts
  67. biconnected_components : special case of this function when k=2
  68. k_edge_components : similar to this function, but uses edge-connectivity
  69. instead of node-connectivity
  70. References
  71. ----------
  72. .. [1] Moody, J. and D. White (2003). Social cohesion and embeddedness:
  73. A hierarchical conception of social groups.
  74. American Sociological Review 68(1), 103--28.
  75. http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf
  76. .. [2] Kanevsky, A. (1993). Finding all minimum-size separating vertex
  77. sets in a graph. Networks 23(6), 533--541.
  78. http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract
  79. .. [3] Torrents, J. and F. Ferraro (2015). Structural Cohesion:
  80. Visualization and Heuristics for Fast Computation.
  81. https://arxiv.org/pdf/1503.04476v1
  82. """
  83. # Dictionary with connectivity level (k) as keys and a list of
  84. # sets of nodes that form a k-component as values. Note that
  85. # k-components can overlap (but only k - 1 nodes).
  86. k_components = defaultdict(list)
  87. # Define default flow function
  88. if flow_func is None:
  89. flow_func = default_flow_func
  90. # Bicomponents as a base to check for higher order k-components
  91. for component in nx.connected_components(G):
  92. # isolated nodes have connectivity 0
  93. comp = set(component)
  94. if len(comp) > 1:
  95. k_components[1].append(comp)
  96. bicomponents = [G.subgraph(c) for c in nx.biconnected_components(G)]
  97. for bicomponent in bicomponents:
  98. bicomp = set(bicomponent)
  99. # avoid considering dyads as bicomponents
  100. if len(bicomp) > 2:
  101. k_components[2].append(bicomp)
  102. for B in bicomponents:
  103. if len(B) <= 2:
  104. continue
  105. k = nx.node_connectivity(B, flow_func=flow_func)
  106. if k > 2:
  107. k_components[k].append(set(B))
  108. # Perform cuts in a DFS like order.
  109. cuts = list(nx.all_node_cuts(B, k=k, flow_func=flow_func))
  110. stack = [(k, _generate_partition(B, cuts, k))]
  111. while stack:
  112. (parent_k, partition) = stack[-1]
  113. try:
  114. nodes = next(partition)
  115. C = B.subgraph(nodes)
  116. this_k = nx.node_connectivity(C, flow_func=flow_func)
  117. if this_k > parent_k and this_k > 2:
  118. k_components[this_k].append(set(C))
  119. cuts = list(nx.all_node_cuts(C, k=this_k, flow_func=flow_func))
  120. if cuts:
  121. stack.append((this_k, _generate_partition(C, cuts, this_k)))
  122. except StopIteration:
  123. stack.pop()
  124. # This is necessary because k-components may only be reported at their
  125. # maximum k level. But we want to return a dictionary in which keys are
  126. # connectivity levels and values list of sets of components, without
  127. # skipping any connectivity level. Also, it's possible that subsets of
  128. # an already detected k-component appear at a level k. Checking for this
  129. # in the while loop above penalizes the common case. Thus we also have to
  130. # _consolidate all connectivity levels in _reconstruct_k_components.
  131. return _reconstruct_k_components(k_components)
  132. def _consolidate(sets, k):
  133. """Merge sets that share k or more elements.
  134. See: http://rosettacode.org/wiki/Set_consolidation
  135. The iterative python implementation posted there is
  136. faster than this because of the overhead of building a
  137. Graph and calling nx.connected_components, but it's not
  138. clear for us if we can use it in NetworkX because there
  139. is no licence for the code.
  140. """
  141. G = nx.Graph()
  142. nodes = dict(enumerate(sets))
  143. G.add_nodes_from(nodes)
  144. G.add_edges_from(
  145. (u, v) for u, v in combinations(nodes, 2) if len(nodes[u] & nodes[v]) >= k
  146. )
  147. for component in nx.connected_components(G):
  148. yield set.union(*[nodes[n] for n in component])
  149. def _generate_partition(G, cuts, k):
  150. def has_nbrs_in_partition(G, node, partition):
  151. return any(n in partition for n in G[node])
  152. components = []
  153. n_in_cuts = {n for cut in cuts for n in cut}
  154. nodes = {n for n, d in G.degree() if d > k} - n_in_cuts
  155. H = G.subgraph(nodes)
  156. for cc in map(set, nx.connected_components(H)):
  157. component = cc | {n for n in n_in_cuts if has_nbrs_in_partition(G, n, cc)}
  158. if len(component) < G.order():
  159. components.append(component)
  160. yield from _consolidate(components, k + 1)
  161. def _reconstruct_k_components(k_comps):
  162. result = {}
  163. max_k = max(k_comps) if k_comps else 0
  164. for k in range(max_k, 0, -1):
  165. if k == max_k:
  166. result[k] = list(_consolidate(k_comps[k], k))
  167. elif k not in k_comps:
  168. result[k] = list(_consolidate(result[k + 1], k))
  169. else:
  170. nodes_at_k = set.union(*k_comps[k])
  171. to_add = [c for c in result[k + 1] if any(n not in nodes_at_k for n in c)]
  172. if to_add:
  173. result[k] = list(_consolidate(k_comps[k] + to_add, k))
  174. else:
  175. result[k] = list(_consolidate(k_comps[k], k))
  176. return result
  177. def build_k_number_dict(kcomps):
  178. return {
  179. node: k
  180. for k, comps in sorted(kcomps.items(), key=itemgetter(0))
  181. for comp in comps
  182. for node in comp
  183. }