cuts.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. """Functions for finding and evaluating cuts in a graph."""
  2. from itertools import chain
  3. import networkx as nx
  4. __all__ = [
  5. "boundary_expansion",
  6. "conductance",
  7. "cut_size",
  8. "edge_expansion",
  9. "mixing_expansion",
  10. "node_expansion",
  11. "normalized_cut_size",
  12. "volume",
  13. ]
  14. # TODO STILL NEED TO UPDATE ALL THE DOCUMENTATION!
  15. @nx._dispatchable(edge_attrs="weight")
  16. def cut_size(G, S, T=None, weight=None):
  17. """Returns the size of the cut between two sets of nodes.
  18. A *cut* is a partition of the nodes of a graph into two sets. The
  19. *cut size* is the sum of the weights of the edges "between" the two
  20. sets of nodes.
  21. Parameters
  22. ----------
  23. G : NetworkX graph
  24. S : collection
  25. A collection of nodes in `G`.
  26. T : collection
  27. A collection of nodes in `G`. If not specified, this is taken to
  28. be the set complement of `S`.
  29. weight : object
  30. Edge attribute key to use as weight. If not specified, edges
  31. have weight one.
  32. Returns
  33. -------
  34. number
  35. Total weight of all edges from nodes in set `S` to nodes in
  36. set `T` (and, in the case of directed graphs, all edges from
  37. nodes in `T` to nodes in `S`).
  38. Examples
  39. --------
  40. In the graph with two cliques joined by a single edges, the natural
  41. bipartition of the graph into two blocks, one for each clique,
  42. yields a cut of weight one:
  43. >>> G = nx.barbell_graph(3, 0)
  44. >>> S = {0, 1, 2}
  45. >>> T = {3, 4, 5}
  46. >>> nx.cut_size(G, S, T)
  47. 1
  48. Each parallel edge in a multigraph is counted when determining the
  49. cut size:
  50. >>> G = nx.MultiGraph(["ab", "ab"])
  51. >>> S = {"a"}
  52. >>> T = {"b"}
  53. >>> nx.cut_size(G, S, T)
  54. 2
  55. Notes
  56. -----
  57. In a multigraph, the cut size is the total weight of edges including
  58. multiplicity.
  59. """
  60. edges = nx.edge_boundary(G, S, T, data=weight, default=1)
  61. if G.is_directed():
  62. edges = chain(edges, nx.edge_boundary(G, T, S, data=weight, default=1))
  63. return sum(weight for u, v, weight in edges)
  64. @nx._dispatchable(edge_attrs="weight")
  65. def volume(G, S, weight=None):
  66. """Returns the volume of a set of nodes.
  67. The *volume* of a set *S* is the sum of the (out-)degrees of nodes
  68. in *S* (taking into account parallel edges in multigraphs). [1]
  69. Parameters
  70. ----------
  71. G : NetworkX graph
  72. S : collection
  73. A collection of nodes in `G`.
  74. weight : object
  75. Edge attribute key to use as weight. If not specified, edges
  76. have weight one.
  77. Returns
  78. -------
  79. number
  80. The volume of the set of nodes represented by `S` in the graph
  81. `G`.
  82. See also
  83. --------
  84. conductance
  85. cut_size
  86. edge_expansion
  87. edge_boundary
  88. normalized_cut_size
  89. References
  90. ----------
  91. .. [1] David Gleich.
  92. *Hierarchical Directed Spectral Graph Partitioning*.
  93. <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
  94. """
  95. degree = G.out_degree if G.is_directed() else G.degree
  96. return sum(d for v, d in degree(S, weight=weight))
  97. @nx._dispatchable(edge_attrs="weight")
  98. def normalized_cut_size(G, S, T=None, weight=None):
  99. """Returns the normalized size of the cut between two sets of nodes.
  100. The *normalized cut size* is the cut size times the sum of the
  101. reciprocal sizes of the volumes of the two sets. [1]
  102. Parameters
  103. ----------
  104. G : NetworkX graph
  105. S : collection
  106. A collection of nodes in `G`.
  107. T : collection
  108. A collection of nodes in `G`.
  109. weight : object
  110. Edge attribute key to use as weight. If not specified, edges
  111. have weight one.
  112. Returns
  113. -------
  114. number
  115. The normalized cut size between the two sets `S` and `T`.
  116. Notes
  117. -----
  118. In a multigraph, the cut size is the total weight of edges including
  119. multiplicity.
  120. See also
  121. --------
  122. conductance
  123. cut_size
  124. edge_expansion
  125. volume
  126. References
  127. ----------
  128. .. [1] David Gleich.
  129. *Hierarchical Directed Spectral Graph Partitioning*.
  130. <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
  131. """
  132. if T is None:
  133. T = set(G) - set(S)
  134. num_cut_edges = cut_size(G, S, T=T, weight=weight)
  135. volume_S = volume(G, S, weight=weight)
  136. volume_T = volume(G, T, weight=weight)
  137. return num_cut_edges * ((1 / volume_S) + (1 / volume_T))
  138. @nx._dispatchable(edge_attrs="weight")
  139. def conductance(G, S, T=None, weight=None):
  140. """Returns the conductance of two sets of nodes.
  141. The *conductance* is the quotient of the cut size and the smaller of
  142. the volumes of the two sets. [1]
  143. Parameters
  144. ----------
  145. G : NetworkX graph
  146. S : collection
  147. A collection of nodes in `G`.
  148. T : collection
  149. A collection of nodes in `G`.
  150. weight : object
  151. Edge attribute key to use as weight. If not specified, edges
  152. have weight one.
  153. Returns
  154. -------
  155. number
  156. The conductance between the two sets `S` and `T`.
  157. See also
  158. --------
  159. cut_size
  160. edge_expansion
  161. normalized_cut_size
  162. volume
  163. References
  164. ----------
  165. .. [1] David Gleich.
  166. *Hierarchical Directed Spectral Graph Partitioning*.
  167. <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
  168. """
  169. if T is None:
  170. T = set(G) - set(S)
  171. num_cut_edges = cut_size(G, S, T, weight=weight)
  172. volume_S = volume(G, S, weight=weight)
  173. volume_T = volume(G, T, weight=weight)
  174. return num_cut_edges / min(volume_S, volume_T)
  175. @nx._dispatchable(edge_attrs="weight")
  176. def edge_expansion(G, S, T=None, weight=None):
  177. """Returns the edge expansion between two node sets.
  178. The *edge expansion* is the quotient of the cut size and the smaller
  179. of the cardinalities of the two sets. [1]
  180. Parameters
  181. ----------
  182. G : NetworkX graph
  183. S : collection
  184. A collection of nodes in `G`.
  185. T : collection
  186. A collection of nodes in `G`.
  187. weight : object
  188. Edge attribute key to use as weight. If not specified, edges
  189. have weight one.
  190. Returns
  191. -------
  192. number
  193. The edge expansion between the two sets `S` and `T`.
  194. See also
  195. --------
  196. boundary_expansion
  197. mixing_expansion
  198. node_expansion
  199. References
  200. ----------
  201. .. [1] Fan Chung.
  202. *Spectral Graph Theory*.
  203. (CBMS Regional Conference Series in Mathematics, No. 92),
  204. American Mathematical Society, 1997, ISBN 0-8218-0315-8
  205. <http://www.math.ucsd.edu/~fan/research/revised.html>
  206. """
  207. if T is None:
  208. T = set(G) - set(S)
  209. num_cut_edges = cut_size(G, S, T=T, weight=weight)
  210. return num_cut_edges / min(len(S), len(T))
  211. @nx._dispatchable(edge_attrs="weight")
  212. def mixing_expansion(G, S, T=None, weight=None):
  213. """Returns the mixing expansion between two node sets.
  214. The *mixing expansion* is the quotient of the cut size and twice the
  215. number of edges in the graph. [1]
  216. Parameters
  217. ----------
  218. G : NetworkX graph
  219. S : collection
  220. A collection of nodes in `G`.
  221. T : collection
  222. A collection of nodes in `G`.
  223. weight : object
  224. Edge attribute key to use as weight. If not specified, edges
  225. have weight one.
  226. Returns
  227. -------
  228. number
  229. The mixing expansion between the two sets `S` and `T`.
  230. See also
  231. --------
  232. boundary_expansion
  233. edge_expansion
  234. node_expansion
  235. References
  236. ----------
  237. .. [1] Vadhan, Salil P.
  238. "Pseudorandomness."
  239. *Foundations and Trends
  240. in Theoretical Computer Science* 7.1–3 (2011): 1–336.
  241. <https://doi.org/10.1561/0400000010>
  242. """
  243. num_cut_edges = cut_size(G, S, T=T, weight=weight)
  244. num_total_edges = G.number_of_edges()
  245. return num_cut_edges / (2 * num_total_edges)
  246. # TODO What is the generalization to two arguments, S and T? Does the
  247. # denominator become `min(len(S), len(T))`?
  248. @nx._dispatchable
  249. def node_expansion(G, S):
  250. """Returns the node expansion of the set `S`.
  251. The *node expansion* is the quotient of the size of the node
  252. boundary of *S* and the cardinality of *S*. [1]
  253. Parameters
  254. ----------
  255. G : NetworkX graph
  256. S : collection
  257. A collection of nodes in `G`.
  258. Returns
  259. -------
  260. number
  261. The node expansion of the set `S`.
  262. See also
  263. --------
  264. boundary_expansion
  265. edge_expansion
  266. mixing_expansion
  267. References
  268. ----------
  269. .. [1] Vadhan, Salil P.
  270. "Pseudorandomness."
  271. *Foundations and Trends
  272. in Theoretical Computer Science* 7.1–3 (2011): 1–336.
  273. <https://doi.org/10.1561/0400000010>
  274. """
  275. neighborhood = set(chain.from_iterable(G.neighbors(v) for v in S))
  276. return len(neighborhood) / len(S)
  277. @nx._dispatchable
  278. def boundary_expansion(G, S):
  279. """Returns the boundary expansion of the set `S`.
  280. The *boundary expansion* of a set `S` is the ratio between the size of its
  281. node boundary and the cardinality of the set itself [1]_ .
  282. Parameters
  283. ----------
  284. G : NetworkX graph
  285. The input graph.
  286. S : collection
  287. A collection of nodes in `G`.
  288. Returns
  289. -------
  290. number
  291. The boundary expansion ratio: size of node boundary / size of `S`.
  292. Examples
  293. --------
  294. The node boundary is {2, 3} (size 2), divided by ``|S|=2``:
  295. >>> G = nx.cycle_graph(4)
  296. >>> S = {0, 1}
  297. >>> nx.boundary_expansion(G, S)
  298. 1.0
  299. For disconnected sets, e.g. here where the node boundary is ``{1, 3, 5}``:
  300. >>> G = nx.cycle_graph(6)
  301. >>> S = {0, 2, 4}
  302. >>> nx.boundary_expansion(G, S)
  303. 1.0
  304. See also
  305. --------
  306. :func:`~networkx.algorithms.boundary.node_boundary`
  307. edge_expansion
  308. mixing_expansion
  309. node_expansion
  310. Notes
  311. -----
  312. The node boundary is defined as all nodes not in `S` that are adjacent to
  313. nodes in `S`.
  314. References
  315. ----------
  316. .. [1] Vadhan, Salil P.
  317. "Pseudorandomness." *Foundations and Trends in Theoretical Computer Science*
  318. 7.1–3 (2011): 1–336. <https://doi.org/10.1561/0400000010>
  319. """
  320. return len(nx.node_boundary(G, S)) / len(S)