sparsifiers.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. """Functions for computing sparsifiers of graphs."""
  2. import math
  3. import networkx as nx
  4. from networkx.utils import not_implemented_for, py_random_state
  5. __all__ = ["spanner"]
  6. @not_implemented_for("directed")
  7. @not_implemented_for("multigraph")
  8. @py_random_state(3)
  9. @nx._dispatchable(edge_attrs="weight", returns_graph=True)
  10. def spanner(G, stretch, weight=None, seed=None):
  11. """Returns a spanner of the given graph with the given stretch.
  12. A spanner of a graph G = (V, E) with stretch t is a subgraph
  13. H = (V, E_S) such that E_S is a subset of E and the distance between
  14. any pair of nodes in H is at most t times the distance between the
  15. nodes in G.
  16. Parameters
  17. ----------
  18. G : NetworkX graph
  19. An undirected simple graph.
  20. stretch : float
  21. The stretch of the spanner.
  22. weight : object
  23. The edge attribute to use as distance.
  24. seed : integer, random_state, or None (default)
  25. Indicator of random number generation state.
  26. See :ref:`Randomness<randomness>`.
  27. Returns
  28. -------
  29. NetworkX graph
  30. A spanner of the given graph with the given stretch.
  31. Raises
  32. ------
  33. ValueError
  34. If a stretch less than 1 is given.
  35. Notes
  36. -----
  37. This function implements the spanner algorithm by Baswana and Sen,
  38. see [1].
  39. This algorithm is a randomized las vegas algorithm: The expected
  40. running time is O(km) where k = (stretch + 1) // 2 and m is the
  41. number of edges in G. The returned graph is always a spanner of the
  42. given graph with the specified stretch. For weighted graphs the
  43. number of edges in the spanner is O(k * n^(1 + 1 / k)) where k is
  44. defined as above and n is the number of nodes in G. For unweighted
  45. graphs the number of edges is O(n^(1 + 1 / k) + kn).
  46. References
  47. ----------
  48. [1] S. Baswana, S. Sen. A Simple and Linear Time Randomized
  49. Algorithm for Computing Sparse Spanners in Weighted Graphs.
  50. Random Struct. Algorithms 30(4): 532-563 (2007).
  51. """
  52. if stretch < 1:
  53. raise ValueError("stretch must be at least 1")
  54. k = (stretch + 1) // 2
  55. # initialize spanner H with empty edge set
  56. H = nx.empty_graph()
  57. H.add_nodes_from(G.nodes)
  58. # phase 1: forming the clusters
  59. # the residual graph has V' from the paper as its node set
  60. # and E' from the paper as its edge set
  61. residual_graph = _setup_residual_graph(G, weight)
  62. # clustering is a dictionary that maps nodes in a cluster to the
  63. # cluster center
  64. clustering = {v: v for v in G.nodes}
  65. sample_prob = math.pow(G.number_of_nodes(), -1 / k)
  66. size_limit = 2 * math.pow(G.number_of_nodes(), 1 + 1 / k)
  67. i = 0
  68. while i < k - 1:
  69. # step 1: sample centers
  70. sampled_centers = set()
  71. for center in set(clustering.values()):
  72. if seed.random() < sample_prob:
  73. sampled_centers.add(center)
  74. # combined loop for steps 2 and 3
  75. edges_to_add = set()
  76. edges_to_remove = set()
  77. new_clustering = {}
  78. for v in residual_graph.nodes:
  79. if clustering[v] in sampled_centers:
  80. continue
  81. # step 2: find neighboring (sampled) clusters and
  82. # lightest edges to them
  83. lightest_edge_neighbor, lightest_edge_weight = _lightest_edge_dicts(
  84. residual_graph, clustering, v
  85. )
  86. neighboring_sampled_centers = (
  87. set(lightest_edge_weight.keys()) & sampled_centers
  88. )
  89. # step 3: add edges to spanner
  90. if not neighboring_sampled_centers:
  91. # connect to each neighboring center via lightest edge
  92. for neighbor in lightest_edge_neighbor.values():
  93. edges_to_add.add((v, neighbor))
  94. # remove all incident edges
  95. for neighbor in residual_graph.adj[v]:
  96. edges_to_remove.add((v, neighbor))
  97. else: # there is a neighboring sampled center
  98. closest_center = min(
  99. neighboring_sampled_centers, key=lightest_edge_weight.get
  100. )
  101. closest_center_weight = lightest_edge_weight[closest_center]
  102. closest_center_neighbor = lightest_edge_neighbor[closest_center]
  103. edges_to_add.add((v, closest_center_neighbor))
  104. new_clustering[v] = closest_center
  105. # connect to centers with edge weight less than
  106. # closest_center_weight
  107. for center, edge_weight in lightest_edge_weight.items():
  108. if edge_weight < closest_center_weight:
  109. neighbor = lightest_edge_neighbor[center]
  110. edges_to_add.add((v, neighbor))
  111. # remove edges to centers with edge weight less than
  112. # closest_center_weight
  113. for neighbor in residual_graph.adj[v]:
  114. nbr_cluster = clustering[neighbor]
  115. nbr_weight = lightest_edge_weight[nbr_cluster]
  116. if (
  117. nbr_cluster == closest_center
  118. or nbr_weight < closest_center_weight
  119. ):
  120. edges_to_remove.add((v, neighbor))
  121. # check whether iteration added too many edges to spanner,
  122. # if so repeat
  123. if len(edges_to_add) > size_limit:
  124. # an iteration is repeated O(1) times on expectation
  125. continue
  126. # iteration succeeded
  127. i = i + 1
  128. # actually add edges to spanner
  129. for u, v in edges_to_add:
  130. _add_edge_to_spanner(H, residual_graph, u, v, weight)
  131. # actually delete edges from residual graph
  132. residual_graph.remove_edges_from(edges_to_remove)
  133. # copy old clustering data to new_clustering
  134. for node, center in clustering.items():
  135. if center in sampled_centers:
  136. new_clustering[node] = center
  137. clustering = new_clustering
  138. # step 4: remove intra-cluster edges
  139. for u in residual_graph.nodes:
  140. for v in list(residual_graph.adj[u]):
  141. if clustering[u] == clustering[v]:
  142. residual_graph.remove_edge(u, v)
  143. # update residual graph node set
  144. for v in list(residual_graph.nodes):
  145. if v not in clustering:
  146. residual_graph.remove_node(v)
  147. # phase 2: vertex-cluster joining
  148. for v in residual_graph.nodes:
  149. lightest_edge_neighbor, _ = _lightest_edge_dicts(residual_graph, clustering, v)
  150. for neighbor in lightest_edge_neighbor.values():
  151. _add_edge_to_spanner(H, residual_graph, v, neighbor, weight)
  152. return H
  153. def _setup_residual_graph(G, weight):
  154. """Setup residual graph as a copy of G with unique edges weights.
  155. The node set of the residual graph corresponds to the set V' from
  156. the Baswana-Sen paper and the edge set corresponds to the set E'
  157. from the paper.
  158. This function associates distinct weights to the edges of the
  159. residual graph (even for unweighted input graphs), as required by
  160. the algorithm.
  161. Parameters
  162. ----------
  163. G : NetworkX graph
  164. An undirected simple graph.
  165. weight : object
  166. The edge attribute to use as distance.
  167. Returns
  168. -------
  169. NetworkX graph
  170. The residual graph used for the Baswana-Sen algorithm.
  171. """
  172. residual_graph = G.copy()
  173. # establish unique edge weights, even for unweighted graphs
  174. for u, v in G.edges():
  175. if not weight:
  176. residual_graph[u][v]["weight"] = (id(u), id(v))
  177. else:
  178. residual_graph[u][v]["weight"] = (G[u][v][weight], id(u), id(v))
  179. return residual_graph
  180. def _lightest_edge_dicts(residual_graph, clustering, node):
  181. """Find the lightest edge to each cluster.
  182. Searches for the minimum-weight edge to each cluster adjacent to
  183. the given node.
  184. Parameters
  185. ----------
  186. residual_graph : NetworkX graph
  187. The residual graph used by the Baswana-Sen algorithm.
  188. clustering : dictionary
  189. The current clustering of the nodes.
  190. node : node
  191. The node from which the search originates.
  192. Returns
  193. -------
  194. lightest_edge_neighbor, lightest_edge_weight : dictionary, dictionary
  195. lightest_edge_neighbor is a dictionary that maps a center C to
  196. a node v in the corresponding cluster such that the edge from
  197. the given node to v is the lightest edge from the given node to
  198. any node in cluster. lightest_edge_weight maps a center C to the
  199. weight of the aforementioned edge.
  200. Notes
  201. -----
  202. If a cluster has no node that is adjacent to the given node in the
  203. residual graph then the center of the cluster is not a key in the
  204. returned dictionaries.
  205. """
  206. lightest_edge_neighbor = {}
  207. lightest_edge_weight = {}
  208. for neighbor in residual_graph.adj[node]:
  209. nbr_center = clustering[neighbor]
  210. weight = residual_graph[node][neighbor]["weight"]
  211. if (
  212. nbr_center not in lightest_edge_weight
  213. or weight < lightest_edge_weight[nbr_center]
  214. ):
  215. lightest_edge_neighbor[nbr_center] = neighbor
  216. lightest_edge_weight[nbr_center] = weight
  217. return lightest_edge_neighbor, lightest_edge_weight
  218. def _add_edge_to_spanner(H, residual_graph, u, v, weight):
  219. """Add the edge {u, v} to the spanner H and take weight from
  220. the residual graph.
  221. Parameters
  222. ----------
  223. H : NetworkX graph
  224. The spanner under construction.
  225. residual_graph : NetworkX graph
  226. The residual graph used by the Baswana-Sen algorithm. The weight
  227. for the edge is taken from this graph.
  228. u : node
  229. One endpoint of the edge.
  230. v : node
  231. The other endpoint of the edge.
  232. weight : object
  233. The edge attribute to use as distance.
  234. """
  235. H.add_edge(u, v)
  236. if weight:
  237. H[u][v][weight] = residual_graph[u][v]["weight"][0]