cluster.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732
  1. """Algorithms to characterize the number of triangles in a graph."""
  2. from collections import Counter
  3. from itertools import chain, combinations
  4. import networkx as nx
  5. from networkx.utils import not_implemented_for
  6. __all__ = [
  7. "triangles",
  8. "all_triangles",
  9. "average_clustering",
  10. "clustering",
  11. "transitivity",
  12. "square_clustering",
  13. "generalized_degree",
  14. ]
  15. @not_implemented_for("directed")
  16. @nx._dispatchable
  17. def triangles(G, nodes=None):
  18. """Compute the number of triangles.
  19. Finds the number of triangles that include a node as one vertex.
  20. Parameters
  21. ----------
  22. G : graph
  23. A networkx graph
  24. nodes : node, iterable of nodes, or None (default=None)
  25. If a singleton node, return the number of triangles for that node.
  26. If an iterable, compute the number of triangles for each of those nodes.
  27. If `None` (the default) compute the number of triangles for all nodes in `G`.
  28. Returns
  29. -------
  30. out : dict or int
  31. If `nodes` is a container of nodes, returns number of triangles keyed by node (dict).
  32. If `nodes` is a specific node, returns number of triangles for the node (int).
  33. Examples
  34. --------
  35. >>> G = nx.complete_graph(5)
  36. >>> print(nx.triangles(G, 0))
  37. 6
  38. >>> print(nx.triangles(G))
  39. {0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
  40. >>> print(list(nx.triangles(G, [0, 1]).values()))
  41. [6, 6]
  42. The total number of unique triangles in `G` can be determined by summing
  43. the number of triangles for each node and dividing by 3 (because a given
  44. triangle gets counted three times, once for each of its nodes).
  45. >>> sum(nx.triangles(G).values()) // 3
  46. 10
  47. Notes
  48. -----
  49. Self loops are ignored.
  50. """
  51. if nodes is not None:
  52. # If `nodes` represents a single node, return only its number of triangles
  53. if nodes in G:
  54. return next(_triangles_and_degree_iter(G, nodes))[2] // 2
  55. # if `nodes` is a container of nodes, then return a
  56. # dictionary mapping node to number of triangles.
  57. return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)}
  58. # if nodes is None, then compute triangles for the complete graph
  59. # dict used to avoid visiting the same nodes twice
  60. # this allows calculating/counting each triangle only once
  61. later_nbrs = {}
  62. # iterate over the nodes in a graph
  63. for node, neighbors in G.adjacency():
  64. later_nbrs[node] = {n for n in neighbors if n not in later_nbrs and n != node}
  65. # instantiate Counter for each node to include isolated nodes
  66. # add 1 to the count if a nodes neighbor's neighbor is also a neighbor
  67. triangle_counts = Counter(dict.fromkeys(G, 0))
  68. for node1, neighbors in later_nbrs.items():
  69. for node2 in neighbors:
  70. third_nodes = neighbors & later_nbrs[node2]
  71. m = len(third_nodes)
  72. triangle_counts[node1] += m
  73. triangle_counts[node2] += m
  74. triangle_counts.update(third_nodes)
  75. return dict(triangle_counts)
  76. @not_implemented_for("multigraph")
  77. def _triangles_and_degree_iter(G, nodes=None):
  78. """Return an iterator of (node, degree, triangles, generalized degree).
  79. This double counts triangles so you may want to divide by 2.
  80. See degree(), triangles() and generalized_degree() for definitions
  81. and details.
  82. """
  83. if nodes is None:
  84. nodes_nbrs = G.adj.items()
  85. else:
  86. nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
  87. for v, v_nbrs in nodes_nbrs:
  88. vs = set(v_nbrs) - {v}
  89. gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
  90. ntriangles = sum(k * val for k, val in gen_degree.items())
  91. yield (v, len(vs), ntriangles, gen_degree)
  92. @not_implemented_for("multigraph")
  93. def _weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
  94. """Return an iterator of (node, degree, weighted_triangles).
  95. Used for weighted clustering.
  96. Note: this returns the geometric average weight of edges in the triangle.
  97. Also, each triangle is counted twice (each direction).
  98. So you may want to divide by 2.
  99. """
  100. import numpy as np
  101. if weight is None or G.number_of_edges() == 0:
  102. max_weight = 1
  103. else:
  104. max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
  105. if nodes is None:
  106. nodes_nbrs = G.adj.items()
  107. else:
  108. nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
  109. def wt(u, v):
  110. return G[u][v].get(weight, 1) / max_weight
  111. for i, nbrs in nodes_nbrs:
  112. inbrs = set(nbrs) - {i}
  113. weighted_triangles = 0
  114. seen = set()
  115. for j in inbrs:
  116. seen.add(j)
  117. # This avoids counting twice -- we double at the end.
  118. jnbrs = set(G[j]) - seen
  119. # Only compute the edge weight once, before the inner inner
  120. # loop.
  121. wij = wt(i, j)
  122. weighted_triangles += np.cbrt(
  123. [(wij * wt(j, k) * wt(k, i)) for k in inbrs & jnbrs]
  124. ).sum()
  125. yield (i, len(inbrs), 2 * float(weighted_triangles))
  126. @not_implemented_for("multigraph")
  127. def _directed_triangles_and_degree_iter(G, nodes=None):
  128. """Return an iterator of
  129. (node, total_degree, reciprocal_degree, directed_triangles).
  130. Used for directed clustering.
  131. Note that unlike `_triangles_and_degree_iter()`, this function counts
  132. directed triangles so does not count triangles twice.
  133. """
  134. nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
  135. for i, preds, succs in nodes_nbrs:
  136. ipreds = set(preds) - {i}
  137. isuccs = set(succs) - {i}
  138. directed_triangles = 0
  139. for j in chain(ipreds, isuccs):
  140. jpreds = set(G._pred[j]) - {j}
  141. jsuccs = set(G._succ[j]) - {j}
  142. directed_triangles += sum(
  143. 1
  144. for k in chain(
  145. (ipreds & jpreds),
  146. (ipreds & jsuccs),
  147. (isuccs & jpreds),
  148. (isuccs & jsuccs),
  149. )
  150. )
  151. dtotal = len(ipreds) + len(isuccs)
  152. dbidirectional = len(ipreds & isuccs)
  153. yield (i, dtotal, dbidirectional, directed_triangles)
  154. @not_implemented_for("multigraph")
  155. def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
  156. """Return an iterator of
  157. (node, total_degree, reciprocal_degree, directed_weighted_triangles).
  158. Used for directed weighted clustering.
  159. Note that unlike `_weighted_triangles_and_degree_iter()`, this function counts
  160. directed triangles so does not count triangles twice.
  161. """
  162. import numpy as np
  163. if weight is None or G.number_of_edges() == 0:
  164. max_weight = 1
  165. else:
  166. max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
  167. nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
  168. def wt(u, v):
  169. return G[u][v].get(weight, 1) / max_weight
  170. for i, preds, succs in nodes_nbrs:
  171. ipreds = set(preds) - {i}
  172. isuccs = set(succs) - {i}
  173. directed_triangles = 0
  174. for j in ipreds:
  175. jpreds = set(G._pred[j]) - {j}
  176. jsuccs = set(G._succ[j]) - {j}
  177. directed_triangles += np.cbrt(
  178. [(wt(j, i) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds]
  179. ).sum()
  180. directed_triangles += np.cbrt(
  181. [(wt(j, i) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs]
  182. ).sum()
  183. directed_triangles += np.cbrt(
  184. [(wt(j, i) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds]
  185. ).sum()
  186. directed_triangles += np.cbrt(
  187. [(wt(j, i) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs]
  188. ).sum()
  189. for j in isuccs:
  190. jpreds = set(G._pred[j]) - {j}
  191. jsuccs = set(G._succ[j]) - {j}
  192. directed_triangles += np.cbrt(
  193. [(wt(i, j) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds]
  194. ).sum()
  195. directed_triangles += np.cbrt(
  196. [(wt(i, j) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs]
  197. ).sum()
  198. directed_triangles += np.cbrt(
  199. [(wt(i, j) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds]
  200. ).sum()
  201. directed_triangles += np.cbrt(
  202. [(wt(i, j) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs]
  203. ).sum()
  204. dtotal = len(ipreds) + len(isuccs)
  205. dbidirectional = len(ipreds & isuccs)
  206. yield (i, dtotal, dbidirectional, float(directed_triangles))
  207. @not_implemented_for("directed")
  208. @nx._dispatchable
  209. def all_triangles(G, nbunch=None):
  210. """
  211. Yields all unique triangles in an undirected graph.
  212. A triangle is a set of three distinct nodes where each node is connected to
  213. the other two.
  214. Parameters
  215. ----------
  216. G : NetworkX graph
  217. An undirected graph.
  218. nbunch : node, iterable of nodes, or None (default=None)
  219. If a node or iterable of nodes, only triangles involving at least one
  220. node in `nbunch` are yielded.
  221. If ``None``, yields all unique triangles in the graph.
  222. Yields
  223. ------
  224. tuple
  225. A tuple of three nodes forming a triangle ``(u, v, w)``.
  226. Examples
  227. --------
  228. >>> G = nx.complete_graph(4)
  229. >>> sorted([sorted(t) for t in all_triangles(G)])
  230. [[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]]
  231. Notes
  232. -----
  233. This algorithm ensures each triangle is yielded once using an internal node ordering.
  234. In multigraphs, triangles are identified by their unique set of nodes,
  235. ignoring multiple edges between the same nodes. Self-loops are ignored.
  236. Runs in ``O(m * d)`` time in the worst case, where ``m`` the number of edges
  237. and ``d`` the maximum degree.
  238. See Also
  239. --------
  240. :func:`~networkx.algorithms.triads.all_triads` : related function for directed graphs
  241. """
  242. if nbunch is None:
  243. nbunch = relevant_nodes = G
  244. else:
  245. nbunch = dict.fromkeys(G.nbunch_iter(nbunch))
  246. relevant_nodes = chain(
  247. nbunch,
  248. (nbr for node in nbunch for nbr in G.neighbors(node) if nbr not in nbunch),
  249. )
  250. node_to_id = {node: i for i, node in enumerate(relevant_nodes)}
  251. for u in nbunch:
  252. u_id = node_to_id[u]
  253. u_nbrs = G._adj[u].keys()
  254. for v in u_nbrs:
  255. v_id = node_to_id.get(v, -1)
  256. if v_id <= u_id:
  257. continue
  258. v_nbrs = G._adj[v].keys()
  259. for w in v_nbrs & u_nbrs:
  260. if node_to_id.get(w, -1) > v_id:
  261. yield u, v, w
  262. @nx._dispatchable(edge_attrs="weight")
  263. def average_clustering(G, nodes=None, weight=None, count_zeros=True):
  264. r"""Compute the average clustering coefficient for the graph G.
  265. The clustering coefficient for the graph is the average,
  266. .. math::
  267. C = \frac{1}{n}\sum_{v \in G} c_v,
  268. where :math:`n` is the number of nodes in `G`.
  269. Parameters
  270. ----------
  271. G : graph
  272. nodes : container of nodes, optional (default=all nodes in G)
  273. Compute average clustering for nodes in this container.
  274. weight : string or None, optional (default=None)
  275. The edge attribute that holds the numerical value used as a weight.
  276. If None, then each edge has weight 1.
  277. count_zeros : bool
  278. If False include only the nodes with nonzero clustering in the average.
  279. Returns
  280. -------
  281. avg : float
  282. Average clustering
  283. Examples
  284. --------
  285. >>> G = nx.complete_graph(5)
  286. >>> print(nx.average_clustering(G))
  287. 1.0
  288. Notes
  289. -----
  290. This is a space saving routine; it might be faster
  291. to use the clustering function to get a list and then take the average.
  292. Self loops are ignored.
  293. References
  294. ----------
  295. .. [1] Generalizations of the clustering coefficient to weighted
  296. complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
  297. K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
  298. http://jponnela.com/web_documents/a9.pdf
  299. .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated
  300. nodes and leafs on clustering measures for small-world networks.
  301. https://arxiv.org/abs/0802.2512
  302. """
  303. c = clustering(G, nodes, weight=weight).values()
  304. if not count_zeros:
  305. c = [v for v in c if abs(v) > 0]
  306. return sum(c) / len(c)
  307. @nx._dispatchable(edge_attrs="weight")
  308. def clustering(G, nodes=None, weight=None):
  309. r"""Compute the clustering coefficient for nodes.
  310. For unweighted graphs, the clustering of a node :math:`u`
  311. is the fraction of possible triangles through that node that exist,
  312. .. math::
  313. c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},
  314. where :math:`T(u)` is the number of triangles through node :math:`u` and
  315. :math:`deg(u)` is the degree of :math:`u`.
  316. For weighted graphs, there are several ways to define clustering [1]_.
  317. the one used here is defined
  318. as the geometric average of the subgraph edge weights [2]_,
  319. .. math::
  320. c_u = \frac{1}{deg(u)(deg(u)-1))}
  321. \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.
  322. The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
  323. in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.
  324. The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.
  325. Additionally, this weighted definition has been generalized to support negative edge weights [3]_.
  326. For directed graphs, the clustering is similarly defined as the fraction
  327. of all possible directed triangles or geometric average of the subgraph
  328. edge weights for unweighted and weighted directed graph respectively [4]_.
  329. .. math::
  330. c_u = \frac{T(u)}{2(deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u))},
  331. where :math:`T(u)` is the number of directed triangles through node
  332. :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
  333. :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
  334. :math:`u`.
  335. Parameters
  336. ----------
  337. G : graph
  338. nodes : node, iterable of nodes, or None (default=None)
  339. If a singleton node, return the number of triangles for that node.
  340. If an iterable, compute the number of triangles for each of those nodes.
  341. If `None` (the default) compute the number of triangles for all nodes in `G`.
  342. weight : string or None, optional (default=None)
  343. The edge attribute that holds the numerical value used as a weight.
  344. If None, then each edge has weight 1.
  345. Returns
  346. -------
  347. out : float, or dictionary
  348. Clustering coefficient at specified nodes
  349. Examples
  350. --------
  351. >>> G = nx.complete_graph(5)
  352. >>> print(nx.clustering(G, 0))
  353. 1.0
  354. >>> print(nx.clustering(G))
  355. {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
  356. Notes
  357. -----
  358. Self loops are ignored.
  359. References
  360. ----------
  361. .. [1] Generalizations of the clustering coefficient to weighted
  362. complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
  363. K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
  364. http://jponnela.com/web_documents/a9.pdf
  365. .. [2] Intensity and coherence of motifs in weighted complex
  366. networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
  367. Physical Review E, 71(6), 065103 (2005).
  368. .. [3] Generalization of Clustering Coefficients to Signed Correlation Networks
  369. by G. Costantini and M. Perugini, PloS one, 9(2), e88669 (2014).
  370. .. [4] Clustering in complex directed networks by G. Fagiolo,
  371. Physical Review E, 76(2), 026107 (2007).
  372. """
  373. if G.is_directed():
  374. if weight is not None:
  375. td_iter = _directed_weighted_triangles_and_degree_iter(G, nodes, weight)
  376. clusterc = {
  377. v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
  378. for v, dt, db, t in td_iter
  379. }
  380. else:
  381. td_iter = _directed_triangles_and_degree_iter(G, nodes)
  382. clusterc = {
  383. v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
  384. for v, dt, db, t in td_iter
  385. }
  386. else:
  387. # The formula 2*T/(d*(d-1)) from docs is t/(d*(d-1)) here b/c t==2*T
  388. if weight is not None:
  389. td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight)
  390. clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t in td_iter}
  391. else:
  392. td_iter = _triangles_and_degree_iter(G, nodes)
  393. clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t, _ in td_iter}
  394. if nodes in G:
  395. # Return the value of the sole entry in the dictionary.
  396. return clusterc[nodes]
  397. return clusterc
  398. @nx._dispatchable
  399. def transitivity(G):
  400. r"""Compute graph transitivity, the fraction of all possible triangles
  401. present in G.
  402. Possible triangles are identified by the number of "triads"
  403. (two edges with a shared vertex).
  404. The transitivity is
  405. .. math::
  406. T = 3\frac{\#triangles}{\#triads}.
  407. Parameters
  408. ----------
  409. G : graph
  410. Returns
  411. -------
  412. out : float
  413. Transitivity
  414. Notes
  415. -----
  416. Self loops are ignored.
  417. Examples
  418. --------
  419. >>> G = nx.complete_graph(5)
  420. >>> print(nx.transitivity(G))
  421. 1.0
  422. """
  423. triangles_contri = [
  424. (t, d * (d - 1)) for v, d, t, _ in _triangles_and_degree_iter(G)
  425. ]
  426. # If the graph is empty
  427. if len(triangles_contri) == 0:
  428. return 0
  429. triangles, contri = map(sum, zip(*triangles_contri))
  430. return 0 if triangles == 0 else triangles / contri
  431. @nx._dispatchable
  432. def square_clustering(G, nodes=None):
  433. r"""Compute the squares clustering coefficient for nodes.
  434. For each node return the fraction of possible squares that exist at
  435. the node [1]_
  436. .. math::
  437. C_4(v) = \frac{ \sum_{u=1}^{k_v}
  438. \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v}
  439. \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]},
  440. where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and
  441. :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u -
  442. (1+q_v(u,w)+\theta_{uv})) + (k_w - (1+q_v(u,w)+\theta_{uw}))`, where
  443. :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0
  444. otherwise. [2]_
  445. Parameters
  446. ----------
  447. G : graph
  448. nodes : container of nodes, optional (default=all nodes in G)
  449. Compute clustering for nodes in this container.
  450. Returns
  451. -------
  452. c4 : dictionary
  453. A dictionary keyed by node with the square clustering coefficient value.
  454. Examples
  455. --------
  456. >>> G = nx.complete_graph(5)
  457. >>> print(nx.square_clustering(G, 0))
  458. 1.0
  459. >>> print(nx.square_clustering(G))
  460. {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
  461. Notes
  462. -----
  463. Self loops are ignored.
  464. While :math:`C_3(v)` (triangle clustering) gives the probability that
  465. two neighbors of node v are connected with each other, :math:`C_4(v)` is
  466. the probability that two neighbors of node v share a common
  467. neighbor different from v. This algorithm can be applied to both
  468. bipartite and unipartite networks.
  469. References
  470. ----------
  471. .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005
  472. Cycles and clustering in bipartite networks.
  473. Physical Review E (72) 056127.
  474. .. [2] Zhang, Peng et al. Clustering Coefficient and Community Structure of
  475. Bipartite Networks. Physica A: Statistical Mechanics and its Applications 387.27 (2008): 6869–6875.
  476. https://arxiv.org/abs/0710.0117v1
  477. """
  478. if nodes is None:
  479. node_iter = G
  480. else:
  481. node_iter = G.nbunch_iter(nodes)
  482. clustering = {}
  483. _G_adj = G._adj
  484. class GAdj(dict):
  485. """Calculate (and cache) node neighbor sets excluding self-loops."""
  486. def __missing__(self, v):
  487. v_neighbors = self[v] = set(_G_adj[v])
  488. v_neighbors.discard(v) # Ignore self-loops
  489. return v_neighbors
  490. G_adj = GAdj() # Values are sets of neighbors (no self-loops)
  491. for v in node_iter:
  492. v_neighbors = G_adj[v]
  493. v_degrees_m1 = len(v_neighbors) - 1 # degrees[v] - 1 (used below)
  494. if v_degrees_m1 <= 0:
  495. # Can't form a square without at least two neighbors
  496. clustering[v] = 0
  497. continue
  498. # Count squares with nodes u-v-w-x from the current node v.
  499. # Terms of the denominator: potential = uw_degrees - uw_count - triangles - squares
  500. # uw_degrees: degrees[u] + degrees[w] for each u-w combo
  501. uw_degrees = 0
  502. # uw_count: 1 for each u and 1 for each w for all combos (degrees * (degrees - 1))
  503. uw_count = len(v_neighbors) * v_degrees_m1
  504. # triangles: 1 for each edge where u-w or w-u are connected (i.e. triangles)
  505. triangles = 0
  506. # squares: the number of squares (also the numerator)
  507. squares = 0
  508. # Iterate over all neighbors
  509. for u in v_neighbors:
  510. u_neighbors = G_adj[u]
  511. uw_degrees += len(u_neighbors) * v_degrees_m1
  512. # P2 from https://arxiv.org/abs/2007.11111
  513. p2 = len(u_neighbors & v_neighbors)
  514. # triangles is C_3, sigma_4 from https://arxiv.org/abs/2007.11111
  515. # This double-counts triangles compared to `triangles` function
  516. triangles += p2
  517. # squares is C_4, sigma_12 from https://arxiv.org/abs/2007.11111
  518. # Include this term, b/c a neighbor u can also be a neighbor of neighbor x
  519. squares += p2 * (p2 - 1) # Will divide by 2 later
  520. # And iterate over all neighbors of neighbors.
  521. # These nodes x may be the corners opposite v in squares u-v-w-x.
  522. two_hop_neighbors = set.union(*(G_adj[u] for u in v_neighbors))
  523. two_hop_neighbors -= v_neighbors # Neighbors already counted above
  524. two_hop_neighbors.discard(v)
  525. for x in two_hop_neighbors:
  526. p2 = len(v_neighbors & G_adj[x])
  527. squares += p2 * (p2 - 1) # Will divide by 2 later
  528. squares //= 2
  529. potential = uw_degrees - uw_count - triangles - squares
  530. if potential > 0:
  531. clustering[v] = squares / potential
  532. else:
  533. clustering[v] = 0
  534. if nodes in G:
  535. # Return the value of the sole entry in the dictionary.
  536. return clustering[nodes]
  537. return clustering
  538. @not_implemented_for("directed")
  539. @nx._dispatchable
  540. def generalized_degree(G, nodes=None):
  541. r"""Compute the generalized degree for nodes.
  542. For each node, the generalized degree shows how many edges of given
  543. triangle multiplicity the node is connected to. The triangle multiplicity
  544. of an edge is the number of triangles an edge participates in. The
  545. generalized degree of node :math:`i` can be written as a vector
  546. :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where
  547. :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that
  548. participate in :math:`j` triangles.
  549. Parameters
  550. ----------
  551. G : graph
  552. nodes : container of nodes, optional (default=all nodes in G)
  553. Compute the generalized degree for nodes in this container.
  554. Returns
  555. -------
  556. out : Counter, or dictionary of Counters
  557. Generalized degree of specified nodes. The Counter is keyed by edge
  558. triangle multiplicity.
  559. Examples
  560. --------
  561. >>> G = nx.complete_graph(5)
  562. >>> print(nx.generalized_degree(G, 0))
  563. Counter({3: 4})
  564. >>> print(nx.generalized_degree(G))
  565. {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})}
  566. To recover the number of triangles attached to a node:
  567. >>> k1 = nx.generalized_degree(G, 0)
  568. >>> sum([k * v for k, v in k1.items()]) / 2 == nx.triangles(G, 0)
  569. True
  570. Notes
  571. -----
  572. Self loops are ignored.
  573. In a network of N nodes, the highest triangle multiplicity an edge can have
  574. is N-2.
  575. The return value does not include a `zero` entry if no edges of a
  576. particular triangle multiplicity are present.
  577. The number of triangles node :math:`i` is attached to can be recovered from
  578. the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc,
  579. k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`.
  580. References
  581. ----------
  582. .. [1] Networks with arbitrary edge multiplicities by V. Zlatić,
  583. D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters),
  584. Volume 97, Number 2 (2012).
  585. https://iopscience.iop.org/article/10.1209/0295-5075/97/28005
  586. """
  587. if nodes in G:
  588. return next(_triangles_and_degree_iter(G, nodes))[3]
  589. return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)}