group.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. """Group centrality measures."""
  2. from copy import deepcopy
  3. import networkx as nx
  4. from networkx.algorithms.centrality.betweenness import (
  5. _accumulate_endpoints,
  6. _single_source_dijkstra_path_basic,
  7. _single_source_shortest_path_basic,
  8. )
  9. from networkx.utils.decorators import not_implemented_for
  10. __all__ = [
  11. "group_betweenness_centrality",
  12. "group_closeness_centrality",
  13. "group_degree_centrality",
  14. "group_in_degree_centrality",
  15. "group_out_degree_centrality",
  16. "prominent_group",
  17. ]
  18. @nx._dispatchable(edge_attrs="weight")
  19. def group_betweenness_centrality(G, C, normalized=True, weight=None, endpoints=False):
  20. r"""Compute the group betweenness centrality for a group of nodes.
  21. Group betweenness centrality of a group of nodes $C$ is the sum of the
  22. fraction of all-pairs shortest paths that pass through any vertex in $C$
  23. .. math::
  24. c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
  25. where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
  26. shortest $(s, t)$-paths, and $\sigma(s, t|C)$ is the number of
  27. those paths passing through some node in group $C$. Note that
  28. $(s, t)$ are not members of the group ($V-C$ is the set of nodes
  29. in $V$ that are not in $C$).
  30. Parameters
  31. ----------
  32. G : graph
  33. A NetworkX graph.
  34. C : list or set or list of lists or list of sets
  35. A group or a list of groups containing nodes which belong to G, for which group betweenness
  36. centrality is to be calculated.
  37. normalized : bool, optional (default=True)
  38. If True, group betweenness is normalized by `1/((|V|-|C|)(|V|-|C|-1))`
  39. where `|V|` is the number of nodes in G and `|C|` is the number of nodes in C.
  40. weight : None or string, optional (default=None)
  41. If None, all edge weights are considered equal.
  42. Otherwise holds the name of the edge attribute used as weight.
  43. The weight of an edge is treated as the length or distance between the two sides.
  44. endpoints : bool, optional (default=False)
  45. If True include the endpoints in the shortest path counts.
  46. Raises
  47. ------
  48. NodeNotFound
  49. If node(s) in C are not present in G.
  50. Returns
  51. -------
  52. betweenness : list of floats or float
  53. If C is a single group then return a float. If C is a list with
  54. several groups then return a list of group betweenness centralities.
  55. See Also
  56. --------
  57. betweenness_centrality
  58. Notes
  59. -----
  60. Group betweenness centrality is described in [1]_ and its importance discussed in [3]_.
  61. The initial implementation of the algorithm is mentioned in [2]_. This function uses
  62. an improved algorithm presented in [4]_.
  63. The number of nodes in the group must be a maximum of n - 2 where `n`
  64. is the total number of nodes in the graph.
  65. For weighted graphs the edge weights must be greater than zero.
  66. Zero edge weights can produce an infinite number of equal length
  67. paths between pairs of nodes.
  68. The total number of paths between source and target is counted
  69. differently for directed and undirected graphs. Directed paths
  70. between "u" and "v" are counted as two possible paths (one each
  71. direction) while undirected paths between "u" and "v" are counted
  72. as one path. Said another way, the sum in the expression above is
  73. over all ``s != t`` for directed graphs and for ``s < t`` for undirected graphs.
  74. References
  75. ----------
  76. .. [1] M G Everett and S P Borgatti:
  77. The Centrality of Groups and Classes.
  78. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  79. http://www.analytictech.com/borgatti/group_centrality.htm
  80. .. [2] Ulrik Brandes:
  81. On Variants of Shortest-Path Betweenness
  82. Centrality and their Generic Computation.
  83. Social Networks 30(2):136-145, 2008.
  84. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.9610&rep=rep1&type=pdf
  85. .. [3] Sourav Medya et. al.:
  86. Group Centrality Maximization via Network Design.
  87. SIAM International Conference on Data Mining, SDM 2018, 126–134.
  88. https://sites.cs.ucsb.edu/~arlei/pubs/sdm18.pdf
  89. .. [4] Rami Puzis, Yuval Elovici, and Shlomi Dolev.
  90. "Fast algorithm for successive computation of group betweenness centrality."
  91. https://journals.aps.org/pre/pdf/10.1103/PhysRevE.76.056709
  92. """
  93. GBC = [] # initialize betweenness
  94. list_of_groups = True
  95. # check weather C contains one or many groups
  96. if any(el in G for el in C):
  97. C = [C]
  98. list_of_groups = False
  99. set_v = {node for group in C for node in group}
  100. if set_v - G.nodes: # element(s) of C not in G
  101. raise nx.NodeNotFound(f"The node(s) {set_v - G.nodes} are in C but not in G.")
  102. # pre-processing
  103. PB, sigma, D = _group_preprocessing(G, set_v, weight)
  104. # the algorithm for each group
  105. for group in C:
  106. group = set(group) # set of nodes in group
  107. # initialize the matrices of the sigma and the PB
  108. GBC_group = 0
  109. sigma_m = deepcopy(sigma)
  110. PB_m = deepcopy(PB)
  111. sigma_m_v = deepcopy(sigma_m)
  112. PB_m_v = deepcopy(PB_m)
  113. for v in group:
  114. GBC_group += PB_m[v][v]
  115. for x in group:
  116. for y in group:
  117. dxvy = 0
  118. dxyv = 0
  119. dvxy = 0
  120. if not (
  121. sigma_m[x][y] == 0 or sigma_m[x][v] == 0 or sigma_m[v][y] == 0
  122. ):
  123. if D[x][v] == D[x][y] + D[y][v]:
  124. dxyv = sigma_m[x][y] * sigma_m[y][v] / sigma_m[x][v]
  125. if D[x][y] == D[x][v] + D[v][y]:
  126. dxvy = sigma_m[x][v] * sigma_m[v][y] / sigma_m[x][y]
  127. if D[v][y] == D[v][x] + D[x][y]:
  128. dvxy = sigma_m[v][x] * sigma[x][y] / sigma[v][y]
  129. sigma_m_v[x][y] = sigma_m[x][y] * (1 - dxvy)
  130. PB_m_v[x][y] = PB_m[x][y] - PB_m[x][y] * dxvy
  131. if y != v:
  132. PB_m_v[x][y] -= PB_m[x][v] * dxyv
  133. if x != v:
  134. PB_m_v[x][y] -= PB_m[v][y] * dvxy
  135. sigma_m, sigma_m_v = sigma_m_v, sigma_m
  136. PB_m, PB_m_v = PB_m_v, PB_m
  137. # endpoints
  138. v, c = len(G), len(group)
  139. if not endpoints:
  140. scale = 0
  141. # if the graph is connected then subtract the endpoints from
  142. # the count for all the nodes in the graph. else count how many
  143. # nodes are connected to the group's nodes and subtract that.
  144. if nx.is_directed(G):
  145. if nx.is_strongly_connected(G):
  146. scale = c * (2 * v - c - 1)
  147. elif nx.is_connected(G):
  148. scale = c * (2 * v - c - 1)
  149. if scale == 0:
  150. for group_node1 in group:
  151. for node in D[group_node1]:
  152. if node != group_node1:
  153. if node in group:
  154. scale += 1
  155. else:
  156. scale += 2
  157. GBC_group -= scale
  158. # normalized
  159. if normalized:
  160. scale = 1 / ((v - c) * (v - c - 1))
  161. GBC_group *= scale
  162. # If undirected than count only the undirected edges
  163. elif not G.is_directed():
  164. GBC_group /= 2
  165. GBC.append(GBC_group)
  166. if list_of_groups:
  167. return GBC
  168. return GBC[0]
  169. def _group_preprocessing(G, set_v, weight):
  170. sigma = {}
  171. delta = {}
  172. D = {}
  173. betweenness = dict.fromkeys(G, 0)
  174. for s in G:
  175. if weight is None: # use BFS
  176. S, P, sigma[s], D[s] = _single_source_shortest_path_basic(G, s)
  177. else: # use Dijkstra's algorithm
  178. S, P, sigma[s], D[s] = _single_source_dijkstra_path_basic(G, s, weight)
  179. betweenness, delta[s] = _accumulate_endpoints(betweenness, S, P, sigma[s], s)
  180. for i in delta[s]: # add the paths from s to i and rescale sigma
  181. if s != i:
  182. delta[s][i] += 1
  183. if weight is not None:
  184. sigma[s][i] = sigma[s][i] / 2
  185. # building the path betweenness matrix only for nodes that appear in the group
  186. PB = dict.fromkeys(G)
  187. for group_node1 in set_v:
  188. PB[group_node1] = dict.fromkeys(G, 0.0)
  189. for group_node2 in set_v:
  190. if group_node2 not in D[group_node1]:
  191. continue
  192. for node in G:
  193. # if node is connected to the two group nodes than continue
  194. if group_node2 in D[node] and group_node1 in D[node]:
  195. if (
  196. D[node][group_node2]
  197. == D[node][group_node1] + D[group_node1][group_node2]
  198. ):
  199. PB[group_node1][group_node2] += (
  200. delta[node][group_node2]
  201. * sigma[node][group_node1]
  202. * sigma[group_node1][group_node2]
  203. / sigma[node][group_node2]
  204. )
  205. return PB, sigma, D
  206. @nx._dispatchable(edge_attrs="weight")
  207. def prominent_group(
  208. G, k, weight=None, C=None, endpoints=False, normalized=True, greedy=False
  209. ):
  210. r"""Find the prominent group of size $k$ in graph $G$. The prominence of the
  211. group is evaluated by the group betweenness centrality.
  212. Group betweenness centrality of a group of nodes $C$ is the sum of the
  213. fraction of all-pairs shortest paths that pass through any vertex in $C$
  214. .. math::
  215. c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
  216. where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
  217. shortest $(s, t)$-paths, and $\sigma(s, t|C)$ is the number of
  218. those paths passing through some node in group $C$. Note that
  219. $(s, t)$ are not members of the group ($V-C$ is the set of nodes
  220. in $V$ that are not in $C$).
  221. Parameters
  222. ----------
  223. G : graph
  224. A NetworkX graph.
  225. k : int
  226. The number of nodes in the group.
  227. normalized : bool, optional (default=True)
  228. If True, group betweenness is normalized by ``1/((|V|-|C|)(|V|-|C|-1))``
  229. where ``|V|`` is the number of nodes in G and ``|C|`` is the number of
  230. nodes in C.
  231. weight : None or string, optional (default=None)
  232. If None, all edge weights are considered equal.
  233. Otherwise holds the name of the edge attribute used as weight.
  234. The weight of an edge is treated as the length or distance between the two sides.
  235. endpoints : bool, optional (default=False)
  236. If True include the endpoints in the shortest path counts.
  237. C : list or set, optional (default=None)
  238. list of nodes which won't be candidates of the prominent group.
  239. greedy : bool, optional (default=False)
  240. Using a naive greedy algorithm in order to find non-optimal prominent
  241. group. For scale free networks the results are negligibly below the optimal
  242. results.
  243. Raises
  244. ------
  245. NodeNotFound
  246. If node(s) in C are not present in G.
  247. Returns
  248. -------
  249. max_GBC : float
  250. The group betweenness centrality of the prominent group.
  251. max_group : list
  252. The list of nodes in the prominent group.
  253. See Also
  254. --------
  255. betweenness_centrality, group_betweenness_centrality
  256. Notes
  257. -----
  258. Group betweenness centrality is described in [1]_ and its importance discussed in [3]_.
  259. The algorithm is described in [2]_ and is based on techniques mentioned in [4]_.
  260. The number of nodes in the group must be a maximum of ``n - 2`` where ``n``
  261. is the total number of nodes in the graph.
  262. For weighted graphs the edge weights must be greater than zero.
  263. Zero edge weights can produce an infinite number of equal length
  264. paths between pairs of nodes.
  265. The total number of paths between source and target is counted
  266. differently for directed and undirected graphs. Directed paths
  267. between "u" and "v" are counted as two possible paths (one each
  268. direction) while undirected paths between "u" and "v" are counted
  269. as one path. Said another way, the sum in the expression above is
  270. over all ``s != t`` for directed graphs and for ``s < t`` for undirected graphs.
  271. References
  272. ----------
  273. .. [1] M G Everett and S P Borgatti:
  274. The Centrality of Groups and Classes.
  275. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  276. http://www.analytictech.com/borgatti/group_centrality.htm
  277. .. [2] Rami Puzis, Yuval Elovici, and Shlomi Dolev:
  278. "Finding the Most Prominent Group in Complex Networks"
  279. AI communications 20(4): 287-296, 2007.
  280. https://www.researchgate.net/profile/Rami_Puzis2/publication/220308855
  281. .. [3] Sourav Medya et. al.:
  282. Group Centrality Maximization via Network Design.
  283. SIAM International Conference on Data Mining, SDM 2018, 126–134.
  284. https://sites.cs.ucsb.edu/~arlei/pubs/sdm18.pdf
  285. .. [4] Rami Puzis, Yuval Elovici, and Shlomi Dolev.
  286. "Fast algorithm for successive computation of group betweenness centrality."
  287. https://journals.aps.org/pre/pdf/10.1103/PhysRevE.76.056709
  288. """
  289. import numpy as np
  290. import pandas as pd
  291. if C is not None:
  292. C = set(C)
  293. if C - G.nodes: # element(s) of C not in G
  294. raise nx.NodeNotFound(f"The node(s) {C - G.nodes} are in C but not in G.")
  295. nodes = list(G.nodes - C)
  296. else:
  297. nodes = list(G.nodes)
  298. DF_tree = nx.Graph()
  299. DF_tree.__networkx_cache__ = None # Disable caching
  300. PB, sigma, D = _group_preprocessing(G, nodes, weight)
  301. betweenness = pd.DataFrame.from_dict(PB)
  302. if C is not None:
  303. for node in C:
  304. # remove from the betweenness all the nodes not part of the group
  305. betweenness = betweenness.drop(index=node)
  306. betweenness = betweenness.drop(columns=node)
  307. CL = [node for _, node in sorted(zip(np.diag(betweenness), nodes), reverse=True)]
  308. max_GBC = 0
  309. max_group = []
  310. DF_tree.add_node(
  311. 1,
  312. CL=CL,
  313. betweenness=betweenness,
  314. GBC=0,
  315. GM=[],
  316. sigma=sigma,
  317. cont=dict(zip(nodes, np.diag(betweenness))),
  318. )
  319. # the algorithm
  320. DF_tree.nodes[1]["heu"] = 0
  321. for i in range(k):
  322. DF_tree.nodes[1]["heu"] += DF_tree.nodes[1]["cont"][DF_tree.nodes[1]["CL"][i]]
  323. max_GBC, DF_tree, max_group = _dfbnb(
  324. G, k, DF_tree, max_GBC, 1, D, max_group, nodes, greedy
  325. )
  326. v = len(G)
  327. if not endpoints:
  328. scale = 0
  329. # if the graph is connected then subtract the endpoints from
  330. # the count for all the nodes in the graph. else count how many
  331. # nodes are connected to the group's nodes and subtract that.
  332. if nx.is_directed(G):
  333. if nx.is_strongly_connected(G):
  334. scale = k * (2 * v - k - 1)
  335. elif nx.is_connected(G):
  336. scale = k * (2 * v - k - 1)
  337. if scale == 0:
  338. for group_node1 in max_group:
  339. for node in D[group_node1]:
  340. if node != group_node1:
  341. if node in max_group:
  342. scale += 1
  343. else:
  344. scale += 2
  345. max_GBC -= scale
  346. # normalized
  347. if normalized:
  348. scale = 1 / ((v - k) * (v - k - 1))
  349. max_GBC *= scale
  350. # If undirected then count only the undirected edges
  351. elif not G.is_directed():
  352. max_GBC /= 2
  353. max_GBC = float(f"{max_GBC:.2f}")
  354. return max_GBC, max_group
  355. def _dfbnb(G, k, DF_tree, max_GBC, root, D, max_group, nodes, greedy):
  356. # stopping condition - if we found a group of size k and with higher GBC then prune
  357. if len(DF_tree.nodes[root]["GM"]) == k and DF_tree.nodes[root]["GBC"] > max_GBC:
  358. return DF_tree.nodes[root]["GBC"], DF_tree, DF_tree.nodes[root]["GM"]
  359. # stopping condition - if the size of group members equal to k or there are less than
  360. # k - |GM| in the candidate list or the heuristic function plus the GBC is below the
  361. # maximal GBC found then prune
  362. if (
  363. len(DF_tree.nodes[root]["GM"]) == k
  364. or len(DF_tree.nodes[root]["CL"]) <= k - len(DF_tree.nodes[root]["GM"])
  365. or DF_tree.nodes[root]["GBC"] + DF_tree.nodes[root]["heu"] <= max_GBC
  366. ):
  367. return max_GBC, DF_tree, max_group
  368. # finding the heuristic of both children
  369. node_p, node_m, DF_tree = _heuristic(k, root, DF_tree, D, nodes, greedy)
  370. # finding the child with the bigger heuristic + GBC and expand
  371. # that node first if greedy then only expand the plus node
  372. if greedy:
  373. max_GBC, DF_tree, max_group = _dfbnb(
  374. G, k, DF_tree, max_GBC, node_p, D, max_group, nodes, greedy
  375. )
  376. elif (
  377. DF_tree.nodes[node_p]["GBC"] + DF_tree.nodes[node_p]["heu"]
  378. > DF_tree.nodes[node_m]["GBC"] + DF_tree.nodes[node_m]["heu"]
  379. ):
  380. max_GBC, DF_tree, max_group = _dfbnb(
  381. G, k, DF_tree, max_GBC, node_p, D, max_group, nodes, greedy
  382. )
  383. max_GBC, DF_tree, max_group = _dfbnb(
  384. G, k, DF_tree, max_GBC, node_m, D, max_group, nodes, greedy
  385. )
  386. else:
  387. max_GBC, DF_tree, max_group = _dfbnb(
  388. G, k, DF_tree, max_GBC, node_m, D, max_group, nodes, greedy
  389. )
  390. max_GBC, DF_tree, max_group = _dfbnb(
  391. G, k, DF_tree, max_GBC, node_p, D, max_group, nodes, greedy
  392. )
  393. return max_GBC, DF_tree, max_group
  394. def _heuristic(k, root, DF_tree, D, nodes, greedy):
  395. import numpy as np
  396. # This helper function add two nodes to DF_tree - one left son and the
  397. # other right son, finds their heuristic, CL, GBC, and GM
  398. node_p = DF_tree.number_of_nodes() + 1
  399. node_m = DF_tree.number_of_nodes() + 2
  400. added_node = DF_tree.nodes[root]["CL"][0]
  401. # adding the plus node
  402. DF_tree.add_nodes_from([(node_p, deepcopy(DF_tree.nodes[root]))])
  403. DF_tree.nodes[node_p]["GM"].append(added_node)
  404. DF_tree.nodes[node_p]["GBC"] += DF_tree.nodes[node_p]["cont"][added_node]
  405. root_node = DF_tree.nodes[root]
  406. for x in nodes:
  407. for y in nodes:
  408. dxvy = 0
  409. dxyv = 0
  410. dvxy = 0
  411. if not (
  412. root_node["sigma"][x][y] == 0
  413. or root_node["sigma"][x][added_node] == 0
  414. or root_node["sigma"][added_node][y] == 0
  415. ):
  416. if D[x][added_node] == D[x][y] + D[y][added_node]:
  417. dxyv = (
  418. root_node["sigma"][x][y]
  419. * root_node["sigma"][y][added_node]
  420. / root_node["sigma"][x][added_node]
  421. )
  422. if D[x][y] == D[x][added_node] + D[added_node][y]:
  423. dxvy = (
  424. root_node["sigma"][x][added_node]
  425. * root_node["sigma"][added_node][y]
  426. / root_node["sigma"][x][y]
  427. )
  428. if D[added_node][y] == D[added_node][x] + D[x][y]:
  429. dvxy = (
  430. root_node["sigma"][added_node][x]
  431. * root_node["sigma"][x][y]
  432. / root_node["sigma"][added_node][y]
  433. )
  434. DF_tree.nodes[node_p]["sigma"][x][y] = root_node["sigma"][x][y] * (1 - dxvy)
  435. DF_tree.nodes[node_p]["betweenness"].loc[y, x] = (
  436. root_node["betweenness"][x][y] - root_node["betweenness"][x][y] * dxvy
  437. )
  438. if y != added_node:
  439. DF_tree.nodes[node_p]["betweenness"].loc[y, x] -= (
  440. root_node["betweenness"][x][added_node] * dxyv
  441. )
  442. if x != added_node:
  443. DF_tree.nodes[node_p]["betweenness"].loc[y, x] -= (
  444. root_node["betweenness"][added_node][y] * dvxy
  445. )
  446. DF_tree.nodes[node_p]["CL"] = [
  447. node
  448. for _, node in sorted(
  449. zip(np.diag(DF_tree.nodes[node_p]["betweenness"]), nodes), reverse=True
  450. )
  451. if node not in DF_tree.nodes[node_p]["GM"]
  452. ]
  453. DF_tree.nodes[node_p]["cont"] = dict(
  454. zip(nodes, np.diag(DF_tree.nodes[node_p]["betweenness"]))
  455. )
  456. DF_tree.nodes[node_p]["heu"] = 0
  457. for i in range(k - len(DF_tree.nodes[node_p]["GM"])):
  458. DF_tree.nodes[node_p]["heu"] += DF_tree.nodes[node_p]["cont"][
  459. DF_tree.nodes[node_p]["CL"][i]
  460. ]
  461. # adding the minus node - don't insert the first node in the CL to GM
  462. # Insert minus node only if isn't greedy type algorithm
  463. if not greedy:
  464. DF_tree.add_nodes_from([(node_m, deepcopy(DF_tree.nodes[root]))])
  465. DF_tree.nodes[node_m]["CL"].pop(0)
  466. DF_tree.nodes[node_m]["cont"].pop(added_node)
  467. DF_tree.nodes[node_m]["heu"] = 0
  468. for i in range(k - len(DF_tree.nodes[node_m]["GM"])):
  469. DF_tree.nodes[node_m]["heu"] += DF_tree.nodes[node_m]["cont"][
  470. DF_tree.nodes[node_m]["CL"][i]
  471. ]
  472. else:
  473. node_m = None
  474. return node_p, node_m, DF_tree
  475. @nx._dispatchable(edge_attrs="weight")
  476. def group_closeness_centrality(G, S, weight=None):
  477. r"""Compute the group closeness centrality for a group of nodes.
  478. Group closeness centrality of a group of nodes $S$ is a measure
  479. of how close the group is to the other nodes in the graph.
  480. .. math::
  481. c_{close}(S) = \frac{|V-S|}{\sum_{v \in V-S} d_{S, v}}
  482. d_{S, v} = min_{u \in S} (d_{u, v})
  483. where $V$ is the set of nodes, $d_{S, v}$ is the distance of
  484. the group $S$ from $v$ defined as above. ($V-S$ is the set of nodes
  485. in $V$ that are not in $S$).
  486. Parameters
  487. ----------
  488. G : graph
  489. A NetworkX graph.
  490. S : list or set
  491. S is a group of nodes which belong to G, for which group closeness
  492. centrality is to be calculated.
  493. weight : None or string, optional (default=None)
  494. If None, all edge weights are considered equal.
  495. Otherwise holds the name of the edge attribute used as weight.
  496. The weight of an edge is treated as the length or distance between the two sides.
  497. Raises
  498. ------
  499. NodeNotFound
  500. If node(s) in S are not present in G.
  501. Returns
  502. -------
  503. closeness : float
  504. Group closeness centrality of the group S.
  505. See Also
  506. --------
  507. closeness_centrality
  508. Notes
  509. -----
  510. The measure was introduced in [1]_.
  511. The formula implemented here is described in [2]_.
  512. Higher values of closeness indicate greater centrality.
  513. It is assumed that 1 / 0 is 0 (required in the case of directed graphs,
  514. or when a shortest path length is 0).
  515. The number of nodes in the group must be a maximum of n - 1 where `n`
  516. is the total number of nodes in the graph.
  517. For directed graphs, the incoming distance is utilized here. To use the
  518. outward distance, act on `G.reverse()`.
  519. For weighted graphs the edge weights must be greater than zero.
  520. Zero edge weights can produce an infinite number of equal length
  521. paths between pairs of nodes.
  522. References
  523. ----------
  524. .. [1] M G Everett and S P Borgatti:
  525. The Centrality of Groups and Classes.
  526. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  527. http://www.analytictech.com/borgatti/group_centrality.htm
  528. .. [2] J. Zhao et. al.:
  529. Measuring and Maximizing Group Closeness Centrality over
  530. Disk Resident Graphs.
  531. WWWConference Proceedings, 2014. 689-694.
  532. https://doi.org/10.1145/2567948.2579356
  533. """
  534. if G.is_directed():
  535. G = G.reverse() # reverse view
  536. closeness = 0 # initialize to 0
  537. V = set(G) # set of nodes in G
  538. S = set(S) # set of nodes in group S
  539. V_S = V - S # set of nodes in V but not S
  540. shortest_path_lengths = nx.multi_source_dijkstra_path_length(G, S, weight=weight)
  541. # accumulation
  542. for v in V_S:
  543. try:
  544. closeness += shortest_path_lengths[v]
  545. except KeyError: # no path exists
  546. closeness += 0
  547. try:
  548. closeness = len(V_S) / closeness
  549. except ZeroDivisionError: # 1 / 0 assumed as 0
  550. closeness = 0
  551. return closeness
  552. @nx._dispatchable
  553. def group_degree_centrality(G, S):
  554. """Compute the group degree centrality for a group of nodes.
  555. Group degree centrality of a group of nodes $S$ is the fraction
  556. of non-group members connected to group members.
  557. Parameters
  558. ----------
  559. G : graph
  560. A NetworkX graph.
  561. S : list or set
  562. S is a group of nodes which belong to G, for which group degree
  563. centrality is to be calculated.
  564. Raises
  565. ------
  566. NetworkXError
  567. If node(s) in S are not in G.
  568. Returns
  569. -------
  570. centrality : float
  571. Group degree centrality of the group S.
  572. See Also
  573. --------
  574. degree_centrality
  575. group_in_degree_centrality
  576. group_out_degree_centrality
  577. Notes
  578. -----
  579. The measure was introduced in [1]_.
  580. The number of nodes in the group must be a maximum of n - 1 where `n`
  581. is the total number of nodes in the graph.
  582. References
  583. ----------
  584. .. [1] M G Everett and S P Borgatti:
  585. The Centrality of Groups and Classes.
  586. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  587. http://www.analytictech.com/borgatti/group_centrality.htm
  588. """
  589. centrality = len(set().union(*[set(G.neighbors(i)) for i in S]) - set(S))
  590. centrality /= len(G.nodes()) - len(S)
  591. return centrality
  592. @not_implemented_for("undirected")
  593. @nx._dispatchable
  594. def group_in_degree_centrality(G, S):
  595. """Compute the group in-degree centrality for a group of nodes.
  596. Group in-degree centrality of a group of nodes $S$ is the fraction
  597. of non-group members connected to group members by incoming edges.
  598. Parameters
  599. ----------
  600. G : graph
  601. A NetworkX graph.
  602. S : list or set
  603. S is a group of nodes which belong to G, for which group in-degree
  604. centrality is to be calculated.
  605. Returns
  606. -------
  607. centrality : float
  608. Group in-degree centrality of the group S.
  609. Raises
  610. ------
  611. NetworkXNotImplemented
  612. If G is undirected.
  613. NodeNotFound
  614. If node(s) in S are not in G.
  615. See Also
  616. --------
  617. degree_centrality
  618. group_degree_centrality
  619. group_out_degree_centrality
  620. Notes
  621. -----
  622. The number of nodes in the group must be a maximum of n - 1 where `n`
  623. is the total number of nodes in the graph.
  624. `G.neighbors(i)` gives nodes with an outward edge from i, in a DiGraph,
  625. so for group in-degree centrality, the reverse graph is used.
  626. """
  627. return group_degree_centrality(G.reverse(), S)
  628. @not_implemented_for("undirected")
  629. @nx._dispatchable
  630. def group_out_degree_centrality(G, S):
  631. """Compute the group out-degree centrality for a group of nodes.
  632. Group out-degree centrality of a group of nodes $S$ is the fraction
  633. of non-group members connected to group members by outgoing edges.
  634. Parameters
  635. ----------
  636. G : graph
  637. A NetworkX graph.
  638. S : list or set
  639. S is a group of nodes which belong to G, for which group in-degree
  640. centrality is to be calculated.
  641. Returns
  642. -------
  643. centrality : float
  644. Group out-degree centrality of the group S.
  645. Raises
  646. ------
  647. NetworkXNotImplemented
  648. If G is undirected.
  649. NodeNotFound
  650. If node(s) in S are not in G.
  651. See Also
  652. --------
  653. degree_centrality
  654. group_degree_centrality
  655. group_in_degree_centrality
  656. Notes
  657. -----
  658. The number of nodes in the group must be a maximum of n - 1 where `n`
  659. is the total number of nodes in the graph.
  660. `G.neighbors(i)` gives nodes with an outward edge from i, in a DiGraph,
  661. so for group out-degree centrality, the graph itself is used.
  662. """
  663. return group_degree_centrality(G, S)