structuralholes.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. """Functions for computing measures of structural holes."""
  2. import networkx as nx
  3. __all__ = ["constraint", "local_constraint", "effective_size"]
  4. @nx._dispatchable(edge_attrs="weight")
  5. def mutual_weight(G, u, v, weight=None):
  6. """Returns the sum of the weights of the edge from `u` to `v` and
  7. the edge from `v` to `u` in `G`.
  8. `weight` is the edge data key that represents the edge weight. If
  9. the specified key is `None` or is not in the edge data for an edge,
  10. that edge is assumed to have weight 1.
  11. Pre-conditions: `u` and `v` must both be in `G`.
  12. """
  13. try:
  14. a_uv = G[u][v].get(weight, 1)
  15. except KeyError:
  16. a_uv = 0
  17. try:
  18. a_vu = G[v][u].get(weight, 1)
  19. except KeyError:
  20. a_vu = 0
  21. return a_uv + a_vu
  22. @nx._dispatchable(edge_attrs="weight")
  23. def normalized_mutual_weight(G, u, v, norm=sum, weight=None):
  24. """Returns normalized mutual weight of the edges from `u` to `v`
  25. with respect to the mutual weights of the neighbors of `u` in `G`.
  26. `norm` specifies how the normalization factor is computed. It must
  27. be a function that takes a single argument and returns a number.
  28. The argument will be an iterable of mutual weights
  29. of pairs ``(u, w)``, where ``w`` ranges over each (in- and
  30. out-)neighbor of ``u``. Commons values for `normalization` are
  31. ``sum`` and ``max``.
  32. `weight` can be ``None`` or a string, if None, all edge weights
  33. are considered equal. Otherwise holds the name of the edge
  34. attribute used as weight.
  35. """
  36. scale = norm(mutual_weight(G, u, w, weight) for w in set(nx.all_neighbors(G, u)))
  37. return 0 if scale == 0 else mutual_weight(G, u, v, weight) / scale
  38. @nx._dispatchable(edge_attrs="weight")
  39. def effective_size(G, nodes=None, weight=None):
  40. r"""Returns the effective size of all nodes in the graph ``G``.
  41. The *effective size* of a node's ego network is based on the concept
  42. of redundancy. A person's ego network has redundancy to the extent
  43. that her contacts are connected to each other as well. The
  44. nonredundant part of a person's relationships is the effective
  45. size of her ego network [1]_. Formally, the effective size of a
  46. node $u$, denoted $e(u)$, is defined by
  47. .. math::
  48. e(u) = \sum_{v \in N(u) \setminus \{u\}}
  49. \left(1 - \sum_{w \in N(v)} p_{uw} m_{vw}\right)
  50. where $N(u)$ is the set of neighbors of $u$ and $p_{uw}$ is the
  51. normalized mutual weight of the (directed or undirected) edges
  52. joining $u$ and $v$, for each vertex $u$ and $v$ [1]_. And $m_{vw}$
  53. is the mutual weight of $v$ and $w$ divided by $v$ highest mutual
  54. weight with any of its neighbors. The *mutual weight* of $u$ and $v$
  55. is the sum of the weights of edges joining them (edge weights are
  56. assumed to be one if the graph is unweighted).
  57. For the case of unweighted and undirected graphs, Borgatti proposed
  58. a simplified formula to compute effective size [2]_
  59. .. math::
  60. e(u) = n - \frac{2t}{n}
  61. where `t` is the number of ties in the ego network (not including
  62. ties to ego) and `n` is the number of nodes (excluding ego).
  63. Parameters
  64. ----------
  65. G : NetworkX graph
  66. The graph containing ``v``. Directed graphs are treated like
  67. undirected graphs when computing neighbors of ``v``.
  68. nodes : container, optional
  69. Container of nodes in the graph ``G`` to compute the effective size.
  70. If None, the effective size of every node is computed.
  71. weight : None or string, optional
  72. If None, all edge weights are considered equal.
  73. Otherwise holds the name of the edge attribute used as weight.
  74. Returns
  75. -------
  76. dict
  77. Dictionary with nodes as keys and the effective size of the node as values.
  78. Notes
  79. -----
  80. Isolated nodes, including nodes which only have self-loop edges, do not
  81. have a well-defined effective size::
  82. >>> G = nx.path_graph(3)
  83. >>> G.add_edge(4, 4)
  84. >>> nx.effective_size(G)
  85. {0: 1.0, 1: 2.0, 2: 1.0, 4: nan}
  86. Burt also defined the related concept of *efficiency* of a node's ego
  87. network, which is its effective size divided by the degree of that
  88. node [1]_. So you can easily compute efficiency:
  89. >>> G = nx.DiGraph()
  90. >>> G.add_edges_from([(0, 1), (0, 2), (1, 0), (2, 1)])
  91. >>> esize = nx.effective_size(G)
  92. >>> efficiency = {n: v / G.degree(n) for n, v in esize.items()}
  93. See also
  94. --------
  95. constraint
  96. References
  97. ----------
  98. .. [1] Burt, Ronald S.
  99. *Structural Holes: The Social Structure of Competition.*
  100. Cambridge: Harvard University Press, 1995.
  101. .. [2] Borgatti, S.
  102. "Structural Holes: Unpacking Burt's Redundancy Measures"
  103. CONNECTIONS 20(1):35-38.
  104. http://www.analytictech.com/connections/v20(1)/holes.htm
  105. """
  106. def redundancy(G, u, v, weight=None):
  107. nmw = normalized_mutual_weight
  108. r = sum(
  109. nmw(G, u, w, weight=weight) * nmw(G, v, w, norm=max, weight=weight)
  110. for w in set(nx.all_neighbors(G, u))
  111. )
  112. return 1 - r
  113. # Check if scipy is available
  114. try:
  115. # Needed for errstate
  116. import numpy as np
  117. # make sure nx.adjacency_matrix will not raise
  118. import scipy as sp
  119. has_scipy = True
  120. except:
  121. has_scipy = False
  122. if nodes is None and has_scipy:
  123. # In order to compute constraint of all nodes,
  124. # algorithms based on sparse matrices can be much faster
  125. # Obtain the adjacency matrix
  126. P = nx.adjacency_matrix(G, weight=weight)
  127. # Calculate mutual weights
  128. mutual_weights1 = P + P.T
  129. mutual_weights2 = mutual_weights1.copy()
  130. with np.errstate(divide="ignore"):
  131. # Mutual_weights1 = Normalize mutual weights by row sums
  132. mutual_weights1 /= mutual_weights1.sum(axis=1)[:, np.newaxis]
  133. # Mutual_weights2 = Normalize mutual weights by row max
  134. mutual_weights2 /= mutual_weights2.max(axis=1).toarray()
  135. # Calculate effective sizes
  136. r = 1 - (mutual_weights1 @ mutual_weights2.T).toarray()
  137. effective_size = ((mutual_weights1 > 0) * r).sum(axis=1)
  138. # Special treatment: isolated nodes (ignoring selfloops) marked with "nan"
  139. sum_mutual_weights = mutual_weights1.sum(axis=1) - mutual_weights1.diagonal()
  140. isolated_nodes = sum_mutual_weights == 0
  141. effective_size[isolated_nodes] = float("nan")
  142. # Use tolist() to automatically convert numpy scalars -> Python scalars
  143. return dict(zip(G, effective_size.tolist()))
  144. # Results for only requested nodes
  145. effective_size = {}
  146. if nodes is None:
  147. nodes = G
  148. # Use Borgatti's simplified formula for unweighted and undirected graphs
  149. if not G.is_directed() and weight is None:
  150. for v in nodes:
  151. # Effective size is not defined for isolated nodes, including nodes
  152. # with only self-edges
  153. if all(u == v for u in G[v]):
  154. effective_size[v] = float("nan")
  155. continue
  156. E = nx.ego_graph(G, v, center=False, undirected=True)
  157. effective_size[v] = len(E) - (2 * E.size()) / len(E)
  158. else:
  159. for v in nodes:
  160. # Effective size is not defined for isolated nodes, including nodes
  161. # with only self-edges
  162. if all(u == v for u in G[v]):
  163. effective_size[v] = float("nan")
  164. continue
  165. effective_size[v] = sum(
  166. redundancy(G, v, u, weight) for u in set(nx.all_neighbors(G, v))
  167. )
  168. return effective_size
  169. @nx._dispatchable(edge_attrs="weight")
  170. def constraint(G, nodes=None, weight=None):
  171. r"""Returns the constraint on all nodes in the graph ``G``.
  172. The *constraint* is a measure of the extent to which a node *v* is
  173. invested in those nodes that are themselves invested in the
  174. neighbors of *v*. Formally, the *constraint on v*, denoted `c(v)`,
  175. is defined by
  176. .. math::
  177. c(v) = \sum_{w \in N(v) \setminus \{v\}} \ell(v, w)
  178. where $N(v)$ is the subset of the neighbors of `v` that are either
  179. predecessors or successors of `v` and $\ell(v, w)$ is the local
  180. constraint on `v` with respect to `w` [1]_. For the definition of local
  181. constraint, see :func:`local_constraint`.
  182. Parameters
  183. ----------
  184. G : NetworkX graph
  185. The graph containing ``v``. This can be either directed or undirected.
  186. nodes : container, optional
  187. Container of nodes in the graph ``G`` to compute the constraint. If
  188. None, the constraint of every node is computed.
  189. weight : None or string, optional
  190. If None, all edge weights are considered equal.
  191. Otherwise holds the name of the edge attribute used as weight.
  192. Returns
  193. -------
  194. dict
  195. Dictionary with nodes as keys and the constraint on the node as values.
  196. See also
  197. --------
  198. local_constraint
  199. References
  200. ----------
  201. .. [1] Burt, Ronald S.
  202. "Structural holes and good ideas".
  203. American Journal of Sociology (110): 349–399.
  204. """
  205. # Check if scipy is available
  206. try:
  207. # Needed for errstate
  208. import numpy as np
  209. # make sure nx.adjacency_matrix will not raise
  210. import scipy as sp
  211. has_scipy = True
  212. except:
  213. has_scipy = False
  214. if nodes is None and has_scipy:
  215. # In order to compute constraint of all nodes,
  216. # algorithms based on sparse matrices can be much faster
  217. # Obtain the adjacency matrix
  218. P = nx.adjacency_matrix(G, weight=weight)
  219. # Calculate mutual weights
  220. mutual_weights = P + P.T
  221. # Normalize mutual weights by row sums
  222. sum_mutual_weights = mutual_weights.sum(axis=1)
  223. with np.errstate(divide="ignore"):
  224. mutual_weights /= sum_mutual_weights[:, np.newaxis]
  225. # Calculate local constraints and constraints
  226. local_constraints = (mutual_weights + mutual_weights @ mutual_weights) ** 2
  227. constraints = ((mutual_weights > 0) * local_constraints).sum(axis=1)
  228. # Special treatment: isolated nodes marked with "nan"
  229. isolated_nodes = sum_mutual_weights - 2 * mutual_weights.diagonal() == 0
  230. constraints[isolated_nodes] = float("nan")
  231. # Use tolist() to automatically convert numpy scalars -> Python scalars
  232. return dict(zip(G, constraints.tolist()))
  233. # Result for only requested nodes
  234. constraint = {}
  235. if nodes is None:
  236. nodes = G
  237. for v in nodes:
  238. # Constraint is not defined for isolated nodes
  239. if len(G[v]) == 0:
  240. constraint[v] = float("nan")
  241. continue
  242. constraint[v] = sum(
  243. local_constraint(G, v, n, weight) for n in set(nx.all_neighbors(G, v))
  244. )
  245. return constraint
  246. @nx._dispatchable(edge_attrs="weight")
  247. def local_constraint(G, u, v, weight=None):
  248. r"""Returns the local constraint on the node ``u`` with respect to
  249. the node ``v`` in the graph ``G``.
  250. Formally, the *local constraint on u with respect to v*, denoted
  251. $\ell(u, v)$, is defined by
  252. .. math::
  253. \ell(u, v) = \left(p_{uv} + \sum_{w \in N(v)} p_{uw} p_{wv}\right)^2,
  254. where $N(v)$ is the set of neighbors of $v$ and $p_{uv}$ is the
  255. normalized mutual weight of the (directed or undirected) edges
  256. joining $u$ and $v$, for each vertex $u$ and $v$ [1]_. The *mutual
  257. weight* of $u$ and $v$ is the sum of the weights of edges joining
  258. them (edge weights are assumed to be one if the graph is
  259. unweighted).
  260. Parameters
  261. ----------
  262. G : NetworkX graph
  263. The graph containing ``u`` and ``v``. This can be either
  264. directed or undirected.
  265. u : node
  266. A node in the graph ``G``.
  267. v : node
  268. A node in the graph ``G``.
  269. weight : None or string, optional
  270. If None, all edge weights are considered equal.
  271. Otherwise holds the name of the edge attribute used as weight.
  272. Returns
  273. -------
  274. float
  275. The constraint of the node ``v`` in the graph ``G``.
  276. See also
  277. --------
  278. constraint
  279. References
  280. ----------
  281. .. [1] Burt, Ronald S.
  282. "Structural holes and good ideas".
  283. American Journal of Sociology (110): 349–399.
  284. """
  285. nmw = normalized_mutual_weight
  286. direct = nmw(G, u, v, weight=weight)
  287. indirect = sum(
  288. nmw(G, u, w, weight=weight) * nmw(G, w, v, weight=weight)
  289. for w in set(nx.all_neighbors(G, u))
  290. )
  291. return (direct + indirect) ** 2