time_dependent.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. """Time dependent algorithms."""
  2. import networkx as nx
  3. from networkx.utils import not_implemented_for
  4. __all__ = ["cd_index"]
  5. @not_implemented_for("undirected")
  6. @not_implemented_for("multigraph")
  7. @nx._dispatchable(node_attrs={"time": None, "weight": 1})
  8. def cd_index(G, node, time_delta, *, time="time", weight=None):
  9. r"""Compute the CD index for `node` within the graph `G`.
  10. Calculates the CD index for the given node of the graph,
  11. considering only its predecessors who have the `time` attribute
  12. smaller than or equal to the `time` attribute of the `node`
  13. plus `time_delta`.
  14. Parameters
  15. ----------
  16. G : graph
  17. A directed networkx graph whose nodes have `time` attributes and optionally
  18. `weight` attributes (if a weight is not given, it is considered 1).
  19. node : node
  20. The node for which the CD index is calculated.
  21. time_delta : numeric or timedelta
  22. Amount of time after the `time` attribute of the `node`. The value of
  23. `time_delta` must support comparison with the `time` node attribute. For
  24. example, if the `time` attribute of the nodes are `datetime.datetime`
  25. objects, then `time_delta` should be a `datetime.timedelta` object.
  26. time : string (Optional, default is "time")
  27. The name of the node attribute that will be used for the calculations.
  28. weight : string (Optional, default is None)
  29. The name of the node attribute used as weight.
  30. Returns
  31. -------
  32. float
  33. The CD index calculated for the node `node` within the graph `G`.
  34. Raises
  35. ------
  36. NetworkXError
  37. If not all nodes have a `time` attribute or
  38. `time_delta` and `time` attribute types are not compatible or
  39. `n` equals 0.
  40. NetworkXNotImplemented
  41. If `G` is a non-directed graph or a multigraph.
  42. Examples
  43. --------
  44. >>> from datetime import datetime, timedelta
  45. >>> G = nx.DiGraph()
  46. >>> nodes = {
  47. ... 1: {"time": datetime(2015, 1, 1)},
  48. ... 2: {"time": datetime(2012, 1, 1), "weight": 4},
  49. ... 3: {"time": datetime(2010, 1, 1)},
  50. ... 4: {"time": datetime(2008, 1, 1)},
  51. ... 5: {"time": datetime(2014, 1, 1)},
  52. ... }
  53. >>> G.add_nodes_from([(n, nodes[n]) for n in nodes])
  54. >>> edges = [(1, 3), (1, 4), (2, 3), (3, 4), (3, 5)]
  55. >>> G.add_edges_from(edges)
  56. >>> delta = timedelta(days=5 * 365)
  57. >>> nx.cd_index(G, 3, time_delta=delta, time="time")
  58. 0.5
  59. >>> nx.cd_index(G, 3, time_delta=delta, time="time", weight="weight")
  60. 0.12
  61. Integers can also be used for the time values:
  62. >>> node_times = {1: 2015, 2: 2012, 3: 2010, 4: 2008, 5: 2014}
  63. >>> nx.set_node_attributes(G, node_times, "new_time")
  64. >>> nx.cd_index(G, 3, time_delta=4, time="new_time")
  65. 0.5
  66. >>> nx.cd_index(G, 3, time_delta=4, time="new_time", weight="weight")
  67. 0.12
  68. Notes
  69. -----
  70. This method implements the algorithm for calculating the CD index,
  71. as described in the paper by Funk and Owen-Smith [1]_. The CD index
  72. is used in order to check how consolidating or destabilizing a patent
  73. is, hence the nodes of the graph represent patents and the edges show
  74. the citations between these patents. The mathematical model is given
  75. below:
  76. .. math::
  77. CD_{t}=\frac{1}{n_{t}}\sum_{i=1}^{n}\frac{-2f_{it}b_{it}+f_{it}}{w_{it}},
  78. where `f_{it}` equals 1 if `i` cites the focal patent else 0, `b_{it}` equals
  79. 1 if `i` cites any of the focal patents successors else 0, `n_{t}` is the number
  80. of forward citations in `i` and `w_{it}` is a matrix of weight for patent `i`
  81. at time `t`.
  82. The `datetime.timedelta` package can lead to off-by-one issues when converting
  83. from years to days. In the example above `timedelta(days=5 * 365)` looks like
  84. 5 years, but it isn't because of leap year days. So it gives the same result
  85. as `timedelta(days=4 * 365)`. But using `timedelta(days=5 * 365 + 1)` gives
  86. a 5 year delta **for this choice of years** but may not if the 5 year gap has
  87. more than 1 leap year. To avoid these issues, use integers to represent years,
  88. or be very careful when you convert units of time.
  89. References
  90. ----------
  91. .. [1] Funk, Russell J., and Jason Owen-Smith.
  92. "A dynamic network measure of technological change."
  93. Management science 63, no. 3 (2017): 791-817.
  94. http://russellfunk.org/cdindex/static/papers/funk_ms_2017.pdf
  95. """
  96. if not all(time in G.nodes[n] for n in G):
  97. raise nx.NetworkXError("Not all nodes have a 'time' attribute.")
  98. try:
  99. # get target_date
  100. target_date = G.nodes[node][time] + time_delta
  101. # keep the predecessors that existed before the target date
  102. pred = {i for i in G.pred[node] if G.nodes[i][time] <= target_date}
  103. except:
  104. raise nx.NetworkXError(
  105. "Addition and comparison are not supported between 'time_delta' "
  106. "and 'time' types."
  107. )
  108. # -1 if any edge between node's predecessors and node's successors, else 1
  109. b = [-1 if any(j in G[i] for j in G[node]) else 1 for i in pred]
  110. # n is size of the union of the focal node's predecessors and its successors' predecessors
  111. n = len(pred.union(*(G.pred[s].keys() - {node} for s in G[node])))
  112. if n == 0:
  113. raise nx.NetworkXError("The cd index cannot be defined.")
  114. # calculate cd index
  115. if weight is None:
  116. return round(sum(bi for bi in b) / n, 2)
  117. else:
  118. # If a node has the specified weight attribute, its weight is used in the calculation
  119. # otherwise, a weight of 1 is assumed for that node
  120. weights = [G.nodes[i].get(weight, 1) for i in pred]
  121. return round(sum(bi / wt for bi, wt in zip(b, weights)) / n, 2)