edgelist.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. """
  2. **********
  3. Edge Lists
  4. **********
  5. Read and write NetworkX graphs as edge lists.
  6. The edge list format is useful for graphs with nodes
  7. that can be meaningfully represented as strings. With the edgelist
  8. format simple edge data can be stored but node or graph data is not.
  9. There is no way of representing isolated nodes unless the node has a
  10. self-loop edge.
  11. Format
  12. ------
  13. You can read or write three formats of edge lists with these functions.
  14. Node pairs with no data::
  15. 1 2
  16. Python dictionary as data::
  17. 1 2 {'weight':7, 'color':'green'}
  18. Arbitrary data::
  19. 1 2 7 green
  20. """
# Public names of this module: the edge list generator, parser, readers and
# writers listed here are what ``from <module> import *`` exports.
__all__ = [
    "generate_edgelist",
    "write_edgelist",
    "parse_edgelist",
    "read_edgelist",
    "read_weighted_edgelist",
    "write_weighted_edgelist",
]
  29. import networkx as nx
  30. from networkx.utils import open_file
  31. def generate_edgelist(G, delimiter=" ", data=True):
  32. """Generate a single line of the graph G in edge list format.
  33. Parameters
  34. ----------
  35. G : NetworkX graph
  36. delimiter : string, optional
  37. Separator for node labels
  38. data : bool or list of keys
  39. If False generate no edge data. If True use a dictionary
  40. representation of edge data. If a list of keys use a list of data
  41. values corresponding to the keys.
  42. Returns
  43. -------
  44. lines : string
  45. Lines of data in adjlist format.
  46. Examples
  47. --------
  48. >>> G = nx.lollipop_graph(4, 3)
  49. >>> G[1][2]["weight"] = 3
  50. >>> G[3][4]["capacity"] = 12
  51. >>> for line in nx.generate_edgelist(G, data=False):
  52. ... print(line)
  53. 0 1
  54. 0 2
  55. 0 3
  56. 1 2
  57. 1 3
  58. 2 3
  59. 3 4
  60. 4 5
  61. 5 6
  62. >>> for line in nx.generate_edgelist(G):
  63. ... print(line)
  64. 0 1 {}
  65. 0 2 {}
  66. 0 3 {}
  67. 1 2 {'weight': 3}
  68. 1 3 {}
  69. 2 3 {}
  70. 3 4 {'capacity': 12}
  71. 4 5 {}
  72. 5 6 {}
  73. >>> for line in nx.generate_edgelist(G, data=["weight"]):
  74. ... print(line)
  75. 0 1
  76. 0 2
  77. 0 3
  78. 1 2 3
  79. 1 3
  80. 2 3
  81. 3 4
  82. 4 5
  83. 5 6
  84. See Also
  85. --------
  86. write_adjlist, read_adjlist
  87. """
  88. if data is True:
  89. for u, v, d in G.edges(data=True):
  90. e = u, v, dict(d)
  91. yield delimiter.join(map(str, e))
  92. elif data is False:
  93. for u, v in G.edges(data=False):
  94. e = u, v
  95. yield delimiter.join(map(str, e))
  96. else:
  97. for u, v, d in G.edges(data=True):
  98. e = [u, v]
  99. try:
  100. e.extend(d[k] for k in data)
  101. except KeyError:
  102. pass # missing data for this edge, should warn?
  103. yield delimiter.join(map(str, e))
  104. @open_file(1, mode="wb")
  105. def write_edgelist(G, path, comments="#", delimiter=" ", data=True, encoding="utf-8"):
  106. """Write graph as a list of edges.
  107. Parameters
  108. ----------
  109. G : graph
  110. A NetworkX graph
  111. path : file or string
  112. File or filename to write. If a file is provided, it must be
  113. opened in 'wb' mode. Filenames ending in .gz or .bz2 will be compressed.
  114. comments : string, optional
  115. The character used to indicate the start of a comment
  116. delimiter : string, optional
  117. The string used to separate values. The default is whitespace.
  118. data : bool or list, optional
  119. If False write no edge data.
  120. If True write a string representation of the edge data dictionary..
  121. If a list (or other iterable) is provided, write the keys specified
  122. in the list.
  123. encoding: string, optional
  124. Specify which encoding to use when writing file.
  125. Examples
  126. --------
  127. >>> G = nx.path_graph(4)
  128. >>> nx.write_edgelist(G, "test.edgelist")
  129. >>> G = nx.path_graph(4)
  130. >>> fh = open("test.edgelist", "wb")
  131. >>> nx.write_edgelist(G, fh)
  132. >>> nx.write_edgelist(G, "test.edgelist.gz")
  133. >>> nx.write_edgelist(G, "test.edgelist_nodata.gz", data=False)
  134. >>> G = nx.Graph()
  135. >>> G.add_edge(1, 2, weight=7, color="red")
  136. >>> nx.write_edgelist(G, "test.edgelist_bigger_nodata", data=False)
  137. >>> nx.write_edgelist(G, "test.edgelist_color", data=["color"])
  138. >>> nx.write_edgelist(G, "test.edgelist_color_weight", data=["color", "weight"])
  139. See Also
  140. --------
  141. read_edgelist
  142. write_weighted_edgelist
  143. """
  144. for line in generate_edgelist(G, delimiter, data):
  145. line += "\n"
  146. path.write(line.encode(encoding))
  147. @nx._dispatchable(graphs=None, returns_graph=True)
  148. def parse_edgelist(
  149. lines, comments="#", delimiter=None, create_using=None, nodetype=None, data=True
  150. ):
  151. """Parse lines of an edge list representation of a graph.
  152. Parameters
  153. ----------
  154. lines : list or iterator of strings
  155. Input data in edgelist format
  156. comments : string, optional
  157. Marker for comment lines. Default is `'#'`. To specify that no character
  158. should be treated as a comment, use ``comments=None``.
  159. delimiter : string, optional
  160. Separator for node labels. Default is `None`, meaning any whitespace.
  161. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  162. Graph type to create. If graph instance, then cleared before populated.
  163. nodetype : Python type, optional
  164. Convert nodes to this type. Default is `None`, meaning no conversion is
  165. performed.
  166. data : bool or list of (label,type) tuples
  167. If `False` generate no edge data or if `True` use a dictionary
  168. representation of edge data or a list tuples specifying dictionary
  169. key names and types for edge data.
  170. Returns
  171. -------
  172. G: NetworkX Graph
  173. The graph corresponding to lines
  174. Examples
  175. --------
  176. Edgelist with no data:
  177. >>> lines = ["1 2", "2 3", "3 4"]
  178. >>> G = nx.parse_edgelist(lines, nodetype=int)
  179. >>> list(G)
  180. [1, 2, 3, 4]
  181. >>> list(G.edges())
  182. [(1, 2), (2, 3), (3, 4)]
  183. Edgelist with data in Python dictionary representation:
  184. >>> lines = ["1 2 {'weight': 3}", "2 3 {'weight': 27}", "3 4 {'weight': 3.0}"]
  185. >>> G = nx.parse_edgelist(lines, nodetype=int)
  186. >>> list(G)
  187. [1, 2, 3, 4]
  188. >>> list(G.edges(data=True))
  189. [(1, 2, {'weight': 3}), (2, 3, {'weight': 27}), (3, 4, {'weight': 3.0})]
  190. Edgelist with data in a list:
  191. >>> lines = ["1 2 3", "2 3 27", "3 4 3.0"]
  192. >>> G = nx.parse_edgelist(lines, nodetype=int, data=(("weight", float),))
  193. >>> list(G)
  194. [1, 2, 3, 4]
  195. >>> list(G.edges(data=True))
  196. [(1, 2, {'weight': 3.0}), (2, 3, {'weight': 27.0}), (3, 4, {'weight': 3.0})]
  197. See Also
  198. --------
  199. read_weighted_edgelist
  200. """
  201. from ast import literal_eval
  202. G = nx.empty_graph(0, create_using)
  203. for line in lines:
  204. if comments is not None:
  205. p = line.find(comments)
  206. if p >= 0:
  207. line = line[:p]
  208. if not line:
  209. continue
  210. # split line, should have 2 or more
  211. s = line.rstrip("\n").split(delimiter)
  212. if len(s) < 2:
  213. continue
  214. u = s.pop(0)
  215. v = s.pop(0)
  216. d = s
  217. if nodetype is not None:
  218. try:
  219. u = nodetype(u)
  220. v = nodetype(v)
  221. except Exception as err:
  222. raise TypeError(
  223. f"Failed to convert nodes {u},{v} to type {nodetype}."
  224. ) from err
  225. if len(d) == 0 or data is False:
  226. # no data or data type specified
  227. edgedata = {}
  228. elif data is True:
  229. # no edge types specified
  230. try: # try to evaluate as dictionary
  231. if delimiter == ",":
  232. edgedata_str = ",".join(d)
  233. else:
  234. edgedata_str = " ".join(d)
  235. edgedata = dict(literal_eval(edgedata_str.strip()))
  236. except Exception as err:
  237. raise TypeError(
  238. f"Failed to convert edge data ({d}) to dictionary."
  239. ) from err
  240. else:
  241. # convert edge data to dictionary with specified keys and type
  242. if len(d) != len(data):
  243. raise IndexError(
  244. f"Edge data {d} and data_keys {data} are not the same length"
  245. )
  246. edgedata = {}
  247. for (edge_key, edge_type), edge_value in zip(data, d):
  248. try:
  249. edge_value = edge_type(edge_value)
  250. except Exception as err:
  251. raise TypeError(
  252. f"Failed to convert {edge_key} data {edge_value} "
  253. f"to type {edge_type}."
  254. ) from err
  255. edgedata.update({edge_key: edge_value})
  256. G.add_edge(u, v, **edgedata)
  257. return G
  258. @open_file(0, mode="rb")
  259. @nx._dispatchable(graphs=None, returns_graph=True)
  260. def read_edgelist(
  261. path,
  262. comments="#",
  263. delimiter=None,
  264. create_using=None,
  265. nodetype=None,
  266. data=True,
  267. edgetype=None,
  268. encoding="utf-8",
  269. ):
  270. """Read a graph from a list of edges.
  271. Parameters
  272. ----------
  273. path : file or string
  274. File or filename to read. If a file is provided, it must be
  275. opened in 'rb' mode.
  276. Filenames ending in .gz or .bz2 will be decompressed.
  277. comments : string, optional
  278. The character used to indicate the start of a comment. To specify that
  279. no character should be treated as a comment, use ``comments=None``.
  280. delimiter : string, optional
  281. The string used to separate values. The default is whitespace.
  282. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  283. Graph type to create. If graph instance, then cleared before populated.
  284. nodetype : int, float, str, Python type, optional
  285. Convert node data from strings to specified type
  286. data : bool or list of (label,type) tuples
  287. Tuples specifying dictionary key names and types for edge data
  288. edgetype : int, float, str, Python type, optional OBSOLETE
  289. Convert edge data from strings to specified type and use as 'weight'
  290. encoding: string, optional
  291. Specify which encoding to use when reading file.
  292. Returns
  293. -------
  294. G : graph
  295. A networkx Graph or other type specified with create_using
  296. Examples
  297. --------
  298. >>> nx.write_edgelist(nx.path_graph(4), "test.edgelist_P4")
  299. >>> G = nx.read_edgelist("test.edgelist_P4")
  300. >>> fh = open("test.edgelist_P4", "rb")
  301. >>> G = nx.read_edgelist(fh)
  302. >>> fh.close()
  303. >>> G = nx.read_edgelist("test.edgelist_P4", nodetype=int)
  304. >>> G = nx.read_edgelist("test.edgelist_P4", create_using=nx.DiGraph)
  305. Edgelist with data in a list:
  306. >>> textline = "1 2 3"
  307. >>> fh = open("test.textline", "w")
  308. >>> d = fh.write(textline)
  309. >>> fh.close()
  310. >>> G = nx.read_edgelist("test.textline", nodetype=int, data=(("weight", float),))
  311. >>> list(G)
  312. [1, 2]
  313. >>> list(G.edges(data=True))
  314. [(1, 2, {'weight': 3.0})]
  315. See parse_edgelist() for more examples of formatting.
  316. See Also
  317. --------
  318. parse_edgelist
  319. write_edgelist
  320. Notes
  321. -----
  322. Since nodes must be hashable, the function nodetype must return hashable
  323. types (e.g. int, float, str, frozenset - or tuples of those, etc.)
  324. """
  325. lines = (line if isinstance(line, str) else line.decode(encoding) for line in path)
  326. return parse_edgelist(
  327. lines,
  328. comments=comments,
  329. delimiter=delimiter,
  330. create_using=create_using,
  331. nodetype=nodetype,
  332. data=data,
  333. )
  334. def write_weighted_edgelist(G, path, comments="#", delimiter=" ", encoding="utf-8"):
  335. """Write graph G as a list of edges with numeric weights.
  336. Parameters
  337. ----------
  338. G : graph
  339. A NetworkX graph
  340. path : file or string
  341. File or filename to write. If a file is provided, it must be
  342. opened in 'wb' mode.
  343. Filenames ending in .gz or .bz2 will be compressed.
  344. comments : string, optional
  345. The character used to indicate the start of a comment
  346. delimiter : string, optional
  347. The string used to separate values. The default is whitespace.
  348. encoding: string, optional
  349. Specify which encoding to use when writing file.
  350. Examples
  351. --------
  352. >>> G = nx.Graph()
  353. >>> G.add_edge(1, 2, weight=7)
  354. >>> nx.write_weighted_edgelist(G, "test.weighted.edgelist")
  355. See Also
  356. --------
  357. read_edgelist
  358. write_edgelist
  359. read_weighted_edgelist
  360. """
  361. write_edgelist(
  362. G,
  363. path,
  364. comments=comments,
  365. delimiter=delimiter,
  366. data=("weight",),
  367. encoding=encoding,
  368. )
  369. @nx._dispatchable(graphs=None, returns_graph=True)
  370. def read_weighted_edgelist(
  371. path,
  372. comments="#",
  373. delimiter=None,
  374. create_using=None,
  375. nodetype=None,
  376. encoding="utf-8",
  377. ):
  378. """Read a graph as list of edges with numeric weights.
  379. Parameters
  380. ----------
  381. path : file or string
  382. File or filename to read. If a file is provided, it must be
  383. opened in 'rb' mode.
  384. Filenames ending in .gz or .bz2 will be decompressed.
  385. comments : string, optional
  386. The character used to indicate the start of a comment.
  387. delimiter : string, optional
  388. The string used to separate values. The default is whitespace.
  389. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  390. Graph type to create. If graph instance, then cleared before populated.
  391. nodetype : int, float, str, Python type, optional
  392. Convert node data from strings to specified type
  393. encoding: string, optional
  394. Specify which encoding to use when reading file.
  395. Returns
  396. -------
  397. G : graph
  398. A networkx Graph or other type specified with create_using
  399. Notes
  400. -----
  401. Since nodes must be hashable, the function nodetype must return hashable
  402. types (e.g. int, float, str, frozenset - or tuples of those, etc.)
  403. Example edgelist file format.
  404. With numeric edge data::
  405. # read with
  406. # >>> G=nx.read_weighted_edgelist(fh)
  407. # source target data
  408. a b 1
  409. a c 3.14159
  410. d e 42
  411. See Also
  412. --------
  413. write_weighted_edgelist
  414. """
  415. return read_edgelist(
  416. path,
  417. comments=comments,
  418. delimiter=delimiter,
  419. create_using=create_using,
  420. nodetype=nodetype,
  421. data=(("weight", float),),
  422. encoding=encoding,
  423. )