p2g.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. """
This module reads and writes the p2g graph format
used in metabolic pathway studies.
  4. See:
  5. <https://web.archive.org/web/20080626113807/http://www.cs.purdue.edu/homes/koyuturk/pathway/>
  6. for a description.
  7. The summary is included here:
A file that describes a uniquely labeled graph (with extension ".gr")
looks like the following:
  10. name
  11. 3 4
  12. a
  13. 1 2
b

c
  16. 0 2
  17. "name" is simply a description of what the graph corresponds to. The
  18. second line displays the number of nodes and number of edges,
  19. respectively. This sample graph contains three nodes labeled "a", "b",
  20. and "c". The rest of the graph contains two lines for each node. The
  21. first line for a node contains the node label. After the declaration
  22. of the node label, the out-edges of that node in the graph are
  23. provided. For instance, "a" is linked to nodes 1 and 2, which are
  24. labeled "b" and "c", while the node labeled "b" has no outgoing
  25. edges. Observe that node labeled "c" has an outgoing edge to
  26. itself. Indeed, self-loops are allowed. Node index starts from 0.
  27. """
  28. import networkx as nx
  29. from networkx.utils import open_file
  30. @open_file(1, mode="w")
  31. def write_p2g(G, path, encoding="utf-8"):
  32. """Write NetworkX graph in p2g format.
  33. Notes
  34. -----
  35. This format is meant to be used with directed graphs with
  36. possible self loops.
  37. """
  38. path.write((f"{G.name}\n").encode(encoding))
  39. path.write((f"{G.order()} {G.size()}\n").encode(encoding))
  40. nodes = list(G)
  41. # make dictionary mapping nodes to integers
  42. nodenumber = dict(zip(nodes, range(len(nodes))))
  43. for n in nodes:
  44. path.write((f"{n}\n").encode(encoding))
  45. for nbr in G.neighbors(n):
  46. path.write((f"{nodenumber[nbr]} ").encode(encoding))
  47. path.write("\n".encode(encoding))
  48. @open_file(0, mode="r")
  49. @nx._dispatchable(graphs=None, returns_graph=True)
  50. def read_p2g(path, encoding="utf-8"):
  51. """Read graph in p2g format from path.
  52. Parameters
  53. ----------
  54. path : string or file
  55. Filename or file handle to read.
  56. Filenames ending in .gz or .bz2 will be decompressed.
  57. Returns
  58. -------
  59. MultiDiGraph
  60. Notes
  61. -----
  62. If you want a DiGraph (with no self loops allowed and no edge data)
  63. use D=nx.DiGraph(read_p2g(path))
  64. """
  65. lines = (line.decode(encoding) for line in path)
  66. G = parse_p2g(lines)
  67. return G
  68. @nx._dispatchable(graphs=None, returns_graph=True)
  69. def parse_p2g(lines):
  70. """Parse p2g format graph from string or iterable.
  71. Returns
  72. -------
  73. MultiDiGraph
  74. """
  75. description = next(lines).strip()
  76. # are multiedges (parallel edges) allowed?
  77. G = nx.MultiDiGraph(name=description, selfloops=True)
  78. nnodes, nedges = map(int, next(lines).split())
  79. nodelabel = {}
  80. nbrs = {}
  81. # loop over the nodes keeping track of node labels and out neighbors
  82. # defer adding edges until all node labels are known
  83. for i in range(nnodes):
  84. n = next(lines).strip()
  85. nodelabel[i] = n
  86. G.add_node(n)
  87. nbrs[n] = map(int, next(lines).split())
  88. # now we know all of the node labels so we can add the edges
  89. # with the correct labels
  90. for n in G:
  91. for nbr in nbrs[n]:
  92. G.add_edge(n, nodelabel[nbr])
  93. return G