graph6.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. # Original author: D. Eppstein, UC Irvine, August 12, 2003.
  2. # The original code at http://www.ics.uci.edu/~eppstein/PADS/ is public domain.
  3. """Functions for reading and writing graphs in the *graph6* format.
  4. The *graph6* file format is suitable for small graphs or large dense
  5. graphs. For large sparse graphs, use the *sparse6* format.
  6. For more information, see the `graph6`_ homepage.
  7. .. _graph6: http://users.cecs.anu.edu.au/~bdm/data/formats.html
  8. """
  9. from itertools import islice
  10. import networkx as nx
  11. from networkx.exception import NetworkXError
  12. from networkx.utils import not_implemented_for, open_file
  13. __all__ = ["from_graph6_bytes", "read_graph6", "to_graph6_bytes", "write_graph6"]
  14. def _generate_graph6_bytes(G, nodes, header):
  15. """Yield bytes in the graph6 encoding of a graph.
  16. `G` is an undirected simple graph. `nodes` is the list of nodes for
  17. which the node-induced subgraph will be encoded; if `nodes` is the
  18. list of all nodes in the graph, the entire graph will be
  19. encoded. `header` is a Boolean that specifies whether to generate
  20. the header ``b'>>graph6<<'`` before the remaining data.
  21. This function generates `bytes` objects in the following order:
  22. 1. the header (if requested),
  23. 2. the encoding of the number of nodes,
  24. 3. each character, one-at-a-time, in the encoding of the requested
  25. node-induced subgraph,
  26. 4. a newline character.
  27. This function raises :exc:`ValueError` if the graph is too large for
  28. the graph6 format (that is, greater than ``2 ** 36`` nodes).
  29. """
  30. n = len(G)
  31. if n >= 2**36:
  32. raise ValueError(
  33. "graph6 is only defined if number of nodes is less than 2 ** 36"
  34. )
  35. if header:
  36. yield b">>graph6<<"
  37. for d in n_to_data(n):
  38. yield str.encode(chr(d + 63))
  39. # This generates the same as `(v in G[u] for u, v in combinations(G, 2))`,
  40. # but in "column-major" order instead of "row-major" order.
  41. bits = (nodes[j] in G[nodes[i]] for j in range(1, n) for i in range(j))
  42. chunk = list(islice(bits, 6))
  43. while chunk:
  44. d = sum(b << 5 - i for i, b in enumerate(chunk))
  45. yield str.encode(chr(d + 63))
  46. chunk = list(islice(bits, 6))
  47. yield b"\n"
  48. @nx._dispatchable(graphs=None, returns_graph=True)
  49. def from_graph6_bytes(bytes_in):
  50. """Read a simple undirected graph in graph6 format from bytes.
  51. Parameters
  52. ----------
  53. bytes_in : bytes
  54. Data in graph6 format
  55. Returns
  56. -------
  57. G : Graph
  58. Raises
  59. ------
  60. NetworkXError
  61. If `bytes_in` is unable to be parsed in graph6 format
  62. ValueError
  63. If any character ``c`` in bytes_in does not satisfy
  64. ``63 <= ord(c) < 127``.
  65. Examples
  66. --------
  67. >>> G = nx.from_graph6_bytes(b"A_")
  68. >>> sorted(G.edges())
  69. [(0, 1)]
  70. Notes
  71. -----
  72. Per the graph6 spec, the header (e.g. ``b'>>graph6<<'``) must not be
  73. followed by a newline character.
  74. See Also
  75. --------
  76. read_graph6, write_graph6
  77. References
  78. ----------
  79. .. [1] Graph6 specification
  80. <http://users.cecs.anu.edu.au/~bdm/data/formats.html>
  81. """
  82. def bits():
  83. """Returns sequence of individual bits from 6-bit-per-value
  84. list of data values."""
  85. for d in data:
  86. for i in [5, 4, 3, 2, 1, 0]:
  87. yield (d >> i) & 1
  88. # Ignore trailing newline
  89. bytes_in = bytes_in.rstrip(b"\n")
  90. if bytes_in.startswith(b">>graph6<<"):
  91. bytes_in = bytes_in[10:]
  92. data = [c - 63 for c in bytes_in]
  93. if any(c > 63 for c in data):
  94. raise ValueError("each input character must be in range(63, 127)")
  95. n, data = data_to_n(data)
  96. nd = (n * (n - 1) // 2 + 5) // 6
  97. if len(data) != nd:
  98. raise NetworkXError(
  99. f"Expected {n * (n - 1) // 2} bits but got {len(data) * 6} in graph6"
  100. )
  101. G = nx.Graph()
  102. G.add_nodes_from(range(n))
  103. for (i, j), b in zip(((i, j) for j in range(1, n) for i in range(j)), bits()):
  104. if b:
  105. G.add_edge(i, j)
  106. return G
  107. @not_implemented_for("directed")
  108. @not_implemented_for("multigraph")
  109. def to_graph6_bytes(G, nodes=None, header=True):
  110. """Convert a simple undirected graph to bytes in graph6 format.
  111. Parameters
  112. ----------
  113. G : Graph (undirected)
  114. nodes: list or iterable
  115. Nodes are labeled 0...n-1 in the order provided. If None the ordering
  116. given by ``G.nodes()`` is used.
  117. header: bool
  118. If True add '>>graph6<<' bytes to head of data.
  119. Raises
  120. ------
  121. NetworkXNotImplemented
  122. If the graph is directed or is a multigraph.
  123. ValueError
  124. If the graph has at least ``2 ** 36`` nodes; the graph6 format
  125. is only defined for graphs of order less than ``2 ** 36``.
  126. Examples
  127. --------
  128. >>> nx.to_graph6_bytes(nx.path_graph(2))
  129. b'>>graph6<<A_\\n'
  130. See Also
  131. --------
  132. from_graph6_bytes, read_graph6, write_graph6_bytes
  133. Notes
  134. -----
  135. The returned bytes end with a newline character.
  136. The format does not support edge or node labels, parallel edges or
  137. self loops. If self loops are present they are silently ignored.
  138. References
  139. ----------
  140. .. [1] Graph6 specification
  141. <http://users.cecs.anu.edu.au/~bdm/data/formats.html>
  142. """
  143. if nodes is not None:
  144. G = G.subgraph(nodes)
  145. H = nx.convert_node_labels_to_integers(G)
  146. nodes = sorted(H.nodes())
  147. return b"".join(_generate_graph6_bytes(H, nodes, header))
  148. @open_file(0, mode="rb")
  149. @nx._dispatchable(graphs=None, returns_graph=True)
  150. def read_graph6(path):
  151. """Read simple undirected graphs in graph6 format from path.
  152. Parameters
  153. ----------
  154. path : file or string
  155. Filename or file handle to read.
  156. Filenames ending in .gz or .bz2 will be decompressed.
  157. Returns
  158. -------
  159. G : Graph or list of Graphs
  160. If the file contains multiple lines then a list of graphs is returned
  161. Raises
  162. ------
  163. NetworkXError
  164. If the string is unable to be parsed in graph6 format
  165. Examples
  166. --------
  167. You can read a graph6 file by giving the path to the file::
  168. >>> import tempfile
  169. >>> with tempfile.NamedTemporaryFile(delete=False) as f:
  170. ... _ = f.write(b">>graph6<<A_\\n")
  171. ... _ = f.seek(0)
  172. ... G = nx.read_graph6(f.name)
  173. >>> list(G.edges())
  174. [(0, 1)]
  175. You can also read a graph6 file by giving an open file-like object::
  176. >>> import tempfile
  177. >>> with tempfile.NamedTemporaryFile() as f:
  178. ... _ = f.write(b">>graph6<<A_\\n")
  179. ... _ = f.seek(0)
  180. ... G = nx.read_graph6(f)
  181. >>> list(G.edges())
  182. [(0, 1)]
  183. See Also
  184. --------
  185. from_graph6_bytes, write_graph6
  186. References
  187. ----------
  188. .. [1] Graph6 specification
  189. <http://users.cecs.anu.edu.au/~bdm/data/formats.html>
  190. """
  191. glist = []
  192. for line in path:
  193. line = line.strip()
  194. if not len(line):
  195. continue
  196. glist.append(from_graph6_bytes(line))
  197. if len(glist) == 1:
  198. return glist[0]
  199. else:
  200. return glist
  201. @not_implemented_for("directed")
  202. @not_implemented_for("multigraph")
  203. @open_file(1, mode="wb")
  204. def write_graph6(G, path, nodes=None, header=True):
  205. """Write a simple undirected graph to a path in graph6 format.
  206. Parameters
  207. ----------
  208. G : Graph (undirected)
  209. path : file or string
  210. File or filename to write.
  211. Filenames ending in .gz or .bz2 will be compressed.
  212. nodes: list or iterable
  213. Nodes are labeled 0...n-1 in the order provided. If None the ordering
  214. given by ``G.nodes()`` is used.
  215. header: bool
  216. If True add '>>graph6<<' string to head of data
  217. Raises
  218. ------
  219. NetworkXNotImplemented
  220. If the graph is directed or is a multigraph.
  221. ValueError
  222. If the graph has at least ``2 ** 36`` nodes; the graph6 format
  223. is only defined for graphs of order less than ``2 ** 36``.
  224. Examples
  225. --------
  226. You can write a graph6 file by giving the path to a file::
  227. >>> import tempfile
  228. >>> with tempfile.NamedTemporaryFile(delete=False) as f:
  229. ... nx.write_graph6(nx.path_graph(2), f.name)
  230. ... _ = f.seek(0)
  231. ... print(f.read())
  232. b'>>graph6<<A_\\n'
  233. See Also
  234. --------
  235. from_graph6_bytes, read_graph6
  236. Notes
  237. -----
  238. The function writes a newline character after writing the encoding
  239. of the graph.
  240. The format does not support edge or node labels, parallel edges or
  241. self loops. If self loops are present they are silently ignored.
  242. References
  243. ----------
  244. .. [1] Graph6 specification
  245. <http://users.cecs.anu.edu.au/~bdm/data/formats.html>
  246. """
  247. return write_graph6_file(G, path, nodes=nodes, header=header)
  248. @not_implemented_for("directed")
  249. @not_implemented_for("multigraph")
  250. def write_graph6_file(G, f, nodes=None, header=True):
  251. """Write a simple undirected graph to a file-like object in graph6 format.
  252. Parameters
  253. ----------
  254. G : Graph (undirected)
  255. f : file-like object
  256. The file to write.
  257. nodes: list or iterable
  258. Nodes are labeled 0...n-1 in the order provided. If None the ordering
  259. given by ``G.nodes()`` is used.
  260. header: bool
  261. If True add '>>graph6<<' string to head of data
  262. Raises
  263. ------
  264. NetworkXNotImplemented
  265. If the graph is directed or is a multigraph.
  266. ValueError
  267. If the graph has at least ``2 ** 36`` nodes; the graph6 format
  268. is only defined for graphs of order less than ``2 ** 36``.
  269. Examples
  270. --------
  271. You can write a graph6 file by giving an open file-like object::
  272. >>> import tempfile
  273. >>> with tempfile.NamedTemporaryFile() as f:
  274. ... nx.write_graph6(nx.path_graph(2), f)
  275. ... _ = f.seek(0)
  276. ... print(f.read())
  277. b'>>graph6<<A_\\n'
  278. See Also
  279. --------
  280. from_graph6_bytes, read_graph6
  281. Notes
  282. -----
  283. The function writes a newline character after writing the encoding
  284. of the graph.
  285. The format does not support edge or node labels, parallel edges or
  286. self loops. If self loops are present they are silently ignored.
  287. References
  288. ----------
  289. .. [1] Graph6 specification
  290. <http://users.cecs.anu.edu.au/~bdm/data/formats.html>
  291. """
  292. if nodes is not None:
  293. G = G.subgraph(nodes)
  294. H = nx.convert_node_labels_to_integers(G)
  295. nodes = sorted(H.nodes())
  296. for b in _generate_graph6_bytes(H, nodes, header):
  297. f.write(b)
  298. def data_to_n(data):
  299. """Read initial one-, four- or eight-unit value from graph6
  300. integer sequence.
  301. Return (value, rest of seq.)"""
  302. if data[0] <= 62:
  303. return data[0], data[1:]
  304. if data[1] <= 62:
  305. return (data[1] << 12) + (data[2] << 6) + data[3], data[4:]
  306. return (
  307. (data[2] << 30)
  308. + (data[3] << 24)
  309. + (data[4] << 18)
  310. + (data[5] << 12)
  311. + (data[6] << 6)
  312. + data[7],
  313. data[8:],
  314. )
  315. def n_to_data(n):
  316. """Convert an integer to one-, four- or eight-unit graph6 sequence.
  317. This function is undefined if `n` is not in ``range(2 ** 36)``.
  318. """
  319. if n <= 62:
  320. return [n]
  321. elif n <= 258047:
  322. return [63, (n >> 12) & 0x3F, (n >> 6) & 0x3F, n & 0x3F]
  323. else: # if n <= 68719476735:
  324. return [
  325. 63,
  326. 63,
  327. (n >> 30) & 0x3F,
  328. (n >> 24) & 0x3F,
  329. (n >> 18) & 0x3F,
  330. (n >> 12) & 0x3F,
  331. (n >> 6) & 0x3F,
  332. n & 0x3F,
  333. ]