sparse6.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. # Original author: D. Eppstein, UC Irvine, August 12, 2003.
  2. # The original code at https://www.ics.uci.edu/~eppstein/PADS/ is public domain.
  3. """Functions for reading and writing graphs in the *sparse6* format.
  4. The *sparse6* file format is a space-efficient format for large sparse
  5. graphs. For small graphs or large dense graphs, use the *graph6* file
  6. format.
  7. For more information, see the `sparse6`_ homepage.
  8. .. _sparse6: https://users.cecs.anu.edu.au/~bdm/data/formats.html
  9. """
  10. import networkx as nx
  11. from networkx.exception import NetworkXError
  12. from networkx.readwrite.graph6 import data_to_n, n_to_data
  13. from networkx.utils import not_implemented_for, open_file
  14. __all__ = ["from_sparse6_bytes", "read_sparse6", "to_sparse6_bytes", "write_sparse6"]
  15. def _generate_sparse6_bytes(G, nodes, header):
  16. """Yield bytes in the sparse6 encoding of a graph.
  17. `G` is an undirected simple graph. `nodes` is the list of nodes for
  18. which the node-induced subgraph will be encoded; if `nodes` is the
  19. list of all nodes in the graph, the entire graph will be
  20. encoded. `header` is a Boolean that specifies whether to generate
  21. the header ``b'>>sparse6<<'`` before the remaining data.
  22. This function generates `bytes` objects in the following order:
  23. 1. the header (if requested),
  24. 2. the encoding of the number of nodes,
  25. 3. each character, one-at-a-time, in the encoding of the requested
  26. node-induced subgraph,
  27. 4. a newline character.
  28. This function raises :exc:`ValueError` if the graph is too large for
  29. the graph6 format (that is, greater than ``2 ** 36`` nodes).
  30. """
  31. n = len(G)
  32. if n >= 2**36:
  33. raise ValueError(
  34. "sparse6 is only defined if number of nodes is less than 2 ** 36"
  35. )
  36. if header:
  37. yield b">>sparse6<<"
  38. yield b":"
  39. for d in n_to_data(n):
  40. yield str.encode(chr(d + 63))
  41. k = 1
  42. while 1 << k < n:
  43. k += 1
  44. def enc(x):
  45. """Big endian k-bit encoding of x"""
  46. return [1 if (x & 1 << (k - 1 - i)) else 0 for i in range(k)]
  47. edges = sorted((max(u, v), min(u, v)) for u, v in G.edges())
  48. bits = []
  49. curv = 0
  50. for v, u in edges:
  51. if v == curv: # current vertex edge
  52. bits.append(0)
  53. bits.extend(enc(u))
  54. elif v == curv + 1: # next vertex edge
  55. curv += 1
  56. bits.append(1)
  57. bits.extend(enc(u))
  58. else: # skip to vertex v and then add edge to u
  59. curv = v
  60. bits.append(1)
  61. bits.extend(enc(v))
  62. bits.append(0)
  63. bits.extend(enc(u))
  64. if k < 6 and n == (1 << k) and ((-len(bits)) % 6) >= k and curv < (n - 1):
  65. # Padding special case: small k, n=2^k,
  66. # more than k bits of padding needed,
  67. # current vertex is not (n-1) --
  68. # appending 1111... would add a loop on (n-1)
  69. bits.append(0)
  70. bits.extend([1] * ((-len(bits)) % 6))
  71. else:
  72. bits.extend([1] * ((-len(bits)) % 6))
  73. data = [
  74. (bits[i + 0] << 5)
  75. + (bits[i + 1] << 4)
  76. + (bits[i + 2] << 3)
  77. + (bits[i + 3] << 2)
  78. + (bits[i + 4] << 1)
  79. + (bits[i + 5] << 0)
  80. for i in range(0, len(bits), 6)
  81. ]
  82. for d in data:
  83. yield str.encode(chr(d + 63))
  84. yield b"\n"
  85. @nx._dispatchable(graphs=None, returns_graph=True)
  86. def from_sparse6_bytes(string):
  87. """Read an undirected graph in sparse6 format from string.
  88. Parameters
  89. ----------
  90. string : string
  91. Data in sparse6 format
  92. Returns
  93. -------
  94. G : Graph
  95. Raises
  96. ------
  97. NetworkXError
  98. If the string is unable to be parsed in sparse6 format
  99. Examples
  100. --------
  101. >>> G = nx.from_sparse6_bytes(b":A_")
  102. >>> sorted(G.edges())
  103. [(0, 1), (0, 1), (0, 1)]
  104. See Also
  105. --------
  106. read_sparse6, write_sparse6
  107. References
  108. ----------
  109. .. [1] Sparse6 specification
  110. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  111. """
  112. if string.startswith(b">>sparse6<<"):
  113. string = string[11:]
  114. if not string.startswith(b":"):
  115. raise NetworkXError("Expected leading colon in sparse6")
  116. chars = [c - 63 for c in string[1:]]
  117. n, data = data_to_n(chars)
  118. k = 1
  119. while 1 << k < n:
  120. k += 1
  121. def parseData():
  122. """Returns stream of pairs b[i], x[i] for sparse6 format."""
  123. chunks = iter(data)
  124. d = None # partial data word
  125. dLen = 0 # how many unparsed bits are left in d
  126. while 1:
  127. if dLen < 1:
  128. try:
  129. d = next(chunks)
  130. except StopIteration:
  131. return
  132. dLen = 6
  133. dLen -= 1
  134. b = (d >> dLen) & 1 # grab top remaining bit
  135. x = d & ((1 << dLen) - 1) # partially built up value of x
  136. xLen = dLen # how many bits included so far in x
  137. while xLen < k: # now grab full chunks until we have enough
  138. try:
  139. d = next(chunks)
  140. except StopIteration:
  141. return
  142. dLen = 6
  143. x = (x << 6) + d
  144. xLen += 6
  145. x = x >> (xLen - k) # shift back the extra bits
  146. dLen = xLen - k
  147. yield b, x
  148. v = 0
  149. G = nx.MultiGraph()
  150. G.add_nodes_from(range(n))
  151. multigraph = False
  152. for b, x in parseData():
  153. if b == 1:
  154. v += 1
  155. # padding with ones can cause overlarge number here
  156. if x >= n or v >= n:
  157. break
  158. elif x > v:
  159. v = x
  160. else:
  161. if G.has_edge(x, v):
  162. multigraph = True
  163. G.add_edge(x, v)
  164. if not multigraph:
  165. G = nx.Graph(G)
  166. return G
  167. def to_sparse6_bytes(G, nodes=None, header=True):
  168. """Convert an undirected graph to bytes in sparse6 format.
  169. Parameters
  170. ----------
  171. G : Graph (undirected)
  172. nodes: list or iterable
  173. Nodes are labeled 0...n-1 in the order provided. If None the ordering
  174. given by ``G.nodes()`` is used.
  175. header: bool
  176. If True add '>>sparse6<<' bytes to head of data.
  177. Raises
  178. ------
  179. NetworkXNotImplemented
  180. If the graph is directed.
  181. ValueError
  182. If the graph has at least ``2 ** 36`` nodes; the sparse6 format
  183. is only defined for graphs of order less than ``2 ** 36``.
  184. Examples
  185. --------
  186. >>> nx.to_sparse6_bytes(nx.path_graph(2))
  187. b'>>sparse6<<:An\\n'
  188. See Also
  189. --------
  190. to_sparse6_bytes, read_sparse6, write_sparse6_bytes
  191. Notes
  192. -----
  193. The returned bytes end with a newline character.
  194. The format does not support edge or node labels.
  195. References
  196. ----------
  197. .. [1] Graph6 specification
  198. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  199. """
  200. if nodes is not None:
  201. G = G.subgraph(nodes)
  202. G = nx.convert_node_labels_to_integers(G, ordering="sorted")
  203. return b"".join(_generate_sparse6_bytes(G, nodes, header))
  204. @open_file(0, mode="rb")
  205. @nx._dispatchable(graphs=None, returns_graph=True)
  206. def read_sparse6(path):
  207. """Read an undirected graph in sparse6 format from path.
  208. Parameters
  209. ----------
  210. path : file or string
  211. Filename or file handle to read.
  212. Filenames ending in .gz or .bz2 will be decompressed.
  213. Returns
  214. -------
  215. G : Graph/Multigraph or list of Graphs/MultiGraphs
  216. If the file contains multiple lines then a list of graphs is returned
  217. Raises
  218. ------
  219. NetworkXError
  220. If the string is unable to be parsed in sparse6 format
  221. Examples
  222. --------
  223. You can read a sparse6 file by giving the path to the file::
  224. >>> import tempfile
  225. >>> with tempfile.NamedTemporaryFile(delete=False) as f:
  226. ... _ = f.write(b">>sparse6<<:An\\n")
  227. ... _ = f.seek(0)
  228. ... G = nx.read_sparse6(f.name)
  229. >>> list(G.edges())
  230. [(0, 1)]
  231. You can also read a sparse6 file by giving an open file-like object::
  232. >>> import tempfile
  233. >>> with tempfile.NamedTemporaryFile() as f:
  234. ... _ = f.write(b">>sparse6<<:An\\n")
  235. ... _ = f.seek(0)
  236. ... G = nx.read_sparse6(f)
  237. >>> list(G.edges())
  238. [(0, 1)]
  239. See Also
  240. --------
  241. read_sparse6, from_sparse6_bytes
  242. References
  243. ----------
  244. .. [1] Sparse6 specification
  245. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  246. """
  247. glist = []
  248. for line in path:
  249. line = line.strip()
  250. if not len(line):
  251. continue
  252. glist.append(from_sparse6_bytes(line))
  253. if len(glist) == 1:
  254. return glist[0]
  255. else:
  256. return glist
  257. @not_implemented_for("directed")
  258. @open_file(1, mode="wb")
  259. def write_sparse6(G, path, nodes=None, header=True):
  260. """Write graph G to given path in sparse6 format.
  261. Parameters
  262. ----------
  263. G : Graph (undirected)
  264. path : file or string
  265. File or filename to write.
  266. Filenames ending in .gz or .bz2 will be compressed.
  267. nodes: list or iterable
  268. Nodes are labeled 0...n-1 in the order provided. If None the ordering
  269. given by G.nodes() is used.
  270. header: bool
  271. If True add '>>sparse6<<' string to head of data
  272. Raises
  273. ------
  274. NetworkXError
  275. If the graph is directed
  276. Examples
  277. --------
  278. You can write a sparse6 file by giving the path to the file::
  279. >>> import tempfile
  280. >>> with tempfile.NamedTemporaryFile(delete=False) as f:
  281. ... nx.write_sparse6(nx.path_graph(2), f.name)
  282. ... print(f.read())
  283. b'>>sparse6<<:An\\n'
  284. You can also write a sparse6 file by giving an open file-like object::
  285. >>> with tempfile.NamedTemporaryFile() as f:
  286. ... nx.write_sparse6(nx.path_graph(2), f)
  287. ... _ = f.seek(0)
  288. ... print(f.read())
  289. b'>>sparse6<<:An\\n'
  290. See Also
  291. --------
  292. read_sparse6, from_sparse6_bytes
  293. Notes
  294. -----
  295. The format does not support edge or node labels.
  296. References
  297. ----------
  298. .. [1] Sparse6 specification
  299. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  300. """
  301. if nodes is not None:
  302. G = G.subgraph(nodes)
  303. G = nx.convert_node_labels_to_integers(G, ordering="sorted")
  304. for b in _generate_sparse6_bytes(G, nodes, header):
  305. path.write(b)