# gml.py
"""
Read graphs in GML format.

"GML, the Graph Modelling Language, is our proposal for a portable
file format for graphs. GML's key features are portability, simple
syntax, extensibility and flexibility. A GML file consists of a
hierarchical key-value lists. Graphs can be annotated with arbitrary
data structures. The idea for a common file format was born at the
GD'95; this proposal is the outcome of many discussions. GML is the
standard file format in the Graphlet graph editor system. It has been
overtaken and adapted by several other systems for drawing graphs."

GML files are stored using a 7-bit ASCII encoding with any extended
ASCII characters (iso8859-1) appearing as HTML character entities.
You will need to give some thought to how the exported data should
interact with different languages and even different Python versions.
Re-importing from GML is also a concern.

Without specifying a `stringizer`/`destringizer`, the code is capable of
writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
specification.  For writing other data types, and for reading data other
than `str` you need to explicitly supply a `stringizer`/`destringizer`.

For additional documentation on the GML file format, please see the
`GML website <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.

Several example graphs in GML format may be found on Mark Newman's
`Network data page <http://www-personal.umich.edu/~mejn/netdata/>`_.
"""
  25. import html.entities as htmlentitydefs
  26. import re
  27. from ast import literal_eval
  28. from collections import defaultdict
  29. from enum import Enum
  30. from io import StringIO
  31. from typing import Any, NamedTuple
  32. import networkx as nx
  33. from networkx.exception import NetworkXError
  34. from networkx.utils import open_file
  35. __all__ = ["read_gml", "parse_gml", "generate_gml", "write_gml"]
  36. def escape(text):
  37. """Use XML character references to escape characters.
  38. Use XML character references for unprintable or non-ASCII
  39. characters, double quotes and ampersands in a string
  40. """
  41. def fixup(m):
  42. ch = m.group(0)
  43. return "&#" + str(ord(ch)) + ";"
  44. text = re.sub('[^ -~]|[&"]', fixup, text)
  45. return text if isinstance(text, str) else str(text)
  46. def unescape(text):
  47. """Replace XML character references with the referenced characters"""
  48. def fixup(m):
  49. text = m.group(0)
  50. if text[1] == "#":
  51. # Character reference
  52. if text[2] == "x":
  53. code = int(text[3:-1], 16)
  54. else:
  55. code = int(text[2:-1])
  56. else:
  57. # Named entity
  58. try:
  59. code = htmlentitydefs.name2codepoint[text[1:-1]]
  60. except KeyError:
  61. return text # leave unchanged
  62. try:
  63. return chr(code)
  64. except (ValueError, OverflowError):
  65. return text # leave unchanged
  66. return re.sub("&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text)
  67. def literal_destringizer(rep):
  68. """Convert a Python literal to the value it represents.
  69. Parameters
  70. ----------
  71. rep : string
  72. A Python literal.
  73. Returns
  74. -------
  75. value : object
  76. The value of the Python literal.
  77. Raises
  78. ------
  79. ValueError
  80. If `rep` is not a Python literal.
  81. """
  82. if isinstance(rep, str):
  83. orig_rep = rep
  84. try:
  85. return literal_eval(rep)
  86. except SyntaxError as err:
  87. raise ValueError(f"{orig_rep!r} is not a valid Python literal") from err
  88. else:
  89. raise ValueError(f"{rep!r} is not a string")
  90. @open_file(0, mode="rb")
  91. @nx._dispatchable(graphs=None, returns_graph=True)
  92. def read_gml(path, label="label", destringizer=None):
  93. """Read graph in GML format from `path`.
  94. Parameters
  95. ----------
  96. path : file or string
  97. Filename or file handle to read.
  98. Filenames ending in .gz or .bz2 will be decompressed.
  99. label : string, optional
  100. If not None, the parsed nodes will be renamed according to node
  101. attributes indicated by `label`. Default value: 'label'.
  102. destringizer : callable, optional
  103. A `destringizer` that recovers values stored as strings in GML. If it
  104. cannot convert a string to a value, a `ValueError` is raised. Default
  105. value : None.
  106. Returns
  107. -------
  108. G : NetworkX graph
  109. The parsed graph.
  110. Raises
  111. ------
  112. NetworkXError
  113. If the input cannot be parsed.
  114. See Also
  115. --------
  116. write_gml, parse_gml
  117. literal_destringizer
  118. Notes
  119. -----
  120. GML files are stored using a 7-bit ASCII encoding with any extended
  121. ASCII characters (iso8859-1) appearing as HTML character entities.
  122. Without specifying a `stringizer`/`destringizer`, the code is capable of
  123. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  124. specification. For writing other data types, and for reading data other
  125. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  126. For additional documentation on the GML file format, please see the
  127. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  128. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  129. Examples
  130. --------
  131. >>> G = nx.path_graph(4)
  132. >>> nx.write_gml(G, "test_path4.gml")
  133. GML values are interpreted as strings by default:
  134. >>> H = nx.read_gml("test_path4.gml")
  135. >>> H.nodes
  136. NodeView(('0', '1', '2', '3'))
  137. When a `destringizer` is provided, GML values are converted to the provided type.
  138. For example, integer nodes can be recovered as shown below:
  139. >>> J = nx.read_gml("test_path4.gml", destringizer=int)
  140. >>> J.nodes
  141. NodeView((0, 1, 2, 3))
  142. """
  143. def filter_lines(lines):
  144. for line in lines:
  145. try:
  146. line = line.decode("ascii")
  147. except UnicodeDecodeError as err:
  148. raise NetworkXError("input is not ASCII-encoded") from err
  149. if not isinstance(line, str):
  150. lines = str(lines)
  151. if line and line[-1] == "\n":
  152. line = line[:-1]
  153. yield line
  154. G = parse_gml_lines(filter_lines(path), label, destringizer)
  155. return G
  156. @nx._dispatchable(graphs=None, returns_graph=True)
  157. def parse_gml(lines, label="label", destringizer=None):
  158. """Parse GML graph from a string or iterable.
  159. Parameters
  160. ----------
  161. lines : string or iterable of strings
  162. Data in GML format.
  163. label : string, optional
  164. If not None, the parsed nodes will be renamed according to node
  165. attributes indicated by `label`. Default value: 'label'.
  166. destringizer : callable, optional
  167. A `destringizer` that recovers values stored as strings in GML. If it
  168. cannot convert a string to a value, a `ValueError` is raised. Default
  169. value : None.
  170. Returns
  171. -------
  172. G : NetworkX graph
  173. The parsed graph.
  174. Raises
  175. ------
  176. NetworkXError
  177. If the input cannot be parsed.
  178. See Also
  179. --------
  180. write_gml, read_gml
  181. Notes
  182. -----
  183. This stores nested GML attributes as dictionaries in the NetworkX graph,
  184. node, and edge attribute structures.
  185. GML files are stored using a 7-bit ASCII encoding with any extended
  186. ASCII characters (iso8859-1) appearing as HTML character entities.
  187. Without specifying a `stringizer`/`destringizer`, the code is capable of
  188. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  189. specification. For writing other data types, and for reading data other
  190. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  191. For additional documentation on the GML file format, please see the
  192. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  193. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  194. """
  195. def decode_line(line):
  196. if isinstance(line, bytes):
  197. try:
  198. line.decode("ascii")
  199. except UnicodeDecodeError as err:
  200. raise NetworkXError("input is not ASCII-encoded") from err
  201. if not isinstance(line, str):
  202. line = str(line)
  203. return line
  204. def filter_lines(lines):
  205. if isinstance(lines, str):
  206. lines = decode_line(lines)
  207. lines = lines.splitlines()
  208. yield from lines
  209. else:
  210. for line in lines:
  211. line = decode_line(line)
  212. if line and line[-1] == "\n":
  213. line = line[:-1]
  214. if line.find("\n") != -1:
  215. raise NetworkXError("input line contains newline")
  216. yield line
  217. G = parse_gml_lines(filter_lines(lines), label, destringizer)
  218. return G
  219. class Pattern(Enum):
  220. """encodes the index of each token-matching pattern in `tokenize`."""
  221. KEYS = 0
  222. REALS = 1
  223. INTS = 2
  224. STRINGS = 3
  225. DICT_START = 4
  226. DICT_END = 5
  227. COMMENT_WHITESPACE = 6
  228. class Token(NamedTuple):
  229. category: Pattern
  230. value: Any
  231. line: int
  232. position: int
  233. LIST_START_VALUE = "_networkx_list_start"
  234. def parse_gml_lines(lines, label, destringizer):
  235. """Parse GML `lines` into a graph."""
  236. def tokenize():
  237. patterns = [
  238. r"[A-Za-z][0-9A-Za-z_]*\b", # keys
  239. # reals
  240. r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*|INF)(?:[Ee][+-]?[0-9]+)?",
  241. r"[+-]?[0-9]+", # ints
  242. r'".*?"', # strings
  243. r"\[", # dict start
  244. r"\]", # dict end
  245. r"#.*$|\s+", # comments and whitespaces
  246. ]
  247. tokens = re.compile("|".join(f"({pattern})" for pattern in patterns))
  248. lineno = 0
  249. multilines = [] # entries spread across multiple lines
  250. for line in lines:
  251. pos = 0
  252. # deal with entries spread across multiple lines
  253. #
  254. # should we actually have to deal with escaped "s then do it here
  255. if multilines:
  256. multilines.append(line.strip())
  257. if line[-1] == '"': # closing multiline entry
  258. # multiline entries will be joined by space. cannot
  259. # reintroduce newlines as this will break the tokenizer
  260. line = " ".join(multilines)
  261. multilines = []
  262. else: # continued multiline entry
  263. lineno += 1
  264. continue
  265. else:
  266. if line.count('"') == 1: # opening multiline entry
  267. if line.strip()[0] != '"' and line.strip()[-1] != '"':
  268. # since we expect something like key "value", the " should not be found at ends
  269. # otherwise tokenizer will pick up the formatting mistake.
  270. multilines = [line.rstrip()]
  271. lineno += 1
  272. continue
  273. length = len(line)
  274. while pos < length:
  275. match = tokens.match(line, pos)
  276. if match is None:
  277. m = f"cannot tokenize {line[pos:]} at ({lineno + 1}, {pos + 1})"
  278. raise NetworkXError(m)
  279. for i in range(len(patterns)):
  280. group = match.group(i + 1)
  281. if group is not None:
  282. if i == 0: # keys
  283. value = group.rstrip()
  284. elif i == 1: # reals
  285. value = float(group)
  286. elif i == 2: # ints
  287. value = int(group)
  288. else:
  289. value = group
  290. if i != 6: # comments and whitespaces
  291. yield Token(Pattern(i), value, lineno + 1, pos + 1)
  292. pos += len(group)
  293. break
  294. lineno += 1
  295. yield Token(None, None, lineno + 1, 1) # EOF
  296. def unexpected(curr_token, expected):
  297. category, value, lineno, pos = curr_token
  298. value = repr(value) if value is not None else "EOF"
  299. raise NetworkXError(f"expected {expected}, found {value} at ({lineno}, {pos})")
  300. def consume(curr_token, category, expected):
  301. if curr_token.category == category:
  302. return next(tokens)
  303. unexpected(curr_token, expected)
  304. def parse_kv(curr_token):
  305. dct = defaultdict(list)
  306. while curr_token.category == Pattern.KEYS:
  307. key = curr_token.value
  308. curr_token = next(tokens)
  309. category = curr_token.category
  310. if category == Pattern.REALS or category == Pattern.INTS:
  311. value = curr_token.value
  312. curr_token = next(tokens)
  313. elif category == Pattern.STRINGS:
  314. value = unescape(curr_token.value[1:-1])
  315. if destringizer:
  316. try:
  317. value = destringizer(value)
  318. except ValueError:
  319. pass
  320. # Special handling for empty lists and tuples
  321. if value == "()":
  322. value = ()
  323. if value == "[]":
  324. value = []
  325. curr_token = next(tokens)
  326. elif category == Pattern.DICT_START:
  327. curr_token, value = parse_dict(curr_token)
  328. else:
  329. # Allow for string convertible id and label values
  330. if key in ("id", "label", "source", "target"):
  331. try:
  332. # String convert the token value
  333. value = unescape(str(curr_token.value))
  334. if destringizer:
  335. try:
  336. value = destringizer(value)
  337. except ValueError:
  338. pass
  339. curr_token = next(tokens)
  340. except Exception:
  341. msg = (
  342. "an int, float, string, '[' or string"
  343. + " convertible ASCII value for node id or label"
  344. )
  345. unexpected(curr_token, msg)
  346. # Special handling for nan and infinity. Since the gml language
  347. # defines unquoted strings as keys, the numeric and string branches
  348. # are skipped and we end up in this special branch, so we need to
  349. # convert the current token value to a float for NAN and plain INF.
  350. # +/-INF are handled in the pattern for 'reals' in tokenize(). This
  351. # allows labels and values to be nan or infinity, but not keys.
  352. elif curr_token.value in {"NAN", "INF"}:
  353. value = float(curr_token.value)
  354. curr_token = next(tokens)
  355. else: # Otherwise error out
  356. unexpected(curr_token, "an int, float, string or '['")
  357. dct[key].append(value)
  358. def clean_dict_value(value):
  359. if not isinstance(value, list):
  360. return value
  361. if len(value) == 1:
  362. return value[0]
  363. if value[0] == LIST_START_VALUE:
  364. return value[1:]
  365. return value
  366. dct = {key: clean_dict_value(value) for key, value in dct.items()}
  367. return curr_token, dct
  368. def parse_dict(curr_token):
  369. # dict start
  370. curr_token = consume(curr_token, Pattern.DICT_START, "'['")
  371. # dict contents
  372. curr_token, dct = parse_kv(curr_token)
  373. # dict end
  374. curr_token = consume(curr_token, Pattern.DICT_END, "']'")
  375. return curr_token, dct
  376. def parse_graph():
  377. curr_token, dct = parse_kv(next(tokens))
  378. if curr_token.category is not None: # EOF
  379. unexpected(curr_token, "EOF")
  380. if "graph" not in dct:
  381. raise NetworkXError("input contains no graph")
  382. graph = dct["graph"]
  383. if isinstance(graph, list):
  384. raise NetworkXError("input contains more than one graph")
  385. return graph
  386. tokens = tokenize()
  387. graph = parse_graph()
  388. directed = graph.pop("directed", False)
  389. multigraph = graph.pop("multigraph", False)
  390. if not multigraph:
  391. G = nx.DiGraph() if directed else nx.Graph()
  392. else:
  393. G = nx.MultiDiGraph() if directed else nx.MultiGraph()
  394. graph_attr = {k: v for k, v in graph.items() if k not in ("node", "edge")}
  395. G.graph.update(graph_attr)
  396. def pop_attr(dct, category, attr, i):
  397. try:
  398. return dct.pop(attr)
  399. except KeyError as err:
  400. raise NetworkXError(f"{category} #{i} has no {attr!r} attribute") from err
  401. nodes = graph.get("node", [])
  402. mapping = {}
  403. node_labels = set()
  404. for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
  405. id = pop_attr(node, "node", "id", i)
  406. if id in G:
  407. raise NetworkXError(f"node id {id!r} is duplicated")
  408. if label is not None and label != "id":
  409. node_label = pop_attr(node, "node", label, i)
  410. if node_label in node_labels:
  411. raise NetworkXError(f"node label {node_label!r} is duplicated")
  412. node_labels.add(node_label)
  413. mapping[id] = node_label
  414. G.add_node(id, **node)
  415. edges = graph.get("edge", [])
  416. for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
  417. source = pop_attr(edge, "edge", "source", i)
  418. target = pop_attr(edge, "edge", "target", i)
  419. if source not in G:
  420. raise NetworkXError(f"edge #{i} has undefined source {source!r}")
  421. if target not in G:
  422. raise NetworkXError(f"edge #{i} has undefined target {target!r}")
  423. if not multigraph:
  424. if not G.has_edge(source, target):
  425. G.add_edge(source, target, **edge)
  426. else:
  427. arrow = "->" if directed else "--"
  428. msg = f"edge #{i} ({source!r}{arrow}{target!r}) is duplicated"
  429. raise nx.NetworkXError(msg)
  430. else:
  431. key = edge.pop("key", None)
  432. if key is not None and G.has_edge(source, target, key):
  433. arrow = "->" if directed else "--"
  434. msg = f"edge #{i} ({source!r}{arrow}{target!r}, {key!r})"
  435. msg2 = 'Hint: If multigraph add "multigraph 1" to file header.'
  436. raise nx.NetworkXError(msg + " is duplicated\n" + msg2)
  437. G.add_edge(source, target, key, **edge)
  438. if label is not None and label != "id":
  439. G = nx.relabel_nodes(G, mapping)
  440. return G
  441. def literal_stringizer(value):
  442. """Convert a `value` to a Python literal in GML representation.
  443. Parameters
  444. ----------
  445. value : object
  446. The `value` to be converted to GML representation.
  447. Returns
  448. -------
  449. rep : string
  450. A double-quoted Python literal representing value. Unprintable
  451. characters are replaced by XML character references.
  452. Raises
  453. ------
  454. ValueError
  455. If `value` cannot be converted to GML.
  456. Notes
  457. -----
  458. The original value can be recovered using the
  459. :func:`networkx.readwrite.gml.literal_destringizer` function.
  460. """
  461. def stringize(value):
  462. if isinstance(value, int | bool) or value is None:
  463. if value is True: # GML uses 1/0 for boolean values.
  464. buf.write(str(1))
  465. elif value is False:
  466. buf.write(str(0))
  467. else:
  468. buf.write(str(value))
  469. elif isinstance(value, str):
  470. text = repr(value)
  471. if text[0] != "u":
  472. try:
  473. value.encode("latin1")
  474. except UnicodeEncodeError:
  475. text = "u" + text
  476. buf.write(text)
  477. elif isinstance(value, float | complex | str | bytes):
  478. buf.write(repr(value))
  479. elif isinstance(value, list):
  480. buf.write("[")
  481. first = True
  482. for item in value:
  483. if not first:
  484. buf.write(",")
  485. else:
  486. first = False
  487. stringize(item)
  488. buf.write("]")
  489. elif isinstance(value, tuple):
  490. if len(value) > 1:
  491. buf.write("(")
  492. first = True
  493. for item in value:
  494. if not first:
  495. buf.write(",")
  496. else:
  497. first = False
  498. stringize(item)
  499. buf.write(")")
  500. elif value:
  501. buf.write("(")
  502. stringize(value[0])
  503. buf.write(",)")
  504. else:
  505. buf.write("()")
  506. elif isinstance(value, dict):
  507. buf.write("{")
  508. first = True
  509. for key, value in value.items():
  510. if not first:
  511. buf.write(",")
  512. else:
  513. first = False
  514. stringize(key)
  515. buf.write(":")
  516. stringize(value)
  517. buf.write("}")
  518. elif isinstance(value, set):
  519. buf.write("{")
  520. first = True
  521. for item in value:
  522. if not first:
  523. buf.write(",")
  524. else:
  525. first = False
  526. stringize(item)
  527. buf.write("}")
  528. else:
  529. msg = f"{value!r} cannot be converted into a Python literal"
  530. raise ValueError(msg)
  531. buf = StringIO()
  532. stringize(value)
  533. return buf.getvalue()
  534. def generate_gml(G, stringizer=None):
  535. r"""Generate a single entry of the graph `G` in GML format.
  536. Parameters
  537. ----------
  538. G : NetworkX graph
  539. The graph to be converted to GML.
  540. stringizer : callable, optional
  541. A `stringizer` which converts non-int/non-float/non-dict values into
  542. strings. If it cannot convert a value into a string, it should raise a
  543. `ValueError` to indicate that. Default value: None.
  544. Returns
  545. -------
  546. lines: generator of strings
  547. Lines of GML data. Newlines are not appended.
  548. Raises
  549. ------
  550. NetworkXError
  551. If `stringizer` cannot convert a value into a string, or the value to
  552. convert is not a string while `stringizer` is None.
  553. See Also
  554. --------
  555. literal_stringizer
  556. Notes
  557. -----
  558. Graph attributes named 'directed', 'multigraph', 'node' or
  559. 'edge', node attributes named 'id' or 'label', edge attributes
  560. named 'source' or 'target' (or 'key' if `G` is a multigraph)
  561. are ignored because these attribute names are used to encode the graph
  562. structure.
  563. GML files are stored using a 7-bit ASCII encoding with any extended
  564. ASCII characters (iso8859-1) appearing as HTML character entities.
  565. Without specifying a `stringizer`/`destringizer`, the code is capable of
  566. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  567. specification. For writing other data types, and for reading data other
  568. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  569. For additional documentation on the GML file format, please see the
  570. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  571. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  572. Examples
  573. --------
  574. >>> G = nx.Graph()
  575. >>> G.add_node("1")
  576. >>> print("\n".join(nx.generate_gml(G)))
  577. graph [
  578. node [
  579. id 0
  580. label "1"
  581. ]
  582. ]
  583. >>> G = nx.MultiGraph([("a", "b"), ("a", "b")])
  584. >>> print("\n".join(nx.generate_gml(G)))
  585. graph [
  586. multigraph 1
  587. node [
  588. id 0
  589. label "a"
  590. ]
  591. node [
  592. id 1
  593. label "b"
  594. ]
  595. edge [
  596. source 0
  597. target 1
  598. key 0
  599. ]
  600. edge [
  601. source 0
  602. target 1
  603. key 1
  604. ]
  605. ]
  606. """
  607. valid_keys = re.compile("^[A-Za-z][0-9A-Za-z_]*$")
  608. def stringize(key, value, ignored_keys, indent, in_list=False):
  609. if not isinstance(key, str):
  610. raise NetworkXError(f"{key!r} is not a string")
  611. if not valid_keys.match(key):
  612. raise NetworkXError(f"{key!r} is not a valid key")
  613. if not isinstance(key, str):
  614. key = str(key)
  615. if key not in ignored_keys:
  616. if isinstance(value, int | bool):
  617. if key == "label":
  618. yield indent + key + ' "' + str(value) + '"'
  619. elif value is True:
  620. # python bool is an instance of int
  621. yield indent + key + " 1"
  622. elif value is False:
  623. yield indent + key + " 0"
  624. # GML only supports signed 32-bit integers
  625. elif value < -(2**31) or value >= 2**31:
  626. yield indent + key + ' "' + str(value) + '"'
  627. else:
  628. yield indent + key + " " + str(value)
  629. elif isinstance(value, float):
  630. text = repr(value).upper()
  631. # GML matches INF to keys, so prepend + to INF. Use repr(float(*))
  632. # instead of string literal to future proof against changes to repr.
  633. if text == repr(float("inf")).upper():
  634. text = "+" + text
  635. else:
  636. # GML requires that a real literal contain a decimal point, but
  637. # repr may not output a decimal point when the mantissa is
  638. # integral and hence needs fixing.
  639. epos = text.rfind("E")
  640. if epos != -1 and text.find(".", 0, epos) == -1:
  641. text = text[:epos] + "." + text[epos:]
  642. if key == "label":
  643. yield indent + key + ' "' + text + '"'
  644. else:
  645. yield indent + key + " " + text
  646. elif isinstance(value, dict):
  647. yield indent + key + " ["
  648. next_indent = indent + " "
  649. for key, value in value.items():
  650. yield from stringize(key, value, (), next_indent)
  651. yield indent + "]"
  652. elif isinstance(value, tuple) and key == "label":
  653. yield indent + key + f' "({",".join(repr(v) for v in value)})"'
  654. elif isinstance(value, list | tuple) and key != "label" and not in_list:
  655. if len(value) == 0:
  656. yield indent + key + " " + f'"{value!r}"'
  657. if len(value) == 1:
  658. yield indent + key + " " + f'"{LIST_START_VALUE}"'
  659. for val in value:
  660. yield from stringize(key, val, (), indent, True)
  661. else:
  662. if stringizer:
  663. try:
  664. value = stringizer(value)
  665. except ValueError as err:
  666. raise NetworkXError(
  667. f"{value!r} cannot be converted into a string"
  668. ) from err
  669. if not isinstance(value, str):
  670. raise NetworkXError(f"{value!r} is not a string")
  671. yield indent + key + ' "' + escape(value) + '"'
  672. multigraph = G.is_multigraph()
  673. yield "graph ["
  674. # Output graph attributes
  675. if G.is_directed():
  676. yield " directed 1"
  677. if multigraph:
  678. yield " multigraph 1"
  679. ignored_keys = {"directed", "multigraph", "node", "edge"}
  680. for attr, value in G.graph.items():
  681. yield from stringize(attr, value, ignored_keys, " ")
  682. # Output node data
  683. node_id = dict(zip(G, range(len(G))))
  684. ignored_keys = {"id", "label"}
  685. for node, attrs in G.nodes.items():
  686. yield " node ["
  687. yield " id " + str(node_id[node])
  688. yield from stringize("label", node, (), " ")
  689. for attr, value in attrs.items():
  690. yield from stringize(attr, value, ignored_keys, " ")
  691. yield " ]"
  692. # Output edge data
  693. ignored_keys = {"source", "target"}
  694. kwargs = {"data": True}
  695. if multigraph:
  696. ignored_keys.add("key")
  697. kwargs["keys"] = True
  698. for e in G.edges(**kwargs):
  699. yield " edge ["
  700. yield " source " + str(node_id[e[0]])
  701. yield " target " + str(node_id[e[1]])
  702. if multigraph:
  703. yield from stringize("key", e[2], (), " ")
  704. for attr, value in e[-1].items():
  705. yield from stringize(attr, value, ignored_keys, " ")
  706. yield " ]"
  707. yield "]"
  708. @open_file(1, mode="wb")
  709. def write_gml(G, path, stringizer=None):
  710. """Write a graph `G` in GML format to the file or file handle `path`.
  711. Parameters
  712. ----------
  713. G : NetworkX graph
  714. The graph to be converted to GML.
  715. path : string or file
  716. Filename or file handle to write to.
  717. Filenames ending in .gz or .bz2 will be compressed.
  718. stringizer : callable, optional
  719. A `stringizer` which converts non-int/non-float/non-dict values into
  720. strings. If it cannot convert a value into a string, it should raise a
  721. `ValueError` to indicate that. Default value: None.
  722. Raises
  723. ------
  724. NetworkXError
  725. If `stringizer` cannot convert a value into a string, or the value to
  726. convert is not a string while `stringizer` is None.
  727. See Also
  728. --------
  729. read_gml, generate_gml
  730. literal_stringizer
  731. Notes
  732. -----
  733. Graph attributes named 'directed', 'multigraph', 'node' or
  734. 'edge', node attributes named 'id' or 'label', edge attributes
  735. named 'source' or 'target' (or 'key' if `G` is a multigraph)
  736. are ignored because these attribute names are used to encode the graph
  737. structure.
  738. GML files are stored using a 7-bit ASCII encoding with any extended
  739. ASCII characters (iso8859-1) appearing as HTML character entities.
  740. Without specifying a `stringizer`/`destringizer`, the code is capable of
  741. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  742. specification. For writing other data types, and for reading data other
  743. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  744. Note that while we allow non-standard GML to be read from a file, we make
  745. sure to write GML format. In particular, underscores are not allowed in
  746. attribute names.
  747. For additional documentation on the GML file format, please see the
  748. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  749. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  750. Examples
  751. --------
  752. >>> G = nx.path_graph(5)
  753. >>> nx.write_gml(G, "test_path5.gml")
  754. Filenames ending in .gz or .bz2 will be compressed.
  755. >>> nx.write_gml(G, "test_path5.gml.gz")
  756. """
  757. for line in generate_gml(G, stringizer):
  758. path.write((line + "\n").encode("ascii"))