graphml.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053
  1. """
  2. *******
  3. GraphML
  4. *******
  5. Read and write graphs in GraphML format.
  6. .. warning::
  7. This parser uses the standard xml library present in Python, which is
  8. insecure - see :external+python:mod:`xml` for additional information.
  9. Only parse GraphML files you trust.
  10. This implementation does not support mixed graphs (directed and undirected
  11. edges together), hyperedges, nested graphs, or ports.
  12. "GraphML is a comprehensive and easy-to-use file format for graphs. It
  13. consists of a language core to describe the structural properties of a
  14. graph and a flexible extension mechanism to add application-specific
  15. data. Its main features include support of
  16. * directed, undirected, and mixed graphs,
  17. * hypergraphs,
  18. * hierarchical graphs,
  19. * graphical representations,
  20. * references to external data,
  21. * application-specific attribute data, and
  22. * light-weight parsers.
  23. Unlike many other file formats for graphs, GraphML does not use a
  24. custom syntax. Instead, it is based on XML and hence ideally suited as
  25. a common denominator for all kinds of services generating, archiving,
  26. or processing graphs."
  27. http://graphml.graphdrawing.org/
  28. Format
  29. ------
  30. GraphML is an XML format. See
  31. http://graphml.graphdrawing.org/specification.html for the specification and
  32. http://graphml.graphdrawing.org/primer/graphml-primer.html
  33. for examples.
  34. """
  35. import warnings
  36. from collections import defaultdict
  37. import networkx as nx
  38. from networkx.utils import open_file
# Names exported by a star-import of this module.
__all__ = [
    "write_graphml",
    "read_graphml",
    "generate_graphml",
    "write_graphml_xml",
    "write_graphml_lxml",
    "parse_graphml",
    "GraphMLWriter",
    "GraphMLReader",
]
  49. @open_file(1, mode="wb")
  50. def write_graphml_xml(
  51. G,
  52. path,
  53. encoding="utf-8",
  54. prettyprint=True,
  55. infer_numeric_types=False,
  56. named_key_ids=False,
  57. edge_id_from_attribute=None,
  58. ):
  59. """Write G in GraphML XML format to path
  60. Parameters
  61. ----------
  62. G : graph
  63. A networkx graph
  64. path : file or string
  65. File or filename to write.
  66. Filenames ending in .gz or .bz2 will be compressed.
  67. encoding : string (optional)
  68. Encoding for text data.
  69. prettyprint : bool (optional)
  70. If True use line breaks and indenting in output XML.
  71. infer_numeric_types : boolean
  72. Determine if numeric types should be generalized.
  73. For example, if edges have both int and float 'weight' attributes,
  74. we infer in GraphML that both are floats.
  75. named_key_ids : bool (optional)
  76. If True use attr.name as value for key elements' id attribute.
  77. edge_id_from_attribute : dict key (optional)
  78. If provided, the graphml edge id is set by looking up the corresponding
  79. edge data attribute keyed by this parameter. If `None` or the key does not exist in edge data,
  80. the edge id is set by the edge key if `G` is a MultiGraph, else the edge id is left unset.
  81. Examples
  82. --------
  83. >>> G = nx.path_graph(4)
  84. >>> nx.write_graphml(G, "test.graphml")
  85. Notes
  86. -----
  87. This implementation does not support mixed graphs (directed
  88. and unidirected edges together) hyperedges, nested graphs, or ports.
  89. """
  90. writer = GraphMLWriter(
  91. encoding=encoding,
  92. prettyprint=prettyprint,
  93. infer_numeric_types=infer_numeric_types,
  94. named_key_ids=named_key_ids,
  95. edge_id_from_attribute=edge_id_from_attribute,
  96. )
  97. writer.add_graph_element(G)
  98. writer.dump(path)
  99. @open_file(1, mode="wb")
  100. def write_graphml_lxml(
  101. G,
  102. path,
  103. encoding="utf-8",
  104. prettyprint=True,
  105. infer_numeric_types=False,
  106. named_key_ids=False,
  107. edge_id_from_attribute=None,
  108. ):
  109. """Write G in GraphML XML format to path
  110. This function uses the LXML framework and should be faster than
  111. the version using the xml library.
  112. Parameters
  113. ----------
  114. G : graph
  115. A networkx graph
  116. path : file or string
  117. File or filename to write.
  118. Filenames ending in .gz or .bz2 will be compressed.
  119. encoding : string (optional)
  120. Encoding for text data.
  121. prettyprint : bool (optional)
  122. If True use line breaks and indenting in output XML.
  123. infer_numeric_types : boolean
  124. Determine if numeric types should be generalized.
  125. For example, if edges have both int and float 'weight' attributes,
  126. we infer in GraphML that both are floats.
  127. named_key_ids : bool (optional)
  128. If True use attr.name as value for key elements' id attribute.
  129. edge_id_from_attribute : dict key (optional)
  130. If provided, the graphml edge id is set by looking up the corresponding
  131. edge data attribute keyed by this parameter. If `None` or the key does not exist in edge data,
  132. the edge id is set by the edge key if `G` is a MultiGraph, else the edge id is left unset.
  133. Examples
  134. --------
  135. >>> G = nx.path_graph(4)
  136. >>> nx.write_graphml_lxml(G, "fourpath.graphml")
  137. Notes
  138. -----
  139. This implementation does not support mixed graphs (directed
  140. and unidirected edges together) hyperedges, nested graphs, or ports.
  141. """
  142. try:
  143. import lxml.etree as lxmletree
  144. except ImportError:
  145. return write_graphml_xml(
  146. G,
  147. path,
  148. encoding,
  149. prettyprint,
  150. infer_numeric_types,
  151. named_key_ids,
  152. edge_id_from_attribute,
  153. )
  154. writer = GraphMLWriterLxml(
  155. path,
  156. graph=G,
  157. encoding=encoding,
  158. prettyprint=prettyprint,
  159. infer_numeric_types=infer_numeric_types,
  160. named_key_ids=named_key_ids,
  161. edge_id_from_attribute=edge_id_from_attribute,
  162. )
  163. writer.dump()
  164. def generate_graphml(
  165. G,
  166. encoding="utf-8",
  167. prettyprint=True,
  168. named_key_ids=False,
  169. edge_id_from_attribute=None,
  170. ):
  171. """Generate GraphML lines for G
  172. Parameters
  173. ----------
  174. G : graph
  175. A networkx graph
  176. encoding : string (optional)
  177. Encoding for text data.
  178. prettyprint : bool (optional)
  179. If True use line breaks and indenting in output XML.
  180. named_key_ids : bool (optional)
  181. If True use attr.name as value for key elements' id attribute.
  182. edge_id_from_attribute : dict key (optional)
  183. If provided, the graphml edge id is set by looking up the corresponding
  184. edge data attribute keyed by this parameter. If `None` or the key does not exist in edge data,
  185. the edge id is set by the edge key if `G` is a MultiGraph, else the edge id is left unset.
  186. Examples
  187. --------
  188. >>> G = nx.path_graph(4)
  189. >>> linefeed = chr(10) # linefeed = \n
  190. >>> s = linefeed.join(nx.generate_graphml(G))
  191. >>> for line in nx.generate_graphml(G): # doctest: +SKIP
  192. ... print(line)
  193. Notes
  194. -----
  195. This implementation does not support mixed graphs (directed and unidirected
  196. edges together) hyperedges, nested graphs, or ports.
  197. """
  198. writer = GraphMLWriter(
  199. encoding=encoding,
  200. prettyprint=prettyprint,
  201. named_key_ids=named_key_ids,
  202. edge_id_from_attribute=edge_id_from_attribute,
  203. )
  204. writer.add_graph_element(G)
  205. yield from str(writer).splitlines()
  206. @open_file(0, mode="rb")
  207. @nx._dispatchable(graphs=None, returns_graph=True)
  208. def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
  209. """Read graph in GraphML format from path.
  210. Parameters
  211. ----------
  212. path : file or string
  213. Filename or file handle to read.
  214. Filenames ending in .gz or .bz2 will be decompressed.
  215. node_type: Python type (default: str)
  216. Convert node ids to this type
  217. edge_key_type: Python type (default: int)
  218. Convert graphml edge ids to this type. Multigraphs use id as edge key.
  219. Non-multigraphs add to edge attribute dict with name "id".
  220. force_multigraph : bool (default: False)
  221. If True, return a multigraph with edge keys. If False (the default)
  222. return a multigraph when multiedges are in the graph.
  223. Returns
  224. -------
  225. graph: NetworkX graph
  226. If parallel edges are present or `force_multigraph=True` then
  227. a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
  228. The returned graph is directed if the file indicates it should be.
  229. Notes
  230. -----
  231. Default node and edge attributes are not propagated to each node and edge.
  232. They can be obtained from `G.graph` and applied to node and edge attributes
  233. if desired using something like this:
  234. >>> default_color = G.graph["node_default"]["color"] # doctest: +SKIP
  235. >>> for node, data in G.nodes(data=True): # doctest: +SKIP
  236. ... if "color" not in data:
  237. ... data["color"] = default_color
  238. >>> default_color = G.graph["edge_default"]["color"] # doctest: +SKIP
  239. >>> for u, v, data in G.edges(data=True): # doctest: +SKIP
  240. ... if "color" not in data:
  241. ... data["color"] = default_color
  242. This implementation does not support mixed graphs (directed and unidirected
  243. edges together), hypergraphs, nested graphs, or ports.
  244. For multigraphs the GraphML edge "id" will be used as the edge
  245. key. If not specified then they "key" attribute will be used. If
  246. there is no "key" attribute a default NetworkX multigraph edge key
  247. will be provided.
  248. Files with the yEd "yfiles" extension can be read. The type of the node's
  249. shape is preserved in the `shape_type` node attribute.
  250. yEd compressed files ("file.graphmlz" extension) can be read by renaming
  251. the file to "file.graphml.gz".
  252. """
  253. reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
  254. # need to check for multiple graphs
  255. glist = list(reader(path=path))
  256. if len(glist) == 0:
  257. # If no graph comes back, try looking for an incomplete header
  258. header = b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
  259. path.seek(0)
  260. old_bytes = path.read()
  261. new_bytes = old_bytes.replace(b"<graphml>", header)
  262. glist = list(reader(string=new_bytes))
  263. if len(glist) == 0:
  264. raise nx.NetworkXError("file not successfully read as graphml")
  265. return glist[0]
  266. @nx._dispatchable(graphs=None, returns_graph=True)
  267. def parse_graphml(
  268. graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
  269. ):
  270. """Read graph in GraphML format from string.
  271. Parameters
  272. ----------
  273. graphml_string : string
  274. String containing graphml information
  275. (e.g., contents of a graphml file).
  276. node_type: Python type (default: str)
  277. Convert node ids to this type
  278. edge_key_type: Python type (default: int)
  279. Convert graphml edge ids to this type. Multigraphs use id as edge key.
  280. Non-multigraphs add to edge attribute dict with name "id".
  281. force_multigraph : bool (default: False)
  282. If True, return a multigraph with edge keys. If False (the default)
  283. return a multigraph when multiedges are in the graph.
  284. Returns
  285. -------
  286. graph: NetworkX graph
  287. If no parallel edges are found a Graph or DiGraph is returned.
  288. Otherwise a MultiGraph or MultiDiGraph is returned.
  289. Examples
  290. --------
  291. >>> G = nx.path_graph(4)
  292. >>> linefeed = chr(10) # linefeed = \n
  293. >>> s = linefeed.join(nx.generate_graphml(G))
  294. >>> H = nx.parse_graphml(s)
  295. Notes
  296. -----
  297. Default node and edge attributes are not propagated to each node and edge.
  298. They can be obtained from `G.graph` and applied to node and edge attributes
  299. if desired using something like this:
  300. >>> default_color = G.graph["node_default"]["color"] # doctest: +SKIP
  301. >>> for node, data in G.nodes(data=True): # doctest: +SKIP
  302. ... if "color" not in data:
  303. ... data["color"] = default_color
  304. >>> default_color = G.graph["edge_default"]["color"] # doctest: +SKIP
  305. >>> for u, v, data in G.edges(data=True): # doctest: +SKIP
  306. ... if "color" not in data:
  307. ... data["color"] = default_color
  308. This implementation does not support mixed graphs (directed and unidirected
  309. edges together), hypergraphs, nested graphs, or ports.
  310. For multigraphs the GraphML edge "id" will be used as the edge
  311. key. If not specified then they "key" attribute will be used. If
  312. there is no "key" attribute a default NetworkX multigraph edge key
  313. will be provided.
  314. """
  315. reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
  316. # need to check for multiple graphs
  317. glist = list(reader(string=graphml_string))
  318. if len(glist) == 0:
  319. # If no graph comes back, try looking for an incomplete header
  320. header = '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
  321. new_string = graphml_string.replace("<graphml>", header)
  322. glist = list(reader(string=new_string))
  323. if len(glist) == 0:
  324. raise nx.NetworkXError("file not successfully read as graphml")
  325. return glist[0]
  326. class GraphML:
  327. NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
  328. NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
  329. # xmlns:y="http://www.yworks.com/xml/graphml"
  330. NS_Y = "http://www.yworks.com/xml/graphml"
  331. SCHEMALOCATION = " ".join(
  332. [
  333. "http://graphml.graphdrawing.org/xmlns",
  334. "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
  335. ]
  336. )
  337. def construct_types(self):
  338. types = [
  339. (int, "integer"), # for Gephi GraphML bug
  340. (str, "yfiles"),
  341. (str, "string"),
  342. (int, "int"),
  343. (int, "long"),
  344. (float, "float"),
  345. (float, "double"),
  346. (bool, "boolean"),
  347. ]
  348. # These additions to types allow writing numpy types
  349. try:
  350. import numpy as np
  351. except:
  352. pass
  353. else:
  354. # prepend so that python types are created upon read (last entry wins)
  355. types = [
  356. (np.float64, "float"),
  357. (np.float32, "float"),
  358. (np.float16, "float"),
  359. (np.int_, "int"),
  360. (np.int8, "int"),
  361. (np.int16, "int"),
  362. (np.int32, "int"),
  363. (np.int64, "int"),
  364. (np.uint8, "int"),
  365. (np.uint16, "int"),
  366. (np.uint32, "int"),
  367. (np.uint64, "int"),
  368. (np.int_, "int"),
  369. (np.intc, "int"),
  370. (np.intp, "int"),
  371. ] + types
  372. self.xml_type = dict(types)
  373. self.python_type = dict(reversed(a) for a in types)
  374. # This page says that data types in GraphML follow Java(TM).
  375. # http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
  376. # true and false are the only boolean literals:
  377. # http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
  378. convert_bool = {
  379. # We use data.lower() in actual use.
  380. "true": True,
  381. "false": False,
  382. # Include integer strings for convenience.
  383. "0": False,
  384. 0: False,
  385. "1": True,
  386. 1: True,
  387. }
  388. def get_xml_type(self, key):
  389. """Wrapper around the xml_type dict that raises a more informative
  390. exception message when a user attempts to use data of a type not
  391. supported by GraphML."""
  392. try:
  393. return self.xml_type[key]
  394. except KeyError as err:
  395. raise TypeError(
  396. f"GraphML does not support type {key} as data values."
  397. ) from err
  398. class GraphMLWriter(GraphML):
  399. def __init__(
  400. self,
  401. graph=None,
  402. encoding="utf-8",
  403. prettyprint=True,
  404. infer_numeric_types=False,
  405. named_key_ids=False,
  406. edge_id_from_attribute=None,
  407. ):
  408. self.construct_types()
  409. from xml.etree.ElementTree import Element
  410. self.myElement = Element
  411. self.infer_numeric_types = infer_numeric_types
  412. self.prettyprint = prettyprint
  413. self.named_key_ids = named_key_ids
  414. self.edge_id_from_attribute = edge_id_from_attribute
  415. self.encoding = encoding
  416. self.xml = self.myElement(
  417. "graphml",
  418. {
  419. "xmlns": self.NS_GRAPHML,
  420. "xmlns:xsi": self.NS_XSI,
  421. "xsi:schemaLocation": self.SCHEMALOCATION,
  422. },
  423. )
  424. self.keys = {}
  425. self.attributes = defaultdict(list)
  426. self.attribute_types = defaultdict(set)
  427. if graph is not None:
  428. self.add_graph_element(graph)
  429. def __str__(self):
  430. from xml.etree.ElementTree import tostring
  431. if self.prettyprint:
  432. self.indent(self.xml)
  433. s = tostring(self.xml).decode(self.encoding)
  434. return s
  435. def attr_type(self, name, scope, value):
  436. """Infer the attribute type of data named name. Currently this only
  437. supports inference of numeric types.
  438. If self.infer_numeric_types is false, type is used. Otherwise, pick the
  439. most general of types found across all values with name and scope. This
  440. means edges with data named 'weight' are treated separately from nodes
  441. with data named 'weight'.
  442. """
  443. if self.infer_numeric_types:
  444. types = self.attribute_types[(name, scope)]
  445. if len(types) > 1:
  446. types = {self.get_xml_type(t) for t in types}
  447. if "string" in types:
  448. return str
  449. elif "float" in types or "double" in types:
  450. return float
  451. else:
  452. return int
  453. else:
  454. return list(types)[0]
  455. else:
  456. return type(value)
  457. def get_key(self, name, attr_type, scope, default):
  458. keys_key = (name, attr_type, scope)
  459. try:
  460. return self.keys[keys_key]
  461. except KeyError:
  462. if self.named_key_ids:
  463. new_id = name
  464. else:
  465. new_id = f"d{len(list(self.keys))}"
  466. self.keys[keys_key] = new_id
  467. key_kwargs = {
  468. "id": new_id,
  469. "for": scope,
  470. "attr.name": name,
  471. "attr.type": attr_type,
  472. }
  473. key_element = self.myElement("key", **key_kwargs)
  474. # add subelement for data default value if present
  475. if default is not None:
  476. default_element = self.myElement("default")
  477. default_element.text = str(default)
  478. key_element.append(default_element)
  479. self.xml.insert(0, key_element)
  480. return new_id
  481. def add_data(self, name, element_type, value, scope="all", default=None):
  482. """
  483. Make a data element for an edge or a node. Keep a log of the
  484. type in the keys table.
  485. """
  486. if element_type not in self.xml_type:
  487. raise nx.NetworkXError(
  488. f"GraphML writer does not support {element_type} as data values."
  489. )
  490. keyid = self.get_key(name, self.get_xml_type(element_type), scope, default)
  491. data_element = self.myElement("data", key=keyid)
  492. data_element.text = str(value)
  493. return data_element
  494. def add_attributes(self, scope, xml_obj, data, default):
  495. """Appends attribute data to edges or nodes, and stores type information
  496. to be added later. See add_graph_element.
  497. """
  498. for k, v in data.items():
  499. self.attribute_types[(str(k), scope)].add(type(v))
  500. self.attributes[xml_obj].append([k, v, scope, default.get(k)])
  501. def add_nodes(self, G, graph_element):
  502. default = G.graph.get("node_default", {})
  503. for node, data in G.nodes(data=True):
  504. node_element = self.myElement("node", id=str(node))
  505. self.add_attributes("node", node_element, data, default)
  506. graph_element.append(node_element)
  507. def add_edges(self, G, graph_element):
  508. if G.is_multigraph():
  509. for u, v, key, data in G.edges(data=True, keys=True):
  510. edge_element = self.myElement(
  511. "edge",
  512. source=str(u),
  513. target=str(v),
  514. id=str(data.get(self.edge_id_from_attribute))
  515. if self.edge_id_from_attribute
  516. and self.edge_id_from_attribute in data
  517. else str(key),
  518. )
  519. default = G.graph.get("edge_default", {})
  520. self.add_attributes("edge", edge_element, data, default)
  521. graph_element.append(edge_element)
  522. else:
  523. for u, v, data in G.edges(data=True):
  524. if self.edge_id_from_attribute and self.edge_id_from_attribute in data:
  525. # select attribute to be edge id
  526. edge_element = self.myElement(
  527. "edge",
  528. source=str(u),
  529. target=str(v),
  530. id=str(data.get(self.edge_id_from_attribute)),
  531. )
  532. else:
  533. # default: no edge id
  534. edge_element = self.myElement("edge", source=str(u), target=str(v))
  535. default = G.graph.get("edge_default", {})
  536. self.add_attributes("edge", edge_element, data, default)
  537. graph_element.append(edge_element)
  538. def add_graph_element(self, G):
  539. """
  540. Serialize graph G in GraphML to the stream.
  541. """
  542. if G.is_directed():
  543. default_edge_type = "directed"
  544. else:
  545. default_edge_type = "undirected"
  546. graphid = G.graph.pop("id", None)
  547. if graphid is None:
  548. graph_element = self.myElement("graph", edgedefault=default_edge_type)
  549. else:
  550. graph_element = self.myElement(
  551. "graph", edgedefault=default_edge_type, id=graphid
  552. )
  553. default = {}
  554. data = {
  555. k: v
  556. for (k, v) in G.graph.items()
  557. if k not in ["node_default", "edge_default"]
  558. }
  559. self.add_attributes("graph", graph_element, data, default)
  560. self.add_nodes(G, graph_element)
  561. self.add_edges(G, graph_element)
  562. # self.attributes contains a mapping from XML Objects to a list of
  563. # data that needs to be added to them.
  564. # We postpone processing in order to do type inference/generalization.
  565. # See self.attr_type
  566. for xml_obj, data in self.attributes.items():
  567. for k, v, scope, default in data:
  568. xml_obj.append(
  569. self.add_data(
  570. str(k), self.attr_type(k, scope, v), str(v), scope, default
  571. )
  572. )
  573. self.xml.append(graph_element)
  574. def add_graphs(self, graph_list):
  575. """Add many graphs to this GraphML document."""
  576. for G in graph_list:
  577. self.add_graph_element(G)
  578. def dump(self, stream):
  579. from xml.etree.ElementTree import ElementTree
  580. if self.prettyprint:
  581. self.indent(self.xml)
  582. document = ElementTree(self.xml)
  583. document.write(stream, encoding=self.encoding, xml_declaration=True)
  584. def indent(self, elem, level=0):
  585. # in-place prettyprint formatter
  586. i = "\n" + level * " "
  587. if len(elem):
  588. if not elem.text or not elem.text.strip():
  589. elem.text = i + " "
  590. if not elem.tail or not elem.tail.strip():
  591. elem.tail = i
  592. for elem in elem:
  593. self.indent(elem, level + 1)
  594. if not elem.tail or not elem.tail.strip():
  595. elem.tail = i
  596. else:
  597. if level and (not elem.tail or not elem.tail.strip()):
  598. elem.tail = i
  599. class IncrementalElement:
  600. """Wrapper for _IncrementalWriter providing an Element like interface.
  601. This wrapper does not intend to be a complete implementation but rather to
  602. deal with those calls used in GraphMLWriter.
  603. """
  604. def __init__(self, xml, prettyprint):
  605. self.xml = xml
  606. self.prettyprint = prettyprint
  607. def append(self, element):
  608. self.xml.write(element, pretty_print=self.prettyprint)
  609. class GraphMLWriterLxml(GraphMLWriter):
  610. def __init__(
  611. self,
  612. path,
  613. graph=None,
  614. encoding="utf-8",
  615. prettyprint=True,
  616. infer_numeric_types=False,
  617. named_key_ids=False,
  618. edge_id_from_attribute=None,
  619. ):
  620. self.construct_types()
  621. import lxml.etree as lxmletree
  622. self.myElement = lxmletree.Element
  623. self._encoding = encoding
  624. self._prettyprint = prettyprint
  625. self.named_key_ids = named_key_ids
  626. self.edge_id_from_attribute = edge_id_from_attribute
  627. self.infer_numeric_types = infer_numeric_types
  628. self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
  629. self._xml = self._xml_base.__enter__()
  630. self._xml.write_declaration()
  631. # We need to have a xml variable that support insertion. This call is
  632. # used for adding the keys to the document.
  633. # We will store those keys in a plain list, and then after the graph
  634. # element is closed we will add them to the main graphml element.
  635. self.xml = []
  636. self._keys = self.xml
  637. self._graphml = self._xml.element(
  638. "graphml",
  639. {
  640. "xmlns": self.NS_GRAPHML,
  641. "xmlns:xsi": self.NS_XSI,
  642. "xsi:schemaLocation": self.SCHEMALOCATION,
  643. },
  644. )
  645. self._graphml.__enter__()
  646. self.keys = {}
  647. self.attribute_types = defaultdict(set)
  648. if graph is not None:
  649. self.add_graph_element(graph)
  650. def add_graph_element(self, G):
  651. """
  652. Serialize graph G in GraphML to the stream.
  653. """
  654. if G.is_directed():
  655. default_edge_type = "directed"
  656. else:
  657. default_edge_type = "undirected"
  658. graphid = G.graph.pop("id", None)
  659. if graphid is None:
  660. graph_element = self._xml.element("graph", edgedefault=default_edge_type)
  661. else:
  662. graph_element = self._xml.element(
  663. "graph", edgedefault=default_edge_type, id=graphid
  664. )
  665. # gather attributes types for the whole graph
  666. # to find the most general numeric format needed.
  667. # Then pass through attributes to create key_id for each.
  668. graphdata = {
  669. k: v
  670. for k, v in G.graph.items()
  671. if k not in ("node_default", "edge_default")
  672. }
  673. node_default = G.graph.get("node_default", {})
  674. edge_default = G.graph.get("edge_default", {})
  675. # Graph attributes
  676. for k, v in graphdata.items():
  677. self.attribute_types[(str(k), "graph")].add(type(v))
  678. for k, v in graphdata.items():
  679. element_type = self.get_xml_type(self.attr_type(k, "graph", v))
  680. self.get_key(str(k), element_type, "graph", None)
  681. # Nodes and data
  682. for node, d in G.nodes(data=True):
  683. for k, v in d.items():
  684. self.attribute_types[(str(k), "node")].add(type(v))
  685. for node, d in G.nodes(data=True):
  686. for k, v in d.items():
  687. T = self.get_xml_type(self.attr_type(k, "node", v))
  688. self.get_key(str(k), T, "node", node_default.get(k))
  689. # Edges and data
  690. if G.is_multigraph():
  691. for u, v, ekey, d in G.edges(keys=True, data=True):
  692. for k, v in d.items():
  693. self.attribute_types[(str(k), "edge")].add(type(v))
  694. for u, v, ekey, d in G.edges(keys=True, data=True):
  695. for k, v in d.items():
  696. T = self.get_xml_type(self.attr_type(k, "edge", v))
  697. self.get_key(str(k), T, "edge", edge_default.get(k))
  698. else:
  699. for u, v, d in G.edges(data=True):
  700. for k, v in d.items():
  701. self.attribute_types[(str(k), "edge")].add(type(v))
  702. for u, v, d in G.edges(data=True):
  703. for k, v in d.items():
  704. T = self.get_xml_type(self.attr_type(k, "edge", v))
  705. self.get_key(str(k), T, "edge", edge_default.get(k))
  706. # Now add attribute keys to the xml file
  707. for key in self.xml:
  708. self._xml.write(key, pretty_print=self._prettyprint)
  709. # The incremental_writer writes each node/edge as it is created
  710. incremental_writer = IncrementalElement(self._xml, self._prettyprint)
  711. with graph_element:
  712. self.add_attributes("graph", incremental_writer, graphdata, {})
  713. self.add_nodes(G, incremental_writer) # adds attributes too
  714. self.add_edges(G, incremental_writer) # adds attributes too
  715. def add_attributes(self, scope, xml_obj, data, default):
  716. """Appends attribute data."""
  717. for k, v in data.items():
  718. data_element = self.add_data(
  719. str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
  720. )
  721. xml_obj.append(data_element)
  722. def __str__(self):
  723. return object.__str__(self)
  724. def dump(self, stream=None):
  725. self._graphml.__exit__(None, None, None)
  726. self._xml_base.__exit__(None, None, None)
# Default writer is the lxml one; write_graphml_lxml itself falls back to
# write_graphml_xml when lxml cannot be imported.
write_graphml = write_graphml_lxml
  729. class GraphMLReader(GraphML):
  730. """Read a GraphML document. Produces NetworkX graph objects."""
  731. def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False):
  732. self.construct_types()
  733. self.node_type = node_type
  734. self.edge_key_type = edge_key_type
  735. self.multigraph = force_multigraph # If False, test for multiedges
  736. self.edge_ids = {} # dict mapping (u,v) tuples to edge id attributes
  737. def __call__(self, path=None, string=None):
  738. from xml.etree.ElementTree import ElementTree, fromstring
  739. if path is not None:
  740. self.xml = ElementTree(file=path)
  741. elif string is not None:
  742. self.xml = fromstring(string)
  743. else:
  744. raise ValueError("Must specify either 'path' or 'string' as kwarg")
  745. (keys, defaults) = self.find_graphml_keys(self.xml)
  746. for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"):
  747. yield self.make_graph(g, keys, defaults)
  748. def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
  749. # set default graph type
  750. edgedefault = graph_xml.get("edgedefault", None)
  751. if G is None:
  752. if edgedefault == "directed":
  753. G = nx.MultiDiGraph()
  754. else:
  755. G = nx.MultiGraph()
  756. # set defaults for graph attributes
  757. G.graph["node_default"] = {}
  758. G.graph["edge_default"] = {}
  759. for key_id, value in defaults.items():
  760. key_for = graphml_keys[key_id]["for"]
  761. name = graphml_keys[key_id]["name"]
  762. python_type = graphml_keys[key_id]["type"]
  763. if key_for == "node":
  764. G.graph["node_default"].update({name: python_type(value)})
  765. if key_for == "edge":
  766. G.graph["edge_default"].update({name: python_type(value)})
  767. # hyperedges are not supported
  768. hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge")
  769. if hyperedge is not None:
  770. raise nx.NetworkXError("GraphML reader doesn't support hyperedges")
  771. # add nodes
  772. for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"):
  773. self.add_node(G, node_xml, graphml_keys, defaults)
  774. # add edges
  775. for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"):
  776. self.add_edge(G, edge_xml, graphml_keys)
  777. # add graph data
  778. data = self.decode_data_elements(graphml_keys, graph_xml)
  779. G.graph.update(data)
  780. # switch to Graph or DiGraph if no parallel edges were found
  781. if self.multigraph:
  782. return G
  783. G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G)
  784. # add explicit edge "id" from file as attribute in NX graph.
  785. nx.set_edge_attributes(G, values=self.edge_ids, name="id")
  786. return G
  787. def add_node(self, G, node_xml, graphml_keys, defaults):
  788. """Add a node to the graph."""
  789. # warn on finding unsupported ports tag
  790. ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
  791. if ports is not None:
  792. warnings.warn("GraphML port tag not supported.")
  793. # find the node by id and cast it to the appropriate type
  794. node_id = self.node_type(node_xml.get("id"))
  795. # get data/attributes for node
  796. data = self.decode_data_elements(graphml_keys, node_xml)
  797. G.add_node(node_id, **data)
  798. # get child nodes
  799. if node_xml.attrib.get("yfiles.foldertype") == "group":
  800. graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
  801. self.make_graph(graph_xml, graphml_keys, defaults, G)
  802. def add_edge(self, G, edge_element, graphml_keys):
  803. """Add an edge to the graph."""
  804. # warn on finding unsupported ports tag
  805. ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
  806. if ports is not None:
  807. warnings.warn("GraphML port tag not supported.")
  808. # raise error if we find mixed directed and undirected edges
  809. directed = edge_element.get("directed")
  810. if G.is_directed() and directed == "false":
  811. msg = "directed=false edge found in directed graph."
  812. raise nx.NetworkXError(msg)
  813. if (not G.is_directed()) and directed == "true":
  814. msg = "directed=true edge found in undirected graph."
  815. raise nx.NetworkXError(msg)
  816. source = self.node_type(edge_element.get("source"))
  817. target = self.node_type(edge_element.get("target"))
  818. data = self.decode_data_elements(graphml_keys, edge_element)
  819. # GraphML stores edge ids as an attribute
  820. # NetworkX uses them as keys in multigraphs too if no key
  821. # attribute is specified
  822. edge_id = edge_element.get("id")
  823. if edge_id:
  824. # self.edge_ids is used by `make_graph` method for non-multigraphs
  825. self.edge_ids[source, target] = edge_id
  826. try:
  827. edge_id = self.edge_key_type(edge_id)
  828. except ValueError: # Could not convert.
  829. pass
  830. else:
  831. edge_id = data.get("key")
  832. if G.has_edge(source, target):
  833. # mark this as a multigraph
  834. self.multigraph = True
  835. # Use add_edges_from to avoid error with add_edge when `'key' in data`
  836. # Note there is only one edge here...
  837. G.add_edges_from([(source, target, edge_id, data)])
  838. def decode_data_elements(self, graphml_keys, obj_xml):
  839. """Use the key information to decode the data XML if present."""
  840. data = {}
  841. for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
  842. key = data_element.get("key")
  843. try:
  844. data_name = graphml_keys[key]["name"]
  845. data_type = graphml_keys[key]["type"]
  846. except KeyError as err:
  847. raise nx.NetworkXError(f"Bad GraphML data: no key {key}") from err
  848. text = data_element.text
  849. # assume anything with subelements is a yfiles extension
  850. if text is not None and len(list(data_element)) == 0:
  851. if data_type is bool:
  852. # Ignore cases.
  853. # http://docs.oracle.com/javase/6/docs/api/java/lang/
  854. # Boolean.html#parseBoolean%28java.lang.String%29
  855. data[data_name] = self.convert_bool[text.lower()]
  856. else:
  857. data[data_name] = data_type(text)
  858. elif len(list(data_element)) > 0:
  859. # Assume yfiles as subelements, try to extract node_label
  860. node_label = None
  861. # set GenericNode's configuration as shape type
  862. gn = data_element.find(f"{{{self.NS_Y}}}GenericNode")
  863. if gn is not None:
  864. data["shape_type"] = gn.get("configuration")
  865. for node_type in ["GenericNode", "ShapeNode", "SVGNode", "ImageNode"]:
  866. pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}"
  867. geometry = data_element.find(f"{pref}Geometry")
  868. if geometry is not None:
  869. data["x"] = geometry.get("x")
  870. data["y"] = geometry.get("y")
  871. if node_label is None:
  872. node_label = data_element.find(f"{pref}NodeLabel")
  873. shape = data_element.find(f"{pref}Shape")
  874. if shape is not None:
  875. data["shape_type"] = shape.get("type")
  876. if node_label is not None:
  877. data["label"] = node_label.text
  878. # check all the different types of edges available in yEd.
  879. for edge_type in [
  880. "PolyLineEdge",
  881. "SplineEdge",
  882. "QuadCurveEdge",
  883. "BezierEdge",
  884. "ArcEdge",
  885. ]:
  886. pref = f"{{{self.NS_Y}}}{edge_type}/{{{self.NS_Y}}}"
  887. edge_label = data_element.find(f"{pref}EdgeLabel")
  888. if edge_label is not None:
  889. break
  890. if edge_label is not None:
  891. data["label"] = edge_label.text
  892. elif text is None:
  893. data[data_name] = ""
  894. return data
  895. def find_graphml_keys(self, graph_element):
  896. """Extracts all the keys and key defaults from the xml."""
  897. graphml_keys = {}
  898. graphml_key_defaults = {}
  899. for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
  900. attr_id = k.get("id")
  901. attr_type = k.get("attr.type")
  902. attr_name = k.get("attr.name")
  903. yfiles_type = k.get("yfiles.type")
  904. if yfiles_type is not None:
  905. attr_name = yfiles_type
  906. attr_type = "yfiles"
  907. if attr_type is None:
  908. attr_type = "string"
  909. warnings.warn(f"No key type for id {attr_id}. Using string")
  910. if attr_name is None:
  911. raise nx.NetworkXError(f"Unknown key for id {attr_id}.")
  912. graphml_keys[attr_id] = {
  913. "name": attr_name,
  914. "type": self.python_type[attr_type],
  915. "for": k.get("for"),
  916. }
  917. # check for "default" sub-element of key element
  918. default = k.find(f"{{{self.NS_GRAPHML}}}default")
  919. if default is not None:
  920. # Handle default values identically to data element values
  921. python_type = graphml_keys[attr_id]["type"]
  922. if python_type is bool:
  923. graphml_key_defaults[attr_id] = self.convert_bool[
  924. default.text.lower()
  925. ]
  926. else:
  927. graphml_key_defaults[attr_id] = python_type(default.text)
  928. return graphml_keys, graphml_key_defaults