pajek.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. """
  2. *****
  3. Pajek
  4. *****
  5. Read graphs in Pajek format.
  6. This implementation handles directed and undirected graphs including
  7. those with self loops and parallel edges.
  8. Format
  9. ------
  10. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  11. for format information.
  12. """
  13. import warnings
  14. import networkx as nx
  15. from networkx.utils import open_file
  16. __all__ = ["read_pajek", "parse_pajek", "generate_pajek", "write_pajek"]
  17. def generate_pajek(G):
  18. """Generate lines in Pajek graph format.
  19. Parameters
  20. ----------
  21. G : graph
  22. A Networkx graph
  23. References
  24. ----------
  25. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  26. for format information.
  27. """
  28. if G.name == "":
  29. name = "NetworkX"
  30. else:
  31. name = G.name
  32. # Apparently many Pajek format readers can't process this line
  33. # So we'll leave it out for now.
  34. # yield '*network %s'%name
  35. # write nodes with attributes
  36. yield f"*vertices {G.order()}"
  37. nodes = list(G)
  38. # make dictionary mapping nodes to integers
  39. nodenumber = dict(zip(nodes, range(1, len(nodes) + 1)))
  40. for n in nodes:
  41. # copy node attributes and pop mandatory attributes
  42. # to avoid duplication.
  43. na = G.nodes.get(n, {}).copy()
  44. x = na.pop("x", 0.0)
  45. y = na.pop("y", 0.0)
  46. try:
  47. id = int(na.pop("id", nodenumber[n]))
  48. except ValueError as err:
  49. err.args += (
  50. (
  51. "Pajek format requires 'id' to be an int()."
  52. " Refer to the 'Relabeling nodes' section."
  53. ),
  54. )
  55. raise
  56. nodenumber[n] = id
  57. shape = na.pop("shape", "ellipse")
  58. s = " ".join(map(make_qstr, (id, n, x, y, shape)))
  59. # only optional attributes are left in na.
  60. for k, v in na.items():
  61. if isinstance(v, str) and v.strip() != "":
  62. s += f" {make_qstr(k)} {make_qstr(v)}"
  63. else:
  64. warnings.warn(
  65. f"Node attribute {k} is not processed. {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}."
  66. )
  67. yield s
  68. # write edges with attributes
  69. if G.is_directed():
  70. yield "*arcs"
  71. else:
  72. yield "*edges"
  73. for u, v, edgedata in G.edges(data=True):
  74. d = edgedata.copy()
  75. value = d.pop("weight", 1.0) # use 1 as default edge value
  76. s = " ".join(map(make_qstr, (nodenumber[u], nodenumber[v], value)))
  77. for k, v in d.items():
  78. if isinstance(v, str) and v.strip() != "":
  79. s += f" {make_qstr(k)} {make_qstr(v)}"
  80. else:
  81. warnings.warn(
  82. f"Edge attribute {k} is not processed. {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}."
  83. )
  84. yield s
  85. @open_file(1, mode="wb")
  86. def write_pajek(G, path, encoding="UTF-8"):
  87. """Write graph in Pajek format to path.
  88. Parameters
  89. ----------
  90. G : graph
  91. A Networkx graph
  92. path : file or string
  93. File or filename to write.
  94. Filenames ending in .gz or .bz2 will be compressed.
  95. Examples
  96. --------
  97. >>> G = nx.path_graph(4)
  98. >>> nx.write_pajek(G, "test.netP4")
  99. Warnings
  100. --------
  101. Optional node attributes and edge attributes must be non-empty strings.
  102. Otherwise it will not be written into the file. You will need to
  103. convert those attributes to strings if you want to keep them.
  104. References
  105. ----------
  106. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  107. for format information.
  108. """
  109. for line in generate_pajek(G):
  110. line += "\n"
  111. path.write(line.encode(encoding))
  112. @open_file(0, mode="rb")
  113. @nx._dispatchable(graphs=None, returns_graph=True)
  114. def read_pajek(path, encoding="UTF-8"):
  115. """Read graph in Pajek format from path.
  116. Parameters
  117. ----------
  118. path : file or string
  119. Filename or file handle to read.
  120. Filenames ending in .gz or .bz2 will be decompressed.
  121. Returns
  122. -------
  123. G : NetworkX MultiGraph or MultiDiGraph.
  124. Examples
  125. --------
  126. >>> G = nx.path_graph(4)
  127. >>> nx.write_pajek(G, "test.net")
  128. >>> G = nx.read_pajek("test.net")
  129. To create a Graph instead of a MultiGraph use
  130. >>> G1 = nx.Graph(G)
  131. References
  132. ----------
  133. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  134. for format information.
  135. """
  136. lines = (line.decode(encoding) for line in path)
  137. return parse_pajek(lines)
  138. @nx._dispatchable(graphs=None, returns_graph=True)
  139. def parse_pajek(lines):
  140. """Parse Pajek format graph from string or iterable.
  141. Parameters
  142. ----------
  143. lines : string or iterable
  144. Data in Pajek format.
  145. Returns
  146. -------
  147. G : NetworkX graph
  148. See Also
  149. --------
  150. read_pajek
  151. """
  152. import shlex
  153. # multigraph=False
  154. if isinstance(lines, str):
  155. lines = iter(lines.split("\n"))
  156. lines = iter([line.rstrip("\n") for line in lines])
  157. G = nx.MultiDiGraph() # are multiedges allowed in Pajek? assume yes
  158. labels = [] # in the order of the file, needed for matrix
  159. while lines:
  160. try:
  161. l = next(lines)
  162. except: # EOF
  163. break
  164. if l.lower().startswith("*network"):
  165. try:
  166. label, name = l.split(None, 1)
  167. except ValueError:
  168. # Line was not of the form: *network NAME
  169. pass
  170. else:
  171. G.graph["name"] = name
  172. elif l.lower().startswith("*vertices"):
  173. nodelabels = {}
  174. l, nnodes = l.split()
  175. for i in range(int(nnodes)):
  176. l = next(lines)
  177. try:
  178. splitline = [
  179. x.decode("utf-8") for x in shlex.split(str(l).encode("utf-8"))
  180. ]
  181. except AttributeError:
  182. splitline = shlex.split(str(l))
  183. id, label = splitline[0:2]
  184. labels.append(label)
  185. G.add_node(label)
  186. nodelabels[id] = label
  187. G.nodes[label]["id"] = id
  188. try:
  189. x, y, shape = splitline[2:5]
  190. G.nodes[label].update(
  191. {"x": float(x), "y": float(y), "shape": shape}
  192. )
  193. except:
  194. pass
  195. extra_attr = zip(splitline[5::2], splitline[6::2])
  196. G.nodes[label].update(extra_attr)
  197. elif l.lower().startswith("*edges") or l.lower().startswith("*arcs"):
  198. if l.lower().startswith("*edge"):
  199. # switch from multidigraph to multigraph
  200. G = nx.MultiGraph(G)
  201. if l.lower().startswith("*arcs"):
  202. # switch to directed with multiple arcs for each existing edge
  203. G = G.to_directed()
  204. for l in lines:
  205. try:
  206. splitline = [
  207. x.decode("utf-8") for x in shlex.split(str(l).encode("utf-8"))
  208. ]
  209. except AttributeError:
  210. splitline = shlex.split(str(l))
  211. if len(splitline) < 2:
  212. continue
  213. ui, vi = splitline[0:2]
  214. u = nodelabels.get(ui, ui)
  215. v = nodelabels.get(vi, vi)
  216. # parse the data attached to this edge and put in a dictionary
  217. edge_data = {}
  218. try:
  219. # there should always be a single value on the edge?
  220. w = splitline[2:3]
  221. edge_data.update({"weight": float(w[0])})
  222. except:
  223. pass
  224. # if there isn't, just assign a 1
  225. # edge_data.update({'value':1})
  226. extra_attr = zip(splitline[3::2], splitline[4::2])
  227. edge_data.update(extra_attr)
  228. # if G.has_edge(u,v):
  229. # multigraph=True
  230. G.add_edge(u, v, **edge_data)
  231. elif l.lower().startswith("*matrix"):
  232. G = nx.DiGraph(G)
  233. adj_list = (
  234. (labels[row], labels[col], {"weight": int(data)})
  235. for (row, line) in enumerate(lines)
  236. for (col, data) in enumerate(line.split())
  237. if int(data) != 0
  238. )
  239. G.add_edges_from(adj_list)
  240. return G
  241. def make_qstr(t):
  242. """Returns the string representation of t.
  243. Add outer double-quotes if the string has a space.
  244. """
  245. if not isinstance(t, str):
  246. t = str(t)
  247. if " " in t:
  248. t = f'"{t}"'
  249. return t