| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314 |
- """Functions to convert NetworkX graphs to and from common data containers
- like numpy arrays, scipy sparse arrays, and pandas DataFrames.
- The preferred way of converting data to a NetworkX graph is through the
- graph constructor. The constructor calls the `~networkx.convert.to_networkx_graph`
- function which attempts to guess the input type and convert it automatically.
- Examples
- --------
- Create a 10 node random graph from a numpy array
- >>> import numpy as np
- >>> rng = np.random.default_rng()
- >>> a = rng.integers(low=0, high=2, size=(10, 10))
- >>> DG = nx.from_numpy_array(a, create_using=nx.DiGraph)
- or equivalently:
- >>> DG = nx.DiGraph(a)
- which calls `from_numpy_array` internally based on the type of ``a``.
- See Also
- --------
- nx_agraph, nx_pydot
- """
- import itertools
- from collections import defaultdict
- import networkx as nx
- __all__ = [
- "from_pandas_adjacency",
- "to_pandas_adjacency",
- "from_pandas_edgelist",
- "to_pandas_edgelist",
- "from_scipy_sparse_array",
- "to_scipy_sparse_array",
- "from_numpy_array",
- "to_numpy_array",
- ]
- @nx._dispatchable(edge_attrs="weight")
- def to_pandas_adjacency(
- G,
- nodelist=None,
- dtype=None,
- order=None,
- multigraph_weight=sum,
- weight="weight",
- nonedge=0.0,
- ):
- """Returns the graph adjacency matrix as a Pandas DataFrame.
- Parameters
- ----------
- G : graph
- The NetworkX graph used to construct the Pandas DataFrame.
- nodelist : list, optional
- The rows and columns are ordered according to the nodes in `nodelist`.
- If `nodelist` is None, then the ordering is produced by G.nodes().
- multigraph_weight : {sum, min, max}, optional
- An operator that determines how weights in multigraphs are handled.
- The default is to sum the weights of the multiple edges.
- weight : string or None, optional
- The edge attribute that holds the numerical value used for
- the edge weight. If an edge does not have that attribute, then the
- value 1 is used instead.
- nonedge : float, optional
- The matrix values corresponding to nonedges are typically set to zero.
- However, this could be undesirable if there are matrix values
- corresponding to actual edges that also have the value zero. If so,
- one might prefer nonedges to have some other value, such as nan.
- Returns
- -------
- df : Pandas DataFrame
- Graph adjacency matrix
- Notes
- -----
- For directed graphs, entry i,j corresponds to an edge from i to j.
- The DataFrame entries are assigned to the weight edge attribute. When
- an edge does not have a weight attribute, the value of the entry is set to
- the number 1. For multiple (parallel) edges, the values of the entries
- are determined by the 'multigraph_weight' parameter. The default is to
- sum the weight attributes for each of the parallel edges.
- When `nodelist` does not contain every node in `G`, the matrix is built
- from the subgraph of `G` that is induced by the nodes in `nodelist`.
- The convention used for self-loop edges in graphs is to assign the
- diagonal matrix entry value to the weight attribute of the edge
- (or the number 1 if the edge has no weight attribute). If the
- alternate convention of doubling the edge weight is desired the
- resulting Pandas DataFrame can be modified as follows::
- >>> import pandas as pd
- >>> G = nx.Graph([(1, 1), (2, 2)])
- >>> df = nx.to_pandas_adjacency(G)
- >>> df
- 1 2
- 1 1.0 0.0
- 2 0.0 1.0
- >>> diag_idx = list(range(len(df)))
- >>> df.iloc[diag_idx, diag_idx] *= 2
- >>> df
- 1 2
- 1 2.0 0.0
- 2 0.0 2.0
- Examples
- --------
- >>> G = nx.MultiDiGraph()
- >>> G.add_edge(0, 1, weight=2)
- 0
- >>> G.add_edge(1, 0)
- 0
- >>> G.add_edge(2, 2, weight=3)
- 0
- >>> G.add_edge(2, 2)
- 1
- >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)
- 0 1 2
- 0 0 2 0
- 1 1 0 0
- 2 0 0 4
- """
- import pandas as pd
- M = to_numpy_array(
- G,
- nodelist=nodelist,
- dtype=dtype,
- order=order,
- multigraph_weight=multigraph_weight,
- weight=weight,
- nonedge=nonedge,
- )
- if nodelist is None:
- nodelist = list(G)
- return pd.DataFrame(data=M, index=nodelist, columns=nodelist)
- @nx._dispatchable(graphs=None, returns_graph=True)
- def from_pandas_adjacency(df, create_using=None):
- r"""Returns a graph from Pandas DataFrame.
- The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
- Parameters
- ----------
- df : Pandas DataFrame
- An adjacency matrix representation of a graph
- create_using : NetworkX graph constructor, optional (default=nx.Graph)
- Graph type to create. If graph instance, then cleared before populated.
- Notes
- -----
- For directed graphs, explicitly mention create_using=nx.DiGraph,
- and entry i,j of df corresponds to an edge from i to j.
- If `df` has a single data type for each entry it will be converted to an
- appropriate Python data type.
- If you have node attributes stored in a separate dataframe `df_nodes`,
- you can load those attributes to the graph `G` using the following code::
- df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]})
- G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows())
- If `df` has a user-specified compound data type the names
- of the data fields will be used as attribute keys in the resulting
- NetworkX graph.
- See Also
- --------
- to_pandas_adjacency
- Examples
- --------
- Simple integer weights on edges:
- >>> import pandas as pd
- >>> pd.options.display.max_columns = 20
- >>> df = pd.DataFrame([[1, 1], [2, 1]])
- >>> df
- 0 1
- 0 1 1
- 1 2 1
- >>> G = nx.from_pandas_adjacency(df)
- >>> G.name = "Graph from pandas adjacency matrix"
- >>> print(G)
- Graph named 'Graph from pandas adjacency matrix' with 2 nodes and 3 edges
- """
- try:
- df = df[df.index]
- except Exception as err:
- missing = list(set(df.index).difference(set(df.columns)))
- msg = f"{missing} not in columns"
- raise nx.NetworkXError("Columns must match Indices.", msg) from err
- A = df.values
- G = from_numpy_array(A, create_using=create_using, nodelist=df.columns)
- return G
- @nx._dispatchable(preserve_edge_attrs=True)
- def to_pandas_edgelist(
- G,
- source="source",
- target="target",
- nodelist=None,
- dtype=None,
- edge_key=None,
- ):
- """Returns the graph edge list as a Pandas DataFrame.
- Parameters
- ----------
- G : graph
- The NetworkX graph used to construct the Pandas DataFrame.
- source : str or int, optional
- A valid column name (string or integer) for the source nodes (for the
- directed case).
- target : str or int, optional
- A valid column name (string or integer) for the target nodes (for the
- directed case).
- nodelist : list, optional
- Use only nodes specified in nodelist
- dtype : dtype, default None
- Use to create the DataFrame. Data type to force.
- Only a single dtype is allowed. If None, infer.
- edge_key : str or int or None, optional (default=None)
- A valid column name (string or integer) for the edge keys (for the
- multigraph case). If None, edge keys are not stored in the DataFrame.
- Returns
- -------
- df : Pandas DataFrame
- Graph edge list
- Examples
- --------
- >>> G = nx.Graph(
- ... [
- ... ("A", "B", {"cost": 1, "weight": 7}),
- ... ("C", "E", {"cost": 9, "weight": 10}),
- ... ]
- ... )
- >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"])
- >>> df[["source", "target", "cost", "weight"]]
- source target cost weight
- 0 A B 1 7
- 1 C E 9 10
- >>> G = nx.MultiGraph([("A", "B", {"cost": 1}), ("A", "B", {"cost": 9})])
- >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"], edge_key="ekey")
- >>> df[["source", "target", "cost", "ekey"]]
- source target cost ekey
- 0 A B 1 0
- 1 A B 9 1
- """
- import pandas as pd
- if nodelist is None:
- edgelist = G.edges(data=True)
- else:
- edgelist = G.edges(nodelist, data=True)
- source_nodes = [s for s, _, _ in edgelist]
- target_nodes = [t for _, t, _ in edgelist]
- all_attrs = set().union(*(d.keys() for _, _, d in edgelist))
- if source in all_attrs:
- raise nx.NetworkXError(f"Source name {source!r} is an edge attr name")
- if target in all_attrs:
- raise nx.NetworkXError(f"Target name {target!r} is an edge attr name")
- nan = float("nan")
- edge_attr = {k: [d.get(k, nan) for _, _, d in edgelist] for k in all_attrs}
- if G.is_multigraph() and edge_key is not None:
- if edge_key in all_attrs:
- raise nx.NetworkXError(f"Edge key name {edge_key!r} is an edge attr name")
- edge_keys = [k for _, _, k in G.edges(keys=True)]
- edgelistdict = {source: source_nodes, target: target_nodes, edge_key: edge_keys}
- else:
- edgelistdict = {source: source_nodes, target: target_nodes}
- edgelistdict.update(edge_attr)
- return pd.DataFrame(edgelistdict, dtype=dtype)
- @nx._dispatchable(graphs=None, returns_graph=True)
- def from_pandas_edgelist(
- df,
- source="source",
- target="target",
- edge_attr=None,
- create_using=None,
- edge_key=None,
- ):
- """Returns a graph from Pandas DataFrame containing an edge list.
- The Pandas DataFrame should contain at least two columns of node names and
- zero or more columns of edge attributes. Each row will be processed as one
- edge instance.
- Note: This function iterates over DataFrame.values, which is not
- guaranteed to retain the data type across columns in the row. This is only
- a problem if your row is entirely numeric and a mix of ints and floats. In
- that case, all values will be returned as floats. See the
- DataFrame.iterrows documentation for an example.
- Parameters
- ----------
- df : Pandas DataFrame
- An edge list representation of a graph
- source : str or int
- A valid column name (string or integer) for the source nodes (for the
- directed case).
- target : str or int
- A valid column name (string or integer) for the target nodes (for the
- directed case).
- edge_attr : str or int, iterable, True, or None
- A valid column name (str or int) or iterable of column names that are
- used to retrieve items and add them to the graph as edge attributes.
- If `True`, all columns will be added except `source`, `target` and `edge_key`.
- If `None`, no edge attributes are added to the graph.
- create_using : NetworkX graph constructor, optional (default=nx.Graph)
- Graph type to create. If graph instance, then cleared before populated.
- edge_key : str or None, optional (default=None)
- A valid column name for the edge keys (for a MultiGraph). The values in
- this column are used for the edge keys when adding edges if create_using
- is a multigraph.
- Notes
- -----
- If you have node attributes stored in a separate dataframe `df_nodes`,
- you can load those attributes to the graph `G` using the following code::
- df_nodes = pd.DataFrame({"node_id": [1, 2, 3], "attribute1": ["A", "B", "C"]})
- G.add_nodes_from((n, dict(d)) for n, d in df_nodes.iterrows())
- See Also
- --------
- to_pandas_edgelist
- Examples
- --------
- Simple integer weights on edges:
- >>> import pandas as pd
- >>> pd.options.display.max_columns = 20
- >>> import numpy as np
- >>> rng = np.random.RandomState(seed=5)
- >>> ints = rng.randint(1, 11, size=(3, 2))
- >>> a = ["A", "B", "C"]
- >>> b = ["D", "A", "E"]
- >>> df = pd.DataFrame(ints, columns=["weight", "cost"])
- >>> df[0] = a
- >>> df["b"] = b
- >>> df[["weight", "cost", 0, "b"]]
- weight cost 0 b
- 0 4 7 A D
- 1 7 1 B A
- 2 10 9 C E
- >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"])
- >>> G["E"]["C"]["weight"]
- 10
- >>> G["E"]["C"]["cost"]
- 9
- >>> edges = pd.DataFrame(
- ... {
- ... "source": [0, 1, 2],
- ... "target": [2, 2, 3],
- ... "weight": [3, 4, 5],
- ... "color": ["red", "blue", "blue"],
- ... }
- ... )
- >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)
- >>> G[0][2]["color"]
- 'red'
- Build multigraph with custom keys:
- >>> edges = pd.DataFrame(
- ... {
- ... "source": [0, 1, 2, 0],
- ... "target": [2, 2, 3, 2],
- ... "my_edge_key": ["A", "B", "C", "D"],
- ... "weight": [3, 4, 5, 6],
- ... "color": ["red", "blue", "blue", "blue"],
- ... }
- ... )
- >>> G = nx.from_pandas_edgelist(
- ... edges,
- ... edge_key="my_edge_key",
- ... edge_attr=["weight", "color"],
- ... create_using=nx.MultiGraph(),
- ... )
- >>> G[0][2]
- AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}})
- """
- g = nx.empty_graph(0, create_using)
- if edge_attr is None:
- if g.is_multigraph() and edge_key is not None:
- for u, v, k in zip(df[source], df[target], df[edge_key]):
- g.add_edge(u, v, k)
- else:
- g.add_edges_from(zip(df[source], df[target]))
- return g
- reserved_columns = [source, target]
- if g.is_multigraph() and edge_key is not None:
- reserved_columns.append(edge_key)
- # Additional columns requested
- attr_col_headings = []
- attribute_data = []
- if edge_attr is True:
- attr_col_headings = [c for c in df.columns if c not in reserved_columns]
- elif isinstance(edge_attr, list | tuple):
- attr_col_headings = edge_attr
- else:
- attr_col_headings = [edge_attr]
- if len(attr_col_headings) == 0:
- raise nx.NetworkXError(
- f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}"
- )
- try:
- attribute_data = zip(*[df[col] for col in attr_col_headings])
- except (KeyError, TypeError) as err:
- msg = f"Invalid edge_attr argument: {edge_attr}"
- raise nx.NetworkXError(msg) from err
- if g.is_multigraph():
- # => append the edge keys from the df to the bundled data
- if edge_key is not None:
- try:
- multigraph_edge_keys = df[edge_key]
- attribute_data = zip(attribute_data, multigraph_edge_keys)
- except (KeyError, TypeError) as err:
- msg = f"Invalid edge_key argument: {edge_key}"
- raise nx.NetworkXError(msg) from err
- for s, t, attrs in zip(df[source], df[target], attribute_data):
- if edge_key is not None:
- attrs, multigraph_edge_key = attrs
- key = g.add_edge(s, t, key=multigraph_edge_key)
- else:
- key = g.add_edge(s, t)
- g[s][t][key].update(zip(attr_col_headings, attrs))
- else:
- for s, t, attrs in zip(df[source], df[target], attribute_data):
- g.add_edge(s, t)
- g[s][t].update(zip(attr_col_headings, attrs))
- return g
- @nx._dispatchable(edge_attrs="weight")
- def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format="csr"):
- """Returns the graph adjacency matrix as a SciPy sparse array.
- Parameters
- ----------
- G : graph
- The NetworkX graph used to construct the sparse array.
- nodelist : list, optional
- The rows and columns are ordered according to the nodes in `nodelist`.
- If `nodelist` is None, then the ordering is produced by ``G.nodes()``.
- dtype : NumPy data-type, optional
- A valid NumPy dtype used to initialize the array. If None, then the
- NumPy default is used.
- weight : string or None, optional (default='weight')
- The edge attribute that holds the numerical value used for
- the edge weight. If None then all edge weights are 1.
- format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
- The format of the sparse array to be returned (default 'csr'). For
- some algorithms different implementations of sparse arrays
- can perform better. See [1]_ for details.
- Returns
- -------
- A : SciPy sparse array
- Graph adjacency matrix.
- Notes
- -----
- For directed graphs, matrix entry ``i, j`` corresponds to an edge from
- ``i`` to ``j``.
- The values of the adjacency matrix are populated using the edge attribute held in
- parameter `weight`. When an edge does not have that attribute, the
- value of the entry is 1.
- For multiple edges the matrix values are the sums of the edge weights.
- When `nodelist` does not contain every node in `G`, the adjacency matrix
- is built from the subgraph of `G` that is induced by the nodes in
- `nodelist`.
- The convention used for self-loop edges in graphs is to assign the
- diagonal matrix entry value to the weight attribute of the edge
- (or the number 1 if the edge has no weight attribute). If the
- alternate convention of doubling the edge weight is desired the
- resulting array can be modified as follows::
- >>> G = nx.Graph([(1, 1)])
- >>> A = nx.to_scipy_sparse_array(G)
- >>> A.toarray()
- array([[1]])
- >>> A.setdiag(A.diagonal() * 2)
- >>> A.toarray()
- array([[2]])
- Examples
- --------
- Basic usage:
- >>> G = nx.path_graph(4)
- >>> A = nx.to_scipy_sparse_array(G)
- >>> A # doctest: +SKIP
- <Compressed Sparse Row sparse array of dtype 'int64'
- with 6 stored elements and shape (4, 4)>
- >>> A.toarray()
- array([[0, 1, 0, 0],
- [1, 0, 1, 0],
- [0, 1, 0, 1],
- [0, 0, 1, 0]])
- .. note:: The `toarray` method is used in these examples to better visualize
- the adjacency matrix. For a dense representation of the adjaceny matrix,
- use `to_numpy_array` instead.
- Directed graphs:
- >>> G = nx.DiGraph([(0, 1), (1, 2), (2, 3)])
- >>> nx.to_scipy_sparse_array(G).toarray()
- array([[0, 1, 0, 0],
- [0, 0, 1, 0],
- [0, 0, 0, 1],
- [0, 0, 0, 0]])
- >>> H = G.reverse()
- >>> H.edges
- OutEdgeView([(1, 0), (2, 1), (3, 2)])
- >>> nx.to_scipy_sparse_array(H).toarray()
- array([[0, 0, 0, 0],
- [1, 0, 0, 0],
- [0, 1, 0, 0],
- [0, 0, 1, 0]])
- By default, the order of the rows/columns of the adjacency matrix is determined
- by the ordering of the nodes in `G`:
- >>> G = nx.Graph()
- >>> G.add_nodes_from([3, 5, 0, 1])
- >>> G.add_edges_from([(1, 3), (1, 5)])
- >>> nx.to_scipy_sparse_array(G).toarray()
- array([[0, 0, 0, 1],
- [0, 0, 0, 1],
- [0, 0, 0, 0],
- [1, 1, 0, 0]])
- The ordering of the rows can be changed with `nodelist`:
- >>> ordered = [0, 1, 3, 5]
- >>> nx.to_scipy_sparse_array(G, nodelist=ordered).toarray()
- array([[0, 0, 0, 0],
- [0, 0, 1, 1],
- [0, 1, 0, 0],
- [0, 1, 0, 0]])
- If `nodelist` contains a subset of the nodes in `G`, the adjacency matrix
- for the node-induced subgraph is produced:
- >>> nx.to_scipy_sparse_array(G, nodelist=[1, 3, 5]).toarray()
- array([[0, 1, 1],
- [1, 0, 0],
- [1, 0, 0]])
- The values of the adjacency matrix are drawn from the edge attribute
- specified by the `weight` parameter:
- >>> G = nx.path_graph(4)
- >>> nx.set_edge_attributes(
- ... G, values={(0, 1): 1, (1, 2): 10, (2, 3): 2}, name="weight"
- ... )
- >>> nx.set_edge_attributes(
- ... G, values={(0, 1): 50, (1, 2): 35, (2, 3): 10}, name="capacity"
- ... )
- >>> nx.to_scipy_sparse_array(G).toarray() # Default weight="weight"
- array([[ 0, 1, 0, 0],
- [ 1, 0, 10, 0],
- [ 0, 10, 0, 2],
- [ 0, 0, 2, 0]])
- >>> nx.to_scipy_sparse_array(G, weight="capacity").toarray()
- array([[ 0, 50, 0, 0],
- [50, 0, 35, 0],
- [ 0, 35, 0, 10],
- [ 0, 0, 10, 0]])
- Any edges that don't have a `weight` attribute default to 1:
- >>> G[1][2].pop("capacity")
- 35
- >>> nx.to_scipy_sparse_array(G, weight="capacity").toarray()
- array([[ 0, 50, 0, 0],
- [50, 0, 1, 0],
- [ 0, 1, 0, 10],
- [ 0, 0, 10, 0]])
- When `G` is a multigraph, the values in the adjacency matrix are given by
- the sum of the `weight` edge attribute over each edge key:
- >>> G = nx.MultiDiGraph([(0, 1), (0, 1), (0, 1), (2, 0)])
- >>> nx.to_scipy_sparse_array(G).toarray()
- array([[0, 3, 0],
- [0, 0, 0],
- [1, 0, 0]])
- References
- ----------
- .. [1] Scipy Dev. References, "Sparse Arrays",
- https://docs.scipy.org/doc/scipy/reference/sparse.html
- """
- import scipy as sp
- if len(G) == 0:
- raise nx.NetworkXError("Graph has no nodes or edges")
- if nodelist is None:
- nodelist = list(G)
- nlen = len(G)
- else:
- nlen = len(nodelist)
- if nlen == 0:
- raise nx.NetworkXError("nodelist has no nodes")
- nodeset = set(G.nbunch_iter(nodelist))
- if nlen != len(nodeset):
- for n in nodelist:
- if n not in G:
- raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
- raise nx.NetworkXError("nodelist contains duplicates.")
- if nlen < len(G):
- G = G.subgraph(nodelist)
- index = dict(zip(nodelist, range(nlen)))
- coefficients = zip(
- *((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1))
- )
- try:
- row, col, data = coefficients
- except ValueError:
- # there is no edge in the subgraph
- row, col, data = [], [], []
- if G.is_directed():
- A = sp.sparse.coo_array((data, (row, col)), shape=(nlen, nlen), dtype=dtype)
- else:
- # symmetrize matrix
- d = data + data
- r = row + col
- c = col + row
- # selfloop entries get double counted when symmetrizing
- # so we subtract the data on the diagonal
- selfloops = list(nx.selfloop_edges(G, data=weight, default=1))
- if selfloops:
- diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops))
- d += diag_data
- r += diag_index
- c += diag_index
- A = sp.sparse.coo_array((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
- try:
- return A.asformat(format)
- except ValueError as err:
- raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from err
- def _csr_gen_triples(A):
- """Converts a SciPy sparse array in **Compressed Sparse Row** format to
- an iterable of weighted edge triples.
- """
- nrows = A.shape[0]
- indptr, dst_indices, data = A.indptr, A.indices, A.data
- import numpy as np
- src_indices = np.repeat(np.arange(nrows), np.diff(indptr))
- return zip(src_indices.tolist(), dst_indices.tolist(), A.data.tolist())
- def _csc_gen_triples(A):
- """Converts a SciPy sparse array in **Compressed Sparse Column** format to
- an iterable of weighted edge triples.
- """
- ncols = A.shape[1]
- indptr, src_indices, data = A.indptr, A.indices, A.data
- import numpy as np
- dst_indices = np.repeat(np.arange(ncols), np.diff(indptr))
- return zip(src_indices.tolist(), dst_indices.tolist(), A.data.tolist())
- def _coo_gen_triples(A):
- """Converts a SciPy sparse array in **Coordinate** format to an iterable
- of weighted edge triples.
- """
- return zip(A.row.tolist(), A.col.tolist(), A.data.tolist())
- def _dok_gen_triples(A):
- """Converts a SciPy sparse array in **Dictionary of Keys** format to an
- iterable of weighted edge triples.
- """
- for (r, c), v in A.items():
- # Use `v.item()` to convert a NumPy scalar to the appropriate Python scalar
- yield int(r), int(c), v.item()
- def _generate_weighted_edges(A):
- """Returns an iterable over (u, v, w) triples, where u and v are adjacent
- vertices and w is the weight of the edge joining u and v.
- `A` is a SciPy sparse array (in any format).
- """
- if A.format == "csr":
- return _csr_gen_triples(A)
- if A.format == "csc":
- return _csc_gen_triples(A)
- if A.format == "dok":
- return _dok_gen_triples(A)
- # If A is in any other format (including COO), convert it to COO format.
- return _coo_gen_triples(A.tocoo())
- @nx._dispatchable(graphs=None, returns_graph=True)
- def from_scipy_sparse_array(
- A, parallel_edges=False, create_using=None, edge_attribute="weight"
- ):
- """Creates a new graph from an adjacency matrix given as a SciPy sparse
- array.
- Parameters
- ----------
- A: scipy.sparse array
- An adjacency matrix representation of a graph
- parallel_edges : Boolean
- If this is True, `create_using` is a multigraph, and `A` is an
- integer matrix, then entry *(i, j)* in the matrix is interpreted as the
- number of parallel edges joining vertices *i* and *j* in the graph.
- If it is False, then the entries in the matrix are interpreted as
- the weight of a single edge joining the vertices.
- create_using : NetworkX graph constructor, optional (default=nx.Graph)
- Graph type to create. If graph instance, then cleared before populated.
- edge_attribute: string
- Name of edge attribute to store matrix numeric value. The data will
- have the same type as the matrix entry (int, float, (real,imag)).
- Notes
- -----
- For directed graphs, explicitly mention create_using=nx.DiGraph,
- and entry i,j of A corresponds to an edge from i to j.
- If `create_using` is :class:`networkx.MultiGraph` or
- :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
- entries of `A` are of type :class:`int`, then this function returns a
- multigraph (constructed from `create_using`) with parallel edges.
- In this case, `edge_attribute` will be ignored.
- If `create_using` indicates an undirected multigraph, then only the edges
- indicated by the upper triangle of the matrix `A` will be added to the
- graph.
- Examples
- --------
- >>> import scipy as sp
- >>> A = sp.sparse.eye(2, 2, 1)
- >>> G = nx.from_scipy_sparse_array(A)
- If `create_using` indicates a multigraph and the matrix has only integer
- entries and `parallel_edges` is False, then the entries will be treated
- as weights for edges joining the nodes (without creating parallel edges):
- >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
- >>> G = nx.from_scipy_sparse_array(A, create_using=nx.MultiGraph)
- >>> G[1][1]
- AtlasView({0: {'weight': 2}})
- If `create_using` indicates a multigraph and the matrix has only integer
- entries and `parallel_edges` is True, then the entries will be treated
- as the number of parallel edges joining those two vertices:
- >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
- >>> G = nx.from_scipy_sparse_array(
- ... A, parallel_edges=True, create_using=nx.MultiGraph
- ... )
- >>> G[1][1]
- AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
- """
- G = nx.empty_graph(0, create_using)
- n, m = A.shape
- if n != m:
- raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
- # Make sure we get even the isolated nodes of the graph.
- G.add_nodes_from(range(n))
- # Create an iterable over (u, v, w) triples and for each triple, add an
- # edge from u to v with weight w.
- triples = _generate_weighted_edges(A)
- # If the entries in the adjacency matrix are integers, the graph is a
- # multigraph, and parallel_edges is True, then create parallel edges, each
- # with weight 1, for each entry in the adjacency matrix. Otherwise, create
- # one edge for each positive entry in the adjacency matrix and set the
- # weight of that edge to be the entry in the matrix.
- if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges:
- chain = itertools.chain.from_iterable
- # The following line is equivalent to:
- #
- # for (u, v) in edges:
- # for d in range(A[u, v]):
- # G.add_edge(u, v, weight=1)
- #
- triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)
- # If we are creating an undirected multigraph, only add the edges from the
- # upper triangle of the matrix. Otherwise, add all the edges. This relies
- # on the fact that the vertices created in the
- # `_generated_weighted_edges()` function are actually the row/column
- # indices for the matrix `A`.
- #
- # Without this check, we run into a problem where each edge is added twice
- # when `G.add_weighted_edges_from()` is invoked below.
- if G.is_multigraph() and not G.is_directed():
- triples = ((u, v, d) for u, v, d in triples if u <= v)
- G.add_weighted_edges_from(triples, weight=edge_attribute)
- return G
- @nx._dispatchable(edge_attrs="weight") # edge attrs may also be obtained from `dtype`
- def to_numpy_array(
- G,
- nodelist=None,
- dtype=None,
- order=None,
- multigraph_weight=sum,
- weight="weight",
- nonedge=0.0,
- ):
- """Returns the graph adjacency matrix as a NumPy array.
- Parameters
- ----------
- G : graph
- The NetworkX graph used to construct the NumPy array.
- nodelist : list, optional
- The rows and columns are ordered according to the nodes in `nodelist`.
- If `nodelist` is ``None``, then the ordering is produced by ``G.nodes()``.
- dtype : NumPy data type, optional
- A NumPy data type used to initialize the array. If None, then the NumPy
- default is used. The dtype can be structured if `weight=None`, in which
- case the dtype field names are used to look up edge attributes. The
- result is a structured array where each named field in the dtype
- corresponds to the adjacency for that edge attribute. See examples for
- details.
- order : {'C', 'F'}, optional
- Whether to store multidimensional data in C- or Fortran-contiguous
- (row- or column-wise) order in memory. If None, then the NumPy default
- is used.
- multigraph_weight : callable, optional
- An function that determines how weights in multigraphs are handled.
- The function should accept a sequence of weights and return a single
- value. The default is to sum the weights of the multiple edges.
- weight : string or None optional (default = 'weight')
- The edge attribute that holds the numerical value used for
- the edge weight. If an edge does not have that attribute, then the
- value 1 is used instead. `weight` must be ``None`` if a structured
- dtype is used.
- nonedge : array_like (default = 0.0)
- The value used to represent non-edges in the adjacency matrix.
- The array values corresponding to nonedges are typically set to zero.
- However, this could be undesirable if there are array values
- corresponding to actual edges that also have the value zero. If so,
- one might prefer nonedges to have some other value, such as ``nan``.
- Returns
- -------
- A : NumPy ndarray
- Graph adjacency matrix
- Raises
- ------
- NetworkXError
- If `dtype` is a structured dtype and `G` is a multigraph
- ValueError
- If `dtype` is a structured dtype and `weight` is not `None`
- See Also
- --------
- from_numpy_array
- Notes
- -----
- For directed graphs, entry ``i, j`` corresponds to an edge from ``i`` to ``j``.
- Entries in the adjacency matrix are given by the `weight` edge attribute.
- When an edge does not have a weight attribute, the value of the entry is
- set to the number 1. For multiple (parallel) edges, the values of the
- entries are determined by the `multigraph_weight` parameter. The default is
- to sum the weight attributes for each of the parallel edges.
- When `nodelist` does not contain every node in `G`, the adjacency matrix is
- built from the subgraph of `G` that is induced by the nodes in `nodelist`.
- The convention used for self-loop edges in graphs is to assign the
- diagonal array entry value to the weight attribute of the edge
- (or the number 1 if the edge has no weight attribute). If the
- alternate convention of doubling the edge weight is desired the
- resulting NumPy array can be modified as follows:
- >>> import numpy as np
- >>> G = nx.Graph([(1, 1)])
- >>> A = nx.to_numpy_array(G)
- >>> A
- array([[1.]])
- >>> A[np.diag_indices_from(A)] *= 2
- >>> A
- array([[2.]])
- Examples
- --------
- >>> G = nx.MultiDiGraph()
- >>> G.add_edge(0, 1, weight=2)
- 0
- >>> G.add_edge(1, 0)
- 0
- >>> G.add_edge(2, 2, weight=3)
- 0
- >>> G.add_edge(2, 2)
- 1
- >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])
- array([[0., 2., 0.],
- [1., 0., 0.],
- [0., 0., 4.]])
- When `nodelist` argument is used, nodes of `G` which do not appear in the `nodelist`
- and their edges are not included in the adjacency matrix. Here is an example:
- >>> G = nx.Graph()
- >>> G.add_edge(3, 1)
- >>> G.add_edge(2, 0)
- >>> G.add_edge(2, 1)
- >>> G.add_edge(3, 0)
- >>> nx.to_numpy_array(G, nodelist=[1, 2, 3])
- array([[0., 1., 1.],
- [1., 0., 0.],
- [1., 0., 0.]])
- This function can also be used to create adjacency matrices for multiple
- edge attributes with structured dtypes:
- >>> G = nx.Graph()
- >>> G.add_edge(0, 1, weight=10)
- >>> G.add_edge(1, 2, cost=5)
- >>> G.add_edge(2, 3, weight=3, cost=-4.0)
- >>> dtype = np.dtype([("weight", int), ("cost", float)])
- >>> A = nx.to_numpy_array(G, dtype=dtype, weight=None)
- >>> A["weight"]
- array([[ 0, 10, 0, 0],
- [10, 0, 1, 0],
- [ 0, 1, 0, 3],
- [ 0, 0, 3, 0]])
- >>> A["cost"]
- array([[ 0., 1., 0., 0.],
- [ 1., 0., 5., 0.],
- [ 0., 5., 0., -4.],
- [ 0., 0., -4., 0.]])
- As stated above, the argument "nonedge" is useful especially when there are
- actually edges with weight 0 in the graph. Setting a nonedge value different than 0,
- makes it much clearer to differentiate such 0-weighted edges and actual nonedge values.
- >>> G = nx.Graph()
- >>> G.add_edge(3, 1, weight=2)
- >>> G.add_edge(2, 0, weight=0)
- >>> G.add_edge(2, 1, weight=0)
- >>> G.add_edge(3, 0, weight=1)
- >>> nx.to_numpy_array(G, nonedge=-1.0)
- array([[-1., 2., -1., 1.],
- [ 2., -1., 0., -1.],
- [-1., 0., -1., 0.],
- [ 1., -1., 0., -1.]])
- """
- import numpy as np
- if nodelist is None:
- nodelist = list(G)
- nlen = len(nodelist)
- # Input validation
- nodeset = set(nodelist)
- if nodeset - set(G):
- raise nx.NetworkXError(f"Nodes {nodeset - set(G)} in nodelist is not in G")
- if len(nodeset) < nlen:
- raise nx.NetworkXError("nodelist contains duplicates.")
- A = np.full((nlen, nlen), fill_value=nonedge, dtype=dtype, order=order)
- # Corner cases: empty nodelist or graph without any edges
- if nlen == 0 or G.number_of_edges() == 0:
- return A
- # If dtype is structured and weight is None, use dtype field names as
- # edge attributes
- edge_attrs = None # Only single edge attribute by default
- if A.dtype.names:
- if weight is None:
- edge_attrs = dtype.names
- else:
- raise ValueError(
- "Specifying `weight` not supported for structured dtypes\n."
- "To create adjacency matrices from structured dtypes, use `weight=None`."
- )
- # Map nodes to row/col in matrix
- idx = dict(zip(nodelist, range(nlen)))
- if len(nodelist) < len(G):
- G = G.subgraph(nodelist).copy()
- # Collect all edge weights and reduce with `multigraph_weights`
- if G.is_multigraph():
- if edge_attrs:
- raise nx.NetworkXError(
- "Structured arrays are not supported for MultiGraphs"
- )
- d = defaultdict(list)
- for u, v, wt in G.edges(data=weight, default=1.0):
- d[(idx[u], idx[v])].append(wt)
- i, j = np.array(list(d.keys())).T # indices
- wts = [multigraph_weight(ws) for ws in d.values()] # reduced weights
- else:
- i, j, wts = [], [], []
- # Special branch: multi-attr adjacency from structured dtypes
- if edge_attrs:
- # Extract edges with all data
- for u, v, data in G.edges(data=True):
- i.append(idx[u])
- j.append(idx[v])
- wts.append(data)
- # Map each attribute to the appropriate named field in the
- # structured dtype
- for attr in edge_attrs:
- attr_data = [wt.get(attr, 1.0) for wt in wts]
- A[attr][i, j] = attr_data
- if not G.is_directed():
- A[attr][j, i] = attr_data
- return A
- for u, v, wt in G.edges(data=weight, default=1.0):
- i.append(idx[u])
- j.append(idx[v])
- wts.append(wt)
- # Set array values with advanced indexing
- A[i, j] = wts
- if not G.is_directed():
- A[j, i] = wts
- return A
- @nx._dispatchable(graphs=None, returns_graph=True)
- def from_numpy_array(
- A, parallel_edges=False, create_using=None, edge_attr="weight", *, nodelist=None
- ):
- """Returns a graph from a 2D NumPy array.
- The 2D NumPy array is interpreted as an adjacency matrix for the graph.
- Parameters
- ----------
- A : a 2D numpy.ndarray
- An adjacency matrix representation of a graph
- parallel_edges : Boolean
- If this is True, `create_using` is a multigraph, and `A` is an
- integer array, then entry *(i, j)* in the array is interpreted as the
- number of parallel edges joining vertices *i* and *j* in the graph.
- If it is False, then the entries in the array are interpreted as
- the weight of a single edge joining the vertices.
- create_using : NetworkX graph constructor, optional (default=nx.Graph)
- Graph type to create. If graph instance, then cleared before populated.
- edge_attr : String, optional (default="weight")
- The attribute to which the array values are assigned on each edge. If
- it is None, edge attributes will not be assigned.
- nodelist : sequence of nodes, optional
- A sequence of objects to use as the nodes in the graph. If provided, the
- list of nodes must be the same length as the dimensions of `A`. The
- default is `None`, in which case the nodes are drawn from ``range(n)``.
- Notes
- -----
- For directed graphs, explicitly mention create_using=nx.DiGraph,
- and entry i,j of A corresponds to an edge from i to j.
- If `create_using` is :class:`networkx.MultiGraph` or
- :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
- entries of `A` are of type :class:`int`, then this function returns a
- multigraph (of the same type as `create_using`) with parallel edges.
- If `create_using` indicates an undirected multigraph, then only the edges
- indicated by the upper triangle of the array `A` will be added to the
- graph.
- If `edge_attr` is Falsy (False or None), edge attributes will not be
- assigned, and the array data will be treated like a binary mask of
- edge presence or absence. Otherwise, the attributes will be assigned
- as follows:
- If the NumPy array has a single data type for each array entry it
- will be converted to an appropriate Python data type.
- If the NumPy array has a user-specified compound data type the names
- of the data fields will be used as attribute keys in the resulting
- NetworkX graph.
- See Also
- --------
- to_numpy_array
- Examples
- --------
- Simple integer weights on edges:
- >>> import numpy as np
- >>> A = np.array([[1, 1], [2, 1]])
- >>> G = nx.from_numpy_array(A)
- >>> G.edges(data=True)
- EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})])
- If `create_using` indicates a multigraph and the array has only integer
- entries and `parallel_edges` is False, then the entries will be treated
- as weights for edges joining the nodes (without creating parallel edges):
- >>> A = np.array([[1, 1], [1, 2]])
- >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)
- >>> G[1][1]
- AtlasView({0: {'weight': 2}})
- If `create_using` indicates a multigraph and the array has only integer
- entries and `parallel_edges` is True, then the entries will be treated
- as the number of parallel edges joining those two vertices:
- >>> A = np.array([[1, 1], [1, 2]])
- >>> temp = nx.MultiGraph()
- >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)
- >>> G[1][1]
- AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
- User defined compound data type on edges:
- >>> dt = [("weight", float), ("cost", int)]
- >>> A = np.array([[(1.0, 2)]], dtype=dt)
- >>> G = nx.from_numpy_array(A)
- >>> G.edges()
- EdgeView([(0, 0)])
- >>> G[0][0]["cost"]
- 2
- >>> G[0][0]["weight"]
- 1.0
- """
- kind_to_python_type = {
- "f": float,
- "i": int,
- "u": int,
- "b": bool,
- "c": complex,
- "S": str,
- "U": str,
- "V": "void",
- }
- G = nx.empty_graph(0, create_using)
- if A.ndim != 2:
- raise nx.NetworkXError(f"Input array must be 2D, not {A.ndim}")
- n, m = A.shape
- if n != m:
- raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
- dt = A.dtype
- try:
- python_type = kind_to_python_type[dt.kind]
- except Exception as err:
- raise TypeError(f"Unknown numpy data type: {dt}") from err
- if _default_nodes := (nodelist is None):
- nodelist = range(n)
- else:
- if len(nodelist) != n:
- raise ValueError("nodelist must have the same length as A.shape[0]")
- # Make sure we get even the isolated nodes of the graph.
- G.add_nodes_from(nodelist)
- # Get a list of all the entries in the array with nonzero entries. These
- # coordinates become edges in the graph. (convert to int from np.int64)
- edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero()))
- # handle numpy constructed data type
- if python_type == "void":
- # Sort the fields by their offset, then by dtype, then by name.
- fields = sorted(
- (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items()
- )
- triples = (
- (
- u,
- v,
- {}
- if edge_attr in [False, None]
- else {
- name: kind_to_python_type[dtype.kind](val)
- for (_, dtype, name), val in zip(fields, A[u, v])
- },
- )
- for u, v in edges
- )
- # If the entries in the adjacency matrix are integers, the graph is a
- # multigraph, and parallel_edges is True, then create parallel edges, each
- # with weight 1, for each entry in the adjacency matrix. Otherwise, create
- # one edge for each positive entry in the adjacency matrix and set the
- # weight of that edge to be the entry in the matrix.
- elif python_type is int and G.is_multigraph() and parallel_edges:
- chain = itertools.chain.from_iterable
- # The following line is equivalent to:
- #
- # for (u, v) in edges:
- # for d in range(A[u, v]):
- # G.add_edge(u, v, weight=1)
- #
- if edge_attr in [False, None]:
- triples = chain(((u, v, {}) for d in range(A[u, v])) for (u, v) in edges)
- else:
- triples = chain(
- ((u, v, {edge_attr: 1}) for d in range(A[u, v])) for (u, v) in edges
- )
- else: # basic data type
- if edge_attr in [False, None]:
- triples = ((u, v, {}) for u, v in edges)
- else:
- triples = ((u, v, {edge_attr: python_type(A[u, v])}) for u, v in edges)
- # If we are creating an undirected multigraph, only add the edges from the
- # upper triangle of the matrix. Otherwise, add all the edges. This relies
- # on the fact that the vertices created in the
- # `_generated_weighted_edges()` function are actually the row/column
- # indices for the matrix `A`.
- #
- # Without this check, we run into a problem where each edge is added twice
- # when `G.add_edges_from()` is invoked below.
- if G.is_multigraph() and not G.is_directed():
- triples = ((u, v, d) for u, v, d in triples if u <= v)
- # Remap nodes if user provided custom `nodelist`
- if not _default_nodes:
- idx_to_node = dict(enumerate(nodelist))
- triples = ((idx_to_node[u], idx_to_node[v], d) for u, v, d in triples)
- G.add_edges_from(triples)
- return G
|