| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
- import numpy as np
- import scipy as sp
- __all__ = ['save_npz', 'load_npz']
- # Make loading safe vs. malicious input
- PICKLE_KWARGS = dict(allow_pickle=False)
- def save_npz(file, matrix, compressed=True):
- """ Save a sparse matrix or array to a file using ``.npz`` format.
- Parameters
- ----------
- file : str or file-like object
- Either the file name (string) or an open file (file-like object)
- where the data will be saved. If file is a string, the ``.npz``
- extension will be appended to the file name if it is not already
- there.
- matrix: spmatrix or sparray
- The sparse matrix or array to save.
- Supported formats: ``csc``, ``csr``, ``bsr``, ``dia`` or ``coo``.
- compressed : bool, optional
- Allow compressing the file. Default: True
- See Also
- --------
- scipy.sparse.load_npz: Load a sparse matrix from a file using ``.npz`` format.
- numpy.savez: Save several arrays into a ``.npz`` archive.
- numpy.savez_compressed : Save several arrays into a compressed ``.npz`` archive.
- Examples
- --------
- Store sparse matrix to disk, and load it again:
- >>> import numpy as np
- >>> import scipy as sp
- >>> sparse_matrix = sp.sparse.csc_matrix([[0, 0, 3], [4, 0, 0]])
- >>> sparse_matrix
- <Compressed Sparse Column sparse matrix of dtype 'int64'
- with 2 stored elements and shape (2, 3)>
- >>> sparse_matrix.toarray()
- array([[0, 0, 3],
- [4, 0, 0]], dtype=int64)
- >>> sp.sparse.save_npz('/tmp/sparse_matrix.npz', sparse_matrix)
- >>> sparse_matrix = sp.sparse.load_npz('/tmp/sparse_matrix.npz')
- >>> sparse_matrix
- <Compressed Sparse Column sparse matrix of dtype 'int64'
- with 2 stored elements and shape (2, 3)>
- >>> sparse_matrix.toarray()
- array([[0, 0, 3],
- [4, 0, 0]], dtype=int64)
- """
- arrays_dict = {}
- if matrix.format in ('csc', 'csr', 'bsr'):
- arrays_dict.update(indices=matrix.indices, indptr=matrix.indptr)
- elif matrix.format == 'dia':
- arrays_dict.update(offsets=matrix.offsets)
- elif matrix.format == 'coo':
- if matrix.ndim == 2:
- # TODO: After a few releases, switch 2D case to save with coords only.
- arrays_dict.update(row=matrix.row, col=matrix.col)
- else:
- arrays_dict.update(coords=matrix.coords)
- else:
- msg = f'Save is not implemented for sparse matrix of format {matrix.format}.'
- raise NotImplementedError(msg)
- arrays_dict.update(
- format=matrix.format.encode('ascii'),
- shape=matrix.shape,
- data=matrix.data
- )
- if isinstance(matrix, sp.sparse.sparray):
- arrays_dict.update(_is_array=True)
- if compressed:
- np.savez_compressed(file, **arrays_dict)
- else:
- np.savez(file, **arrays_dict)
- def load_npz(file):
- """ Load a sparse array/matrix from a file using ``.npz`` format.
- Parameters
- ----------
- file : str or file-like object
- Either the file name (string) or an open file (file-like object)
- where the data will be loaded.
- Returns
- -------
- result : csc_array, csr_array, bsr_array, dia_array or coo_array
- A sparse array/matrix containing the loaded data.
- Raises
- ------
- OSError
- If the input file does not exist or cannot be read.
- See Also
- --------
- scipy.sparse.save_npz: Save a sparse array/matrix to a file using ``.npz`` format.
- numpy.load: Load several arrays from a ``.npz`` archive.
- Examples
- --------
- Store sparse array/matrix to disk, and load it again:
- >>> import numpy as np
- >>> import scipy as sp
- >>> sparse_array = sp.sparse.csc_array([[0, 0, 3], [4, 0, 0]])
- >>> sparse_array
- <Compressed Sparse Column sparse array of dtype 'int64'
- with 2 stored elements and shape (2, 3)>
- >>> sparse_array.toarray()
- array([[0, 0, 3],
- [4, 0, 0]], dtype=int64)
- >>> sp.sparse.save_npz('/tmp/sparse_array.npz', sparse_array)
- >>> sparse_array = sp.sparse.load_npz('/tmp/sparse_array.npz')
- >>> sparse_array
- <Compressed Sparse Column sparse array of dtype 'int64'
- with 2 stored elements and shape (2, 3)>
- >>> sparse_array.toarray()
- array([[0, 0, 3],
- [4, 0, 0]], dtype=int64)
- In this example we force the result to be csr_array from csr_matrix
- >>> sparse_matrix = sp.sparse.csc_matrix([[0, 0, 3], [4, 0, 0]])
- >>> sp.sparse.save_npz('/tmp/sparse_matrix.npz', sparse_matrix)
- >>> tmp = sp.sparse.load_npz('/tmp/sparse_matrix.npz')
- >>> sparse_array = sp.sparse.csr_array(tmp)
- """
- with np.load(file, **PICKLE_KWARGS) as loaded:
- sparse_format = loaded.get('format')
- if sparse_format is None:
- raise ValueError(f'The file {file} does not contain '
- f'a sparse array or matrix.')
- sparse_format = sparse_format.item()
- if not isinstance(sparse_format, str):
- # Play safe with Python 2 vs 3 backward compatibility;
- # files saved with SciPy < 1.0.0 may contain unicode or bytes.
- sparse_format = sparse_format.decode('ascii')
- if loaded.get('_is_array'):
- sparse_type = sparse_format + '_array'
- else:
- sparse_type = sparse_format + '_matrix'
- try:
- cls = getattr(sp.sparse, f'{sparse_type}')
- except AttributeError as e:
- raise ValueError(f'Unknown format "{sparse_type}"') from e
- if sparse_format in ('csc', 'csr', 'bsr'):
- return cls((loaded['data'], loaded['indices'], loaded['indptr']),
- shape=loaded['shape'])
- elif sparse_format == 'dia':
- return cls((loaded['data'], loaded['offsets']), shape=loaded['shape'])
- elif sparse_format == 'coo':
- if 'coords' in loaded:
- return cls((loaded['data'], loaded['coords']), shape=loaded['shape'])
- return cls((loaded['data'], (loaded['row'], loaded['col'])),
- shape=loaded['shape'])
- else:
- raise NotImplementedError(f'Load is not implemented for '
- f'sparse matrix of format {sparse_format}.')
|