| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571 |
- """
- Implementation of Harwell-Boeing read/write.
- At the moment not the full Harwell-Boeing format is supported. Supported
- features are:
- - assembled, non-symmetric, real matrices
- - integer for pointer/indices
- - exponential format for float values, and int format
- """
- # TODO:
- # - Add more support (symmetric/complex matrices, non-assembled matrices ?)
- # XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
- # takes a lot of memory. Being faster would require compiled code.
- # write is not efficient. Although not a terribly exciting task,
- # having reusable facilities to efficiently read/write fortran-formatted files
- # would be useful outside this module.
- import warnings
- import numpy as np
- from scipy.sparse import csc_array, csc_matrix
- from ._fortran_format_parser import FortranFormatParser, IntFormat, ExpFormat
- __all__ = ["hb_read", "hb_write"]
- class MalformedHeader(Exception):
- pass
- class LineOverflow(Warning):
- pass
- def _nbytes_full(fmt, nlines):
- """Return the number of bytes to read to get every full lines for the
- given parsed fortran format."""
- return (fmt.repeat * fmt.width + 1) * (nlines - 1)
- class HBInfo:
- @classmethod
- def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
- """Create a HBInfo instance from an existing sparse matrix.
- Parameters
- ----------
- m : sparse array or matrix
- the HBInfo instance will derive its parameters from m
- title : str
- Title to put in the HB header
- key : str
- Key
- mxtype : HBMatrixType
- type of the input matrix
- fmt : dict
- not implemented
- Returns
- -------
- hb_info : HBInfo instance
- """
- m = m.tocsc(copy=False)
- pointer = m.indptr
- indices = m.indices
- values = m.data
- nrows, ncols = m.shape
- nnon_zeros = m.nnz
- if fmt is None:
- # +1 because HB use one-based indexing (Fortran), and we will write
- # the indices /pointer as such
- pointer_fmt = IntFormat.from_number(np.max(pointer+1))
- indices_fmt = IntFormat.from_number(np.max(indices+1))
- if values.dtype.kind in np.typecodes["AllFloat"]:
- values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
- elif values.dtype.kind in np.typecodes["AllInteger"]:
- values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
- else:
- message = f"type {values.dtype.kind} not implemented yet"
- raise NotImplementedError(message)
- else:
- raise NotImplementedError("fmt argument not supported yet.")
- if mxtype is None:
- if not np.isrealobj(values):
- raise ValueError("Complex values not supported yet")
- if values.dtype.kind in np.typecodes["AllInteger"]:
- tp = "integer"
- elif values.dtype.kind in np.typecodes["AllFloat"]:
- tp = "real"
- else:
- raise NotImplementedError(
- f"type {values.dtype} for values not implemented")
- mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
- else:
- raise ValueError("mxtype argument not handled yet.")
- def _nlines(fmt, size):
- nlines = size // fmt.repeat
- if nlines * fmt.repeat != size:
- nlines += 1
- return nlines
- pointer_nlines = _nlines(pointer_fmt, pointer.size)
- indices_nlines = _nlines(indices_fmt, indices.size)
- values_nlines = _nlines(values_fmt, values.size)
- total_nlines = pointer_nlines + indices_nlines + values_nlines
- return cls(title, key,
- total_nlines, pointer_nlines, indices_nlines, values_nlines,
- mxtype, nrows, ncols, nnon_zeros,
- pointer_fmt.fortran_format, indices_fmt.fortran_format,
- values_fmt.fortran_format)
- @classmethod
- def from_file(cls, fid):
- """Create a HBInfo instance from a file object containing a matrix in the
- HB format.
- Parameters
- ----------
- fid : file-like matrix
- File or file-like object containing a matrix in the HB format.
- Returns
- -------
- hb_info : HBInfo instance
- """
- # First line
- line = fid.readline().strip("\n")
- if not len(line) > 72:
- raise ValueError("Expected at least 72 characters for first line, "
- f"got: \n{line}")
- title = line[:72]
- key = line[72:]
- # Second line
- line = fid.readline().strip("\n")
- if not len(line.rstrip()) >= 56:
- raise ValueError("Expected at least 56 characters for second line, "
- f"got: \n{line}")
- total_nlines = _expect_int(line[:14])
- pointer_nlines = _expect_int(line[14:28])
- indices_nlines = _expect_int(line[28:42])
- values_nlines = _expect_int(line[42:56])
- rhs_nlines = line[56:72].strip()
- if rhs_nlines == '':
- rhs_nlines = 0
- else:
- rhs_nlines = _expect_int(rhs_nlines)
- if not rhs_nlines == 0:
- raise ValueError("Only files without right hand side supported for "
- "now.")
- # Third line
- line = fid.readline().strip("\n")
- if not len(line) >= 70:
- raise ValueError(f"Expected at least 72 character for third line, "
- f"got:\n{line}")
- mxtype_s = line[:3].upper()
- if not len(mxtype_s) == 3:
- raise ValueError("mxtype expected to be 3 characters long")
- mxtype = HBMatrixType.from_fortran(mxtype_s)
- if mxtype.value_type not in ["real", "integer"]:
- raise ValueError("Only real or integer matrices supported for "
- f"now (detected {mxtype})")
- if not mxtype.structure == "unsymmetric":
- raise ValueError("Only unsymmetric matrices supported for "
- f"now (detected {mxtype})")
- if not mxtype.storage == "assembled":
- raise ValueError("Only assembled matrices supported for now")
- if not line[3:14] == " " * 11:
- raise ValueError(f"Malformed data for third line: {line}")
- nrows = _expect_int(line[14:28])
- ncols = _expect_int(line[28:42])
- nnon_zeros = _expect_int(line[42:56])
- nelementals = _expect_int(line[56:70])
- if not nelementals == 0:
- raise ValueError(
- f"Unexpected value {nelementals} for nltvl (last entry of line 3)"
- )
-
- # Fourth line
- line = fid.readline().strip("\n")
- ct = line.split()
- if not len(ct) == 3:
- raise ValueError(f"Expected 3 formats, got {ct}")
- return cls(title, key,
- total_nlines, pointer_nlines, indices_nlines, values_nlines,
- mxtype, nrows, ncols, nnon_zeros,
- ct[0], ct[1], ct[2],
- rhs_nlines, nelementals)
- def __init__(self, title, key,
- total_nlines, pointer_nlines, indices_nlines, values_nlines,
- mxtype, nrows, ncols, nnon_zeros,
- pointer_format_str, indices_format_str, values_format_str,
- right_hand_sides_nlines=0, nelementals=0):
- """Do not use this directly, but the class ctrs (from_* functions)."""
- if title is None:
- title = "No Title"
- if len(title) > 72:
- raise ValueError("title cannot be > 72 characters")
- if key is None:
- key = "|No Key"
- if len(key) > 8:
- warnings.warn(f"key is > 8 characters (key is {key})",
- LineOverflow, stacklevel=3)
- self.title = title
- self.key = key
- self.total_nlines = total_nlines
- self.pointer_nlines = pointer_nlines
- self.indices_nlines = indices_nlines
- self.values_nlines = values_nlines
- parser = FortranFormatParser()
- pointer_format = parser.parse(pointer_format_str)
- if not isinstance(pointer_format, IntFormat):
- raise ValueError("Expected int format for pointer format, got "
- f"{pointer_format}")
- indices_format = parser.parse(indices_format_str)
- if not isinstance(indices_format, IntFormat):
- raise ValueError("Expected int format for indices format, got "
- f"{indices_format}")
- values_format = parser.parse(values_format_str)
- if isinstance(values_format, ExpFormat):
- if mxtype.value_type not in ["real", "complex"]:
- raise ValueError(f"Inconsistency between matrix type {mxtype} and "
- f"value type {values_format}")
- values_dtype = np.float64
- elif isinstance(values_format, IntFormat):
- if mxtype.value_type not in ["integer"]:
- raise ValueError(f"Inconsistency between matrix type {mxtype} and "
- f"value type {values_format}")
- # XXX: fortran int -> dtype association ?
- values_dtype = int
- else:
- raise ValueError(f"Unsupported format for values {values_format!r}")
- self.pointer_format = pointer_format
- self.indices_format = indices_format
- self.values_format = values_format
- self.pointer_dtype = np.int32
- self.indices_dtype = np.int32
- self.values_dtype = values_dtype
- self.pointer_nlines = pointer_nlines
- self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
- self.indices_nlines = indices_nlines
- self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
- self.values_nlines = values_nlines
- self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
- self.nrows = nrows
- self.ncols = ncols
- self.nnon_zeros = nnon_zeros
- self.nelementals = nelementals
- self.mxtype = mxtype
- def dump(self):
- """Gives the header corresponding to this instance as a string."""
- header = [self.title.ljust(72) + self.key.ljust(8)]
- header.append(f"{self.total_nlines:14d}{self.pointer_nlines:14d}{self.indices_nlines:14d}{self.values_nlines:14d}")
- header.append(f"{self.mxtype.fortran_format.ljust(14):14s}{self.nrows:14d}{self.ncols:14d}{self.nnon_zeros:14d}{0:14d}")
- pffmt = self.pointer_format.fortran_format
- iffmt = self.indices_format.fortran_format
- vffmt = self.values_format.fortran_format
- header.append(f"{pffmt.ljust(16):16s}{iffmt.ljust(16):16s}{vffmt.ljust(20):20s}")
- return "\n".join(header)
- def _expect_int(value, msg=None):
- try:
- return int(value)
- except ValueError as e:
- if msg is None:
- msg = "Expected an int, got %s"
- raise ValueError(msg % value) from e
- def _read_hb_data(content, header):
- # XXX: look at a way to reduce memory here (big string creation)
- ptr_string = "".join([content.read(header.pointer_nbytes_full),
- content.readline()])
- ptr = np.fromstring(ptr_string,
- dtype=int, sep=' ')
- ind_string = "".join([content.read(header.indices_nbytes_full),
- content.readline()])
- ind = np.fromstring(ind_string,
- dtype=int, sep=' ')
- val_string = "".join([content.read(header.values_nbytes_full),
- content.readline()])
- val = np.fromstring(val_string,
- dtype=header.values_dtype, sep=' ')
- return csc_array((val, ind-1, ptr-1), shape=(header.nrows, header.ncols))
- def _write_data(m, fid, header):
- m = m.tocsc(copy=False)
- def write_array(f, ar, nlines, fmt):
- # ar_nlines is the number of full lines, n is the number of items per
- # line, ffmt the fortran format
- pyfmt = fmt.python_format
- pyfmt_full = pyfmt * fmt.repeat
- # for each array to write, we first write the full lines, and special
- # case for partial line
- full = ar[:(nlines - 1) * fmt.repeat]
- for row in full.reshape((nlines-1, fmt.repeat)):
- f.write(pyfmt_full % tuple(row) + "\n")
- nremain = ar.size - full.size
- if nremain > 0:
- f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
- fid.write(header.dump())
- fid.write("\n")
- # +1 is for Fortran one-based indexing
- write_array(fid, m.indptr+1, header.pointer_nlines,
- header.pointer_format)
- write_array(fid, m.indices+1, header.indices_nlines,
- header.indices_format)
- write_array(fid, m.data, header.values_nlines,
- header.values_format)
- class HBMatrixType:
- """Class to hold the matrix type."""
- # q2f* translates qualified names to Fortran character
- _q2f_type = {
- "real": "R",
- "complex": "C",
- "pattern": "P",
- "integer": "I",
- }
- _q2f_structure = {
- "symmetric": "S",
- "unsymmetric": "U",
- "hermitian": "H",
- "skewsymmetric": "Z",
- "rectangular": "R"
- }
- _q2f_storage = {
- "assembled": "A",
- "elemental": "E",
- }
- _f2q_type = {j: i for i, j in _q2f_type.items()}
- _f2q_structure = {j: i for i, j in _q2f_structure.items()}
- _f2q_storage = {j: i for i, j in _q2f_storage.items()}
- @classmethod
- def from_fortran(cls, fmt):
- if not len(fmt) == 3:
- raise ValueError("Fortran format for matrix type should be 3 "
- "characters long")
- try:
- value_type = cls._f2q_type[fmt[0]]
- structure = cls._f2q_structure[fmt[1]]
- storage = cls._f2q_storage[fmt[2]]
- return cls(value_type, structure, storage)
- except KeyError as e:
- raise ValueError(f"Unrecognized format {fmt}") from e
- def __init__(self, value_type, structure, storage="assembled"):
- self.value_type = value_type
- self.structure = structure
- self.storage = storage
- if value_type not in self._q2f_type:
- raise ValueError(f"Unrecognized type {value_type}")
- if structure not in self._q2f_structure:
- raise ValueError(f"Unrecognized structure {structure}")
- if storage not in self._q2f_storage:
- raise ValueError(f"Unrecognized storage {storage}")
- @property
- def fortran_format(self):
- return self._q2f_type[self.value_type] + \
- self._q2f_structure[self.structure] + \
- self._q2f_storage[self.storage]
- def __repr__(self):
- return f"HBMatrixType({self.value_type}, {self.structure}, {self.storage})"
- class HBFile:
- def __init__(self, file, hb_info=None):
- """Create a HBFile instance.
- Parameters
- ----------
- file : file-object
- StringIO work as well
- hb_info : HBInfo, optional
- Should be given as an argument for writing, in which case the file
- should be writable.
- """
- self._fid = file
- if hb_info is None:
- self._hb_info = HBInfo.from_file(file)
- else:
- #raise OSError("file %s is not writable, and hb_info "
- # "was given." % file)
- self._hb_info = hb_info
- @property
- def title(self):
- return self._hb_info.title
- @property
- def key(self):
- return self._hb_info.key
- @property
- def type(self):
- return self._hb_info.mxtype.value_type
- @property
- def structure(self):
- return self._hb_info.mxtype.structure
- @property
- def storage(self):
- return self._hb_info.mxtype.storage
- def read_matrix(self):
- return _read_hb_data(self._fid, self._hb_info)
- def write_matrix(self, m):
- return _write_data(m, self._fid, self._hb_info)
- def hb_read(path_or_open_file, *, spmatrix=True):
- """Read HB-format file.
- Parameters
- ----------
- path_or_open_file : path-like or file-like
- If a file-like object, it is used as-is. Otherwise, it is opened
- before reading.
- spmatrix : bool, optional (default: True)
- If ``True``, return sparse matrix. Otherwise return sparse array.
- Returns
- -------
- data : csc_array or csc_matrix
- The data read from the HB file as a sparse array.
- Notes
- -----
- At the moment not the full Harwell-Boeing format is supported. Supported
- features are:
- - assembled, non-symmetric, real matrices
- - integer for pointer/indices
- - exponential format for float values, and int format
- Examples
- --------
- We can read and write a harwell-boeing format file:
- >>> from scipy.io import hb_read, hb_write
- >>> from scipy.sparse import csr_array, eye
- >>> data = csr_array(eye(3)) # create a sparse array
- >>> hb_write("data.hb", data) # write a hb file
- >>> print(hb_read("data.hb", spmatrix=False)) # read a hb file
- <Compressed Sparse Column sparse array of dtype 'float64'
- with 3 stored elements and shape (3, 3)>
- Coords Values
- (0, 0) 1.0
- (1, 1) 1.0
- (2, 2) 1.0
- """
- def _get_matrix(fid):
- hb = HBFile(fid)
- return hb.read_matrix()
- if hasattr(path_or_open_file, 'read'):
- data = _get_matrix(path_or_open_file)
- else:
- with open(path_or_open_file) as f:
- data = _get_matrix(f)
- if spmatrix:
- return csc_matrix(data)
- return data
- def hb_write(path_or_open_file, m, hb_info=None):
- """Write HB-format file.
- Parameters
- ----------
- path_or_open_file : path-like or file-like
- If a file-like object, it is used as-is. Otherwise, it is opened
- before writing.
- m : sparse array or matrix
- the sparse array to write
- hb_info : HBInfo
- contains the meta-data for write
- Returns
- -------
- None
- Notes
- -----
- At the moment not the full Harwell-Boeing format is supported. Supported
- features are:
- - assembled, non-symmetric, real matrices
- - integer for pointer/indices
- - exponential format for float values, and int format
- Examples
- --------
- We can read and write a harwell-boeing format file:
- >>> from scipy.io import hb_read, hb_write
- >>> from scipy.sparse import csr_array, eye
- >>> data = csr_array(eye(3)) # create a sparse array
- >>> hb_write("data.hb", data) # write a hb file
- >>> print(hb_read("data.hb", spmatrix=False)) # read a hb file
- <Compressed Sparse Column sparse array of dtype 'float64'
- with 3 stored elements and shape (3, 3)>
- Coords Values
- (0, 0) 1.0
- (1, 1) 1.0
- (2, 2) 1.0
- """
- m = m.tocsc(copy=False)
- if hb_info is None:
- hb_info = HBInfo.from_data(m)
- def _set_matrix(fid):
- hb = HBFile(fid, hb_info)
- return hb.write_matrix(m)
- if hasattr(path_or_open_file, 'write'):
- return _set_matrix(path_or_open_file)
- else:
- with open(path_or_open_file, 'w') as f:
- return _set_matrix(f)
|