yichael
/
image-match


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
							"""Sparse DIAgonal format"""

__docformat__ = "restructuredtext en"

__all__ = ['dia_array', 'dia_matrix', 'isspmatrix_dia']

import numpy as np

from .._lib._util import _prune_array, copy_if_needed
from ._matrix import spmatrix
from ._base import issparse, _formats, _spbase, sparray
from ._data import _data_matrix
from ._sputils import (
    isdense, isscalarlike, isshape, upcast_char, getdtype, get_sum_dtype,
    validateaxis, check_shape
)
from ._sparsetools import dia_matmat, dia_matvec, dia_matvecs, dia_tocsr


class _dia_base(_data_matrix):
    _format = 'dia'

    def __init__(self, arg1, shape=None, dtype=None, copy=False, *, maxprint=None):
        _data_matrix.__init__(self, arg1, maxprint=maxprint)

        if issparse(arg1):
            if arg1.format == "dia":
                if copy:
                    arg1 = arg1.copy()
                self.data = arg1.data
                self.offsets = arg1.offsets
                self._shape = check_shape(arg1.shape)
            else:
                if arg1.format == self.format and copy:
                    A = arg1.copy()
                else:
                    A = arg1.todia()
                self.data = A.data
                self.offsets = A.offsets
                self._shape = check_shape(A.shape)
        elif isinstance(arg1, tuple):
            if isshape(arg1):
                # It's a tuple of matrix dimensions (M, N)
                # create empty matrix
                self._shape = check_shape(arg1)
                self.data = np.zeros((0,0), getdtype(dtype, default=float))
                idx_dtype = self._get_index_dtype(maxval=max(self.shape))
                self.offsets = np.zeros((0), dtype=idx_dtype)
            else:
                try:
                    # Try interpreting it as (data, offsets)
                    data, offsets = arg1
                except Exception as e:
                    message = 'unrecognized form for dia_array constructor'
                    raise ValueError(message) from e
                else:
                    if shape is None:
                        raise ValueError('expected a shape argument')
                    if not copy:
                        copy = copy_if_needed
                    self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy))
                    offsets = np.array(arg1[1],
                                       dtype=self._get_index_dtype(maxval=max(shape)),
                                       copy=copy)
                    self.offsets = np.atleast_1d(offsets)
                    self._shape = check_shape(shape)
        else:
            # must be dense, convert to COO first, then to DIA
            try:
                arg1 = np.asarray(arg1)
            except Exception as e:
                raise ValueError("unrecognized form for "
                                 f"{self.format}_matrix constructor") from e
            if isinstance(self, sparray) and arg1.ndim != 2:
                raise ValueError(f"DIA arrays don't support {arg1.ndim}D input. Use 2D")
            A = self._coo_container(arg1, dtype=dtype, shape=shape).todia()
            self.data = A.data
            self.offsets = A.offsets
            self._shape = check_shape(A.shape)

        if dtype is not None:
            newdtype = getdtype(dtype)
            self.data = self.data.astype(newdtype, copy=False)

        # check format
        if self.offsets.ndim != 1:
            raise ValueError('offsets array must have rank 1')

        if self.data.ndim != 2:
            raise ValueError('data array must have rank 2')

        if self.data.shape[0] != len(self.offsets):
            raise ValueError(
                f'number of diagonals ({self.data.shape[0]}) does not match the number '
                f'of offsets ({len(self.offsets)})'
            )
        if len(np.unique(self.offsets)) != len(self.offsets):
            raise ValueError('offset array contains duplicate values')

    def __repr__(self):
        _, fmt = _formats[self.format]
        sparse_cls = 'array' if isinstance(self, sparray) else 'matrix'
        d = self.data.shape[0]
        return (
            f"<{fmt} sparse {sparse_cls} of dtype '{self.dtype}'\n"
            f"\twith {self.nnz} stored elements ({d} diagonals) and shape {self.shape}>"
        )

    def _data_mask(self):
        """Returns a mask of the same shape as self.data, where
        mask[i,j] is True when data[i,j] corresponds to a stored element."""
        num_rows, num_cols = self.shape
        offset_inds = np.arange(self.data.shape[1])
        row = offset_inds - self.offsets[:,None]
        mask = (row >= 0)
        mask &= (row < num_rows)
        mask &= (offset_inds < num_cols)
        return mask

    def count_nonzero(self, axis=None):
        if axis is not None:
            raise NotImplementedError(
                "count_nonzero over an axis is not implemented for DIA format"
            )
        mask = self._data_mask()
        return np.count_nonzero(self.data[mask])

    count_nonzero.__doc__ = _spbase.count_nonzero.__doc__

    def _getnnz(self, axis=None):
        if axis is not None:
            raise NotImplementedError("_getnnz over an axis is not implemented "
                                      "for DIA format")
        M, N = self.shape
        L = min(self.data.shape[1], N)
        return int(np.maximum(np.minimum(M + self.offsets, L) -
                              np.maximum(self.offsets, 0),
                              0).sum())

    _getnnz.__doc__ = _spbase._getnnz.__doc__

    def sum(self, axis=None, dtype=None, out=None):
        axis = validateaxis(axis)

        res_dtype = get_sum_dtype(self.dtype)
        num_rows, num_cols = self.shape
        ret = None

        if axis == (0,):
            mask = self._data_mask()
            x = (self.data * mask).sum(axis=0)
            if x.shape[0] == num_cols:
                res = x
            else:
                res = np.zeros(num_cols, dtype=x.dtype)
                res[:x.shape[0]] = x
            ret = self._ascontainer(res, dtype=res_dtype)

        else:  # axis is None or (1,)
            row_sums = np.zeros((num_rows, 1), dtype=res_dtype)
            one = np.ones(num_cols, dtype=res_dtype)
            dia_matvec(num_rows, num_cols, len(self.offsets),
                       self.data.shape[1], self.offsets, self.data, one, row_sums)

            row_sums = self._ascontainer(row_sums)

            if axis is None:
                return row_sums.sum(dtype=dtype, out=out)

            ret = self._ascontainer(row_sums.sum(axis=axis))

        return ret.sum(axis=(), dtype=dtype, out=out)

    sum.__doc__ = _spbase.sum.__doc__

    def _add_sparse(self, other, sub=False):
        # If other is not DIA format, let them handle us instead.
        if not isinstance(other, _dia_base):
            return other._add_sparse(self)

        # Fast path for exact equality of the sparsity structure.
        if np.array_equal(self.offsets, other.offsets):
            return self._with_data(self.data - other.data if sub else
                                   self.data + other.data)

        # Find the union of the offsets (which will be sorted and unique).
        new_offsets = np.union1d(self.offsets, other.offsets)
        self_idx = np.searchsorted(new_offsets, self.offsets)
        other_idx = np.searchsorted(new_offsets, other.offsets)

        self_d = self.data.shape[1]
        other_d = other.data.shape[1]
        # Fast path for a sparsity structure where the final offsets are a
        # permutation of the existing offsets and the diagonal lengths match.
        if self_d == other_d and len(new_offsets) == len(self.offsets):
            new_data = self.data[_invert_index(self_idx)]
            if sub:
                new_data[other_idx, :] -= other.data
            else:
                new_data[other_idx, :] += other.data
        elif self_d == other_d and len(new_offsets) == len(other.offsets):
            if sub:
                new_data = -other.data[_invert_index(other_idx)]
            else:
                new_data = other.data[_invert_index(other_idx)]
            new_data[self_idx, :] += self.data
        else:
            # Maximum diagonal length of the result.
            d = min(self.shape[0] + new_offsets[-1], self.shape[1])

            # Add all diagonals to a freshly allocated data array.
            new_data = np.zeros(
                (len(new_offsets), d),
                dtype=np.result_type(self.data, other.data),
            )
            new_data[self_idx, :self_d] += self.data[:, :d]
            if sub:
                new_data[other_idx, :other_d] -= other.data[:, :d]
            else:
                new_data[other_idx, :other_d] += other.data[:, :d]
        return self._dia_container((new_data, new_offsets), shape=self.shape)

    def _sub_sparse(self, other):
        # If other is not DIA format, use default handler.
        if not isinstance(other, _dia_base):
            return super()._sub_sparse(other)

        return self._add_sparse(other, sub=True)

    def _mul_scalar(self, other):
        return self._with_data(self.data * other)

    def multiply(self, other):
        if isscalarlike(other):
            return self._mul_scalar(other)

        if isdense(other):
            if other.ndim > 2:
                return self.toarray() * other

            # Use default handler for pathological cases.
            if 0 in self.shape or 1 in self.shape or 0 in other.shape:
                return super().multiply(other)

            other = np.atleast_2d(other)
            other_rows, other_cols = other.shape
            rows, cols = self.shape
            L = min(self.data.shape[1], cols)
            data = self.data[:, :L].astype(np.result_type(self.data, other))  # copy
            if other_rows == 1:
                data *= other[0, :L]
            elif other_rows != rows:
                raise ValueError('inconsistent shapes')
            else:
                j = np.arange(L)
                if L > rows:
                    i = (j - self.offsets[:, None]) % rows
                else:  # can use faster method
                    i = j - self.offsets[:, None] % rows
                if other_cols == 1:
                    j = 0
                elif other_cols != cols:
                    raise ValueError('inconsistent shapes')
                data *= other[i, j]
            return self._with_data(data)

        # If other is not DIA format or needs broadcasting (unreasonable
        # use case for DIA anyway), use default handler.
        if not isinstance(other, _dia_base) or other.shape != self.shape:
            return super().multiply(other)

        # Find common offsets (unique diagonals don't contribute)
        # and indices corresponding to them in multiplicand and multiplier.
        offsets, self_idx, other_idx = \
            np.intersect1d(self.offsets, other.offsets,
                           assume_unique=True, return_indices=True)
        # Only overlapping length of diagonals can have non-zero products.
        L = min(self.data.shape[1], other.data.shape[1])
        data = self.data[self_idx, :L] * other.data[other_idx, :L]
        return self._dia_container((data, offsets), shape=self.shape)

    def _matmul_vector(self, other):
        x = other

        y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
                                                       x.dtype.char))

        L = self.data.shape[1]

        M,N = self.shape

        dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data,
                   x.ravel(), y.ravel())

        return y

    def _matmul_multivector(self, other):
        res = np.zeros((self.shape[0], other.shape[1]),
                       dtype=np.result_type(self.data, other))
        dia_matvecs(*self.shape, *self.data.shape, self.offsets, self.data,
                    other.shape[1], other, res)
        return res

    def _matmul_sparse(self, other):
        # If other is not DIA format, use default handler.
        if not isinstance(other, _dia_base):
            return super()._matmul_sparse(other)

        # If any dimension is zero, return empty array immediately.
        if 0 in self.shape or 0 in other.shape:
            return self._dia_container((self.shape[0], other.shape[1]))

        offsets, data = dia_matmat(*self.shape, *self.data.shape,
                                   self.offsets, self.data,
                                   other.shape[1], *other.data.shape,
                                   other.offsets, other.data)
        return self._dia_container((data.reshape(len(offsets), -1), offsets),
                                   (self.shape[0], other.shape[1]))

    def _setdiag(self, values, k=0):
        M, N = self.shape

        if values.ndim == 0:
            # broadcast
            values_n = np.inf
        else:
            values_n = len(values)

        if k < 0:
            n = min(M + k, N, values_n)
            min_index = 0
            max_index = n
        else:
            n = min(M, N - k, values_n)
            min_index = k
            max_index = k + n

        if values.ndim != 0:
            # allow also longer sequences
            values = values[:n]

        data_rows, data_cols = self.data.shape
        if k in self.offsets:
            if max_index > data_cols:
                data = np.zeros((data_rows, max_index), dtype=self.data.dtype)
                data[:, :data_cols] = self.data
                self.data = data
            self.data[self.offsets == k, min_index:max_index] = values
        else:
            self.offsets = np.append(self.offsets, self.offsets.dtype.type(k))
            m = max(max_index, data_cols)
            data = np.zeros((data_rows + 1, m), dtype=self.data.dtype)
            data[:-1, :data_cols] = self.data
            data[-1, min_index:max_index] = values
            self.data = data

    def todia(self, copy=False):
        if copy:
            return self.copy()
        else:
            return self

    todia.__doc__ = _spbase.todia.__doc__

    def transpose(self, axes=None, copy=False):
        if axes is not None and axes != (1, 0):
            raise ValueError("Sparse arrays/matrices do not support "
                              "an 'axes' parameter because swapping "
                              "dimensions is the only logical permutation.")

        num_rows, num_cols = self.shape
        max_dim = max(self.shape)

        # flip diagonal offsets
        offsets = -self.offsets

        # re-align the data matrix
        r = np.arange(len(offsets), dtype=np.intc)[:, None]
        c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
        pad_amount = max(0, max_dim-self.data.shape[1])
        data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
                                              dtype=self.data.dtype)))
        data = data[r, c]
        return self._dia_container((data, offsets), shape=(
            num_cols, num_rows), copy=copy)

    transpose.__doc__ = _spbase.transpose.__doc__

    def diagonal(self, k=0):
        rows, cols = self.shape
        if k <= -rows or k >= cols:
            return np.empty(0, dtype=self.data.dtype)
        idx, = np.nonzero(self.offsets == k)
        first_col = max(0, k)
        last_col = min(rows + k, cols)
        result_size = last_col - first_col
        if idx.size == 0:
            return np.zeros(result_size, dtype=self.data.dtype)
        result = self.data[idx[0], first_col:last_col]
        padding = result_size - len(result)
        if padding > 0:
            result = np.pad(result, (0, padding), mode='constant')
        return result

    diagonal.__doc__ = _spbase.diagonal.__doc__

    def tocsr(self, copy=False):
        if 0 in self.shape or len(self.offsets) == 0:
            return self._csr_container(self.shape, dtype=self.dtype)

        n_rows, n_cols = self.shape
        max_nnz = self.nnz
        # np.argsort always returns dtype=int, which can cause automatic dtype
        # expansion for everything else even if not needed (see gh19245), but
        # CSR wants common dtype for indices, indptr and shape, so care should
        # be taken to use appropriate indexing dtype throughout.
        idx_dtype = self._get_index_dtype(maxval=max(max_nnz, n_rows, n_cols))
        order = np.argsort(self.offsets).astype(idx_dtype, copy=False)
        csr_data = np.empty(max_nnz, dtype=self.dtype)
        indices = np.empty(max_nnz, dtype=idx_dtype)
        indptr = np.empty(1 + n_rows, dtype=idx_dtype)
        # Conversion eliminates explicit zeros and returns actual nnz.
        nnz = dia_tocsr(n_rows, n_cols, *self.data.shape,
                        self.offsets.astype(idx_dtype, copy=False), self.data,
                        order, csr_data, indices, indptr)
        # Shrink indexing dtype, if needed, and prune arrays.
        idx_dtype = self._get_index_dtype(maxval=max(nnz, n_rows, n_cols))
        csr_data = _prune_array(csr_data[:nnz])
        indices = _prune_array(indices[:nnz].astype(idx_dtype, copy=False))
        indptr = indptr.astype(idx_dtype, copy=False)
        out = self._csr_container((csr_data, indices, indptr),
                                  shape=self.shape, dtype=self.dtype)
        out.has_canonical_format = True
        return out

    tocsr.__doc__ = _spbase.tocsr.__doc__

    # needed by _data_matrix
    def _with_data(self, data, copy=True):
        """Returns a matrix with the same sparsity structure as self,
        but with different data.  By default the structure arrays are copied.
        """
        if copy:
            return self._dia_container(
                (data, self.offsets.copy()), shape=self.shape
            )
        else:
            return self._dia_container(
                (data, self.offsets), shape=self.shape
            )

    def resize(self, *shape):
        shape = check_shape(shape)
        M, N = shape
        # we do not need to handle the case of expanding N
        self.data = self.data[:, :N]

        if (M > self.shape[0] and
                np.any(self.offsets + self.shape[0] < self.data.shape[1])):
            # explicitly clear values that were previously hidden
            mask = (self.offsets[:, None] + self.shape[0] <=
                    np.arange(self.data.shape[1]))
            self.data[mask] = 0

        self._shape = shape

    resize.__doc__ = _spbase.resize.__doc__


def _invert_index(idx):
    """Helper function to invert an index array."""
    inv = np.zeros_like(idx)
    inv[idx] = np.arange(len(idx))
    return inv


def isspmatrix_dia(x):
    """Is `x` of dia_matrix type?

    Parameters
    ----------
    x
        object to check for being a dia matrix

    Returns
    -------
    bool
        True if `x` is a dia matrix, False otherwise

    Examples
    --------
    >>> from scipy.sparse import dia_array, dia_matrix, coo_matrix, isspmatrix_dia
    >>> isspmatrix_dia(dia_matrix([[5]]))
    True
    >>> isspmatrix_dia(dia_array([[5]]))
    False
    >>> isspmatrix_dia(coo_matrix([[5]]))
    False
    """
    return isinstance(x, dia_matrix)


# This namespace class separates array from matrix with isinstance
class dia_array(_dia_base, sparray):
    """
    Sparse array with DIAgonal storage.

    This can be instantiated in several ways:
        dia_array(D)
            where D is a 2-D ndarray

        dia_array(S)
            with another sparse array or matrix S (equivalent to S.todia())

        dia_array((M, N), [dtype])
            to construct an empty array with shape (M, N),
            dtype is optional, defaulting to dtype='d'.

        dia_array((data, offsets), shape=(M, N))
            where the ``data[k,:]`` stores the diagonal entries for
            diagonal ``offsets[k]`` (See example below)

    Attributes
    ----------
    dtype : dtype
        Data type of the array
    shape : 2-tuple
        Shape of the array
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        DIA format data array of the array
    offsets
        DIA format offset array of the array
    T

    Notes
    -----

    Sparse arrays can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.
    Sparse arrays with DIAgonal storage do not support slicing.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import dia_array
    >>> dia_array((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
    >>> offsets = np.array([0, -1, 2])
    >>> dia_array((data, offsets), shape=(4, 4)).toarray()
    array([[1, 0, 3, 0],
           [1, 2, 0, 4],
           [0, 2, 3, 0],
           [0, 0, 3, 4]])

    >>> from scipy.sparse import dia_array
    >>> n = 10
    >>> ex = np.ones(n)
    >>> data = np.array([ex, 2 * ex, ex])
    >>> offsets = np.array([-1, 0, 1])
    >>> dia_array((data, offsets), shape=(n, n)).toarray()
    array([[2., 1., 0., ..., 0., 0., 0.],
           [1., 2., 1., ..., 0., 0., 0.],
           [0., 1., 2., ..., 0., 0., 0.],
           ...,
           [0., 0., 0., ..., 2., 1., 0.],
           [0., 0., 0., ..., 1., 2., 1.],
           [0., 0., 0., ..., 0., 1., 2.]])
    """


class dia_matrix(spmatrix, _dia_base):
    """
    Sparse matrix with DIAgonal storage.

    This can be instantiated in several ways:
        dia_matrix(D)
            where D is a 2-D ndarray

        dia_matrix(S)
            with another sparse array or matrix S (equivalent to S.todia())

        dia_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N),
            dtype is optional, defaulting to dtype='d'.

        dia_matrix((data, offsets), shape=(M, N))
            where the ``data[k,:]`` stores the diagonal entries for
            diagonal ``offsets[k]`` (See example below)

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        DIA format data array of the matrix
    offsets
        DIA format offset array of the matrix
    T

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.
    Sparse matrices with DIAgonal storage do not support slicing.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import dia_matrix
    >>> dia_matrix((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
    >>> offsets = np.array([0, -1, 2])
    >>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
    array([[1, 0, 3, 0],
           [1, 2, 0, 4],
           [0, 2, 3, 0],
           [0, 0, 3, 4]])

    >>> from scipy.sparse import dia_matrix
    >>> n = 10
    >>> ex = np.ones(n)
    >>> data = np.array([ex, 2 * ex, ex])
    >>> offsets = np.array([-1, 0, 1])
    >>> dia_matrix((data, offsets), shape=(n, n)).toarray()
    array([[2., 1., 0., ..., 0., 0., 0.],
           [1., 2., 1., ..., 0., 0., 0.],
           [0., 1., 2., ..., 0., 0., 0.],
           ...,
           [0., 0., 0., ..., 2., 1., 0.],
           [0., 0., 0., ..., 1., 2., 1.],
           [0., 0., 0., ..., 0., 1., 2.]])
    """