| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756 |
- """Utilities for fast persistence of big data, with optional compression."""
- # Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
- # Copyright (c) 2009 Gael Varoquaux
- # License: BSD Style, 3 clauses.
- import io
- import os
- import pickle
- import warnings
- from pathlib import Path
- from .backports import make_memmap
- from .compressor import (
- _COMPRESSORS,
- LZ4_NOT_INSTALLED_ERROR,
- BinaryZlibFile,
- BZ2CompressorWrapper,
- GzipCompressorWrapper,
- LZ4CompressorWrapper,
- LZMACompressorWrapper,
- XZCompressorWrapper,
- ZlibCompressorWrapper,
- lz4,
- register_compressor,
- )
- # For compatibility with old versions of joblib, we need ZNDArrayWrapper
- # to be visible in the current namespace.
- from .numpy_pickle_compat import (
- NDArrayWrapper,
- ZNDArrayWrapper, # noqa: F401
- load_compatibility,
- )
- from .numpy_pickle_utils import (
- BUFFER_SIZE,
- Pickler,
- Unpickler,
- _ensure_native_byte_order,
- _read_bytes,
- _reconstruct,
- _validate_fileobject_and_memmap,
- _write_fileobject,
- )
# Register the supported compressors. Each wrapper is instantiated right
# before it is registered, and the registration order is fixed.
for _compressor_name, _wrapper_class in (
    ("zlib", ZlibCompressorWrapper),
    ("gzip", GzipCompressorWrapper),
    ("bz2", BZ2CompressorWrapper),
    ("lzma", LZMACompressorWrapper),
    ("xz", XZCompressorWrapper),
    ("lz4", LZ4CompressorWrapper),
):
    register_compressor(_compressor_name, _wrapper_class())
# Do not leave the loop variables behind in the module namespace.
del _compressor_name, _wrapper_class

###############################################################################
# Utility objects for persistence.

# Alignment, in bytes, applied to array data written to a pickle file.
# 16 bytes is used for convenience, to be sure every possible dtype alignment
# is covered. For reference, see:
# https://numpy.org/devdocs/dev/alignment.html
NUMPY_ARRAY_ALIGNMENT_BYTES = 16
class NumpyArrayWrapper:
    """Stand-in object pickled in place of a numpy array.

    The wrapper hooks into the pickle machinery so that array data can be
    written to, and read back from, the custom persistence format.

    It is responsible for:

    * holding the metadata of the persisted array (subclass, shape, order and
      dtype), which is used to rebuild the array with low level numpy
      functions;
    * telling whether memory mapping is allowed for the array;
    * writing the array bytes to a file;
    * reading the array bytes back, either in memory or through a memmap.

    Attributes
    ----------
    subclass: numpy.ndarray subclass
        Class of the wrapped array.
    shape: numpy.ndarray shape
        Shape of the wrapped array.
    order: {'C', 'F'}
        Layout of the wrapped array data: 'C' for C order, 'F' for Fortran
        order.
    dtype: numpy.ndarray dtype
        Data type of the wrapped array.
    allow_mmap: bool
        Whether memory mapping is allowed on the wrapped array.
        Default: False.
    numpy_array_alignment_bytes: int or None
        Alignment, in bytes, used to pad the array data when it is written.
        When None, no padding is written nor expected on read.
    """

    def __init__(
        self,
        subclass,
        shape,
        order,
        dtype,
        allow_mmap=False,
        numpy_array_alignment_bytes=NUMPY_ARRAY_ALIGNMENT_BYTES,
    ):
        """Record the array metadata needed to rebuild it later."""
        # Attribute creation order is kept stable: it is the order in which
        # the instance state gets pickled.
        self.subclass, self.shape = subclass, shape
        self.order, self.dtype = order, dtype
        self.allow_mmap = allow_mmap
        # The alignment lives on the instance so that the module default can
        # change while pickles written with a previous alignment still load
        # correctly.
        self.numpy_array_alignment_bytes = numpy_array_alignment_bytes

    def safe_get_numpy_array_alignment_bytes(self):
        """Return the stored alignment, or None when the attribute is absent.

        Instances loaded from joblib <= 1.1 pickles do not have a
        numpy_array_alignment_bytes attribute.
        """
        try:
            return self.numpy_array_alignment_bytes
        except AttributeError:
            return None

    def write_array(self, array, pickler):
        """Write the bytes of `array` to the file handle of `pickler`.

        This function is an adaptation of the numpy write_array function
        available in version 1.10.1 in numpy/lib/format.py.

        Parameters
        ----------
        array: numpy.ndarray
            The array whose data is written.
        pickler: NumpyPickler
            Provides `file_handle` (the output stream) and `np` (the numpy
            module).
        """
        out = pickler.file_handle
        # A 16 MiB buffer hides the overhead of the Python-level loop.
        buffersize = max(16 * 1024**2 // array.itemsize, 1)

        if array.dtype.hasobject:
            # Python objects cannot be dumped as raw bytes: the array is
            # pickled instead, using version 5 of the pickle protocol.
            pickle.dump(array, out, protocol=5)
            return

        alignment = self.safe_get_numpy_array_alignment_bytes()
        if alignment is not None:
            # One byte holding the padding length is written first, followed
            # by the padding itself, so the padding is computed from the
            # position right after that length byte.
            pos_after_padding_byte = out.tell() + 1
            padding_length = alignment - (pos_after_padding_byte % alignment)
            out.write(int.to_bytes(padding_length, length=1, byteorder="little"))
            if padding_length != 0:
                out.write(b"\xff" * padding_length)

        chunks = pickler.np.nditer(
            array,
            flags=["external_loop", "buffered", "zerosize_ok"],
            buffersize=buffersize,
            order=self.order,
        )
        for chunk in chunks:
            out.write(chunk.tobytes("C"))

    def read_array(self, unpickler, ensure_native_byte_order):
        """Read the array in memory from the file handle of `unpickler`.

        This function is an adaptation of the numpy read_array function
        available in version 1.10.1 in numpy/lib/format.py.

        Parameters
        ----------
        unpickler: NumpyUnpickler
            Provides `file_handle` (the input stream) and `np` (the numpy
            module).
        ensure_native_byte_order: bool
            If true, the array is passed through _ensure_native_byte_order
            before being returned.

        Returns
        -------
        array: numpy.ndarray
        """
        np = unpickler.np
        source = unpickler.file_handle

        count = 1
        if len(self.shape) != 0:
            # joblib issue #859: the dimensions are cast to int64 to prevent
            # a potential overflow when computing their product.
            count = np.multiply.reduce([np.int64(dim) for dim in self.shape])

        if self.dtype.hasobject:
            # The array contained Python objects: it was pickled as a whole.
            array = pickle.load(source)
        else:
            alignment = self.safe_get_numpy_array_alignment_bytes()
            if alignment is not None:
                # Skip the padding: one length byte, then that many bytes.
                padding_length = int.from_bytes(source.read(1), byteorder="little")
                if padding_length != 0:
                    source.read(padding_length)

            # This is not a real file, so it is read the memory-intensive
            # way. The crc32 module fails on reads greater than 2 ** 32
            # bytes, which breaks large reads from gzip streams. Reads are
            # therefore chunked to BUFFER_SIZE bytes, which also reduces the
            # memory overhead. When count < max_read_count a single read is
            # performed.
            itemsize = self.dtype.itemsize
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, itemsize)

            array = np.empty(count, dtype=self.dtype)
            for start in range(0, count, max_read_count):
                read_count = min(max_read_count, count - start)
                read_size = int(read_count * itemsize)
                data = _read_bytes(source, read_size, "array data")
                array[start : start + read_count] = np.frombuffer(
                    data, dtype=self.dtype, count=read_count
                )
                del data

            if self.order == "F":
                # Fortran ordered data: set the reversed shape, then
                # transpose to get back to the stored shape.
                array.shape = self.shape[::-1]
                array = array.transpose()
            else:
                array.shape = self.shape

        if ensure_native_byte_order:
            # Detect byte order mismatch and swap as needed.
            array = _ensure_native_byte_order(array)
        return array

    def read_mmap(self, unpickler):
        """Read the array as a numpy memmap on the file of `unpickler`.

        A warning is emitted when the wrapper carries no alignment
        information and the array data does not start on a multiple of
        NUMPY_ARRAY_ALIGNMENT_BYTES.
        """
        source = unpickler.file_handle
        current_pos = source.tell()
        offset = current_pos

        alignment = self.safe_get_numpy_array_alignment_bytes()
        if alignment is not None:
            padding_length = int.from_bytes(source.read(1), byteorder="little")
            # The extra 1 accounts for the padding length byte itself.
            offset += padding_length + 1

        if unpickler.mmap_mode == "w+":
            unpickler.mmap_mode = "r+"

        marray = make_memmap(
            unpickler.filename,
            dtype=self.dtype,
            shape=self.shape,
            order=self.order,
            mode=unpickler.mmap_mode,
            offset=offset,
        )
        # Move the file handle to the end of the array data that was mapped.
        source.seek(offset + marray.nbytes)

        if alignment is None and current_pos % NUMPY_ARRAY_ALIGNMENT_BYTES != 0:
            warnings.warn(
                f"The memmapped array {marray} loaded from the file "
                f"{unpickler.file_handle.name} is not byte aligned. "
                "This may cause segmentation faults if this memmapped array "
                "is used in some libraries like BLAS or PyTorch. "
                "To get rid of this warning, regenerate your pickle file "
                "with joblib >= 1.2.0. "
                "See https://github.com/joblib/joblib/issues/563 "
                "for more details"
            )

        return marray

    def read(self, unpickler, ensure_native_byte_order):
        """Read the array corresponding to this wrapper.

        The unpickler provides everything needed to read the array.

        Parameters
        ----------
        unpickler: NumpyUnpickler
        ensure_native_byte_order: bool
            If true, coerce the array to use the native endianness of the
            host system.

        Returns
        -------
        array: numpy.ndarray
        """
        # Memory mapping is used only when requested and allowed.
        use_mmap = unpickler.mmap_mode is not None and self.allow_mmap
        if use_mmap:
            assert not ensure_native_byte_order, (
                "Memmaps cannot be coerced to a given byte order, "
                "this code path is impossible."
            )
            array = self.read_mmap(unpickler)
        else:
            array = self.read_array(unpickler, ensure_native_byte_order)

        # Plain ndarray/memmap wrappers, and arrays without
        # __array_prepare__, are returned as read.
        if not hasattr(array, "__array_prepare__") or self.subclass in (
            unpickler.np.ndarray,
            unpickler.np.memmap,
        ):
            return array

        # Another subclass has to be reconstructed around the data.
        new_array = _reconstruct(self.subclass, (0,), "b")
        return new_array.__array_prepare__(array)
- ###############################################################################
- # Pickler classes
class NumpyPickler(Pickler):
    """Pickler that persists big data efficiently.

    Its main features are:

    * numpy arrays are persisted in the same single file as the rest of the
      object;
    * optional compression, with special care taken to avoid memory copies.

    Attributes
    ----------
    fp: file
        File object handle used for serializing the input object.
    protocol: int, optional
        Pickle protocol used. Default is pickle.DEFAULT_PROTOCOL.
    """

    dispatch = Pickler.dispatch.copy()

    def __init__(self, fp, protocol=None):
        """Bind the pickler to `fp` and look up numpy if it is installed."""
        self.file_handle = fp
        self.buffered = isinstance(fp, BinaryZlibFile)

        # The default protocol should only change with the major python
        # version, not the minor one.
        Pickler.__init__(
            self,
            fp,
            protocol=pickle.DEFAULT_PROTOCOL if protocol is None else protocol,
        )

        # numpy is imported lazily to avoid a tight coupling.
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def _create_array_wrapper(self, array):
        """Build the NumpyArrayWrapper describing `array`."""
        flags = array.flags
        fortran_only = flags.f_contiguous and not flags.c_contiguous
        order = "F" if fortran_only else "C"

        # Memory mapping is ruled out for buffered (compressed) streams and
        # for arrays holding Python objects.
        allow_mmap = not (self.buffered or array.dtype.hasobject)

        # When the stream cannot report its position, alignment is disabled
        # on the wrapper.
        extra = {}
        try:
            self.file_handle.tell()
        except io.UnsupportedOperation:
            extra["numpy_array_alignment_bytes"] = None

        return NumpyArrayWrapper(
            type(array),
            array.shape,
            order,
            array.dtype,
            allow_mmap=allow_mmap,
            **extra,
        )

    def save(self, obj):
        """Override the Pickler `save` method to special-case numpy arrays.

        This is a total abuse of the Pickler class: a numpy array is replaced
        by a custom wrapper in the pickle stream and the array bytes are
        written in the file right after it. Warning: the file produced does
        not follow the pickle format. As such it can not be read with
        `pickle.load`.
        """
        np = self.np
        # Only exact ndarray, matrix and memmap instances are intercepted.
        if np is None or type(obj) not in (np.ndarray, np.matrix, np.memmap):
            return Pickler.save(self, obj)

        if type(obj) is np.memmap:
            # Pickling doesn't work with memmapped arrays.
            obj = np.asanyarray(obj)

        # The wrapper is pickled in place of the real array.
        wrapper = self._create_array_wrapper(obj)
        Pickler.save(self, wrapper)

        # A framer was introduced with pickle protocol 4. The frame is
        # committed so that the wrapper lands in the file before the array
        # buffer. See https://www.python.org/dev/peps/pep-3154/#framing for
        # details on the framer behavior.
        if self.proto >= 4:
            self.framer.commit_frame(force=True)

        # The array bytes follow the wrapper directly.
        wrapper.write_array(obj, self)
class NumpyUnpickler(Unpickler):
    """Unpickler subclass able to read our numpy pickles.

    Attributes
    ----------
    mmap_mode: str
        The memorymap mode to use for reading numpy arrays.
    file_handle: file_like
        File object to unpickle from.
    ensure_native_byte_order: bool
        If True, coerce the array to use the native endianness of the
        host system.
    filename: str
        Name of the file to unpickle from. It should correspond to
        file_handle. This parameter is required when using mmap_mode.
    np: module
        Reference to numpy module if numpy is installed else None.
    """

    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, ensure_native_byte_order, mmap_mode=None):
        """Store the loading options and look up numpy if it is installed."""
        # Kept for backward compatibility with pickles generated with joblib
        # versions less than 0.10.
        self._dirname = os.path.dirname(filename)

        self.mmap_mode = mmap_mode
        self.file_handle = file_handle
        # numpy needs the file name to memory map the array data.
        self.filename = filename
        self.compat_mode = False
        self.ensure_native_byte_order = ensure_native_byte_order

        Unpickler.__init__(self, self.file_handle)

        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def load_build(self):
        """Set the state of a newly created object, then unwrap arrays.

        After the regular BUILD handling, a place-holder object
        (NDArrayWrapper or NumpyArrayWrapper) found on top of the stack is
        replaced in place by the array it stands for.

        NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
        """
        Unpickler.load_build(self)

        # NDArrayWrapper objects are supported for backward compatibility.
        if not isinstance(self.stack[-1], (NDArrayWrapper, NumpyArrayWrapper)):
            return

        if self.np is None:
            raise ImportError(
                "Trying to unpickle an ndarray, but numpy didn't import correctly"
            )

        array_wrapper = self.stack.pop()
        if isinstance(array_wrapper, NDArrayWrapper):
            # Meeting an NDArrayWrapper switches to compatibility mode, which
            # is used to raise a DeprecationWarning to the user once the
            # unpickling is over.
            self.compat_mode = True
            payload = array_wrapper.read(self)
        else:
            payload = array_wrapper.read(self, self.ensure_native_byte_order)

        self.stack.append(payload)

    # Be careful to register our new method.
    dispatch[pickle.BUILD[0]] = load_build
- ###############################################################################
- # Utility functions
def dump(value, filename, compress=0, protocol=None):
    """Persist an arbitrary Python object into one file.

    Read more in the :ref:`User Guide <persistence>`.

    Parameters
    ----------
    value: any Python object
        The object to store to disk.
    filename: str, pathlib.Path, or file object.
        The file object or path of the file in which it is to be stored.
        For a path, when `compress` is not a tuple (nor a string), the
        compressor whose registered extension matches the end of the name is
        selected automatically.
    compress: int from 0 to 9 or bool or str or 2-tuple, optional
        Optional compression level for the data. 0 or False is no
        compression. Higher value means more compression, but also slower
        read and write times.
        If compress is True, the default level of the compressor is used.
        If compress is a string, it is the name of the compression method,
        used with its default level.
        If compress is a 2-tuple, the first element must be the name of a
        registered compressor and the second element the compression level
        (an integer from 0 to 9, or None for the compressor default).
    protocol: int, optional
        Pickle protocol, see pickle.dump documentation for more details.

    Returns
    -------
    filenames: list of strings, or None
        A one-element list holding `filename` when a path was given, None
        when a file object was given.

    Raises
    ------
    ValueError
        If the compress tuple does not have exactly 2 elements, if lz4 is
        requested but not installed, if the compression level or method is
        not valid, or if `filename` is neither a string path nor an object
        with a `write` attribute.

    See Also
    --------
    joblib.load : corresponding loader

    Notes
    -----
    Memmapping on load cannot be used for compressed files. Thus
    using compression can significantly slow down loading. In
    addition, compressed files take up extra memory during
    dump and load.
    """
    if Path is not None and isinstance(filename, Path):
        filename = str(filename)

    is_fileobj = hasattr(filename, "write")
    is_filename = isinstance(filename, str)

    # Split `compress` into a method and a level. zlib is the default method
    # and, unless a branch below says otherwise, `compress` is the level.
    compress_method = "zlib"
    compress_level = compress
    if compress is True:
        # Compression enabled without a level: let the compressor pick its
        # default level.
        compress_level = None
    elif isinstance(compress, tuple):
        if len(compress) != 2:
            raise ValueError(
                "Compress argument tuple should contain exactly 2 elements: "
                "(compress method, compress level), you passed {}".format(compress)
            )
        compress_method, compress_level = compress
    elif isinstance(compress, str):
        # Method given by name, with the default level of the compressor.
        compress_method, compress_level = compress, None
        compress = (compress_method, compress_level)

    if compress_method == "lz4" and lz4 is None:
        raise ValueError(LZ4_NOT_INSTALLED_ERROR)

    level_is_set = compress_level is not None and compress_level is not False
    if level_is_set and compress_level not in range(10):
        # An invalid compression level was given.
        raise ValueError(
            'Non valid compress level given: "{}". Possible values are {}.'.format(
                compress_level, list(range(10))
            )
        )

    if compress_method not in _COMPRESSORS:
        # An unsupported compression method was given.
        raise ValueError(
            'Non valid compression method given: "{}". Possible values are {}.'.format(
                compress_method, _COMPRESSORS
            )
        )

    if not (is_filename or is_fileobj):
        # People keep inverting arguments, and the resulting error is
        # incomprehensible.
        raise ValueError(
            "Second argument should be a filename or a file-like object, "
            "%s (type %s) was given." % (filename, type(filename))
        )

    if is_filename and not isinstance(compress, tuple):
        # No explicit (method, level) pair was requested: the compressor is
        # deduced from the file name. The method is reset first; when
        # several extensions match, the last registered one is kept.
        compress_method = None
        for candidate, wrapper in _COMPRESSORS.items():
            if filename.endswith(wrapper.extension):
                compress_method = candidate

        if compress_method in _COMPRESSORS and compress_level == 0:
            # The extension asks for compression but no level was given:
            # use the default level of the compressor.
            compress_level = None

    def _pickle_into(stream):
        # Serialize `value` into the given binary stream.
        NumpyPickler(stream, protocol=protocol).dump(value)

    if compress_level != 0:
        with _write_fileobject(
            filename, compress=(compress_method, compress_level)
        ) as stream:
            _pickle_into(stream)
    elif is_filename:
        with open(filename, "wb") as stream:
            _pickle_into(stream)
    else:
        _pickle_into(filename)

    # Nothing is returned when the target is a file object. Otherwise, for
    # compatibility, the list of created filenames (a single element since
    # 0.10.0) is returned.
    return None if is_fileobj else [filename]
def _unpickle(fobj, ensure_native_byte_order, filename="", mmap_mode=None):
    """Internal unpickling function.

    Load the object stored in `fobj` with a NumpyUnpickler. A
    DeprecationWarning is emitted when the unpickler ends up in
    compatibility mode, and a UnicodeDecodeError is re-raised as a
    ValueError carrying a more user-friendly message.
    """
    # The file handle is opened early by the caller and kept open to avoid
    # race-conditions on renames. That said, if data is stored in companion
    # files, which can be the case with the old persistence format, moving
    # the directory will create a race when joblib tries to access the
    # companion files.
    unpickler = NumpyUnpickler(
        filename, fobj, ensure_native_byte_order, mmap_mode=mmap_mode
    )

    try:
        result = unpickler.load()
        if unpickler.compat_mode:
            warnings.warn(
                "The file '%s' has been generated with a "
                "joblib version less than 0.10. "
                "Please regenerate this pickle file." % filename,
                DeprecationWarning,
                stacklevel=3,
            )
    except UnicodeDecodeError as exc:
        # Report the failure with a more user-friendly error message.
        raise ValueError(
            "You may be trying to read with "
            "python 3 a joblib pickle generated with python 2. "
            "This feature is not supported by joblib."
        ) from exc

    return result
def load_temporary_memmap(filename, mmap_mode, unlink_on_gc_collect):
    """Load the object stored in `filename` and track its backing file.

    The file name of the loaded object is added to JOBLIB_MMAPS and, when
    `unlink_on_gc_collect` is true, the object is passed to
    add_maybe_unlink_finalizer.
    """
    from ._memmapping_reducer import JOBLIB_MMAPS, add_maybe_unlink_finalizer

    with open(filename, "rb") as raw_file:
        validated = _validate_fileobject_and_memmap(raw_file, filename, mmap_mode)
        with validated as (stream, effective_mmap_mode):
            # Memmap are used for interprocess communication, which should
            # keep the objects untouched. `ensure_native_byte_order=False` is
            # passed to remain consistent with the loading behavior of
            # non-memmaped arrays in workers, where the byte order is
            # preserved.
            # Note that endianness change is not implemented for memmaps, as
            # this would result in inconsistent behavior.
            loaded = _unpickle(
                stream,
                ensure_native_byte_order=False,
                filename=filename,
                mmap_mode=effective_mmap_mode,
            )

        JOBLIB_MMAPS.add(loaded.filename)
        if unlink_on_gc_collect:
            add_maybe_unlink_finalizer(loaded)
        return loaded
def load(filename, mmap_mode=None, ensure_native_byte_order="auto"):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    Read more in the :ref:`User Guide <persistence>`.

    WARNING: joblib.load relies on the pickle module and can therefore
    execute arbitrary Python code. It should therefore never be used
    to load files from untrusted sources.

    Parameters
    ----------
    filename: str, pathlib.Path, or file object.
        The file object or path of the file from which to load the object
    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, the arrays are memory-mapped from the disk. This
        mode has no effect for compressed files. Note that in this
        case the reconstructed object might no longer match exactly
        the originally pickled object.
    ensure_native_byte_order: bool, or 'auto', default='auto'
        If True, ensures that the byte order of the loaded arrays matches the
        native byte ordering (or _endianness_) of the host system. This is not
        compatible with memory-mapped arrays and using non-null `mmap_mode`
        parameter at the same time will raise an error. The default 'auto'
        parameter is equivalent to True if `mmap_mode` is None, else False.

    Returns
    -------
    result: any Python object
        The object stored in the file.

    Raises
    ------
    ValueError
        If `ensure_native_byte_order` is true while `mmap_mode` is not None.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----
    This function can load numpy array files saved separately during the
    dump. If the mmap_mode argument is given, it is passed to np.load and
    arrays are loaded as memmaps. As a consequence, the reconstructed
    object might not match the original pickled object. Note that if the
    file was saved with compression, the arrays cannot be memmapped.
    """
    if ensure_native_byte_order == "auto":
        ensure_native_byte_order = mmap_mode is None

    if ensure_native_byte_order and mmap_mode is not None:
        raise ValueError(
            "Native byte ordering can only be enforced if 'mmap_mode' parameter "
            f"is set to None, but got 'mmap_mode={mmap_mode}' instead."
        )

    if Path is not None and isinstance(filename, Path):
        filename = str(filename)

    if hasattr(filename, "read"):
        # A file object was given: its name, when it has one, is used as the
        # file name.
        stream = filename
        filename = getattr(stream, "name", "")
        with _validate_fileobject_and_memmap(stream, filename, mmap_mode) as (
            stream,
            _,
        ):
            return _unpickle(stream, ensure_native_byte_order=ensure_native_byte_order)

    with open(filename, "rb") as raw_file:
        with _validate_fileobject_and_memmap(raw_file, filename, mmap_mode) as (
            stream,
            effective_mmap_mode,
        ):
            if isinstance(stream, str):
                # A string instead of a file object means the pickle file
                # has to be loaded with the joblib compatibility function.
                return load_compatibility(stream)

            # A memory-mapped array has to be mapped with the endianness
            # it has been written with. Other arrays are coerced to the
            # native endianness of the host system.
            return _unpickle(
                stream,
                ensure_native_byte_order=ensure_native_byte_order,
                filename=filename,
                mmap_mode=effective_mmap_mode,
            )
|