# yichael/image-match

# Short term workaround for https://github.com/ray-project/ray/issues/32435
# Dataset has a hard dependency on pandas, so it doesn't need to be delayed.
import pandas  # noqa
from packaging.version import parse as parse_version

from ray.data._internal.utils.arrow_utils import get_pyarrow_version

from ray.data._internal.compute import ActorPoolStrategy, TaskPoolStrategy
from ray.data._internal.datasource.tfrecords_datasource import TFXReadOptions
from ray.data._internal.execution.interfaces import (
    ExecutionOptions,
    ExecutionResources,
    NodeIdStr,
)
from ray.data._internal.logging import configure_logging
from ray.data.context import DataContext, DatasetContext
from ray.data.dataset import (
    Dataset,
    Schema,
    SinkMode,
    ClickHouseTableSettings,
    SaveMode,
)
from ray.data.stats import DatasetSummary
from ray.data.datasource import (
    BlockBasedFileDatasink,
    Datasink,
    Datasource,
    FileShuffleConfig,
    ReadTask,
    RowBasedFileDatasink,
)
from ray.data.iterator import DataIterator, DatasetIterator
from ray.data.preprocessor import Preprocessor
from ray.data.read_api import (  # noqa: F401
    KafkaAuthConfig,  # noqa: F401
    from_arrow,
    from_arrow_refs,
    from_blocks,
    from_daft,
    from_dask,
    from_huggingface,
    from_items,
    from_mars,
    from_modin,
    from_numpy,
    from_numpy_refs,
    from_pandas,
    from_pandas_refs,
    from_spark,
    from_tf,
    from_torch,
    range,
    range_tensor,
    read_audio,
    read_avro,
    read_bigquery,
    read_binary_files,
    read_clickhouse,
    read_csv,
    read_databricks_tables,
    read_datasource,
    read_delta,
    read_delta_sharing_tables,
    read_kafka,
    read_hudi,
    read_iceberg,
    read_images,
    read_json,
    read_lance,
    read_mcap,
    read_mongo,
    read_numpy,
    read_parquet,
    read_snowflake,
    read_sql,
    read_text,
    read_tfrecords,
    read_unity_catalog,
    read_videos,
    read_webdataset,
)

# Cache slot for the global functions of callable classes. It lives at module
# level because it has to be process-global across cloudpickled functions.
_map_actor_context = None

# Runs once, when this package is first imported.
configure_logging()

try:
    import pyarrow as pa

    # Imported only so that these Arrow extension types get registered.
    from ray.data._internal.tensor_extensions.arrow import (  # noqa
        ArrowTensorType,
        ArrowVariableShapedTensorType,
    )

    pyarrow_version = get_pyarrow_version()

    # https://github.com/apache/arrow/pull/38608 deprecated `PyExtensionType` and
    # disabled its deserialization by default; the class is deleted from
    # Arrow >= 21.0. So that users can still load data written with an earlier
    # version of Ray Data, auto-loading of serialized tensor extensions can be
    # switched back on through the RAY_DATA_AUTOLOAD_PYEXTENSIONTYPE flag.
    #
    # Nothing is done when the pyarrow version is unknown or is 21.0.0 or newer.
    if pyarrow_version is not None and pyarrow_version < parse_version("21.0.0"):
        from ray._private.ray_constants import env_bool

        RAY_DATA_AUTOLOAD_PYEXTENSIONTYPE = env_bool(
            "RAY_DATA_AUTOLOAD_PYEXTENSIONTYPE", False
        )

        # Auto-loading is only switched on when the flag is set and pyarrow is
        # at least 14.0.1.
        if RAY_DATA_AUTOLOAD_PYEXTENSIONTYPE and pyarrow_version >= parse_version(
            "14.0.1"
        ):
            pa.PyExtensionType.set_auto_load(True)

except ModuleNotFoundError:
    # A module imported inside this block is not installed; skip the setup.
    pass


# Names exported by `from ray.data import *`. Every entry has to be bound by the
# imports at the top of this module.
#
# NOTE(review): `from_blocks` is imported from `ray.data.read_api` but is not
# listed here -- confirm whether that omission is intentional.
__all__ = [
    "ActorPoolStrategy",
    "BlockBasedFileDatasink",
    "ClickHouseTableSettings",
    "Dataset",
    "DataContext",
    "DatasetContext",  # Backwards compatibility alias.
    "DatasetSummary",
    "DataIterator",
    "DatasetIterator",  # Backwards compatibility alias.
    "Datasink",
    "Datasource",
    "ExecutionOptions",
    "ExecutionResources",
    "FileShuffleConfig",
    "NodeIdStr",
    "ReadTask",
    "RowBasedFileDatasink",
    "Schema",
    "SinkMode",
    "SaveMode",
    "TaskPoolStrategy",
    "from_daft",
    "from_dask",
    "from_items",
    "from_arrow",
    "from_arrow_refs",
    "from_mars",
    "from_modin",
    "from_numpy",
    "from_numpy_refs",
    "from_pandas",
    "from_pandas_refs",
    "from_spark",
    "from_tf",
    "from_torch",
    "from_huggingface",
    "range",
    "range_tensor",
    "read_audio",
    "read_avro",
    "read_text",
    # Previously imported above but missing from this list.
    "read_bigquery",
    "read_binary_files",
    "read_clickhouse",
    "read_csv",
    # Previously imported above but missing from this list.
    "read_databricks_tables",
    "read_datasource",
    "read_delta",
    "read_delta_sharing_tables",
    "read_kafka",
    "KafkaAuthConfig",
    "read_hudi",
    "read_iceberg",
    "read_images",
    "read_json",
    "read_lance",
    "read_mcap",
    "read_numpy",
    "read_mongo",
    "read_parquet",
    "read_snowflake",
    "read_sql",
    "read_tfrecords",
    "read_unity_catalog",
    "read_videos",
    "read_webdataset",
    "Preprocessor",
    "TFXReadOptions",
]