# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ..utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_torch_greater_or_equal


# Maps each submodule of this package (the dict key) to the list of names that
# are exported from it. The mapping is passed to `_LazyModule` at the bottom of
# this file, and the `if TYPE_CHECKING:` branch repeats the same names as
# explicit `from .<submodule> import ...` statements, so the two listings must
# be kept in sync whenever an entry is added or removed here.
_import_structure = {
    "aqlm": ["replace_with_aqlm_linear"],
    "awq": [
        "post_init_awq_exllama_modules",
        "replace_quantization_scales",
        "replace_with_awq_linear",
    ],
    "bitnet": [
        "BitLinear",
        "pack_weights",
        "replace_with_bitnet_linear",
        "unpack_weights",
    ],
    "bitsandbytes": [
        "Bnb4bitQuantize",
        "dequantize_and_replace",
        "replace_with_bnb_linear",
        "validate_bnb_backend_availability",
    ],
    "deepspeed": [
        "HfDeepSpeedConfig",
        "HfTrainerDeepSpeedConfig",
        "deepspeed_config",
        "deepspeed_init",
        "deepspeed_load_checkpoint",
        "deepspeed_optim_sched",
        "is_deepspeed_available",
        "is_deepspeed_zero3_enabled",
        "set_hf_deepspeed_config",
        "unset_hf_deepspeed_config",
    ],
    "eetq": ["replace_with_eetq_linear"],
    "fbgemm_fp8": ["FbgemmFp8Linear", "FbgemmFp8Llama4TextExperts", "replace_with_fbgemm_fp8_linear"],
    "finegrained_fp8": ["FP8Linear", "replace_with_fp8_linear"],
    "fsdp": ["is_fsdp_enabled", "is_fsdp_managed_module"],
    "ggml": [
        "GGUF_CONFIG_DEFAULTS_MAPPING",
        "GGUF_CONFIG_MAPPING",
        "GGUF_TOKENIZER_MAPPING",
        # NOTE(review): underscore-prefixed name is exported here and is also
        # imported in the TYPE_CHECKING branch; confirm this is intentional.
        "_gguf_parse_value",
        "load_dequant_gguf_tensor",
        "load_gguf",
    ],
    "higgs": [
        "HiggsLinear",
        "dequantize_higgs",
        "quantize_with_higgs",
        "replace_with_higgs_linear",
    ],
    "hqq": ["prepare_for_hqq_linear"],
    "hub_kernels": [
        "LayerRepository",
        "lazy_load_kernel",
        "register_kernel_mapping",
        "replace_kernel_forward_from_hub",
        "use_kernel_forward_from_hub",
        "use_kernel_func_from_hub",
        "use_kernelized_func",
    ],
    "integration_utils": [
        "INTEGRATION_TO_CALLBACK",
        "AzureMLCallback",
        "ClearMLCallback",
        "CodeCarbonCallback",
        "CometCallback",
        "DagsHubCallback",
        "DVCLiveCallback",
        "FlyteCallback",
        "KubeflowCallback",
        "MLflowCallback",
        "NeptuneCallback",
        "NeptuneMissingConfiguration",
        "SwanLabCallback",
        "TensorBoardCallback",
        "TrackioCallback",
        "WandbCallback",
        "get_available_reporting_integrations",
        "get_reporting_integration_callbacks",
        "hp_params",
        "is_azureml_available",
        "is_clearml_available",
        "is_codecarbon_available",
        "is_comet_available",
        "is_dagshub_available",
        "is_dvclive_available",
        "is_flyte_deck_standard_available",
        "is_flytekit_available",
        "is_kubeflow_available",
        "is_mlflow_available",
        "is_neptune_available",
        "is_optuna_available",
        "is_ray_available",
        "is_ray_tune_available",
        "is_swanlab_available",
        "is_tensorboard_available",
        "is_trackio_available",
        "is_wandb_available",
        "rewrite_logs",
        "run_hp_search_optuna",
        "run_hp_search_ray",
        "run_hp_search_wandb",
    ],
    "liger": ["apply_liger_kernel"],
    "metal_quantization": [
        "MetalLinear",
        "replace_with_metal_linear",
    ],
    "moe": [
        "batched_mm_experts_forward",
        "grouped_mm_experts_forward",
        "use_experts_implementation",
    ],
    "mxfp4": [
        "Mxfp4GptOssExperts",
        "convert_moe_packed_tensors",
        "dequantize",
        "load_and_swizzle_mxfp4",
        "quantize_to_mxfp4",
        "replace_with_mxfp4_linear",
        "swizzle_mxfp4",
    ],
    "neftune": [
        "activate_neftune",
        "deactivate_neftune",
        "neftune_post_forward_hook",
    ],
    "peft": ["PeftAdapterMixin"],
    "quanto": ["replace_with_quanto_layers"],
    "sinq": ["SinqDeserialize", "SinqQuantize"],
    "spqr": ["replace_with_spqr_linear"],
    "vptq": ["replace_with_vptq_linear"],
}

# Entries below are added to `_import_structure` after the literal above.
# The raise / except / else guard used here is repeated, in the same order, in
# the `if TYPE_CHECKING:` branch further down; keep both halves parallel.

# `executorch` is registered only when `is_torch_available()` returns True.
try:
    if not is_torch_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # The check failed: leave `_import_structure` without an "executorch" key.
    pass
else:
    _import_structure["executorch"] = [
        "TorchExportableModuleWithStaticCache",
        "convert_and_export_with_cache",
    ]

# `tensor_parallel` is registered unconditionally (no availability guard).
_import_structure["tensor_parallel"] = [
    "shard_and_distribute_module",
    "ALL_PARALLEL_STYLES",
    "translate_to_torch_parallel_style",
]
# `flex_attention` is registered only when `is_torch_greater_or_equal("2.5")`
# returns True.
try:
    if not is_torch_greater_or_equal("2.5"):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # The check failed: leave `_import_structure` without a "flex_attention" key.
    pass
else:
    _import_structure["flex_attention"] = [
        "make_flex_block_causal_mask",
    ]

# Two views of the same public surface:
#   * When `TYPE_CHECKING` is true, every name listed in `_import_structure`
#     is imported explicitly so static analysers and IDEs can resolve it.
#   * Otherwise (normal execution) this module's entry in `sys.modules` is
#     replaced by a `_LazyModule` built from `_import_structure`.
# The imports below must therefore mirror `_import_structure` name for name.
if TYPE_CHECKING:
    from .aqlm import replace_with_aqlm_linear
    from .awq import (
        post_init_awq_exllama_modules,
        replace_quantization_scales,
        replace_with_awq_linear,
    )
    from .bitnet import (
        BitLinear,
        pack_weights,
        replace_with_bitnet_linear,
        unpack_weights,
    )
    from .bitsandbytes import (
        Bnb4bitQuantize,
        dequantize_and_replace,
        replace_with_bnb_linear,
        validate_bnb_backend_availability,
    )
    from .deepspeed import (
        HfDeepSpeedConfig,
        HfTrainerDeepSpeedConfig,
        deepspeed_config,
        deepspeed_init,
        deepspeed_load_checkpoint,
        deepspeed_optim_sched,
        is_deepspeed_available,
        is_deepspeed_zero3_enabled,
        set_hf_deepspeed_config,
        unset_hf_deepspeed_config,
    )
    from .eetq import replace_with_eetq_linear
    from .fbgemm_fp8 import FbgemmFp8Linear, FbgemmFp8Llama4TextExperts, replace_with_fbgemm_fp8_linear
    from .finegrained_fp8 import FP8Linear, replace_with_fp8_linear
    from .fsdp import is_fsdp_enabled, is_fsdp_managed_module
    from .ggml import (
        GGUF_CONFIG_DEFAULTS_MAPPING,
        GGUF_CONFIG_MAPPING,
        GGUF_TOKENIZER_MAPPING,
        _gguf_parse_value,
        load_dequant_gguf_tensor,
        load_gguf,
    )
    from .higgs import HiggsLinear, dequantize_higgs, quantize_with_higgs, replace_with_higgs_linear
    from .hqq import prepare_for_hqq_linear
    from .hub_kernels import (
        LayerRepository,
        lazy_load_kernel,
        register_kernel_mapping,
        replace_kernel_forward_from_hub,
        use_kernel_forward_from_hub,
        use_kernel_func_from_hub,
        use_kernelized_func,
    )
    from .integration_utils import (
        INTEGRATION_TO_CALLBACK,
        AzureMLCallback,
        ClearMLCallback,
        CodeCarbonCallback,
        CometCallback,
        DagsHubCallback,
        DVCLiveCallback,
        FlyteCallback,
        KubeflowCallback,
        MLflowCallback,
        NeptuneCallback,
        NeptuneMissingConfiguration,
        SwanLabCallback,
        TensorBoardCallback,
        TrackioCallback,
        WandbCallback,
        get_available_reporting_integrations,
        get_reporting_integration_callbacks,
        hp_params,
        is_azureml_available,
        is_clearml_available,
        is_codecarbon_available,
        is_comet_available,
        is_dagshub_available,
        is_dvclive_available,
        is_flyte_deck_standard_available,
        is_flytekit_available,
        is_kubeflow_available,
        is_mlflow_available,
        is_neptune_available,
        is_optuna_available,
        is_ray_available,
        is_ray_tune_available,
        is_swanlab_available,
        is_tensorboard_available,
        is_trackio_available,
        is_wandb_available,
        rewrite_logs,
        run_hp_search_optuna,
        run_hp_search_ray,
        run_hp_search_wandb,
    )
    from .liger import apply_liger_kernel
    from .metal_quantization import (
        MetalLinear,
        replace_with_metal_linear,
    )
    from .moe import (
        batched_mm_experts_forward,
        grouped_mm_experts_forward,
        use_experts_implementation,
    )
    # `convert_moe_packed_tensors` is listed under `_import_structure["mxfp4"]`,
    # so it has to be imported here as well for type checkers to see it.
    from .mxfp4 import (
        Mxfp4GptOssExperts,
        convert_moe_packed_tensors,
        dequantize,
        load_and_swizzle_mxfp4,
        quantize_to_mxfp4,
        replace_with_mxfp4_linear,
        swizzle_mxfp4,
    )
    from .neftune import activate_neftune, deactivate_neftune, neftune_post_forward_hook
    from .peft import PeftAdapterMixin
    from .quanto import replace_with_quanto_layers
    from .sinq import SinqDeserialize, SinqQuantize
    from .spqr import replace_with_spqr_linear
    from .vptq import replace_with_vptq_linear

    # Same guard as the one that registers `_import_structure["executorch"]`.
    try:
        if not is_torch_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .executorch import TorchExportableModuleWithStaticCache, convert_and_export_with_cache

    # `tensor_parallel` is registered unconditionally, so it is imported
    # unconditionally here too.
    from .tensor_parallel import (
        ALL_PARALLEL_STYLES,
        shard_and_distribute_module,
        translate_to_torch_parallel_style,
    )

    # Same guard as the one that registers `_import_structure["flex_attention"]`.
    try:
        if not is_torch_greater_or_equal("2.5"):
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .flex_attention import make_flex_block_causal_mask
else:
    import sys

    # Swap this module object for a `_LazyModule` constructed from the module
    # name, its file path, the export table and the original module spec.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)