# yichael/image-match


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
							#                🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
#           This file was automatically generated from src/transformers/models/modernvbert/modular_modernvbert.py.
#               Do NOT edit this file manually as any edits will be overwritten by the generation of
#             the file from the modular. If any change should be done, please apply the change to the
#                          modular_modernvbert.py file directly. One of our CI enforces this.
#                🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2026 Illuin Technology and contributors, and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Literal

from huggingface_hub.dataclasses import strict

from ...configuration_utils import PreTrainedConfig
from ...utils import auto_docstring
from ..auto import CONFIG_MAPPING, AutoConfig


# Composite configuration that pairs a text sub-config with a vision sub-config.
# NOTE(review): the class docstring below is presumably parsed by `@auto_docstring`,
# so it keeps the `name (`type`, *optional*, defaults to ...)` entry layout and
# carries no free-form summary line — confirm before restructuring it.
@auto_docstring(checkpoint="ModernVBERT/modernvbert")
@strict
class ModernVBertConfig(PreTrainedConfig):
    r"""
    pixel_shuffle_factor (`int`, *optional*, defaults to 4):
        Scale factor used by any pixel-shuffle / upsampling operations in the vision head.
    initializer_cutoff_factor (`float`, *optional*, defaults to 2.0):
        The cutoff factor for the truncated_normal_initializer for initializing all weight matrices.
    classifier_pooling (`Literal["cls", "mean"]`, *optional*, defaults to `"cls"`):
        The pooling strategy to use for classification tasks.
    classifier_bias (`bool`, *optional*, defaults to `False`):
        Whether to add a bias term to the classification head.

    Example:
    ```python
    >>> from transformers import ModernVBertConfig

    >>> # Initializing configuration
    >>> configuration = ModernVBertConfig()

    >>> # Initializing a model from the configuration (model class is implemented in
    >>> # `modernvbert.modeling_modernvbert`)

    >>> from transformers import ModernVBertModel
    >>> model = ModernVBertModel(configuration)

    >>> # Accessing the model configuration
    >>> cfg = model.config
    ```"""

    model_type = "modernvbert"
    sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}

    # Sub-configs: each may arrive as a config object, a plain dict of keyword
    # arguments, or `None`; `__post_init__` turns the latter two into config objects.
    text_config: PreTrainedConfig | dict | None = None
    vision_config: PreTrainedConfig | dict | None = None
    image_token_id: int = 50407
    pixel_shuffle_factor: int = 4
    initializer_range: float = 0.02
    initializer_cutoff_factor: float = 2.0
    # Classification-head options.
    classifier_pooling: Literal["cls", "mean"] = "cls"
    classifier_dropout: float | int = 0.0
    classifier_bias: bool = False
    tie_word_embeddings: bool = False

    def __post_init__(self, **kwargs):
        """Resolve `text_config` / `vision_config` into config objects, then defer to the parent.

        A `None` sub-config is built with no arguments and a `dict` sub-config is expanded
        as keyword arguments; the text side uses the `"modernbert"` entry of `CONFIG_MAPPING`
        and the vision side the `"siglip_vision_model"` entry. Any other value is left as given.
        All `kwargs` are forwarded to the parent `__post_init__`.
        """
        # `None` is handled as an empty set of overrides: `cls(**{})` is the same call as `cls()`.
        text_overrides = {} if self.text_config is None else self.text_config
        if isinstance(text_overrides, dict):
            self.text_config = CONFIG_MAPPING["modernbert"](**text_overrides)

        vision_overrides = {} if self.vision_config is None else self.vision_config
        if isinstance(vision_overrides, dict):
            self.vision_config = CONFIG_MAPPING["siglip_vision_model"](**vision_overrides)

        super().__post_init__(**kwargs)


# Names exported by `from <this module> import *`: only the configuration class.
__all__ = ["ModernVBertConfig"]