# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
#           This file was automatically generated from src/transformers/models/lasr/modular_lasr.py.
#               Do NOT edit this file manually as any edits will be overwritten by the generation of
#             the file from the modular. If any change should be done, please apply the change to the
#                          modular_lasr.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2025 The HuggingFace Inc. team and Google LLC. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from huggingface_hub.dataclasses import strict

from ...configuration_utils import PreTrainedConfig
from ...utils import auto_docstring


@auto_docstring(checkpoint="google/medasr")
@strict
class LasrEncoderConfig(PreTrainedConfig):
    r"""
    convolution_bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias in convolutions of the conformer's convolution module.
    conv_kernel_size (`int`, *optional*, defaults to 32):
        The kernel size of the convolution layers in the Conformer block.
    subsampling_conv_channels (`int`, *optional*, defaults to 256):
        The number of channels in the subsampling convolution layers.
    subsampling_conv_kernel_size (`int`, *optional*, defaults to 5):
        The kernel size of the subsampling convolution layers.
    subsampling_conv_stride (`int`, *optional*, defaults to 2):
        The stride of the subsampling convolution layers.
    dropout_positions (`float`, *optional*, defaults to 0.0):
        The dropout ratio for the positions in the input sequence.
    feed_forward_residual_weights (`tuple[float, float]`, *optional*, defaults to `[1.5, 0.5]`):
        The residual weights for the feed forward layers.
    conv_residual_weights (`tuple[float, float]`, *optional*, defaults to `[2.0, 1.0]`):
        The residual weights for the convolution layers.
    batch_norm_momentum (`float`, *optional*, defaults to 0.01):
        The momentum for the batch normalization layers.

    Example:
    ```python
    >>> from transformers import LasrEncoderModel, LasrEncoderConfig

    >>> # Initializing a `LasrEncoder` configuration
    >>> configuration = LasrEncoderConfig()

    >>> # Initializing a model from the configuration
    >>> model = LasrEncoderModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```

    This configuration class is based on the LasrEncoder architecture from Google Health AI. You can find more details
    and pre-trained models at [google/medasr](https://huggingface.co/google/medasr).
    """

    model_type = "lasr_encoder"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Field order and annotations are consumed by the `@strict` decorator; keep them stable.
    hidden_size: int = 512
    num_hidden_layers: int = 17
    num_attention_heads: int = 8
    intermediate_size: int = 2048
    hidden_act: str = "silu"
    attention_bias: bool = False
    convolution_bias: bool = False
    conv_kernel_size: int = 32
    subsampling_conv_channels: int = 256
    num_mel_bins: int = 128
    subsampling_conv_kernel_size: int = 5
    subsampling_conv_stride: int = 2
    dropout: float | int = 0.1
    dropout_positions: float | int = 0.0
    layerdrop: float | int = 0.1
    activation_dropout: float | int = 0.1
    attention_dropout: float | int = 0.1
    max_position_embeddings: int = 10000
    initializer_range: float = 0.02
    layer_norm_eps: float = 1e-6
    feed_forward_residual_weights: list[float] | tuple[float, ...] = (1.5, 0.5)
    conv_residual_weights: list[float] | tuple[float, ...] = (2.0, 1.0)
    batch_norm_momentum: float = 0.01
    rope_parameters: dict | None = None

    def __post_init__(self, **kwargs) -> None:
        """Derive `num_key_value_heads` and delegate to the parent post-init.

        Args:
            **kwargs: Forwarded unchanged to `PreTrainedConfig.__post_init__`.
        """
        # There is no separate key/value head count on this config: it always
        # mirrors `num_attention_heads`, and is set before the parent hook runs.
        self.num_key_value_heads = self.num_attention_heads
        super().__post_init__(**kwargs)


@auto_docstring(checkpoint="google/medasr")
@strict
class LasrCTCConfig(PreTrainedConfig):
    r"""
    ctc_loss_reduction (`str`, *optional*, defaults to `"mean"`):
        Specifies the reduction to apply to the output of `torch.nn.CTCLoss`. Only relevant when training an
        instance of [`LasrForCTC`].
    ctc_zero_infinity (`bool`, *optional*, defaults to `True`):
        Whether to zero infinite losses and the associated gradients of `torch.nn.CTCLoss`. Infinite losses mainly
        occur when the inputs are too short to be aligned to the targets. Only relevant when training an instance
        of [`LasrForCTC`].

    Example:
    ```python
    >>> from transformers import LasrForCTC, LasrCTCConfig

    >>> # Initializing a Lasr configuration
    >>> configuration = LasrCTCConfig()

    >>> # Initializing a model from the configuration
    >>> model = LasrForCTC(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```

    This configuration class is based on the Lasr CTC architecture from Google Health AI. You can find more details
    and pre-trained models at [google/medasr](https://huggingface.co/google/medasr).
    """

    model_type = "lasr_ctc"
    sub_configs = {"encoder_config": LasrEncoderConfig}

    vocab_size: int = 512
    ctc_loss_reduction: str = "mean"
    ctc_zero_infinity: bool = True
    encoder_config: dict | PreTrainedConfig | None = None
    pad_token_id: int = 0

    def __post_init__(self, **kwargs) -> None:
        """Normalize `encoder_config` and delegate to the parent post-init.

        A `dict` is expanded into a `LasrEncoderConfig`, `None` becomes a
        default `LasrEncoderConfig`, and any other value is left untouched.

        Args:
            **kwargs: Forwarded unchanged to `PreTrainedConfig.__post_init__`.
        """
        if isinstance(self.encoder_config, dict):
            self.encoder_config = LasrEncoderConfig(**self.encoder_config)
        elif self.encoder_config is None:
            self.encoder_config = LasrEncoderConfig()

        # Expose the encoder's initializer range on the top-level config as well.
        self.initializer_range = self.encoder_config.initializer_range
        super().__post_init__(**kwargs)

    @property
    def inputs_to_logits_ratio(self) -> int:
        """Return the square of the encoder's `subsampling_conv_stride`."""
        # NOTE(review): squaring presumably reflects two stacked strided
        # subsampling convolutions — confirm against the encoder model.
        return self.encoder_config.subsampling_conv_stride**2


__all__ = ["LasrEncoderConfig", "LasrCTCConfig"]