configuration_xmod.py 3.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. # Copyright 2023 The Meta AI Team Authors and The HuggingFace Inc. team.
  2. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """X-MOD configuration"""
  16. from huggingface_hub.dataclasses import strict
  17. from ...configuration_utils import PreTrainedConfig
  18. from ...utils import auto_docstring
  19. @auto_docstring(checkpoint="facebook/xmod-base")
  20. @strict
  21. class XmodConfig(PreTrainedConfig):
  22. r"""
  23. pre_norm (`bool`, *optional*, defaults to `False`):
  24. Whether to apply layer normalization before each block.
  25. adapter_reduction_factor (`int` or `float`, *optional*, defaults to 2):
  26. The factor by which the dimensionality of the adapter is reduced relative to `hidden_size`.
  27. adapter_layer_norm (`bool`, *optional*, defaults to `False`):
  28. Whether to apply a new layer normalization before the adapter modules (shared across all adapters).
  29. adapter_reuse_layer_norm (`bool`, *optional*, defaults to `True`):
  30. Whether to reuse the second layer normalization and apply it before the adapter modules as well.
  31. ln_before_adapter (`bool`, *optional*, defaults to `True`):
  32. Whether to apply the layer normalization before the residual connection around the adapter module.
  33. languages (`Iterable[str]`, *optional*, defaults to `["en_XX"]`):
  34. An iterable of language codes for which adapter modules should be initialized.
  35. default_language (`str`, *optional*):
  36. Language code of a default language. It will be assumed that the input is in this language if no language
  37. codes are explicitly passed to the forward method.
  38. Examples:
  39. ```python
  40. >>> from transformers import XmodConfig, XmodModel
  41. >>> # Initializing an X-MOD facebook/xmod-base style configuration
  42. >>> configuration = XmodConfig()
  43. >>> # Initializing a model (with random weights) from the facebook/xmod-base style configuration
  44. >>> model = XmodModel(configuration)
  45. >>> # Accessing the model configuration
  46. >>> configuration = model.config
  47. ```"""
  48. model_type = "xmod"
  49. vocab_size: int = 30522
  50. hidden_size: int = 768
  51. num_hidden_layers: int = 12
  52. num_attention_heads: int = 12
  53. intermediate_size: int = 3072
  54. hidden_act: str = "gelu"
  55. hidden_dropout_prob: float | int = 0.1
  56. attention_probs_dropout_prob: float | int = 0.1
  57. max_position_embeddings: int = 512
  58. type_vocab_size: int = 2
  59. initializer_range: float = 0.02
  60. layer_norm_eps: float = 1e-12
  61. pad_token_id: int | None = 1
  62. bos_token_id: int | None = 0
  63. eos_token_id: int | list[int] | None = 2
  64. use_cache: bool = True
  65. classifier_dropout: float | int | None = None
  66. pre_norm: bool = False
  67. adapter_reduction_factor: int = 2
  68. adapter_layer_norm: bool = False
  69. adapter_reuse_layer_norm: bool = True
  70. ln_before_adapter: bool = True
  71. languages: list[str] | tuple[str, ...] = ("en_XX",)
  72. default_language: str | None = None
  73. is_decoder: bool = False
  74. add_cross_attention: bool = False
  75. tie_word_embeddings: bool = True
# Names exported by `from <module> import *`: only the configuration class.
__all__ = ["XmodConfig"]