# Copyright 2025 Westlake Representational Learning Lab (Fajie Yuan Lab) team and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evolla model configuration"""
from huggingface_hub.dataclasses import strict

from ...configuration_utils import PreTrainedConfig
from ...modeling_rope_utils import RopeParameters
from ...utils import auto_docstring, logging


# Module-level logger named after this module; `EvollaConfig.__post_init__`
# uses it to report when it falls back to a default `SaProtConfig`.
logger = logging.get_logger(__name__)
# Configuration of the protein sequence encoder. `EvollaConfig.sub_configs`
# maps the key "protein_encoder_config" to this class, and
# `EvollaConfig.__post_init__` instantiates it from `None` or from a dict.
@auto_docstring(checkpoint="westlake-repl/Evolla-10B-hf")
@strict
class SaProtConfig(PreTrainedConfig):
    r"""
    mask_token_id (`int`, *optional*, defaults to 4):
        The id of the *mask* token in the protein sequence model.
    position_embedding_type (`str`, *optional*, defaults to `"rotary"`):
        The type of position embedding to use in the protein sequence model. Currently only `"rotary"` is supported.
    emb_layer_norm_before (`bool`, *optional*, defaults to `False`):
        Whether to apply layer normalization before the position embedding in the protein sequence model.
    token_dropout (`bool`, *optional*, defaults to `True`):
        Whether to apply dropout to the tokens in the protein sequence model.
    """

    # NOTE(review): only the four fields listed in the docstring above are
    # described in this file; the remaining fields are presumably documented
    # by `auto_docstring` from shared defaults -- confirm against that helper.

    # Vocabulary size and special-token ids.
    vocab_size: int = 446
    mask_token_id: int = 4
    pad_token_id: int = 1

    # Layer sizes and counts.
    hidden_size: int = 1280
    num_hidden_layers: int = 33
    num_attention_heads: int = 20
    intermediate_size: int = 5120

    # Dropout probabilities; the annotations accept an int as well as a float.
    hidden_dropout_prob: float | int = 0.1
    attention_probs_dropout_prob: float | int = 0.1

    # Position limit, weight-initialization range and layer-norm epsilon.
    max_position_embeddings: int = 1026
    initializer_range: float = 0.02
    layer_norm_eps: float = 1e-05

    # Embedding-related switches (see the docstring entries above).
    position_embedding_type: str = "rotary"
    emb_layer_norm_before: bool = False
    token_dropout: bool = True

    # Decoder / cross-attention flags, both disabled by default.
    is_decoder: bool = False
    add_cross_attention: bool = False
  50. @auto_docstring(checkpoint="westlake-repl/Evolla-10B-hf")
  51. @strict
  52. class EvollaConfig(PreTrainedConfig):
  53. r"""
  54. protein_encoder_config (`dict`, *optional*):
  55. Dictionary of configuration options used to initialize [`SaProtConfig`].
  56. aligner_ffn_mult (`int`, *optional*, defaults to 4):
  57. The FFN multiplier for the aligner layer.
  58. aligner_enable_bias (`bool`, *optional*, defaults to `True`):
  59. Whether to use bias in the aligner layer.
  60. aligner_attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
  61. The dropout ratio for the attention probabilities in the aligner layer.
  62. aligner_num_add_layers (`int`, *optional*, defaults to 8):
  63. The number of additional layers for the aligner layer.
  64. resampler_depth (`int`, *optional*, defaults to 6):
  65. The depth of the resampler layer in the llama model.
  66. resampler_dim_head (`int`, *optional*, defaults to 64):
  67. The dimension of the heads in the resampler layer in the llama model.
  68. resampler_heads (`int`, *optional*, defaults to 8):
  69. The number of heads in the resampler layer in the llama model.
  70. resampler_num_latents (`int`, *optional*, defaults to 64):
  71. The number of latents in the resampler layer in the llama model.
  72. resampler_ff_mult (`int`, *optional*, defaults to 4):
  73. The FFN multiplier for the resampler layer.
  74. Example:
  75. ```python
  76. >>> from transformers import EvollaModel, EvollaConfig
  77. >>> # Initializing a Evolla evolla-10b style configuration
  78. >>> configuration = EvollaConfig()
  79. >>> # Initializing a model from the evolla-10b style configuration
  80. >>> model = EvollaModel(configuration)
  81. >>> # Accessing the model configuration
  82. >>> configuration = model.config
  83. ```"""
  84. model_type = "EvollaModel"
  85. sub_configs = {"protein_encoder_config": SaProtConfig}
  86. default_theta = 500000.0
  87. protein_encoder_config: dict | PreTrainedConfig | None = None
  88. vocab_size: int = 128256 # llama vocab size
  89. hidden_size: int = 4096 # llama hidden size
  90. intermediate_size: int = 14336 # llama intermediate size
  91. num_hidden_layers: int = 32 # llama num layers
  92. num_attention_heads: int = 32 # llama num heads
  93. num_key_value_heads: int | None = 8 # llama num key-value heads
  94. hidden_act: str = "silu" # llama activation function
  95. max_position_embeddings: int = 8192 # llama rope max length
  96. rms_norm_eps: float = 1e-05
  97. rope_parameters: RopeParameters | dict | None = None
  98. attention_bias: bool = False
  99. attention_dropout: float | int | None = 0.0
  100. mlp_bias: bool = False
  101. aligner_ffn_mult: int | None = 4
  102. aligner_enable_bias: bool | None = True
  103. aligner_attention_probs_dropout_prob: float | None = 0.1
  104. aligner_num_add_layers: int | None = 8
  105. resampler_depth: int | None = 6
  106. resampler_dim_head: int | None = 64
  107. resampler_heads: int | None = 8
  108. resampler_num_latents: int | None = 64
  109. resampler_ff_mult: int | None = 4
  110. initializer_range: float = 0.02
  111. pad_token_id: int | None = None
  112. bos_token_id: int | None = 128000
  113. eos_token_id: int | list[int] | None = 128009
  114. use_cache: bool = False
  115. tie_word_embeddings: bool = False
  116. is_decoder: bool | None = False
  117. add_cross_attention: bool | None = False
  118. def __post_init__(self, **kwargs):
  119. if self.protein_encoder_config is None:
  120. self.protein_encoder_config = SaProtConfig()
  121. logger.info("`protein_encoder_config` is `None`. Initializing the `SaProtConfig` with default values.")
  122. elif isinstance(self.protein_encoder_config, dict):
  123. self.protein_encoder_config = SaProtConfig(**self.protein_encoder_config)
  124. super().__post_init__(**kwargs)
# Names exported by a star-import of this module; only `EvollaConfig` is
# listed, so `SaProtConfig` is not part of that export list.
__all__ = ["EvollaConfig"]