# File: configuration_cpmant.py (2.4 KB)
  1. # Copyright 2022 The OpenBMB Team and The HuggingFace Inc. team. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """CPMAnt model configuration"""
  15. from huggingface_hub.dataclasses import strict
  16. from ...configuration_utils import PreTrainedConfig
  17. from ...utils import auto_docstring
# Configuration class for the CPMAnt model: every hyperparameter is declared
# below as an annotated class attribute with a default value.
#
# NOTE(review): `auto_docstring` is presumably responsible for documenting the
# attributes that the class docstring does not describe (vocab_size,
# hidden_size, ...), using the given checkpoint name in the generated text —
# confirm against its implementation.
# NOTE(review): `strict` comes from `huggingface_hub.dataclasses`; presumably
# it validates assigned values against the type annotations — confirm.
@auto_docstring(checkpoint="openbmb/cpm-ant-10b")
@strict
class CpmAntConfig(PreTrainedConfig):
    r"""
    position_bias_num_buckets (`int`, *optional*, defaults to 512):
        The number of position_bias buckets.
    position_bias_max_distance (`int`, *optional*, defaults to 2048):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    prompt_types (`int`, *optional*, defaults to 32):
        The type of prompt.
    prompt_length (`int`, *optional*, defaults to 32):
        The length of prompt.
    segment_types (`int`, *optional*, defaults to 32):
        The type of segment.

    Example:

    ```python
    >>> from transformers import CpmAntModel, CpmAntConfig

    >>> # Initializing a CPMAnt cpm-ant-10b style configuration
    >>> configuration = CpmAntConfig()

    >>> # Initializing a model from the cpm-ant-10b style configuration
    >>> model = CpmAntModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    # String identifier of this configuration family. It is the only attribute
    # here without a type annotation.
    # NOTE(review): presumably the key used by the library's auto-class
    # registry — confirm.
    model_type = "cpmant"

    # --- Core model sizes -------------------------------------------------
    # With the defaults, num_attention_heads * dim_head == hidden_size
    # (32 * 128 == 4096). Nothing in this class enforces that relation.
    vocab_size: int = 30720
    hidden_size: int = 4096
    num_attention_heads: int = 32
    # presumably the per-head attention width — verify against the modeling code
    dim_head: int = 128
    # presumably the feed-forward inner width — verify against the modeling code
    dim_ff: int = 10240
    num_hidden_layers: int = 48

    # Annotated to accept either a float or an int; the default is 0.0.
    # NOTE(review): presumably a dropout probability — confirm where it is applied.
    dropout_p: float | int = 0.0

    # --- Position bias (described in the class docstring) -------------------
    position_bias_num_buckets: int = 512
    position_bias_max_distance: int = 2048

    # presumably the normalization epsilon — TODO confirm
    eps: float = 1e-6
    # presumably the standard deviation used for weight initialization — TODO confirm
    init_std: float = 1.0

    # --- Prompt / segment settings (described in the class docstring) -------
    prompt_types: int = 32
    prompt_length: int = 32
    segment_types: int = 32

    # NOTE(review): presumably toggles key/value caching during generation — confirm.
    use_cache: bool = True
    # NOTE(review): presumably shares the input and output embedding weights — confirm.
    tie_word_embeddings: bool = True
# Public API of this module: only the configuration class is exported.
__all__ = ["CpmAntConfig"]