# __init__.pyi (10 KB)

# Generated content DO NOT EDIT
  2. class Trainer:
  3. """
  4. Base class for all trainers
  5. This class is not supposed to be instantiated directly. Instead, any implementation of a
  6. Trainer will return an instance of this class when instantiated.
  7. """
  8. def __getstate__(self):
  9. """ """
  10. pass
  11. def __setstate__(self, state):
  12. """ """
  13. pass
  14. class BpeTrainer(Trainer):
  15. """
  16. Trainer capable of training a BPE model
  17. Args:
  18. vocab_size (:obj:`int`, `optional`):
  19. The size of the final vocabulary, including all tokens and alphabet.
  20. min_frequency (:obj:`int`, `optional`):
  21. The minimum frequency a pair should have in order to be merged.
  22. show_progress (:obj:`bool`, `optional`):
  23. Whether to show progress bars while training.
  24. special_tokens (:obj:`List[Union[str, AddedToken]]`, `optional`):
  25. A list of special tokens the model should know of.
  26. limit_alphabet (:obj:`int`, `optional`):
  27. The maximum different characters to keep in the alphabet.
  28. initial_alphabet (:obj:`List[str]`, `optional`):
  29. A list of characters to include in the initial alphabet, even
  30. if not seen in the training dataset.
  31. If the strings contain more than one character, only the first one
  32. is kept.
  33. continuing_subword_prefix (:obj:`str`, `optional`):
  34. A prefix to be used for every subword that is not a beginning-of-word.
  35. end_of_word_suffix (:obj:`str`, `optional`):
  36. A suffix to be used for every subword that is a end-of-word.
  37. max_token_length (:obj:`int`, `optional`):
  38. Prevents creating tokens longer than the specified size.
  39. This can help with reducing polluting your vocabulary with
  40. highly repetitive tokens like `======` for wikipedia
  41. """
  42. def __init__(
  43. self,
  44. vocab_size=30000,
  45. min_frequency=0,
  46. show_progress=True,
  47. special_tokens=[],
  48. limit_alphabet=None,
  49. initial_alphabet=[],
  50. continuing_subword_prefix=None,
  51. end_of_word_suffix=None,
  52. max_token_length=None,
  53. words={},
  54. ):
  55. pass
  56. def __getstate__(self):
  57. """ """
  58. pass
  59. def __setstate__(self, state):
  60. """ """
  61. pass
  62. @property
  63. def continuing_subword_prefix(self):
  64. """ """
  65. pass
  66. @continuing_subword_prefix.setter
  67. def continuing_subword_prefix(self, value):
  68. """ """
  69. pass
  70. @property
  71. def end_of_word_suffix(self):
  72. """ """
  73. pass
  74. @end_of_word_suffix.setter
  75. def end_of_word_suffix(self, value):
  76. """ """
  77. pass
  78. @property
  79. def initial_alphabet(self):
  80. """ """
  81. pass
  82. @initial_alphabet.setter
  83. def initial_alphabet(self, value):
  84. """ """
  85. pass
  86. @property
  87. def limit_alphabet(self):
  88. """ """
  89. pass
  90. @limit_alphabet.setter
  91. def limit_alphabet(self, value):
  92. """ """
  93. pass
  94. @property
  95. def max_token_length(self):
  96. """ """
  97. pass
  98. @max_token_length.setter
  99. def max_token_length(self, value):
  100. """ """
  101. pass
  102. @property
  103. def min_frequency(self):
  104. """ """
  105. pass
  106. @min_frequency.setter
  107. def min_frequency(self, value):
  108. """ """
  109. pass
  110. @property
  111. def show_progress(self):
  112. """ """
  113. pass
  114. @show_progress.setter
  115. def show_progress(self, value):
  116. """ """
  117. pass
  118. @property
  119. def special_tokens(self):
  120. """ """
  121. pass
  122. @special_tokens.setter
  123. def special_tokens(self, value):
  124. """ """
  125. pass
  126. @property
  127. def vocab_size(self):
  128. """ """
  129. pass
  130. @vocab_size.setter
  131. def vocab_size(self, value):
  132. """ """
  133. pass
  134. class UnigramTrainer(Trainer):
  135. """
  136. Trainer capable of training a Unigram model
  137. Args:
  138. vocab_size (:obj:`int`):
  139. The size of the final vocabulary, including all tokens and alphabet.
  140. show_progress (:obj:`bool`):
  141. Whether to show progress bars while training.
  142. special_tokens (:obj:`List[Union[str, AddedToken]]`):
  143. A list of special tokens the model should know of.
  144. initial_alphabet (:obj:`List[str]`):
  145. A list of characters to include in the initial alphabet, even
  146. if not seen in the training dataset.
  147. If the strings contain more than one character, only the first one
  148. is kept.
  149. shrinking_factor (:obj:`float`):
  150. The shrinking factor used at each step of the training to prune the
  151. vocabulary.
  152. unk_token (:obj:`str`):
  153. The token used for out-of-vocabulary tokens.
  154. max_piece_length (:obj:`int`):
  155. The maximum length of a given token.
  156. n_sub_iterations (:obj:`int`):
  157. The number of iterations of the EM algorithm to perform before
  158. pruning the vocabulary.
  159. """
  160. def __init__(
  161. self,
  162. vocab_size=8000,
  163. show_progress=True,
  164. special_tokens=[],
  165. initial_alphabet=[],
  166. shrinking_factor=0.75,
  167. unk_token=None,
  168. max_piece_length=16,
  169. n_sub_iterations=2,
  170. ):
  171. pass
  172. def __getstate__(self):
  173. """ """
  174. pass
  175. def __setstate__(self, state):
  176. """ """
  177. pass
  178. @property
  179. def initial_alphabet(self):
  180. """ """
  181. pass
  182. @initial_alphabet.setter
  183. def initial_alphabet(self, value):
  184. """ """
  185. pass
  186. @property
  187. def show_progress(self):
  188. """ """
  189. pass
  190. @show_progress.setter
  191. def show_progress(self, value):
  192. """ """
  193. pass
  194. @property
  195. def special_tokens(self):
  196. """ """
  197. pass
  198. @special_tokens.setter
  199. def special_tokens(self, value):
  200. """ """
  201. pass
  202. @property
  203. def vocab_size(self):
  204. """ """
  205. pass
  206. @vocab_size.setter
  207. def vocab_size(self, value):
  208. """ """
  209. pass
  210. class WordLevelTrainer(Trainer):
  211. """
  212. Trainer capable of training a WorldLevel model
  213. Args:
  214. vocab_size (:obj:`int`, `optional`):
  215. The size of the final vocabulary, including all tokens and alphabet.
  216. min_frequency (:obj:`int`, `optional`):
  217. The minimum frequency a pair should have in order to be merged.
  218. show_progress (:obj:`bool`, `optional`):
  219. Whether to show progress bars while training.
  220. special_tokens (:obj:`List[Union[str, AddedToken]]`):
  221. A list of special tokens the model should know of.
  222. """
  223. def __init__(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[]):
  224. pass
  225. def __getstate__(self):
  226. """ """
  227. pass
  228. def __setstate__(self, state):
  229. """ """
  230. pass
  231. @property
  232. def min_frequency(self):
  233. """ """
  234. pass
  235. @min_frequency.setter
  236. def min_frequency(self, value):
  237. """ """
  238. pass
  239. @property
  240. def show_progress(self):
  241. """ """
  242. pass
  243. @show_progress.setter
  244. def show_progress(self, value):
  245. """ """
  246. pass
  247. @property
  248. def special_tokens(self):
  249. """ """
  250. pass
  251. @special_tokens.setter
  252. def special_tokens(self, value):
  253. """ """
  254. pass
  255. @property
  256. def vocab_size(self):
  257. """ """
  258. pass
  259. @vocab_size.setter
  260. def vocab_size(self, value):
  261. """ """
  262. pass
  263. class WordPieceTrainer(Trainer):
  264. """
  265. Trainer capable of training a WordPiece model
  266. Args:
  267. vocab_size (:obj:`int`, `optional`):
  268. The size of the final vocabulary, including all tokens and alphabet.
  269. min_frequency (:obj:`int`, `optional`):
  270. The minimum frequency a pair should have in order to be merged.
  271. show_progress (:obj:`bool`, `optional`):
  272. Whether to show progress bars while training.
  273. special_tokens (:obj:`List[Union[str, AddedToken]]`, `optional`):
  274. A list of special tokens the model should know of.
  275. limit_alphabet (:obj:`int`, `optional`):
  276. The maximum different characters to keep in the alphabet.
  277. initial_alphabet (:obj:`List[str]`, `optional`):
  278. A list of characters to include in the initial alphabet, even
  279. if not seen in the training dataset.
  280. If the strings contain more than one character, only the first one
  281. is kept.
  282. continuing_subword_prefix (:obj:`str`, `optional`):
  283. A prefix to be used for every subword that is not a beginning-of-word.
  284. end_of_word_suffix (:obj:`str`, `optional`):
  285. A suffix to be used for every subword that is a end-of-word.
  286. """
  287. def __init__(
  288. self,
  289. vocab_size=30000,
  290. min_frequency=0,
  291. show_progress=True,
  292. special_tokens=[],
  293. limit_alphabet=None,
  294. initial_alphabet=[],
  295. continuing_subword_prefix="##",
  296. end_of_word_suffix=None,
  297. ):
  298. pass
  299. def __getstate__(self):
  300. """ """
  301. pass
  302. def __setstate__(self, state):
  303. """ """
  304. pass
  305. @property
  306. def continuing_subword_prefix(self):
  307. """ """
  308. pass
  309. @continuing_subword_prefix.setter
  310. def continuing_subword_prefix(self, value):
  311. """ """
  312. pass
  313. @property
  314. def end_of_word_suffix(self):
  315. """ """
  316. pass
  317. @end_of_word_suffix.setter
  318. def end_of_word_suffix(self, value):
  319. """ """
  320. pass
  321. @property
  322. def initial_alphabet(self):
  323. """ """
  324. pass
  325. @initial_alphabet.setter
  326. def initial_alphabet(self, value):
  327. """ """
  328. pass
  329. @property
  330. def limit_alphabet(self):
  331. """ """
  332. pass
  333. @limit_alphabet.setter
  334. def limit_alphabet(self, value):
  335. """ """
  336. pass
  337. @property
  338. def min_frequency(self):
  339. """ """
  340. pass
  341. @min_frequency.setter
  342. def min_frequency(self, value):
  343. """ """
  344. pass
  345. @property
  346. def show_progress(self):
  347. """ """
  348. pass
  349. @show_progress.setter
  350. def show_progress(self, value):
  351. """ """
  352. pass
  353. @property
  354. def special_tokens(self):
  355. """ """
  356. pass
  357. @special_tokens.setter
  358. def special_tokens(self, value):
  359. """ """
  360. pass
  361. @property
  362. def vocab_size(self):
  363. """ """
  364. pass
  365. @vocab_size.setter
  366. def vocab_size(self, value):
  367. """ """
  368. pass