__init__.pyi 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. # Generated content DO NOT EDIT
  2. class DecodeStream:
  3. """
  4. Class needed for streaming decode
  5. """
  6. def __init__(self, ids=None, skip_special_tokens=False):
  7. pass
  8. def __getstate__(self, /):
  9. """
  10. Helper for pickle.
  11. """
  12. pass
  13. def step(self, tokenizer, id):
  14. """
  15. Streaming decode step
  16. Args:
  17. tokenizer (:class:`~tokenizers.Tokenizer`):
  18. The tokenizer to use for decoding
  19. id (:obj:`int` or `List[int]`):
  20. The next token id or list of token ids to add to the stream
  21. Returns:
  22. :obj:`Optional[str]`: The next decoded string chunk, or None if not enough
  23. tokens have been provided yet.
  24. """
  25. pass
  26. class Decoder:
  27. """
  28. Base class for all decoders
  29. This class is not supposed to be instantiated directly. Instead, any implementation of
  30. a Decoder will return an instance of this class when instantiated.
  31. """
  32. def __getstate__(self):
  33. """ """
  34. pass
  35. def __setstate__(self, state):
  36. """ """
  37. pass
  38. @staticmethod
  39. def custom(decoder):
  40. """ """
  41. pass
  42. def decode(self, tokens):
  43. """
  44. Decode the given list of tokens to a final string
  45. Args:
  46. tokens (:obj:`List[str]`):
  47. The list of tokens to decode
  48. Returns:
  49. :obj:`str`: The decoded string
  50. """
  51. pass
  52. class BPEDecoder(Decoder):
  53. """
  54. BPEDecoder Decoder
  55. Args:
  56. suffix (:obj:`str`, `optional`, defaults to :obj:`</w>`):
  57. The suffix that was used to characterize an end-of-word. This suffix will
  58. be replaced by whitespaces during the decoding
  59. """
  60. def __init__(self, suffix="</w>"):
  61. pass
  62. def __getstate__(self):
  63. """ """
  64. pass
  65. def __setstate__(self, state):
  66. """ """
  67. pass
  68. @staticmethod
  69. def custom(decoder):
  70. """ """
  71. pass
  72. def decode(self, tokens):
  73. """
  74. Decode the given list of tokens to a final string
  75. Args:
  76. tokens (:obj:`List[str]`):
  77. The list of tokens to decode
  78. Returns:
  79. :obj:`str`: The decoded string
  80. """
  81. pass
  82. @property
  83. def suffix(self):
  84. """ """
  85. pass
  86. @suffix.setter
  87. def suffix(self, value):
  88. """ """
  89. pass
  90. class ByteFallback(Decoder):
  91. """
  92. ByteFallback Decoder
  93. ByteFallback is a simple trick which converts tokens looking like `<0x61>`
  94. to pure bytes, and attempts to make them into a string. If the tokens
  95. cannot be decoded you will get � instead for each inconvertible byte token
  96. """
  97. def __init__(self):
  98. pass
  99. def __getstate__(self):
  100. """ """
  101. pass
  102. def __setstate__(self, state):
  103. """ """
  104. pass
  105. @staticmethod
  106. def custom(decoder):
  107. """ """
  108. pass
  109. def decode(self, tokens):
  110. """
  111. Decode the given list of tokens to a final string
  112. Args:
  113. tokens (:obj:`List[str]`):
  114. The list of tokens to decode
  115. Returns:
  116. :obj:`str`: The decoded string
  117. """
  118. pass
  119. class ByteLevel(Decoder):
  120. """
  121. ByteLevel Decoder
  122. This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.ByteLevel`
  123. :class:`~tokenizers.pre_tokenizers.PreTokenizer`.
  124. """
  125. def __init__(self):
  126. pass
  127. def __getstate__(self):
  128. """ """
  129. pass
  130. def __setstate__(self, state):
  131. """ """
  132. pass
  133. @staticmethod
  134. def custom(decoder):
  135. """ """
  136. pass
  137. def decode(self, tokens):
  138. """
  139. Decode the given list of tokens to a final string
  140. Args:
  141. tokens (:obj:`List[str]`):
  142. The list of tokens to decode
  143. Returns:
  144. :obj:`str`: The decoded string
  145. """
  146. pass
  147. class CTC(Decoder):
  148. """
  149. CTC Decoder
  150. Args:
  151. pad_token (:obj:`str`, `optional`, defaults to :obj:`<pad>`):
  152. The pad token used by CTC to delimit a new token.
  153. word_delimiter_token (:obj:`str`, `optional`, defaults to :obj:`|`):
  154. The word delimiter token. It will be replaced by a <space>
  155. cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`):
  156. Whether to cleanup some tokenization artifacts.
  157. Mainly spaces before punctuation, and some abbreviated english forms.
  158. """
  159. def __init__(self, pad_token="<pad>", word_delimiter_token="|", cleanup=True):
  160. pass
  161. def __getstate__(self):
  162. """ """
  163. pass
  164. def __setstate__(self, state):
  165. """ """
  166. pass
  167. @property
  168. def cleanup(self):
  169. """ """
  170. pass
  171. @cleanup.setter
  172. def cleanup(self, value):
  173. """ """
  174. pass
  175. @staticmethod
  176. def custom(decoder):
  177. """ """
  178. pass
  179. def decode(self, tokens):
  180. """
  181. Decode the given list of tokens to a final string
  182. Args:
  183. tokens (:obj:`List[str]`):
  184. The list of tokens to decode
  185. Returns:
  186. :obj:`str`: The decoded string
  187. """
  188. pass
  189. @property
  190. def pad_token(self):
  191. """ """
  192. pass
  193. @pad_token.setter
  194. def pad_token(self, value):
  195. """ """
  196. pass
  197. @property
  198. def word_delimiter_token(self):
  199. """ """
  200. pass
  201. @word_delimiter_token.setter
  202. def word_delimiter_token(self, value):
  203. """ """
  204. pass
  205. class Fuse(Decoder):
  206. """
  207. Fuse Decoder
  208. Fuse simply fuses every token into a single string.
  209. This is the last step of decoding, this decoder exists only if
  210. there is need to add other decoders *after* the fusion
  211. """
  212. def __init__(self):
  213. pass
  214. def __getstate__(self):
  215. """ """
  216. pass
  217. def __setstate__(self, state):
  218. """ """
  219. pass
  220. @staticmethod
  221. def custom(decoder):
  222. """ """
  223. pass
  224. def decode(self, tokens):
  225. """
  226. Decode the given list of tokens to a final string
  227. Args:
  228. tokens (:obj:`List[str]`):
  229. The list of tokens to decode
  230. Returns:
  231. :obj:`str`: The decoded string
  232. """
  233. pass
  234. class Metaspace(Decoder):
  235. """
  236. Metaspace Decoder
  237. Args:
  238. replacement (:obj:`str`, `optional`, defaults to :obj:`▁`):
  239. The replacement character. Must be exactly one character. By default we
  240. use the `▁` (U+2581) meta symbol (Same as in SentencePiece).
  241. prepend_scheme (:obj:`str`, `optional`, defaults to :obj:`"always"`):
  242. Whether to add a space to the first word if there isn't already one. This
  243. lets us treat `hello` exactly like `say hello`.
  244. Choices: "always", "never", "first". First means the space is only added on the first
  245. token (relevant when special tokens are used or other pre_tokenizer are used).
  246. """
  247. def __init__(self, replacement="▁", prepend_scheme="always", split=True):
  248. pass
  249. def __getstate__(self):
  250. """ """
  251. pass
  252. def __setstate__(self, state):
  253. """ """
  254. pass
  255. @staticmethod
  256. def custom(decoder):
  257. """ """
  258. pass
  259. def decode(self, tokens):
  260. """
  261. Decode the given list of tokens to a final string
  262. Args:
  263. tokens (:obj:`List[str]`):
  264. The list of tokens to decode
  265. Returns:
  266. :obj:`str`: The decoded string
  267. """
  268. pass
  269. @property
  270. def prepend_scheme(self):
  271. """ """
  272. pass
  273. @prepend_scheme.setter
  274. def prepend_scheme(self, value):
  275. """ """
  276. pass
  277. @property
  278. def replacement(self):
  279. """ """
  280. pass
  281. @replacement.setter
  282. def replacement(self, value):
  283. """ """
  284. pass
  285. @property
  286. def split(self):
  287. """ """
  288. pass
  289. @split.setter
  290. def split(self, value):
  291. """ """
  292. pass
  293. class Replace(Decoder):
  294. """
  295. Replace Decoder
  296. This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.Replace`
  297. :class:`~tokenizers.pre_tokenizers.PreTokenizer`.
  298. """
  299. def __init__(self, pattern, content):
  300. pass
  301. def __getstate__(self):
  302. """ """
  303. pass
  304. def __setstate__(self, state):
  305. """ """
  306. pass
  307. @staticmethod
  308. def custom(decoder):
  309. """ """
  310. pass
  311. def decode(self, tokens):
  312. """
  313. Decode the given list of tokens to a final string
  314. Args:
  315. tokens (:obj:`List[str]`):
  316. The list of tokens to decode
  317. Returns:
  318. :obj:`str`: The decoded string
  319. """
  320. pass
  321. class Sequence(Decoder):
  322. """
  323. Sequence Decoder
  324. Args:
  325. decoders (:obj:`List[Decoder]`)
  326. The decoders that need to be chained
  327. """
  328. def __init__(self, decoders):
  329. pass
  330. def __getnewargs__(self):
  331. """ """
  332. pass
  333. def __getstate__(self):
  334. """ """
  335. pass
  336. def __setstate__(self, state):
  337. """ """
  338. pass
  339. @staticmethod
  340. def custom(decoder):
  341. """ """
  342. pass
  343. def decode(self, tokens):
  344. """
  345. Decode the given list of tokens to a final string
  346. Args:
  347. tokens (:obj:`List[str]`):
  348. The list of tokens to decode
  349. Returns:
  350. :obj:`str`: The decoded string
  351. """
  352. pass
  353. class Strip(Decoder):
  354. """
  355. Strip normalizer
  356. Strips n left characters of each token, or n right characters of each token
  357. """
  358. def __init__(self, content=" ", left=0, right=0):
  359. pass
  360. def __getstate__(self):
  361. """ """
  362. pass
  363. def __setstate__(self, state):
  364. """ """
  365. pass
  366. @property
  367. def content(self):
  368. """ """
  369. pass
  370. @content.setter
  371. def content(self, value):
  372. """ """
  373. pass
  374. @staticmethod
  375. def custom(decoder):
  376. """ """
  377. pass
  378. def decode(self, tokens):
  379. """
  380. Decode the given list of tokens to a final string
  381. Args:
  382. tokens (:obj:`List[str]`):
  383. The list of tokens to decode
  384. Returns:
  385. :obj:`str`: The decoded string
  386. """
  387. pass
  388. @property
  389. def start(self):
  390. """ """
  391. pass
  392. @start.setter
  393. def start(self, value):
  394. """ """
  395. pass
  396. @property
  397. def stop(self):
  398. """ """
  399. pass
  400. @stop.setter
  401. def stop(self, value):
  402. """ """
  403. pass
  404. class WordPiece(Decoder):
  405. """
  406. WordPiece Decoder
  407. Args:
  408. prefix (:obj:`str`, `optional`, defaults to :obj:`##`):
  409. The prefix to use for subwords that are not a beginning-of-word
  410. cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`):
  411. Whether to cleanup some tokenization artifacts. Mainly spaces before punctuation,
  412. and some abbreviated english forms.
  413. """
  414. def __init__(self, prefix="##", cleanup=True):
  415. pass
  416. def __getstate__(self):
  417. """ """
  418. pass
  419. def __setstate__(self, state):
  420. """ """
  421. pass
  422. @property
  423. def cleanup(self):
  424. """ """
  425. pass
  426. @cleanup.setter
  427. def cleanup(self, value):
  428. """ """
  429. pass
  430. @staticmethod
  431. def custom(decoder):
  432. """ """
  433. pass
  434. def decode(self, tokens):
  435. """
  436. Decode the given list of tokens to a final string
  437. Args:
  438. tokens (:obj:`List[str]`):
  439. The list of tokens to decode
  440. Returns:
  441. :obj:`str`: The decoded string
  442. """
  443. pass
  444. @property
  445. def prefix(self):
  446. """ """
  447. pass
  448. @prefix.setter
  449. def prefix(self, value):
  450. """ """
  451. pass