tokenization_bart.py 827 B

1234567891011121314151617181920212223
  1. # Copyright 2024 The HuggingFace Team. All rights reserved.
  2. #
  3. # This source code is licensed under the Apache 2.0 license found in the
  4. # LICENSE file in the root directory of this source tree.
  5. """
  6. Compatibility shims for BART tokenizers in v5.
  7. In v5 we consolidate on the tokenizers-library backend and remove separate
  8. "slow" vs "fast" implementations. BART uses the same byte-level BPE
  9. tokenizer as RoBERTa, so we expose `BartTokenizer` and `BartTokenizerFast`
  10. as aliases to `RobertaTokenizer` to preserve the public API expected by
  11. existing code and tests.
  12. """
  13. from ..roberta.tokenization_roberta import RobertaTokenizer as _RobertaTokenizer
  14. # Public aliases maintained for backwards compatibility
  15. BartTokenizer = _RobertaTokenizer
  16. BartTokenizerFast = _RobertaTokenizer
  17. __all__ = ["BartTokenizer", "BartTokenizerFast"]