# yichael/image-match
							# Generated content DO NOT EDIT
class Trainer:
    """
    Common base class shared by every trainer.

    It is not meant to be instantiated directly: constructing any concrete
    trainer implementation yields an object that is an instance of this class.

    NOTE(review): every method body in this class is empty, so the working
    implementation presumably lives elsewhere -- confirm before relying on
    this definition at runtime.
    """

    def __getstate__(self):
        """Pickle-protocol hook; the body in this file is empty."""

    def __setstate__(self, state):
        """Pickle-protocol hook; ``state`` is accepted but unused in this file."""

class BpeTrainer(Trainer):
    """
    Trainer for BPE models.

    Args:
        vocab_size (:obj:`int`, `optional`, defaults to 30000):
            Size of the final vocabulary, counting every token and the alphabet.

        min_frequency (:obj:`int`, `optional`, defaults to 0):
            Minimum frequency a pair must have in order to be merged.

        show_progress (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether progress bars are displayed while training.

        special_tokens (:obj:`List[Union[str, AddedToken]]`, `optional`):
            Special tokens the model should know of.

        limit_alphabet (:obj:`int`, `optional`):
            Maximum number of different characters kept in the alphabet.

        initial_alphabet (:obj:`List[str]`, `optional`):
            Characters to include in the initial alphabet even when they are
            never seen in the training dataset. When a string holds more than
            one character, only the first one is kept.

        continuing_subword_prefix (:obj:`str`, `optional`):
            Prefix used for every subword that is not a beginning-of-word.

        end_of_word_suffix (:obj:`str`, `optional`):
            Suffix used for every subword that is an end-of-word.

        max_token_length (:obj:`int`, `optional`):
            Prevents creating tokens longer than the given size. This can help
            keep highly repetitive tokens (such as `======` in wikipedia)
            from polluting the vocabulary.

        words (`optional`, defaults to an empty dict):
            Not described by the generated documentation.
            NOTE(review): its purpose is not visible in this file -- confirm
            against the implementation.
    """

    def __init__(
        self,
        vocab_size=30000,
        min_frequency=0,
        show_progress=True,
        # The list/dict defaults below are kept exactly as generated; this
        # empty body never mutates them.
        special_tokens=[],
        limit_alphabet=None,
        initial_alphabet=[],
        continuing_subword_prefix=None,
        end_of_word_suffix=None,
        max_token_length=None,
        words={},
    ):
        """Signature-only constructor; the body in this file is empty."""

    def __getstate__(self):
        """Pickle-protocol hook; the body in this file is empty."""

    def __setstate__(self, state):
        """Pickle-protocol hook; ``state`` is accepted but unused in this file."""

    @property
    def continuing_subword_prefix(self):
        """Prefix used for every subword that is not a beginning-of-word."""

    @continuing_subword_prefix.setter
    def continuing_subword_prefix(self, value):
        """Setter for ``continuing_subword_prefix``; the body here is empty."""

    @property
    def end_of_word_suffix(self):
        """Suffix used for every subword that is an end-of-word."""

    @end_of_word_suffix.setter
    def end_of_word_suffix(self, value):
        """Setter for ``end_of_word_suffix``; the body here is empty."""

    @property
    def initial_alphabet(self):
        """Characters included in the initial alphabet even if unseen in training."""

    @initial_alphabet.setter
    def initial_alphabet(self, value):
        """Setter for ``initial_alphabet``; the body here is empty."""

    @property
    def limit_alphabet(self):
        """Maximum number of different characters kept in the alphabet."""

    @limit_alphabet.setter
    def limit_alphabet(self, value):
        """Setter for ``limit_alphabet``; the body here is empty."""

    @property
    def max_token_length(self):
        """Size above which tokens are not created."""

    @max_token_length.setter
    def max_token_length(self, value):
        """Setter for ``max_token_length``; the body here is empty."""

    @property
    def min_frequency(self):
        """Minimum frequency a pair must have in order to be merged."""

    @min_frequency.setter
    def min_frequency(self, value):
        """Setter for ``min_frequency``; the body here is empty."""

    @property
    def show_progress(self):
        """Whether progress bars are displayed while training."""

    @show_progress.setter
    def show_progress(self, value):
        """Setter for ``show_progress``; the body here is empty."""

    @property
    def special_tokens(self):
        """Special tokens the model should know of."""

    @special_tokens.setter
    def special_tokens(self, value):
        """Setter for ``special_tokens``; the body here is empty."""

    @property
    def vocab_size(self):
        """Size of the final vocabulary, counting every token and the alphabet."""

    @vocab_size.setter
    def vocab_size(self, value):
        """Setter for ``vocab_size``; the body here is empty."""

class UnigramTrainer(Trainer):
    """
    Trainer for Unigram models.

    Args:
        vocab_size (:obj:`int`, defaults to 8000):
            Size of the final vocabulary, counting every token and the alphabet.

        show_progress (:obj:`bool`, defaults to :obj:`True`):
            Whether progress bars are displayed while training.

        special_tokens (:obj:`List[Union[str, AddedToken]]`):
            Special tokens the model should know of.

        initial_alphabet (:obj:`List[str]`):
            Characters to include in the initial alphabet even when they are
            never seen in the training dataset. When a string holds more than
            one character, only the first one is kept.

        shrinking_factor (:obj:`float`, defaults to 0.75):
            Shrinking factor applied at each training step to prune the
            vocabulary.

        unk_token (:obj:`str`, defaults to :obj:`None`):
            Token used for out-of-vocabulary tokens.

        max_piece_length (:obj:`int`, defaults to 16):
            Maximum length of a given token.

        n_sub_iterations (:obj:`int`, defaults to 2):
            Number of EM-algorithm iterations performed before the vocabulary
            is pruned.
    """

    def __init__(
        self,
        vocab_size=8000,
        show_progress=True,
        # The list defaults below are kept exactly as generated; this empty
        # body never mutates them.
        special_tokens=[],
        initial_alphabet=[],
        shrinking_factor=0.75,
        unk_token=None,
        max_piece_length=16,
        n_sub_iterations=2,
    ):
        """Signature-only constructor; the body in this file is empty."""

    def __getstate__(self):
        """Pickle-protocol hook; the body in this file is empty."""

    def __setstate__(self, state):
        """Pickle-protocol hook; ``state`` is accepted but unused in this file."""

    @property
    def initial_alphabet(self):
        """Characters included in the initial alphabet even if unseen in training."""

    @initial_alphabet.setter
    def initial_alphabet(self, value):
        """Setter for ``initial_alphabet``; the body here is empty."""

    @property
    def show_progress(self):
        """Whether progress bars are displayed while training."""

    @show_progress.setter
    def show_progress(self, value):
        """Setter for ``show_progress``; the body here is empty."""

    @property
    def special_tokens(self):
        """Special tokens the model should know of."""

    @special_tokens.setter
    def special_tokens(self, value):
        """Setter for ``special_tokens``; the body here is empty."""

    @property
    def vocab_size(self):
        """Size of the final vocabulary, counting every token and the alphabet."""

    @vocab_size.setter
    def vocab_size(self, value):
        """Setter for ``vocab_size``; the body here is empty."""

class WordLevelTrainer(Trainer):
    """
    Trainer for WordLevel models.

    Args:
        vocab_size (:obj:`int`, `optional`, defaults to 30000):
            Size of the final vocabulary, counting every token and the alphabet.

        min_frequency (:obj:`int`, `optional`, defaults to 0):
            Minimum frequency threshold.
            NOTE(review): the generated text describes this as the frequency
            "a pair should have in order to be merged", which reads as if it
            were copied from the BPE trainer -- confirm the intended meaning
            for word-level training.

        show_progress (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether progress bars are displayed while training.

        special_tokens (:obj:`List[Union[str, AddedToken]]`):
            Special tokens the model should know of.
    """

    def __init__(
        self,
        vocab_size=30000,
        min_frequency=0,
        show_progress=True,
        # The list default is kept exactly as generated; this empty body
        # never mutates it.
        special_tokens=[],
    ):
        """Signature-only constructor; the body in this file is empty."""

    def __getstate__(self):
        """Pickle-protocol hook; the body in this file is empty."""

    def __setstate__(self, state):
        """Pickle-protocol hook; ``state`` is accepted but unused in this file."""

    @property
    def min_frequency(self):
        """Minimum frequency threshold (see the class-level ``Args`` note)."""

    @min_frequency.setter
    def min_frequency(self, value):
        """Setter for ``min_frequency``; the body here is empty."""

    @property
    def show_progress(self):
        """Whether progress bars are displayed while training."""

    @show_progress.setter
    def show_progress(self, value):
        """Setter for ``show_progress``; the body here is empty."""

    @property
    def special_tokens(self):
        """Special tokens the model should know of."""

    @special_tokens.setter
    def special_tokens(self, value):
        """Setter for ``special_tokens``; the body here is empty."""

    @property
    def vocab_size(self):
        """Size of the final vocabulary, counting every token and the alphabet."""

    @vocab_size.setter
    def vocab_size(self, value):
        """Setter for ``vocab_size``; the body here is empty."""

class WordPieceTrainer(Trainer):
    """
    Trainer for WordPiece models.

    Args:
        vocab_size (:obj:`int`, `optional`, defaults to 30000):
            Size of the final vocabulary, counting every token and the alphabet.

        min_frequency (:obj:`int`, `optional`, defaults to 0):
            Minimum frequency a pair must have in order to be merged.

        show_progress (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether progress bars are displayed while training.

        special_tokens (:obj:`List[Union[str, AddedToken]]`, `optional`):
            Special tokens the model should know of.

        limit_alphabet (:obj:`int`, `optional`):
            Maximum number of different characters kept in the alphabet.

        initial_alphabet (:obj:`List[str]`, `optional`):
            Characters to include in the initial alphabet even when they are
            never seen in the training dataset. When a string holds more than
            one character, only the first one is kept.

        continuing_subword_prefix (:obj:`str`, `optional`, defaults to ``"##"``):
            Prefix used for every subword that is not a beginning-of-word.

        end_of_word_suffix (:obj:`str`, `optional`):
            Suffix used for every subword that is an end-of-word.
    """

    def __init__(
        self,
        vocab_size=30000,
        min_frequency=0,
        show_progress=True,
        # The list defaults below are kept exactly as generated; this empty
        # body never mutates them.
        special_tokens=[],
        limit_alphabet=None,
        initial_alphabet=[],
        continuing_subword_prefix="##",
        end_of_word_suffix=None,
    ):
        """Signature-only constructor; the body in this file is empty."""

    def __getstate__(self):
        """Pickle-protocol hook; the body in this file is empty."""

    def __setstate__(self, state):
        """Pickle-protocol hook; ``state`` is accepted but unused in this file."""

    @property
    def continuing_subword_prefix(self):
        """Prefix used for every subword that is not a beginning-of-word."""

    @continuing_subword_prefix.setter
    def continuing_subword_prefix(self, value):
        """Setter for ``continuing_subword_prefix``; the body here is empty."""

    @property
    def end_of_word_suffix(self):
        """Suffix used for every subword that is an end-of-word."""

    @end_of_word_suffix.setter
    def end_of_word_suffix(self, value):
        """Setter for ``end_of_word_suffix``; the body here is empty."""

    @property
    def initial_alphabet(self):
        """Characters included in the initial alphabet even if unseen in training."""

    @initial_alphabet.setter
    def initial_alphabet(self, value):
        """Setter for ``initial_alphabet``; the body here is empty."""

    @property
    def limit_alphabet(self):
        """Maximum number of different characters kept in the alphabet."""

    @limit_alphabet.setter
    def limit_alphabet(self, value):
        """Setter for ``limit_alphabet``; the body here is empty."""

    @property
    def min_frequency(self):
        """Minimum frequency a pair must have in order to be merged."""

    @min_frequency.setter
    def min_frequency(self, value):
        """Setter for ``min_frequency``; the body here is empty."""

    @property
    def show_progress(self):
        """Whether progress bars are displayed while training."""

    @show_progress.setter
    def show_progress(self, value):
        """Setter for ``show_progress``; the body here is empty."""

    @property
    def special_tokens(self):
        """Special tokens the model should know of."""

    @special_tokens.setter
    def special_tokens(self, value):
        """Setter for ``special_tokens``; the body here is empty."""

    @property
    def vocab_size(self):
        """Size of the final vocabulary, counting every token and the alphabet."""

    @vocab_size.setter
    def vocab_size(self, value):
        """Setter for ``vocab_size``; the body here is empty."""