| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638 |
- # Copyright The Lightning team.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- # Copyright 2017 Maja Popovic
- # The code is derived from https://github.com/m-popovic/chrF/blob/6d3c384/chrF%2B%2B.py
- # The original author and copyright holder have agreed to relicense the derived code under the Apache License,
- # Version 2.0 (the "License")
- # Reference to the approval: https://github.com/Lightning-AI/torchmetrics/pull/2701#issuecomment-2316891785
- from collections import defaultdict
- from collections.abc import Sequence
- from itertools import chain
- from typing import List, Optional, Union
- import torch
- from torch import Tensor, tensor
- from torchmetrics.functional.text.helper import _validate_inputs
- _EPS_SMOOTHING = tensor(1e-16)
- # Taken from https://github.com/mjpost/sacrebleu/blob/master/sacrebleu/metrics/chrf.py
- _PUNCTUATIONS = set("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
- def _prepare_n_grams_dicts(
- n_char_order: int, n_word_order: int
- ) -> tuple[
- dict[int, Tensor], dict[int, Tensor], dict[int, Tensor], dict[int, Tensor], dict[int, Tensor], dict[int, Tensor]
- ]:
- """Prepare dictionaries with default zero values for total ref, hypothesis and matching character and word n-grams.
- Args:
- n_char_order: A character n-gram order.
- n_word_order: A word n-gram order.
- Return:
- Dictionaries with default zero values for total reference, hypothesis and matching character and word
- n-grams.
- """
- total_preds_char_n_grams: dict[int, Tensor] = {n + 1: tensor(0.0) for n in range(n_char_order)}
- total_preds_word_n_grams: dict[int, Tensor] = {n + 1: tensor(0.0) for n in range(n_word_order)}
- total_target_char_n_grams: dict[int, Tensor] = {n + 1: tensor(0.0) for n in range(n_char_order)}
- total_target_word_n_grams: dict[int, Tensor] = {n + 1: tensor(0.0) for n in range(n_word_order)}
- total_matching_char_n_grams: dict[int, Tensor] = {n + 1: tensor(0.0) for n in range(n_char_order)}
- total_matching_word_n_grams: dict[int, Tensor] = {n + 1: tensor(0.0) for n in range(n_word_order)}
- return (
- total_preds_char_n_grams,
- total_preds_word_n_grams,
- total_target_char_n_grams,
- total_target_word_n_grams,
- total_matching_char_n_grams,
- total_matching_word_n_grams,
- )
- def _get_characters(sentence: str, whitespace: bool) -> list[str]:
- """Split sentence into individual characters.
- Args:
- sentence: An input sentence to split.
- whitespace: An indication whether to keep whitespaces during character n-gram extraction.
- Return:
- A list of separated characters.
- """
- if whitespace:
- return list(sentence)
- return list(sentence.strip().replace(" ", ""))
- def _separate_word_and_punctuation(word: str) -> list[str]:
- """Separates out punctuation from beginning and end of words for chrF.
- Adapted from https://github.com/m-popovic/chrF and
- https://github.com/mjpost/sacrebleu/blob/master/sacrebleu/metrics/chrf.py.
- Args:
- word: An input word to be separated from a punctuation if present.
- Return:
- A list of a single word or a separated word and punctuation.
- """
- if len(word) == 1:
- return [word]
- if word[-1] in _PUNCTUATIONS:
- return [word[:-1], word[-1]]
- if word[0] in _PUNCTUATIONS:
- return [word[0], word[1:]]
- return [word]
- def _get_words_and_punctuation(sentence: str) -> list[str]:
- """Separates out punctuation from beginning and end of words for chrF for all words in the sentence.
- Args:
- sentence: An input sentence to split
- Return:
- An aggregated list of separated words and punctuation.
- """
- return list(chain.from_iterable(_separate_word_and_punctuation(word) for word in sentence.strip().split()))
- def _ngram_counts(char_or_word_list: list[str], n_gram_order: int) -> dict[int, dict[tuple[str, ...], Tensor]]:
- """Calculate n-gram counts.
- Args:
- char_or_word_list: A list of characters of words
- n_gram_order: The largest number of n-gram.
- Return:
- A dictionary of dictionaries with a counts of given n-grams.
- """
- ngrams: dict[int, dict[tuple[str, ...], Tensor]] = defaultdict(lambda: defaultdict(lambda: tensor(0.0)))
- for n in range(1, n_gram_order + 1):
- for ngram in (tuple(char_or_word_list[i : i + n]) for i in range(len(char_or_word_list) - n + 1)):
- ngrams[n][ngram] += tensor(1)
- return ngrams
- def _get_n_grams_counts_and_total_ngrams(
- sentence: str, n_char_order: int, n_word_order: int, lowercase: bool, whitespace: bool
- ) -> tuple[
- dict[int, dict[tuple[str, ...], Tensor]],
- dict[int, dict[tuple[str, ...], Tensor]],
- dict[int, Tensor],
- dict[int, Tensor],
- ]:
- """Get n-grams and total n-grams.
- Args:
- sentence: An input sentence
- n_char_order: A character n-gram order.
- n_word_order: A word n-gram order.
- lowercase: An indication whether to enable case-insensitivity.
- whitespace: An indication whether to keep whitespaces during character n-gram extraction.
- Return:
- char_n_grams_counts: A dictionary of dictionaries with sentence character n-grams.
- word_n_grams_counts: A dictionary of dictionaries with sentence word n-grams.
- total_char_n_grams: A dictionary containing a total number of sentence character n-grams.
- total_word_n_grams: A dictionary containing a total number of sentence word n-grams.
- """
- def _char_and_word_ngrams_counts(
- sentence: str, n_char_order: int, n_word_order: int, lowercase: bool
- ) -> tuple[dict[int, dict[tuple[str, ...], Tensor]], dict[int, dict[tuple[str, ...], Tensor]]]:
- """Get a dictionary of dictionaries with a counts of given n-grams."""
- if lowercase:
- sentence = sentence.lower()
- char_n_grams_counts = _ngram_counts(_get_characters(sentence, whitespace), n_char_order)
- word_n_grams_counts = _ngram_counts(_get_words_and_punctuation(sentence), n_word_order)
- return char_n_grams_counts, word_n_grams_counts
- def _get_total_ngrams(n_grams_counts: dict[int, dict[tuple[str, ...], Tensor]]) -> dict[int, Tensor]:
- """Get total sum of n-grams over n-grams w.r.t n."""
- total_n_grams: dict[int, Tensor] = defaultdict(lambda: tensor(0.0))
- for n in n_grams_counts:
- total_n_grams[n] = sum(n_grams_counts[n].values()).detach().clone() # type: ignore
- return total_n_grams
- char_n_grams_counts, word_n_grams_counts = _char_and_word_ngrams_counts(
- sentence, n_char_order, n_word_order, lowercase
- )
- total_char_n_grams = _get_total_ngrams(char_n_grams_counts)
- total_word_n_grams = _get_total_ngrams(word_n_grams_counts)
- return char_n_grams_counts, word_n_grams_counts, total_char_n_grams, total_word_n_grams
- def _get_ngram_matches(
- hyp_n_grams_counts: dict[int, dict[tuple[str, ...], Tensor]],
- ref_n_grams_counts: dict[int, dict[tuple[str, ...], Tensor]],
- ) -> dict[int, Tensor]:
- """Get a number of n-gram matches between reference and hypothesis n-grams.
- Args:
- hyp_n_grams_counts: n-grams counts for hypothesis
- ref_n_grams_counts: n-grams counts for reference
- Return:
- matching_n_grams
- """
- matching_n_grams: dict[int, Tensor] = defaultdict(lambda: tensor(0.0))
- for n in hyp_n_grams_counts:
- min_n_grams = [
- torch.min(ref_n_grams_counts[n][n_gram], hyp_n_grams_counts[n][n_gram]) for n_gram in hyp_n_grams_counts[n]
- ]
- matching_n_grams[n] = sum(min_n_grams).detach().clone() # type: ignore
- return matching_n_grams
- def _sum_over_dicts(total_n_grams: dict[int, Tensor], n_grams: dict[int, Tensor]) -> dict[int, Tensor]:
- """Aggregate total n-grams to keep corpus-level statistics.
- Args:
- total_n_grams: A dictionary containing a total corpus-level number of n-grams.
- n_grams: A dictionary containing a sentence-level number of n-grams.
- Return:
- A dictionary containing a total corpus-level number of n-grams.
- """
- for n in n_grams:
- total_n_grams[n] += n_grams[n]
- return total_n_grams
- def _calculate_fscore(
- matching_char_n_grams: dict[int, Tensor],
- matching_word_n_grams: dict[int, Tensor],
- hyp_char_n_grams: dict[int, Tensor],
- hyp_word_n_grams: dict[int, Tensor],
- ref_char_n_grams: dict[int, Tensor],
- ref_word_n_grams: dict[int, Tensor],
- n_order: float,
- beta: float,
- ) -> Tensor:
- """Calculate sentence-level chrF/chrF++ score.
- For given hypothesis and reference statistics (either sentence-level or corpus-level)
- the chrF/chrF++ score is returned.
- Args:
- matching_char_n_grams:
- A total number of matching character n-grams between the best matching reference and hypothesis.
- matching_word_n_grams:
- A total number of matching word n-grams between the best matching reference and hypothesis.
- hyp_char_n_grams: A total number of hypothesis character n-grams.
- hyp_word_n_grams: A total number of hypothesis word n-grams.
- ref_char_n_grams: A total number of reference character n-grams.
- ref_word_n_grams: A total number of reference word n-grams.
- n_order: A sum of character and word n-gram order.
- beta: A parameter determining an importance of recall w.r.t. precision. If `beta=1`, their importance is equal.
- Return:
- A chrF/chrF++ score. This function is universal both for sentence-level and corpus-level calculation.
- """
- def _get_n_gram_fscore(
- matching_n_grams: dict[int, Tensor], ref_n_grams: dict[int, Tensor], hyp_n_grams: dict[int, Tensor], beta: float
- ) -> dict[int, Tensor]:
- """Get n-gram level f-score."""
- precision: dict[int, Tensor] = {
- n: matching_n_grams[n] / hyp_n_grams[n] if hyp_n_grams[n] > 0 else tensor(0.0) for n in matching_n_grams
- }
- recall: dict[int, Tensor] = {
- n: matching_n_grams[n] / ref_n_grams[n] if ref_n_grams[n] > 0 else tensor(0.0) for n in matching_n_grams
- }
- denominator: dict[int, Tensor] = {
- n: torch.max(beta**2 * precision[n] + recall[n], _EPS_SMOOTHING) for n in matching_n_grams
- }
- f_score: dict[int, Tensor] = {
- n: (1 + beta**2) * precision[n] * recall[n] / denominator[n] for n in matching_n_grams
- }
- return f_score
- char_n_gram_f_score = _get_n_gram_fscore(matching_char_n_grams, ref_char_n_grams, hyp_char_n_grams, beta)
- word_n_gram_f_score = _get_n_gram_fscore(matching_word_n_grams, ref_word_n_grams, hyp_word_n_grams, beta)
- return (sum(char_n_gram_f_score.values()) + sum(word_n_gram_f_score.values())) / tensor(n_order)
- def _calculate_sentence_level_chrf_score(
- targets: list[str],
- pred_char_n_grams_counts: dict[int, dict[tuple[str, ...], Tensor]],
- pred_word_n_grams_counts: dict[int, dict[tuple[str, ...], Tensor]],
- pred_char_n_grams: dict[int, Tensor],
- pred_word_n_grams: dict[int, Tensor],
- n_char_order: int,
- n_word_order: int,
- n_order: float,
- beta: float,
- lowercase: bool,
- whitespace: bool,
- ) -> tuple[Tensor, dict[int, Tensor], dict[int, Tensor], dict[int, Tensor], dict[int, Tensor]]:
- """Calculate the best sentence-level chrF/chrF++ score.
- For a given pre-processed hypothesis, all references are evaluated and score and statistics
- for the best matching reference is returned.
- Args:
- targets: An iterable of references.
- pred_char_n_grams_counts: A dictionary of dictionaries with hypothesis character n-grams.
- pred_word_n_grams_counts: A dictionary of dictionaries with hypothesis word n-grams.
- pred_char_n_grams: A total number of hypothesis character n-grams.
- pred_word_n_grams: A total number of hypothesis word n-grams.
- n_char_order: A character n-gram order.
- n_word_order: A word n-gram order.
- n_order: A sum of character and word n-gram order.
- beta: A parameter determining an importance of recall w.r.t. precision. If `beta=1`, their importance is equal.
- lowercase: An indication whether to enable case-insensitivity.
- whitespace: An indication whether to keep whitespaces during character n-gram extraction.
- Return:
- Return chrF/chrF++ score and statistics for the best matching hypothesis and reference.
- f_score: A sentence-level chrF/chrF++ score.
- matching_char_n_grams:
- A total number of matching character n-grams between the best matching reference and hypothesis.
- matching_word_n_grams:
- A total number of matching word n-grams between the best matching reference and hypothesis.
- target_char_n_grams: A total number of reference character n-grams.
- target_word_n_grams: A total number of reference word n-grams.
- """
- best_f_score = tensor(0.0)
- best_matching_char_n_grams: dict[int, Tensor] = defaultdict(lambda: tensor(0.0))
- best_matching_word_n_grams: dict[int, Tensor] = defaultdict(lambda: tensor(0.0))
- best_target_char_n_grams: dict[int, Tensor] = defaultdict(lambda: tensor(0.0))
- best_target_word_n_grams: dict[int, Tensor] = defaultdict(lambda: tensor(0.0))
- for target in targets:
- (
- target_char_n_grams_counts,
- target_word_n_grams_counts,
- target_char_n_grams,
- target_word_n_grams,
- ) = _get_n_grams_counts_and_total_ngrams(target, n_char_order, n_word_order, lowercase, whitespace)
- matching_char_n_grams = _get_ngram_matches(target_char_n_grams_counts, pred_char_n_grams_counts)
- matching_word_n_grams = _get_ngram_matches(target_word_n_grams_counts, pred_word_n_grams_counts)
- f_score = _calculate_fscore(
- matching_char_n_grams,
- matching_word_n_grams,
- pred_char_n_grams,
- pred_word_n_grams,
- target_char_n_grams,
- target_word_n_grams,
- n_order,
- beta,
- )
- if f_score > best_f_score:
- best_f_score = f_score
- best_matching_char_n_grams = matching_char_n_grams
- best_matching_word_n_grams = matching_word_n_grams
- best_target_char_n_grams = target_char_n_grams
- best_target_word_n_grams = target_word_n_grams
- return (
- best_f_score,
- best_matching_char_n_grams,
- best_matching_word_n_grams,
- best_target_char_n_grams,
- best_target_word_n_grams,
- )
- def _chrf_score_update(
- preds: Union[str, Sequence[str]],
- target: Union[Sequence[str], Sequence[Sequence[str]]],
- total_preds_char_n_grams: dict[int, Tensor],
- total_preds_word_n_grams: dict[int, Tensor],
- total_target_char_n_grams: dict[int, Tensor],
- total_target_word_n_grams: dict[int, Tensor],
- total_matching_char_n_grams: dict[int, Tensor],
- total_matching_word_n_grams: dict[int, Tensor],
- n_char_order: int,
- n_word_order: int,
- n_order: float,
- beta: float,
- lowercase: bool,
- whitespace: bool,
- sentence_chrf_score: Optional[List[Tensor]] = None,
- ) -> tuple[
- dict[int, Tensor],
- dict[int, Tensor],
- dict[int, Tensor],
- dict[int, Tensor],
- dict[int, Tensor],
- dict[int, Tensor],
- Optional[List[Tensor]],
- ]:
- """Update function for chrf score.
- Args:
- preds: An iterable of hypothesis corpus.
- target: An iterable of iterables of reference corpus.
- total_preds_char_n_grams: A dictionary containing a total number of hypothesis character n-grams.
- total_preds_word_n_grams: A dictionary containing a total number of hypothesis word n-grams.
- total_target_char_n_grams: A dictionary containing a total number of reference character n-grams.
- total_target_word_n_grams: A dictionary containing a total number of reference word n-grams.
- total_matching_char_n_grams:
- A dictionary containing a total number of matching character n-grams between references and hypotheses.
- total_matching_word_n_grams:
- A dictionary containing a total number of total matching word n-grams between references and hypotheses.
- n_char_order: A character n-gram order.
- n_word_order: A word n-gram order.
- n_order: Sum of character and word n-gram order.
- beta: A parameter determining an importance of recall w.r.t. precision. If `beta=1`, their importance is equal.
- lowercase: An indication whether to enable case-insensitivity.
- whitespace: An indication whether to keep whitespaces during character n-gram extraction.
- sentence_chrf_score: A list of sentence-level chrF/chrF++ scores.
- Return:
- total_target_char_n_grams: number of reference character n-grams.
- total_target_word_n_grams: number of reference word n-grams.
- total_preds_char_n_grams: number of hypothesis character n-grams.
- total_preds_word_n_grams: number of hypothesis word n-grams.
- total_matching_char_n_grams: number of matching character n-grams between references and hypotheses.
- total_matching_word_n_grams: number of total matching word n-grams between references and hypotheses.
- sentence_chrf_score: A list of sentence-level chrF/chrF++ scores.
- Raises:
- ValueError:
- If length of ``preds`` and ``target`` differs.
- """
- target_corpus, preds = _validate_inputs(target, preds)
- for pred, targets in zip(preds, target_corpus):
- (
- pred_char_n_grams_counts,
- pred_word_n_grams_counts,
- pred_char_n_grams,
- pred_word_n_grams,
- ) = _get_n_grams_counts_and_total_ngrams(pred, n_char_order, n_word_order, lowercase, whitespace)
- total_preds_char_n_grams = _sum_over_dicts(total_preds_char_n_grams, pred_char_n_grams)
- total_preds_word_n_grams = _sum_over_dicts(total_preds_word_n_grams, pred_word_n_grams)
- (
- sentence_level_f_score,
- matching_char_n_grams,
- matching_word_n_grams,
- target_char_n_grams,
- target_word_n_grams,
- ) = _calculate_sentence_level_chrf_score(
- targets, # type: ignore
- pred_char_n_grams_counts,
- pred_word_n_grams_counts,
- pred_char_n_grams,
- pred_word_n_grams,
- n_char_order,
- n_word_order,
- n_order,
- beta,
- lowercase,
- whitespace,
- )
- if sentence_chrf_score is not None:
- sentence_chrf_score.append(sentence_level_f_score.unsqueeze(0))
- total_target_char_n_grams = _sum_over_dicts(total_target_char_n_grams, target_char_n_grams)
- total_target_word_n_grams = _sum_over_dicts(total_target_word_n_grams, target_word_n_grams)
- total_matching_char_n_grams = _sum_over_dicts(total_matching_char_n_grams, matching_char_n_grams)
- total_matching_word_n_grams = _sum_over_dicts(total_matching_word_n_grams, matching_word_n_grams)
- return (
- total_preds_char_n_grams,
- total_preds_word_n_grams,
- total_target_char_n_grams,
- total_target_word_n_grams,
- total_matching_char_n_grams,
- total_matching_word_n_grams,
- sentence_chrf_score,
- )
- def _chrf_score_compute(
- total_preds_char_n_grams: dict[int, Tensor],
- total_preds_word_n_grams: dict[int, Tensor],
- total_target_char_n_grams: dict[int, Tensor],
- total_target_word_n_grams: dict[int, Tensor],
- total_matching_char_n_grams: dict[int, Tensor],
- total_matching_word_n_grams: dict[int, Tensor],
- n_order: float,
- beta: float,
- ) -> Tensor:
- """Compute chrF/chrF++ score based on pre-computed target, prediction and matching character and word n-grams.
- Args:
- total_preds_char_n_grams: number of hypothesis character n-grams.
- total_preds_word_n_grams: number of hypothesis word n-grams.
- total_target_char_n_grams: number of reference character n-grams.
- total_target_word_n_grams: number of reference word n-grams.
- total_matching_char_n_grams: number of matching character n-grams between references and hypotheses.
- total_matching_word_n_grams: number of total matching word n-grams between references and hypotheses.
- n_order: A sum of character and word n-gram order.
- beta:
- A parameter determining an importance of recall w.r.t. precision. If `beta=1`, their importance is equal.
- Return:
- A corpus-level chrF/chrF++ score.
- """
- return _calculate_fscore(
- total_matching_char_n_grams,
- total_matching_word_n_grams,
- total_preds_char_n_grams,
- total_preds_word_n_grams,
- total_target_char_n_grams,
- total_target_word_n_grams,
- n_order,
- beta,
- )
- def chrf_score(
- preds: Union[str, Sequence[str]],
- target: Sequence[Union[str, Sequence[str]]],
- n_char_order: int = 6,
- n_word_order: int = 2,
- beta: float = 2.0,
- lowercase: bool = False,
- whitespace: bool = False,
- return_sentence_level_score: bool = False,
- ) -> Union[Tensor, tuple[Tensor, Tensor]]:
- """Calculate `chrF score`_ of machine translated text with one or more references.
- This implementation supports both chrF score computation introduced in [1] and chrF++ score introduced in
- `chrF++ score`_. This implementation follows the implementations from https://github.com/m-popovic/chrF and
- https://github.com/mjpost/sacrebleu/blob/master/sacrebleu/metrics/chrf.py.
- Args:
- preds: An iterable of hypothesis corpus.
- target: An iterable of iterables of reference corpus.
- n_char_order:
- A character n-gram order. If `n_char_order=6`, the metrics refers to the official chrF/chrF++.
- n_word_order:
- A word n-gram order. If `n_word_order=2`, the metric refers to the official chrF++. If `n_word_order=0`, the
- metric is equivalent to the original chrF.
- beta:
- A parameter determining an importance of recall w.r.t. precision. If `beta=1`, their importance is equal.
- lowercase: An indication whether to enable case-insensitivity.
- whitespace: An indication whether to keep whitespaces during character n-gram extraction.
- return_sentence_level_score: An indication whether a sentence-level chrF/chrF++ score to be returned.
- Return:
- A corpus-level chrF/chrF++ score.
- (Optionally) A list of sentence-level chrF/chrF++ scores if `return_sentence_level_score=True`.
- Raises:
- ValueError:
- If ``n_char_order`` is not an integer greater than or equal to 1.
- ValueError:
- If ``n_word_order`` is not an integer greater than or equal to 0.
- ValueError:
- If ``beta`` is smaller than 0.
- Example:
- >>> from torchmetrics.functional.text import chrf_score
- >>> preds = ['the cat is on the mat']
- >>> target = [['there is a cat on the mat', 'a cat is on the mat']]
- >>> chrf_score(preds, target)
- tensor(0.8640)
- References:
- [1] chrF: character n-gram F-score for automatic MT evaluation by Maja Popović `chrF score`_
- [2] chrF++: words helping character n-grams by Maja Popović `chrF++ score`_
- """
- if not isinstance(n_char_order, int) or n_char_order < 1:
- raise ValueError("Expected argument `n_char_order` to be an integer greater than or equal to 1.")
- if not isinstance(n_word_order, int) or n_word_order < 0:
- raise ValueError("Expected argument `n_word_order` to be an integer greater than or equal to 0.")
- if beta < 0:
- raise ValueError("Expected argument `beta` to be greater than 0.")
- n_order = float(n_char_order + n_word_order)
- (
- total_preds_char_n_grams,
- total_preds_word_n_grams,
- total_target_char_n_grams,
- total_target_word_n_grams,
- total_matching_char_n_grams,
- total_matching_word_n_grams,
- ) = _prepare_n_grams_dicts(n_char_order, n_word_order)
- sentence_chrf_score: Optional[List[Tensor]] = [] if return_sentence_level_score else None
- (
- total_preds_char_n_grams,
- total_preds_word_n_grams,
- total_target_char_n_grams,
- total_target_word_n_grams,
- total_matching_char_n_grams,
- total_matching_word_n_grams,
- sentence_chrf_score,
- ) = _chrf_score_update(
- preds,
- target,
- total_preds_char_n_grams,
- total_preds_word_n_grams,
- total_target_char_n_grams,
- total_target_word_n_grams,
- total_matching_char_n_grams,
- total_matching_word_n_grams,
- n_char_order,
- n_word_order,
- n_order,
- beta,
- lowercase,
- whitespace,
- sentence_chrf_score,
- )
- chrf_f_score = _chrf_score_compute(
- total_preds_char_n_grams,
- total_preds_word_n_grams,
- total_target_char_n_grams,
- total_target_word_n_grams,
- total_matching_char_n_grams,
- total_matching_word_n_grams,
- n_order,
- beta,
- )
- if sentence_chrf_score:
- return chrf_f_score, torch.cat(sentence_chrf_score)
- return chrf_f_score
|