eed.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. # Copyright The Lightning team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from collections.abc import Sequence
  15. from typing import Any, List, Optional, Union
  16. from torch import Tensor, stack
  17. from typing_extensions import Literal
  18. from torchmetrics.functional.text.eed import _eed_compute, _eed_update
  19. from torchmetrics.metric import Metric
  20. from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE
  21. from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE
# The doctest examples in ``ExtendedEditDistance.plot`` call ``metric.plot()``, which needs
# matplotlib; register that docstring to be skipped when matplotlib is not available.
if not _MATPLOTLIB_AVAILABLE:
    __doctest_skip__ = ["ExtendedEditDistance.plot"]
  24. class ExtendedEditDistance(Metric):
  25. """Compute extended edit distance score (`ExtendedEditDistance`_) for strings or list of strings.
  26. The metric utilises the Levenshtein distance and extends it by adding a jump operation.
  27. As input to ``forward`` and ``update`` the metric accepts the following input:
  28. - ``preds`` (:class:`~Sequence`): An iterable of hypothesis corpus
  29. - ``target`` (:class:`~Sequence`): An iterable of iterables of reference corpus
  30. As output of ``forward`` and ``compute`` the metric returns the following output:
  31. - ``eed`` (:class:`~torch.Tensor`): A tensor with the extended edit distance score
  32. Args:
  33. language: Language used in sentences. Only supports English (en) and Japanese (ja) for now.
  34. return_sentence_level_score: An indication of whether sentence-level EED score is to be returned
  35. alpha: optimal jump penalty, penalty for jumps between characters
  36. rho: coverage cost, penalty for repetition of characters
  37. deletion: penalty for deletion of character
  38. insertion: penalty for insertion or substitution of character
  39. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
  40. Example:
  41. >>> from torchmetrics.text import ExtendedEditDistance
  42. >>> preds = ["this is the prediction", "here is an other sample"]
  43. >>> target = ["this is the reference", "here is another one"]
  44. >>> eed = ExtendedEditDistance()
  45. >>> eed(preds=preds, target=target)
  46. tensor(0.3078)
  47. """
  48. higher_is_better: bool = False
  49. is_differentiable: bool = False
  50. full_state_update: bool = False
  51. plot_lower_bound: float = 0.0
  52. plot_upper_bound: float = 1.0
  53. sentence_eed: List[Tensor]
  54. def __init__(
  55. self,
  56. language: Literal["en", "ja"] = "en",
  57. return_sentence_level_score: bool = False,
  58. alpha: float = 2.0,
  59. rho: float = 0.3,
  60. deletion: float = 0.2,
  61. insertion: float = 1.0,
  62. **kwargs: Any,
  63. ) -> None:
  64. super().__init__(**kwargs)
  65. if language not in ("en", "ja"):
  66. raise ValueError(f"Expected argument `language` to either be `en` or `ja` but got {language}")
  67. self.language: Literal["en", "ja"] = language
  68. self.return_sentence_level_score = return_sentence_level_score
  69. # input validation for parameters
  70. for param_name, param in zip(["alpha", "rho", "deletion", "insertion"], [alpha, rho, deletion, insertion]):
  71. if not isinstance(param, float) or (isinstance(param, float) and param < 0):
  72. raise ValueError(f"Parameter `{param_name}` is expected to be a non-negative float.")
  73. self.alpha = alpha
  74. self.rho = rho
  75. self.deletion = deletion
  76. self.insertion = insertion
  77. self.add_state("sentence_eed", [], dist_reduce_fx="cat")
  78. def update(
  79. self,
  80. preds: Union[str, Sequence[str]],
  81. target: Sequence[Union[str, Sequence[str]]],
  82. ) -> None:
  83. """Update state with predictions and targets."""
  84. self.sentence_eed = _eed_update(
  85. preds,
  86. target,
  87. self.language,
  88. self.alpha,
  89. self.rho,
  90. self.deletion,
  91. self.insertion,
  92. self.sentence_eed,
  93. )
  94. def compute(self) -> Union[Tensor, tuple[Tensor, Tensor]]:
  95. """Calculate extended edit distance score."""
  96. average = _eed_compute(self.sentence_eed)
  97. if self.return_sentence_level_score:
  98. return average, stack(self.sentence_eed)
  99. return average
  100. def plot(
  101. self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None
  102. ) -> _PLOT_OUT_TYPE:
  103. """Plot a single or multiple values from the metric.
  104. Args:
  105. val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
  106. If no value is provided, will automatically call `metric.compute` and plot that result.
  107. ax: An matplotlib axis object. If provided will add plot to that axis
  108. Returns:
  109. Figure and Axes object
  110. Raises:
  111. ModuleNotFoundError:
  112. If `matplotlib` is not installed
  113. .. plot::
  114. :scale: 75
  115. >>> # Example plotting a single value
  116. >>> from torchmetrics.text import ExtendedEditDistance
  117. >>> metric = ExtendedEditDistance()
  118. >>> preds = ["this is the prediction", "there is an other sample"]
  119. >>> target = ["this is the reference", "there is another one"]
  120. >>> metric.update(preds, target)
  121. >>> fig_, ax_ = metric.plot()
  122. .. plot::
  123. :scale: 75
  124. >>> # Example plotting multiple values
  125. >>> from torchmetrics.text import ExtendedEditDistance
  126. >>> metric = ExtendedEditDistance()
  127. >>> preds = ["this is the prediction", "there is an other sample"]
  128. >>> target = ["this is the reference", "there is another one"]
  129. >>> values = [ ]
  130. >>> for _ in range(10):
  131. ... values.append(metric(preds, target))
  132. >>> fig_, ax_ = metric.plot(values)
  133. """
  134. return self._plot(val, ax)