rouge.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. # Copyright The Lightning team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from collections.abc import Sequence
  15. from typing import Any, Callable, Optional, Union
  16. from torch import Tensor
  17. from typing_extensions import Literal
  18. from torchmetrics import Metric
  19. from torchmetrics.functional.text.rouge import (
  20. ALLOWED_ACCUMULATE_VALUES,
  21. ALLOWED_ROUGE_KEYS,
  22. _rouge_score_compute,
  23. _rouge_score_update,
  24. )
  25. from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE, _NLTK_AVAILABLE
  26. from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE
  27. if not _MATPLOTLIB_AVAILABLE:
  28. __doctest_skip__ = ["ROUGEScore.plot"]
  29. __doctest_requires__ = {("ROUGEScore",): ["nltk"]}
  30. class ROUGEScore(Metric):
  31. """`Calculate Rouge Score`_, used for automatic summarization.
  32. This implementation should imitate the behaviour of the ``rouge-score`` package `Python ROUGE Implementation`
  33. As input to ``forward`` and ``update`` the metric accepts the following input:
  34. - ``preds`` (:class:`~Sequence`): An iterable of predicted sentences or a single predicted sentence
  35. - ``target`` (:class:`~Sequence`): An iterable of target sentences
  36. or an iterable of interables of target sentences
  37. or a single target sentence
  38. As output of ``forward`` and ``compute`` the metric returns the following output:
  39. - ``rouge`` (:class:`~Dict`): A dictionary of tensor rouge scores for each input str rouge key
  40. Args:
  41. use_stemmer: Use Porter stemmer to strip word suffixes to improve matching.
  42. normalizer: A user's own normalizer function.
  43. If this is ``None``, replacing any non-alpha-numeric characters with spaces is default.
  44. This function must take a ``str`` and return a ``str``.
  45. tokenizer:
  46. A user's own tokenizer function. If this is ``None``, splitting by spaces is default
  47. This function must take a ``str`` and return ``Sequence[str]``
  48. accumulate:
  49. Useful in case of multi-reference rouge score.
  50. - ``avg`` takes the avg of all references with respect to predictions
  51. - ``best`` takes the best fmeasure score obtained between prediction and multiple corresponding references.
  52. rouge_keys: A list of rouge types to calculate.
  53. Keys that are allowed are ``rougeL``, ``rougeLsum``, and ``rouge1`` through ``rouge9``.
  54. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
  55. Example:
  56. >>> from torchmetrics.text.rouge import ROUGEScore
  57. >>> preds = "My name is John"
  58. >>> target = "Is your name John"
  59. >>> rouge = ROUGEScore()
  60. >>> from pprint import pprint
  61. >>> pprint(rouge(preds, target))
  62. {'rouge1_fmeasure': tensor(0.7500),
  63. 'rouge1_precision': tensor(0.7500),
  64. 'rouge1_recall': tensor(0.7500),
  65. 'rouge2_fmeasure': tensor(0.),
  66. 'rouge2_precision': tensor(0.),
  67. 'rouge2_recall': tensor(0.),
  68. 'rougeL_fmeasure': tensor(0.5000),
  69. 'rougeL_precision': tensor(0.5000),
  70. 'rougeL_recall': tensor(0.5000),
  71. 'rougeLsum_fmeasure': tensor(0.5000),
  72. 'rougeLsum_precision': tensor(0.5000),
  73. 'rougeLsum_recall': tensor(0.5000)}
  74. Raises:
  75. ValueError:
  76. If the python packages ``nltk`` is not installed.
  77. ValueError:
  78. If any of the ``rouge_keys`` does not belong to the allowed set of keys.
  79. """
  80. is_differentiable: bool = False
  81. higher_is_better: bool = True
  82. full_state_update: bool = True
  83. plot_lower_bound: float = 0.0
  84. plot_upper_bound: float = 1.0
  85. def __init__(
  86. self,
  87. use_stemmer: bool = False,
  88. normalizer: Optional[Callable[[str], str]] = None,
  89. tokenizer: Optional[Callable[[str], Sequence[str]]] = None,
  90. accumulate: Literal["avg", "best"] = "best",
  91. rouge_keys: Union[str, tuple[str, ...]] = ("rouge1", "rouge2", "rougeL", "rougeLsum"),
  92. **kwargs: Any,
  93. ) -> None:
  94. super().__init__(**kwargs)
  95. if use_stemmer or "rougeLsum" in rouge_keys:
  96. if not _NLTK_AVAILABLE:
  97. raise ModuleNotFoundError(
  98. "Stemmer and/or `rougeLsum` requires that `nltk` is installed. Use `pip install nltk`."
  99. )
  100. import nltk
  101. if not isinstance(rouge_keys, tuple):
  102. rouge_keys = (rouge_keys,)
  103. for key in rouge_keys:
  104. if key not in ALLOWED_ROUGE_KEYS:
  105. raise ValueError(f"Got unknown rouge key {key}. Expected to be one of {ALLOWED_ROUGE_KEYS}")
  106. if accumulate not in ALLOWED_ACCUMULATE_VALUES:
  107. raise ValueError(
  108. f"Got unknown accumulate value {accumulate}. Expected to be one of {ALLOWED_ACCUMULATE_VALUES}"
  109. )
  110. self.rouge_keys = rouge_keys
  111. self.rouge_keys_values = [ALLOWED_ROUGE_KEYS[key] for key in rouge_keys]
  112. self.stemmer = nltk.stem.porter.PorterStemmer() if use_stemmer else None
  113. self.normalizer = normalizer
  114. self.tokenizer = tokenizer
  115. self.accumulate = accumulate
  116. # Adding stated dynamically to prevent IndexError during sync function as some lists can be empty.
  117. for rouge_key in self.rouge_keys:
  118. for score in ["fmeasure", "precision", "recall"]:
  119. self.add_state(f"{rouge_key}_{score}", [], dist_reduce_fx=None)
  120. def update(
  121. self, preds: Union[str, Sequence[str]], target: Union[str, Sequence[str], Sequence[Sequence[str]]]
  122. ) -> None:
  123. """Update state with predictions and targets."""
  124. if isinstance(target, list) and all(isinstance(tgt, str) for tgt in target):
  125. target = [target] if isinstance(preds, str) else [[tgt] for tgt in target]
  126. if isinstance(preds, str):
  127. preds = [preds]
  128. if isinstance(target, str):
  129. target = [[target]]
  130. output: dict[Union[int, str], list[dict[str, Tensor]]] = _rouge_score_update(
  131. preds,
  132. target,
  133. self.rouge_keys_values,
  134. stemmer=self.stemmer,
  135. normalizer=self.normalizer,
  136. tokenizer=self.tokenizer,
  137. accumulate=self.accumulate,
  138. )
  139. for rouge_key, metrics in output.items():
  140. for metric in metrics:
  141. for tp, value in metric.items():
  142. getattr(self, f"rouge{rouge_key}_{tp}").append(value.to(self.device)) # todo
  143. def compute(self) -> dict[str, Tensor]:
  144. """Calculate (Aggregate and provide confidence intervals) ROUGE score."""
  145. update_output = {}
  146. for rouge_key in self.rouge_keys_values:
  147. for tp in ["fmeasure", "precision", "recall"]:
  148. update_output[f"rouge{rouge_key}_{tp}"] = getattr(self, f"rouge{rouge_key}_{tp}")
  149. return _rouge_score_compute(update_output)
  150. def __hash__(self) -> int:
  151. """Return a unique hash for the specific instance of this metric."""
  152. # override to hash list objects.
  153. # this is a bug in the upstream pytorch release.
  154. hash_vals = [self.__class__.__name__]
  155. for key in self._defaults:
  156. value = getattr(self, key)
  157. if isinstance(value, list):
  158. value = tuple(value)
  159. hash_vals.append(value)
  160. return hash(tuple(hash_vals))
  161. def plot(
  162. self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None
  163. ) -> _PLOT_OUT_TYPE:
  164. """Plot a single or multiple values from the metric.
  165. Args:
  166. val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
  167. If no value is provided, will automatically call `metric.compute` and plot that result.
  168. ax: An matplotlib axis object. If provided will add plot to that axis
  169. Returns:
  170. Figure and Axes object
  171. Raises:
  172. ModuleNotFoundError:
  173. If `matplotlib` is not installed
  174. .. plot::
  175. :scale: 75
  176. >>> # Example plotting a single value
  177. >>> from torchmetrics.text.rouge import ROUGEScore
  178. >>> metric = ROUGEScore()
  179. >>> preds = "My name is John"
  180. >>> target = "Is your name John"
  181. >>> metric.update(preds, target)
  182. >>> fig_, ax_ = metric.plot()
  183. .. plot::
  184. :scale: 75
  185. >>> # Example plotting multiple values
  186. >>> from torchmetrics.text.rouge import ROUGEScore
  187. >>> metric = ROUGEScore()
  188. >>> preds = "My name is John"
  189. >>> target = "Is your name John"
  190. >>> values = [ ]
  191. >>> for _ in range(10):
  192. ... values.append(metric(preds, target))
  193. >>> fig_, ax_ = metric.plot(values)
  194. """
  195. return self._plot(val, ax)