lve.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. # Copyright The Lightning team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Any, List, Optional, Sequence, Union
  15. from torch import Tensor
  16. from torchmetrics.functional.multimodal.lve import lip_vertex_error
  17. from torchmetrics.metric import Metric
  18. from torchmetrics.utilities.data import dim_zero_cat
  19. from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE
  20. from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE
# ``LipVertexError.plot`` documents that it raises ``ModuleNotFoundError`` without matplotlib, so its
# docstring examples cannot run in that environment. Listing the method under ``__doctest_skip__``
# presumably tells the doctest collector to skip those examples -- NOTE(review): confirm which doctest
# plugin consumes this variable.
if not _MATPLOTLIB_AVAILABLE:
    __doctest_skip__ = ["LipVertexError.plot"]
  23. class LipVertexError(Metric):
  24. r"""Implements Lip Vertex Error (LVE) metric for 3D talking head evaluation.
  25. The Lip Vertex Error (LVE) metric evaluates the quality of lip synchronization in 3D facial animations by measuring
  26. the maximum Euclidean distance (L2 error) between corresponding lip vertices of the generated and ground truth
  27. meshes for each frame. The metric is defined as:
  28. .. math::
  29. \text{LVE} = \frac{1}{N} \sum_{i=1}^{N} \max_{v \in \text{lip}} \|x_{i,v} - \hat{x}_{i,v}\|_2^2
  30. where :math:`N` is the number of frames, :math:`x_{i,v}` represents the 3D coordinates of vertex :math:`v` in the
  31. lip region of the ground truth frame :math:`i`, and :math:`\hat{x}_{i,v}` represents the corresponding vertex in the
  32. predicted frame. The metric computes the maximum squared L2 distance between corresponding lip vertices for each
  33. frame and averages across all frames. A lower LVE value indicates better lip synchronization quality.
  34. As input to ``forward`` and ``update``, the metric accepts the following input:
  35. - ``preds`` (:class:`~torch.Tensor`): Predicted vertices tensor of shape (T, V, 3) where T is number of frames,
  36. V is number of vertices, and 3 represents XYZ coordinates
  37. - ``target`` (:class:`~torch.Tensor`): Ground truth vertices tensor of shape (T', V, 3) where T' can be different
  38. from T
  39. As output of ``forward`` and ``compute``, the metric returns the following output:
  40. - ``lve_score`` (:class:`~torch.Tensor`): A scalar tensor containing the mean Lip Vertex Error value across
  41. all frames.
  42. Args:
  43. mouth_map: List of vertex indices corresponding to the mouth region
  44. validate_args: bool indicating if input arguments and tensors should be validated for correctness.
  45. Set to ``False`` for faster computations.
  46. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
  47. Raises:
  48. ValueError:
  49. If the number of dimensions of `vertices_pred` or `vertices_gt` is not 3.
  50. If vertex dimensions (V) or coordinate dimensions (3) don't match
  51. If ``mouth_map`` is empty or contains invalid indices
  52. Example:
  53. >>> import torch
  54. >>> from torchmetrics.functional.multimodal import lip_vertex_error
  55. >>> vertices_pred = torch.randn(10, 100, 3, generator=torch.manual_seed(42))
  56. >>> vertices_gt = torch.randn(10, 100, 3, generator=torch.manual_seed(43))
  57. >>> mouth_map = [0, 1, 2, 3, 4]
  58. >>> lip_vertex_error(vertices_pred, vertices_gt, mouth_map)
  59. tensor(12.7688)
  60. """
  61. is_differentiable: bool = True
  62. higher_is_better: bool = False
  63. full_state_update: bool = False
  64. plot_lower_bound: float = 0.0
  65. vertices_pred_list: List[Tensor]
  66. vertices_gt_list: List[Tensor]
  67. def __init__(
  68. self,
  69. mouth_map: List[int],
  70. validate_args: bool = True,
  71. **kwargs: Any,
  72. ) -> None:
  73. super().__init__(**kwargs)
  74. self.mouth_map = mouth_map
  75. self.validate_args = validate_args
  76. if not self.mouth_map:
  77. raise ValueError("mouth_map cannot be empty.")
  78. self.add_state("vertices_pred_list", default=[], dist_reduce_fx=None)
  79. self.add_state("vertices_gt_list", default=[], dist_reduce_fx=None)
  80. def update(self, vertices_pred: Tensor, vertices_gt: Tensor) -> None:
  81. """Update metric states with predictions and targets.
  82. Args:
  83. vertices_pred: Predicted vertices tensor of shape (T, V, 3) where T is number of frames,
  84. V is number of vertices, and 3 represents XYZ coordinates
  85. vertices_gt: Ground truth vertices tensor of shape (T', V, 3) where T' can be different from T
  86. """
  87. if self.validate_args:
  88. if vertices_pred.ndim != 3 or vertices_gt.ndim != 3:
  89. raise ValueError(
  90. f"Expected both vertices_pred and vertices_gt to have 3 dimensions but got "
  91. f"{vertices_pred.ndim} and {vertices_gt.ndim} dimensions respectively."
  92. )
  93. if vertices_pred.shape[1:] != vertices_gt.shape[1:]:
  94. raise ValueError(
  95. f"Expected vertices_pred and vertices_gt to have same vertex and coordinate dimensions but got "
  96. f"shapes {vertices_pred.shape} and {vertices_gt.shape}."
  97. )
  98. if max(self.mouth_map) >= vertices_pred.shape[1]:
  99. raise ValueError(
  100. f"mouth_map contains invalid vertex indices. Max index {max(self.mouth_map)} is larger than "
  101. f"number of vertices {vertices_pred.shape[1]}."
  102. )
  103. min_frames = min(vertices_pred.shape[0], vertices_gt.shape[0])
  104. vertices_pred = vertices_pred[:min_frames]
  105. vertices_gt = vertices_gt[:min_frames]
  106. self.vertices_pred_list.append(vertices_pred)
  107. self.vertices_gt_list.append(vertices_gt)
  108. def compute(self) -> Tensor:
  109. """Compute the Lip Vertex Error over all accumulated states.
  110. Returns:
  111. torch.Tensor: A scalar tensor with the mean LVE value
  112. """
  113. vertices_pred = dim_zero_cat(self.vertices_pred_list)
  114. vertices_gt = dim_zero_cat(self.vertices_gt_list)
  115. return lip_vertex_error(vertices_pred, vertices_gt, self.mouth_map, self.validate_args)
  116. def plot(
  117. self, val: Optional[Union[Tensor, Sequence[Tensor]]] = None, ax: Optional[_AX_TYPE] = None
  118. ) -> _PLOT_OUT_TYPE:
  119. """Plot a single or multiple values from the metric.
  120. Args:
  121. val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
  122. If no value is provided, will automatically call `metric.compute` and plot that result.
  123. ax: An matplotlib axis object. If provided will add plot to that axis
  124. Returns:
  125. Figure and Axes object
  126. Raises:
  127. ModuleNotFoundError:
  128. If `matplotlib` is not installed
  129. .. plot::
  130. :scale: 75
  131. >>> # Example plotting a single value
  132. >>> import torch
  133. >>> from torchmetrics.multimodal.lve import LipVertexError
  134. >>> metric = LipVertexError(mouth_map=[0, 1, 2, 3, 4])
  135. >>> vertices_pred = torch.randn(10, 100, 3, generator=torch.manual_seed(42))
  136. >>> vertices_gt = torch.randn(10, 100, 3, generator=torch.manual_seed(43))
  137. >>> metric.update(vertices_pred, vertices_gt)
  138. >>> fig_, ax_ = metric.plot()
  139. .. plot::
  140. :scale: 75
  141. >>> # Example plotting multiple values
  142. >>> import torch
  143. >>> from torchmetrics.multimodal.lve import LipVertexError
  144. >>> metric = LipVertexError(mouth_map=[0, 1, 2, 3, 4])
  145. >>> values = []
  146. >>> for _ in range(10):
  147. ... vertices_pred = torch.randn(10, 100, 3, generator=torch.manual_seed(42+_))
  148. ... vertices_gt = torch.randn(10, 100, 3, generator=torch.manual_seed(43+_))
  149. ... values.append(metric(vertices_pred, vertices_gt))
  150. >>> fig_, ax_ = metric.plot(values)
  151. """
  152. return self._plot(val, ax)