snr.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. # Copyright The Lightning team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import torch
  15. from torch import Tensor
  16. from torchmetrics.functional.audio.sdr import scale_invariant_signal_distortion_ratio
  17. from torchmetrics.utilities.checks import _check_same_shape
  18. def signal_noise_ratio(preds: Tensor, target: Tensor, zero_mean: bool = False) -> Tensor:
  19. r"""Calculate `Signal-to-noise ratio`_ (SNR_) meric for evaluating quality of audio.
  20. .. math::
  21. \text{SNR} = \frac{P_{signal}}{P_{noise}}
  22. where :math:`P` denotes the power of each signal. The SNR metric compares the level of the desired signal to
  23. the level of background noise. Therefore, a high value of SNR means that the audio is clear.
  24. Args:
  25. preds: float tensor with shape ``(...,time)``
  26. target: float tensor with shape ``(...,time)``
  27. zero_mean: if to zero mean target and preds or not
  28. Returns:
  29. Float tensor with shape ``(...,)`` of SNR values per sample
  30. Raises:
  31. RuntimeError:
  32. If ``preds`` and ``target`` does not have the same shape
  33. Example:
  34. >>> from torchmetrics.functional.audio import signal_noise_ratio
  35. >>> target = torch.tensor([3.0, -0.5, 2.0, 7.0])
  36. >>> preds = torch.tensor([2.5, 0.0, 2.0, 8.0])
  37. >>> signal_noise_ratio(preds, target)
  38. tensor(16.1805)
  39. """
  40. _check_same_shape(preds, target)
  41. eps = torch.finfo(preds.dtype).eps
  42. if zero_mean:
  43. target = target - torch.mean(target, dim=-1, keepdim=True)
  44. preds = preds - torch.mean(preds, dim=-1, keepdim=True)
  45. noise = target - preds
  46. snr_value = (torch.sum(target**2, dim=-1) + eps) / (torch.sum(noise**2, dim=-1) + eps)
  47. return 10 * torch.log10(snr_value)
  48. def scale_invariant_signal_noise_ratio(preds: Tensor, target: Tensor) -> Tensor:
  49. """`Scale-invariant signal-to-noise ratio`_ (SI-SNR).
  50. Args:
  51. preds: float tensor with shape ``(...,time)``
  52. target: float tensor with shape ``(...,time)``
  53. Returns:
  54. Float tensor with shape ``(...,)`` of SI-SNR values per sample
  55. Raises:
  56. RuntimeError:
  57. If ``preds`` and ``target`` does not have the same shape
  58. Example:
  59. >>> import torch
  60. >>> from torchmetrics.functional.audio import scale_invariant_signal_noise_ratio
  61. >>> target = torch.tensor([3.0, -0.5, 2.0, 7.0])
  62. >>> preds = torch.tensor([2.5, 0.0, 2.0, 8.0])
  63. >>> scale_invariant_signal_noise_ratio(preds, target)
  64. tensor(15.0918)
  65. """
  66. return scale_invariant_signal_distortion_ratio(preds=preds, target=target, zero_mean=True)
  67. def complex_scale_invariant_signal_noise_ratio(preds: Tensor, target: Tensor, zero_mean: bool = False) -> Tensor:
  68. """`Complex scale-invariant signal-to-noise ratio`_ (C-SI-SNR).
  69. Args:
  70. preds: real float tensor with shape ``(...,frequency,time,2)`` or complex float tensor with
  71. shape ``(..., frequency,time)``
  72. target: real float tensor with shape ``(...,frequency,time,2)`` or complex float tensor with
  73. shape ``(..., frequency,time)``
  74. zero_mean: When set to True, the mean of all signals is subtracted prior to computation of the metrics
  75. Returns:
  76. Float tensor with shape ``(...,)`` of C-SI-SNR values per sample
  77. Raises:
  78. RuntimeError:
  79. If ``preds`` is not the shape (...,frequency,time,2) (after being converted to real if it is complex).
  80. If ``preds`` and ``target`` does not have the same shape.
  81. Example:
  82. >>> from torch import randn
  83. >>> from torchmetrics.functional.audio import complex_scale_invariant_signal_noise_ratio
  84. >>> preds = randn((1,257,100,2))
  85. >>> target = randn((1,257,100,2))
  86. >>> complex_scale_invariant_signal_noise_ratio(preds, target)
  87. tensor([-38.8832])
  88. """
  89. if preds.is_complex():
  90. preds = torch.view_as_real(preds)
  91. if target.is_complex():
  92. target = torch.view_as_real(target)
  93. if (preds.ndim < 3 or preds.shape[-1] != 2) or (target.ndim < 3 or target.shape[-1] != 2):
  94. raise RuntimeError(
  95. "Predictions and targets are expected to have the shape (..., frequency, time, 2),"
  96. f" but got {preds.shape} and {target.shape}."
  97. )
  98. preds = preds.reshape(*preds.shape[:-3], -1)
  99. target = target.reshape(*target.shape[:-3], -1)
  100. return scale_invariant_signal_distortion_ratio(preds=preds, target=target, zero_mean=zero_mean)