pesq.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. # Copyright The Lightning team.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Any
  15. import numpy as np
  16. import torch
  17. from torch import Tensor
  18. from torchmetrics.utilities.checks import _check_same_shape
  19. from torchmetrics.utilities.imports import _MULTIPROCESSING_AVAILABLE, _PESQ_AVAILABLE
# Maps the function whose docstring examples are doctested to the optional package(s) those examples
# import (here ``pesq``). NOTE(review): presumably read by the doctest collector so the examples are
# skipped when ``pesq`` is not installed -- confirm against the project's test configuration.
__doctest_requires__ = {("perceptual_evaluation_speech_quality",): ["pesq"]}
  21. def perceptual_evaluation_speech_quality(
  22. preds: Tensor,
  23. target: Tensor,
  24. fs: int,
  25. mode: str,
  26. keep_same_device: bool = False,
  27. n_processes: int = 1,
  28. ) -> Tensor:
  29. r"""Calculate `Perceptual Evaluation of Speech Quality`_ (PESQ).
  30. It's a recognized industry standard for audio quality that takes into considerations characteristics such as: audio
  31. sharpness, call volume, background noise, clipping, audio interference etc. PESQ returns a score between -0.5 and
  32. 4.5 with the higher scores indicating a better quality.
  33. This metric is a wrapper for the `pesq package`_. Note that input will be moved to `cpu` to perform the metric
  34. calculation.
  35. .. hint::
  36. Usingsing this metrics requires you to have ``pesq`` install. Either install as ``pip install
  37. torchmetrics[audio]`` or ``pip install pesq``. Note that ``pesq`` will compile with your currently
  38. installed version of numpy, meaning that if you upgrade numpy at some point in the future you will
  39. most likely have to reinstall ``pesq``.
  40. Args:
  41. preds: float tensor with shape ``(...,time)``
  42. target: float tensor with shape ``(...,time)``
  43. fs: sampling frequency, should be 16000 or 8000 (Hz)
  44. mode: ``'wb'`` (wide-band) or ``'nb'`` (narrow-band)
  45. keep_same_device: whether to move the pesq value to the device of preds
  46. n_processes: integer specifying the number of processes to run in parallel for the metric calculation.
  47. Only applies to batches of data and if ``multiprocessing`` package is installed.
  48. Returns:
  49. Float tensor with shape ``(...,)`` of PESQ values per sample
  50. Raises:
  51. ModuleNotFoundError:
  52. If ``pesq`` package is not installed
  53. ValueError:
  54. If ``fs`` is not either ``8000`` or ``16000``
  55. ValueError:
  56. If ``mode`` is not either ``"wb"`` or ``"nb"``
  57. RuntimeError:
  58. If ``preds`` and ``target`` do not have the same shape
  59. Example:
  60. >>> from torch import randn
  61. >>> from torchmetrics.functional.audio.pesq import perceptual_evaluation_speech_quality
  62. >>> preds = randn(8000)
  63. >>> target = randn(8000)
  64. >>> perceptual_evaluation_speech_quality(preds, target, 8000, 'nb')
  65. tensor(2.2885)
  66. >>> perceptual_evaluation_speech_quality(preds, target, 16000, 'wb')
  67. tensor(1.6805)
  68. """
  69. if not _PESQ_AVAILABLE:
  70. raise ModuleNotFoundError(
  71. "PESQ metric requires that pesq is installed."
  72. " Either install as `pip install torchmetrics[audio]` or `pip install pesq`."
  73. )
  74. import pesq as pesq_backend
  75. def _issubtype_number(x: Any) -> bool:
  76. return np.issubdtype(type(x), np.number)
  77. _filter_error_msg = np.vectorize(_issubtype_number)
  78. if fs not in (8000, 16000):
  79. raise ValueError(f"Expected argument `fs` to either be 8000 or 16000 but got {fs}")
  80. if mode not in ("wb", "nb"):
  81. raise ValueError(f"Expected argument `mode` to either be 'wb' or 'nb' but got {mode}")
  82. _check_same_shape(preds, target)
  83. if preds.ndim == 1:
  84. pesq_val_np = pesq_backend.pesq(fs, target.detach().cpu().numpy(), preds.detach().cpu().numpy(), mode)
  85. pesq_val = torch.tensor(pesq_val_np)
  86. else:
  87. preds_np = preds.reshape(-1, preds.shape[-1]).detach().cpu().numpy()
  88. target_np = target.reshape(-1, preds.shape[-1]).detach().cpu().numpy()
  89. if _MULTIPROCESSING_AVAILABLE and n_processes != 1:
  90. pesq_val_np = pesq_backend.pesq_batch(fs, target_np, preds_np, mode, n_processor=n_processes)
  91. pesq_val_np = np.array(pesq_val_np)
  92. else:
  93. pesq_val_np = np.empty(shape=(preds_np.shape[0]))
  94. for b in range(preds_np.shape[0]):
  95. pesq_val_np[b] = pesq_backend.pesq(fs, target_np[b, :], preds_np[b, :], mode)
  96. pesq_val = torch.from_numpy(pesq_val_np[_filter_error_msg(pesq_val_np)].astype(np.float32))
  97. pesq_val = pesq_val.reshape(len(pesq_val))
  98. if keep_same_device:
  99. return pesq_val.to(preds.device)
  100. return pesq_val