From 88112e6ae31b917764511af13787677caa2af1cb Mon Sep 17 00:00:00 2001 From: shahules786 Date: Thu, 13 Oct 2022 10:38:58 +0530 Subject: [PATCH 1/2] fix pesq --- enhancer/loss.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/enhancer/loss.py b/enhancer/loss.py index cdd15a5..5092656 100644 --- a/enhancer/loss.py +++ b/enhancer/loss.py @@ -1,5 +1,6 @@ import logging +import numpy as np import torch import torch.nn as nn from torchmetrics.audio.pesq import PerceptualEvaluationSpeechQuality @@ -116,17 +117,24 @@ class Stoi: class Pesq: - def __init__(self, sr: int, mode="nb"): + def __init__(self, sr: int, mode="wb"): - self.pesq = PerceptualEvaluationSpeechQuality(fs=sr, mode=mode) + self.sr = sr self.name = "pesq" + self.mode = mode + self.pesq = PerceptualEvaluationSpeechQuality(fs=sr, mode=mode) def __call__(self, prediction: torch.Tensor, target: torch.Tensor): - try: - return self.pesq(prediction, target) - except Exception as e: - logging.warning(f"{e} error occured while calculating PESQ") - return torch.tensor(0.0) + + pesq_values = [] + for pred, target_ in zip(prediction, target): + try: + pesq_values.append( + self.pesq(pred.squeeze(), target_.squeeze()).item() + ) + except Exception as e: + logging.warning(f"{e} error occured while calculating PESQ") + return torch.tensor(np.mean(pesq_values)) class LossWrapper(nn.Module): @@ -177,7 +185,7 @@ class LossWrapper(nn.Module): LOSS_MAP = { "mae": mean_absolute_error, "mse": mean_squared_error, - "SI-SDR": Si_SDR, + "si-sdr": Si_SDR, "pesq": Pesq, "stoi": Stoi, } From 3e42d843985fea42a3cbf81b893ea0dd2a7daf5c Mon Sep 17 00:00:00 2001 From: shahules786 Date: Thu, 13 Oct 2022 10:49:05 +0530 Subject: [PATCH 2/2] add logo --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f721ab0..586df14 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ -# mayavoz +

+ +

+ mayavoz is a PyTorch-based open-source toolkit for speech enhancement. It is designed to save time for audio researchers. It provides easy-to-use pretrained audio enhancement models and facilitates highly customisable custom model training. | **[Quick Start]()** | **[Installation]()** | **[Tutorials]()** | **[Available Recipes]()**