add enhance function

2022-09-19 22:35:21 +05:30 · 2022-09-19 22:35:21 +05:30 · 9ef6665b84
parent 872303642f
commit 9ef6665b84
1 changed files with 64 additions and 2 deletions
--- a/enhancer/models/model.py
+++ b/enhancer/models/model.py
@ -1,9 +1,15 @@
+from asyncore import write
 from importlib import import_module
+from lib2to3.pgen2.token import OP
+import wave
+from xmlrpc.client import boolean
 from huggingface_hub import cached_download, hf_hub_url
+import numpy as np
 import os
-from typing import Optional, Union, List, Path, Text
+from typing import Optional, Union, List, Path, Text, Dict, Any
 from torch.optim import Adam
 import torch
+from torch.nn.functional import pad
 import pytorch_lightning as pl
 from pytorch_lightning.utilities.cloud_io import load as pl_load
 from urllib.parse import urlparse
@ -11,7 +17,9 @@ from urllib.parse import urlparse

 from enhancer import __version__
 from enhancer.data.dataset import Dataset
+from enhancer.utils.io import Audio
 from enhancer.utils.loss import Avergeloss
+from enhancer.inference import Inference

 CACHE_DIR = ""
 HF_TORCH_WEIGHTS = ""
@ -30,8 +38,8 @@ class Model(pl.LightningModule):
    ):
        super().__init__()
        assert num_channels ==1 , "Enhancer only support for mono channel models"
-        self.save_hyperparameters("num_channels","sampling_rate","lr","loss","metric")
        self.dataset = dataset
+        self.save_hyperparameters("num_channels","sampling_rate","lr","loss","metric")
        
    
    @property
@ -40,6 +48,8 @@ class Model(pl.LightningModule):

    @dataset.setter
    def dataset(self,dataset):
+        if dataset is not None:
+            self.save_hyperparameters("duration",self.dataset.duration)
        self._dataset = dataset

    def setup(self,stage:Optional[str]=None):
@ -99,6 +109,10 @@ class Model(pl.LightningModule):

        }

+    def on_load_checkpoint(self, checkpoint: Dict[str, Any]):
+        pass
+
+
    @classmethod
    def from_pretrained(
        cls,
@ -157,7 +171,55 @@ class Model(pl.LightningModule):
            print(e)


+        return model 
+
+    def infer_batch(self,batch,batch_size):
        
+        assert batch.ndim == 3, f"Expected batch with 3 dimensions (batch,channels,samples) got only {batch.ndim}"
+        batch_predictions = []
+        self.eval().to(self.device)
+
+        for batch_id in range(batch.shape[0],batch_size):
+            batch_data = batch[batch_id:batch_id+batch_size,:,:].to(self.device)
+            prediction = self(batch_data)
+            batch_predictions.append(prediction)
+        
+        return torch.vstack(batch_predictions)
+
+    def enhance(
+        self,
+        audio:Union[Path,np.ndarray,torch.Tensor],
+        sampling_rate:Optional[int]=None,
+        batch_size:int=32,
+        save_output:boolean=False,
+        duration:Optional[int]=None,
+        step_size:Optional[int]=None,):
+
+        model_sampling_rate = self.model.hprams("sampling_rate")
+        if duration is None:
+            duration = self.model.hparams("duration")
+        waveform = Inference.read_input(audio,sampling_rate,model_sampling_rate)
+        waveform.to(self.device)
+        window_size = round(duration * model_sampling_rate)
+        batched_waveform = Inference.batchify(waveform,window_size,step_size=step_size)
+        batch_prediction = self.infer_batch(batched_waveform,batch_size=batch_size)
+        waveform = Inference.aggreagate(batch_prediction,window_size,step_size)
+        
+        if save_output and isinstance(audio,(str,Path)):
+            Inference.write_output(waveform,audio,model_sampling_rate)
+
+        else:
+            return waveform            
+
+
+
+
+
+        
+
+
+       
+