demucs forward

2022-09-05 17:12:03 +05:30 · 2022-09-05 17:12:03 +05:30 · 409afc31fc
parent 9df1dafccf
commit 409afc31fc
1 changed files with 51 additions and 4 deletions
--- a/enhancer/models/demucs.py
+++ b/enhancer/models/demucs.py
@ -1,5 +1,9 @@
 from typing import bool
 from torch import nn
+import torch.functional as F
+import math 
+
+from enhancer.utils.io import Audio as audio

 class DeLSTM(nn.Module):
    def __init__(
@ -35,8 +39,10 @@ class Demus(nn.Module):
        glu:bool = True,
        bidirectional:bool=True,
        resample:int=2,
+        sampling_rate = 16000

    ):
+        super().__init__()
        self.c_in = c_in 
        self.c_out = c_out 
        self.hidden = hidden
@ -46,10 +52,10 @@ class Demus(nn.Module):
        self.depth = depth
        self.bidirectional = bidirectional
        self.activation = nn.GLU(1) if glu else nn.ReLU()
+        self.resample = resample
+        self.sampling_rate = sampling_rate
        multi_factor = 2 if glu else 1

-        ## do resampling
-
        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

@ -78,7 +84,48 @@ class Demus(nn.Module):
        
        self.de_lstm = DeLSTM(input_size=c_in,hidden_size=c_in,num_layers=2,bidirectional=self.bidirectional)

-    def forward(self,input):
+    def forward(self,mixed_signal):
+        
+        length = mixed_signal.shape[-1]
+        x = F.pad((0,self.get_padding_length(length) - length)) 
+        if self.resample>1:
+            x = audio.resample_audio(audio=x, 
+                        sampling_rate = int(self.sampling_rate * self.resample))
+
+        encoder_outputs = []
+        for encoder in self.encoder:
+            x = encoder(x)
+            encoder_outputs.append(x)
+        
+        x,_ = self.de_lstm(x)
+
+        for decoder in self.decoder:
+            skip_connection = encoder_outputs.pop(-1)
+            x += skip_connection[..., :x.shape[-1]]
+            x = decoder(x)
+        
+        if self.resample > 1:
+            x = audio.resample_audio(x,int(self.sampling_rate * self.resample),
+                                    self.sampling_rate)
+
+        return x
+        
+    def get_padding_length(self,input_length):
+
+        input_length = math.ceil(input_length * self.resample)
+
+  
+        for layer in range(self.depth):                                        # encoder operation
+            input_length = math.ceil((input_length - self.kernel_size)/self.stride)+1
+            input_length = max(1,input_length)
+        for layer in range(self.depth):                                        # decoder operaration
+            input_length = (input_length-1) * self.stride + self.kernel_size
+        input_length = math.ceil(input_length/self.resample)
+
+        return int(input_length)
+
+        
+