mayavoz/enhancer/models/waveunet.py

103 lines
2.6 KiB
Python

import torch
import torch.nn as nn
class WavenetDecoder(nn.Module):
    """Decoder stage: 1-D convolution, batch normalisation, then LeakyReLU.

    With the default ``kernel_size=5``, ``padding=2``, ``stride=1`` and
    ``dilation=1`` the convolution leaves the length of the last axis
    unchanged.

    Args:
        in_channels: Number of channels expected on the input.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Width of the convolution kernel.
        padding: Zero padding added to both ends of the last axis.
        stride: Stride of the convolution.
        dilation: Spacing between kernel taps.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 5,
        padding: int = 2,
        stride: int = 1,
        dilation: int = 1,
    ):
        super().__init__()
        conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )
        norm = nn.BatchNorm1d(num_features=out_channels)
        activation = nn.LeakyReLU(negative_slope=0.1)
        # The attribute name and layer order fix the state_dict keys
        # ("decoder.0.*", "decoder.1.*"), so both are kept as they were.
        self.decoder = nn.Sequential(conv, norm, activation)

    def forward(self, waveform):
        """Run ``waveform`` through conv -> batch norm -> LeakyReLU."""
        features = self.decoder(waveform)
        return features
class WavenetEncoder(nn.Module):
    """Encoder stage: 1-D convolution, batch normalisation, then LeakyReLU.

    With the default ``kernel_size=15``, ``padding=7``, ``stride=1`` and
    ``dilation=1`` the convolution leaves the length of the last axis
    unchanged.

    Args:
        in_channels: Number of channels expected on the input.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Width of the convolution kernel.
        padding: Zero padding added to both ends of the last axis.
        stride: Stride of the convolution.
        dilation: Spacing between kernel taps.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 15,
        padding: int = 7,
        stride: int = 1,
        dilation: int = 1,
    ):
        super().__init__()
        layers = [
            nn.Conv1d(
                in_channels,
                out_channels,
                kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
            ),
            nn.BatchNorm1d(out_channels),
            nn.LeakyReLU(negative_slope=0.1),
        ]
        # The attribute name and layer order fix the state_dict keys
        # ("encoder.0.*", "encoder.1.*"), so both are kept as they were.
        self.encoder = nn.Sequential(*layers)

    def forward(self, waveform):
        """Run ``waveform`` through conv -> batch norm -> LeakyReLU."""
        encoded = self.encoder(waveform)
        return encoded
class WaveUnet(nn.Module):
    """1-D U-Net over waveforms built from encoder/decoder conv stages.

    ``num_layers`` encoder stages widen the channel count by
    ``initial_output_channels`` per stage. A bottleneck convolution sits at
    the deepest level, and ``num_layers`` decoder stages narrow it again.
    Each decoder is sized to receive the previous stage's output concatenated
    (along the channel axis) with the output of the encoder at the same depth.

    Args:
        inp_channels: Number of channels of the input waveform.
        num_layers: Number of encoder stages (and of decoder stages).
        initial_output_channels: Channels produced by the first encoder and
            added at every further encoder stage.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(
        self,
        inp_channels: int = 1,
        num_layers: int = 12,
        initial_output_channels: int = 24,
    ):
        super().__init__()
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")

        self.encoders = nn.ModuleList()
        self.decoders = nn.ModuleList()

        out_channels = initial_output_channels
        for layer in range(num_layers):
            self.encoders.append(WavenetEncoder(inp_channels, out_channels))

            # From here on `inp_channels` is the width of this encoder's
            # output (the skip connection) and `out_channels` the width of
            # the next, deeper encoder.
            inp_channels = out_channels
            out_channels += initial_output_channels

            if layer == num_layers - 1:
                # Deepest decoder: bottleneck output + deepest skip.
                decoder_in = num_layers * initial_output_channels + inp_channels
            else:
                # Output of the next-deeper decoder + this level's skip.
                decoder_in = inp_channels + out_channels

            # Prepend so that `self.decoders` is ordered deepest-first,
            # i.e. in the order `forward` applies them.
            self.decoders.insert(0, WavenetDecoder(decoder_in, inp_channels))

        bottleneck_dim = num_layers * initial_output_channels
        self.bottleneck = nn.Sequential(
            nn.Conv1d(bottleneck_dim, bottleneck_dim, 15, stride=1, padding=7),
            nn.BatchNorm1d(bottleneck_dim),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
        )

    def forward(self, waveform):
        """Run the encoder path, the bottleneck and the decoder path.

        After every encoder the time axis is decimated by 2 (every second
        sample is kept). Before every decoder the signal is linearly
        interpolated back to the length of the matching encoder output and
        concatenated with it along the channel axis. The resampling scheme
        follows the Wave-U-Net design; it is not dictated by the layer
        definitions themselves.

        Args:
            waveform: Tensor of shape ``(batch, inp_channels, time)``.

        Returns:
            Tensor of shape ``(batch, initial_output_channels, time)``, the
            output of the last (shallowest) decoder. This class defines no
            projection back to ``inp_channels``.

        Raises:
            ValueError: If ``waveform`` is not 3-dimensional.
        """
        if waveform.dim() != 3:
            raise ValueError(
                "expected waveform of shape (batch, channels, time), "
                f"got {waveform.dim()} dimension(s)"
            )

        skips = []
        out = waveform
        for encoder in self.encoders:
            out = encoder(out)
            skips.append(out)
            out = out[:, :, ::2]  # decimate the time axis by 2

        out = self.bottleneck(out)

        # Decoders are stored deepest-first, so pair them with the skips in
        # reverse order of creation.
        for decoder, skip in zip(self.decoders, reversed(skips)):
            # Interpolating to the skip's exact length (rather than a fixed
            # factor of 2) also handles odd lengths lost during decimation.
            out = nn.functional.interpolate(
                out, size=skip.shape[-1], mode="linear", align_corners=False
            )
            out = decoder(torch.cat([out, skip], dim=1))

        return out