92 lines
2.8 KiB
Python
92 lines
2.8 KiB
Python
from typing import Optional
|
|
|
|
import numpy as np
|
|
import torch
|
|
import torch.nn.functional as F
|
|
from scipy.signal import get_window
|
|
from torch import nn
|
|
|
|
|
|
class ConvFFT(nn.Module):
    """Base module holding a windowed real-FFT basis usable as a conv kernel.

    Args:
        window_len: Length of the analysis window in samples.
        nfft: FFT size. When omitted (or falsy), the smallest power of two
            that is greater than or equal to ``window_len`` is used.
        window: Window name forwarded to ``scipy.signal.get_window``.
    """

    def __init__(
        self,
        window_len: int,
        nfft: Optional[int] = None,
        window: str = "hamming",
    ):
        super().__init__()
        self.window_len = window_len
        # ``np.int`` was removed from NumPy; the builtin ``int`` is equivalent.
        self.nfft = nfft if nfft else int(2 ** np.ceil(np.log2(window_len)))
        # Register the window as a buffer so it follows ``.to()``/``.cuda()``
        # together with the module. ``persistent=False`` keeps ``state_dict``
        # identical to the previous plain-attribute behaviour.
        self.register_buffer(
            "window",
            torch.from_numpy(get_window(window, window_len).astype("float32")),
            persistent=False,
        )

    @property
    def init_kernel(self):
        """Build the windowed Fourier kernel.

        Returns:
            A float32 tensor of shape
            ``(2 * (nfft // 2 + 1), 1, window_len)``: the real parts of all
            rFFT bins followed by the imaginary parts, each row multiplied
            element-wise by the window.
        """
        # rFFT of the identity yields the DFT basis; keep the first
        # ``window_len`` time samples.
        fourier_basis = np.fft.rfft(np.eye(self.nfft))[: self.window_len]
        real, imag = np.real(fourier_basis), np.imag(fourier_basis)
        kernel = np.concatenate([real, imag], 1).T
        kernel = torch.from_numpy(kernel.astype("float32")).unsqueeze(1)
        # The window buffer may have been moved/cast with the module; align it
        # with the freshly built kernel before the in-place multiply.
        kernel *= self.window.to(device=kernel.device, dtype=kernel.dtype)
        return kernel
|
|
|
|
|
|
class ConvSTFT(ConvFFT):
    """Short-time Fourier transform computed with a strided 1-D convolution.

    The windowed Fourier basis produced by ``init_kernel`` is stored in the
    ``weight`` buffer and applied with ``F.conv1d``.

    Args:
        window_len: Length of the analysis window in samples.
        hop_size: Stride between frames. Defaults to ``window_len // 2`` when
            omitted (or falsy).
        nfft: FFT size, forwarded to ``ConvFFT``.
        window: Window name, forwarded to ``ConvFFT``.
    """

    def __init__(
        self,
        window_len: int,
        hop_size: Optional[int] = None,
        nfft: Optional[int] = None,
        window: str = "hamming",
    ):
        super().__init__(window_len=window_len, nfft=nfft, window=window)
        self.hop_size = hop_size if hop_size else window_len // 2
        self.register_buffer("weight", self.init_kernel)

    def forward(self, input):
        """Transform a signal into stacked real/imaginary spectral frames.

        Args:
            input: A 2-D tensor (a channel axis is inserted at position 1) or
                a 3-D tensor. The kernel has a single input channel, so a 3-D
                input needs a channel axis of size 1.

        Returns:
            The ``F.conv1d`` output; its channel axis holds the real parts of
            all bins followed by the imaginary parts.

        Raises:
            ValueError: If ``input`` is neither 2-D nor 3-D.
        """
        # Reject every unsupported rank up front (previously only ranks below
        # 2 were caught and higher ranks failed later inside ``conv1d``).
        if input.dim() not in (2, 3):
            raise ValueError(
                f"Expected signal with shape 2 or 3 got {input.dim()}"
            )
        if input.dim() == 2:
            input = input.unsqueeze(1)

        # Pad both ends of the time axis by the frame overlap.
        edge = self.window_len - self.hop_size
        input = F.pad(input, (edge, edge))
        return F.conv1d(input, self.weight, stride=self.hop_size)
|
|
|
|
|
|
class ConviSTFT(ConvFFT):
    """Inverse short-time Fourier transform via transposed 1-D convolution.

    Frames are overlap-added with ``F.conv_transpose1d`` using the windowed
    Fourier basis in ``weight``; the result is divided by the overlap-added
    squared window and the edge padding is trimmed.

    Args:
        window_len: Length of the synthesis window in samples.
        hop_size: Stride between frames. Defaults to ``window_len // 2`` when
            omitted (or falsy).
        nfft: FFT size, forwarded to ``ConvFFT``.
        window: Window name, forwarded to ``ConvFFT``.
    """

    def __init__(
        self,
        window_len: int,
        hop_size: Optional[int] = None,
        nfft: Optional[int] = None,
        window: str = "hamming",
    ):
        super().__init__(window_len=window_len, nfft=nfft, window=window)
        self.hop_size = hop_size if hop_size else window_len // 2
        self.register_buffer("weight", self.init_kernel)
        # Identity kernel used to overlap-add the squared window per frame.
        self.register_buffer("enframe", torch.eye(window_len).unsqueeze(1))

    def forward(self, input, phase=None):
        """Reconstruct a signal from spectral frames.

        Args:
            input: Stacked real/imaginary frames (channel axis 1), or a
                magnitude tensor when ``phase`` is given.
            phase: Optional phase tensor. When provided, ``input`` is treated
                as a magnitude and converted to real/imaginary parts that are
                concatenated along axis 1.

        Returns:
            The overlap-added signal with ``window_len - hop_size`` samples
            removed from each end of the last axis.
        """
        if phase is not None:
            real = input * torch.cos(phase)
            imag = input * torch.sin(phase)
            input = torch.cat([real, imag], 1)

        out = F.conv_transpose1d(input, self.weight, stride=self.hop_size)

        # Align the window with the registered buffers so the normalisation
        # also works after the module has been moved or cast.
        window = self.window.to(self.enframe)
        coeff = window.unsqueeze(1).repeat(1, 1, input.size(-1)) ** 2
        coeff = F.conv_transpose1d(coeff, self.enframe, stride=self.hop_size)
        out = out / coeff

        pad = self.window_len - self.hop_size
        # Use an explicit end index: ``out[..., pad:-pad]`` yields an empty
        # tensor when ``pad == 0`` (i.e. ``hop_size == window_len``).
        return out[..., pad : out.size(-1) - pad]
|