Compare commits
72 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
31cd404e03 | |
|
|
8cf8dd9717 | |
|
|
c2b2b83fd5 | |
|
|
bcb94d5f34 | |
|
|
e0abb458d5 | |
|
|
e322c6280d | |
|
|
7548647dfb | |
|
|
1035fbb236 | |
|
|
a06c0a3865 | |
|
|
64f52fe010 | |
|
|
078d3eb244 | |
|
|
629adf0232 | |
|
|
501948e866 | |
|
|
aa61056376 | |
|
|
187e48d125 | |
|
|
7882b8cca3 | |
|
|
789da44114 | |
|
|
78138c5f93 | |
|
|
b8a05c775c | |
|
|
2b68598d7b | |
|
|
29a432540e | |
|
|
8d1c057b86 | |
|
|
6e0f69f575 | |
|
|
0e58691a2c | |
|
|
807f4b93ea | |
|
|
315d646347 | |
|
|
f34e49e341 | |
|
|
fa47860f57 | |
|
|
f7eb0a600c | |
|
|
ba2d00648c | |
|
|
8a55a77640 | |
|
|
94a4ea38ed | |
|
|
8d25b0ed79 | |
|
|
09ba645315 | |
|
|
8906496366 | |
|
|
e4a2eb7844 | |
|
|
8a6af87627 | |
|
|
5a392332ba | |
|
|
f66a5236e1 | |
|
|
d415bb0c59 | |
|
|
8c1524a998 | |
|
|
7161f84a27 | |
|
|
2c79e60a85 | |
|
|
41ee2fce0b | |
|
|
0c5db496e2 | |
|
|
031221b79e | |
|
|
50062eaf40 | |
|
|
0b02b73094 | |
|
|
2ccc2822cd | |
|
|
1667de624e | |
|
|
32579b7a39 | |
|
|
bb68e9e4eb | |
|
|
a21ef707ad | |
|
|
81c5f13ff6 | |
|
|
a417e226f3 | |
|
|
5d8f49d78e | |
|
|
14156743f9 | |
|
|
845575a2ad | |
|
|
c9b78b0e73 | |
|
|
3068476512 | |
|
|
ffb364196e | |
|
|
52cefcb962 | |
|
|
61923f6d68 | |
|
|
e90efe3163 | |
|
|
aa043aaf40 | |
|
|
4f6ccadf4b | |
|
|
0e982cd493 | |
|
|
0787d946da | |
|
|
e06ba07889 | |
|
|
741fd7b87c | |
|
|
a064151e2e | |
|
|
25557757c7 |
|
|
@ -40,4 +40,5 @@ repos:
|
||||||
- id: end-of-file-fixer
|
- id: end-of-file-fixer
|
||||||
- id: requirements-txt-fixer
|
- id: requirements-txt-fixer
|
||||||
- id: mixed-line-ending
|
- id: mixed-line-ending
|
||||||
|
exclude: noisyspeech_synthesizer.cfg
|
||||||
args: ['--fix=no']
|
args: ['--fix=no']
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Jun 26 15:54:05 2019
|
||||||
|
|
||||||
|
@author: chkarada
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
|
||||||
|
# Function to read audio
|
||||||
|
def audioread(path, norm=True, start=0, stop=None):
    """Read an audio file and return it as a mono signal.

    Args:
        path: Path of the audio file; it is resolved to an absolute path.
        norm: When true, scale the signal so that its RMS level is
            ``10 ** (-25 / 20)`` (-25 dBFS).
        start: First frame to read, forwarded to ``sf.read``.
        stop: Frame to stop before, forwarded to ``sf.read``.

    Returns:
        Tuple ``(x, sr)`` with the mono samples and the sample rate
        reported by ``sf.read``.

    Raises:
        ValueError: If ``path`` does not exist.
        RuntimeError: If ``sf.read`` cannot decode the file.
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        raise ValueError("[{}] does not exist!".format(path))

    try:
        x, sr = sf.read(path, start=start, stop=stop)
    except RuntimeError:  # e.g. sph pcm-embedded shortened v2
        # Previously execution continued with ``x`` unbound and died with
        # an unrelated UnboundLocalError; surface the real error instead.
        print("WARNING: Audio type not supported")
        raise

    if x.ndim > 1:  # multi-channel: average the channels down to mono
        x = x.sum(axis=1) / x.shape[1]

    if norm:
        rms = (x**2).mean() ** 0.5
        # A silent (or empty) signal has no level to normalise; scaling it
        # would only fill the array with NaNs.
        if rms > 0:
            x = x * (10 ** (-25 / 20) / rms)
    return x, sr
|
||||||
|
|
||||||
|
|
||||||
|
# Function to write audio
|
||||||
|
def audiowrite(data, fs, destpath, norm=False):
    """Write samples to an audio file, creating its directory if needed.

    Args:
        data: NumPy array of samples.
        fs: Sample rate handed to ``sf.write``.
        destpath: Output file path; it is resolved to an absolute path.
        norm: When true, scale ``data`` to an RMS of ``10 ** (-25 / 20)``
            (-25 dBFS, the level used by the other helpers in this file)
            and, if the peak then reaches 1.0 or more, rescale so the peak
            is exactly 1.0.
    """
    if norm:
        # Guards the division for silent input (the old eps of 0.0 did not).
        eps = np.finfo(float).eps
        rms = (data**2).mean() ** 0.5
        # Amplitude gain uses dB / 20; the previous ``-25 / 10`` produced
        # -50 dBFS, inconsistent with every other normalisation here.
        scalar = 10 ** (-25 / 20) / (rms + eps)
        data = data * scalar
        # The former ``max(abs(data), eps)`` compared an array with a float
        # and raised ValueError for any signal longer than one sample.
        peak = np.max(np.abs(data)) if np.size(data) else 0.0
        if peak >= 1:
            data = data / peak

    destpath = os.path.abspath(destpath)
    destdir = os.path.dirname(destpath)
    # exist_ok avoids the check-then-create race between parallel writers.
    os.makedirs(destdir, exist_ok=True)

    sf.write(destpath, data, fs)
    return
|
||||||
|
|
||||||
|
|
||||||
|
# Function to mix clean speech and noise at various SNR levels
|
||||||
|
def snr_mixer(clean, noise, snr):
    """Mix clean speech and noise at the requested SNR.

    Both inputs are first normalised to an RMS of ``10 ** (-25 / 20)``
    (-25 dBFS). The noise is then rescaled so that
    ``20 * log10(rms(clean) / rms(noise)) == snr``.

    Args:
        clean: NumPy array with the clean speech samples.
        noise: NumPy array with the noise samples, same length as ``clean``.
        snr: Target signal-to-noise ratio in dB.

    Returns:
        Tuple ``(clean, noisenewlevel, noisyspeech)``: the normalised clean
        signal, the rescaled noise, and their sum.
    """
    target_rms = 10 ** (-25 / 20)

    # Normalizing to -25 dB FS (silent signals are left untouched so they
    # do not turn into NaNs).
    rmsclean = (clean**2).mean() ** 0.5
    if rmsclean > 0:
        clean = clean * (target_rms / rmsclean)
        rmsclean = (clean**2).mean() ** 0.5

    rmsnoise = (noise**2).mean() ** 0.5
    if rmsnoise > 0:
        noise = noise * (target_rms / rmsnoise)
        rmsnoise = (noise**2).mean() ** 0.5

    # Set the noise level for a given SNR. This is a plain amplitude ratio:
    # the former np.sqrt() around it halved the achieved SNR in dB.
    if rmsnoise > 0:
        noisescalar = rmsclean / (10 ** (snr / 20)) / rmsnoise
    else:
        noisescalar = 1.0  # noise is all zeros; any gain leaves it silent
    noisenewlevel = noise * noisescalar
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
defaults:
|
defaults:
|
||||||
- model : WaveUnet
|
- model : Demucs
|
||||||
- dataset : Vctk
|
- dataset : Vctk
|
||||||
- optimizer : Adam
|
- optimizer : Adam
|
||||||
- hyperparameters : default
|
- hyperparameters : default
|
||||||
- trainer : default
|
- trainer : fastrun_dev
|
||||||
- mlflow : experiment
|
- mlflow : experiment
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,9 @@ _target_: enhancer.data.dataset.EnhancerDataset
|
||||||
root_dir : /Users/shahules/Myprojects/enhancer/datasets/vctk_test
|
root_dir : /Users/shahules/Myprojects/enhancer/datasets/vctk_test
|
||||||
name : dns-2020
|
name : dns-2020
|
||||||
duration : 1.0
|
duration : 1.0
|
||||||
sampling_rate: 16000
|
sampling_rate: 8000
|
||||||
batch_size: 32
|
batch_size: 32
|
||||||
files:
|
files:
|
||||||
root_dir : /Users/shahules/Myprojects/enhancer/datasets/vctk_test
|
|
||||||
train_clean : clean_test_wav
|
train_clean : clean_test_wav
|
||||||
test_clean : clean_test_wav
|
test_clean : clean_test_wav
|
||||||
train_noisy : clean_test_wav
|
train_noisy : clean_test_wav
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,10 @@
|
||||||
_target_: enhancer.data.dataset.EnhancerDataset
|
_target_: enhancer.data.dataset.EnhancerDataset
|
||||||
name : vctk
|
name : vctk
|
||||||
root_dir : /scratch/c.sistc3/DS_10283_2791
|
root_dir : /scratch/c.sistc3/DS_10283_2791
|
||||||
duration : 1.0
|
duration : 1.5
|
||||||
sampling_rate: 16000
|
sampling_rate: 16000
|
||||||
batch_size: 128
|
batch_size: 256
|
||||||
|
valid_size : 0.05
|
||||||
|
|
||||||
files:
|
files:
|
||||||
train_clean : clean_trainset_28spk_wav
|
train_clean : clean_trainset_28spk_wav
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
loss : mse
|
loss : mse
|
||||||
metric : mae
|
metric : [stoi,pesq,si-sdr]
|
||||||
lr : 0.0001
|
lr : 0.001
|
||||||
ReduceLr_patience : 5
|
ReduceLr_patience : 10
|
||||||
ReduceLr_factor : 0.1
|
ReduceLr_factor : 0.5
|
||||||
min_lr : 0.000001
|
min_lr : 0.00
|
||||||
EarlyStopping_factor : 10
|
EarlyStopping_factor : 10
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,15 @@
|
||||||
_target_: pytorch_lightning.Trainer
|
_target_: pytorch_lightning.Trainer
|
||||||
accelerator: auto
|
accelerator: gpu
|
||||||
accumulate_grad_batches: 1
|
accumulate_grad_batches: 1
|
||||||
amp_backend: native
|
amp_backend: native
|
||||||
auto_lr_find: True
|
auto_lr_find: False
|
||||||
auto_scale_batch_size: False
|
auto_scale_batch_size: False
|
||||||
auto_select_gpus: True
|
auto_select_gpus: True
|
||||||
benchmark: False
|
benchmark: False
|
||||||
check_val_every_n_epoch: 1
|
check_val_every_n_epoch: 1
|
||||||
detect_anomaly: False
|
detect_anomaly: False
|
||||||
deterministic: False
|
deterministic: False
|
||||||
devices: -1
|
devices: 2
|
||||||
enable_checkpointing: True
|
enable_checkpointing: True
|
||||||
enable_model_summary: True
|
enable_model_summary: True
|
||||||
enable_progress_bar: True
|
enable_progress_bar: True
|
||||||
|
|
@ -22,9 +22,8 @@ limit_predict_batches: 1.0
|
||||||
limit_test_batches: 1.0
|
limit_test_batches: 1.0
|
||||||
limit_train_batches: 1.0
|
limit_train_batches: 1.0
|
||||||
limit_val_batches: 1.0
|
limit_val_batches: 1.0
|
||||||
log_every_n_steps: 50
|
log_every_n_steps: 100
|
||||||
max_epochs: 3
|
max_epochs: 250
|
||||||
max_steps: -1
|
|
||||||
max_time: null
|
max_time: null
|
||||||
min_epochs: 1
|
min_epochs: 1
|
||||||
min_steps: null
|
min_steps: null
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,21 @@ echo "Making temp dir"
|
||||||
mkdir temp
|
mkdir temp
|
||||||
pwd
|
pwd
|
||||||
|
|
||||||
#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TRAIN --output ./data/train
|
# echo "files"
|
||||||
#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TEST --output ./data/test
|
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_training
|
||||||
|
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_training
|
||||||
|
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_testing
|
||||||
|
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_testing
|
||||||
|
|
||||||
echo "Start Training..."
|
# cp -r /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_testing /scratch/c.sistc3/MS-SNSD/DNS15/
|
||||||
python enhancer/cli/train.py
|
# cp -r /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_testing /scratch/c.sistc3/MS-SNSD/DNS15/
|
||||||
|
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS20
|
||||||
|
|
||||||
|
# mkdir /scratch/c.sistc3/MS-SNSD/DNS20
|
||||||
|
|
||||||
|
python noisyspeech_synthesizer.py
|
||||||
|
|
||||||
|
mv ./CleanSpeech_testing/ /scratch/c.sistc3/MS-SNSD/DNS20
|
||||||
|
mv ./NoisySpeech_testing/ /scratch/c.sistc3/MS-SNSD/DNS20
|
||||||
|
ls /scratch/c.sistc3/MS-SNSD/DNS20
|
||||||
|
#python enhancer/cli/train.py
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
# Configuration for generating Noisy Speech Dataset
|
||||||
|
|
||||||
|
# - sampling_rate: Specify the sampling rate. Default is 16 kHz
|
||||||
|
# - audioformat: default is .wav
|
||||||
|
# - audio_length: Minimum Length of each audio clip (noisy and clean speech) in seconds that will be generated by augmenting utterances.
|
||||||
|
# - silence_length: Duration of silence introduced between clean speech utterances.
|
||||||
|
# - total_hours: Total number of hours of data required. Units are in hours.
|
||||||
|
# - snr_lower: Lower bound for SNR required (default: 0 dB)
|
||||||
|
# - snr_upper: Upper bound for SNR required (default: 40 dB)
|
||||||
|
# - total_snrlevels: Number of SNR levels required (default: 5, which means there are 5 levels between snr_lower and snr_upper)
|
||||||
|
# - noise_dir: Default is None. But specify the noise directory path if noise files are not in the source directory
|
||||||
|
# - speech_dir: Default is None. But specify the speech directory path if speech files are not in the source directory
|
||||||
|
# - noise_types_excluded: Noise files starting with the following tags to be excluded in the noise list. Example: noise_types_excluded: Babble, AirConditioner
|
||||||
|
# Specify 'None' if no noise files to be excluded.
|
||||||
|
|
||||||
|
[noisy_speech]
|
||||||
|
|
||||||
|
sampling_rate: 16000
|
||||||
|
audioformat: *.wav
|
||||||
|
audio_length: 10
|
||||||
|
silence_length: 0.2
|
||||||
|
total_hours: 1
|
||||||
|
snr_lower: 0
|
||||||
|
snr_upper: 40
|
||||||
|
total_snrlevels: 2
|
||||||
|
naming: test
|
||||||
|
|
||||||
|
noise_dir: /scratch/c.sistc3/MS-SNSD/noise_test
|
||||||
|
speech_dir: /scratch/c.sistc3/MS-SNSD/clean_test
|
||||||
|
noise_types_excluded: None
|
||||||
|
|
@ -0,0 +1,155 @@
|
||||||
|
"""
|
||||||
|
@author: chkarada
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import configparser as CP
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from audiolib import audioread, audiowrite, snr_mixer
|
||||||
|
|
||||||
|
|
||||||
|
def main(cfg):
    """Synthesize noisy-speech clips from clean speech and noise files.

    Clean utterances are concatenated (with silence in between) until they
    exceed the configured clip length, a noise clip of the same length is
    built the same way, and the pair is mixed at every configured SNR
    level. Output goes to ``CleanSpeech_<naming>ing``,
    ``NoisySpeech_<naming>ing`` and ``Noise_<naming>ing`` next to this file.

    Args:
        cfg: Mapping of option name to string value. Keys read:
            ``snr_lower``, ``snr_upper``, ``total_snrlevels``,
            ``speech_dir``, ``noise_dir``, ``naming``, ``sampling_rate``,
            ``audioformat``, ``total_hours``, ``audio_length``,
            ``silence_length`` and ``noise_types_excluded``.

    Raises:
        FileNotFoundError: If the clean or noise directory is missing, or
            if no file in it matches ``audioformat``.
    """
    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])

    here = os.path.dirname(__file__)

    # Explicit raises instead of ``assert False``: asserts vanish under -O.
    clean_dir = os.path.join(here, "clean_train")
    if cfg["speech_dir"] != "None":
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        raise FileNotFoundError("Clean speech data is required")

    noise_dir = os.path.join(here, "noise_train")
    if cfg["noise_dir"] != "None":
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise FileNotFoundError("Noise data is required")

    name = cfg["naming"]
    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(here, f"NoisySpeech_{name}ing")
    clean_proc_dir = os.path.join(here, f"CleanSpeech_{name}ing")
    # Previously this pointed at the NoisySpeech directory as well, so the
    # noise-only files were written in among the noisy mixtures.
    noise_proc_dir = os.path.join(here, f"Noise_{name}ing")
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != "None":
        # Strip the tags so "Babble, AirConditioner" matches both entries,
        # and drop empty tags, which would otherwise match every file.
        excluded = tuple(
            tag.strip()
            for tag in cfg["noise_types_excluded"].split(",")
            if tag.strip()
        )
        noisefilenames = [
            fn
            for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded)
        ]

    # Fail with a readable message rather than a "low >= high" error from
    # np.random.randint further down.
    if not cleanfilenames:
        raise FileNotFoundError(
            f"No clean speech files matching [{audioformat}] in [{clean_dir}]"
        )
    if not noisefilenames:
        raise FileNotFoundError(
            f"No noise files matching [{audioformat}] in [{noise_dir}]"
        )

    filecounter = 0
    num_samples = 0

    # NOTE(review): ``fs`` is overwritten by the rate of each file read, so
    # files are assumed to share the configured sampling rate - confirm.
    while num_samples < total_samples:
        idx_s = np.random.randint(0, len(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])
        # The clean clip must be strictly longer than audio_length.
        clean, fs = _extend_signal(
            clean, fs, cleanfilenames, idx_s, audio_length + 1, silence_length
        )

        idx_n = np.random.randint(0, len(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])
        noise, fs = _extend_signal(
            noise, fs, noisefilenames, idx_n, len(clean), silence_length
        )
        noise = noise[: len(clean)]
        filecounter = filecounter + 1

        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(
                clean=clean, noise=noise, snr=snr
            )
            clip_id = str(filecounter)
            snr_tag = str(snr)
            noisyfilename = (
                "noisy" + clip_id + "_SNRdb_" + snr_tag + "_clnsp" + clip_id + ".wav"
            )
            cleanfilename = "clnsp" + clip_id + ".wav"
            noisefilename = "noisy" + clip_id + "_SNRdb_" + snr_tag + ".wav"
            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)


def _extend_signal(signal, fs, filenames, idx, min_length, silence_length):
    """Append more files, separated by silence, up to ``min_length`` samples.

    Returns ``(signal, fs)`` where ``fs`` is the rate of the last file read
    (unchanged if no further file was needed).
    """
    while len(signal) < min_length:
        idx = idx + 1
        # Only redraw once the index runs off the end; the old ``- 1`` test
        # meant the last file was never reached by sequential advance.
        if idx >= len(filenames):
            idx = np.random.randint(0, len(filenames))
        extra, fs = audioread(filenames[idx])
        gap = np.zeros(int(fs * silence_length))
        signal = np.concatenate((signal, gap, extra))
    return signal, fs
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: load one section of the .cfg file (looked up
    # relative to this script) and hand it to main() as a plain dict.
    parser = argparse.ArgumentParser()

    # Configurations: read noisyspeech_synthesizer.cfg
    parser.add_argument(
        "--cfg",
        default="noisyspeech_synthesizer.cfg",
        help="Read noisyspeech_synthesizer.cfg for all the details",
    )
    parser.add_argument("--cfg_str", type=str, default="noisy_speech")
    args = parser.parse_args()

    cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
    # parser.error() instead of assert: asserts are stripped under -O.
    if not os.path.exists(cfgpath):
        parser.error(f"No configuration file as [{cfgpath}]")

    # Public constructor argument rather than poking cfg._interpolation.
    cfg = CP.ConfigParser(interpolation=CP.ExtendedInterpolation())
    cfg.read(cfgpath)
    if not cfg.has_section(args.cfg_str):
        parser.error(f"No section [{args.cfg_str}] in [{cfgpath}]")

    # Section access through the public mapping API applies the configured
    # interpolation, which reading cfg._sections bypassed.
    main(dict(cfg[args.cfg_str]))
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
line-length = 80
|
line-length = 80
|
||||||
target-version = ['py38']
|
target-version = ['py38']
|
||||||
exclude = '''
|
exclude = '''
|
||||||
|
|
||||||
(
|
(
|
||||||
/(
|
/(
|
||||||
\.eggs # exclude a few common directories in the
|
\.eggs # exclude a few common directories in the
|
||||||
|
|
@ -10,6 +9,9 @@ exclude = '''
|
||||||
| \.mypy_cache
|
| \.mypy_cache
|
||||||
| \.tox
|
| \.tox
|
||||||
| \.venv
|
| \.venv
|
||||||
|
| noisyspeech_synthesizer.py
|
||||||
|
| noisyspeech_synthesizer.cfg
|
||||||
|
|
||||||
)/
|
)/
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,19 @@
|
||||||
boto3>=1.24.86
|
# torch>=1.12.1
|
||||||
huggingface-hub>=0.10.0
|
# torchaudio>=0.12.1
|
||||||
hydra-core>=1.2.0
|
# tqdm>=4.64.1
|
||||||
joblib>=1.2.0
|
configparser
|
||||||
librosa>=0.9.2
|
# boto3>=1.24.86
|
||||||
mlflow>=1.29.0
|
# huggingface-hub>=0.10.0
|
||||||
|
# hydra-core>=1.2.0
|
||||||
|
# joblib>=1.2.0
|
||||||
|
# librosa>=0.9.2
|
||||||
|
# mlflow>=1.29.0
|
||||||
numpy>=1.23.3
|
numpy>=1.23.3
|
||||||
pesq==0.0.4
|
# pesq==0.0.4
|
||||||
protobuf>=3.19.6
|
# protobuf>=3.19.6
|
||||||
pystoi==0.3.3
|
# pystoi==0.3.3
|
||||||
pytest-lazy-fixture>=0.6.3
|
# pytest-lazy-fixture>=0.6.3
|
||||||
pytorch-lightning>=1.7.7
|
# pytorch-lightning>=1.7.7
|
||||||
scikit-learn>=1.1.2
|
# scikit-learn>=1.1.2
|
||||||
scipy>=1.9.1
|
scipy>=1.9.1
|
||||||
soundfile>=0.11.0
|
soundfile>=0.11.0
|
||||||
torch>=1.12.1
|
|
||||||
torchaudio>=0.12.1
|
|
||||||
tqdm>=4.64.1
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue