Compare commits

...

72 Commits

Author SHA1 Message Date
shahules786 31cd404e03 rmv mdkir 2022-11-20 19:58:20 +05:30
shahules786 8cf8dd9717 20hrs 2022-11-20 19:56:56 +05:30
shahules786 c2b2b83fd5 20hrs 2022-11-20 19:16:54 +05:30
shahules786 bcb94d5f34 20hrs 2022-11-20 19:11:41 +05:30
shahules786 e0abb458d5 15hrs 2022-11-03 10:40:22 +05:30
shahules786 e322c6280d 15hrs 2022-11-03 09:52:18 +05:30
shahules786 7548647dfb fix cp 2022-11-02 16:52:58 +05:30
shahules786 1035fbb236 15hrs 2022-11-02 10:50:38 +05:30
shahules786 a06c0a3865 generate test 1hr 2022-10-28 09:55:25 +05:30
shahules786 64f52fe010 redo data 2022-10-27 21:39:33 +05:30
shahules786 078d3eb244 fix num hrs 2022-10-17 15:01:04 +05:30
shahules786 629adf0232 dns 30 hrs demucs 2022-10-17 11:05:55 +05:30
shahules786 501948e866 dns 30 hrs demucs 2022-10-17 11:04:43 +05:30
shahules786 aa61056376 generate 1hrs 2022-10-16 11:44:22 +05:30
shahules786 187e48d125 generate 20hrs 2022-10-16 11:20:01 +05:30
shahules786 7882b8cca3 generate test 2022-10-15 11:46:52 +05:30
shahules786 789da44114 generate test 2022-10-15 11:08:14 +05:30
shahules786 78138c5f93 generate test 2022-10-15 10:13:34 +05:30
shahules786 b8a05c775c mv files 2022-10-14 16:44:37 +05:30
shahules786 2b68598d7b print files 2022-10-14 15:23:22 +05:30
shahules786 29a432540e print files 2022-10-14 15:22:37 +05:30
shahules786 8d1c057b86 changes to prep dns 2020 2022-10-14 15:20:34 +05:30
shahules786 6e0f69f575 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-14 12:46:48 +05:30
shahules786 0e58691a2c demucs 250 2022-10-14 12:45:34 +05:30
shahules786 807f4b93ea Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-14 12:43:47 +05:30
shahules786 315d646347 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-14 11:32:59 +05:30
shahules786 f34e49e341 WaveUnet 2022-10-14 11:15:16 +05:30
shahules786 fa47860f57 set BS to 256 2022-10-14 11:12:16 +05:30
shahules786 f7eb0a600c 500 epochs 2022-10-14 10:47:20 +05:30
shahules786 ba2d00648c demucs 100 epochs 2022-10-13 10:57:24 +05:30
shahules786 8a55a77640 run 100 epochs 2022-10-13 10:52:22 +05:30
shahules786 94a4ea38ed Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-13 10:50:59 +05:30
shahules786 8d25b0ed79 reduce epochs 2022-10-12 20:27:05 +05:30
shahules786 09ba645315 fix logging 2022-10-12 20:23:55 +05:30
shahules786 8906496366 waveunet 500 epochs 2022-10-12 10:49:00 +05:30
shahules786 e4a2eb7844 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-12 10:32:52 +05:30
shahules786 8a6af87627 pesq 2022-10-11 21:56:55 +05:30
shahules786 5a392332ba ensure 2 gpus 2022-10-11 21:56:35 +05:30
shahules786 f66a5236e1 Revert "demucs"
This reverts commit d415bb0c59.
2022-10-11 21:54:47 +05:30
shahules786 d415bb0c59 demucs 2022-10-11 21:41:19 +05:30
shahules786 8c1524a998 500 epochs 2022-10-11 21:38:27 +05:30
shahules786 7161f84a27 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-11 21:36:59 +05:30
shahules786 2c79e60a85 params 2022-10-11 21:33:19 +05:30
shahules786 41ee2fce0b Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-11 21:30:40 +05:30
shahules786 0c5db496e2 run waveunet 2022-10-11 16:51:41 +05:30
shahules786 031221b79e merge dev 2022-10-11 16:50:09 +05:30
shahules786 50062eaf40 rmv inplace operation 2022-10-11 15:10:34 +05:30
shahules786 0b02b73094 run demucs 32 2022-10-11 11:12:44 +05:30
shahules786 2ccc2822cd Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-11 11:12:02 +05:30
shahules786 1667de624e min settings 2022-10-10 21:04:43 +05:30
shahules786 32579b7a39 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-10 21:04:01 +05:30
shahules786 bb68e9e4eb demucs 2022-10-10 16:48:40 +05:30
shahules786 a21ef707ad ensure gpu 2022-10-10 15:59:48 +05:30
shahules786 81c5f13ff6 log metric 2022-10-10 15:32:37 +05:30
shahules786 a417e226f3 testrun for metrics 2022-10-10 12:49:41 +05:30
shahules786 5d8f49d78e Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-10 12:48:11 +05:30
shahules786 14156743f9 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-08 11:04:32 +05:30
shahules786 845575a2ad config 2022-10-08 10:18:22 +05:30
shahules786 c9b78b0e73 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-08 10:12:38 +05:30
shahules786 3068476512 reduce batch_size 2022-10-08 09:59:23 +05:30
shahules786 ffb364196e increase sr 2022-10-07 11:32:33 +05:30
shahules786 52cefcb962 run demucs 2022-10-07 10:56:14 +05:30
shahules786 61923f6d68 config 2022-10-07 10:46:06 +05:30
shahules786 e90efe3163 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-07 10:43:34 +05:30
shahules786 aa043aaf40 rmv max_steps 2022-10-06 11:52:05 +05:30
shahules786 4f6ccadf4b Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-06 11:49:40 +05:30
shahules786 0e982cd493 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-06 10:33:26 +05:30
shahules786 0787d946da decrease epochs 2022-10-06 10:21:07 +05:30
shahules786 e06ba07889 Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-06 10:19:38 +05:30
shahules786 741fd7b87c run cli 2022-10-06 09:55:01 +05:30
shahules786 a064151e2e Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk 2022-10-06 09:54:14 +05:30
shahules786 25557757c7 inc epochs 2022-10-03 21:26:59 +05:30
12 changed files with 314 additions and 37 deletions

View File

@ -40,4 +40,5 @@ repos:
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: mixed-line-ending
exclude: noisyspeech_synthesizer.cfg
args: ['--fix=no']

76
audiolib.py Normal file
View File

@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 26 15:54:05 2019
@author: chkarada
"""
import os
import numpy as np
import soundfile as sf
# Function to read audio
def audioread(path, norm=True, start=0, stop=None):
    """Read an audio file, down-mix it to mono and optionally normalise it.

    Args:
        path: Location of the audio file; resolved to an absolute path.
        norm: When true, scale the signal so that its RMS equals
            ``10 ** (-25 / 20)`` (the -25 dB level used throughout this
            module). A silent signal (RMS of 0) is returned unscaled.
        start: First frame to read, forwarded to ``sf.read``.
        stop: Frame to stop before, forwarded to ``sf.read``.

    Returns:
        ``(x, sr)``: the mono samples and the sample rate returned by
        ``sf.read``.

    Raises:
        ValueError: If ``path`` does not exist.
        RuntimeError: If the file cannot be decoded.
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        raise ValueError("[{}] does not exist!".format(path))
    try:
        x, sr = sf.read(path, start=start, stop=stop)
    except RuntimeError as err:  # fix for sph pcm-embedded shortened v2
        # Previously this only printed a warning and then failed with an
        # UnboundLocalError on ``x``; surface the real cause instead.
        raise RuntimeError(
            "Audio type not supported: [{}]".format(path)
        ) from err

    if len(x.shape) != 1:  # multi-channel: average the channels
        x = x.T
        x = x.sum(axis=0) / x.shape[0]

    if norm:
        rms = (x**2).mean() ** 0.5
        # Dividing by a zero RMS would turn a silent clip into NaNs.
        if rms > 0:
            x = x * (10 ** (-25 / 20) / rms)
    return x, sr
# Function to write audio
def audiowrite(data, fs, destpath, norm=False):
    """Write ``data`` to ``destpath``, creating the parent directory.

    Args:
        data: Samples to write (NumPy array when ``norm`` is true).
        fs: Sample rate passed to ``sf.write``.
        destpath: Output file path; resolved to an absolute path.
        norm: When true, rescale by ``10 ** (-25 / 10) / rms`` and then, if
            the peak magnitude reaches 1, divide by the peak so the signal
            does not clip.

    Returns:
        None.
    """
    if norm:
        rms = (data**2).mean() ** 0.5
        # NOTE(review): the exponent here is -25 / 10 while audioread and
        # snr_mixer use -25 / 20 -- confirm which level is intended before
        # relying on norm=True. Left unchanged to keep existing output.
        if rms > 0:  # a silent signal cannot be rescaled
            data = data * (10 ** (-25 / 10) / rms)
        # np.max works on whole arrays; the previous ``max(abs(data), eps)``
        # compared an array with a float and raised ValueError.
        peak = np.max(np.abs(data)) if np.size(data) else 0.0
        if peak >= 1:
            data = data / peak
    destpath = os.path.abspath(destpath)
    os.makedirs(os.path.dirname(destpath), exist_ok=True)
    sf.write(destpath, data, fs)
    return
# Function to mix clean speech and noise at various SNR levels
def snr_mixer(clean, noise, snr):
    """Mix clean speech and noise at the requested signal-to-noise ratio.

    Both inputs are first normalised to an RMS of ``10 ** (-25 / 20)``
    (-25 dB FS); the noise is then attenuated so that
    ``20 * log10(rms(clean) / rms(noise))`` equals ``snr``.

    Args:
        clean: Clean speech samples (NumPy array).
        noise: Noise samples with the same length as ``clean``.
        snr: Target SNR in dB.

    Returns:
        ``(clean, noisenewlevel, noisyspeech)``: the normalised clean
        signal, the rescaled noise, and their sum.
    """
    # Tiny offset so an all-zero input yields zeros instead of NaN/inf.
    eps = np.finfo(float).eps
    target_rms = 10 ** (-25 / 20)

    # Normalizing to -25 dB FS
    clean = clean * (target_rms / ((clean**2).mean() ** 0.5 + eps))
    rmsclean = (clean**2).mean() ** 0.5

    noise = noise * (target_rms / ((noise**2).mean() ** 0.5 + eps))
    rmsnoise = (noise**2).mean() ** 0.5

    # Set the noise level for a given SNR. This is an amplitude ratio, so
    # no square root is applied: with it the achieved SNR was snr / 2 dB.
    noisescalar = rmsclean / (10 ** (snr / 20)) / (rmsnoise + eps)
    noisenewlevel = noise * noisescalar
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech

View File

@ -1,7 +1,7 @@
defaults:
- model : WaveUnet
- model : Demucs
- dataset : Vctk
- optimizer : Adam
- hyperparameters : default
- trainer : default
- trainer : fastrun_dev
- mlflow : experiment

View File

@ -2,10 +2,9 @@ _target_: enhancer.data.dataset.EnhancerDataset
root_dir : /Users/shahules/Myprojects/enhancer/datasets/vctk_test
name : dns-2020
duration : 1.0
sampling_rate: 16000
sampling_rate: 8000
batch_size: 32
files:
root_dir : /Users/shahules/Myprojects/enhancer/datasets/vctk_test
train_clean : clean_test_wav
test_clean : clean_test_wav
train_noisy : clean_test_wav

View File

@ -1,9 +1,10 @@
_target_: enhancer.data.dataset.EnhancerDataset
name : vctk
root_dir : /scratch/c.sistc3/DS_10283_2791
duration : 1.0
duration : 1.5
sampling_rate: 16000
batch_size: 128
batch_size: 256
valid_size : 0.05
files:
train_clean : clean_trainset_28spk_wav

View File

@ -1,7 +1,7 @@
loss : mse
metric : mae
lr : 0.0001
ReduceLr_patience : 5
ReduceLr_factor : 0.1
min_lr : 0.000001
metric : [stoi,pesq,si-sdr]
lr : 0.001
ReduceLr_patience : 10
ReduceLr_factor : 0.5
min_lr : 0.00
EarlyStopping_factor : 10

View File

@ -1,15 +1,15 @@
_target_: pytorch_lightning.Trainer
accelerator: auto
accelerator: gpu
accumulate_grad_batches: 1
amp_backend: native
auto_lr_find: True
auto_lr_find: False
auto_scale_batch_size: False
auto_select_gpus: True
benchmark: False
check_val_every_n_epoch: 1
detect_anomaly: False
deterministic: False
devices: -1
devices: 2
enable_checkpointing: True
enable_model_summary: True
enable_progress_bar: True
@ -22,9 +22,8 @@ limit_predict_batches: 1.0
limit_test_batches: 1.0
limit_train_batches: 1.0
limit_val_batches: 1.0
log_every_n_steps: 50
max_epochs: 3
max_steps: -1
log_every_n_steps: 100
max_epochs: 250
max_time: null
min_epochs: 1
min_steps: null

View File

@ -32,8 +32,21 @@ echo "Making temp dir"
mkdir temp
pwd
#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TRAIN --output ./data/train
#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TEST --output ./data/test
# echo "files"
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_training
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_training
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_testing
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_testing
echo "Start Training..."
python enhancer/cli/train.py
# cp -r /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_testing /scratch/c.sistc3/MS-SNSD/DNS15/
# cp -r /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_testing /scratch/c.sistc3/MS-SNSD/DNS15/
# rm -rf /scratch/c.sistc3/MS-SNSD/DNS20
# mkdir /scratch/c.sistc3/MS-SNSD/DNS20
python noisyspeech_synthesizer.py
mv ./CleanSpeech_testing/ /scratch/c.sistc3/MS-SNSD/DNS20
mv ./NoisySpeech_testing/ /scratch/c.sistc3/MS-SNSD/DNS20
ls /scratch/c.sistc3/MS-SNSD/DNS20
#python enhancer/cli/train.py

View File

@ -0,0 +1,30 @@
# Configuration for generating Noisy Speech Dataset
# - sampling_rate: Specify the sampling rate. Default is 16 kHz
# - audioformat: default is .wav
# - audio_length: Minimum Length of each audio clip (noisy and clean speech) in seconds that will be generated by augmenting utterances.
# - silence_length: Duration of silence introduced between clean speech utterances.
# - total_hours: Total number of hours of data required. Units are in hours.
# - snr_lower: Lower bound for SNR required (default: 0 dB)
# - snr_upper: Upper bound for SNR required (default: 40 dB)
# - total_snrlevels: Number of SNR levels required (default: 5, which means there are 5 levels between snr_lower and snr_upper)
# - noise_dir: Default is None. But specify the noise directory path if noise files are not in the source directory
# - speech_dir: Default is None. But specify the speech directory path if speech files are not in the source directory
# - noise_types_excluded: Noise files starting with the following tags to be excluded in the noise list. Example: noise_types_excluded: Babble, AirConditioner
# Specify 'None' if no noise files to be excluded.
[noisy_speech]
sampling_rate: 16000
audioformat: *.wav
audio_length: 10
silence_length: 0.2
total_hours: 1
snr_lower: 0
snr_upper: 40
total_snrlevels: 2
naming: test
noise_dir: /scratch/c.sistc3/MS-SNSD/noise_test
speech_dir: /scratch/c.sistc3/MS-SNSD/clean_test
noise_types_excluded: None

155
noisyspeech_synthesizer.py Normal file
View File

@ -0,0 +1,155 @@
"""
@author: chkarada
"""
import argparse
import configparser as CP
import glob
import os
import numpy as np
from audiolib import audioread, audiowrite, snr_mixer
def _extend_audio(audio, fs, filenames, idx, min_length, silence_length):
    """Append further files, separated by silence, until ``audio`` is longer than ``min_length`` samples."""
    while len(audio) <= min_length:
        idx = idx + 1
        # NOTE(review): the ``- 1`` means stepping never lands on the last
        # file (a random index is drawn instead); kept as in the original.
        if idx >= np.size(filenames) - 1:
            idx = np.random.randint(0, np.size(filenames))
        new_audio, fs = audioread(filenames[idx])
        gap = np.zeros(int(fs * silence_length))
        audio = np.append(np.append(audio, gap), new_audio)
    return audio, fs


def main(cfg):
    """Generate clean / noise / noisy-speech clips from a config section.

    Clean utterances are concatenated (with silence in between) until each
    clip is longer than ``audio_length`` seconds, a noise clip of the same
    length is assembled the same way, and the pair is mixed by ``snr_mixer``
    at every SNR level. Clips are written until the noisy output reaches
    ``total_hours``.

    Args:
        cfg: Mapping of string options. Keys read: ``snr_lower``,
            ``snr_upper``, ``total_snrlevels``, ``speech_dir``,
            ``noise_dir``, ``naming``, ``sampling_rate``, ``audioformat``,
            ``total_hours``, ``audio_length``, ``silence_length`` and
            ``noise_types_excluded``. The string ``"None"`` selects the
            default for the directory and exclusion options.

    Raises:
        FileNotFoundError: If the clean or noise directory is missing.
        ValueError: If ``total_snrlevels`` is below 1, or no clean or noise
            files match ``audioformat``.
    """
    base_dir = os.path.dirname(__file__)

    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])
    if total_snrlevels < 1:
        # With no SNR level nothing is written and the generation loop
        # below would never terminate.
        raise ValueError("total_snrlevels must be at least 1")

    clean_dir = os.path.join(base_dir, "clean_train")
    if cfg["speech_dir"] != "None":
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        # Explicit raise: ``assert`` is stripped when Python runs with -O.
        raise FileNotFoundError("Clean speech data is required")

    noise_dir = os.path.join(base_dir, "noise_train")
    if cfg["noise_dir"] != "None":
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise FileNotFoundError("Noise data is required")

    name = cfg["naming"]
    cfg_fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(base_dir, f"NoisySpeech_{name}ing")
    clean_proc_dir = os.path.join(base_dir, f"CleanSpeech_{name}ing")
    # NOTE(review): noise-only clips go to the same directory as the noisy
    # speech clips -- confirm this is intended rather than a separate
    # noise directory.
    noise_proc_dir = os.path.join(base_dir, f"NoisySpeech_{name}ing")
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    total_samples = int(total_hours * 60 * 60 * cfg_fs)
    audio_length = int(audio_length * cfg_fs)
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != "None":
        # Strip whitespace so "Babble, AirConditioner" works, and drop empty
        # tags: an empty prefix would match (and exclude) every file.
        excluded = tuple(
            tag.strip()
            for tag in cfg["noise_types_excluded"].split(",")
            if tag.strip()
        )
        noisefilenames = [
            fn
            for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded)
        ]
    if not cleanfilenames:
        raise ValueError(
            f"No clean speech files matching [{audioformat}] in [{clean_dir}]"
        )
    if not noisefilenames:
        raise ValueError(
            f"No noise files matching [{audioformat}] in [{noise_dir}]"
        )

    filecounter = 0
    num_samples = 0
    while num_samples < total_samples:
        # Build a clean clip longer than audio_length samples.
        idx_s = np.random.randint(0, np.size(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])
        clean, fs = _extend_audio(
            clean, fs, cleanfilenames, idx_s, audio_length, silence_length
        )

        # Build a noise clip and cut it to the clean clip's length.
        idx_n = np.random.randint(0, np.size(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])
        if len(noise) < len(clean):
            noise, fs = _extend_audio(
                noise, fs, noisefilenames, idx_n, len(clean), silence_length
            )
        noise = noise[0 : len(clean)]

        filecounter = filecounter + 1
        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(
                clean=clean, noise=noise, snr=snr
            )
            snr_tag = str(snr)
            noisyfilename = (
                "noisy"
                + str(filecounter)
                + "_SNRdb_"
                + snr_tag
                + "_clnsp"
                + str(filecounter)
                + ".wav"
            )
            cleanfilename = "clnsp" + str(filecounter) + ".wav"
            noisefilename = (
                "noisy" + str(filecounter) + "_SNRdb_" + snr_tag + ".wav"
            )
            audiowrite(
                noisy_snr,
                fs,
                os.path.join(noisyspeech_dir, noisyfilename),
                norm=False,
            )
            audiowrite(
                clean_snr,
                fs,
                os.path.join(clean_proc_dir, cleanfilename),
                norm=False,
            )
            audiowrite(
                noise_snr,
                fs,
                os.path.join(noise_proc_dir, noisefilename),
                norm=False,
            )
            # Progress is counted per noisy file written.
            num_samples = num_samples + len(noisy_snr)
if __name__ == "__main__":
    # Command-line entry point: load one section of the .cfg file that sits
    # next to this script and hand its options to main().
    parser = argparse.ArgumentParser()
    # Configurations: read noisyspeech_synthesizer.cfg
    parser.add_argument(
        "--cfg",
        default="noisyspeech_synthesizer.cfg",
        help="Read noisyspeech_synthesizer.cfg for all the details",
    )
    parser.add_argument("--cfg_str", type=str, default="noisy_speech")
    args = parser.parse_args()

    cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
    if not os.path.exists(cfgpath):
        # parser.error instead of assert: asserts are stripped under -O.
        parser.error(f"No configuration file as [{cfgpath}]")

    # Values are passed to main() raw, as before: the old code set an
    # interpolation object but then read the private, uninterpolated
    # ``_sections`` mapping. ``items()`` also merges in any [DEFAULT]
    # options, which ``_sections`` left out.
    cfg = CP.ConfigParser(interpolation=None)
    cfg.read(cfgpath)
    if not cfg.has_section(args.cfg_str):
        parser.error(f"No section [{args.cfg_str}] in [{cfgpath}]")
    main(dict(cfg.items(args.cfg_str)))

View File

@ -2,7 +2,6 @@
line-length = 80
target-version = ['py38']
exclude = '''
(
/(
\.eggs # exclude a few common directories in the
@ -10,6 +9,9 @@ exclude = '''
| \.mypy_cache
| \.tox
| \.venv
| noisyspeech_synthesizer.py
| noisyspeech_synthesizer.cfg
)/
)
'''

View File

@ -1,18 +1,19 @@
boto3>=1.24.86
huggingface-hub>=0.10.0
hydra-core>=1.2.0
joblib>=1.2.0
librosa>=0.9.2
mlflow>=1.29.0
# torch>=1.12.1
# torchaudio>=0.12.1
# tqdm>=4.64.1
configparser
# boto3>=1.24.86
# huggingface-hub>=0.10.0
# hydra-core>=1.2.0
# joblib>=1.2.0
# librosa>=0.9.2
# mlflow>=1.29.0
numpy>=1.23.3
pesq==0.0.4
protobuf>=3.19.6
pystoi==0.3.3
pytest-lazy-fixture>=0.6.3
pytorch-lightning>=1.7.7
scikit-learn>=1.1.2
# pesq==0.0.4
# protobuf>=3.19.6
# pystoi==0.3.3
# pytest-lazy-fixture>=0.6.3
# pytorch-lightning>=1.7.7
# scikit-learn>=1.1.2
scipy>=1.9.1
soundfile>=0.11.0
torch>=1.12.1
torchaudio>=0.12.1
tqdm>=4.64.1