changes to prep dns 2020

This commit is contained in:
shahules786 2022-10-14 15:20:34 +05:30
parent 6e0f69f575
commit 8d1c057b86
7 changed files with 280 additions and 18 deletions

View File

@ -40,4 +40,5 @@ repos:
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: mixed-line-ending
exclude: noisyspeech_synthesizer.cfg
args: ['--fix=no']

76
audiolib.py Normal file
View File

@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 26 15:54:05 2019
@author: chkarada
"""
import os
import numpy as np
import soundfile as sf
# Function to read audio
def audioread(path, norm=True, start=0, stop=None):
    """Read an audio file as a single-channel signal.

    Multi-channel input is reduced to one channel by averaging the channels.
    When ``norm`` is true the signal is scaled so that its RMS equals
    ``10 ** (-25 / 20)`` (i.e. -25 dB relative to full scale).

    Args:
        path: Path of the file to read; it is resolved to an absolute path.
        norm: Whether to apply the RMS normalization described above.
        start: Forwarded to ``soundfile.read`` as ``start``.
        stop: Forwarded to ``soundfile.read`` as ``stop``.

    Returns:
        A tuple ``(x, sr)`` with the samples and the sample rate returned by
        ``soundfile.read``.

    Raises:
        ValueError: If ``path`` does not exist.
        RuntimeError: If ``soundfile.read`` cannot read the file.
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        raise ValueError("[{}] does not exist!".format(path))
    try:
        x, sr = sf.read(path, start=start, stop=stop)
    except RuntimeError:  # fix for sph pcm-embedded shortened v2
        print("WARNING: Audio type not supported")
        # Without re-raising, execution would continue with ``x`` unbound and
        # fail with an unrelated UnboundLocalError on the next line.
        raise
    if len(x.shape) > 1:  # multi-channel: average the channels
        x = x.T
        x = x.sum(axis=0) / x.shape[0]
    if norm:
        rms = (x**2).mean() ** 0.5
        # A silent (or empty) signal has no level to normalize; dividing by a
        # zero RMS would fill the output with NaN/inf.
        if rms > 0:
            scalar = 10 ** (-25 / 20) / (rms)
            x = x * scalar
    return x, sr
# Function to write audio
def audiowrite(data, fs, destpath, norm=False):
    """Write ``data`` to ``destpath`` with ``soundfile.write``.

    The destination directory is created when it does not exist.

    Args:
        data: Samples to write (a NumPy array).
        fs: Sample rate passed to ``soundfile.write``.
        destpath: Output file path; it is resolved to an absolute path.
        norm: When true, scale ``data`` to an RMS of ``10 ** (-25 / 20)`` and,
            if the scaled peak magnitude reaches 1, divide by that peak.
    """
    if norm:
        rms = (data**2).mean() ** 0.5
        # RMS is an amplitude, so the dB-to-linear conversion divides by 20,
        # matching the -25 dB FS normalization used by audioread/snr_mixer.
        if rms > 0:
            data = data * (10 ** (-25 / 20) / rms)
        # Peak magnitude as a scalar; the builtin max() cannot compare an
        # array against a float.
        peak = np.max(np.abs(data)) if np.size(data) else 0.0
        if peak >= 1:
            data = data / peak
    destpath = os.path.abspath(destpath)
    destdir = os.path.dirname(destpath)
    os.makedirs(destdir, exist_ok=True)
    sf.write(destpath, data, fs)
    return
# Function to mix clean speech and noise at various SNR levels
def snr_mixer(clean, noise, snr):
    """Mix ``clean`` and ``noise`` at the requested signal-to-noise ratio.

    Both inputs are first scaled to an RMS of ``10 ** (-25 / 20)``. The noise
    is then rescaled so that ``20 * log10(rms(clean) / rms(noise))`` equals
    ``snr``, and the two signals are added.

    Args:
        clean: Clean speech samples (NumPy array).
        noise: Noise samples (NumPy array) that can be added to ``clean``.
        snr: Target signal-to-noise ratio in dB.

    Returns:
        A tuple ``(clean, noisenewlevel, noisyspeech)``: the normalized clean
        signal, the rescaled noise, and their sum.
    """
    target_rms = 10 ** (-25 / 20)
    # Normalizing to -25 dB FS
    rmsclean = (clean**2).mean() ** 0.5
    clean = clean * (target_rms / rmsclean)
    rmsclean = (clean**2).mean() ** 0.5
    rmsnoise = (noise**2).mean() ** 0.5
    noise = noise * (target_rms / rmsnoise)
    rmsnoise = (noise**2).mean() ** 0.5
    # Set the noise level for a given SNR. RMS values are amplitudes, so the
    # ratio is applied directly; taking its square root would halve the
    # achieved SNR in dB.
    noisescalar = rmsclean / (10 ** (snr / 20)) / rmsnoise
    noisenewlevel = noise * noisescalar
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech

View File

@ -34,6 +34,6 @@ pwd
#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TRAIN --output ./data/train
#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TEST --output ./data/test
python noisyspeech_synthesizer.py
echo "Start Training..."
python enhancer/cli/train.py
#python enhancer/cli/train.py

View File

@ -0,0 +1,29 @@
# Configuration for generating Noisy Speech Dataset
# - sampling_rate: Specify the sampling rate. Default is 16 kHz
# - audioformat: default is .wav
# - audio_length: Minimum Length of each audio clip (noisy and clean speech) in seconds that will be generated by augmenting utterances.
# - silence_length: Duration of silence introduced between clean speech utterances.
# - total_hours: Total number of hours of data required. Units are in hours.
# - snr_lower: Lower bound for SNR required (default: 0 dB)
# - snr_upper: Upper bound for SNR required (default: 40 dB)
# - total_snrlevels: Number of SNR levels required (default: 5, which means 5 evenly spaced levels from snr_lower to snr_upper, both included)
# - noise_dir: Default is None. But specify the noise directory path if noise files are not in the source directory
# - speech_dir: Default is None. But specify the speech directory path if speech files are not in the source directory
# - noise_types_excluded: Noise files starting with the following tags to be excluded in the noise list. Example: noise_types_excluded: Babble, AirConditioner
# Specify 'None' if no noise files to be excluded.
[noisy_speech]
sampling_rate: 16000
audioformat: *.wav
audio_length: 10
silence_length: 0.2
total_hours: 20
snr_lower: 0
snr_upper: 40
total_snrlevels: 5
noise_dir: /scratch/c.sistc3/MS-SNSD/noise_train
speech_dir: /scratch/c.sistc3/MS-SNSD/clean_train
noise_types_excluded: None

153
noisyspeech_synthesizer.py Normal file
View File

@ -0,0 +1,153 @@
"""
@author: chkarada
"""
import argparse
import configparser as CP
import glob
import os
import numpy as np
from audiolib import audioread, audiowrite, snr_mixer
def main(cfg):
    """Synthesize noisy speech clips from clean speech and noise recordings.

    Clean utterances are concatenated (with silence in between) until they
    exceed ``audio_length`` seconds, a noise signal of the same length is
    assembled the same way, and the pair is mixed at every SNR level. The
    noisy, clean and noise signals are written to the ``NoisySpeech_training``,
    ``CleanSpeech_training`` and ``Noise_training`` directories next to this
    file. Generation stops once the written noisy clips add up to
    ``total_hours`` of audio at the configured sampling rate.

    Args:
        cfg: Mapping from option name to string value. Keys read:
            ``snr_lower``, ``snr_upper``, ``total_snrlevels``, ``speech_dir``,
            ``noise_dir``, ``sampling_rate``, ``audioformat``, ``total_hours``,
            ``audio_length``, ``silence_length``, ``noise_types_excluded``.
            The string ``"None"`` selects the default for the directory
            options and disables noise exclusion.

    Raises:
        AssertionError: If the clean speech or noise directory does not exist.
        ValueError: If audio has to be generated but no clean or noise files
            match ``audioformat``.
    """
    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])

    base_dir = os.path.dirname(__file__)

    # Raised explicitly (same type and message as before) so the validation
    # is not stripped when Python runs with -O.
    clean_dir = os.path.join(base_dir, "clean_train")
    if cfg["speech_dir"] != "None":
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        raise AssertionError("Clean speech data is required")

    noise_dir = os.path.join(base_dir, "noise_train")
    if cfg["noise_dir"] != "None":
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise AssertionError("Noise data is required")

    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(base_dir, "NoisySpeech_training")
    clean_proc_dir = os.path.join(base_dir, "CleanSpeech_training")
    noise_proc_dir = os.path.join(base_dir, "Noise_training")
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Targets expressed in samples at the configured sampling rate.
    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != "None":
        # Strip whitespace so a list written as "Babble, AirConditioner"
        # matches both tags, and drop empty tags, which would otherwise
        # match (and exclude) every file.
        excluded_prefixes = tuple(
            tag.strip()
            for tag in cfg["noise_types_excluded"].split(",")
            if tag.strip()
        )
        noisefilenames = [
            fn
            for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded_prefixes)
        ]

    if total_samples > 0:
        # Fail with a readable message instead of the "low >= high" error
        # np.random.randint raises for an empty file list.
        if not cleanfilenames:
            raise ValueError(
                f"No clean speech files matching [{audioformat}] "
                f"in [{clean_dir}]"
            )
        if not noisefilenames:
            raise ValueError(
                f"No noise files matching [{audioformat}] in [{noise_dir}]"
            )

    filecounter = 0
    num_samples = 0
    while num_samples < total_samples:
        # Build a clean signal longer than audio_length by appending further
        # utterances, separated by silence.
        idx_s = np.random.randint(0, np.size(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])
        while len(clean) <= audio_length:
            idx_s = idx_s + 1
            if idx_s >= np.size(cleanfilenames) - 1:
                idx_s = np.random.randint(0, np.size(cleanfilenames))
            newclean, fs = audioread(cleanfilenames[idx_s])
            cleanconcat = np.append(clean, np.zeros(int(fs * silence_length)))
            clean = np.append(cleanconcat, newclean)

        # Build a noise signal at least as long as the clean one, then cut it
        # to exactly the clean length.
        idx_n = np.random.randint(0, np.size(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])
        if len(noise) < len(clean):
            while len(noise) <= len(clean):
                idx_n = idx_n + 1
                if idx_n >= np.size(noisefilenames) - 1:
                    idx_n = np.random.randint(0, np.size(noisefilenames))
                newnoise, fs = audioread(noisefilenames[idx_n])
                noiseconcat = np.append(
                    noise, np.zeros(int(fs * silence_length))
                )
                noise = np.append(noiseconcat, newnoise)
        noise = noise[0 : len(clean)]

        filecounter = filecounter + 1
        clip_id = str(filecounter)
        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(
                clean=clean, noise=noise, snr=snr
            )
            snr_tag = str(snr)
            noisyfilename = (
                "noisy" + clip_id + "_SNRdb_" + snr_tag + "_clnsp" + clip_id
                + ".wav"
            )
            cleanfilename = "clnsp" + clip_id + ".wav"
            noisefilename = "noisy" + clip_id + "_SNRdb_" + snr_tag + ".wav"
            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            # Every written noisy clip counts towards the requested total.
            num_samples = num_samples + len(noisy_snr)
if __name__ == "__main__":
    # Command-line entry point: load one section of the configuration file
    # and hand its options to main().
    parser = argparse.ArgumentParser()
    # Configurations: read noisyspeech_synthesizer.cfg
    parser.add_argument(
        "--cfg",
        default="noisyspeech_synthesizer.cfg",
        help="Read noisyspeech_synthesizer.cfg for all the details",
    )
    parser.add_argument("--cfg_str", type=str, default="noisy_speech")
    args = parser.parse_args()

    # The configuration path is resolved relative to this file's directory.
    cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
    if not os.path.exists(cfgpath):
        # Reported through argparse so the check also runs under ``-O``.
        parser.error(f"No configuration file as [{cfgpath}]")

    # Public constructor argument instead of assigning the private
    # ``_interpolation`` attribute.
    cfg = CP.ConfigParser(interpolation=CP.ExtendedInterpolation())
    cfg.read(cfgpath)
    if not cfg.has_section(args.cfg_str):
        parser.error(f"No section [{args.cfg_str}] in [{cfgpath}]")

    # raw=True passes main() the literal option strings, as the earlier direct
    # read of the parser's internal section table did.
    main(dict(cfg.items(args.cfg_str, raw=True)))

View File

@ -2,7 +2,6 @@
line-length = 80
target-version = ['py38']
exclude = '''
(
/(
\.eggs # exclude a few common directories in the
@ -10,6 +9,9 @@ exclude = '''
| \.mypy_cache
| \.tox
| \.venv
| noisyspeech_synthesizer.py
| noisyspeech_synthesizer.cfg
)/
)
'''

View File

@ -1,18 +1,19 @@
boto3>=1.24.86
huggingface-hub>=0.10.0
hydra-core>=1.2.0
joblib>=1.2.0
librosa>=0.9.2
mlflow>=1.29.0
# torch>=1.12.1
# torchaudio>=0.12.1
# tqdm>=4.64.1
configparser
# boto3>=1.24.86
# huggingface-hub>=0.10.0
# hydra-core>=1.2.0
# joblib>=1.2.0
# librosa>=0.9.2
# mlflow>=1.29.0
numpy>=1.23.3
pesq==0.0.4
protobuf>=3.19.6
pystoi==0.3.3
pytest-lazy-fixture>=0.6.3
pytorch-lightning>=1.7.7
scikit-learn>=1.1.2
# pesq==0.0.4
# protobuf>=3.19.6
# pystoi==0.3.3
# pytest-lazy-fixture>=0.6.3
# pytorch-lightning>=1.7.7
# scikit-learn>=1.1.2
scipy>=1.9.1
soundfile>=0.11.0
torch>=1.12.1
torchaudio>=0.12.1
tqdm>=4.64.1