mayavoz/noisyspeech_synthesizer.py

156 lines
5.5 KiB
Python

"""
@author: chkarada
"""
import argparse
import configparser as CP
import glob
import os
import numpy as np
from audiolib import audioread, audiowrite, snr_mixer
def main(cfg):
    """Mix clean speech with noise at several SNR levels and write the results.

    Clean clips are assembled by concatenating randomly chosen speech files
    (separated by silence) until they are longer than the configured clip
    length; a noise clip of the same length is assembled the same way. Each
    clean/noise pair is passed to ``snr_mixer`` once per SNR level and the
    three returned signals are written with ``audiowrite``. Generation stops
    once the accumulated number of noisy samples reaches the requested total.

    Args:
        cfg: Mapping of option name to string value. Keys read:
            ``snr_lower``, ``snr_upper``, ``total_snrlevels``, ``speech_dir``,
            ``noise_dir``, ``naming``, ``sampling_rate``, ``audioformat``
            (used as a glob pattern inside the input folders),
            ``total_hours``, ``audio_length``, ``silence_length`` and
            ``noise_types_excluded`` (comma-separated file-name prefixes).
            The literal string ``"None"`` for ``speech_dir`` / ``noise_dir``
            selects the ``clean_train`` / ``noise_train`` folder next to this
            file; ``"None"`` for ``noise_types_excluded`` disables filtering.

    Raises:
        AssertionError: If the clean speech or noise folder does not exist.
        ValueError: If ``total_snrlevels`` is smaller than 1, or if no clean
            or noise file is left to draw from.
    """
    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])
    base_dir = os.path.dirname(__file__)

    # Raised explicitly (instead of `assert False`) so the checks survive
    # `python -O`; the exception type is kept for existing callers.
    clean_dir = os.path.join(base_dir, "clean_train")
    if cfg["speech_dir"] != "None":
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        raise AssertionError("Clean speech data is required")

    noise_dir = os.path.join(base_dir, "noise_train")
    if cfg["noise_dir"] != "None":
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise AssertionError("Noise data is required")

    name = cfg["naming"]
    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)  # seconds -> samples

    # With zero SNR levels the mixing loop below would never add samples and
    # the outer `while` would spin forever.
    if total_snrlevels < 1:
        raise ValueError(
            f"total_snrlevels must be at least 1, got {total_snrlevels}"
        )
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != "None":
        # str.startswith accepts a tuple: drop a file if ANY prefix matches.
        excluded_prefixes = tuple(cfg["noise_types_excluded"].split(","))
        noisefilenames = [
            fn
            for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded_prefixes)
        ]

    # Fail with a clear message instead of the opaque error that
    # np.random.randint(0, 0) would raise further down.
    if not cleanfilenames:
        raise ValueError(
            f"No clean speech files matching {audioformat!r} in {clean_dir!r}"
        )
    if not noisefilenames:
        raise ValueError(
            f"No noise files matching {audioformat!r} left in {noise_dir!r}"
        )

    # Output folders are created only after the inputs have been validated.
    noisyspeech_dir = os.path.join(base_dir, f"NoisySpeech_{name}ing")
    clean_proc_dir = os.path.join(base_dir, f"CleanSpeech_{name}ing")
    # NOTE(review): this is the same path as `noisyspeech_dir`, so noise-only
    # files land next to the noisy mixtures. It looks like a copy-paste slip
    # (a separate noise folder was probably intended) -- confirm against the
    # consumers of these folders before changing the on-disk layout.
    noise_proc_dir = os.path.join(base_dir, f"NoisySpeech_{name}ing")
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    num_clean = len(cleanfilenames)
    num_noise = len(noisefilenames)
    filecounter = 0
    num_samples = 0

    while num_samples < total_samples:
        # --- build a clean clip longer than `audio_length` samples ---
        # audioread returns (samples, fs); `fs` is rebound on every read, so
        # the rate handed to audiowrite is that of the last file read.
        idx_s = np.random.randint(0, num_clean)
        clean, fs = audioread(cleanfilenames[idx_s])
        while len(clean) <= audio_length:
            # Walk to the next file; re-draw a random index near the end of
            # the list so the index never runs out of range.
            idx_s = idx_s + 1
            if idx_s >= num_clean - 1:
                idx_s = np.random.randint(0, num_clean)
            newclean, fs = audioread(cleanfilenames[idx_s])
            cleanconcat = np.append(clean, np.zeros(int(fs * silence_length)))
            clean = np.append(cleanconcat, newclean)

        # --- build a noise clip of exactly len(clean) samples ---
        idx_n = np.random.randint(0, num_noise)
        noise, fs = audioread(noisefilenames[idx_n])
        if len(noise) < len(clean):
            # `<=` is kept from the original loop; any excess is trimmed by
            # the slice below.
            while len(noise) <= len(clean):
                idx_n = idx_n + 1
                if idx_n >= num_noise - 1:
                    idx_n = np.random.randint(0, num_noise)
                newnoise, fs = audioread(noisefilenames[idx_n])
                noiseconcat = np.append(
                    noise, np.zeros(int(fs * silence_length))
                )
                noise = np.append(noiseconcat, newnoise)
        noise = noise[0 : len(clean)]

        filecounter = filecounter + 1
        clip_id = str(filecounter)
        cleanpath = os.path.join(clean_proc_dir, "clnsp" + clip_id + ".wav")

        # --- mix and write one triplet per SNR level ---
        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(
                clean=clean, noise=noise, snr=snr
            )
            snr_tag = "noisy" + clip_id + "_SNRdb_" + str(snr)
            noisypath = os.path.join(
                noisyspeech_dir, snr_tag + "_clnsp" + clip_id + ".wav"
            )
            noisepath = os.path.join(noise_proc_dir, snr_tag + ".wav")
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            # The clean file name has no SNR component, so it is rewritten
            # on every SNR level (same behaviour as before).
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)
if __name__ == "__main__":
    # Command-line entry point: locate the configuration file next to this
    # script, load it, and hand the selected section to main().
    parser = argparse.ArgumentParser()
    # Configurations: read noisyspeech_synthesizer.cfg
    parser.add_argument(
        "--cfg",
        default="noisyspeech_synthesizer.cfg",
        help="Read noisyspeech_synthesizer.cfg for all the details",
    )
    # Name of the section inside the configuration file passed to main().
    parser.add_argument("--cfg_str", type=str, default="noisy_speech")
    args = parser.parse_args()

    cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
    # Explicit raise instead of `assert` so the check is not stripped when
    # Python runs with -O; the exception type and message are unchanged.
    if not os.path.exists(cfgpath):
        raise AssertionError(f"No configuration file as [{cfgpath}]")

    # Pass the interpolation through the public constructor argument rather
    # than assigning the private `_interpolation` attribute afterwards.
    cfg = CP.ConfigParser(interpolation=CP.ExtendedInterpolation())
    cfg.read(cfgpath)
    # NOTE(review): `_sections` is a private ConfigParser attribute; it is
    # kept so main() keeps receiving the same mapping as before. Switching to
    # the public `cfg[section]` proxy may change the values main() sees
    # (interpolation, defaults) -- verify before migrating.
    main(cfg._sections[args.cfg_str])