154 lines
5.5 KiB
Python
154 lines
5.5 KiB
Python
"""
@author: chkarada
"""
|
|
import argparse
|
|
import configparser as CP
|
|
import glob
|
|
import os
|
|
|
|
import numpy as np
|
|
|
|
from audiolib import audioread, audiowrite, snr_mixer
|
|
|
|
|
|
def _extend_with_files(audio, fs, filenames, idx, min_length, silence_length):
    """Append further files to ``audio`` until it is longer than ``min_length``.

    Starting from position ``idx`` in ``filenames``, the next file is read
    with ``audioread`` and appended to ``audio`` after a gap of
    ``silence_length`` seconds of zeros. This repeats while
    ``len(audio) <= min_length``.

    Args:
        audio: Samples read so far.
        fs: Sampling rate returned by the most recent ``audioread`` call.
        filenames: Candidate files to draw additional audio from.
        idx: Index in ``filenames`` of the file ``audio`` was read from.
        min_length: Length (in samples) that the result must exceed.
        silence_length: Gap inserted between files, in seconds.

    Returns:
        Tuple ``(audio, fs)`` where ``fs`` is the rate reported by the last
        file that was read (unchanged if no file had to be appended).
    """
    while len(audio) <= min_length:
        idx = idx + 1
        # NOTE(review): wrapping at ``len - 1`` (not ``len``) means the last
        # file is only ever reached through a random draw, never by stepping.
        # Kept as-is so the sequence of random draws is unchanged.
        if idx >= len(filenames) - 1:
            idx = np.random.randint(0, len(filenames))
        extra, fs = audioread(filenames[idx])
        gap = np.zeros(int(fs * silence_length))
        audio = np.append(np.append(audio, gap), extra)
    return audio, fs


def main(cfg):
    """Synthesize noisy speech clips from clean speech and noise recordings.

    Clean and noise files are picked at random, extended with further files
    until they are long enough, mixed with ``snr_mixer`` at every SNR level,
    and written with ``audiowrite`` into three output directories next to
    this script (``NoisySpeech_<naming>ing``, ``CleanSpeech_<naming>ing`` and
    ``Noise_<naming>ing``). Generation stops once the number of written noisy
    samples reaches ``total_hours`` worth of audio at ``sampling_rate``.

    Args:
        cfg: Mapping of string options. Keys read: ``snr_lower``,
            ``snr_upper``, ``total_snrlevels``, ``speech_dir``,
            ``noise_dir``, ``naming``, ``sampling_rate``, ``audioformat``,
            ``total_hours``, ``audio_length``, ``silence_length`` and
            ``noise_types_excluded``. The literal string ``"None"`` for
            ``speech_dir`` / ``noise_dir`` selects the default directory
            next to this script; for ``noise_types_excluded`` it disables
            filtering.

    Raises:
        AssertionError: If the clean speech or noise directory is missing.
        ValueError: If audio is requested but no clean or noise files match
            ``audioformat``, or ``total_snrlevels`` is below 1.
    """
    here = os.path.dirname(__file__)

    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])

    clean_dir = os.path.join(here, "clean_train")
    if cfg["speech_dir"] != "None":
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        # Raised explicitly: an ``assert`` statement is stripped under -O.
        raise AssertionError("Clean speech data is required")

    noise_dir = os.path.join(here, "noise_train")
    if cfg["noise_dir"] != "None":
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise AssertionError("Noise data is required")

    name = cfg["naming"]
    cfg_fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(here, f"NoisySpeech_{name}ing")
    clean_proc_dir = os.path.join(here, f"CleanSpeech_{name}ing")
    noise_proc_dir = os.path.join(here, f"Noise_{name}ing")

    # Targets are expressed in samples at the configured rate; the rate used
    # for silence gaps and for writing comes from the files themselves.
    total_samples = int(total_hours * 60 * 60 * cfg_fs)
    audio_length = int(audio_length * cfg_fs)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != "None":
        # Drop every noise file whose basename starts with an excluded prefix.
        excluded = tuple(cfg["noise_types_excluded"].split(","))
        noisefilenames = [
            fn
            for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded)
        ]

    if total_samples > 0:
        # Fail fast, before touching the filesystem: with no SNR levels the
        # loop below would never advance, and with no input files the random
        # draw would fail with an unhelpful "low >= high" error.
        if total_snrlevels < 1:
            raise ValueError("total_snrlevels must be at least 1")
        if not cleanfilenames:
            raise ValueError(
                f"No clean speech files matching {audioformat!r} "
                f"found in {clean_dir!r}"
            )
        if not noisefilenames:
            raise ValueError(
                f"No noise files matching {audioformat!r} "
                f"found in {noise_dir!r}"
            )

    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    filecounter = 0
    num_samples = 0

    while num_samples < total_samples:
        # Clean speech: one random file, extended until it is longer than
        # ``audio_length`` samples.
        idx_s = np.random.randint(0, len(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])
        clean, fs = _extend_with_files(
            clean, fs, cleanfilenames, idx_s, audio_length, silence_length
        )

        # Noise: one random file, extended only if it is shorter than the
        # clean signal, then cut to exactly the clean signal's length.
        idx_n = np.random.randint(0, len(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])
        if len(noise) < len(clean):
            noise, fs = _extend_with_files(
                noise, fs, noisefilenames, idx_n, len(clean), silence_length
            )
        noise = noise[0 : len(clean)]
        filecounter = filecounter + 1

        # The clean file name does not depend on the SNR level, so the same
        # path is rewritten once per level.
        cleanpath = os.path.join(clean_proc_dir, f"clnsp{filecounter}.wav")

        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(
                clean=clean, noise=noise, snr=snr
            )
            snr_tag = str(snr)
            noisypath = os.path.join(
                noisyspeech_dir,
                f"noisy{filecounter}_SNRdb_{snr_tag}_clnsp{filecounter}.wav",
            )
            noisepath = os.path.join(
                noise_proc_dir, f"noisy{filecounter}_SNRdb_{snr_tag}.wav"
            )
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line, load the requested section
    # of the configuration file and hand it to main() as a plain dict.
    parser = argparse.ArgumentParser()

    # Configurations: read noisyspeech_synthesizer.cfg
    parser.add_argument(
        "--cfg",
        default="noisyspeech_synthesizer.cfg",
        help="Read noisyspeech_synthesizer.cfg for all the details",
    )
    parser.add_argument("--cfg_str", type=str, default="noisy_speech")
    args = parser.parse_args()

    # The configuration file is resolved relative to this script's directory.
    cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
    if not os.path.exists(cfgpath):
        # Explicit check: an ``assert`` would be stripped under -O.
        parser.error(f"No configuration file as [{cfgpath}]")

    cfg = CP.ConfigParser(interpolation=CP.ExtendedInterpolation())
    cfg.read(cfgpath)
    if not cfg.has_section(args.cfg_str):
        parser.error(f"No section [{args.cfg_str}] in [{cfgpath}]")

    # raw=True passes the option values exactly as written in the file
    # (no ${...} expansion), using the public API rather than the parser's
    # private section table.
    main(dict(cfg.items(args.cfg_str, raw=True)))
|