156 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			156 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
| """
 | |
| @author: chkarada
 | |
| """
 | |
| import argparse
 | |
| import configparser as CP
 | |
| import glob
 | |
| import os
 | |
| 
 | |
| import numpy as np
 | |
| 
 | |
| from audiolib import audioread, audiowrite, snr_mixer
 | |
| 
 | |
| 
 | |
def _extend_signal(signal, fs, filenames, idx, min_length, silence_length):
    """Append further recordings to ``signal`` until it exceeds ``min_length``.

    Starting from position ``idx`` in ``filenames``, the following files are
    read with ``audioread`` and appended to ``signal``, each preceded by
    ``silence_length`` seconds of zeros, until ``len(signal) > min_length``.

    Args:
        signal: Samples read so far.
        fs: Sampling rate returned by the most recent ``audioread`` call.
        filenames: Candidate files to draw additional audio from.
        idx: Index in ``filenames`` of the file ``signal`` was read from.
        min_length: Length in samples that the result must exceed.
        silence_length: Gap inserted between recordings, in seconds.

    Returns:
        Tuple ``(signal, fs)`` where ``fs`` is the sampling rate reported by
        the last file that was read (or the input ``fs`` if none was read).
    """
    file_count = len(filenames)
    while len(signal) <= min_length:
        idx += 1
        # NOTE(review): the wrap test fires one index early (``- 1``), so the
        # last file is only ever reached through the random draw. Kept as-is.
        if idx >= file_count - 1:
            idx = np.random.randint(0, file_count)
        extra, fs = audioread(filenames[idx])
        padded = np.append(signal, np.zeros(int(fs * silence_length)))
        signal = np.append(padded, extra)
    return signal, fs


def main(cfg):
    """Synthesize noisy speech clips by mixing clean speech with noise.

    Randomly picked clean recordings are extended (by concatenating further
    files separated by silence) until they are longer than ``audio_length``
    seconds, a randomly picked noise recording is extended/truncated to the
    same length, and both are passed to ``snr_mixer`` once per SNR level. For
    every level the noisy mixture, the clean signal and the noise signal are
    written with ``audiowrite``. Generation stops once the number of noisy
    samples written reaches ``total_hours`` worth of audio.

    Args:
        cfg: Mapping of string options. Keys read: ``snr_lower``,
            ``snr_upper``, ``total_snrlevels``, ``speech_dir``, ``noise_dir``,
            ``naming``, ``sampling_rate``, ``audioformat``, ``total_hours``,
            ``audio_length``, ``silence_length``, ``noise_types_excluded``.
            The literal string ``"None"`` selects the default for
            ``speech_dir``/``noise_dir`` (``clean_train``/``noise_train`` next
            to this file) and disables ``noise_types_excluded``.

    Raises:
        AssertionError: If the clean speech or noise directory does not exist.
        ValueError: If audio has to be generated but no clean or noise file
            matches ``audioformat``.
    """
    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])

    base_dir = os.path.dirname(__file__)

    clean_dir = os.path.join(base_dir, "clean_train")
    if cfg["speech_dir"] != "None":
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError("Clean speech data is required")

    noise_dir = os.path.join(base_dir, "noise_train")
    if cfg["noise_dir"] != "None":
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise AssertionError("Noise data is required")

    name = cfg["naming"]
    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(base_dir, f"NoisySpeech_{name}ing")
    clean_proc_dir = os.path.join(base_dir, f"CleanSpeech_{name}ing")
    # Noise-only signals get their own folder; previously this pointed at the
    # NoisySpeech folder, mixing noise files in with the noisy mixtures.
    noise_proc_dir = os.path.join(base_dir, f"Noise_{name}ing")
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != "None":
        # Drop every noise file whose basename starts with an excluded prefix.
        excluded_prefixes = tuple(cfg["noise_types_excluded"].split(","))
        noisefilenames = [
            fn
            for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded_prefixes)
        ]

    if total_samples > 0:
        # Fail with a readable message instead of numpy's "low >= high".
        if not cleanfilenames:
            raise ValueError(
                f"No clean speech files matching [{audioformat}] "
                f"in [{clean_dir}]"
            )
        if not noisefilenames:
            raise ValueError(
                f"No noise files matching [{audioformat}] in [{noise_dir}]"
            )

    filecounter = 0
    num_samples = 0

    while num_samples < total_samples:
        # Clean speech: extend until strictly longer than audio_length.
        idx_s = np.random.randint(0, len(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])
        clean, fs = _extend_signal(
            clean, fs, cleanfilenames, idx_s, audio_length, silence_length
        )

        # Noise: extend only when shorter than the clean signal, then cut to
        # exactly the clean signal's length.
        idx_n = np.random.randint(0, len(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])
        if len(noise) < len(clean):
            noise, fs = _extend_signal(
                noise, fs, noisefilenames, idx_n, len(clean), silence_length
            )
        noise = noise[0 : len(clean)]
        filecounter = filecounter + 1

        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(
                clean=clean, noise=noise, snr=snr
            )
            snr_tag = str(snr)
            noisyfilename = (
                f"noisy{filecounter}_SNRdb_{snr_tag}_clnsp{filecounter}.wav"
            )
            cleanfilename = f"clnsp{filecounter}.wav"
            noisefilename = f"noisy{filecounter}_SNRdb_{snr_tag}.wav"

            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            # Every SNR variant counts towards the requested total duration.
            num_samples = num_samples + len(noisy_snr)
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Command-line entry point: load one section of the configuration file
    # and hand its options to main().
    parser = argparse.ArgumentParser()

    # Configurations: read noisyspeech_synthesizer.cfg
    parser.add_argument(
        "--cfg",
        default="noisyspeech_synthesizer.cfg",
        help="Read noisyspeech_synthesizer.cfg for all the details",
    )
    # Name of the section inside the configuration file to use.
    parser.add_argument("--cfg_str", type=str, default="noisy_speech")
    args = parser.parse_args()

    # The configuration path is resolved relative to this script's folder.
    cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
    if not os.path.exists(cfgpath):
        # Explicit check instead of ``assert`` so it is not removed by -O.
        parser.error(f"No configuration file as [{cfgpath}]")

    # Use the public constructor argument rather than the private
    # ``_interpolation`` attribute.
    cfg = CP.ConfigParser(interpolation=CP.ExtendedInterpolation())
    cfg.read(cfgpath)
    if not cfg.has_section(args.cfg_str):
        parser.error(f"No section [{args.cfg_str}] in [{cfgpath}]")

    # Read through the public section proxy so the configured interpolation
    # is actually applied (the private ``_sections`` dict holds raw values),
    # and pass a plain dict to main().
    main(dict(cfg[args.cfg_str]))
 |