# Configuration for generating the Noisy Speech Dataset
#
# - sampling_rate: Specify the sampling rate. Default is 16 kHz (written in Hz below).
# - audioformat: Audio file format. Default is .wav (written below as the wildcard pattern *.wav).
# - audio_length: Minimum length, in seconds, of each audio clip (noisy and clean speech) that will be generated by augmenting utterances.
# - silence_length: Duration of silence introduced between clean speech utterances.
# - total_hours: Total amount of data required, in hours.
# - snr_lower: Lower bound for the required SNR (default: 0 dB)
# - snr_upper: Upper bound for the required SNR (default: 40 dB)
# - total_snrlevels: Number of SNR levels required (default: 5, which means there are 5 levels between snr_lower and snr_upper)
# - naming: NOTE(review): this key was not described in the original notes; it looks like a label
#   for the data set being generated (here: train) -- confirm against the script that reads this file.
# - noise_dir: Default is None. Specify the noise directory path if the noise files are not in the source directory.
# - speech_dir: Default is None. Specify the speech directory path if the speech files are not in the source directory.
# - noise_types_excluded: Noise files whose names start with the following tags are excluded from the noise list.
#   Example: noise_types_excluded: Babble, AirConditioner
#   Specify 'None' if no noise files are to be excluded.

[noisy_speech]

sampling_rate: 16000
audioformat: *.wav
audio_length: 10
silence_length: 0.2
total_hours: 30
snr_lower: 0
snr_upper: 40
total_snrlevels: 2
naming: train

noise_dir: /scratch/c.sistc3/MS-SNSD/noise_train
speech_dir: /scratch/c.sistc3/MS-SNSD/clean_train
noise_types_excluded: None