import glob import os import numpy as np from scipy.io import wavfile class Fileprocessor: def __init__( self, clean_dir, noisy_dir, sr = 16000, matching_function = None ): self.clean_dir = clean_dir self.noisy_dir = noisy_dir self.sr = sr self.matching_function = matching_function @classmethod def from_name(cls, name:str, clean_dir, noisy_dir, sr, matching_function=None ): if name.lower() == "vctk": return cls(clean_dir,noisy_dir,sr, Fileprocessor.match_vtck) elif name.lower() == "dns-2020": return cls(clean_dir,noisy_dir,sr, Fileprocessor.match_dns2020) else: return cls(clean_dir,noisy_dir,sr, matching_function) def prepare_matching_dict(self): if self.matching_function is None: raise ValueError("Not a valid matching function") return self.matching_function(self.clean_dir,self.noisy_dir,self.sr) @staticmethod def match_vtck(clean_path,noisy_path,sr): matching_wavfiles = dict() clean_filenames = [file.split('/')[-1] for file in glob.glob(os.path.join(clean_path,"*.wav"))] noisy_filenames = [file.split('/')[-1] for file in glob.glob(os.path.join(noisy_path,"*.wav"))] common_filenames = np.intersect1d(noisy_filenames,clean_filenames) for file_name in common_filenames: sr_clean, clean_file = wavfile.read(os.path.join(clean_path,file_name)) sr_noisy, noisy_file = wavfile.read(os.path.join(noisy_path,file_name)) if ((clean_file.shape[-1]==noisy_file.shape[-1]) and (sr_clean==sr) and (sr_noisy==sr)): matching_wavfiles.update( {os.path.join(clean_path,file_name):{"noisy":os.path.join(noisy_path,file_name), "duration":clean_file.shape[-1]/sr} } ) return matching_wavfiles @staticmethod def match_dns2020(clean_path,noisy_path,sr): matching_wavfiles = dict() clean_filenames = [file.split('/')[-1] for file in glob.glob(os.path.join(clean_path,"*.wav"))] for clean_file in clean_filenames: noisy_filenames = glob.glob(os.path.join(noisy_path,f"*_{clean_file}.wav")) for noisy_file in noisy_filenames: sr_clean, clean_file = wavfile.read(os.path.join(clean_path,clean_file)) sr_noisy, noisy_file = wavfile.read(noisy_file) if ((clean_file.shape[-1]==noisy_file.shape[-1]) and (sr_clean==sr) and (sr_noisy==sr)): matching_wavfiles.update( {os.path.join(clean_path,clean_file):{"noisy":noisy_file, "duration":clean_file.shape[-1]/sr} } ) return matching_wavfiles