# Recovered from a git format-patch: commit
# bcbc82dbade8318b0a8c0b30d20cb3c8e884ed79 ("audio io") by shahules786,
# Mon, 22 Aug 2022 13:25:18 +0530, which creates enhancer/utils/io.py.
"""Audio I/O helper: load, down-mix and resample waveforms."""

import os
from typing import Optional, Union

import numpy as np
import torch


class Audio:
    """Callable that turns a file path or array into a normalised waveform.

    The result is laid out as ``(channels, samples)``, optionally averaged
    down to one channel, resampled to ``sampling_rate`` and optionally
    wrapped in a ``torch.Tensor``.

    Args:
        sampling_rate: Target sampling rate of the returned audio.
        mono: When true, multi-channel audio is averaged into one channel.
        return_tensor: When true, ``__call__`` returns a ``torch.Tensor``;
            otherwise it returns the NumPy array.
    """

    def __init__(
        self,
        sampling_rate: int = 16000,
        mono: bool = True,
        return_tensor: bool = True,
    ) -> None:
        self.sampling_rate = sampling_rate
        self.mono = mono
        self.return_tensor = return_tensor

    def __call__(
        self,
        audio: Union[str, "os.PathLike[str]", np.ndarray],
        sampling_rate: Optional[int] = None,
        offset: Optional[float] = None,
        duration: Optional[float] = None,
    ):
        """Load (if needed), down-mix and resample ``audio``.

        Args:
            audio: Path to an audio file, or an array that is either 1-D
                (samples) or 2-D (channels, samples).
            sampling_rate: For a path, forwarded to ``librosa.load`` as
                ``sr``. For an array, the rate the array was sampled at;
                when omitted the array is returned without resampling.
            offset: Forwarded to ``librosa.load``; ``None`` means start at
                the beginning. Ignored for array input.
            duration: Forwarded to ``librosa.load``. Ignored for array
                input.

        Returns:
            A ``torch.Tensor`` if ``return_tensor`` is set, otherwise a
            NumPy array.

        Raises:
            FileNotFoundError: ``audio`` is a path that does not exist.
            ValueError: ``audio`` is neither a path nor a NumPy array.
        """
        if isinstance(audio, (str, os.PathLike)):
            if not os.path.exists(audio):
                raise FileNotFoundError(f"File {audio} deos not exist")
            # Imported here so array-only use does not require librosa.
            import librosa

            audio, sampling_rate = librosa.load(
                audio,
                sr=sampling_rate,
                mono=False,
                # librosa documents offset as a float defaulting to 0.0.
                offset=0.0 if offset is None else offset,
                duration=duration,
            )
        elif not isinstance(audio, np.ndarray):
            raise ValueError("audio should be either filepath or numpy ndarray")

        # Single-channel data may arrive 1-D from either branch (librosa
        # returns 1-D for mono files even with mono=False); the rest of the
        # pipeline expects (channels, samples).
        if audio.ndim == 1:
            audio = audio.reshape(1, -1)

        if self.mono:
            audio = self.convert_mono(audio)

        # Without a known source rate there is nothing to resample from.
        if sampling_rate is not None:
            audio = self.resample_audio(audio, sampling_rate)

        if self.return_tensor:
            return torch.tensor(audio)
        return audio

    def convert_mono(self, audio):
        """Average the channels of ``audio`` into one when ``mono`` is set.

        Args:
            audio: Array shaped (channels, samples); a 1-D array is treated
                as a single channel.

        Returns:
            An array shaped (1, samples) when down-mixing happens or the
            input was 1-D; otherwise ``audio`` unchanged.
        """
        if audio.ndim == 1:
            audio = audio.reshape(1, -1)
        num_channels, num_samples = audio.shape
        if num_channels > 1 and self.mono:
            return audio.mean(axis=0).reshape(1, num_samples)
        return audio

    def resample_audio(self, audio, sampling_rate):
        """Resample ``audio`` from ``sampling_rate`` to ``self.sampling_rate``.

        Args:
            audio: Array to resample.
            sampling_rate: Rate ``audio`` is currently sampled at.

        Returns:
            The resampled array, or ``audio`` itself when the rates already
            match.
        """
        if self.sampling_rate != sampling_rate:
            # Imported here so matching-rate input does not require librosa.
            import librosa

            audio = librosa.resample(
                audio, orig_sr=sampling_rate, target_sr=self.sampling_rate
            )
        return audio