77 lines
2.1 KiB
Python
77 lines
2.1 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Wed Jun 26 15:54:05 2019
|
|
|
|
@author: chkarada
|
|
"""
|
|
import os
|
|
|
|
import numpy as np
|
|
import soundfile as sf
|
|
|
|
|
|
# Function to read audio
|
|
def audioread(path, norm=True, start=0, stop=None):
    """Read an audio file, down-mix it to mono and optionally normalize it.

    Args:
        path: Path of the audio file; it is resolved to an absolute path.
        norm: When true, scale the signal so that its RMS equals
            ``10 ** (-25 / 20)`` (-25 dB). A silent signal (RMS of zero)
            is returned unscaled instead of being divided by zero.
        start: First frame to read, forwarded to ``sf.read``.
        stop: Frame at which reading stops (``None`` reads to the end),
            forwarded to ``sf.read``.

    Returns:
        Tuple ``(x, sr)``: the mono samples and the sample rate returned
        by ``sf.read``.

    Raises:
        ValueError: If ``path`` does not exist.
        RuntimeError: If ``sf.read`` cannot decode the file.
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        raise ValueError("[{}] does not exist!".format(path))

    try:
        x, sr = sf.read(path, start=start, stop=stop)
    except RuntimeError:
        # Without the re-raise, execution would continue and fail further
        # down with an unrelated unbound-variable error on `x`.
        print("WARNING: Audio type not supported")
        raise

    if len(x.shape) != 1:
        # Multi-channel: average the channels into a single mono signal.
        x = x.T
        x = x.sum(axis=0) / x.shape[0]

    if norm:
        rms = (x**2).mean() ** 0.5
        # Skip scaling for silent (or empty) input to avoid NaN/inf samples.
        if rms > 0:
            x = x * (10 ** (-25 / 20) / rms)

    return x, sr
|
|
|
|
|
|
# Function to write audio
|
|
def audiowrite(data, fs, destpath, norm=False):
    """Write audio samples to ``destpath``, creating its directory if needed.

    Args:
        data: NumPy array of audio samples.
        fs: Sample rate passed to ``sf.write``.
        destpath: Output file path; it is resolved to an absolute path.
        norm: When true, scale ``data`` so that its RMS equals
            ``10 ** (-25 / 20)`` (-25 dB, the same level used by the other
            helpers in this file). If the scaled peak reaches 1 or more,
            the signal is rescaled so that the peak is exactly 1.

    Returns:
        None.
    """
    if norm:
        # A real epsilon keeps the divisions finite for silent input.
        eps = np.finfo(float).eps
        rms = (data**2).mean() ** 0.5
        # RMS is an amplitude, so the dB-to-linear conversion divides by 20.
        scalar = 10 ** (-25 / 20) / (rms + eps)
        data = data * scalar
        # Reduce to a scalar peak first: comparing the whole array against
        # eps with the builtin max() raises for multi-sample signals.
        peak = np.max(np.abs(data))
        if peak >= 1:
            data = data / max(peak, eps)

    destpath = os.path.abspath(destpath)
    destdir = os.path.dirname(destpath)

    # exist_ok avoids a race between checking for and creating the directory.
    os.makedirs(destdir, exist_ok=True)

    sf.write(destpath, data, fs)
    return
|
|
|
|
|
|
# Function to mix clean speech and noise at various SNR levels
|
|
def snr_mixer(clean, noise, snr):
    """Mix clean speech and noise at the requested signal-to-noise ratio.

    Both signals are first normalized to an RMS of ``10 ** (-25 / 20)``
    (-25 dB FS). The noise is then scaled so that
    ``20 * log10(rms(clean) / rms(noise))`` equals ``snr``.

    Args:
        clean: NumPy array with the clean speech samples.
        noise: NumPy array with the noise samples; it is added element-wise
            to ``clean``, so the shapes must be compatible.
        snr: Target signal-to-noise ratio in dB.

    Returns:
        Tuple ``(clean, noisenewlevel, noisyspeech)``: the normalized clean
        signal, the noise scaled for the target SNR, and their sum.
    """
    # Epsilon keeps the divisions finite when an input is silent.
    eps = np.finfo(float).eps
    target_rms = 10 ** (-25 / 20)

    # Normalizing to -25 dB FS
    rmsclean = (clean**2).mean() ** 0.5
    clean = clean * (target_rms / (rmsclean + eps))
    rmsclean = (clean**2).mean() ** 0.5

    rmsnoise = (noise**2).mean() ** 0.5
    noise = noise * (target_rms / (rmsnoise + eps))
    rmsnoise = (noise**2).mean() ** 0.5

    # Set the noise level for a given SNR. The ratio is between RMS
    # amplitudes, so it is applied directly; taking its square root would
    # halve the achieved SNR in dB.
    noisescalar = rmsclean / (10 ** (snr / 20)) / (rmsnoise + eps)
    noisenewlevel = noise * noisescalar
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech
|