changes to prep dns 2020

2022-10-14 15:20:34 +05:30 · 2022-10-14 15:20:34 +05:30 · 8d1c057b86
parent 6e0f69f575
commit 8d1c057b86
7 changed files with 280 additions and 18 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -40,4 +40,5 @@ repos:
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: mixed-line-ending
        exclude: noisyspeech_synthesizer.cfg
        args: ['--fix=no']
--- a/audiolib.py
+++ b/audiolib.py
@ -0,0 +1,76 @@
 # -*- coding: utf-8 -*-
 """
 Created on Wed Jun 26 15:54:05 2019
@author: chkarada
 """
 import os
 import numpy as np
 import soundfile as sf
 # Function to read audio
 def audioread(path, norm=True, start=0, stop=None):
    """Read an audio file and return it as a single-channel signal.

    Multi-channel data is down-mixed by averaging the channels. When ``norm``
    is set, the signal is scaled so that its RMS equals ``10 ** (-25 / 20)``.

    Args:
        path: Path of the audio file; it is made absolute before use.
        norm: Whether to scale the signal to the RMS target above.
        start: Forwarded to ``sf.read`` as ``start``.
        stop: Forwarded to ``sf.read`` as ``stop``.

    Returns:
        Tuple ``(x, sr)``: the one-dimensional signal and the sample rate
        reported by ``sf.read``.

    Raises:
        ValueError: If ``path`` does not exist.
        RuntimeError: If ``sf.read`` cannot decode the file.
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        raise ValueError("[{}] does not exist!".format(path))
    try:
        x, sr = sf.read(path, start=start, stop=stop)
    except RuntimeError as err:  # fix for sph pcm-embedded shortened v2
        print("WARNING: Audio type not supported")
        # Re-raise with the offending path: continuing would only fail a few
        # lines later on the unbound ``x``.
        raise RuntimeError(
            "Audio type not supported: [{}]".format(path)
        ) from err
    if len(x.shape) != 1:  # multi-channel: average the channels
        x = x.T
        x = x.sum(axis=0) / x.shape[0]
    if norm:
        rms = (x**2).mean() ** 0.5
        # A silent (or empty) signal has no level to normalise; dividing by
        # its RMS would fill the output with NaN/inf.
        if rms > 0:
            scalar = 10 ** (-25 / 20) / (rms)
            x = x * scalar
    return x, sr
 # Function to write audio
 def audiowrite(data, fs, destpath, norm=False):
    """Write ``data`` to ``destpath``, creating the parent directory if needed.

    Args:
        data: Samples to write (assumed to be a NumPy array - TODO confirm
            with callers).
        fs: Sample rate handed to ``sf.write``.
        destpath: Output file path; it is made absolute before use.
        norm: When true, scale ``data`` by ``10 ** (-25 / 10) / rms`` and then
            divide by the peak if the peak magnitude reaches 1 or more.
    """
    if norm:
        rms = (data**2).mean() ** 0.5
        # NOTE(review): the exponent divides by 10 here but by 20 in
        # audioread/snr_mixer; the value is kept as found - confirm which
        # target level is intended.
        if rms > 0:  # silent input: scaling would divide by zero
            data = data * (10 ** (-25 / 10) / rms)
        # The previous ``max(abs(data), eps)`` compared an array with a float
        # and raised ValueError whenever this rescaling was needed.
        peak = np.max(np.abs(data)) if np.size(data) else 0.0
        if peak >= 1:
            data = data / peak
    destpath = os.path.abspath(destpath)
    # exist_ok avoids a failure when another writer creates the directory
    # between an existence check and the makedirs call.
    os.makedirs(os.path.dirname(destpath), exist_ok=True)
    sf.write(destpath, data, fs)
 # Function to mix clean speech and noise at various SNR levels
 def snr_mixer(clean, noise, snr):
    """Mix clean speech and noise at the requested signal-to-noise ratio.

    Both signals are first scaled to an RMS of ``10 ** (-25 / 20)``. The noise
    is then scaled so that ``20 * log10(rms(clean) / rms(noise))`` equals
    ``snr``.

    Args:
        clean: Clean speech samples.
        noise: Noise samples; must be addable to ``clean`` element-wise.
        snr: Target signal-to-noise ratio in dB.

    Returns:
        Tuple ``(clean, noisenewlevel, noisyspeech)``: the normalised clean
        signal, the noise at its final level, and their sum.
    """
    target_rms = 10 ** (-25 / 20)

    # Normalizing to -25 dB FS. Silent signals are left untouched because
    # dividing by a zero RMS would turn them into NaN/inf.
    rmsclean = (clean**2).mean() ** 0.5
    if rmsclean > 0:
        clean = clean * (target_rms / rmsclean)
        rmsclean = (clean**2).mean() ** 0.5

    rmsnoise = (noise**2).mean() ** 0.5
    if rmsnoise > 0:
        noise = noise * (target_rms / rmsnoise)
        rmsnoise = (noise**2).mean() ** 0.5
        # Set the noise level for a given SNR. This is an amplitude ratio, so
        # no square root: taking one halves the resulting SNR in dB.
        noisescalar = rmsclean / (10 ** (snr / 20)) / rmsnoise
        noisenewlevel = noise * noisescalar
    else:
        noisenewlevel = noise
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech
--- a/hpc_entrypoint.sh
+++ b/hpc_entrypoint.sh
@ -34,6 +34,6 @@ pwd
 #python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TRAIN --output ./data/train
 #python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TEST --output ./data/test
-
+python noisyspeech_synthesizer.py
 echo "Start Training..."
-python enhancer/cli/train.py
+#python enhancer/cli/train.py
--- a/noisyspeech_synthesizer.cfg
+++ b/noisyspeech_synthesizer.cfg
@ -0,0 +1,29 @@
 # Configuration for generating Noisy Speech Dataset
 # - sampling_rate: Specify the sampling rate. Default is 16 kHz
 # - audioformat: Glob pattern used to find audio files in the speech and noise directories. Default is *.wav
 # - audio_length: Minimum Length of each audio clip (noisy and clean speech) in seconds that will be generated by augmenting utterances.
 # - silence_length: Duration of silence introduced between clean speech utterances.
 # - total_hours: Total number of hours of data required. Units are in hours.
 # - snr_lower: Lower bound for SNR required (default: 0 dB)
 # - snr_upper: Upper bound for SNR required (default: 40 dB)
 # - total_snrlevels: Number of SNR levels required (default: 5, which means 5 evenly spaced levels from snr_lower to snr_upper, both included)
 # - noise_dir: Default is None. But specify the noise directory path if noise files are not in the source directory
 # - speech_dir: Default is None. But specify the speech directory path if speech files are not in the source directory
 # - noise_types_excluded: Noise files starting with the following tags to be excluded in the noise list. Example: noise_types_excluded: Babble, AirConditioner
 #                         Specify 'None' if no noise files to be excluded.
 [noisy_speech]
 sampling_rate: 16000
 audioformat: *.wav
 audio_length: 10
 silence_length: 0.2
 total_hours: 20
 snr_lower: 0
 snr_upper: 40
 total_snrlevels: 5
 noise_dir: /scratch/c.sistc3/MS-SNSD/noise_train
 speech_dir: /scratch/c.sistc3/MS-SNSD/clean_train
 noise_types_excluded: None
--- a/noisyspeech_synthesizer.py
+++ b/noisyspeech_synthesizer.py
@ -0,0 +1,153 @@
 """
@author: chkarada
 """
 import argparse
 import configparser as CP
 import glob
 import os
 import numpy as np
 from audiolib import audioread, audiowrite, snr_mixer
 def _extend_with_files(signal, min_length, filenames, idx, silence_length, fs):
    """Append further files to ``signal`` until it is longer than ``min_length``.

    Each appended file is preceded by ``silence_length`` seconds of zeros,
    sized with the sample rate of the file just read. Returns the extended
    signal and the sample rate of the last file read (``fs`` unchanged when
    nothing had to be appended).
    """
    while len(signal) <= min_length:
        idx = idx + 1
        # NOTE(review): this wrap check fires one index early, so the last
        # file is only ever reached through the random draw. Kept as found.
        if idx >= np.size(filenames) - 1:
            idx = np.random.randint(0, np.size(filenames))
        extra, fs = audioread(filenames[idx])
        silence = np.zeros(int(fs * silence_length))
        signal = np.append(np.append(signal, silence), extra)
    return signal, fs


 def main(cfg):
    """Generate noisy/clean/noise training clips described by ``cfg``.

    Clean files are concatenated (with short silences) until a clip is longer
    than ``audio_length`` seconds, a noise clip of equal length is built the
    same way, and the pair is mixed at every SNR level. Output goes to the
    ``NoisySpeech_training``, ``CleanSpeech_training`` and ``Noise_training``
    directories next to this file. Generation stops once the number of noisy
    samples written reaches ``total_hours``.

    Args:
        cfg: Mapping of option name to string value with the keys
            ``snr_lower``, ``snr_upper``, ``total_snrlevels``, ``speech_dir``,
            ``noise_dir``, ``sampling_rate``, ``audioformat``, ``total_hours``,
            ``audio_length``, ``silence_length`` and ``noise_types_excluded``.

    Raises:
        FileNotFoundError: If the speech or noise directory does not exist.
        ValueError: If no speech or noise files match ``audioformat``.
    """
    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])
    base_dir = os.path.dirname(__file__)

    # Explicit raises instead of ``assert``: assertions vanish under ``-O``.
    clean_dir = os.path.join(base_dir, "clean_train")
    if cfg["speech_dir"] != "None":
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        raise FileNotFoundError("Clean speech data is required")

    noise_dir = os.path.join(base_dir, "noise_train")
    if cfg["noise_dir"] != "None":
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise FileNotFoundError("Noise data is required")

    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(base_dir, "NoisySpeech_training")
    clean_proc_dir = os.path.join(base_dir, "CleanSpeech_training")
    noise_proc_dir = os.path.join(base_dir, "Noise_training")
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        os.makedirs(out_dir, exist_ok=True)

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != "None":
        # Strip whitespace so "Babble, AirConditioner" matches both tags, and
        # drop empty tags, which would otherwise exclude every file.
        excluded_tags = tuple(
            tag.strip()
            for tag in cfg["noise_types_excluded"].split(",")
            if tag.strip()
        )
        noisefilenames = [
            fn
            for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded_tags)
        ]
    # Fail with a clear message rather than an opaque randint error.
    if not cleanfilenames:
        raise ValueError(
            "No speech files matching [{}] in [{}]".format(
                audioformat, clean_dir
            )
        )
    if not noisefilenames:
        raise ValueError(
            "No noise files matching [{}] in [{}]".format(
                audioformat, noise_dir
            )
        )

    filecounter = 0
    num_samples = 0
    while num_samples < total_samples:
        # Build a clean clip longer than audio_length samples.
        idx_s = np.random.randint(0, np.size(cleanfilenames))
        clean, fs = audioread(cleanfilenames[idx_s])
        clean, fs = _extend_with_files(
            clean, audio_length, cleanfilenames, idx_s, silence_length, fs
        )

        # Build a noise clip at least as long, then cut it to match.
        idx_n = np.random.randint(0, np.size(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])
        if len(noise) < len(clean):
            noise, fs = _extend_with_files(
                noise, len(clean), noisefilenames, idx_n, silence_length, fs
            )
        noise = noise[0 : len(clean)]

        filecounter = filecounter + 1
        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(
                clean=clean, noise=noise, snr=snr
            )
            noisyfilename = (
                "noisy"
                + str(filecounter)
                + "_SNRdb_"
                + str(snr)
                + "_clnsp"
                + str(filecounter)
                + ".wav"
            )
            cleanfilename = "clnsp" + str(filecounter) + ".wav"
            noisefilename = (
                "noisy" + str(filecounter) + "_SNRdb_" + str(snr) + ".wav"
            )
            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)
 if __name__ == "__main__":
    # Script entry point: locate the configuration file next to this script,
    # load the requested section and hand it to main().
    parser = argparse.ArgumentParser()
    # Configurations: read noisyspeech_synthesizer.cfg
    parser.add_argument(
        "--cfg",
        default="noisyspeech_synthesizer.cfg",
        help="Read noisyspeech_synthesizer.cfg for all the details",
    )
    parser.add_argument("--cfg_str", type=str, default="noisy_speech")
    args = parser.parse_args()
    cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
    # Raise explicitly: an ``assert`` would be skipped under ``python -O``.
    if not os.path.exists(cfgpath):
        raise FileNotFoundError(f"No configuration file as [{cfgpath}]")
    # Use the public constructor argument and section access instead of the
    # private ``_interpolation`` / ``_sections`` attributes. Reading the
    # section this way also applies the configured interpolation.
    cfg = CP.ConfigParser(interpolation=CP.ExtendedInterpolation())
    cfg.read(cfgpath)
    main(cfg[args.cfg_str])
--- a/pyproject.toml
+++ b/pyproject.toml
@ -2,7 +2,6 @@
 line-length = 80
 target-version = ['py38']
 exclude = '''
 (
  /(
      \.eggs         # exclude a few common directories in the
@ -10,6 +9,9 @@ exclude = '''
    | \.mypy_cache
    | \.tox
    | \.venv
    | noisyspeech_synthesizer.py
    | noisyspeech_synthesizer.cfg
  )/
 )
 '''
--- a/requirements.txt
+++ b/requirements.txt
@ -1,18 +1,19 @@
-boto3>=1.24.86
+# torch>=1.12.1
-huggingface-hub>=0.10.0
+# torchaudio>=0.12.1
-hydra-core>=1.2.0
+# tqdm>=4.64.1
-joblib>=1.2.0
+configparser
-librosa>=0.9.2
+# boto3>=1.24.86
-mlflow>=1.29.0
+# huggingface-hub>=0.10.0
 # hydra-core>=1.2.0
 # joblib>=1.2.0
 # librosa>=0.9.2
 # mlflow>=1.29.0
 numpy>=1.23.3
-pesq==0.0.4
+# pesq==0.0.4
-protobuf>=3.19.6
+# protobuf>=3.19.6
-pystoi==0.3.3
+# pystoi==0.3.3
-pytest-lazy-fixture>=0.6.3
+# pytest-lazy-fixture>=0.6.3
-pytorch-lightning>=1.7.7
+# pytorch-lightning>=1.7.7
-scikit-learn>=1.1.2
+# scikit-learn>=1.1.2
 scipy>=1.9.1
 soundfile>=0.11.0
 torch>=1.12.1
 torchaudio>=0.12.1
 tqdm>=4.64.1