rmv mdkir

20hrs
2022-11-20 19:58:20 +05:30 · 2022-11-20 19:56:56 +05:30 · 2022-11-20 19:16:54 +05:30 · 2022-11-20 19:11:41 +05:30 · 2022-11-03 10:40:22 +05:30 · 2022-11-03 09:52:18 +05:30
12 changed files with 314 additions and 37 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -40,4 +40,5 @@ repos:
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: mixed-line-ending
        exclude: noisyspeech_synthesizer.cfg
        args: ['--fix=no']
--- a/audiolib.py
+++ b/audiolib.py
@ -0,0 +1,76 @@
 # -*- coding: utf-8 -*-
 """
 Created on Wed Jun 26 15:54:05 2019
@author: chkarada
 """
 import os
 import numpy as np
 import soundfile as sf
 # Function to read audio
 def audioread(path, norm=True, start=0, stop=None):
     """Read an audio file as a mono signal, optionally level-normalised.

     Args:
         path: Path of the audio file; resolved to an absolute path.
         norm: When true, scale the signal so that its RMS equals
             ``10 ** (-25 / 20)`` (-25 dB FS). An all-zero or empty signal
             is returned unscaled instead of being divided by zero.
         start: Passed through to ``sf.read`` as ``start``.
         stop: Passed through to ``sf.read`` as ``stop``.

     Returns:
         ``(x, sr)``: the mono samples and the sample rate from ``sf.read``.

     Raises:
         ValueError: If ``path`` does not exist.
         RuntimeError: If ``sf.read`` cannot decode the file.
     """
     path = os.path.abspath(path)
     if not os.path.exists(path):
         raise ValueError("[{}] does not exist!".format(path))
     try:
         x, sr = sf.read(path, start=start, stop=stop)
     except RuntimeError as err:  # e.g. sph pcm-embedded shortened v2
         print("WARNING: Audio type not supported")
         # Previously execution fell through with ``x`` unbound and died on
         # an unrelated UnboundLocalError; fail with the real cause instead.
         raise RuntimeError(
             "[{}] could not be decoded".format(path)
         ) from err
     if len(x.shape) > 1:
         # Multi-channel: average the channels (second axis) down to mono.
         x = x.sum(axis=1) / x.shape[1]
     if norm:
         rms = (x**2).mean() ** 0.5
         if rms > 0:  # False for silence and for NaN (empty input)
             x = x * (10 ** (-25 / 20) / rms)
     return x, sr
 # Function to write audio
 def audiowrite(data, fs, destpath, norm=False):
     """Write ``data`` to ``destpath``, creating the target directory.

     Args:
         data: Samples to write. With ``norm=True`` this must support
             array arithmetic (``data**2``, ``.mean()``, ``.max()``).
         fs: Sample rate handed to ``sf.write``.
         destpath: Output file path; resolved to an absolute path.
         norm: When true, scale the signal to an RMS of
             ``10 ** (-25 / 20)`` (-25 dB FS, the level used by
             ``audioread`` and ``snr_mixer``) and, if the peak would still
             reach or exceed 1, rescale so that the peak is exactly 1.
     """
     if norm:
         rms = (data**2).mean() ** 0.5
         if rms > 0:  # leave silence untouched rather than divide by zero
             # Amplitude level: dB / 20 (the old dB / 10 produced -50 dB FS).
             data = data * (10 ** (-25 / 20) / rms)
         # The old ``max(abs(data), eps)`` compared an array with a float
         # and raised ValueError whenever this branch was reached.
         peak = abs(data).max()
         if peak >= 1:
             data = data / peak
     destpath = os.path.abspath(destpath)
     destdir = os.path.dirname(destpath)
     # exist_ok avoids the check-then-create race between parallel writers.
     os.makedirs(destdir, exist_ok=True)
     sf.write(destpath, data, fs)
     return
 # Function to mix clean speech and noise at various SNR levels
 def snr_mixer(clean, noise, snr):
     """Mix clean speech and noise at the requested SNR.

     Both inputs are first scaled to an RMS of ``10 ** (-25 / 20)``
     (-25 dB FS). The noise is then attenuated or amplified so that
     ``20 * log10(rms(clean) / rms(noise))`` equals ``snr``.

     Args:
         clean: Clean speech samples (array supporting ``**`` and ``.mean()``).
         noise: Noise samples of the same length as ``clean``.
         snr: Target signal-to-noise ratio in dB.

     Returns:
         ``(clean, noisenewlevel, noisyspeech)``: the normalised clean
         signal, the rescaled noise, and their sum.

     Note:
         Neither input may be all-zero: an RMS of 0 leads to a division by
         zero during normalisation.
     """
     target_rms = 10 ** (-25 / 20)
     # Normalizing to -25 dB FS
     clean = clean * (target_rms / (clean**2).mean() ** 0.5)
     noise = noise * (target_rms / (noise**2).mean() ** 0.5)
     rmsclean = (clean**2).mean() ** 0.5
     rmsnoise = (noise**2).mean() ** 0.5
     # Set the noise level for a given SNR. This is an amplitude ratio, so
     # no square root: wrapping it in sqrt halved the SNR expressed in dB.
     noisescalar = rmsclean / (10 ** (snr / 20)) / rmsnoise
     noisenewlevel = noise * noisescalar
     noisyspeech = clean + noisenewlevel
     return clean, noisenewlevel, noisyspeech
--- a/enhancer/cli/train_config/config.yaml
+++ b/enhancer/cli/train_config/config.yaml
@ -1,7 +1,7 @@
 defaults:
-  - model : WaveUnet
+  - model : Demucs
  - dataset : Vctk
  - optimizer : Adam
  - hyperparameters : default
-  - trainer : default
+  - trainer : fastrun_dev
  - mlflow : experiment
--- a/enhancer/cli/train_config/dataset/DNS-2020.yaml
+++ b/enhancer/cli/train_config/dataset/DNS-2020.yaml
@ -2,10 +2,9 @@ _target_: enhancer.data.dataset.EnhancerDataset
 root_dir : /Users/shahules/Myprojects/enhancer/datasets/vctk_test
 name : dns-2020
 duration : 1.0
-sampling_rate: 16000
+sampling_rate: 8000
 batch_size: 32
 files:
  root_dir : /Users/shahules/Myprojects/enhancer/datasets/vctk_test
  train_clean : clean_test_wav
  test_clean : clean_test_wav
  train_noisy : clean_test_wav
--- a/enhancer/cli/train_config/dataset/Vctk.yaml
+++ b/enhancer/cli/train_config/dataset/Vctk.yaml
@ -1,9 +1,10 @@
 _target_: enhancer.data.dataset.EnhancerDataset
 name : vctk
 root_dir : /scratch/c.sistc3/DS_10283_2791
-duration : 1.0
+duration : 1.5
 sampling_rate: 16000
-batch_size: 128
+batch_size: 256
 valid_size : 0.05
 files:
  train_clean : clean_trainset_28spk_wav
--- a/enhancer/cli/train_config/hyperparameters/default.yaml
+++ b/enhancer/cli/train_config/hyperparameters/default.yaml
@ -1,7 +1,7 @@
 loss : mse
-metric : mae
+metric : [stoi,pesq,si-sdr]
-lr : 0.0001
+lr : 0.001
-ReduceLr_patience : 5
+ReduceLr_patience : 10
-ReduceLr_factor : 0.1
+ReduceLr_factor : 0.5
-min_lr : 0.000001
+min_lr : 0.00
 EarlyStopping_factor : 10
--- a/enhancer/cli/train_config/trainer/default.yaml
+++ b/enhancer/cli/train_config/trainer/default.yaml
@ -1,15 +1,15 @@
 _target_: pytorch_lightning.Trainer
-accelerator: auto
+accelerator: gpu
 accumulate_grad_batches: 1
 amp_backend: native
-auto_lr_find: True
+auto_lr_find: False
 auto_scale_batch_size: False
 auto_select_gpus: True
 benchmark: False
 check_val_every_n_epoch: 1
 detect_anomaly: False
 deterministic: False
-devices: -1
+devices: 2
 enable_checkpointing: True
 enable_model_summary: True
 enable_progress_bar: True
@ -22,9 +22,8 @@ limit_predict_batches: 1.0
 limit_test_batches: 1.0
 limit_train_batches: 1.0
 limit_val_batches: 1.0
-log_every_n_steps: 50
+log_every_n_steps: 100
-max_epochs: 3
+max_epochs: 250
 max_steps: -1
 max_time: null
 min_epochs: 1
 min_steps: null
--- a/hpc_entrypoint.sh
+++ b/hpc_entrypoint.sh
@ -32,8 +32,21 @@ echo "Making temp dir"
 mkdir temp
 pwd
-#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TRAIN --output ./data/train
+# echo "files"
-#python transcriber/tasks/embeddings/timit.py --directory /scratch/$USER/TIMIT/data/lisa/data/timit/raw/TIMIT/TEST --output ./data/test
+# rm -rf  /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_training
 # rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_training
 # rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_testing
 # rm -rf /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_testing
-echo "Start Training..."
+# cp -r /scratch/c.sistc3/MS-SNSD/DNS30/NoisySpeech_testing /scratch/c.sistc3/MS-SNSD/DNS15/
-python enhancer/cli/train.py
+# cp -r /scratch/c.sistc3/MS-SNSD/DNS30/CleanSpeech_testing /scratch/c.sistc3/MS-SNSD/DNS15/
 # rm -rf /scratch/c.sistc3/MS-SNSD/DNS20
 # mkdir  /scratch/c.sistc3/MS-SNSD/DNS20
 python noisyspeech_synthesizer.py
 mv ./CleanSpeech_testing/ /scratch/c.sistc3/MS-SNSD/DNS20
 mv ./NoisySpeech_testing/ /scratch/c.sistc3/MS-SNSD/DNS20
 ls /scratch/c.sistc3/MS-SNSD/DNS20
 #python enhancer/cli/train.py
--- a/noisyspeech_synthesizer.cfg
+++ b/noisyspeech_synthesizer.cfg
@ -0,0 +1,30 @@
 # Configuration for generating Noisy Speech Dataset
 # - sampling_rate: Specify the sampling rate. Default is 16 kHz
 # - audioformat: default is .wav
 # - audio_length: Minimum Length of each audio clip (noisy and clean speech) in seconds that will be generated by augmenting utterances.
 # - silence_length: Duration of silence introduced between clean speech utterances.
 # - total_hours: Total number of hours of data required. Units are in hours.
 # - snr_lower: Lower bound for SNR required (default: 0 dB)
 # - snr_upper: Upper bound for SNR required (default: 40 dB)
 # - total_snrlevels: Number of SNR levels required (default: 5, which means there are 5 levels between snr_lower and snr_upper)
 # - noise_dir: Default is None. But specify the noise directory path if noise files are not in the source directory
 # - speech_dir: Default is None. But specify the speech directory path if speech files are not in the source directory
 # - noise_types_excluded: Noise files starting with the following tags to be excluded in the noise list. Example: noise_types_excluded: Babble, AirConditioner
 #                         Specify 'None' if no noise files to be excluded.
 [noisy_speech]
 sampling_rate: 16000
 audioformat: *.wav
 audio_length: 10
 silence_length: 0.2
 total_hours: 1
 snr_lower: 0
 snr_upper: 40
 total_snrlevels: 2
 naming: test
 noise_dir: /scratch/c.sistc3/MS-SNSD/noise_test
 speech_dir: /scratch/c.sistc3/MS-SNSD/clean_test
 noise_types_excluded: None
--- a/noisyspeech_synthesizer.py
+++ b/noisyspeech_synthesizer.py
@ -0,0 +1,155 @@
 """
@author: chkarada
 """
 import argparse
 import configparser as CP
 import glob
 import os
 import numpy as np
 from audiolib import audioread, audiowrite, snr_mixer
 def main(cfg):
     """Generate clean, noise and noisy-speech clips as described by ``cfg``.

     ``cfg`` is a mapping of string options. The keys read here are
     ``snr_lower``, ``snr_upper``, ``total_snrlevels``, ``speech_dir``,
     ``noise_dir``, ``naming``, ``sampling_rate``, ``audioformat``,
     ``total_hours``, ``audio_length``, ``silence_length`` and
     ``noise_types_excluded``.

     Clips are written next to this script, into ``NoisySpeech_<naming>ing``
     and ``CleanSpeech_<naming>ing``, until the number of noisy samples
     written reaches ``total_hours`` of audio at ``sampling_rate``.

     Raises:
         FileNotFoundError: If the clean-speech or noise directory is missing.
         ValueError: If ``total_snrlevels`` is below 1, or if no clean or
             noise file matches ``audioformat``.
     """
     snr_lower = float(cfg["snr_lower"])
     snr_upper = float(cfg["snr_upper"])
     total_snrlevels = int(cfg["total_snrlevels"])
     if total_snrlevels < 1:
         # With no SNR level nothing is ever written and the sample counter
         # below would never advance.
         raise ValueError("total_snrlevels must be at least 1")
     base_dir = os.path.dirname(__file__)
     clean_dir = os.path.join(base_dir, "clean_train")
     if cfg["speech_dir"] != "None":
         clean_dir = cfg["speech_dir"]
     # Explicit raises instead of ``assert False``: asserts vanish under -O.
     if not os.path.exists(clean_dir):
         raise FileNotFoundError("Clean speech data is required")
     noise_dir = os.path.join(base_dir, "noise_train")
     if cfg["noise_dir"] != "None":
         noise_dir = cfg["noise_dir"]
     if not os.path.exists(noise_dir):
         raise FileNotFoundError("Noise data is required")
     name = cfg["naming"]
     cfg_fs = float(cfg["sampling_rate"])
     audioformat = cfg["audioformat"]
     total_hours = float(cfg["total_hours"])
     silence_length = float(cfg["silence_length"])
     noisyspeech_dir = os.path.join(base_dir, f"NoisySpeech_{name}ing")
     clean_proc_dir = os.path.join(base_dir, f"CleanSpeech_{name}ing")
     # NOTE(review): this is the same directory as noisyspeech_dir, so the
     # noise-only clips are written next to the mixtures -- confirm intended.
     noise_proc_dir = os.path.join(base_dir, f"NoisySpeech_{name}ing")
     for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
         os.makedirs(out_dir, exist_ok=True)
     total_samples = int(total_hours * 60 * 60 * cfg_fs)
     # Minimum clip length, converted from seconds to samples.
     audio_length = int(float(cfg["audio_length"]) * cfg_fs)
     SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)
     cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
     noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
     if cfg["noise_types_excluded"] != "None":
         # Strip each tag so "Babble, AirConditioner" excludes both types;
         # drop empty tags, which would otherwise match every file name.
         excluded = tuple(
             tag.strip()
             for tag in cfg["noise_types_excluded"].split(",")
             if tag.strip()
         )
         noisefilenames = [
             fn
             for fn in noisefilenames
             if not os.path.basename(fn).startswith(excluded)
         ]
     if not cleanfilenames:
         raise ValueError(
             f"No clean speech files matching [{audioformat}] in [{clean_dir}]"
         )
     if not noisefilenames:
         raise ValueError(
             f"No usable noise files matching [{audioformat}] in [{noise_dir}]"
         )
     filecounter = 0
     num_samples = 0
     while num_samples < total_samples:
         idx_s = np.random.randint(0, np.size(cleanfilenames))
         clean, fs = audioread(cleanfilenames[idx_s])
         # Extend short utterances with following files, separated by
         # silence, until the clip is longer than audio_length samples.
         while len(clean) <= audio_length:
             idx_s = idx_s + 1
             if idx_s >= np.size(cleanfilenames) - 1:
                 idx_s = np.random.randint(0, np.size(cleanfilenames))
             newclean, fs = audioread(cleanfilenames[idx_s])
             cleanconcat = np.append(
                 clean, np.zeros(int(fs * silence_length))
             )
             clean = np.append(cleanconcat, newclean)
         idx_n = np.random.randint(0, np.size(noisefilenames))
         noise, fs = audioread(noisefilenames[idx_n])
         if len(noise) < len(clean):
             # Same extension scheme for noise that is shorter than the clip.
             while len(noise) <= len(clean):
                 idx_n = idx_n + 1
                 if idx_n >= np.size(noisefilenames) - 1:
                     idx_n = np.random.randint(0, np.size(noisefilenames))
                 newnoise, fs = audioread(noisefilenames[idx_n])
                 noiseconcat = np.append(
                     noise, np.zeros(int(fs * silence_length))
                 )
                 noise = np.append(noiseconcat, newnoise)
         # Trim the noise to exactly the length of the clean clip.
         noise = noise[0 : len(clean)]
         filecounter = filecounter + 1
         for snr in SNR:
             clean_snr, noise_snr, noisy_snr = snr_mixer(
                 clean=clean, noise=noise, snr=snr
             )
             noisyfilename = (
                 "noisy"
                 + str(filecounter)
                 + "_SNRdb_"
                 + str(snr)
                 + "_clnsp"
                 + str(filecounter)
                 + ".wav"
             )
             cleanfilename = "clnsp" + str(filecounter) + ".wav"
             noisefilename = (
                 "noisy" + str(filecounter) + "_SNRdb_" + str(snr) + ".wav"
             )
             noisypath = os.path.join(noisyspeech_dir, noisyfilename)
             cleanpath = os.path.join(clean_proc_dir, cleanfilename)
             noisepath = os.path.join(noise_proc_dir, noisefilename)
             audiowrite(noisy_snr, fs, noisypath, norm=False)
             audiowrite(clean_snr, fs, cleanpath, norm=False)
             audiowrite(noise_snr, fs, noisepath, norm=False)
             # Every SNR variant counts towards the requested total duration.
             num_samples = num_samples + len(noisy_snr)
 if __name__ == "__main__":
     # Command-line entry point: load one section of the .cfg file that sits
     # next to this script and hand its options to main().
     parser = argparse.ArgumentParser()
     # Configurations: read noisyspeech_synthesizer.cfg
     parser.add_argument(
         "--cfg",
         default="noisyspeech_synthesizer.cfg",
         help="Read noisyspeech_synthesizer.cfg for all the details",
     )
     parser.add_argument("--cfg_str", type=str, default="noisy_speech")
     args = parser.parse_args()
     cfgpath = os.path.join(os.path.dirname(__file__), args.cfg)
     # Report problems through argparse instead of ``assert``, which is
     # stripped when Python runs with -O.
     if not os.path.exists(cfgpath):
         parser.error(f"No configuration file as [{cfgpath}]")
     # Public constructor argument instead of poking the private
     # ``_interpolation`` attribute.
     cfg = CP.ConfigParser(interpolation=CP.ExtendedInterpolation())
     cfg.read(cfgpath)
     if not cfg.has_section(args.cfg_str):
         parser.error(f"No section [{args.cfg_str}] in [{cfgpath}]")
     # raw=True returns the values exactly as written, matching what the
     # private ``_sections`` mapping used to provide (plus any DEFAULT
     # section options).
     main(dict(cfg.items(args.cfg_str, raw=True)))
--- a/pyproject.toml
+++ b/pyproject.toml
@ -2,7 +2,6 @@
 line-length = 80
 target-version = ['py38']
 exclude = '''
 (
  /(
      \.eggs         # exclude a few common directories in the
@ -10,6 +9,9 @@ exclude = '''
    | \.mypy_cache
    | \.tox
    | \.venv
    | noisyspeech_synthesizer.py
    | noisyspeech_synthesizer.cfg
  )/
 )
 '''
--- a/requirements.txt
+++ b/requirements.txt
@ -1,18 +1,19 @@
-boto3>=1.24.86
+# torch>=1.12.1
-huggingface-hub>=0.10.0
+# torchaudio>=0.12.1
-hydra-core>=1.2.0
+# tqdm>=4.64.1
-joblib>=1.2.0
+configparser
-librosa>=0.9.2
+# boto3>=1.24.86
-mlflow>=1.29.0
+# huggingface-hub>=0.10.0
 # hydra-core>=1.2.0
 # joblib>=1.2.0
 # librosa>=0.9.2
 # mlflow>=1.29.0
 numpy>=1.23.3
-pesq==0.0.4
+# pesq==0.0.4
-protobuf>=3.19.6
+# protobuf>=3.19.6
-pystoi==0.3.3
+# pystoi==0.3.3
-pytest-lazy-fixture>=0.6.3
+# pytest-lazy-fixture>=0.6.3
-pytorch-lightning>=1.7.7
+# pytorch-lightning>=1.7.7
-scikit-learn>=1.1.2
+# scikit-learn>=1.1.2
 scipy>=1.9.1
 soundfile>=0.11.0
 torch>=1.12.1
 torchaudio>=0.12.1
 tqdm>=4.64.1
Author	SHA1	Message	Date
shahules786	31cd404e03	rmv mdkir	2022-11-20 19:58:20 +05:30
shahules786	8cf8dd9717	20hrs	2022-11-20 19:56:56 +05:30
shahules786	c2b2b83fd5	20hrs	2022-11-20 19:16:54 +05:30
shahules786	bcb94d5f34	20hrs	2022-11-20 19:11:41 +05:30
shahules786	e0abb458d5	15hrs	2022-11-03 10:40:22 +05:30
shahules786	e322c6280d	15hrs	2022-11-03 09:52:18 +05:30
shahules786	7548647dfb	fix cp	2022-11-02 16:52:58 +05:30
shahules786	1035fbb236	15hrs	2022-11-02 10:50:38 +05:30
shahules786	a06c0a3865	generate test 1hr	2022-10-28 09:55:25 +05:30
shahules786	64f52fe010	redo data	2022-10-27 21:39:33 +05:30
shahules786	078d3eb244	fix num hrs	2022-10-17 15:01:04 +05:30
shahules786	629adf0232	dns 30 hrs demucs	2022-10-17 11:05:55 +05:30
shahules786	501948e866	dns 30 hrs demucs	2022-10-17 11:04:43 +05:30
shahules786	aa61056376	generate 1hrs	2022-10-16 11:44:22 +05:30
shahules786	187e48d125	generate 20hrs	2022-10-16 11:20:01 +05:30
shahules786	7882b8cca3	generate test	2022-10-15 11:46:52 +05:30
shahules786	789da44114	generate test	2022-10-15 11:08:14 +05:30
shahules786	78138c5f93	generate test	2022-10-15 10:13:34 +05:30
shahules786	b8a05c775c	mv files	2022-10-14 16:44:37 +05:30
shahules786	2b68598d7b	print files	2022-10-14 15:23:22 +05:30
shahules786	29a432540e	print files	2022-10-14 15:22:37 +05:30
shahules786	8d1c057b86	changes to prep dns 2020	2022-10-14 15:20:34 +05:30
shahules786	6e0f69f575	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-14 12:46:48 +05:30
shahules786	0e58691a2c	demucs 250	2022-10-14 12:45:34 +05:30
shahules786	807f4b93ea	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-14 12:43:47 +05:30
shahules786	315d646347	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-14 11:32:59 +05:30
shahules786	f34e49e341	WaveUnet	2022-10-14 11:15:16 +05:30
shahules786	fa47860f57	set BS to 256	2022-10-14 11:12:16 +05:30
shahules786	f7eb0a600c	500 epochs	2022-10-14 10:47:20 +05:30
shahules786	ba2d00648c	demucs 100 epochs	2022-10-13 10:57:24 +05:30
shahules786	8a55a77640	run 100 epochs	2022-10-13 10:52:22 +05:30
shahules786	94a4ea38ed	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-13 10:50:59 +05:30
shahules786	8d25b0ed79	reduce epochs	2022-10-12 20:27:05 +05:30
shahules786	09ba645315	fix logging	2022-10-12 20:23:55 +05:30
shahules786	8906496366	waveunet 500 epochs	2022-10-12 10:49:00 +05:30
shahules786	e4a2eb7844	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-12 10:32:52 +05:30
shahules786	8a6af87627	pesq	2022-10-11 21:56:55 +05:30
shahules786	5a392332ba	ensure 2 gpus	2022-10-11 21:56:35 +05:30
shahules786	f66a5236e1	Revert "demucs" This reverts commit `d415bb0c59`.	2022-10-11 21:54:47 +05:30
shahules786	d415bb0c59	demucs	2022-10-11 21:41:19 +05:30
shahules786	8c1524a998	500 epochs	2022-10-11 21:38:27 +05:30
shahules786	7161f84a27	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-11 21:36:59 +05:30
shahules786	2c79e60a85	params	2022-10-11 21:33:19 +05:30
shahules786	41ee2fce0b	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-11 21:30:40 +05:30
shahules786	0c5db496e2	run waveunet	2022-10-11 16:51:41 +05:30
shahules786	031221b79e	merge dev	2022-10-11 16:50:09 +05:30
shahules786	50062eaf40	rmv inplace operation	2022-10-11 15:10:34 +05:30
shahules786	0b02b73094	run demucs 32	2022-10-11 11:12:44 +05:30
shahules786	2ccc2822cd	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-11 11:12:02 +05:30
shahules786	1667de624e	min settings	2022-10-10 21:04:43 +05:30
shahules786	32579b7a39	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-10 21:04:01 +05:30
shahules786	bb68e9e4eb	demucs	2022-10-10 16:48:40 +05:30
shahules786	a21ef707ad	ensure gpu	2022-10-10 15:59:48 +05:30
shahules786	81c5f13ff6	log metric	2022-10-10 15:32:37 +05:30
shahules786	a417e226f3	testrun for metrics	2022-10-10 12:49:41 +05:30
shahules786	5d8f49d78e	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-10 12:48:11 +05:30
shahules786	14156743f9	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-08 11:04:32 +05:30
shahules786	845575a2ad	config	2022-10-08 10:18:22 +05:30
shahules786	c9b78b0e73	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-08 10:12:38 +05:30
shahules786	3068476512	reduce batch_size	2022-10-08 09:59:23 +05:30
shahules786	ffb364196e	increase sr	2022-10-07 11:32:33 +05:30
shahules786	52cefcb962	run demucs	2022-10-07 10:56:14 +05:30
shahules786	61923f6d68	config	2022-10-07 10:46:06 +05:30
shahules786	e90efe3163	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-07 10:43:34 +05:30
shahules786	aa043aaf40	rmv max_steps	2022-10-06 11:52:05 +05:30
shahules786	4f6ccadf4b	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-06 11:49:40 +05:30
shahules786	0e982cd493	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-06 10:33:26 +05:30
shahules786	0787d946da	decrease epochs	2022-10-06 10:21:07 +05:30
shahules786	e06ba07889	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-06 10:19:38 +05:30
shahules786	741fd7b87c	run cli	2022-10-06 09:55:01 +05:30
shahules786	a064151e2e	Merge branch 'dev' of https://github.com/shahules786/enhancer into dev-hawk	2022-10-06 09:54:14 +05:30
shahules786	25557757c7	inc epochs	2022-10-03 21:26:59 +05:30