commit f2111321bf

README.md | 10

@@ -30,13 +30,15 @@ model("noisy_audio.wav")

 | Model | Dataset | STOI | PESQ | URL |
 | :---: | :---: | :---: | :---: | :---: |
-| WaveUnet | Vctk-28spk | 0.836 | 2.78 | shahules786/mayavoz-waveunet-valentini-28spk |
-| Demucs | Vctk-28spk | 0.961 | 2.56 | shahules786/mayavoz-demucs-valentini-28spk |
-| DCCRN | Vctk-28spk | 0.724 | 2.55 | shahules786/mayavoz-dccrn-valentini-28spk |
-| Demucs | DNS2020 (20hrs) | 0.56 | 1.26 | shahules786/mayavoz-demucs-dns2020-20hr |
+| WaveUnet | Valentini-28spk | 0.836 | 2.78 | shahules786/mayavoz-waveunet-valentini-28spk |
+| Demucs | Valentini-28spk | 0.961 | 2.56 | shahules786/mayavoz-demucs-valentini-28spk |
+| DCCRN | Valentini-28spk | 0.724 | 2.55 | shahules786/mayavoz-dccrn-valentini-28spk |
+| Demucs | MS-SNSD-20hrs | 0.56 | 1.26 | shahules786/mayavoz-demucs-ms-snsd-20 |

 Test scores are based on the test set associated with each training dataset.

+**See [tutorials](/notebooks/) to train your custom model**
+
 ## Installation
 Only Python 3.8+ is officially supported (though it might work with Python 3.7)

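For orientation, the `URL` column holds Hugging Face-style model identifiers. A minimal loading sketch, assuming a `from_pretrained`-style loader (only the `model("noisy_audio.wav")` call is shown in the hunk above; the class and method names here are assumptions):

```python
# Hypothetical usage; loader class/method names are assumptions.
from mayavoz.models import Mayamodel

model = Mayamodel.from_pretrained("shahules786/mayavoz-demucs-valentini-28spk")
enhanced = model("noisy_audio.wav")  # enhance a noisy recording
```
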
@@ -1,5 +1,5 @@
 _target_: mayavoz.data.dataset.MayaDataset
-name : dns-2020
+name : MS-SNSD
 root_dir : /Users/shahules/Myprojects/MS-SNSD
 duration : 2.0
 sampling_rate: 16000

@@ -1,5 +1,5 @@
 _target_: mayavoz.data.dataset.MayaDataset
-name : vctk
+name : Valentini
 root_dir : /scratch/c.sistc3/DS_10283_2791
 duration : 4.5
 stride : 2

@@ -1,13 +0,0 @@
-_target_: mayavoz.data.dataset.MayaDataset
-name : vctk
-root_dir : /scratch/c.sistc3/DS_10283_2791
-duration : 4.5
-stride : 2
-sampling_rate: 16000
-batch_size: 32
-valid_minutes : 15
-files:
-  train_clean : clean_trainset_28spk_wav
-  test_clean : clean_testset_wav
-  train_noisy : noisy_trainset_28spk_wav
-  test_noisy : noisy_testset_wav

@@ -95,7 +95,7 @@ class Fileprocessor:
         if matching_function is None:
             if name.lower() in ("vctk", "valentini"):
                 return cls(clean_dir, noisy_dir, ProcessorFunctions.one_to_one)
-            elif name.lower() == "dns-2020":
+            elif name.lower() == "ms-snsd":
                 return cls(clean_dir, noisy_dir, ProcessorFunctions.one_to_many)
             else:
                 raise ValueError(

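For context on the two strategies selected above: the Valentini layout keeps identical file names in the clean and noisy directories, while MS-SNSD synthesizes several noisy mixtures per clean utterance. A rough sketch of what the two `ProcessorFunctions` strategies do (an illustrative re-implementation, not the actual mayavoz code):

```python
import os
from glob import glob


def one_to_one(clean_dir: str, noisy_dir: str) -> list:
    # Valentini-style: same basename in both directories -> one pair per file.
    pairs = []
    for clean_path in sorted(glob(os.path.join(clean_dir, "*.wav"))):
        noisy_path = os.path.join(noisy_dir, os.path.basename(clean_path))
        if os.path.exists(noisy_path):
            pairs.append((clean_path, noisy_path))
    return pairs


def one_to_many(clean_dir: str, noisy_dir: str) -> list:
    # MS-SNSD-style: several noisy mixtures share one clean utterance's stem.
    pairs = []
    for clean_path in sorted(glob(os.path.join(clean_dir, "*.wav"))):
        stem = os.path.splitext(os.path.basename(clean_path))[0]
        for noisy_path in sorted(glob(os.path.join(noisy_dir, f"*{stem}*.wav"))):
            pairs.append((clean_path, noisy_path))
    return pairs
```

The test hunk at the bottom of this commit shows the public entry point: `Fileprocessor.from_name("ms-snsd", clean_dir, noisy_dir)` now selects the `one_to_many` strategy.
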
@@ -82,7 +82,7 @@
     "- `min_valid_minutes`: minimum validation data in minutes. Validation is automatically selected from the training set (exclusive users).\n",
     "- `matching_function`: there are two types of mapping functions.\n",
     "    - `one_to_one` : each clean file has exactly one corresponding noisy file, e.g. the Valentini datasets\n",
-    "    - `one_to_many` : In this one clean file will only have one corresponding noisy file. For example DNS dataset.\n"
+    "    - `one_to_many` : one clean file can have many corresponding noisy files, e.g. the MS-SNSD dataset.\n"
    ]
   },
   {

@@ -302,7 +302,7 @@
    "source": [
     "Now there are two types of `matching_function`\n",
     "- `one_to_one` : each clean file has exactly one corresponding noisy file, e.g. the Valentini datasets\n",
-    "- `one_to_many` : In this one clean file will only have one corresponding noisy file. For example DNS dataset."
+    "- `one_to_many` : one clean file can have many corresponding noisy files, e.g. the MS-SNSD dataset."
    ]
   },
   {

@@ -479,7 +479,7 @@
    "metadata": {},
    "source": [
     "### TL;DR\n",
-    "Calling the following command would train mayavoz Demucs model on DNS-2020 dataset.\n",
+    "Calling the following command trains the mayavoz Demucs model on the MS-SNSD dataset.\n",
     "\n",
     "```bash\n",
     "mayavoz-train \\\n",

@@ -540,7 +540,7 @@
     "mayavoz-train --cfg job \\\n",
     "    model=Demucs \\\n",
     "    Demucs.sampling_rate=16000 \\\n",
-    "    dataset=DNS-2020\n",
+    "    dataset=MS-SNSD\n",
     "\n",
     "```\n",
     "\n",

@@ -562,7 +562,7 @@
     "```bash\n",
     "mayavoz-train \\\n",
     "    model=Demucs model.sampling_rate=16000 \\\n",
-    "    dataset=DNS-2020\n",
+    "    dataset=MS-SNSD\n",
     "\n",
     "```"
    ]

@@ -570,9 +570,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "enhancer",
+   "display_name": "Python 3.8.13 ('enhancer')",
    "language": "python",
-   "name": "enhancer"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

@@ -1,15 +0,0 @@
-### DNS Challenge's dataset
-
-The Deep Noise Suppression (DNS) Challenge is a single-channel speech enhancement
-challenge organized by Microsoft, with a focus on real-time applications.
-More info can be found on the [official page](https://dns-challenge.azurewebsites.net/).
-
-**References**
-The challenge paper, [here](https://arxiv.org/abs/2001.08662).
-```BibTex
-@misc{DNSChallenge2020,
-title={The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets, Subjective Speech Quality and Testing Framework},
-author={Chandan K. A. Reddy and Ebrahim Beyrami and Harishchandra Dubey and Vishak Gopal and Roger Cheng and Ross Cutler and Sergiy Matusevych and Robert Aichner and Ashkan Aazami and Sebastian Braun and Puneet Rana and Sriram Srinivasan and Johannes Gehrke}, year={2020},
-doi=https://doi.org/10.48550/arXiv.2001.08662,
-}
-```

@@ -1,13 +0,0 @@
-_target_: mayavoz.data.dataset.MayaDataset
-name : vctk
-root_dir : /scratch/c.sistc3/DS_10283_2791
-duration : 4.5
-stride : 2
-sampling_rate: 16000
-batch_size: 32
-valid_minutes : 15
-files:
-  train_clean : clean_trainset_28spk_wav
-  test_clean : clean_testset_wav
-  train_noisy : noisy_trainset_28spk_wav
-  test_noisy : noisy_testset_wav

@@ -1,7 +0,0 @@
-loss : mae
-metric : [stoi,pesq,si-sdr]
-lr : 0.0003
-ReduceLr_patience : 5
-ReduceLr_factor : 0.2
-min_lr : 0.000001
-EarlyStopping_factor : 10

|
@ -1,5 +0,0 @@
|
||||||
_target_: mayavoz.models.waveunet.WaveUnet
|
|
||||||
num_channels : 1
|
|
||||||
depth : 9
|
|
||||||
initial_output_channels: 24
|
|
||||||
sampling_rate : 16000
|
|
||||||
|
|
@@ -19,9 +19,9 @@ JOB_ID = os.environ.get("SLURM_JOBID", "0")


 @hydra.main(config_path="train_config", config_name="config")
-def main(config: DictConfig):
+def train(config: DictConfig):

-    OmegaConf.save(config, "config_log.yaml")
+    OmegaConf.save(config, "config.yaml")

     callbacks = []
     logger = MLFlowLogger(

@@ -96,7 +96,7 @@ def main(config: DictConfig):
     trainer.test(model)

     logger.experiment.log_artifact(
-        logger.run_id, f"{trainer.default_root_dir}/config_log.yaml"
+        logger.run_id, f"{trainer.default_root_dir}/config.yaml"
     )

     saved_location = os.path.join(

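Since the logged artifact is renamed from `config_log.yaml` to `config.yaml`, anything that later fetches it from MLflow must ask for the new name. A retrieval sketch using the standard MLflow client API (the run id is a placeholder):

```python
from mlflow.tracking import MlflowClient

client = MlflowClient()
run_id = "<your-run-id>"  # placeholder; take it from the MLflow UI or logger.run_id
# Downloads the artifact logged above; note the new file name "config.yaml".
local_path = client.download_artifacts(run_id, "config.yaml")
print(local_path)
```
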
@@ -117,4 +117,4 @@ def main(config: DictConfig):


 if __name__ == "__main__":
-    main()
+    train()

@@ -1,6 +1,6 @@
 defaults:
   - model : Demucs
-  - dataset : Vctk
+  - dataset : MS-SNSD
   - optimizer : Adam
   - hyperparameters : default
   - trainer : default

@@ -1,10 +1,11 @@
 _target_: mayavoz.data.dataset.MayaDataset
+name : MS-SNSD
 root_dir : /Users/shahules/Myprojects/MS-SNSD
-name : dns-2020
-duration : 2.0
+duration : 1.5
+stride : 1
 sampling_rate: 16000
 batch_size: 32
-valid_size: 0.05
+min_valid_minutes: 25
 files:
   train_clean : CleanSpeech_training
   test_clean : CleanSpeech_training

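Because the config carries a `_target_`, it is presumably materialized with Hydra's `instantiate` utility. A minimal sketch using the field values from the hunk above (the exact call site inside mayavoz is assumed, and the `files:` block is omitted for brevity):

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Values copied from the dataset hunk above; MayaDataset is assumed
# to accept these keys as constructor arguments.
cfg = OmegaConf.create(
    {
        "_target_": "mayavoz.data.dataset.MayaDataset",
        "name": "MS-SNSD",
        "root_dir": "/Users/shahules/Myprojects/MS-SNSD",
        "duration": 1.5,
        "stride": 1,
        "sampling_rate": 16000,
        "batch_size": 32,
        "min_valid_minutes": 25,
    }
)
dataset = instantiate(cfg)  # builds the MayaDataset described by the config
```
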
@@ -0,0 +1,7 @@
+loss : si-snr
+metric : [stoi,pesq]
+lr : 0.001
+ReduceLr_patience : 10
+ReduceLr_factor : 0.5
+min_lr : 0.000001
+EarlyStopping_factor : 10

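The `ReduceLr_*` and `min_lr` keys presumably map onto PyTorch's `ReduceLROnPlateau` scheduler (with `EarlyStopping_factor` feeding an early-stopping callback); a sketch of the likely wiring under that assumption:

```python
import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = torch.nn.Linear(1, 1)  # stand-in for the actual mayavoz model
optimizer = Adam(model.parameters(), lr=0.001)  # lr from the hunk above
scheduler = ReduceLROnPlateau(
    optimizer,
    patience=10,      # ReduceLr_patience
    factor=0.5,       # ReduceLr_factor
    min_lr=0.000001,  # min_lr
)
```
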
@@ -19,9 +19,9 @@ JOB_ID = os.environ.get("SLURM_JOBID", "0")


 @hydra.main(config_path="train_config", config_name="config")
-def main(config: DictConfig):
+def train(config: DictConfig):

-    OmegaConf.save(config, "config_log.yaml")
+    OmegaConf.save(config, "config.yaml")

     callbacks = []
     logger = MLFlowLogger(

@@ -96,7 +96,7 @@ def main(config: DictConfig):
     trainer.test(model)

     logger.experiment.log_artifact(
-        logger.run_id, f"{trainer.default_root_dir}/config_log.yaml"
+        logger.run_id, f"{trainer.default_root_dir}/config.yaml"
     )

     saved_location = os.path.join(

@@ -117,4 +117,4 @@ def main(config: DictConfig):


 if __name__ == "__main__":
-    main()
+    train()

@@ -1,6 +1,6 @@
 defaults:
   - model : Demucs
-  - dataset : Vctk
+  - dataset : MS-SNSD
   - optimizer : Adam
   - hyperparameters : default
   - trainer : default

@@ -1,10 +1,11 @@
 _target_: mayavoz.data.dataset.MayaDataset
+name : MS-SNSD
 root_dir : /Users/shahules/Myprojects/MS-SNSD
-name : dns-2020
-duration : 2.0
+duration : 5
+stride : 1
 sampling_rate: 16000
 batch_size: 32
-valid_size: 0.05
+min_valid_minutes: 25
 files:
   train_clean : CleanSpeech_training
   test_clean : CleanSpeech_training

@@ -0,0 +1,7 @@
+loss : mae
+metric : [stoi,pesq]
+lr : 0.0003
+ReduceLr_patience : 10
+ReduceLr_factor : 0.5
+min_lr : 0.000001
+EarlyStopping_factor : 10

@@ -0,0 +1,2 @@
+experiment_name : shahules/mayavoz
+run_name : demucs-ms-snsd

@@ -0,0 +1,17 @@
+### Microsoft Scalable Noisy Speech Dataset (MS-SNSD)
+
+MS-SNSD is a speech dataset that can scale to arbitrary sizes depending on the number of speakers, noise types, and Speech to Noise Ratio (SNR) levels desired.
+
+### Dataset download & setup
+- Follow the steps in the official repo [here](https://github.com/microsoft/MS-SNSD) to download and set up the dataset.
+
+**References**
+```BibTex
+@article{reddy2019scalable,
+title={A Scalable Noisy Speech Dataset and Online Subjective Test Framework},
+author={Reddy, Chandan KA and Beyrami, Ebrahim and Pool, Jamie and Cutler, Ross and Srinivasan, Sriram and Gehrke, Johannes},
+journal={Proc. Interspeech 2019},
+pages={1816--1820},
+year={2019}
+}
+```

@@ -1,7 +0,0 @@
-loss : mae
-metric : [stoi,pesq,si-sdr]
-lr : 0.0003
-ReduceLr_patience : 5
-ReduceLr_factor : 0.2
-min_lr : 0.000001
-EarlyStopping_factor : 10

@@ -1,2 +0,0 @@
-experiment_name : shahules/mayavoz
-run_name : Demucs + Vtck with stride + augmentations

@@ -1,25 +0,0 @@
-_target_: mayavoz.models.dccrn.DCCRN
-num_channels: 1
-sampling_rate : 16000
-complex_lstm : True
-complex_norm : True
-complex_relu : True
-masking_mode : True
-
-encoder_decoder:
-  initial_output_channels : 32
-  depth : 6
-  kernel_size : 5
-  growth_factor : 2
-  stride : 2
-  padding : 2
-  output_padding : 1
-
-lstm:
-  num_layers : 2
-  hidden_size : 256
-
-stft:
-  window_len : 400
-  hop_size : 100
-  nfft : 512

@@ -1,16 +0,0 @@
-_target_: mayavoz.models.demucs.Demucs
-num_channels: 1
-resample: 4
-sampling_rate : 16000
-
-encoder_decoder:
-  depth: 4
-  initial_output_channels: 64
-  kernel_size: 8
-  stride: 4
-  growth_factor: 2
-  glu: True
-
-lstm:
-  bidirectional: False
-  num_layers: 2

@@ -1,5 +0,0 @@
-_target_: mayavoz.models.waveunet.WaveUnet
-num_channels : 1
-depth : 9
-initial_output_channels: 24
-sampling_rate : 16000

@@ -4,7 +4,7 @@ Clean and noisy parallel speech database. The database was designed to train and

 **References**
 ```BibTex
-@misc{DNSChallenge2020,
+@misc{valentini2017,
 title={Noisy speech database for training speech enhancement algorithms and TTS models},
 author={Valentini-Botinhao, Cassia}, year={2017},
 doi=https://doi.org/10.7488/ds/2117,

@@ -36,7 +36,7 @@ def test_fileprocessor_vctk():
     assert len(matching_dict) == 2


-@pytest.mark.parametrize("dataset_name", ["vctk", "dns-2020"])
+@pytest.mark.parametrize("dataset_name", ["vctk", "MS-SNSD"])
 def test_fileprocessor_names(dataset_name):
     fp = Fileprocessor.from_name(dataset_name, "clean_dir", "noisy_dir")
     assert hasattr(fp.matching_function, "__call__")