Merge pull request #27 from shahules786/dev

Rename dataset
Shahul ES 2022-11-24 12:30:23 +05:30 committed by GitHub
commit f2111321bf
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 69 additions and 140 deletions

View File

@@ -30,13 +30,15 @@ model("noisy_audio.wav")
 | Model | Dataset | STOI | PESQ | URL |
 | :---: | :---: | :---: | :---: | :---: |
-| WaveUnet | Vctk-28spk | 0.836 | 2.78 | shahules786/mayavoz-waveunet-valentini-28spk |
-| Demucs | Vctk-28spk | 0.961 | 2.56 | shahules786/mayavoz-demucs-valentini-28spk |
-| DCCRN | Vctk-28spk | 0.724 | 2.55 | shahules786/mayavoz-dccrn-valentini-28spk |
-| Demucs | DNS2020 (20hrs) | 0.56 | 1.26 | shahules786/mayavoz-demucs-dns2020-20hr |
+| WaveUnet | Valentini-28spk | 0.836 | 2.78 | shahules786/mayavoz-waveunet-valentini-28spk |
+| Demucs | Valentini-28spk | 0.961 | 2.56 | shahules786/mayavoz-demucs-valentini-28spk |
+| DCCRN | Valentini-28spk | 0.724 | 2.55 | shahules786/mayavoz-dccrn-valentini-28spk |
+| Demucs | MS-SNSD-20hrs | 0.56 | 1.26 | shahules786/mayavoz-demucs-ms-snsd-20 |
 Test scores are computed on the test set associated with each training dataset.
+**See [tutorials](/notebooks/) to train your custom model**
 ## Installation
 Only Python 3.8+ is officially supported (though it might work with Python 3.7)
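
For context, the hub IDs in the table are loaded via the `from_pretrained` snippet whose last line, `model("noisy_audio.wav")`, appears in the hunk header. A minimal sketch (the `Mayamodel` class name and import path are assumptions, not confirmed by this diff; the hub ID comes from the table above):

```python
# Hedged sketch: load one of the renamed checkpoints and enhance a file,
# mirroring the model("noisy_audio.wav") call in the hunk context above.
# The Mayamodel class/import path is an assumption.
from mayavoz.models import Mayamodel

model = Mayamodel.from_pretrained("shahules786/mayavoz-demucs-valentini-28spk")
enhanced = model("noisy_audio.wav")
```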

View File

@@ -1,5 +1,5 @@
 _target_: mayavoz.data.dataset.MayaDataset
-name : dns-2020
+name : MS-SNSD
 root_dir : /Users/shahules/Myprojects/MS-SNSD
 duration : 2.0
 sampling_rate: 16000

View File

@@ -1,5 +1,5 @@
 _target_: mayavoz.data.dataset.MayaDataset
-name : vctk
+name : Valentini
 root_dir : /scratch/c.sistc3/DS_10283_2791
 duration : 4.5
 stride : 2

View File

@@ -1,13 +0,0 @@
-_target_: mayavoz.data.dataset.MayaDataset
-name : vctk
-root_dir : /scratch/c.sistc3/DS_10283_2791
-duration : 4.5
-stride : 2
-sampling_rate: 16000
-batch_size: 32
-valid_minutes : 15
-files:
-  train_clean : clean_trainset_28spk_wav
-  test_clean : clean_testset_wav
-  train_noisy : noisy_trainset_28spk_wav
-  test_noisy : noisy_testset_wav

View File

@@ -95,7 +95,7 @@ class Fileprocessor:
         if matching_function is None:
             if name.lower() in ("vctk", "valentini"):
                 return cls(clean_dir, noisy_dir, ProcessorFunctions.one_to_one)
-            elif name.lower() == "dns-2020":
+            elif name.lower() == "ms-snsd":
                 return cls(clean_dir, noisy_dir, ProcessorFunctions.one_to_many)
             else:
                 raise ValueError(
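
The rename only swaps the registered key from `dns-2020` to `ms-snsd`; the dispatch logic is untouched. A hedged usage sketch of `from_name` (its signature is taken from the test at the bottom of this diff; the import path and the directory arguments are placeholders):

```python
# Sketch: from_name(name, clean_dir, noisy_dir) picks the matching function
# from the dataset name. Import path is an assumption.
from mayavoz.data.fileprocessor import Fileprocessor

fp = Fileprocessor.from_name("MS-SNSD", "clean_dir", "noisy_dir")  # -> one_to_many
assert callable(fp.matching_function)

fp = Fileprocessor.from_name("valentini", "clean_dir", "noisy_dir")  # -> one_to_one
assert callable(fp.matching_function)
```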

View File

@@ -82,7 +82,7 @@
 "- `min_valid_minutes`: minimum validation duration in minutes. Validation data is automatically selected from the training set (exclusive users).\n",
 "- `matching_function`: there are two types of mapping functions.\n",
 " - `one_to_one` : here one clean file has exactly one corresponding noisy file. For example, the Valentini datasets.\n",
-" - `one_to_many` : In this one clean file will only have one corresponding noisy file. For example DNS dataset.\n"
+" - `one_to_many` : here one clean file can have many corresponding noisy files. For example, the MS-SNSD dataset.\n"
 ]
 },
 {
@@ -302,7 +302,7 @@
 "source": [
 "Now there are two types of `matching_function`\n",
 "- `one_to_one` : here one clean file has exactly one corresponding noisy file. For example, the Valentini datasets.\n",
-"- `one_to_many` : In this one clean file will only have one corresponding noisy file. For example DNS dataset."
+"- `one_to_many` : here one clean file can have many corresponding noisy files. For example, the MS-SNSD dataset."
 ]
 },
 {
@@ -479,7 +479,7 @@
 "metadata": {},
 "source": [
 "### TL;DR\n",
-"Calling the following command would train mayavoz Demucs model on DNS-2020 dataset.\n",
+"Running the following command trains the mayavoz Demucs model on the MS-SNSD dataset.\n",
 "\n",
 "```bash\n",
 "mayavoz-train \\\n",
@@ -540,7 +540,7 @@
 "mayavoz-train --cfg job \\\n",
 " model=Demucs \\\n",
 " Demucs.sampling_rate=16000 \\\n",
-" dataset=DNS-2020\n",
+" dataset=MS-SNSD\n",
 "\n",
 "```\n",
 "\n",
@@ -562,7 +562,7 @@
 "```bash\n",
 "mayavoz-train \\\n",
 " model=Demucs model.sampling_rate=16000 \\\n",
-" dataset=DNS-2020\n",
+" dataset=MS-SNSD\n",
 "\n",
 "```"
 ]
@@ -570,9 +570,9 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "enhancer",
+"display_name": "Python 3.8.13 ('enhancer')",
 "language": "python",
-"name": "enhancer"
+"name": "python3"
 },
 "language_info": {
 "codemirror_mode": {

View File

@@ -1,15 +0,0 @@
-### DNS Challenge's dataset
-The Deep Noise Suppression (DNS) Challenge is a single-channel speech enhancement
-challenge organized by Microsoft, with a focus on real-time applications.
-More info can be found on the [official page](https://dns-challenge.azurewebsites.net/).
-**References**
-The challenge paper, [here](https://arxiv.org/abs/2001.08662).
-```BibTex
-@misc{DNSChallenge2020,
-title={The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets, Subjective Speech Quality and Testing Framework},
-author={Chandan K. A. Reddy and Ebrahim Beyrami and Harishchandra Dubey and Vishak Gopal and Roger Cheng and Ross Cutler and Sergiy Matusevych and Robert Aichner and Ashkan Aazami and Sebastian Braun and Puneet Rana and Sriram Srinivasan and Johannes Gehrke}, year={2020},
-doi=https://doi.org/10.48550/arXiv.2001.08662,
-}
-```

View File

@@ -1,13 +0,0 @@
-_target_: mayavoz.data.dataset.MayaDataset
-name : vctk
-root_dir : /scratch/c.sistc3/DS_10283_2791
-duration : 4.5
-stride : 2
-sampling_rate: 16000
-batch_size: 32
-valid_minutes : 15
-files:
-  train_clean : clean_trainset_28spk_wav
-  test_clean : clean_testset_wav
-  train_noisy : noisy_trainset_28spk_wav
-  test_noisy : noisy_testset_wav

View File

@@ -1,7 +0,0 @@
-loss : mae
-metric : [stoi,pesq,si-sdr]
-lr : 0.0003
-ReduceLr_patience : 5
-ReduceLr_factor : 0.2
-min_lr : 0.000001
-EarlyStopping_factor : 10

View File

@@ -1,5 +0,0 @@
-_target_: mayavoz.models.waveunet.WaveUnet
-num_channels : 1
-depth : 9
-initial_output_channels: 24
-sampling_rate : 16000

View File

@@ -19,9 +19,9 @@ JOB_ID = os.environ.get("SLURM_JOBID", "0")
 @hydra.main(config_path="train_config", config_name="config")
-def main(config: DictConfig):
-    OmegaConf.save(config, "config_log.yaml")
+def train(config: DictConfig):
+    OmegaConf.save(config, "config.yaml")
     callbacks = []
     logger = MLFlowLogger(
@@ -96,7 +96,7 @@ def main(config: DictConfig):
     trainer.test(model)
     logger.experiment.log_artifact(
-        logger.run_id, f"{trainer.default_root_dir}/config_log.yaml"
+        logger.run_id, f"{trainer.default_root_dir}/config.yaml"
     )
     saved_location = os.path.join(
@@ -117,4 +117,4 @@ def main(config: DictConfig):
 if __name__ == "__main__":
-    main()
+    train()
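
The rename touches three places that must stay consistent: the decorated function name, the saved config artifact, and the `__main__` guard. A self-contained sketch of the same pattern (trainer wiring elided; the logger arguments are taken from the new mlflow config later in this diff):

```python
# Hedged sketch of the renamed entrypoint: Hydra composes the config, the
# resolved config is written to config.yaml, and the same file name is later
# logged as an MLflow artifact, so the two strings must match.
import hydra
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning.loggers import MLFlowLogger


@hydra.main(config_path="train_config", config_name="config")
def train(config: DictConfig):
    OmegaConf.save(config, "config.yaml")  # renamed from config_log.yaml
    logger = MLFlowLogger(
        experiment_name="shahules/mayavoz", run_name="demucs-ms-snsd"
    )
    # ... build dataset/model/trainer, then fit and test ...
    logger.experiment.log_artifact(logger.run_id, "config.yaml")


if __name__ == "__main__":
    train()
```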

View File

@@ -1,6 +1,6 @@
 defaults:
   - model : Demucs
-  - dataset : Vctk
+  - dataset : MS-SNSD
   - optimizer : Adam
   - hyperparameters : default
   - trainer : default

View File

@@ -1,10 +1,11 @@
 _target_: mayavoz.data.dataset.MayaDataset
+name : MS-SNSD
 root_dir : /Users/shahules/Myprojects/MS-SNSD
-name : dns-2020
-duration : 2.0
+duration : 1.5
+stride : 1
 sampling_rate: 16000
 batch_size: 32
-valid_size: 0.05
+min_valid_minutes: 25
 files:
   train_clean : CleanSpeech_training
   test_clean : CleanSpeech_training
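
Because the config carries a `_target_`, Hydra can build the dataset object directly from it. A hedged sketch (the YAML file name is hypothetical; the constructor arguments are assumed to mirror the keys above):

```python
# Sketch: hydra.utils.instantiate reads _target_ and passes the remaining
# keys (name, root_dir, duration, stride, ...) as constructor arguments.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load("MS-SNSD.yaml")  # hypothetical path to the config above
dataset = instantiate(cfg)  # -> a mayavoz.data.dataset.MayaDataset instance
```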

View File

@@ -0,0 +1,7 @@
+loss : si-snr
+metric : [stoi,pesq]
+lr : 0.001
+ReduceLr_patience : 10
+ReduceLr_factor : 0.5
+min_lr : 0.000001
+EarlyStopping_factor : 10
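
For orientation, assuming these keys feed a standard plateau scheduler and early stopping inside mayavoz (an assumption about how the trainer consumes them, not confirmed by this diff), the new recipe maps roughly to:

```python
# Hedged mapping of the recipe keys onto standard PyTorch / Lightning parts.
import torch
from pytorch_lightning.callbacks import EarlyStopping

model = torch.nn.Linear(1, 1)  # placeholder model for the sketch
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # lr
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.5, patience=10, min_lr=1e-6  # ReduceLr_*, min_lr
)
# The monitored metric name is a placeholder.
early_stop = EarlyStopping(monitor="valid_loss", patience=10)  # EarlyStopping_factor
```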

View File

@@ -19,9 +19,9 @@ JOB_ID = os.environ.get("SLURM_JOBID", "0")
 @hydra.main(config_path="train_config", config_name="config")
-def main(config: DictConfig):
-    OmegaConf.save(config, "config_log.yaml")
+def train(config: DictConfig):
+    OmegaConf.save(config, "config.yaml")
     callbacks = []
     logger = MLFlowLogger(
@@ -96,7 +96,7 @@ def main(config: DictConfig):
     trainer.test(model)
     logger.experiment.log_artifact(
-        logger.run_id, f"{trainer.default_root_dir}/config_log.yaml"
+        logger.run_id, f"{trainer.default_root_dir}/config.yaml"
    )
     saved_location = os.path.join(
@@ -117,4 +117,4 @@ def main(config: DictConfig):
 if __name__ == "__main__":
-    main()
+    train()

View File

@@ -1,6 +1,6 @@
 defaults:
   - model : Demucs
-  - dataset : Vctk
+  - dataset : MS-SNSD
   - optimizer : Adam
   - hyperparameters : default
   - trainer : default

View File

@@ -1,10 +1,11 @@
 _target_: mayavoz.data.dataset.MayaDataset
+name : MS-SNSD
 root_dir : /Users/shahules/Myprojects/MS-SNSD
-name : dns-2020
-duration : 2.0
+duration : 5
+stride : 1
 sampling_rate: 16000
 batch_size: 32
-valid_size: 0.05
+min_valid_minutes: 25
 files:
   train_clean : CleanSpeech_training
   test_clean : CleanSpeech_training

View File

@@ -0,0 +1,7 @@
+loss : mae
+metric : [stoi,pesq]
+lr : 0.0003
+ReduceLr_patience : 10
+ReduceLr_factor : 0.5
+min_lr : 0.000001
+EarlyStopping_factor : 10

View File

@@ -0,0 +1,2 @@
+experiment_name : shahules/mayavoz
+run_name : demucs-ms-snsd

View File

@@ -0,0 +1,17 @@
+### Microsoft Scalable Noisy Speech Dataset (MS-SNSD)
+MS-SNSD is a speech dataset that can scale to arbitrary sizes depending on the number of speakers, noise types, and Speech to Noise Ratio (SNR) levels desired.
+### Dataset download & setup
+- Follow the steps in the official repo [here](https://github.com/microsoft/MS-SNSD) to download and set up the dataset.
+**References**
+```BibTex
+@article{reddy2019scalable,
+title={A Scalable Noisy Speech Dataset and Online Subjective Test Framework},
+author={Reddy, Chandan KA and Beyrami, Ebrahim and Pool, Jamie and Cutler, Ross and Srinivasan, Sriram and Gehrke, Johannes},
+journal={Proc. Interspeech 2019},
+pages={1816--1820},
+year={2019}
+}
+```

View File

@@ -1,7 +0,0 @@
-loss : mae
-metric : [stoi,pesq,si-sdr]
-lr : 0.0003
-ReduceLr_patience : 5
-ReduceLr_factor : 0.2
-min_lr : 0.000001
-EarlyStopping_factor : 10

View File

@@ -1,2 +0,0 @@
-experiment_name : shahules/mayavoz
-run_name : Demucs + Vtck with stride + augmentations

View File

@@ -1,25 +0,0 @@
-_target_: mayavoz.models.dccrn.DCCRN
-num_channels: 1
-sampling_rate : 16000
-complex_lstm : True
-complex_norm : True
-complex_relu : True
-masking_mode : True
-encoder_decoder:
-  initial_output_channels : 32
-  depth : 6
-  kernel_size : 5
-  growth_factor : 2
-  stride : 2
-  padding : 2
-  output_padding : 1
-lstm:
-  num_layers : 2
-  hidden_size : 256
-stft:
-  window_len : 400
-  hop_size : 100
-  nfft : 512

View File

@@ -1,16 +0,0 @@
-_target_: mayavoz.models.demucs.Demucs
-num_channels: 1
-resample: 4
-sampling_rate : 16000
-encoder_decoder:
-  depth: 4
-  initial_output_channels: 64
-  kernel_size: 8
-  stride: 4
-  growth_factor: 2
-  glu: True
-lstm:
-  bidirectional: False
-  num_layers: 2

View File

@ -1,5 +0,0 @@
_target_: mayavoz.models.waveunet.WaveUnet
num_channels : 1
depth : 9
initial_output_channels: 24
sampling_rate : 16000

View File

@@ -4,7 +4,7 @@ Clean and noisy parallel speech database. The database was designed to train and
 **References**
 ```BibTex
-@misc{DNSChallenge2020,
+@misc{valentini2017noisy,
 title={Noisy speech database for training speech enhancement algorithms and TTS models},
 author={Valentini-Botinhao, Cassia}, year={2017},
 doi=https://doi.org/10.7488/ds/2117,

View File

@@ -36,7 +36,7 @@ def test_fileprocessor_vctk():
     assert len(matching_dict) == 2
 
-@pytest.mark.parametrize("dataset_name", ["vctk", "dns-2020"])
+@pytest.mark.parametrize("dataset_name", ["vctk", "MS-SNSD"])
 def test_fileprocessor_names(dataset_name):
     fp = Fileprocessor.from_name(dataset_name, "clean_dir", "noisy_dir")
     assert hasattr(fp.matching_function, "__call__")
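
Since `Fileprocessor.from_name` raises `ValueError` for unregistered names (see the fileprocessor hunk earlier in this diff), a natural companion test, sketched here as a suggestion rather than as part of the PR:

```python
import pytest


def test_fileprocessor_unknown_name():
    # After this PR, "dns-2020" is no longer a registered name and should
    # fall through to the ValueError branch in from_name.
    with pytest.raises(ValueError):
        Fileprocessor.from_name("dns-2020", "clean_dir", "noisy_dir")
```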