From 90fbfbce73752c392dbb2b89ec2aea6e2ffdb55c Mon Sep 17 00:00:00 2001 From: shahules786 Date: Tue, 15 Nov 2022 21:39:18 +0530 Subject: [PATCH] examples --- notebooks/Getting_started.ipynb | 324 +++++++++++++++++++++++++------- 1 file changed, 260 insertions(+), 64 deletions(-) diff --git a/notebooks/Getting_started.ipynb b/notebooks/Getting_started.ipynb index c9a47dd..b25b51f 100644 --- a/notebooks/Getting_started.ipynb +++ b/notebooks/Getting_started.ipynb @@ -30,6 +30,17 @@ "! pip install -q mayavoz " ] }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e3b59ac5", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.chdir(\"/Users/shahules/Myprojects/enhancer\")" + ] + }, { "cell_type": "markdown", "id": "87ee497f", @@ -62,14 +73,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "67698871", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/enhancer/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "\n", "from mayavoz import Mayamodel\n", - "model = Mayamodel.from_pretrained(\"mayavoz/waveunet\")\n" + "model = Mayamodel.from_pretrained(\"shahules786/mayavoz-dccrn-valentini-28spk\")\n" ] }, { @@ -82,13 +102,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "d7996c16", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([1, 1, 36414])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "file = \"myvoice.wav\"\n", - "audio = model.enhance(\"myvoice.wav\")\n", + "audio = model.enhance(\"my_voice.wav\")\n", "audio.shape" ] }, @@ -96,19 +126,84 @@ "cell_type": "markdown", "id": "8ee20a83", "metadata": {}, + "source": [ + "**Inference using numpy ndarray**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "e1a1c718", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(36414,)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch\n", + "from librosa import load\n", + "my_voice,sr = load(\"my_voice.wav\",sr=16000)\n", + "my_voice.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "56b5c01b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 1, 36414)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "audio = model.enhance(my_voice,sampling_rate=sr)\n", + "audio.shape" + ] + }, + { + "cell_type": "markdown", + "id": "e0ab4d43", + "metadata": {}, "source": [ "**Inference using torch tensor**\n" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e1a1c718", + "execution_count": 22, + "id": "fc6192b9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([1, 1, 36414])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "audio_tensor = torch.rand(1,1,32000) ## random audio data\n", - "audio = model.enhance(audio_tensor)\n", + "my_voice = torch.from_numpy(my_voice)\n", + "audio = model.enhance(my_voice,sampling_rate=sr)\n", "audio.shape" ] }, @@ -122,24 +217,43 @@ }, { 
"cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "9e0313f7", "metadata": {}, "outputs": [], "source": [ - "audio = model.enhance(\"myvoice.wav\",save_output=True)" + "audio = model.enhance(\"my_voice.wav\",save_output=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "25077720", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from Ipython.audio import Audio\n", - "\n", - "Audio(\"myvoice_cleaned.wav\",rate=SAMPLING_RATE)" + "from IPython.display import Audio\n", + "SAMPLING_RATE = 16000\n", + "Audio(\"cleaned_my_voice.wav\",rate=SAMPLING_RATE)" ] }, { @@ -183,19 +297,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "2c8c2b12", "metadata": {}, "outputs": [], "source": [ "from mayavoz.utils import Files\n", "\n", - "name = \"dataset_name\"\n", - "root_dir = \"root_directory_of_your_dataset\"\n", - "files = Files(train_clean=\"train_cleanfiles_foldername\",\n", - " train_noisy=\"noisy_train_foldername\",\n", - " test_clean=\"clean_test_foldername\",\n", - " test_noisy=\"noisy_test_foldername\")\n", + "name = \"valentini\"\n", + "root_dir = \"/Users/shahules/Myprojects/enhancer/datasets/vctk\"\n", + "files = Files(train_clean=\"clean_testset_wav\",\n", + " train_noisy=\"clean_testset_wav\",\n", + " test_clean=\"noisy_testset_wav\",\n", + " test_noisy=\"noisy_testset_wav\")\n", "duration = 4.0 \n", "stride = None\n", "sampling_rate = 16000" @@ -207,13 +321,13 @@ "metadata": {}, "source": [ "Now there are two types of `matching_function`\n", - "- `one_to_one` : In this one clean file will only have one corresponding noisy file. For example VCTK datasets\n", + "- `one_to_one` : In this one clean file will only have one corresponding noisy file. For example Valentini datasets\n", "- `one_to_many` : In this one clean file will only have one corresponding noisy file. For example DNS dataset." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "4b0fdc62", "metadata": {}, "outputs": [], "source": [ @@ -223,25 +337,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "ff0cfe60", "metadata": {}, "outputs": [], "source": [ - "from mayavoz.dataset import MayaDataset\n", + "from mayavoz.data import MayaDataset\n", "dataset = MayaDataset(\n", " name=name,\n", " root_dir=root_dir,\n", " files=files,\n", " duration=duration,\n", " stride=stride,\n", - " sampling_rate=sampling_rate\n", + " sampling_rate=sampling_rate,\n", + " min_valid_minutes=5.0,\n", " )\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "acfdc655", "metadata": {}, "outputs": [], "source": [ @@ -252,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "id": "4fabe46d", "metadata": {}, "outputs": [], "source": [ @@ -262,13 +377,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "20d98ed0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Selected fp257 for valid\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "----------------------------------------\n", + "0 | _loss | LossWrapper | 0 \n", + "1 | encoder | ModuleList | 4.7 M \n", + "2 | decoder | ModuleList | 4.7 M \n", + "3 | de_lstm | DemucsLSTM | 24.8 M\n", + "----------------------------------------\n", + "34.2 M Trainable params\n", + "0 Non-trainable params\n", + "34.2 M Total params\n", + "136.866 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total train duration 27.4 minutes\n", + "Total validation duration 29.733333333333334 minutes\n", + "Total test duration 57.2 minutes\n", + "Epoch 0: 48%|▍| 13/27 [15:18<16:29, 70.66s/it, loss=0.0265, v_num=2, train_loss\n", + "Validation: 0it [00:00, ?it/s]\u001b[A\n", + "Validation: 0%| | 0/14 [00:00
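
Putting the training workflow together: the diff elides the sources of the model-construction and trainer cells, but the cells and logs above show the overall flow (build a Files spec, wrap it in a MayaDataset, attach the dataset to a model, fit). Below is a minimal end-to-end sketch of that flow under stated assumptions: the `mayavoz.models.Demucs` import path, the model's constructor arguments, and the `matching_function` keyword on `MayaDataset` are inferred from this notebook, not confirmed API; `Files`, `MayaDataset`, and `min_valid_minutes` are taken directly from the cells above.

import pytorch_lightning as pl

from mayavoz.data import MayaDataset
from mayavoz.utils import Files

# Folder layout taken from the dataset cells above (Valentini testset used for the demo).
files = Files(
    train_clean="clean_testset_wav",
    train_noisy="noisy_testset_wav",
    test_clean="clean_testset_wav",
    test_noisy="noisy_testset_wav",
)

dataset = MayaDataset(
    name="valentini",
    root_dir="/path/to/datasets/vctk",   # replace with your dataset root
    files=files,
    duration=4.0,                        # seconds per training chunk
    stride=None,
    sampling_rate=16000,
    min_valid_minutes=5.0,               # speech minutes held out for validation
    matching_function="one_to_one",      # ASSUMED keyword; its use is elided in the diff
)

from mayavoz.models import Demucs        # ASSUMED import path
model = Demucs(sampling_rate=16000, dataset=dataset)  # ASSUMED constructor arguments

# The GPU/TPU/IPU/HPU lines in the logs above are pytorch-lightning output,
# so a standard Lightning Trainer drives the fit.
trainer = pl.Trainer(max_epochs=1, accelerator="cpu")
trainer.fit(model)

Once trained, `model.enhance("my_voice.wav", save_output=True)` writes the enhanced file (`cleaned_my_voice.wav`), exactly as in the pretrained-model section at the top of the notebook.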