{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": {}, "colab_type": "code", "id": "iyLoWDsb9rEs" }, "outputs": [], "source": [ "# Unzip the audio files from the Common Voice dataset for the Turkish and Portuguese languages\n", "#! tar -xf data/cv-corpus-15.0-2023-09-08-pt.tar.gz\n", "#! tar -xf data/cv-corpus-15.0-2023-09-08-tr.tar.gz" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/User/en_tr_pt_titanet_large\n" ] } ], "source": [ "# Convert the mp3 files to wav files with 16kHz sampling rate and 16 bits, 1 channel\n", "import os\n", "NEMO_ROOT = os.getcwd()\n", "print(NEMO_ROOT)\n", "import glob\n", "import subprocess\n", "\n", "data_dir = os.path.join(NEMO_ROOT,'data')\n", "#os.makedirs(data_dir, exist_ok=True)\n", "\n", "#print(\"Converting .mp3 to .wav...\")\n", "#mp3_list = glob.glob(data_dir + '/cv-corpus-15.0-2023-09-08/pt/clips/*.mp3', recursive=True)\n", "#for mp3_path in mp3_list:\n", "# wav_path = mp3_path[:-4] + '.wav'\n", "# cmd = [\"sox\", mp3_path, \"--rate\", \"16k\", \"--bits\", \"16\", \"--channels\", \"1\", wav_path]\n", "# subprocess.run(cmd)\n", "#print(\"Finished conversion.\\n******\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#print(\"Converting .mp3 to .wav...\")\n", "#mp3_list = glob.glob(data_dir + '/cv-corpus-15.0-2023-09-08/tr/clips/*.mp3', recursive=True)\n", "#for mp3_path in mp3_list:\n", "# wav_path = mp3_path[:-4] + '.wav'\n", "# cmd = [\"sox\", mp3_path, \"--rate\", \"16k\", \"--bits\", \"16\", \"--channels\", \"1\", wav_path]\n", "# subprocess.run(cmd)\n", "#print(\"Finished conversion.\\n******\")" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": {}, "colab_type": "code", "id": "vqUBayc_Ctcr" }, "outputs": [], "source": [ "# Prepare the train, dev, and test datasets for the Portuguese language\n", "import pandas as pd\n", "import os\n", "\n", 
"#pt_duration_df = pd.read_csv('data/cv-corpus-15.0-2023-09-08/pt/clip_durations.tsv', sep='\\t')\n", "#pt_train_df = pd.read_csv('data/cv-corpus-15.0-2023-09-08/pt/train.tsv', sep='\\t')\n", "#pt_dev_df = pd.read_csv('data/cv-corpus-15.0-2023-09-08/pt/dev.tsv', sep='\\t')\n", "#pt_test_df = pd.read_csv('data/cv-corpus-15.0-2023-09-08/pt/test.tsv', sep='\\t')\n", "\n", "#merged_pt_train_df = pd.merge(pt_train_df, pt_duration_df, left_on='path', right_on='clip', how='left')[['path', 'duration[ms]', 'client_id']].rename(columns={'duration[ms]': 'duration', 'client_id': 'label'})\n", "#merged_pt_dev_df = pd.merge(pt_dev_df, pt_duration_df, left_on='path', right_on='clip', how='left')[['path', 'duration[ms]', 'client_id']].rename(columns={'duration[ms]': 'duration', 'client_id': 'label'})\n", "#merged_pt_test_df = pd.merge(pt_test_df, pt_duration_df, left_on='path', right_on='clip', how='left')[['path', 'duration[ms]', 'client_id']].rename(columns={'duration[ms]': 'duration', 'client_id': 'label'})" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#merged_pt_train_df['audio_filepath'] = merged_pt_train_df['path'].apply(lambda x: os.path.join('/Users/Peng_Wei/work/mlrun_related/en_tr_pt_titanet_large/data/cv-corpus-15.0-2023-09-08/pt/clips', x))\n", "#merged_pt_dev_df['audio_filepath'] = merged_pt_dev_df['path'].apply(lambda x: os.path.join('/Users/Peng_Wei/work/mlrun_related/en_tr_pt_titanet_large/data/cv-corpus-15.0-2023-09-08/pt/clips', x))\n", "#merged_pt_test_df['audio_filepath'] = merged_pt_test_df['path'].apply(lambda x: os.path.join('/Users/Peng_Wei/work/mlrun_related/en_tr_pt_titanet_large/data/cv-corpus-15.0-2023-09-08/pt/clips', x))\n", "\n", "#merged_pt_train_df[\"audio_filepath\"] = merged_pt_train_df[\"audio_filepath\"].str.replace(\".mp3\", \".wav\")\n", "#merged_pt_dev_df[\"audio_filepath\"] = merged_pt_dev_df[\"audio_filepath\"].str.replace(\".mp3\", \".wav\")\n", "#merged_pt_test_df[\"audio_filepath\"] = 
merged_pt_test_df[\"audio_filepath\"].str.replace(\".mp3\", \".wav\")\n", "\n", "#merged_pt_train_df['duration'] = merged_pt_train_df['duration'].apply(lambda x: x / 1000)\n", "#merged_pt_dev_df['duration'] = merged_pt_dev_df['duration'].apply(lambda x: x / 1000)\n", "#merged_pt_test_df['duration'] = merged_pt_test_df['duration'].apply(lambda x: x / 1000)\n", "\n", "#merged_pt_train_df = merged_pt_train_df[['audio_filepath', 'duration', 'label']]\n", "#merged_pt_dev_df = merged_pt_dev_df[['audio_filepath', 'duration', 'label']]\n", "#merged_pt_test_df = merged_pt_test_df[['audio_filepath', 'duration', 'label']]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | audio_filepath | \n", "duration | \n", "label | \n", "
---|---|---|---|
0 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "6.504 | \n", "c1b7c535717cd09b0e3e9de74b0382d810b266e47091d8... | \n", "
1 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "4.656 | \n", "c1b7c535717cd09b0e3e9de74b0382d810b266e47091d8... | \n", "
2 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "3.504 | \n", "c1b7c535717cd09b0e3e9de74b0382d810b266e47091d8... | \n", "
3 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "3.456 | \n", "c1b7c535717cd09b0e3e9de74b0382d810b266e47091d8... | \n", "
4 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "4.224 | \n", "c1b7c535717cd09b0e3e9de74b0382d810b266e47091d8... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
21052 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "4.860 | \n", "d8288aee86a2a6a3ab6f3e8d4028ef097b51698b2f7392... | \n", "
21053 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "2.196 | \n", "d8288aee86a2a6a3ab6f3e8d4028ef097b51698b2f7392... | \n", "
21054 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "2.124 | \n", "d8288aee86a2a6a3ab6f3e8d4028ef097b51698b2f7392... | \n", "
21055 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "1.908 | \n", "d8288aee86a2a6a3ab6f3e8d4028ef097b51698b2f7392... | \n", "
21056 | \n", "/Users/Peng_Wei/work/mlrun_related/en_tr_pt_ti... | \n", "5.436 | \n", "d8288aee86a2a6a3ab6f3e8d4028ef097b51698b2f7392... | \n", "
21057 rows × 3 columns
\n", "