{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "dc710b2d90564aea92472ad151453444": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [], "layout": "IPY_MODEL_86af62717c2f4d51b4a59d73f71be1fe" } }, "83ac947298704da296eff1ea8a7fd6ad": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_623925336fed4ce784c8915a8592f642", "placeholder": "", "style": "IPY_MODEL_0c287eb0e7674b56b1c3799e68ade613", "value": "
Step | \n", "Training Loss | \n", "Validation Loss | \n", "
---|---|---|
1000 | \n", "0.526500 | \n", "0.485982 | \n", "
2000 | \n", "0.502900 | \n", "0.474044 | \n", "
3000 | \n", "0.498400 | \n", "0.472146 | \n", "
4000 | \n", "0.496500 | \n", "0.470817 | \n", "
"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:2817: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 1876}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.\n",
" warnings.warn(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=4000, training_loss=0.5268438875675201, metrics={'train_runtime': 7569.18, 'train_samples_per_second': 16.911, 'train_steps_per_second': 0.528, 'total_flos': 1.7291685751808688e+16, 'train_loss': 0.5268438875675201, 'epoch': 46.44412191582003})"
]
},
"metadata": {},
"execution_count": 50
}
]
},
{
"cell_type": "code",
"source": [
"trainer.push_to_hub()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 106
},
"id": "CDoKUQCZrwAf",
"outputId": "a887161a-725b-426b-b953-91c159477bb0"
},
"execution_count": 51,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/ahk-d/speecht5_finetuned_voxpopuli_de/commit/f8d4ca958006e881a2228df5089e9a184063bc63', commit_message='End of training', commit_description='', oid='f8d4ca958006e881a2228df5089e9a184063bc63', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ahk-d/speecht5_finetuned_voxpopuli_de', endpoint='https://huggingface.co', repo_type='model', repo_id='ahk-d/speecht5_finetuned_voxpopuli_de'), pr_revision=None, pr_num=None)"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 51
}
]
},
{
"cell_type": "code",
"source": [
"def generate_speech_with_random_speaker(text, processor=None, model=None, vocoder=None):\n",
"    \"\"\"Synthesize `text` with a speaker embedding picked at random from the test split.\n",
"\n",
"    Args:\n",
"        text: Text to convert to speech.\n",
"        processor: Optional preloaded SpeechT5Processor. When None it is loaded\n",
"            from \"microsoft/speecht5_tts\".\n",
"        model: Optional preloaded SpeechT5ForTextToSpeech. When None it is loaded\n",
"            from \"ahk-d/speecht5_finetuned_voxpopuli_de\".\n",
"        vocoder: Optional preloaded SpeechT5HifiGan. When None it is loaded\n",
"            from \"microsoft/speecht5_hifigan\".\n",
"\n",
"    Returns:\n",
"        The IAudio object built from the generated waveform with rate=16000.\n",
"\n",
"    Raises:\n",
"        ValueError: If sliced_dataset['test'] contains no examples.\n",
"\n",
"    Note:\n",
"        Pass preloaded components when calling this repeatedly; otherwise all\n",
"        three checkpoints are loaded again on every call.\n",
"        NOTE(review): preloaded components are presumably expected on CPU, since\n",
"        the inputs are not moved to another device here - confirm before passing\n",
"        a model that lives on the GPU.\n",
"    \"\"\"\n",
"    dataset = sliced_dataset['test']\n",
"    if len(dataset) == 0:\n",
"        # Fail before loading any checkpoint instead of surfacing random's\n",
"        # opaque \"empty range\" error afterwards.\n",
"        raise ValueError(\"sliced_dataset['test'] is empty; no speaker embedding to sample\")\n",
"\n",
"    # Only load the components the caller did not supply.\n",
"    if processor is None:\n",
"        processor = SpeechT5Processor.from_pretrained(\"microsoft/speecht5_tts\")\n",
"    if model is None:\n",
"        model = SpeechT5ForTextToSpeech.from_pretrained(\"ahk-d/speecht5_finetuned_voxpopuli_de\")\n",
"    if vocoder is None:\n",
"        vocoder = SpeechT5HifiGan.from_pretrained(\"microsoft/speecht5_hifigan\")\n",
"\n",
"    # Draw one test example uniformly at random and use its speaker embedding.\n",
"    example = dataset[random.randrange(len(dataset))]\n",
"    # unsqueeze(0) adds the leading dimension before the embedding is handed to the model.\n",
"    speaker_embeddings = torch.tensor(example[\"speaker_embeddings\"]).unsqueeze(0)\n",
"\n",
"    inputs = processor(text=text, return_tensors=\"pt\")\n",
"    speech = model.generate_speech(inputs[\"input_ids\"], speaker_embeddings, vocoder=vocoder)\n",
"\n",
"    return IAudio(speech.numpy(), rate=16000)"
],
"metadata": {
"id": "JKkb2e32zJCt"
},
"execution_count": 88,
"outputs": []
},
{
"cell_type": "code",
"source": [
"generate_speech_with_random_speaker('das ist nicht meine katze')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 75
},
"id": "UiATG1WVzXBN",
"outputId": "3c7ba93b-3386-47ac-8a53-b3213dc92ac6"
},
"execution_count": 89,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"