Yasaman committed on
Commit 97a29e1 · 1 Parent(s): 2387cd8

Delete app.ipynb

Files changed (1)
  1. app.ipynb +0 -142
app.ipynb DELETED
@@ -1,142 +0,0 @@
- {
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- }
- },
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "kPCLdTfJyktF"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "\n",
- "import gradio as gr\n",
- "import pytube as pt\n",
- "from transformers import pipeline\n",
- "\n",
- "asr = pipeline(\n",
- " task=\"automatic-speech-recognition\",\n",
- " model=\"Yasaman/whisper_fa\",\n",
- " chunk_length_s=30,\n",
- " device=\"cpu\",\n",
- ")\n",
- "\n",
- "summarizer = pipeline(\n",
- " \"summarization\",\n",
- " model=\"alireza7/PEGASUS-persian-base-PN-summary\",\n",
- ")\n",
- "\n",
- "translator = pipeline(\n",
- " \"translation\", \n",
- " model=\"Helsinki-NLP/opus-mt-iir-en\")\n",
- "\n",
- "def transcribe(microphone, file_upload):\n",
- " warn_output = \"\"\n",
- " if (microphone is not None) and (file_upload is not None):\n",
- " warn_output = (\n",
- " \"WARNING: You've uploaded an audio file and used the microphone. \"\n",
- " \"The recorded file from the microphone will be used and the uploaded audio will be discarded.\\n\"\n",
- " )\n",
- "\n",
- " elif (microphone is None) and (file_upload is None):\n",
- " return \"ERROR: You have to either use the microphone or upload an audio file\"\n",
- "\n",
- " file = microphone if microphone is not None else file_upload\n",
- "\n",
- " text = asr(file)[\"text\"]\n",
- "\n",
- " translate = translator(text)\n",
- " translate = translate[0][\"translation_text\"]\n",
- "\n",
- " return warn_output + text, translate\n",
- "\n",
- "def _return_yt_html_embed(yt_url):\n",
- " video_id = yt_url.split(\"?v=\")[-1]\n",
- " HTML_str = (\n",
- " f'<center> <iframe width=\"500\" height=\"320\" src=\"https://www.youtube.com/embed/{video_id}\"> </iframe>'\n",
- " \" </center>\"\n",
- " )\n",
- " return HTML_str\n",
- "\n",
- "\n",
- "def yt_transcribe(yt_url):\n",
- " yt = pt.YouTube(yt_url)\n",
- " html_embed_str = _return_yt_html_embed(yt_url)\n",
- " stream = yt.streams.filter(only_audio=True)[0]\n",
- " stream.download(filename=\"audio.mp3\")\n",
- "\n",
- " text = asr(\"audio.mp3\")[\"text\"]\n",
- "\n",
- " summary = summarizer(text)\n",
- " summary = summary[0][\"summary_text\"]\n",
- " \n",
- " translate = translator(summary)\n",
- " translate = translate[0][\"translation_text\"]\n",
- "\n",
- " return html_embed_str, text, summary, translate\n",
- "\n",
- "demo = gr.Blocks()\n",
- "\n",
- "mf_transcribe = gr.Interface(\n",
- " fn=transcribe,\n",
- " inputs=[\n",
- " gr.inputs.Audio(source=\"microphone\", type=\"filepath\", optional=True),\n",
- " gr.inputs.Audio(source=\"upload\", type=\"filepath\", optional=True),\n",
- " ],\n",
- " outputs=[\n",
- " gr.Textbox(label=\"Transcribed text\"),\n",
- " gr.Textbox(label=\"Translated text\"),\n",
- " ],\n",
- " layout=\"horizontal\",\n",
- " theme=\"huggingface\",\n",
- " title=\"Whisper Demo: Transcribe and Translate Persian Audio\",\n",
- " description=(\n",
- " \"Transcribe and Translate long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned\"\n",
- " f\" [Yasaman/whisper_fa](https://huggingface.co/Yasaman/whisper_fa) and 🤗 Transformers to transcribe audio files\"\n",
- " \" of arbitrary length. It also uses another model for the translation.\"\n",
- " ),\n",
- " allow_flagging=\"never\",\n",
- ")\n",
- "\n",
- "yt_transcribe = gr.Interface(\n",
- " fn=yt_transcribe,\n",
- " inputs=[gr.inputs.Textbox(lines=1, placeholder=\"Paste the URL to a YouTube video here\", label=\"YouTube URL\")],\n",
- " outputs=[\"html\",\n",
- " gr.Textbox(label=\"Transcribed text\"),\n",
- " gr.Textbox(label=\"Summarized text\"),\n",
- " gr.Textbox(label=\"Translated text\"),\n",
- " ],\n",
- " layout=\"horizontal\",\n",
- " theme=\"huggingface\",\n",
- " title=\"Whisper Demo: Transcribe, Summarize and Translate YouTube\",\n",
- " description=(\n",
- " \"Transcribe, Summarize and Translate long-form YouTube videos with the click of a button! Demo uses the the fine-tuned \"\n",
- " f\" [Yasaman/whisper_fa](https://huggingface.co/Yasaman/whisper_fa) and 🤗 Transformers to transcribe audio files of\"\n",
- " \" arbitrary length. It also uses other two models to first summarize and then translate the text input. You can try with the following example: \" \n",
- " f\" [Video1](https://www.youtube.com/watch?v=qtRzP3KvQZk)\"\n",
- " ),\n",
- " allow_flagging=\"never\",\n",
- ")\n",
- "\n",
- "with demo:\n",
- " gr.TabbedInterface([mf_transcribe, yt_transcribe], [\"Transcribe and Translate Audio\", \"Transcribe, Summarize and Translate YouTube\"])\n",
- "\n",
- "demo.launch(enable_queue=True)"
- ]
- }
- ]
- }