Spaces:

VinayHajare
/

Marathi-Audio-Transcriber-and-Translator

Runtime error

App Files Files

VinayHajare committed on Aug 5, 2023

Commit

c99e855

1 Parent(s): ae1d5fa

Update app.py

Browse files

Updated app.py and fixed some bugs occurring while transcribing YouTube videos

Files changed (1) hide show

app.py +19 -10

app.py CHANGED Viewed

@@ -38,7 +38,6 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
         # we have a malformed timestamp so just return it as is
         return seconds
 def transcribe(file, task, return_timestamps):
     outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
     text = outputs["text"]
@@ -59,14 +58,20 @@ def _return_yt_html_embed(yt_url):
     )
     return HTML_str
-def yt_transcribe(yt_url):
     yt = pt.YouTube(yt_url)
     html_embed_str = _return_yt_html_embed(yt_url)
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename="audio.mp3")
-    text = pipe("audio.mp3")["text"]
     return html_embed_str, text
 demo = gr.Blocks()
@@ -84,7 +89,7 @@ mic_transcribe = gr.Interface(
     title="Whisper Demo: Transcribe Marathi Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
         " of arbitrary length."
     ),
     allow_flagging="never",
@@ -103,7 +108,7 @@ file_transcribe = gr.Interface(
     title="Whisper Demo: Transcribe Marathi Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
         " of arbitrary length."
     ),
     cache_examples=True,
@@ -112,20 +117,24 @@ file_transcribe = gr.Interface(
 yt_transcribe = gr.Interface(
     fn=yt_transcribe,
-    inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
     outputs=["html", "text"],
     layout="horizontal",
     theme="huggingface",
     title="Whisper Demo: Transcribe Marathi YouTube Video",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
-        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
         " arbitrary length."
     ),
     allow_flagging="never",
 )
 with demo:
-    gr.TabbedInterface([mic_transcribe, file_transcribe,yt_transcribe], ["Transcribe Microphone", "Transcribe Audio File", "Transcribe YouTube Video"])
 demo.launch(enable_queue=True)

         # we have a malformed timestamp so just return it as is
         return seconds
 def transcribe(file, task, return_timestamps):
     outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
     text = outputs["text"]
     )
     return HTML_str
+def yt_transcribe(yt_url, task, return_timestamps):
     yt = pt.YouTube(yt_url)
     html_embed_str = _return_yt_html_embed(yt_url)
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename="audio.mp3")
+    outputs = pipe("audio.mp3",batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
+    text = outputs["text"]
+    if return_timestamps:
+        timestamps = outputs["chunks"]
+        timestamps = [
+            f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
+            for chunk in timestamps
+        ]
+        text = "\n".join(str(feature) for feature in timestamps)
     return html_embed_str, text
 demo = gr.Blocks()
     title="Whisper Demo: Transcribe Marathi Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
+        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and ðŸ¤— Transformers to transcribe audio files"
         " of arbitrary length."
     ),
     allow_flagging="never",
     title="Whisper Demo: Transcribe Marathi Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
+        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and ðŸ¤— Transformers to transcribe audio files"
         " of arbitrary length."
     ),
     cache_examples=True,
 yt_transcribe = gr.Interface(
     fn=yt_transcribe,
+    inputs=[
+       gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube Video URL"),
+       gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
+       gr.inputs.Checkbox(default=False, label="Return timestamps"),
+    ],
     outputs=["html", "text"],
     layout="horizontal",
     theme="huggingface",
     title="Whisper Demo: Transcribe Marathi YouTube Video",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
+        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and ðŸ¤— Transformers to transcribe audio files of"
         " arbitrary length."
     ),
     allow_flagging="never",
 )
 with demo:
+    gr.TabbedInterface([mic_transcribe, file_transcribe, yt_transcribe], ["Transcribe Microphone", "Transcribe Audio File", "Transcribe YouTube Video"])
 demo.launch(enable_queue=True)