Spaces:
Runtime error
Runtime error
Commit
·
c99e855
1
Parent(s):
ae1d5fa
Update app.py
Browse files
Updated app.py and fixed some bugs occurring while transcribing YouTube videos
app.py
CHANGED
@@ -38,7 +38,6 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
|
|
38 |
# we have a malformed timestamp so just return it as is
|
39 |
return seconds
|
40 |
|
41 |
-
|
42 |
def transcribe(file, task, return_timestamps):
|
43 |
outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
|
44 |
text = outputs["text"]
|
@@ -59,14 +58,20 @@ def _return_yt_html_embed(yt_url):
|
|
59 |
)
|
60 |
return HTML_str
|
61 |
|
62 |
-
def yt_transcribe(yt_url):
|
63 |
yt = pt.YouTube(yt_url)
|
64 |
html_embed_str = _return_yt_html_embed(yt_url)
|
65 |
stream = yt.streams.filter(only_audio=True)[0]
|
66 |
stream.download(filename="audio.mp3")
|
67 |
-
|
68 |
-
text =
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
return html_embed_str, text
|
71 |
|
72 |
demo = gr.Blocks()
|
@@ -84,7 +89,7 @@ mic_transcribe = gr.Interface(
|
|
84 |
title="Whisper Demo: Transcribe Marathi Audio",
|
85 |
description=(
|
86 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
87 |
-
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and
|
88 |
" of arbitrary length."
|
89 |
),
|
90 |
allow_flagging="never",
|
@@ -103,7 +108,7 @@ file_transcribe = gr.Interface(
|
|
103 |
title="Whisper Demo: Transcribe Marathi Audio",
|
104 |
description=(
|
105 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
106 |
-
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and
|
107 |
" of arbitrary length."
|
108 |
),
|
109 |
cache_examples=True,
|
@@ -112,20 +117,24 @@ file_transcribe = gr.Interface(
|
|
112 |
|
113 |
yt_transcribe = gr.Interface(
|
114 |
fn=yt_transcribe,
|
115 |
-
inputs=[
|
|
|
|
|
|
|
|
|
116 |
outputs=["html", "text"],
|
117 |
layout="horizontal",
|
118 |
theme="huggingface",
|
119 |
title="Whisper Demo: Transcribe Marathi YouTube Video",
|
120 |
description=(
|
121 |
"Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
|
122 |
-
f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and
|
123 |
" arbitrary length."
|
124 |
),
|
125 |
allow_flagging="never",
|
126 |
)
|
127 |
|
128 |
with demo:
|
129 |
-
gr.TabbedInterface([mic_transcribe, file_transcribe,yt_transcribe], ["Transcribe Microphone", "Transcribe Audio File", "Transcribe YouTube Video"])
|
130 |
|
131 |
demo.launch(enable_queue=True)
|
|
|
38 |
# we have a malformed timestamp so just return it as is
|
39 |
return seconds
|
40 |
|
|
|
41 |
def transcribe(file, task, return_timestamps):
|
42 |
outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
|
43 |
text = outputs["text"]
|
|
|
58 |
)
|
59 |
return HTML_str
|
60 |
|
61 |
+
def yt_transcribe(yt_url, task, return_timestamps):
|
62 |
yt = pt.YouTube(yt_url)
|
63 |
html_embed_str = _return_yt_html_embed(yt_url)
|
64 |
stream = yt.streams.filter(only_audio=True)[0]
|
65 |
stream.download(filename="audio.mp3")
|
66 |
+
outputs = pipe("audio.mp3",batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
|
67 |
+
text = outputs["text"]
|
68 |
+
if return_timestamps:
|
69 |
+
timestamps = outputs["chunks"]
|
70 |
+
timestamps = [
|
71 |
+
f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
|
72 |
+
for chunk in timestamps
|
73 |
+
]
|
74 |
+
text = "\n".join(str(feature) for feature in timestamps)
|
75 |
return html_embed_str, text
|
76 |
|
77 |
demo = gr.Blocks()
|
|
|
89 |
title="Whisper Demo: Transcribe Marathi Audio",
|
90 |
description=(
|
91 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
92 |
+
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
|
93 |
" of arbitrary length."
|
94 |
),
|
95 |
allow_flagging="never",
|
|
|
108 |
title="Whisper Demo: Transcribe Marathi Audio",
|
109 |
description=(
|
110 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
111 |
+
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
|
112 |
" of arbitrary length."
|
113 |
),
|
114 |
cache_examples=True,
|
|
|
117 |
|
118 |
yt_transcribe = gr.Interface(
|
119 |
fn=yt_transcribe,
|
120 |
+
inputs=[
|
121 |
+
gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube Video URL"),
|
122 |
+
gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
|
123 |
+
gr.inputs.Checkbox(default=False, label="Return timestamps"),
|
124 |
+
],
|
125 |
outputs=["html", "text"],
|
126 |
layout="horizontal",
|
127 |
theme="huggingface",
|
128 |
title="Whisper Demo: Transcribe Marathi YouTube Video",
|
129 |
description=(
|
130 |
"Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
|
131 |
+
f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
|
132 |
" arbitrary length."
|
133 |
),
|
134 |
allow_flagging="never",
|
135 |
)
|
136 |
|
137 |
with demo:
|
138 |
+
gr.TabbedInterface([mic_transcribe, file_transcribe, yt_transcribe], ["Transcribe Microphone", "Transcribe Audio File", "Transcribe YouTube Video"])
|
139 |
|
140 |
demo.launch(enable_queue=True)
|