Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ from transformers import pipeline
|
|
3 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
4 |
import gradio as gr
|
5 |
|
6 |
-
MODEL_NAME = "
|
7 |
BATCH_SIZE = 8
|
8 |
|
9 |
device = 0 if torch.cuda.is_available() else "cpu"
|
@@ -37,7 +37,7 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
|
|
37 |
return seconds
|
38 |
|
39 |
|
40 |
-
def transcribe(file, task, return_timestamps):
|
41 |
outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
|
42 |
text = outputs["text"]
|
43 |
if return_timestamps:
|
@@ -47,7 +47,11 @@ def transcribe(file, task, return_timestamps):
|
|
47 |
for chunk in timestamps
|
48 |
]
|
49 |
text = "\n".join(str(feature) for feature in timestamps)
|
50 |
-
|
|
|
|
|
|
|
|
|
51 |
|
52 |
|
53 |
demo = gr.Blocks()
|
@@ -56,12 +60,12 @@ mic_transcribe = gr.Interface(
|
|
56 |
fn=transcribe,
|
57 |
inputs=[
|
58 |
gr.inputs.Audio(source="microphone", type="filepath", optional=True),
|
59 |
-
gr.inputs.
|
60 |
gr.inputs.Checkbox(default=False, label="Return timestamps"),
|
61 |
],
|
62 |
-
outputs="text",
|
63 |
layout="vertical",
|
64 |
-
theme="
|
65 |
title="Whisper Demo: Transcribe Audio",
|
66 |
description=(
|
67 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
@@ -75,12 +79,12 @@ file_transcribe = gr.Interface(
|
|
75 |
fn=transcribe,
|
76 |
inputs=[
|
77 |
gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
|
78 |
-
gr.inputs.
|
79 |
gr.inputs.Checkbox(default=False, label="Return timestamps"),
|
80 |
],
|
81 |
-
outputs="text",
|
82 |
layout="vertical",
|
83 |
-
theme="
|
84 |
title="Whisper Demo: Transcribe Audio",
|
85 |
description=(
|
86 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
@@ -96,7 +100,6 @@ file_transcribe = gr.Interface(
|
|
96 |
)
|
97 |
|
98 |
with demo:
|
99 |
-
gr.Column(scale=1)
|
100 |
gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
|
101 |
|
102 |
demo.launch(enable_queue=True)
|
|
|
3 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
4 |
import gradio as gr
|
5 |
|
6 |
+
MODEL_NAME = "vinai/PhoWhisper-large"
|
7 |
BATCH_SIZE = 8
|
8 |
|
9 |
device = 0 if torch.cuda.is_available() else "cpu"
|
|
|
37 |
return seconds
|
38 |
|
39 |
|
40 |
+
def transcribe(file, string, return_timestamps):
|
41 |
outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
|
42 |
text = outputs["text"]
|
43 |
if return_timestamps:
|
|
|
47 |
for chunk in timestamps
|
48 |
]
|
49 |
text = "\n".join(str(feature) for feature in timestamps)
|
50 |
+
if text == string:
|
51 |
+
grade = "good!"
|
52 |
+
else:
|
53 |
+
grade = "could use some work..."
|
54 |
+
return text, grade
|
55 |
|
56 |
|
57 |
demo = gr.Blocks()
|
|
|
60 |
fn=transcribe,
|
61 |
inputs=[
|
62 |
gr.inputs.Audio(source="microphone", type="filepath", optional=True),
|
63 |
+
gr.inputs.Textbox(labels="Word/Phrase"),
|
64 |
gr.inputs.Checkbox(default=False, label="Return timestamps"),
|
65 |
],
|
66 |
+
outputs=["text", "grade"],
|
67 |
layout="vertical",
|
68 |
+
theme="huggingface",
|
69 |
title="Whisper Demo: Transcribe Audio",
|
70 |
description=(
|
71 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
|
|
79 |
fn=transcribe,
|
80 |
inputs=[
|
81 |
gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
|
82 |
+
gr.inputs.Textbox(labels="Word/Phrase"),
|
83 |
gr.inputs.Checkbox(default=False, label="Return timestamps"),
|
84 |
],
|
85 |
+
outputs=["text", "grade"],
|
86 |
layout="vertical",
|
87 |
+
theme="huggingface",
|
88 |
title="Whisper Demo: Transcribe Audio",
|
89 |
description=(
|
90 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
|
|
|
100 |
)
|
101 |
|
102 |
with demo:
|
|
|
103 |
gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
|
104 |
|
105 |
demo.launch(enable_queue=True)
|