slapula committed on
Commit
e1a80bb
1 Parent(s): ccebb5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import pipeline
3
  from transformers.pipelines.audio_utils import ffmpeg_read
4
  import gradio as gr
5
 
6
- MODEL_NAME = "openai/whisper-small"
7
  BATCH_SIZE = 8
8
 
9
  device = 0 if torch.cuda.is_available() else "cpu"
@@ -37,7 +37,7 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
37
  return seconds
38
 
39
 
40
- def transcribe(file, task, return_timestamps):
41
  outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
42
  text = outputs["text"]
43
  if return_timestamps:
@@ -47,7 +47,11 @@ def transcribe(file, task, return_timestamps):
47
  for chunk in timestamps
48
  ]
49
  text = "\n".join(str(feature) for feature in timestamps)
50
- return text
 
 
 
 
51
 
52
 
53
  demo = gr.Blocks()
@@ -56,12 +60,12 @@ mic_transcribe = gr.Interface(
56
  fn=transcribe,
57
  inputs=[
58
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
59
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
60
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
61
  ],
62
- outputs="text",
63
  layout="vertical",
64
- theme="monochrome",
65
  title="Whisper Demo: Transcribe Audio",
66
  description=(
67
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -75,12 +79,12 @@ file_transcribe = gr.Interface(
75
  fn=transcribe,
76
  inputs=[
77
  gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
78
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
79
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
80
  ],
81
- outputs="text",
82
  layout="vertical",
83
- theme="monochrome",
84
  title="Whisper Demo: Transcribe Audio",
85
  description=(
86
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -96,7 +100,6 @@ file_transcribe = gr.Interface(
96
  )
97
 
98
  with demo:
99
- gr.Column(scale=1)
100
  gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
101
 
102
  demo.launch(enable_queue=True)
 
3
  from transformers.pipelines.audio_utils import ffmpeg_read
4
  import gradio as gr
5
 
6
+ MODEL_NAME = "vinai/PhoWhisper-large"
7
  BATCH_SIZE = 8
8
 
9
  device = 0 if torch.cuda.is_available() else "cpu"
 
37
  return seconds
38
 
39
 
40
def transcribe(file, string, return_timestamps, task="transcribe"):
    """Transcribe an audio file with the Whisper pipeline and grade the
    result against an expected word/phrase.

    Parameters
    ----------
    file : str
        Filepath to the audio input (Gradio supplies a filepath).
    string : str
        The word/phrase the speaker was asked to say; compared verbatim
        against the transcription to produce ``grade``.
    return_timestamps : bool
        If True, render the output as one timestamped chunk per line.
    task : str, optional
        Whisper generation task ("transcribe" or "translate").
        Reintroduced as a defaulted keyword: the commit renamed this
        parameter to ``string`` but the body still referenced ``task``,
        which raised ``NameError`` on every call. The default keeps the
        three-input Gradio interfaces working positionally.

    Returns
    -------
    tuple[str, str]
        ``(text, grade)`` — the transcription and a grade message.
    """
    outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
    text = outputs["text"]
    if return_timestamps:
        timestamps = outputs["chunks"]
        # NOTE(review): file lines 44-46 are elided in this diff hunk;
        # this reproduces the upstream Whisper demo chunk formatting --
        # confirm against the full file.
        timestamps = [
            f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
            for chunk in timestamps
        ]
        text = "\n".join(str(feature) for feature in timestamps)
    # Exact-match grading, unchanged from the commit's intent.
    if text == string:
        grade = "good!"
    else:
        grade = "could use some work..."
    return text, grade
55
 
56
 
57
  demo = gr.Blocks()
 
60
  fn=transcribe,
61
  inputs=[
62
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
63
+ gr.inputs.Textbox(label="Word/Phrase"),
64
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
65
  ],
66
+ outputs=["text", "text"],
67
  layout="vertical",
68
+ theme="huggingface",
69
  title="Whisper Demo: Transcribe Audio",
70
  description=(
71
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
79
  fn=transcribe,
80
  inputs=[
81
  gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
82
+ gr.inputs.Textbox(label="Word/Phrase"),
83
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
84
  ],
85
+ outputs=["text", "text"],
86
  layout="vertical",
87
+ theme="huggingface",
88
  title="Whisper Demo: Transcribe Audio",
89
  description=(
90
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
100
  )
101
 
102
  with demo:
 
103
  gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
104
 
105
  demo.launch(enable_queue=True)