# coding=utf-8
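"""Gradio demo for Cantonese call transcription.

Normalizes uploaded audio to 16 kHz mono float32, writes it to a
temporary WAV file, and hands it to `process_audio` from the `sv`
module for transcription.
"""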
import gradio as gr
import numpy as np
import soundfile as sf
import spaces
import torch
import torchaudio
from sv import process_audio


@spaces.GPU
def model_inference(input_wav, language):
    # Fall back to automatic language detection when none is selected
    language = language if language else "auto"

    # gr.Audio delivers a (sample_rate, numpy_array) tuple
    if isinstance(input_wav, tuple):
        fs, input_wav = input_wav
        # Scale int16 PCM samples to float32 in [-1.0, 1.0]
        input_wav = input_wav.astype(np.float32) / np.iinfo(np.int16).max
        # Downmix multi-channel audio to mono
        input_wav = input_wav.mean(-1) if len(input_wav.shape) > 1 else input_wav
        if fs != 16000:
            # Resample to the 16 kHz rate the model expects
            resampler = torchaudio.transforms.Resample(fs, 16000)
            input_wav = resampler(torch.from_numpy(input_wav).float()[None, :])[
                0
            ].numpy()

    # Write the normalized audio to a temporary WAV file and transcribe it
    with sf.SoundFile("temp.wav", "w", samplerate=16000, channels=1) as f:
        f.write(input_wav)
    result = process_audio("temp.wav", language=language)
    return result
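
# Hypothetical direct call, outside the UI ("call.wav" is illustrative,
# not a file shipped with the app):
#   data, sr = sf.read("call.wav", dtype="int16")
#   print(model_inference((sr, data), "yue"))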


def launch():
    # Custom CSS: the demo keeps the default theme but tweaks the text color
    custom_css = """
    .gradio-container {color: rgb(70, 70, 70);}
    """

    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("# Cantonese Call Transcriber")
        gr.Markdown(
            """
            This tool transcribes Cantonese audio calls into text.

            ## How to use:
            1. Upload an audio file or use the example provided at the bottom of the page.
            2. Click the 'Process Audio' button.
            3. The transcription will appear in the output box.
            """
        )
        # Define the I/O components without rendering them (render=False),
        # so they appear where they are placed in the layout below rather
        # than at their point of creation
        audio_input = gr.Audio(label="Input", render=False)
        text_output = gr.Textbox(lines=10, label="Output", render=False)

        # Main interface
        with gr.Row():
            with gr.Column(scale=2):
                audio_input.render()
                fn_button = gr.Button("Process Audio", variant="primary")
            with gr.Column(scale=3):
                text_output.render()

        # Example audio, shown at the bottom of the page as the
        # instructions above describe
        gr.Examples(
            examples=[["example/scb.mp3"]],
            inputs=[audio_input],
            outputs=[text_output],
            fn=lambda x: model_inference(x, "yue"),
            examples_per_page=1,
        )

        # Run inference when the button is clicked
        fn_button.click(
            fn=lambda x: model_inference(x, "yue"),
            inputs=[audio_input],
            outputs=[text_output],
        )

    demo.launch()


if __name__ == "__main__":
    launch()