# Spaces: Sleeping
import os

import gradio as gr
import pandas as pd
import torch
from faster_whisper import WhisperModel
# Whisper checkpoint name handed to faster-whisper.
model_size = "large-v2"

# Probe for a CUDA device once; everything below keys off this string.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

if device != "cuda:0":
    # No GPU available: run on CPU with INT8.
    model_whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
else:
    # GPU available: run with FP16.
    # An INT8 variant on GPU is also possible via compute_type="int8_float16".
    model_whisper = WhisperModel(model_size, device="cuda", compute_type="float16")
def get_filename(file_obj):
    """Return the base name (final path component) of an uploaded file.

    Args:
        file_obj: Either an object exposing the file path in its ``name``
            attribute, or the path itself as a string.

    Returns:
        The part of the path after the last path separator.
    """
    # Plain string paths have no ``name`` attribute, so fall back to the
    # argument itself instead of raising AttributeError.
    path = getattr(file_obj, "name", file_obj)
    # os.path.basename honours the platform's separators, unlike a hard-coded
    # split on "/".
    return os.path.basename(path)
def audio_to_transcript(file_obj):
    """Transcribe an audio file and export the timed segments as a CSV file.

    Args:
        file_obj: The audio to transcribe, either an object whose ``name``
            attribute holds the path on disk or the path itself as a string.

    Returns:
        A tuple ``(filename, path_to_csv, df)`` where ``filename`` is the base
        name of the audio file, ``path_to_csv`` is a ``gr.File`` update that
        points at the written CSV and makes the component visible, and ``df``
        is a DataFrame with the columns ``start``, ``end`` and ``text``.

    Raises:
        ValueError: If ``file_obj`` is ``None`` (no file was supplied).
    """
    if file_obj is None:
        raise ValueError("No audio file was provided.")

    # Decide up front which kind of input we received. A bare ``except`` used
    # for this dispatch would also swallow genuine transcription failures and
    # then retry with the wrong argument type.
    if isinstance(file_obj, str):
        audio_path = file_obj
        filename = os.path.basename(audio_path)
    else:
        audio_path = file_obj.name
        filename = get_filename(file_obj)

    segments, _ = model_whisper.transcribe(audio_path, beam_size=5, vad_filter=True)

    # One row per segment; passing ``columns`` keeps the schema even when no
    # segments are produced.
    rows = [(segment.start, segment.end, segment.text) for segment in segments]
    df = pd.DataFrame(rows, columns=["start", "end", "text"])

    # Save the transcript as CSV. splitext strips only the final extension,
    # so "my.talk.mp3" becomes "my.talk.csv" rather than "my.csv".
    csv_file = os.path.splitext(filename)[0] + ".csv"
    df.to_csv(csv_file, encoding="utf-8", index=False)
    path_to_csv = gr.File.update(value=csv_file, visible=True)
    return filename, path_to_csv, df
## Gradio interface
# Column names shown in the transcript table; they match the DataFrame
# columns returned by audio_to_transcript.
headers = ["start", "end", "text"]

# Single upload widget feeding the transcription function.
_audio_input = gr.File(label="Audio file")

# Outputs, in the same order as the tuple returned by audio_to_transcript:
# file name, downloadable CSV, transcript table.
_transcript_outputs = [
    gr.Textbox(label="Audio file name"),
    gr.File(label="Transcript csv file"),
    gr.DataFrame(label="Transcript", headers=headers),
]

iface = gr.Interface(
    fn=audio_to_transcript,
    inputs=_audio_input,
    outputs=_transcript_outputs,
    allow_flagging="never",
    title="Audio to Transcript",
    description="Just paste any audio file and get its corresponding transcript with timeline.",
)

iface.launch()