Spaces:

patharanor
/

asr-th

Runtime error

File size: 1,727 Bytes

e6839e8
7fc5b77
e6839e8
79be08a
 
 
2f990e6
 
e6839e8
7fc5b77
 
79be08a
7fc5b77
79be08a
 
 
7fc5b77
 
 
 
 
 
e6839e8
 
79be08a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b3124c
79be08a
 
 
 
e6839e8
79be08a
e6839e8

import gradio as gr
import torch
import numpy as np
from transformers import pipeline
from utils.thai_word import ThaiWord
from pythainlp.tokenize import word_tokenize
from collections import deque
from copy import deepcopy

# Model identifier handed to the transformers pipeline below.
MODEL_NAME = "biodatlab/whisper-th-medium-combined"

# Use CUDA device 0 when torch reports a GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 0
else:
    DEVICE = "cpu"

# Project helper; transcribe() calls its pretty() on the tokenized output.
thw = ThaiWord()

# Audio is processed in 30-second chunks. stride_length_s (a tuple of the
# left and right stride; a single number applies to both sides) is not
# passed here, so the library default applies: each side's stride is 1/6th
# of chunk_length_s.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=DEVICE,
    chunk_length_s=30,
)

def transcribe(audio):
    """Transcribe a recorded audio clip and return a display string.

    Args:
        audio: A ``(sampling_rate, samples)`` pair where ``samples`` is a
            NumPy array, or ``None`` when nothing was recorded.

    Returns:
        str: On success, the prettified transcription followed by the raw
        transcription. When there is nothing to transcribe (no recording,
        an empty or silent buffer, or an empty transcription), a prompt
        asking the user to speak again. On any failure, an error message
        that includes the exception text; exceptions are never propagated
        to the caller.
    """
    retry_message = 'โปรดลองพูดอีกครั้ง'

    # Submitting without a recording yields no audio at all; answer with the
    # retry prompt instead of surfacing an unpacking error to the user.
    if audio is None:
        return retry_message

    try:
        sr, y = audio
        # astype() copies, so the in-place scaling below never touches the
        # caller's buffer.
        y = y.astype(np.float32)

        # The ASR pipeline accepts single-channel input only. Multi-channel
        # recordings are assumed to be laid out as (samples, channels), as
        # in the Gradio speech-recognition guide, and are averaged to mono.
        if y.ndim > 1:
            y = y.mean(axis=1)

        # Peak-normalize to [-1, 1]. An empty or all-zero buffer has no
        # usable peak: dividing by it would raise or fill the signal with
        # NaNs, so treat it as "nothing was said".
        peak = np.max(np.abs(y)) if y.size else 0.0
        if not (np.isfinite(peak) and peak > 0):
            return retry_message
        y /= peak

        text = transcriber(
            {"sampling_rate": sr, "raw": y},
            generate_kwargs={"language": "<|th|>", "task": "transcribe"},
            return_timestamps=False,
            batch_size=16,
        )["text"]

        if text and text.strip():
            # pretty text
            tokens = word_tokenize(text, engine="attacut", join_broken_num=True)
            print(tokens)
            # deque() builds its own container and the tokens are immutable
            # strings, so no extra copy is needed before handing them over.
            result = f'pretty: {thw.pretty(deque(tokens))}\n\n original: {text}'
        else:
            result = retry_message
    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the request crash.
        result = f'ไม่สามารถแปลงข้อความเสียงได้ โปรดลองอีกครั้ง\n\nพบข้อผิดพลาด: {str(e)}'

    return result


# Web UI: microphone recording in, transcription text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"]),
    outputs="text",
)

# Start serving the interface as soon as this file is executed.
demo.launch()