Spaces:

speechbox
/

whisper-restore-punctuation

Running

File size: 1,022 Bytes

7fb9ab5
84d2d73
8026eab
 
7fb9ab5
8026eab
7fb9ab5
 
8026eab
 
 
 
 
 
 
 
 
7fb9ab5
 
8026eab
 
 
84d2d73
8026eab
8e7e341
8026eab
8e7e341
8026eab
7fb9ab5
8026eab
 
 
 
 
eb85cb3
8026eab
 
 
 
84d2d73

import os
import subprocess

import gradio as gr
import librosa
from speechbox import PunctuationRestorer

# Load the punctuation-restoration model once at import time so that every
# call to restore() reuses the same instance.
restorer = PunctuationRestorer.from_pretrained("openai/whisper-tiny.en")


def convert_to_wav(path):
    """Convert an audio file to WAV with ffmpeg, unless it already is one.

    Args:
        path: Filesystem path of the input audio file.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the WAV file to
        load (the input itself when it already has a ``.wav`` extension)
        and ``error`` is ``None``. On failure ``path`` is the unchanged
        input and ``error`` is a human-readable message.
    """
    root, ext = os.path.splitext(path)
    if ext.lower() == '.wav':
        # Already a WAV file: nothing to convert, and not an error.
        return path, None

    new_path = root + '.wav'
    try:
        # '-y' overwrites a stale output from an earlier run instead of
        # blocking on ffmpeg's interactive "overwrite?" prompt.
        result = subprocess.run(['ffmpeg', '-y', '-i', path, new_path])
    except OSError:
        # ffmpeg is not installed or could not be executed.
        return path, 'Error: Could not convert file to .wav'

    if result.returncode != 0:
        # ffmpeg ran but failed; new_path may be missing or incomplete, so
        # hand back the original path rather than a broken output.
        return path, 'Error: Could not convert file to .wav'

    return new_path, None


def restore(audio, original_transcript):
    """Restore punctuation in a transcript using the matching audio.

    Args:
        audio: Path to the audio file that the transcript belongs to.
        original_transcript: The transcript text to be punctuated.

    Returns:
        The restored text, i.e. the first element of the restorer's result.
    """
    path, error = convert_to_wav(audio)
    if error is not None:
        # Conversion is best-effort: log the problem and still try to load
        # whatever path was handed back.
        print(error)
    data, samplerate = librosa.load(path, sr=16_000)

    text, _ = restorer(data, original_transcript, samplerate, num_beams=1)

    return text


# Build the web UI (an audio upload plus the transcript text in, the restored
# text out) and start serving it.
gr.Interface(
    fn=restore,
    inputs=[
        gr.inputs.Audio(source="upload", type="filepath"),
        gr.inputs.Textbox(default="", label="normalized text"),
    ],
    outputs=[gr.outputs.Textbox(label='Restored text')],
    title='Punctuation Restorer',
).launch()