import streamlit as st
from transformers import Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM
import torch
import torchaudio
import torchaudio.functional as F
st.set_page_config(
    page_title='Swedish Speech-to-Text',
    page_icon='🎙️'
)
# Load the acoustic model and LM-boosted processor, moving the model to GPU if available
model_name = 'viktor-enzell/wav2vec2-large-voxrex-swedish-4gram'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Wav2Vec2ForCTC.from_pretrained(model_name).to(device)
processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_name)
def run_inference(file):
    # Load the uploaded audio; the model expects a mono 16 kHz waveform
    waveform, sample_rate = torchaudio.load(file)

    # Keep the first channel, resampling to 16 kHz if necessary
    if sample_rate == 16_000:
        waveform = waveform[0]
    else:
        waveform = F.resample(waveform, sample_rate, 16_000)[0]

    # Convert the waveform to model inputs and move them to the target device
    inputs = processor(
        waveform,
        sampling_rate=16_000,
        return_tensors='pt',
        padding=True
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Decode the logits with the 4-gram language model and return a lower-cased transcript
    return processor.batch_decode(logits.cpu().numpy()).text[0].lower()
uploaded_file = st.file_uploader('Choose a file', type=['.wav'])

if uploaded_file is not None:
    transcript = run_inference(uploaded_file)
    st.write(transcript)
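One detail worth noting: Streamlit reruns the whole script on every interaction, so as written the model and processor are loaded again on each rerun. A minimal sketch of how the loading could be cached, assuming Streamlit 1.18+ where st.cache_resource is available (the decorator and the load_model_and_processor helper are not part of the original file):

# Sketch: cache the heavy objects so they are created once per process (not in the original app)
@st.cache_resource
def load_model_and_processor():
    model = Wav2Vec2ForCTC.from_pretrained(model_name).to(device)
    processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_name)
    return model, processor

model, processor = load_model_and_processor()

The app itself is started the usual Streamlit way, e.g. streamlit run app.py (assuming the script is saved as app.py).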