viktor-enzell committed on
Commit
4dce433
1 Parent(s): 091b848

Caching inference function.

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -40,13 +40,18 @@ class ASR:
40
  return self.processor.batch_decode(logits.cpu().numpy()).text[0].lower()
41
 
42
 
43
- @st.cache(allow_output_mutation=True, show_spinner=True)
44
  def load_model():
45
  asr = ASR()
46
  asr.load_model()
47
  return asr
48
 
49
 
 
 
 
 
 
50
  if __name__ == "__main__":
51
  st.set_page_config(
52
  page_title="Swedish Speech-to-Text",
@@ -57,12 +62,13 @@ if __name__ == "__main__":
57
  width=100,
58
  )
59
  st.markdown("""
60
- # Swedish high-quality transcription
61
 
62
- Generate Swedish transcripts for download from an audio file with this high-quality speech-to-text model. The model is KBLab's wav2vec 2.0 large VoxRex Swedish (C) with a 4-gram language model, which you can access [here](https://huggingface.co/viktor-enzell/wav2vec2-large-voxrex-swedish-4gram).
63
  """)
64
 
65
- asr = load_model()
 
66
 
67
  uploaded_file = st.file_uploader("Choose a file", type=[".wav"])
68
  if uploaded_file is not None:
@@ -74,7 +80,8 @@ if __name__ == "__main__":
74
  # audio_output = ffmpeg.output(audio_input, "tmp.wav", format="wav")
75
  # ffmpeg.run(audio_output)
76
 
77
- transcript = asr.run_inference(uploaded_file)
 
78
 
79
  st.download_button("Download transcript", transcript, "transcript.txt")
80
 
 
40
  return self.processor.batch_decode(logits.cpu().numpy()).text[0].lower()
41
 
42
 
43
+ @st.cache(allow_output_mutation=True, show_spinner=False)
44
  def load_model():
45
  asr = ASR()
46
  asr.load_model()
47
  return asr
48
 
49
 
50
+ @st.cache(allow_output_mutation=True, hash_funcs={ASR: lambda _: None}, show_spinner=False)
51
+ def run_inference(asr, file):
52
+ return asr.run_inference(file)
53
+
54
+
55
  if __name__ == "__main__":
56
  st.set_page_config(
57
  page_title="Swedish Speech-to-Text",
 
62
  width=100,
63
  )
64
  st.markdown("""
65
+ # Swedish Speech-to-text
66
 
67
+ Generate and download high-quality Swedish transcripts for your audio files. The speech-to-text model is KBLab's wav2vec 2.0 large VoxRex Swedish (C) with a 4-gram language model, which you can access [here](https://huggingface.co/viktor-enzell/wav2vec2-large-voxrex-swedish-4gram).
68
  """)
69
 
70
+ with st.spinner(text="Loading model..."):
71
+ asr = load_model()
72
 
73
  uploaded_file = st.file_uploader("Choose a file", type=[".wav"])
74
  if uploaded_file is not None:
 
80
  # audio_output = ffmpeg.output(audio_input, "tmp.wav", format="wav")
81
  # ffmpeg.run(audio_output)
82
 
83
+ with st.spinner(text="Transcribing..."):
84
+ transcript = run_inference(asr, uploaded_file)
85
 
86
  st.download_button("Download transcript", transcript, "transcript.txt")
87