Spaces:

pratikshahp
/

speech-to-text

Sleeping

pratikshahp committed on Mar 26, 2024

Commit

c520d9a

verified ·

1 Parent(s): a1c0ebf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,30 +1,34 @@
 import torch
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
 import streamlit as st
 from audio_recorder_streamlit import audio_recorder
-audio_bytes = audio_recorder(pause_threshold=3.0, sample_rate=16_000)
-if audio_bytes:
-    st.audio(audio_bytes, format="audio/wav")
-    # Load pre-trained model and tokenizer
-    tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
     model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
-    # Tokenize the audio input
-    input_values = tokenizer(audio_bytes, return_tensors='pt').input_values
-    # Perform inference
     logits = model(input_values).logits
     predicted_ids = torch.argmax(logits, dim=-1)
-    # Decode the audio to generate text
-    transcriptions = tokenizer.decode(predicted_ids[0])
-    if transcriptions is not None:
-        st.write(transcriptions)
     else:
-        st.write("Error: Failed to decode audio.")
 else:
     st.write("No audio recorded.")

 import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import streamlit as st
 from audio_recorder_streamlit import audio_recorder
+@st.cache_resource
+def _load_wav2vec2():
+    """Load the wav2vec2 processor and CTC model once per server process.
+
+    Streamlit re-executes the script on every interaction, so without
+    caching both objects would be rebuilt for every recording.
+
+    Returns:
+        A ``(processor, model)`` tuple for "facebook/wav2vec2-base-960h".
+    """
+    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
     model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+    return processor, model
+
+
+def _wav_bytes_to_waveform(audio_bytes):
+    """Decode WAV-encoded bytes into a mono float32 waveform.
+
+    Args:
+        audio_bytes: Contents of a PCM WAV file (8-, 16- or 32-bit samples).
+
+    Returns:
+        A ``(waveform, sample_rate)`` tuple; ``waveform`` is a 1-D float32
+        NumPy array scaled to [-1.0, 1.0).
+
+    Raises:
+        wave.Error, EOFError: If the bytes are not a readable PCM WAV file.
+        ValueError: If the sample width is not 1, 2 or 4 bytes.
+    """
+    import io
+    import wave
+
+    import numpy as np
+
+    with wave.open(io.BytesIO(audio_bytes), "rb") as wav_file:
+        channels = wav_file.getnchannels()
+        sample_width = wav_file.getsampwidth()
+        sample_rate = wav_file.getframerate()
+        pcm = wav_file.readframes(wav_file.getnframes())
+
+    # WAV PCM is little-endian; 8-bit samples are unsigned, wider ones signed.
+    dtypes = {1: "u1", 2: "<i2", 4: "<i4"}
+    if sample_width not in dtypes:
+        raise ValueError(f"Unsupported WAV sample width: {sample_width} bytes")
+
+    # Drop a trailing partial frame so the reshape below cannot fail.
+    frame_size = sample_width * channels
+    pcm = pcm[: len(pcm) - len(pcm) % frame_size]
+    samples = np.frombuffer(pcm, dtype=dtypes[sample_width]).astype(np.float32)
+    if sample_width == 1:
+        samples = (samples - 128.0) / 128.0
+    else:
+        samples /= float(1 << (8 * sample_width - 1))
+    if channels > 1:
+        # Average the interleaved channels down to mono.
+        samples = samples.reshape(-1, channels).mean(axis=1)
+    return samples, sample_rate
+
+
+# Function to transcribe audio to text
+@torch.no_grad()
+def transcribe_audio(audio_bytes):
+    """Transcribe a WAV recording to text with wav2vec2.
+
+    Args:
+        audio_bytes: WAV-encoded audio bytes, as returned by ``audio_recorder``.
+
+    Returns:
+        The decoded transcription, or "" when the input is empty, cannot be
+        decoded as a PCM WAV file, or contains no samples.
+    """
+    import wave
+
+    if not audio_bytes:
+        return ""
+    try:
+        waveform, sample_rate = _wav_bytes_to_waveform(audio_bytes)
+    except (wave.Error, EOFError, ValueError):
+        # The caller reports an empty result as a failed transcription.
+        return ""
+    if waveform.size == 0:
+        return ""
+
+    processor, model = _load_wav2vec2()
+    # The processor expects raw float samples, not the encoded WAV container.
+    # Passing the file's own sample rate lets it reject audio recorded at a
+    # rate other than the one the model was trained on.
+    input_values = processor(waveform, return_tensors="pt", sampling_rate=sample_rate).input_values
     logits = model(input_values).logits
     predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.decode(predicted_ids[0])
+    return transcription
+# Streamlit app: record audio in the browser, play it back, and show the
+# transcription produced by transcribe_audio().
+st.title("Audio to Text Transcription")
+# Record at 16 kHz, the sampling rate used when the audio is fed to the
+# wav2vec2 processor.
+# NOTE(review): pause_threshold=3.0 presumably stops recording after 3 s of
+# silence - confirm against the audio_recorder_streamlit documentation.
+audio_bytes = audio_recorder(pause_threshold=3.0, sample_rate=16_000)
+if audio_bytes:
+    # Let the user listen to what was captured before showing the text.
+    st.audio(audio_bytes, format="audio/wav")
+    transcription = transcribe_audio(audio_bytes)
+    # Any falsy result (including an empty string) is reported as a failure.
+    if transcription:
+        st.write("Transcription:")
+        st.write(transcription)
     else:
+        st.write("Error: Failed to transcribe audio.")
 else:
     st.write("No audio recorded.")