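"""Streamlit app: record or upload English audio, transcribe it with OpenAI Whisper,
and translate the transcript to Hindi with Helsinki-NLP/opus-mt-en-hi.

Run locally with `streamlit run app.py` (the file name is an assumption here; use
whatever name this script has in the Space).
"""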
import os
import re

import streamlit as st
import whisper
from audio_recorder_streamlit import audio_recorder
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the Whisper speech-recognition model.
model = whisper.load_model("medium")

# Load the tokenizer and the English-to-Hindi translation model.
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-hi")
model_hindi = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-hi")
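# Note: Streamlit re-executes this script on every user interaction, so loading both
# models at module level means they are reloaded on each rerun. If that becomes a
# bottleneck, Streamlit's @st.cache_resource decorator can wrap a loader function so
# the models are created only once per process. The sketch below is an optional
# pattern, not part of the original app:
#
#     @st.cache_resource
#     def load_models():
#         return (
#             whisper.load_model("medium"),
#             AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-hi"),
#             AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-hi"),
#         )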

def translator(text):
    """Translate English text to Hindi with the MarianMT model."""
    input_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
    outputs = model_hindi.generate(input_ids)
    decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded_text

def split_sentences(generated_text):
    """Split text into sentences on ., ! or ? not preceded by a comma."""
    split_text = re.split(r"(?<!,)[.!?]", generated_text)
    # Strip whitespace and drop empty fragments so they are not sent to the translator.
    split_text = [sentence.strip() for sentence in split_text if sentence.strip()]
    return split_text

def transcribe(audio):
    """Transcribe an audio file with Whisper and translate the result to Hindi."""
    result = model.transcribe(audio)
    generated_text = result["text"]

    def process_transcription(text):
        # Translate sentence by sentence to keep inputs short for the model.
        sentences = split_sentences(text)
        processed_text = ""
        for sentence in sentences:
            processed_text += translator(sentence) + " "
        return processed_text.strip()

    text_hindi = process_transcription(generated_text)
    return generated_text, text_hindi

def main():
    st.title("Translate and Transcribe Audio")

    # Record audio from the microphone.
    st.subheader("Click on the mic button and start speaking")
    audio_bytes = audio_recorder()
    if audio_bytes:
        st.audio(audio_bytes, format="audio/wav")
        # Save the recording to a WAV file so Whisper can read it.
        audio_path = "audio.wav"
        with open(audio_path, "wb") as wav_file:
            wav_file.write(audio_bytes)
        with st.spinner("Transcribing audio... Please wait."):
            result_text, translated_text = transcribe(audio_path)
        st.subheader("Original Text (English):")
        st.write(result_text)
        st.subheader("Translated Text (Hindi):")
        st.write(translated_text)

    # Upload an audio file instead of recording.
    st.subheader("Upload your Audio for Transcription")
    uploaded_file = st.file_uploader("WAV format", type=["wav"])
    if uploaded_file is not None:
        with st.spinner("Transcribing and translating audio... Please wait."):
            audio_path = "uploaded_audio.wav"
            with open(audio_path, "wb") as f:
                f.write(uploaded_file.getvalue())
            result_text, translated_text = transcribe(audio_path)
        st.subheader("Original Text (English):")
        st.write(result_text)
        st.subheader("Translated Text (Hindi):")
        st.write(translated_text)
        # Remove the temporary audio file.
        os.remove(audio_path)


if __name__ == "__main__":
    main()