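"""Streamlit app: record or upload English audio, transcribe it with OpenAI
Whisper, and translate the transcript to Hindi using the
Helsinki-NLP/opus-mt-en-hi MarianMT model.

Run with: streamlit run <this_file>.py
"""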
import streamlit as st
from audio_recorder_streamlit import audio_recorder
import time
import re
import os
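# Assumed third-party packages: streamlit, audio-recorder-streamlit, openai-whisper,
# transformers, sentencepiece, torch; ffmpeg must also be available for Whisper
# to decode the saved audio files.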

import whisper

# Load the Whisper speech-to-text model; the 'medium' checkpoint is downloaded on first run
model = whisper.load_model('medium')

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer and the English-to-Hindi translation model
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-hi")
model_hindi = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-hi")

def translator(text):
    # function to translate English text to Hindi
    input_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
    outputs = model_hindi.generate(input_ids)
    decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded_text

def split_sentences(generated_text):
    # Split on sentence-ending punctuation (unless preceded by a comma) and drop empty fragments
    split_text = re.split(r'(?<!,)[.!?]', generated_text)
    split_text = [sentence.strip() for sentence in split_text if sentence.strip()]
    return split_text

def transcribe(audio):
    # Run Whisper on the audio file, then translate the transcript sentence by sentence
    result = model.transcribe(audio)
    generated_text = result["text"]

    def process_transcription(text):
        sentences = split_sentences(text)
        processed_text = ""

        # Translating one sentence at a time keeps each input short for the translation model
        for sentence in sentences:
            processed_text += translator(sentence) + " "

        return processed_text.strip()

    text_hindi = process_transcription(generated_text)
    return generated_text, text_hindi

def main():
    st.title("Translate and Transcribe Audio")

    st.subheader("Click on Mic button and start speaking")
    #st.write("click to stop recording")

    audio_bytes = audio_recorder()
    if audio_bytes:
        st.audio(audio_bytes, format="audio/wav")
        # Save the recorded audio to a temporary WAV file so Whisper can read it
        audio_path = "audio.wav"
        with open(audio_path, "wb") as f:
            f.write(audio_bytes)

        with st.spinner("Transcribing audio... Please wait."):
            result_text, translated_text = transcribe(audio_path)

        st.subheader("Original Text (English):")
        st.write(result_text)

        st.subheader("Translated Text (Hindi):")
        st.write(translated_text)


    st.subheader("Upload your Audio for Transcription")    
    #st.write("Upload your Audio")
    uploaded_file = st.file_uploader("Upload a WAV file", type=["wav"])
    if uploaded_file is not None:
        with st.spinner("Transcribing and translating audio... Please wait."):
            audio_path = "uploaded_audio.wav"
            with open(audio_path, "wb") as f:
                f.write(uploaded_file.getvalue())

            result_text, translated_text = transcribe(audio_path)

        st.subheader("Original Text (English):")
        st.write(result_text)

        st.subheader("Translated Text (Hindi):")
        st.write(translated_text)

        # Remove the temporary audio file
        os.remove(audio_path)

if __name__ == "__main__":
    main()