budhadityac24
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,83 @@
import streamlit as st
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
import os
from groq import Groq
import time

# Load environment variables
load_dotenv()

# Initialize the Hugging Face InferenceClient and Groq client
client = InferenceClient()
client2 = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Streamlit application
def main():
    st.title("Audio Translator: English to Hinglish")

    uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])

    if uploaded_file is not None:
        st.audio(uploaded_file, format="audio/wav")

        with st.spinner('Processing...'):
            # Save the uploaded audio temporarily
            input_file_path = "input_audio.wav"
            with open(input_file_path, "wb") as f:
                f.write(uploaded_file.read())

            # Perform automatic speech recognition
            response = client.automatic_speech_recognition(input_file_path)
            text_eng = response.text

            # Display the recognized text
            st.subheader("Recognized English Text")
            st.write(text_eng)

        # Create a loading message
        with st.spinner('Translating to Hinglish...'):
            time.sleep(2)  # Simulate a delay for loading message

            # Generate Hinglish translation
            completion = client2.chat.completions.create(
                model="llama3-70b-8192",
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert English to Hinglish Translator. The translated text should sound natural and also convert all the difficult words and phrases in English to Hinglish. The translated text must be able to keep certain words in English to keep the Hindi translation Easy. ### Example: English: I had about a 30 minute demo just using this new headset Hinglish: मुझे सिर्फ ३० minute का demo मिला था नये headset का इस्तमाल करने के लिए ### Generate a dataset of 5 examples for English to Hinglish translation where Hindi words should be in Devanagari and English words should be in English. Use the above example as a reference. Create examples biased towards content creators."
                    },
                    {
                        "role": "user",
                        "content": "English:" + text_eng
                    }
                ],
                temperature=1,
                max_tokens=1024,
                top_p=1,
                stream=True,
                stop=None,
            )

            response_string = ""
            for chunk in completion:
                response_string += chunk.choices[0].delta.content or ""

            # Display the translated text
            st.subheader("Translated Hinglish Text")
            st.write(response_string)

            # Convert translated text to speech
            final_resp = client.text_to_speech(response_string, model="facebook/mms-tts-hin")

            # Save the translated speech temporarily
            output_file_path = "translated_speech.wav"
            with open(output_file_path, "wb") as f:
                f.write(final_resp)

            # Play the translated audio
            st.audio(output_file_path, format="audio/wav")
            st.success("Translation and speech synthesis completed!")

if __name__ == "__main__":
    main()
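For anyone reproducing the Space locally, the sketch below is a minimal setup check, not part of the commit: it only verifies the GROQ_API_KEY that app.py reads via os.getenv. The file name setup_check.py, the HF_TOKEN suggestion for InferenceClient, and the inferred package list are assumptions.

# setup_check.py -- minimal sketch (assumed, not part of this commit) of the
# environment app.py expects: a .env file providing GROQ_API_KEY, Hugging Face
# credentials for InferenceClient (an HF_TOKEN variable is one common choice),
# and the packages inferred from the imports above:
# streamlit, huggingface_hub, python-dotenv, groq.
import os

from dotenv import load_dotenv

load_dotenv()

if not os.getenv("GROQ_API_KEY"):
    raise SystemExit("GROQ_API_KEY is missing; add it to .env before running app.py")

print("Environment looks OK; start the app with: streamlit run app.py")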