|
import os
import tempfile
import time

import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from huggingface_hub import InferenceClient
|
|
|
|
|
# Load environment settings via python-dotenv before the clients below are
# built, so the GROQ_API_KEY lookup can see values it provides.
load_dotenv()

# Hugging Face inference client; main() uses it for speech recognition and
# for text-to-speech.
client = InferenceClient()

# Groq client; main() uses it for the English -> Hinglish chat completion.
# The key is read from the GROQ_API_KEY environment variable (None if unset).
client2 = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
|
|
|
# System prompt sent to the Groq chat model. The wording is reproduced exactly
# as the original author wrote it.
# NOTE(review): the closing instructions ask the model to "Generate a dataset
# of 5 examples" instead of translating the user's sentence, which looks like
# a leftover from a dataset-generation prompt -- confirm the intent before
# rewording it.
_HINGLISH_SYSTEM_PROMPT = (
    "You are an expert English to Hinglish Translator. "
    "The translated text should sound natural and also convert all the "
    "difficult words and phrases in English to Hinglish. "
    "The translated text must be able to keep certain words in English to "
    "keep the Hindi translation Easy. "
    "### Example: "
    "English: I had about a 30 minute demo just using this new headset "
    "Hinglish: मुझे सिर्फ ३० minute का demo मिला था नये headset का इस्तमाल करने के लिए "
    "### Generate a dataset of 5 examples for English to Hinglish "
    "translation where Hindi words should be in Devanagari and English "
    "words should be in English. "
    "Use the above example as a reference. "
    "Create examples biased towards content creators."
)


def _transcribe(audio_bytes: bytes, suffix: str) -> str:
    """Return the text recognized in *audio_bytes* by the module-level ``client``.

    The audio is handed to the client as a file path, as the original code
    did, but through a uniquely named temporary file instead of a fixed
    ``input_audio.wav`` so concurrent sessions cannot overwrite each other's
    upload. *suffix* is the file extension to give the temporary file
    (e.g. ``".mp3"``).
    """
    # delete=False + explicit removal: the file must be closed before another
    # reader opens it by name, which some platforms require.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(audio_bytes)
        tmp_path = tmp.name
    try:
        response = client.automatic_speech_recognition(tmp_path)
        return response.text or ""
    finally:
        os.remove(tmp_path)


def _translate_to_hinglish(text_eng: str) -> str:
    """Return the Groq chat model's full reply for *text_eng*.

    The completion is requested as a stream and the streamed deltas are
    joined into a single string.
    """
    completion = client2.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": _HINGLISH_SYSTEM_PROMPT},
            {"role": "user", "content": "English:" + text_eng},
        ],
        temperature=1,
        max_tokens=1024,
        top_p=1,
        stream=True,
        stop=None,
    )
    # A chunk's delta content may be None; treat that as an empty piece.
    return "".join(chunk.choices[0].delta.content or "" for chunk in completion)


def main() -> None:
    """Render the VoiceVoyager page and run the upload-to-speech pipeline.

    Steps, once the user has uploaded an MP3/WAV file:
      1. play the upload back,
      2. transcribe it to English text,
      3. send the text to the chat model for Hinglish output,
      4. synthesize speech from that output and play it.

    The pipeline stops early with a warning if transcription or translation
    yields no text, so the next remote call is never made with empty input.
    No fixed-name files are left in the working directory.
    """
    st.title("VoiceVoyager")
    st.caption("The linguistic chameleon that clones your voice and speaks Hindi, so you don't have to learn a new language to go viral in Varanasi.")
    uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])

    # Guard clause: nothing to do until a file has been chosen.
    if uploaded_file is None:
        return

    # getvalue() returns the whole upload regardless of the stream position.
    audio_bytes = uploaded_file.getvalue()
    # Use the upload's own MIME type so MP3 files are not labelled as WAV.
    st.audio(audio_bytes, format=uploaded_file.type or "audio/wav")

    with st.spinner("Processing..."):
        suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
        text_eng = _transcribe(audio_bytes, suffix)

    st.subheader("Recognized English Text")
    st.write(text_eng)
    if not text_eng.strip():
        st.warning("No speech was recognized in the uploaded audio.")
        return

    # The spinner covers the real translation call (previously a fixed
    # two-second sleep).
    with st.spinner("Translating to Hinglish..."):
        response_string = _translate_to_hinglish(text_eng)

    st.subheader("Translated Hinglish Text")
    st.write(response_string)
    if not response_string.strip():
        st.warning("The translation came back empty, so no speech was generated.")
        return

    final_resp = client.text_to_speech(response_string, model="facebook/mms-tts-hin")

    # Play the synthesized bytes directly instead of going through a shared
    # "translated_speech.wav" file on disk.
    st.audio(final_resp, format="audio/wav")
    st.success("Translation and speech synthesis completed!")
|
|
|
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|
|