import os
import tempfile
import time

import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from huggingface_hub import InferenceClient

# Read key/value pairs from a local .env file into the process environment
# before any client looks up its credentials.
load_dotenv()

# Module-level API clients shared by the app:
#   client  - Hugging Face InferenceClient (speech recognition and speech synthesis)
#   client2 - Groq client (chat completion used for translation)
client = InferenceClient()
client2 = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Streamlit application
def _transcribe(audio_bytes: bytes, suffix: str) -> str:
    """Return the text recognized in *audio_bytes* by the speech-recognition client.

    The audio is written to a uniquely named temporary file (so concurrent
    sessions never overwrite each other's upload) whose extension is
    *suffix*, and the file is removed again once the request has finished.
    """
    # delete=False plus leaving the ``with`` block closes the handle before the
    # client reopens the path; an open NamedTemporaryFile cannot be reopened
    # by name on every platform.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(audio_bytes)
        tmp_path = tmp.name
    try:
        return client.automatic_speech_recognition(tmp_path).text
    finally:
        os.remove(tmp_path)


def _translate_to_hinglish(text_eng: str) -> str:
    """Return the Groq chat model's Hinglish rendering of *text_eng*."""
    # NOTE(review): the system prompt ends by asking for "a dataset of 5
    # examples", which looks like a leftover from a data-generation prompt and
    # may make the model emit examples instead of a translation. Left
    # unchanged here - confirm the intended wording.
    completion = client2.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {
                "role": "system",
                "content": "You are an expert English to Hinglish Translator. The translated text should sound natural and also convert all the difficult words and phrases in English to Hinglish. The translated text must be able to keep certain words in English to keep the Hindi translation Easy. ### Example: English: I had about a 30 minute demo just using this new headset Hinglish: मुझे सिर्फ ३० minute का demo मिला था नये headset का इस्तमाल करने के लिए ### Generate a dataset of 5 examples for English to Hinglish translation where Hindi words should be in Devanagari and English words should be in English. Use the above example as a reference. Create examples biased towards content creators."
            },
            {
                "role": "user",
                "content": "English:" + text_eng
            }
        ],
        temperature=1,
        max_tokens=1024,
        top_p=1,
        stream=True,
        stop=None,
    )
    # The response is streamed; a chunk's delta content may be None, so
    # substitute an empty string before joining.
    return "".join(chunk.choices[0].delta.content or "" for chunk in completion)


def main() -> None:
    """Render the VoiceVoyager page and run the speech pipeline on an upload.

    Once an audio file is uploaded it is played back, transcribed, translated
    to Hinglish and synthesized to speech with the ``facebook/mms-tts-hin``
    model; each intermediate result is shown on the page. If transcription or
    translation yields no text, a message is shown and the pipeline stops.
    """
    st.title("VoiceVoyager")
    st.caption("The linguistic chameleon that clones your voice and speaks Hindi, so you don't have to learn a new language to go viral in Varanasi.")
    uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])

    if uploaded_file is None:
        return

    # Use the MIME type reported for the upload so MP3 files are not
    # mislabelled as WAV in the player.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

    with st.spinner('Processing...'):
        # Keep the real extension on the temporary file so it is not an MP3
        # stored under a .wav name.
        suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".wav"
        # getvalue() returns the whole buffer regardless of the current read
        # position.
        text_eng = _transcribe(uploaded_file.getvalue(), suffix)

        if not text_eng or not text_eng.strip():
            st.warning("No speech could be recognized in the uploaded audio.")
            return

        # Display the recognized text
        st.subheader("Recognized English Text")
        st.write(text_eng)

        with st.spinner('Translating to Hinglish...'):
            response_string = _translate_to_hinglish(text_eng)

            if not response_string.strip():
                st.error("The translation service returned an empty response.")
                return

            # Display the translated text
            st.subheader("Translated Hinglish Text")
            st.write(response_string)

            # Convert translated text to speech
            final_resp = client.text_to_speech(response_string, model="facebook/mms-tts-hin")

            # Play the synthesized audio straight from memory; no shared
            # output file is written to the working directory.
            st.audio(final_resp, format="audio/wav")
            st.success("Translation and speech synthesis completed!")

# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()