import streamlit as st
from huggingface_hub import InferenceClient

# Access your Hugging Face token. st.secrets reads from .streamlit/secrets.toml
# (or your deployment's secrets settings); the key name must match exactly.
hf_token = st.secrets["Hf_Token"]

# Streamlit app UI
st.title("LLM Model Inference with Streamlit")

# Dropdown for selecting between Llama 3 models
model_id = st.selectbox(
    "Select a LLaMA model",
    [
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
    ],  # Replace with the model names you have access to
)

user_input = st.chat_input("Enter a text prompt:")

# Initialize the Inference Client
client = InferenceClient(token=hf_token)

# Run inference once the user submits a prompt via the chat input
if user_input:
    with st.spinner(f"Generating text using {model_id}..."):
        # Perform chat-completion inference with the selected model
        response = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": "You are a question answering assistant."},
                {"role": "user", "content": user_input},
            ],
            max_tokens=500,
            stream=False,
        )
    st.success("Text generated!")
    # The client returns a dataclass, so read the reply via attribute access
    st.write(response.choices[0].message.content)
else:
    st.warning("Please enter a prompt to generate text.")
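
# Optional: a minimal streaming sketch (an illustrative assumption, not part of
# the app above). With stream=True, client.chat.completions.create yields chunks
# whose text lives in chunk.choices[0].delta.content, and st.write_stream
# (available in Streamlit 1.31+) renders them incrementally instead of waiting
# for the full response. To try it, replace the call above with something like:
#
# def stream_tokens():
#     for chunk in client.chat.completions.create(
#         model=model_id,
#         messages=[{"role": "user", "content": user_input}],
#         max_tokens=500,
#         stream=True,
#     ):
#         delta = chunk.choices[0].delta.content
#         if delta:  # delta can be None on some chunks
#             yield delta
#
# st.write_stream(stream_tokens())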