import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_core.prompts import ChatPromptTemplate

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-Instruct")

st.title("Unsloth Llama-3.2-3B-Instruct Text Generation")
st.write("Enter a prompt and generate text using the Unsloth Llama 3.2 3B model.")

prompt = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Answer in bullet points. Make sure your answer is relevant to the question and it is answered from the context only.
    Question: {question} 
    Context: {context} 
    Answer:
"""

prompt = ChatPromptTemplate.from_template(prompt)

with st.form("llm-form"):
    user_input = st.text_area("Enter your question or statement:")
    submit = st.form_submit_button("Submit")

    if submit:
        inputs = tokenizer(user_input, return_tensors="pt")
        outputs = model.generate(inputs["input_ids"], max_length=200)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(generated_text)