"""Gradio Space serving the Llama-3.2-3B-Instruct-uncensored GGUF model."""

import gradio as gr
from llama_cpp import Llama
import spaces

# Load the quantized GGUF model from the Hugging Face Hub.
# NOTE(review): IQ1_S is an extremely aggressive 1-bit quant — confirm output
# quality is acceptable before relying on it.
llm = Llama.from_pretrained(
    repo_id="mradermacher/Llama-3.2-3B-Instruct-uncensored-i1-GGUF",
    filename="Llama-3.2-3B-Instruct-uncensored.i1-IQ1_S.gguf",
)


# Fix: the original stacked @spaces.GPU AND @spaces.GPU(enable_queue=False),
# wrapping the handler twice with the ZeroGPU decorator. Keep only the
# configured form.
@spaces.GPU(enable_queue=False)
def generate_response(text):
    """Return the model's chat-completion reply to a single user message.

    Parameters
    ----------
    text : str
        The user's prompt.

    Returns
    -------
    str
        The assistant message content from the first completion choice.
    """
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": text}]
    )
    return result["choices"][0]["message"]["content"]


# Build the Gradio UI around the handler.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Input Text", placeholder="Type your text here..."),
    outputs=gr.Textbox(label="Response"),
    title="Llama Language Model",
    description="""
    This interface uses the Llama-3.2-3B-Instruct-uncensored model for generating text responses.
    Non-commercial use only.
    """,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
    examples=[
        ["What is the capital of France?"],
        ["Explain the theory of relativity."],
        ["Write a poem about nature."],
    ],
)

# Launch the web interface.
iface.launch()