"""Gradio Space serving the Llama-3.2-3B-Instruct-uncensored GGUF model."""

import gradio as gr
from llama_cpp import Llama
import spaces

# Load the quantized GGUF model from the Hugging Face Hub.
# NOTE(review): IQ1_S is an extremely aggressive 1-bit quant — confirm output
# quality is acceptable before relying on it.
llm = Llama.from_pretrained(
    repo_id="mradermacher/Llama-3.2-3B-Instruct-uncensored-i1-GGUF",
    filename="Llama-3.2-3B-Instruct-uncensored.i1-IQ1_S.gguf",
)


# Fix: the original stacked @spaces.GPU AND @spaces.GPU(enable_queue=False),
# wrapping the handler twice with the ZeroGPU decorator. Keep only the
# configured form.
@spaces.GPU(enable_queue=False)
def generate_response(text):
    """Return the model's chat-completion reply to a single user message.

    Parameters
    ----------
    text : str
        The user's prompt.

    Returns
    -------
    str
        The assistant message content from the first completion choice.
    """
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": text}]
    )
    return result["choices"][0]["message"]["content"]


# Build the Gradio UI around the handler.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Input Text", placeholder="Type your text here..."),
    outputs=gr.Textbox(label="Response"),
    title="Llama Language Model",
    description="""
    This interface uses the Llama-3.2-3B-Instruct-uncensored model for generating text responses.
    Non-commercial use only.
    """,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
    examples=[
        ["What is the capital of France?"],
        ["Explain the theory of relativity."],
        ["Write a poem about nature."],
    ],
)

# Launch the web interface.
iface.launch()