import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer
model_name = "Spestly/Athena-1-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Set to evaluation mode
model.eval()


def generate_response(message, history):
    # Note: `history` is required by ChatInterface's signature but unused here;
    # each turn is prompted independently with a single-turn template.
    # Wrap the user message in the Alpaca-style instruction template.
    instruction = (
        "You are an LLM called Athena. You are finetuned by Aayan Mishra. You are NOT trained by Anthropic. "
        "You are a Qwen 2.5 fine-tune. Your purpose is to help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )

    # Tokenize the prompt and sample a completion without tracking gradients
    inputs = tokenizer(instruction, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # Decode and keep only the text generated after the "### Response:" marker
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Response:")[-1].strip()
    return response


iface = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(height=600, type="messages"),
    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
    title="Athena-1",
    description="Chat with Athena-1",
    theme="soft",
    examples=[
        "Can you give me a good salsa recipe?",
        "Write an engaging two-line horror story.",
        "What is the capital of Australia?",
    ],
    type="messages",
)

iface.launch()