"""Gradio chat demo for the Spestly/Athena-1-1.5B causal language model.

The model and tokenizer are loaded lazily, on CPU, the first time a message
is submitted.
"""

import gc

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Global model and tokenizer; populated on first use by load_model().
model = None
tokenizer = None


def load_model():
    """Load the model and tokenizer into the module-level globals.

    The weights are loaded as float32 on the CPU and the model is switched
    to evaluation mode.
    """
    global model, tokenizer
    model_name = "Spestly/Athena-1-1.5B"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float32,
        device_map="cpu",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model.eval()


def generate_response(input_text, history):
    """Generate a reply to *input_text* and append the turn to *history*.

    Args:
        input_text: The user's message.
        history: Plain-text transcript of the conversation so far.

    Returns:
        A ``(response, history)`` tuple. When generation fails, the response
        is an ``"Error: ..."`` string and the history is returned unchanged.
    """
    global model, tokenizer

    # Nothing to answer: skip generation rather than sampling from the bare
    # instruction prefix.
    if not input_text or not input_text.strip():
        return "Please enter a message.", history

    try:
        # Load model if not loaded. Done inside the try so that a failed
        # load is reported through the same error path as a failed
        # generation.
        if model is None or tokenizer is None:
            load_model()

        instruction = "You are an AI assistant. Respond to the following instruction clearly and concisely:\n"
        formatted_input = instruction + input_text

        inputs = tokenizer(
            formatted_input,
            return_tensors="pt",
            truncation=True,
            max_length=256,
        )

        # generate() needs a pad id; fall back to EOS when the tokenizer
        # does not define one.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=True,
                top_k=50,
                top_p=0.9,
                temperature=0.7,
                pad_token_id=pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                use_cache=True,
                repetition_penalty=1.2,
                num_beams=1,
            )

        # The generated sequence starts with the prompt tokens; decode only
        # what comes after them so the reply does not echo the instruction
        # and the user's message.
        prompt_length = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        # Manual garbage collection
        gc.collect()

        # Update history
        history = history + f"\nUser: {input_text}\nAthena: {response}\n"

        return response, history
    except Exception as e:
        # UI boundary: surface the failure in the response box instead of
        # raising into Gradio.
        return f"Error: {str(e)}", history


def _handle_submit(input_text, history):
    """Run generate_response and also return the history for display.

    Returns:
        A ``(response, history, history)`` tuple: the reply, the updated
        session state, and the same transcript for the history textbox.
    """
    response, history = generate_response(input_text, history)
    return response, history, history


print("Starting app...")

with gr.Blocks() as demo:
    gr.Markdown("""
    # Athena-1.1.5B Chat
    Smaller and Smarter.
    """)

    # Initialize state and components
    state = gr.State("")

    user_input = gr.Textbox(
        label="Your Message",
        placeholder="Type your question here...",
        lines=3,
    )

    with gr.Row():
        with gr.Column(scale=2):
            chat_history = gr.Textbox(
                label="Chat History",
                placeholder="Chat history will appear here...",
                lines=10,
                interactive=False,
            )

        with gr.Column(scale=1):
            gr.Markdown("### Examples")
            examples = [
                "What's been the role of music in human societies?",
                "Escribe un poema corto sobre la historia del Mediterráneo.",
                "Jane has 8 apples. Solve this in Python.",
            ]
            example_buttons = []
            for example in examples:
                example_button = gr.Button(example)
                # Bind the current example as a default argument (avoids the
                # late-binding closure pitfall). Only the input box is
                # updated, so clicking an example leaves the session
                # history untouched.
                example_button.click(
                    lambda e=example: e,
                    outputs=user_input,
                )
                example_buttons.append(example_button)

    output_text = gr.Textbox(
        label="Athena's Response",
        lines=5,
        interactive=False,
    )

    submit_button = gr.Button("Submit")

    # Handle submission: show the reply, persist the transcript in the
    # session state, and mirror it into the read-only history box.
    submit_button.click(
        _handle_submit,
        inputs=[user_input, state],
        outputs=[output_text, state, chat_history],
    )

if __name__ == "__main__":
    demo.launch()