# Spaces: Running
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
import gc | |
# Process-wide singletons: both stay None until load_model() populates them
# on the first request, so importing this module does not load any weights.
model, tokenizer = None, None
def load_model():
    """Load the Athena checkpoint and its tokenizer into the module globals.

    The weights are requested on CPU in float32 with ``low_cpu_mem_usage``
    enabled, and the model is put into evaluation mode. Nothing is returned;
    the results are published through the module-level ``model`` and
    ``tokenizer`` names.
    """
    global model, tokenizer

    checkpoint = "Spestly/Athena-1-1.5B"
    load_options = {
        "low_cpu_mem_usage": True,
        "torch_dtype": torch.float32,
        "device_map": "cpu",
    }

    model = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model.eval()
def generate_response(input_text, history):
    """Generate Athena's reply to a single user message.

    Args:
        input_text: The message typed by the user.
        history: Plain-text transcript accumulated so far for this session.

    Returns:
        A ``(response, history)`` tuple. On success ``history`` has the new
        "User: ... / Athena: ..." exchange appended. For a blank message or
        when generation raises, ``history`` is returned unchanged and
        ``response`` carries an explanatory message instead.
    """
    global model, tokenizer

    # Nothing to answer: skip the slow CPU generation for blank input.
    if not input_text or not input_text.strip():
        return "Please enter a message.", history

    # Load model if not loaded
    if model is None or tokenizer is None:
        load_model()

    try:
        instruction = "You are an AI assistant. Respond to the following instruction clearly and concisely:\n"
        formatted_input = instruction + input_text

        inputs = tokenizer(
            formatted_input,
            return_tensors="pt",
            truncation=True,
            max_length=256,
        )
        # Remember how many tokens the prompt occupies so they can be removed
        # from the generated sequence below.
        prompt_length = inputs["input_ids"].shape[-1]

        # Some tokenizers define no pad token; fall back to EOS so generate()
        # is never handed pad_token_id=None.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=True,
                top_k=50,
                top_p=0.9,
                temperature=0.7,
                pad_token_id=pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                use_cache=True,
                repetition_penalty=1.2,
                num_beams=1,
            )

        # The generated sequence starts with the prompt tokens; decode only
        # the continuation so the reply does not echo the instruction and the
        # user's own message.
        new_tokens = outputs[0][prompt_length:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        # Manual garbage collection
        gc.collect()

        # Update history
        history = history + f"\nUser: {input_text}\nAthena: {response}\n"
        return response, history
    except Exception as e:
        # Surface the failure in the response box instead of crashing the UI.
        return f"Error: {str(e)}", history
print("Starting app...")


def _respond_and_show_history(message, transcript):
    """Run generate_response and also return the transcript for display.

    Returns ``(response, transcript, transcript)``: the reply text, the
    updated session state, and the same transcript again for the read-only
    "Chat History" textbox.
    """
    response, transcript = generate_response(message, transcript)
    return response, transcript, transcript


with gr.Blocks() as demo:
    gr.Markdown("""
    # Athena-1.1.5B Chat
    Smaller and Smarter.
    """)

    # Initialize state and components
    state = gr.State("")
    user_input = gr.Textbox(
        label="Your Message",
        placeholder="Type your question here...",
        lines=3,
    )

    with gr.Row():
        with gr.Column(scale=2):
            chat_history = gr.Textbox(
                label="Chat History",
                placeholder="Chat history will appear here...",
                lines=10,
                interactive=False,
            )
        with gr.Column(scale=1):
            gr.Markdown("### Examples")
            examples = [
                "What's been the role of music in human societies?",
                "Escribe un poema corto sobre la historia del Mediterráneo.",
                "Jane has 8 apples. Solve this in Python.",
            ]
            example_buttons = []
            for example in examples:
                example_button = gr.Button(example)
                # Only fill the message box. Writing `state.value` (the
                # component's initial "") back into `state` would erase the
                # session's transcript on every example click.
                # `e=example` binds the current text at definition time.
                example_button.click(
                    lambda e=example: e,
                    outputs=user_input,
                )
                example_buttons.append(example_button)

    output_text = gr.Textbox(
        label="Athena's Response",
        lines=5,
        interactive=False,
    )
    submit_button = gr.Button("Submit")

    # Handle submission: show the reply, persist the transcript in session
    # state, and mirror it into the "Chat History" box.
    submit_button.click(
        _respond_and_show_history,
        inputs=[user_input, state],
        outputs=[output_text, state, chat_history],
    )

if __name__ == "__main__":
    demo.launch()