# Aira Demo — Hugging Face Space app (scrape header "Spaces: Running" removed).
import os

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# SECURITY: the original hard-coded a Hugging Face access token in source.
# That token is leaked and must be revoked; read the credential from the
# environment instead. A missing variable yields None -> anonymous access,
# which works because the model repo is public.
HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN")

MODEL_ID = "nicholasKluge/Aira-Instruct-124M"

# Load the instruction-tuned Aira model and its matching tokenizer.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=HF_AUTH_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, use_auth_token=HF_AUTH_TOKEN)
# Gradio chat UI wired to the globally loaded `tokenizer` / `model`.
with gr.Blocks() as demo:
    gr.Markdown("""<h1><center>🔥Aira Demo 🤓🚀</h1></center>""")
    chatbot = gr.Chatbot(label="Aira")
    msg = gr.Textbox(label="Write a question or comment to Aira", placeholder="Hi Aira, how are you?")

    # Sampling hyper-parameters the user can tweak live.
    with gr.Accordion("Parameters ⚙️", open=True):
        top_k = gr.Slider(minimum=10, maximum=100, value=50, step=5, interactive=True, label="Top-k")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.70, step=0.05, interactive=True, label="Top-p")
        temperature = gr.Slider(minimum=0.001, maximum=2.0, value=0.1, step=0.1, interactive=True, label="Temperature")
        max_length = gr.Slider(minimum=10, maximum=500, value=100, step=10, interactive=True, label="Max Length")

    clear = gr.Button("Clear Conversation 🧹")

    def generate_response(message, chat_history, top_k, top_p, temperature, max_length):
        """Run one chat turn.

        Appends a (user, bot) pair to `chat_history` and returns an empty
        string to clear the textbox plus the updated history, matching the
        [msg, chatbot] outputs of `msg.submit`.
        """
        inputs = tokenizer(tokenizer.bos_token + message + tokenizer.eos_token, return_tensors="pt")
        # NOTE: `early_stopping=True` was dropped — it only applies to beam
        # search and merely triggers a warning alongside `do_sample=True`.
        generated = model.generate(
            **inputs,
            bos_token_id=tokenizer.bos_token_id,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=top_k,
            max_length=max_length,
            top_p=top_p,
            temperature=temperature,
            num_return_sequences=1,
        )
        # Strip the prompt by token count rather than str.replace(message, ""),
        # which would also delete the user's words if the model echoed them
        # anywhere inside its reply.
        prompt_len = inputs["input_ids"].shape[1]
        reply = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
        chat_history.append((f"👤 {message}", f"🤖 {reply}"))
        return "", chat_history

    msg.submit(generate_response, [msg, chatbot, top_k, top_p, temperature, max_length], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()