# Command-R / app.py
# Tonic's picture
# Update app.py
# 486a2f6 verified
# raw
# history blame
# No virus
# 1.62 kB
import spaces
import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the tokenizer and model once at import time; the request handler
# below reads these module-level names on every call.
# NOTE(review): no dtype or device is passed to from_pretrained, so the
# library defaults apply — confirm that is intended for the GPU-decorated
# handler that uses this model.
model_id = "CohereForAI/c4ai-command-r-v01"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
@spaces.GPU
def generate_response(user_input, max_new_tokens, temperature):
    """Generate a single-turn reply to ``user_input`` with the loaded chat model.

    Args:
        user_input: Text of the user's message.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Coerced to ``int`` because UI sliders may deliver floats.
        temperature: Sampling temperature forwarded to ``model.generate``.
            Coerced to ``float`` because a slider may deliver a whole number
            as ``int``.

    Returns:
        The decoded text of the newly generated tokens only; the prompt and
        special tokens are not included.
    """
    # Format the message with the model's chat template.
    messages = [{"role": "user", "content": user_input}]
    # return_dict=True guarantees a mapping with an "input_ids" entry; without
    # it the call may return a bare tensor, which cannot be indexed by key.
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )

    # Keep the inputs on whatever device the model currently lives on.
    device = model.device
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)
    prompt_length = input_ids.shape[1]

    # Generate tokens. max_new_tokens bounds the continuation directly, so no
    # prompt-length arithmetic on max_length is needed.
    gen_tokens = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        temperature=float(temperature),
    )

    # Decode only the continuation so the UI shows the reply, not the echoed
    # prompt and chat-template control tokens.
    new_tokens = gen_tokens[0, prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Define the Gradio interface.
# Components use the top-level gr.Textbox / gr.Slider classes with `value=`
# for the initial setting; the older gr.inputs / gr.outputs namespaces and
# their `default=` keyword are not available in current Gradio releases.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=2, label="Your Message"),
        # step=1 keeps the token budget a whole number.
        gr.Slider(minimum=10, maximum=100, step=1, value=50, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Text Generation Model Interface",
    description="This is a Gradio interface for a text generation model. Enter your message and adjust the parameters to generate a response.",
)

# Launch the application
iface.launch()