# Locutusque's picture
# Update app.py
# 3d0fb66 verified
# raw
# history blame
# 1.79 kB
import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces
@spaces.GPU
def load_model(model_name):
    """Build a text-generation pipeline for ``model_name``.

    The pipeline is created on the ``"cuda"`` device with float16 weights.
    A new pipeline is constructed on every call; nothing is cached here.
    """
    pipeline_options = {
        "model": model_name,
        "device": "cuda",
        "torch_dtype": torch.float16,
    }
    return pipeline("text-generation", **pipeline_options)
@spaces.GPU
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
):
    """Generate a completion for ``user_input`` with the selected model.

    Args:
        model_name: Hub identifier passed to ``load_model``.
        user_input: Text placed inside a ChatML-style user turn.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    pipe = load_model(model_name)

    # NOTE(review): the prompt ends after the user turn and does not open an
    # assistant turn; confirm this matches the models' chat template.
    prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n"

    generation_kwargs = {
        # UI sliders may deliver floats; generation expects an integer count.
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": 1.10,
    }

    # Coerce to float: a slider can hand back an int (e.g. 1), and sampling
    # requires a strictly positive float temperature.
    temperature = float(temperature)
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_k=int(top_k),
            top_p=float(top_p),
        )
    else:
        # Temperature 0 is selectable in the UI but invalid for sampling;
        # fall back to deterministic greedy decoding rather than raising.
        generation_kwargs["do_sample"] = False

    outputs = pipe(prompt, **generation_kwargs)
    return outputs[0]["generated_text"]
# Models offered in the "Model" dropdown; the first entry is the default.
model_choices = [
    "Locutusque/UltraQwen-7B",
    "Locutusque/UltraQwen-1_8B",
    "Locutusque/TinyMistral-248M-v2.5-Instruct",
    "M4-ai/TinyMistral-6x248M-Instruct",
]

# What are the best options?
# The input components are listed in the same order as the parameters of
# ``generate``: model, prompt, temperature, top_p, top_k, max_new_tokens.
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.Dropdown(
            choices=model_choices,
            label="Model",
            value=model_choices[0],
            interactive=True,
        ),
        gr.Textbox(
            lines=2,
            label="Prompt",
            value="How many planets are in our solar system?",
        ),
        gr.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
        gr.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
        gr.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
    ],
    outputs=[gr.Textbox(lines=10, label="Output")],
    title="Hugging Face Transformers Model",
    description="A simple interface for generating text with a Hugging Face Transformers model.",
    concurrency_limit=1,
)

g.launch(max_threads=2)