# Hugging Face Space (page status when captured: Sleeping)
import gradio as gr | |
from transformers import pipeline, AutoTokenizer | |
import torch | |
import spaces | |
def load_model(model_name):
    """Build a text-generation pipeline for ``model_name``.

    The pipeline is requested on the ``"cuda"`` device with
    ``torch.float16`` as its dtype and ``trust_remote_code`` enabled.
    A new pipeline is constructed on every call; nothing is cached here.

    Args:
        model_name: Model identifier forwarded to ``pipeline`` as ``model``.

    Returns:
        The object returned by ``transformers.pipeline``.
    """
    pipeline_options = {
        "model": model_name,
        "device": "cuda",
        "torch_dtype": torch.float16,
        "trust_remote_code": True,
    }
    return pipeline("text-generation", **pipeline_options)
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
):
    """Generate a reply to ``user_input`` with the selected model.

    The input is wrapped in a single-turn ChatML-style prompt and passed to
    the text-generation pipeline built by ``load_model``.

    Args:
        model_name: Model identifier forwarded to ``load_model``.
        user_input: The user's message, inserted verbatim into the prompt.
        temperature: Sampling temperature. A value <= 0 selects greedy
            decoding instead of sampling.
        top_p: Nucleus-sampling threshold (only used when sampling).
        top_k: Top-k cutoff (only used when sampling).
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The ``generated_text`` field of the first pipeline output.
        NOTE(review): with the pipeline's default settings this presumably
        still contains the prompt text -- confirm against the installed
        transformers version.
    """
    pipe = load_model(model_name)

    # Single-turn prompt using the <|im_start|>/<|im_end|> chat markers.
    prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    generation_kwargs = {
        # UI sliders may deliver numbers as floats; token counts must be ints.
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": 1.10,
    }
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_k=int(top_k),
            top_p=float(top_p),
        )
    else:
        # Sampling with a non-positive temperature is rejected by
        # transformers, so fall back to deterministic greedy decoding.
        generation_kwargs["do_sample"] = False

    outputs = pipe(prompt, **generation_kwargs)
    return outputs[0]["generated_text"]
# Models offered in the dropdown; the first entry is the default selection.
model_choices = [
    "Locutusque/UltraQwen-7B",
    "Locutusque/UltraQwen-1_8B",
    "Locutusque/TinyMistral-248M-v2.5-Instruct",
    "M4-ai/TinyMistral-6x248M-Instruct",
    "Locutusque/Hercules-1.0-Mistral-7B",
]

# What are the best options?
# Component order matches the positional parameters of ``generate``.
input_components = [
    gr.Dropdown(
        choices=model_choices,
        label="Model",
        value=model_choices[0],
        interactive=True,
    ),
    gr.Textbox(
        lines=2,
        label="Prompt",
        value="How many planets are in our solar system?",
    ),
    gr.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
    gr.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
    gr.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
    gr.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
]

g = gr.Interface(
    fn=generate,
    inputs=input_components,
    outputs=[gr.Textbox(lines=10, label="Output")],
    title="Hugging Face Transformers Model",
    description="Try out Locutusque's language models here! Credit goes to Mediocreatmybest for this space.",
    concurrency_limit=1,
)

g.launch(max_threads=2)