File size: 2,722 Bytes
709d394 486a2f6 cf5249f 709d394 3ea359d 5cf089b a13c01c 515f252 f7f4304 38fedf1 fbcfe87 709d394 14a9208 709d394 3ea359d 709d394 dc0acc6 709d394 ef2fea2 600a2a9 709d394 29437cc 709d394 1cdad52 4f6966f b5aae38 6e1661f cd0aa02 6e1661f cd0aa02 a13c01c 6e1661f a13c01c b5aae38 acf224c 7fc9307 acf224c 7fc9307 a13c01c 8325138 1cdad52 4f6966f 1cdad52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import spaces
import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,AwqConfig
import torch
import os
key = os.environ.get("key")
from huggingface_hub import login
login(key)
nf4_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True,
bnb_4bit_compute_dtype=torch.bfloat16
)
model_id = "Viet-Mistral/Vistral-7B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id,quantization_config=nf4_config)
@spaces.GPU
def generate_response(user_input, max_new_tokens, temperature):
os.system("nvidia-smi")
messages = [{"role": "user", "content": user_input}]
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
input_ids = input_ids.to(model.device)
gen_tokens = model.generate(
input_ids = input_ids,
max_new_tokens=max_new_tokens,
do_sample=True,
temperature=temperature,
)
gen_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
if gen_text.startswith(user_input):
gen_text = gen_text[len(user_input):].lstrip()
return gen_text
examples = [
{"message": "What is the weather like today?", "max_new_tokens": 250, "temperature": 0.5},
{"message": "Tell me a joke.", "max_new_tokens": 650, "temperature": 0.7},
{"message": "Explain the concept of machine learning.", "max_new_tokens": 980, "temperature": 0.4}
]
example_choices = [f"Example {i+1}" for i in range(len(examples))]
def load_example(choice):
index = example_choices.index(choice)
example = examples[index]
return example["message"], example["max_new_tokens"], example["temperature"]
with gr.Blocks() as demo:
with gr.Row():
max_new_tokens_slider = gr.Slider(minimum=100, maximum=4000, value=980, label="Max New Tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Temperature")
message_box = gr.Textbox(lines=2, label="Your Message")
generate_button = gr.Button("Try🫡Command-R")
output_box = gr.Textbox(label="🫡Command-R")
generate_button.click(
fn=generate_response,
inputs=[message_box, max_new_tokens_slider, temperature_slider],
outputs=output_box
)
example_dropdown = gr.Dropdown(label="🫡Load Example", choices=example_choices)
example_button = gr.Button("🫡Load")
example_button.click(
fn=load_example,
inputs=example_dropdown,
outputs=[message_box, max_new_tokens_slider, temperature_slider]
)
demo.launch()
|