Yi-9B / app.py
Tonic's picture
Update app.py
afe5a38 verified
raw
history blame contribute delete
No virus
5.41 kB
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import gradio as gr
import sentencepiece
# Allocator tuning for PyTorch's CUDA caching allocator: blocks larger than
# 120 MB are not split, which is intended to reduce memory fragmentation.
os.environ.update(PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:120")

# Hugging Face Hub repository the tokenizer and weights are loaded from.
model_id = "01-ai/Yi-1.5-9B-Chat"
# Local directory for a custom tokenizer; only referenced by disabled code.
tokenizer_path = "./"
# NOTE: a manual EOS override (eos_token_id = 7) was tried here and is disabled.
# Markdown/HTML banner rendered at the top of the Space (bilingual zh/en).
# Fixes relative to the previous text: the Discord link in the Chinese
# paragraph was missing its opening "[", an orphan "</h3>" closing tag and a
# stray double quote were removed, a duplicated "在" was dropped, and the model
# link label now matches the URL it points at.
DESCRIPTION = """
# 欢迎来到 Tonic的 YI-9B基地!👋🏻Welcome to 🙋🏻‍♂️Tonic's🧑🏻‍🚀Yi-1.5-9B-Chat!🚀
You can use this Space to test out the current model 您可以使用此空间测试当前模型 [01-ai/Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat) 您也可以通过克隆这个空间来使用 YI-9B基地 只需点击这里.
You can also use 🧑🏻‍🚀01-ai/Yi-1.5-9B-Chat🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/Yi-9B?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
加入我们:🌟TeamTonic 总是在制作酷炫的演示!加入我们活跃的建设者🛠️社区,在👻[Discord](https://discord.gg/nXx5wbX9),在🤗Huggingface:[TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) 在🌐Github [Tonic-AI](https://github.com/tonic-ai)上,为🌟[Multitonic](https://github.com/tonic-ai/multitonic)做出贡献。 Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [Multitonic](https://github.com/tonic-ai/multitonic)
"""
# Load the tokenizer and the causal LM once at import time so every request
# reuses the same objects.
# `device_map` is a model-placement option; it is not a tokenizer setting, so
# it is passed only to the model loader below.
# NOTE(security): trust_remote_code=True allows Python code shipped in the
# model repository to run locally; keep it only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # let the loader decide where the weights are placed
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
# NOTE: loading a custom tokenizer from `tokenizer_path` and overriding
# tokenizer/model eos_token_id were previously tried here and are disabled.
def format_prompt(user_message, system_message="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and follow ethical guidelines and promote positive behavior."):
    """Build a single-turn ChatML prompt that ends with an open assistant turn.

    Every turn is written as ``<|im_start|>{role}\\n{content}<|im_end|>\\n``.
    The previous implementation labelled the system text with the
    ``assistant`` role, put a newline between ``<|im_start|>`` and ``user``,
    and omitted ``<|im_start|>`` before the final assistant turn.

    Args:
        user_message: Text of the user's request.
        system_message: Instructions for the model. When empty or ``None`` the
            system turn is left out entirely.

    Returns:
        The prompt string, ready to be tokenized and passed to ``generate``.
    """
    # NOTE(review): this is the generic ChatML layout; confirm it against the
    # chat template shipped with the checkpoint's tokenizer.
    turns = []
    if system_message:
        turns.append(f"<|im_start|>system\n{system_message}<|im_end|>\n")
    turns.append(f"<|im_start|>user\n{user_message}<|im_end|>\n")
    # Leave the assistant turn open so the model writes the reply.
    turns.append("<|im_start|>assistant\n")
    return "".join(turns)
@spaces.GPU
def predict(message, system_message, max_new_tokens=600, temperature=3.5, top_p=0.9, top_k=40, do_sample=False):
    """Generate one reply for ``message`` and return it as chatbot history.

    Args:
        message: The user's message.
        system_message: System prompt forwarded to ``format_prompt``.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; only used when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; only used when ``do_sample`` is true.
        top_k: Top-k cutoff; only used when ``do_sample`` is true.
        do_sample: Sample from the distribution instead of decoding greedily.

    Returns:
        A one-element list ``[(message, response)]`` holding the
        (user message, bot reply) pair for the chatbot component.
    """
    formatted_prompt = format_prompt(message, system_message)
    input_ids = tokenizer.encode(formatted_prompt, return_tensors='pt').to(model.device)
    prompt_length = input_ids.shape[-1]

    generation_kwargs = {
        # A single unpadded sequence: every position is a real token. Passing
        # the mask explicitly avoids ambiguity because pad and EOS share an id.
        "attention_mask": torch.ones_like(input_ids),
        # UI sliders may deliver floats; generate() expects an integer count.
        "max_new_tokens": int(max_new_tokens),
        "no_repeat_ngram_size": 9,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": bool(do_sample),
    }
    if do_sample:
        # Sampling knobs have no effect on greedy decoding, so they are only
        # supplied when sampling is actually enabled.
        generation_kwargs.update(
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )

    response_ids = model.generate(input_ids, **generation_kwargs)

    # Decode only the newly generated tokens, not the echoed prompt.
    response = tokenizer.decode(response_ids[0, prompt_length:], skip_special_tokens=True)
    # Cut at the end-of-turn marker in case it survives decoding as plain text.
    response = response.partition("<|im_end|>")[0]

    # Chatbot history entries are (user message, bot reply) pairs; show what
    # the user actually typed rather than a constant "bot" placeholder.
    return [(message, response)]
# Build the Gradio interface and wire the Submit button to `predict`.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost -- confirm the grouping of components
# (in particular whether the accordion belongs inside the last Group).
with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)
    # Free-text inputs: the user's message and an editable system prompt.
    with gr.Group():
        textbox = gr.Textbox(placeholder='Your Message Here', label='Your Message', lines=2)
        system_prompt = gr.Textbox(placeholder='Provide a System Prompt In The First Person', label='System Prompt', lines=2, value="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.")
    # Output area that receives the list returned by `predict`.
    with gr.Group():
        chatbot = gr.Chatbot(label='TonicYi-9B-Base-🧠🤯')
    with gr.Group():
        submit_button = gr.Button('Submit', variant='primary')
    # Generation settings. Because every control is passed to `predict` on
    # click, these initial values are used instead of predict's own defaults.
    with gr.Accordion(label='Advanced options', open=False):
        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=4056)
        temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
        top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
        # This checkbox is passed as predict's `do_sample` argument.
        do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=True)
    # The order of `inputs` must match predict's positional parameters:
    # message, system_message, max_new_tokens, temperature, top_p, top_k, do_sample.
    submit_button.click(
        fn=predict,
        inputs=[textbox, system_prompt, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
        outputs=chatbot
    )
# Start the web server for the app.
demo.launch()