import os

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

title = "Welcome to 🙋🏻‍♂️Tonic's🌷Tulu Chat!"
description = "[allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b) and larger Tulu-2 models are Instruct Llama Finetunes using the [mistralai/Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) recipe. You can use [allenai/tulu-2-13b](https://huggingface.co/allenai/tulu-2-13b) here via API using Gradio by scrolling down and clicking Use 'Via API' or privately by [cloning this space on huggingface](https://huggingface.co/spaces/Tonic1/TuluDemo?duplicate=true) See also the large model here : [allenai/tulu-2-dpo-70b](https://huggingface.co/allenai/tulu-2-dpo-70b) . [Join my active builders' server on discord](https://discord.gg/VqTxc76K3u). Let's build together!."

os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
model_name = "allenai/tulu-2-dpo-13b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
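
# A minimal 4-bit loading sketch for smaller GPUs; an assumption, not what
# this Space ships with (requires the bitsandbytes package):
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     load_in_4bit=True,   # quantize weights to 4-bit at load time
#     device_map="auto",
# )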

class TuluChatBot:
    def __init__(self, model, tokenizer, system_message="You are 🌷Tulu, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."):
        self.model = model
        self.tokenizer = tokenizer
        self.system_message = system_message

    def set_system_message(self, new_system_message):
        self.system_message = new_system_message

    def format_prompt(self, user_message):
        # Tulu models expect role tags on their own lines, with the system
        # message under <|system|>, not <|assistant|>.
        prompt = f"<|system|>\n{self.system_message}\n<|user|>\n{user_message}\n<|assistant|>\n"
        return prompt
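
    # Equivalently, newer transformers releases can render the chat template
    # bundled with the tokenizer (a sketch assuming transformers >= 4.34, not
    # what this Space uses):
    # def format_prompt(self, user_message):
    #     messages = [
    #         {"role": "system", "content": self.system_message},
    #         {"role": "user", "content": user_message},
    #     ]
    #     return self.tokenizer.apply_chat_template(
    #         messages, tokenize=False, add_generation_prompt=True
    #     )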

    def predict(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
        prompt = self.format_prompt(user_message)
        inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
        input_ids = inputs["input_ids"].to(self.model.device)
        attention_mask = inputs["attention_mask"].to(self.model.device)

        output_ids = self.model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=do_sample,
            pad_token_id=self.tokenizer.eos_token_id,  # Llama tokenizers define no pad token
        )

        # Decode only the newly generated tokens, not the echoed prompt.
        response = self.tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
        return response

def gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
    Tulu_bot.set_system_message(system_message)
    if not do_sample:
        # Greedy decoding ignores temperature and top_p; keep a neutral
        # repetition penalty (values below 1.0 would reward repetition).
        temperature = 1.0
        top_p = 1.0
        repetition_penalty = 1.0
    response = Tulu_bot.predict(user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample)
    return response

Tulu_bot = TuluChatBot(model, tokenizer)


with gr.Blocks(theme="ParityError/Anime") as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)
    with gr.Row():
        user_message = gr.Textbox(label="Your Message", lines=3)
        system_message = gr.Textbox(label="Introduce a Character Here or Set a Scene (system prompt)", lines=2)
    with gr.Row():
        do_sample = gr.Checkbox(label="Advanced", value=False)
    
    # Advanced settings in an Accordion, collapsed by default
    with gr.Accordion("Advanced Settings", open=False):
        with gr.Row():
            max_new_tokens = gr.Slider(label="Max new tokens", value=1269, minimum=550, maximum=3200, step=1)
            temperature = gr.Slider(label="Temperature", value=1.2, minimum=0.05, maximum=4.0, step=0.05)
            top_p = gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05)
            repetition_penalty = gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)

    submit_button = gr.Button("Submit")
    output_text = gr.Textbox()

    submit_button.click(
        gradio_predict,
        inputs=[user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample],
        outputs=output_text
    )
    

demo.launch()
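
# If the demo sees concurrent traffic (e.g. through the API route noted in the
# description), enabling the request queue before launching is a common option:
# demo.queue().launch()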