File size: 5,943 Bytes
1ed72c0
 
 
 
9157e0c
5258909
1ed72c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e97b66a
1ed72c0
5258909
1ed72c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
from ctransformers import AutoModelForCausalLM, AutoConfig, Config #import for GGUF/GGML models
import datetime

# Model identifier handed to AutoModelForCausalLM.from_pretrained below.
# The commented-out line is an alternative model id kept for reference.
# modelfile="TinyLlama/TinyLlama-1.1B-Chat-v0.6"
modelfile="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Generation settings baked into the model Config at load time. The chat
# handler further down passes its own temperature / repetition penalty /
# max-new-tokens values (taken from the UI sliders) on every call.
i_temperature = 0.30 
i_max_new_tokens=1100
i_repetitionpenalty = 1.2
# NOTE(review): the page title in the UI advertises a "4K context window",
# which does not match this value — confirm which one is intended.
i_contextlength=12048
# Path of the text file that writehistory() appends to.
logfile = 'TinyLlama.1B.txt'

print("loading model...")

# Wall-clock timing of the model load; the elapsed time is printed below.
stt = datetime.datetime.now()
conf = AutoConfig(Config(temperature=i_temperature, 
                         repetition_penalty=i_repetitionpenalty, 
                         batch_size=64,
                         max_new_tokens=i_max_new_tokens, 
                         context_length=i_contextlength))
# NOTE(review): the import comment says ctransformers is used for GGUF/GGML
# models — verify that this model id resolves to a compatible model file.
llm = AutoModelForCausalLM.from_pretrained(modelfile,
                                           model_type="llama",
                                           config=conf)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")

def writehistory(text):
    """Append ``text`` plus a trailing newline to the chat log file.

    The destination is the module-level ``logfile`` path. The file is opened
    in append mode with UTF-8 encoding for each call.

    Args:
        text: The string to append to the log.
    """
    # The context manager closes the file on exit, so no explicit close()
    # is needed afterwards.
    with open(logfile, 'a', encoding='utf-8') as f:
        f.write(text + '\n')

# Build the Gradio UI: a title/info header, the chat area with its input box
# and send button, and a side column with the generation-parameter sliders.
with gr.Blocks(theme='ParityError/Interstellar') as demo:
    # TITLE SECTION
    with gr.Row():
        with gr.Column(scale=12):
            # The heading is opened and closed with matching <h1> tags.
            gr.HTML("<center><h1>🦙 TinyLlama 1.1B 🐋 4K context window</h1></center>")
            gr.Markdown("""
            **Currently Running**: [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;  **Chat History Log File**: *TinyLlama.1B.txt*

            - **Base Model**: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T, Fine tuned on OpenOrca GPT4 subset for 1 epoch, Using CHATML format. 
            - **License**: Apache 2.0, following the TinyLlama base model. 
                        The model output is not censored and the authors do not endorse the opinions in the generated content. Use at your own risk.
            """)
        gr.Image(value='imgs/TinyLlama_logo.png', width=70)

    # chat and parameters settings
    with gr.Row():
        # Left: conversation view plus the message box and send button.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=350,
                show_copy_button=True,
                avatar_images=["imgs/user_logo.png", "imgs/TinyLlama_logo.png"],
            )
            with gr.Row():
                with gr.Column(scale=14):
                    msg = gr.Textbox(show_label=False, placeholder="Enter text", lines=2)
                submitBtn = gr.Button("\n💬 Send\n", size="lg", variant="primary", min_width=140)

        # Right: sliders whose values are passed to the model on every request.
        with gr.Column(min_width=50, scale=1):
            with gr.Tab(label="Parameter Setting"):
                gr.Markdown("# Parameters")
                top_p = gr.Slider(
                    minimum=0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    interactive=True,
                    label="Top-p",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.30,
                    step=0.01,
                    interactive=True,
                    label="Temperature",
                )
                max_length_tokens = gr.Slider(
                    minimum=0,
                    maximum=4096,
                    value=1060,
                    step=4,
                    interactive=True,
                    label="Max Generation Tokens",
                )
                rep_pen = gr.Slider(
                    minimum=0,
                    maximum=5,
                    value=1.2,
                    step=0.05,
                    interactive=True,
                    label="Repetition Penalty",
                )

            clear = gr.Button("🗑️ Clear All Messages", variant='secondary')
    def user(user_message, history):
        """Record a newly submitted message and add it to the chat history.

        The message is written to the log file with a ``USER:`` prefix.

        Args:
            user_message: Text the user submitted.
            history: Current chat history, a list of ``[user, bot]`` pairs.

        Returns:
            A tuple of an empty string and a new history list that ends with
            ``[user_message, None]``; the ``None`` slot is filled in later
            by the reply generator.
        """
        writehistory(f"USER: {user_message}")
        pending_turn = [user_message, None]
        updated_history = history + [pending_turn]
        return "", updated_history

    def bot(history, t, p, m, r):
        """Stream the model's answer to the newest user message.

        Args:
            history: Chat history as a list of ``[user, bot]`` pairs; the last
                pair holds the message to answer and receives the reply.
            t: Sampling temperature passed to the model.
            p: Top-p value passed to the model.
            m: Maximum number of new tokens to generate.
            r: Repetition penalty passed to the model.

        Yields:
            ``history`` after each streamed chunk has been appended to the
            last pair's reply, so the chat view updates incrementally.
        """
        # Only the latest user message is placed in the prompt: the system
        # turn is sent empty and earlier exchanges are not included.
        # NOTE(review): a system-prompt text used to be defined here but was
        # never inserted into the prompt; it was dropped as an unused local.
        prompt = f"<|im_start|>system<|im_end|><|im_start|>user\n{history[-1][0]}<|im_end|>\n<|im_start|>assistant\n"
        print(f"history lenght: {len(history)}")
        if len(history) == 1:
            print("this is the first round")
        else:
            print("here we should pass more conversations")

        # Start from an empty reply and grow it as chunks arrive.
        history[-1][1] = ""
        for chunk in llm(prompt,
                         temperature=t,
                         top_p=p,
                         repetition_penalty=r,
                         max_new_tokens=m,
                         stop=['<|im_end|>'],
                         stream=True):
            history[-1][1] += chunk
            yield history

        params = f"temperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}"
        # Log only the reply just generated (matching the terminal output
        # below) rather than the repr of the whole history list.
        writehistory(f"{params}\n---\nBOT: {history[-1][1]}\n\n")
        # Log in the terminal the messages
        print(f"USER: {history[-1][0]}\n---\n{params}\n---\nBOT: {history[-1][1]}\n\n")
    # Send button: first store the user's message in the chat (and empty the
    # textbox) without queueing, then stream the model reply using the
    # current values of the parameter sliders.
    submit_event = submitBtn.click(user, [msg, chatbot], [msg, chatbot], queue=False)
    submit_event.then(
        bot,
        [chatbot, temperature, top_p, max_length_tokens, rep_pen],
        chatbot,
    )
    # Clear button: set the chatbot value to None.
    clear.click(lambda: None, None, chatbot, queue=False)
    
# Enable the event queue; required to yield the streams from the text
# generation (the reply handler is a generator).
demo.queue()  # required to yield the streams from the text generation
# Start the app.
# NOTE(review): share=True asks Gradio for a publicly reachable link and
# inbrowser=True tries to open a local browser tab — confirm both are wanted
# where this script is deployed.
demo.launch(inbrowser=True, share=True)