"""Gradio chat UI for a TinyLlama 1.1B model served through ctransformers.

The script loads the model at import time, builds a Blocks UI with a chat
pane and generation-parameter sliders, streams the model reply into the chat
window, and appends every exchange to a plain-text log file.
"""
import datetime

import gradio as gr
from ctransformers import AutoModelForCausalLM, AutoConfig, Config  # import for GGUF/GGML models

# Alternative model sources kept for reference:
# modelfile = "models/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
# modelfile="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
# NOTE(review): the UI banner below advertises "TinyLlama-1.1B-Chat-v0.6" and
# a 4K context window, while this loads the intermediate-step checkpoint with
# context_length=12048 -- confirm which model/context size is intended.
modelfile = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Default generation settings used when the model is loaded.
i_temperature = 0.30
i_max_new_tokens = 1100
i_repetitionpenalty = 1.2
i_contextlength = 12048
logfile = 'TinyLlama.1B.txt'

# ChatML system turn sent ahead of every user message.
SYSTEM_PROMPT = """<|im_start|>system
You are a customer support chatbot for an online platform. Your purpose is to assist users with their inquiries and provide accurate information. You have been trained with a knowledge base that includes rules and limitations regarding chargebacks. The knowledge base consists of the following information:
1. Chargebacks beyond 90 days are not possible.
2. Chargebacks above $1000 are not allowed.
3. Chargebacks for transactions with a valid 3D secure are not allowed.
Use the provided conversation example as a starting point for training. Your goal is to respond to user queries in a helpful and informative manner, ensuring that you adhere to the platform's chargeback policies.
<|im_end|>
"""

print("loading model...")
stt = datetime.datetime.now()
conf = AutoConfig(Config(temperature=i_temperature,
                         repetition_penalty=i_repetitionpenalty,
                         batch_size=64,
                         max_new_tokens=i_max_new_tokens,
                         context_length=i_contextlength))
llm = AutoModelForCausalLM.from_pretrained(modelfile,
                                           model_type="llama",
                                           config=conf)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")


def writehistory(text):
    """Append ``text`` followed by a newline to the chat log file."""
    # The context manager closes the file; no explicit close() is needed.
    with open(logfile, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')


def _build_prompt(user_message):
    """Return the ChatML prompt: system turn, one user turn, open assistant turn."""
    return (
        f"{SYSTEM_PROMPT}"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )


# NOTE(review): the widget nesting below was reconstructed from a copy of the
# file that had lost its indentation -- confirm the layout renders as intended.
with gr.Blocks(theme='ParityError/Interstellar') as demo:
    # TITLE SECTION
    with gr.Row():
        with gr.Column(scale=12):
            # NOTE(review): the HTML tags were missing from the copy under
            # review; the <center>/<h1> wrapper is a reconstruction -- confirm.
            gr.HTML("<center>"
                    + "<h1>\U0001F999 TinyLlama 1.1B \U0001F40B 4K context window</h1></center>")
            gr.Markdown("""
            **Currently Running**: [TinyLlama/TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; **Chat History Log File**: *TinyLlama.1B.txt*

            - **Base Model**: TinyLlama/TinyLlama-1.1B-Chat-v0.6, Fine tuned on OpenOrca GPT4 subset for 1 epoch, Using CHATML format.
            - **License**: Apache 2.0, following the TinyLlama base model. The model output is not censored and the authors do not endorse the opinions in the generated content. Use at your own risk.
            """)
        gr.Image(value='imgs/TinyLlama_logo.png', width=70)

    # chat and parameters settings
    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=350, show_copy_button=True,
                                 avatar_images=["imgs/user_logo.png", "imgs/TinyLlama_logo.png"])
            with gr.Row():
                with gr.Column(scale=14):
                    msg = gr.Textbox(show_label=False,
                                     placeholder="Enter text",
                                     lines=2)
                submitBtn = gr.Button("\n\U0001F4AC Send\n", size="lg", variant="primary", min_width=140)

        with gr.Column(min_width=50, scale=1):
            with gr.Tab(label="Parameter Setting"):
                gr.Markdown("# Parameters")
                top_p = gr.Slider(minimum=0, maximum=1.0, value=0.95, step=0.05,
                                  interactive=True, label="Top-p")
                temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.30, step=0.01,
                                        interactive=True, label="Temperature")
                max_length_tokens = gr.Slider(minimum=0, maximum=4096, value=1060, step=4,
                                              interactive=True, label="Max Generation Tokens")
                rep_pen = gr.Slider(minimum=0, maximum=5, value=1.2, step=0.05,
                                    interactive=True, label="Repetition Penalty")

            clear = gr.Button("\U0001F5D1\uFE0F Clear All Messages", variant='secondary')

    def user(user_message, history):
        """Log the user's message and append it to the chat as an unanswered turn.

        Returns an empty string (written back to the textbox) and the history
        extended with ``[user_message, None]``.
        """
        writehistory(f"USER: {user_message}")
        return "", history + [[user_message, None]]

    def bot(history, t, p, m, r):
        """Stream the model's reply to the last user message into ``history``.

        Args:
            history: chat history as a list of ``[user, assistant]`` pairs; the
                last pair's assistant slot is filled in place.
            t: sampling temperature.
            p: top-p value.
            m: maximum number of new tokens to generate.
            r: repetition penalty.

        Yields:
            ``history`` after each streamed chunk, so the chat pane updates
            incrementally.
        """
        # Only the latest user message is sent; earlier turns are not included.
        prompt = _build_prompt(history[-1][0])
        print(f"history lenght: {len(history)}")
        if len(history) == 1:
            print("this is the first round")
        else:
            print("here we should pass more conversations")
        history[-1][1] = ""
        for token in llm(prompt,
                         temperature=t,
                         top_p=p,
                         repetition_penalty=r,
                         max_new_tokens=m,
                         stop=['<|im_end|>'],
                         stream=True):
            history[-1][1] += token
            yield history
        # Log only the latest reply: user() has already logged the user turn,
        # and this matches what is printed to the terminal below.
        writehistory(f"temperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history[-1][1]}\n\n")
        # Log in the terminal the messages
        print(f"USER: {history[-1][0]}\n---\ntemperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history[-1][1]}\n\n")

    # Clicking the submitBtn will call the generation with Parameters in the slides
    submitBtn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, top_p, max_length_tokens, rep_pen], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()  # required to yield the streams from the text generation
demo.launch(inbrowser=True,
            share=True)