"""Side-by-side Gradio chat demo: Meta Llama 3.1 8B versus Meta Llama 3 8B.

Every submitted message is forwarded through two ``gradio_client.Client``
instances (configured by the ``LLAMA_3`` and ``LLAMA_3_1`` environment
variables) and the results are displayed in two adjacent chatbots.
"""

import os

import gradio as gr
from gradio_client import Client


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty.  Without this check
            the client would be built from the literal string "None".
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "it must identify the Gradio app to connect to."
        )
    return value


# Environment variables used to access the private Spaces via Gradio clients.
# HF_TOKEN stays optional; the two app identifiers are required.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
LLAMA_3 = _require_env("LLAMA_3")
LLAMA_3_1 = _require_env("LLAMA_3_1")

# Gradio Python clients for the two Llama chatbots.
client_llama3_1 = Client(LLAMA_3_1, hf_token=HF_TOKEN)
client_llama3 = Client(LLAMA_3, hf_token=HF_TOKEN)

# NOTE(review): this rule targets <h1>, but TITLE below contains no <h1> tag;
# confirm whether heading markup was lost from TITLE and the placeholders.
css = """ h1 { margin: 0; flex-grow: 1; font-size: 24px; min-width: 200px; } """

TITLE = """

Meta Llama3.1 8B V/s Meta Llama3 8B

"""

PLACEHOLDER_LLAMA3 = """

Meta Llama3

Ask me anything...

"""

PLACEHOLDER_LLAMA3_1 = """

Meta Llama3.1

Ask me anything...

"""


# Inference functions for the chatbots

def _append_user_turn(user_message, history):
    """Return ``""`` (to clear the textbox) and *history* plus a pending turn.

    The pending turn is ``[user_message, None]``; a new list is returned and
    *history* itself is not modified.
    """
    return "", history + [[user_message, None]]


def user_llama3(user_message, history_llama3):
    """Queue *user_message* in the Llama 3 chat history and clear the textbox."""
    return _append_user_turn(user_message, history_llama3)


def user_llama3_1(user_message, history_llama3_1):
    """Queue *user_message* in the Llama 3.1 chat history and clear the textbox."""
    return _append_user_turn(user_message, history_llama3_1)


def chat_llama3(history_llama3, temp, max_tokens):
    """Yield the Llama 3 history as results arrive for its last user message.

    Args:
        history_llama3: list of ``[user, assistant]`` pairs; the last pair's
            assistant slot is overwritten in place.
        temp: temperature value forwarded to the remote app.
        max_tokens: max-new-tokens value forwarded to the remote app.

    Yields:
        The same history list after each result containing "assistant".
    """
    history_llama3[-1][1] = ""
    job = client_llama3.submit(history_llama3[-1][0], temp, max_tokens)
    for result in job:
        # Presumably the remote output carries an "assistant" role marker
        # before the reply; only the text after the LAST marker is shown.
        # NOTE(review): a reply that itself contains the word "assistant"
        # would be cut at that point - confirm the remote output format.
        if "assistant" in result:
            history_llama3[-1][1] = result.rpartition("assistant")[2]
            yield history_llama3


def chat_llama3_1(history_llama3_1, temp, max_tokens):
    """Yield the Llama 3.1 history as results arrive for its last user message.

    Args:
        history_llama3_1: list of ``[user, assistant]`` pairs; the last pair's
            assistant slot is overwritten in place with each result.
        temp: temperature value forwarded to the remote app.
        max_tokens: max-new-tokens value forwarded to the remote app.

    Yields:
        The same history list after every result.
    """
    history_llama3_1[-1][1] = ""
    job = client_llama3_1.submit(history_llama3_1[-1][0], temp, max_tokens)
    for result in job:
        history_llama3_1[-1][1] = result
        yield history_llama3_1


# Gradio components (created up front, rendered inside the Blocks layout below)
chatbot_llama3 = gr.Chatbot(
    height=450,
    label='Llama3 8b Chat',
    placeholder=PLACEHOLDER_LLAMA3,
)
chatbot_llama3_1 = gr.Chatbot(
    height=450,
    label='Llama3.1 8b Chat',
    placeholder=PLACEHOLDER_LLAMA3_1,
)
textbox = gr.Textbox(
    placeholder="Type your text and press Enter",
    scale=7,
    label="User Messages",
)
additional_inputs_accordion = gr.Accordion(
    label="⚙️ Parameters",
    open=False,
    render=False,
)
# NOTE(review): the default 0.95 is not a multiple of step=0.1 - confirm intended.
temperature = gr.Slider(
    minimum=0,
    maximum=1,
    step=0.1,
    value=0.95,
    label="Temperature",
    render=False,
)
max_tokens = gr.Slider(
    minimum=128,
    maximum=4096,
    step=1,
    value=512,
    label="Max new tokens",
    render=False,
)

examples = [
    ["There's a llama in my garden 😱 What should I do?"],
    ["What is the best way to open a can of worms?"],
    ["The odd numbers in this group add up to an even number: 15, 32, 5, 13, 82, 7, 1. "],
    ['How to setup a human base on Mars? \nGive short answer.'],
    ['Explain theory of relativity to me like I’m 8 years old.'],
    ['What is 9,000 * 9,000?'],
    ['Write a pun-filled happy birthday message to my friend Alex.'],
    ['Justify why a penguin might make a good king of the jungle.'],
]

with gr.Blocks(fill_height=True, css=css) as demo:
    gr.HTML(TITLE)

    # The two chatbots sit side by side.
    with gr.Row():
        chatbot_llama3_1.render()
        chatbot_llama3.render()

    with gr.Row():
        textbox.render()
        clear = gr.Button("Clear")

    additional_inputs_accordion.render()
    with additional_inputs_accordion:
        temperature.render()
        max_tokens.render()

    # Rebinds ``examples`` from the plain list to the Examples component.
    examples = gr.Examples(examples, textbox)

    # One submit fans out to both models: each chain first records the user
    # turn (and clears the textbox), then fills in that model's answer.
    textbox.submit(
        user_llama3,
        [textbox, chatbot_llama3],
        [textbox, chatbot_llama3],
        queue=False,
    ).then(
        chat_llama3,
        [chatbot_llama3, temperature, max_tokens],
        chatbot_llama3,
    )
    textbox.submit(
        user_llama3_1,
        [textbox, chatbot_llama3_1],
        [textbox, chatbot_llama3_1],
        queue=False,
    ).then(
        chat_llama3_1,
        [chatbot_llama3_1, temperature, max_tokens],
        chatbot_llama3_1,
    )

    # "Clear" resets both chat histories.
    clear.click(lambda: None, None, chatbot_llama3, queue=False)
    clear.click(lambda: None, None, chatbot_llama3_1, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True)