"""Gradio chat front-end that streams replies from an HTTP text-generation endpoint."""

import json
import random
import time
from typing import Iterator, List, Optional

import gradio as gr
import requests

# Endpoint that receives the prompt and streams back generated text.
GENERATE_URL = "http://164.52.204.24/generate"

# (connect, read) timeouts in seconds. Without a timeout a stalled server
# would block the worker handling this chat turn indefinitely.
REQUEST_TIMEOUT = (10, 120)

# Pause between streamed UI updates, in seconds.
STREAM_DELAY_SECONDS = 0.05

# Chat history as used below: a list of [user_message, bot_message] pairs,
# where bot_message is None until the bot has started answering.
History = List[List[Optional[str]]]


def http_yield(prompt: str) -> Iterator[str]:
    """Yield a canned reply one character at a time.

    Offline stand-in for :func:`http_bot_yield`; it performs no network
    access and ignores ``prompt``.

    Args:
        prompt: The user's message (unused).

    Yields:
        Successive characters of a randomly chosen canned reply.
    """
    bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"])
    yield from bot_message


def http_bot_yield(prompt: str) -> Iterator[str]:
    """Stream the model's reply to ``prompt`` from the generation endpoint.

    The prompt is wrapped in ``[INST] ... [/INST]`` markers and posted as
    JSON. The response body is read as NUL-delimited JSON objects; for each
    one, the first entry of its ``"text"`` list is taken and everything up
    to and including the last ``'[/INST] '`` marker is stripped.

    Args:
        prompt: The user's message.

    Yields:
        The text after the instruction marker for each streamed chunk.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    headers = {"User-Agent": "vLLM Client"}
    pload = {
        "prompt": f"[INST] {prompt} [/INST] ",
        "stream": True,
        "max_tokens": 1024,
        "temperature": 0.1,
    }
    # The context manager releases the connection even if the consumer stops
    # iterating early (e.g. the generator is closed mid-stream).
    with requests.post(
        GENERATE_URL,
        headers=headers,
        json=pload,
        stream=True,
        timeout=REQUEST_TIMEOUT,
    ) as response:
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # when the server answers with an error page.
        response.raise_for_status()
        for chunk in response.iter_lines(
            chunk_size=8192, decode_unicode=False, delimiter=b"\0"
        ):
            if not chunk:
                continue
            data = json.loads(chunk.decode("utf-8"))
            yield data["text"][0].split('[/INST] ')[-1]


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message: str, history: History):
        """Append the user's message to the history and clear the textbox.

        Args:
            user_message: Text submitted from the textbox.
            history: Current chat history.

        Returns:
            A tuple of the new textbox value (empty string) and a new
            history list with ``[user_message, None]`` appended.
        """
        return "", history + [[user_message, None]]

    def bot(history: History) -> Iterator[History]:
        """Fill in the bot's answer to the latest user message, streaming it.

        Args:
            history: Chat history whose last entry holds the pending user
                message.

        Yields:
            The history after each streamed update of the last bot message.
        """
        prompt = history[-1][0]
        history[-1][1] = ""
        for partial_reply in http_bot_yield(prompt):
            # Each streamed chunk replaces (rather than extends) the bot
            # message, i.e. it is treated as the full reply so far.
            history[-1][1] = partial_reply
            time.sleep(STREAM_DELAY_SECONDS)
            yield history

    # `user` runs immediately (unqueued) so the message shows up at once;
    # `bot` then streams the answer into the chatbot.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.title = "🐢 Olive: OdiaGPT Model built by the OdiaGenAI Team"
demo.queue()
demo.launch()