"""Gradio chat demo backed by the Firefly BLOOM causal language model.

Loads the model once at import time, applies Intel Extension for PyTorch
optimizations, and serves a minimal chat UI built with ``gr.Blocks``.
"""

import random
import time

import gradio as gr
import intel_extension_for_pytorch as ipex
from transformers import BloomTokenizerFast, BloomForCausalLM

# Hugging Face model identifier used for both the tokenizer and the weights.
path = 'YeungNLP/firefly-2b6-v2'

tokenizer = BloomTokenizerFast.from_pretrained(path)
model = BloomForCausalLM.from_pretrained(path)
model.eval()  # inference only: disables dropout and similar training behavior
model = ipex.optimize(model)


def generate(text):
    """Generate a sampled model reply for ``text``.

    Args:
        text: The user prompt to feed to the model.

    Returns:
        The decoded continuation produced by the model, with the prompt and
        any special tokens removed and surrounding whitespace stripped.
    """
    prompt = '{}'.format(text)
    # NOTE(review): the prompt is passed through unchanged here; confirm
    # against the model card whether it expects a wrapping prompt template.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    outputs = model.generate(
        input_ids,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.7,
        temperature=0.35,
        repetition_penalty=1.2,
        eos_token_id=tokenizer.eos_token_id,
    )
    # A causal LM returns the prompt tokens followed by the continuation.
    # Slice the prompt off by token count instead of string-replacing it:
    # a textual replace would also delete any place where the reply quotes
    # the prompt, and silently fails if decoding does not round-trip exactly.
    new_tokens = outputs[0][input_ids.shape[1]:]
    # skip_special_tokens drops markers such as the EOS token from the reply.
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Append the model's reply to the chat history.

        Args:
            message: Text submitted from the textbox.
            chat_history: List of ``(user_message, bot_message)`` pairs shown
                in the chatbot; may be ``None`` when the chat is empty.

        Returns:
            A tuple ``("", chat_history)``: the empty string clears the
            textbox and the updated history refreshes the chatbot.
        """
        # The Clear button sets the chatbot value to None, so normalise it.
        chat_history = chat_history or []
        # Ignore blank submissions rather than sending an empty prompt.
        if not message or not message.strip():
            return "", chat_history
        bot_message = generate(message)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(api_open=False)

if __name__ == "__main__":
    demo.launch()