"""Minimal Gradio chat UI backed by the ``redoop/opt-1.3b`` causal language model.

The model and tokenizer are loaded once at import time; the Blocks app wires a
textbox to a chatbot component and answers each submitted message with a
sampled completion.
"""

import random  # noqa: F401  (kept: file-level import from the original script)
import time  # noqa: F401  (kept: file-level import from the original script)

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "redoop/opt-1.3b"
MAX_NEW_TOKENS = 200

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model.eval()
# NOTE: the original script carried a disabled optional optimization step:
# `import intel_extension_for_pytorch as ipex; model = ipex.optimize(model)`.
# It is left disabled here as well.


def generate(text):
    """Return a sampled continuation of *text* without the prompt itself.

    Args:
        text: The prompt to continue.

    Returns:
        The newly generated text only, with special tokens removed and
        surrounding whitespace stripped.
    """
    encoded = tokenizer(text, return_tensors="pt")
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    outputs = model.generate(
        input_ids=input_ids,
        # Pass the mask explicitly so generate() does not have to guess which
        # positions are padding.
        attention_mask=encoded["attention_mask"],
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        top_p=0.7,
        temperature=0.35,
        repetition_penalty=1.2,
        eos_token_id=tokenizer.eos_token_id,
    )

    # A causal LM returns prompt + continuation; slice by token count rather
    # than string-replacing the prompt, which would also delete any later
    # occurrence of the prompt text and fails when decoding does not
    # round-trip the prompt exactly.
    completion_ids = outputs[0][prompt_length:]
    completion = tokenizer.decode(completion_ids, skip_special_tokens=True)

    # Fallback scrub in case the marker is emitted as plain text rather than
    # as a registered special token.
    return completion.replace("<|endoftext|>", "").strip()


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Answer *message* and append the exchange to *chat_history*.

        Args:
            message: Text submitted from the textbox.
            chat_history: List of ``(user, bot)`` pairs shown in the chatbot.

        Returns:
            A pair ``("", chat_history)``: the empty string clears the
            textbox, the list refreshes the chatbot.
        """
        if chat_history is None:
            chat_history = []
        # Nothing to answer: do not spend a generation on a blank message.
        if not message or not message.strip():
            return "", chat_history

        bot_message = generate(message)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(api_open=False)

if __name__ == "__main__":
    demo.launch()