"""Minimal Gradio chat UI for the THUDM/chatglm-6b model.

Loads the model onto the GPU in half precision at import time, then serves a
single-page chat interface. Requires a CUDA-capable GPU.
"""
from transformers import AutoTokenizer, AutoModel
import gradio as gr

# trust_remote_code is required: ChatGLM ships its custom modeling code with
# the checkpoint. Half precision keeps the 6B model within typical GPU memory.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()


def predict(history, user_input):
    """Generate the next chat turn.

    Args:
        history: list of (user, bot) message pairs from the Chatbot component.
        user_input: the text the user just submitted.

    Returns:
        A pair of (updated history, "") — the empty string clears the textbox.
    """
    # model.chat returns (reply, extended history); the reply is already the
    # last entry of the returned history, so it is deliberately ignored here.
    # Renamed from `input` to avoid shadowing the builtin.
    _response, history = model.chat(tokenizer, user_input, history)
    return history, ""


with gr.Blocks() as demo:
    # NOTE(review): .style() is the pre-4.x Gradio API — this file appears to
    # target gradio < 4; confirm before upgrading the dependency.
    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
    with gr.Row():
        with gr.Column():
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
            ).style(container=False)

    # On Enter: send (history, text) in, write (new history, cleared text) out.
    txt.submit(predict, [chatbot, txt], [chatbot, txt])

if __name__ == "__main__":
    demo.launch()