"""Minimal Gradio chat UI for the THUDM/chatglm-6b model.

Loads the model onto the GPU in half precision at import time, then serves a
single-page chat interface. Requires a CUDA-capable GPU.
"""
from transformers import AutoTokenizer, AutoModel
import gradio as gr

# trust_remote_code is required: ChatGLM ships its custom modeling code with
# the checkpoint. Half precision keeps the 6B model within typical GPU memory.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()


def predict(history, user_input):
    """Generate the next chat turn.

    Args:
        history: list of (user, bot) message pairs from the Chatbot component.
        user_input: the text the user just submitted.

    Returns:
        A pair of (updated history, "") — the empty string clears the textbox.
    """
    # model.chat returns (reply, extended history); the reply is already the
    # last entry of the returned history, so it is deliberately ignored here.
    # Renamed from `input` to avoid shadowing the builtin.
    _response, history = model.chat(tokenizer, user_input, history)
    return history, ""


with gr.Blocks() as demo:
    # NOTE(review): .style() is the pre-4.x Gradio API — this file appears to
    # target gradio < 4; confirm before upgrading the dependency.
    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
    with gr.Row():
        with gr.Column():
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
            ).style(container=False)

    # On Enter: send (history, text) in, write (new history, cleared text) out.
    txt.submit(predict, [chatbot, txt], [chatbot, txt])

if __name__ == "__main__":
    demo.launch()