import gradio as gr
import torch
from openmind import AutoModelForCausalLM, AutoTokenizer
from transformers import GenerationConfig


def load_model():
    # Load the tokenizer and model, then move the model onto the first Ascend NPU.
    device = "npu:0"
    model_path = "Baichuan/Baichuan2_7b_chat_pt"
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_path, torch_dtype=torch.bfloat16, trust_remote_code=True
    ).to(device)
    model.generation_config = GenerationConfig.from_pretrained(model_path)
    return model, tokenizer


def chat(content, history):
    # Rebuild the conversation from Gradio's history ([user, assistant] pairs)
    # so the model sees the full dialogue, then append the new user turn.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": content})
    # Baichuan2's remote code exposes a custom chat() helper for generation.
    return model.chat(tokenizer, messages)


if __name__ == "__main__":
    model, tokenizer = load_model()
    gr.ChatInterface(
        chat,
        title="Baichuan2_7B Chat",
        description=(
            "Baichuan 2 is the new generation of open-source large language "
            "models released by Baichuan Intelligence, trained on 2.6 trillion "
            "tokens of high-quality corpora. Welcome to try the "
            "baichuan2_chat_7B model."
        ),
        examples=[
            'Explain the saying "温故而知新"',
            "Plan a one-day sightseeing itinerary for Hangzhou",
        ],
    ).queue().launch(debug=True)
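Assuming openmind, torch, torch_npu, and gradio are installed and an Ascend NPU is visible to the process, running the script (saved here under the hypothetical name chat_demo.py) launches a local Gradio server, by default at http://127.0.0.1:7860:

python chat_demo.py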