"""Gradio chat UI for the IntelliKernel-0.03b-sft causal language model."""

import time

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and the model (both come from the same Hub repository).
tokenizer_path = "studyinglover/IntelliKernel-0.03b-sft"
model_path = "studyinglover/IntelliKernel-0.03b-sft"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
# NOTE(review): trust_remote_code=True runs Python code shipped inside the
# model repository; only keep it for repositories you trust.
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Text shown in the token-info panel while there are no statistics to report.
TOKEN_INFO_PLACEHOLDER = "Token 数量: \nToken 输出速度: "


def chat_with_model(history, user_input, top_k, temperature):
    """Generate the assistant's reply to ``user_input`` and extend the chat.

    Args:
        history: Conversation so far as a list of ``{"role", "content"}``
            dicts (the ``type="messages"`` chatbot format). ``None`` is
            treated as an empty conversation. The list passed in is not
            mutated; a new list is returned.
        user_input: Text typed by the user. Empty or whitespace-only input
            is ignored and the model is not called.
        top_k: Top-k sampling value forwarded to ``model.generate``.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        A ``(history, textbox_value, token_info)`` tuple: the updated
        conversation, an empty string that clears the input box, and a
        text summary of the reply's token count and generation speed.
    """
    # The chatbot value may be None (the clear button sets it to None), and
    # copying avoids mutating the caller's list in place.
    history = list(history or [])

    # Nothing to answer: do not append an empty turn or run the model.
    if not user_input or not user_input.strip():
        return history, "", TOKEN_INFO_PLACEHOLDER

    # Append the user's message to the conversation.
    history.append({"role": "user", "content": user_input})

    # Render the conversation with the chat template and keep only the tail.
    # NOTE(review): this slice truncates the rendered prompt by characters,
    # not by tokens, even though the limit comes from max_seq_len.
    new_prompt = tokenizer.apply_chat_template(
        history, tokenize=False, add_generation_prompt=True
    )[-(model.config.max_seq_len - 1):]

    # Encode the prompt and move the token ids to the model's device.
    x = tokenizer(new_prompt, return_tensors="pt").input_ids.to(device)

    # Stream the reply from the model while timing the generation.
    pieces = []
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments.
    started = time.perf_counter()
    with torch.inference_mode():
        stream = model.generate(
            x,
            tokenizer.eos_token_id,
            max_new_tokens=512,
            top_k=top_k,
            temperature=temperature,
            stream=True,
        )
        # NOTE(review): assumes every streamed item holds only the newly
        # generated token ids; if the remote generate() yields the
        # cumulative sequence instead, the text would be duplicated.
        # TODO confirm against the model's generate implementation.
        for step in stream:
            pieces.append(tokenizer.decode(step[0].tolist()))
    elapsed_time = time.perf_counter() - started
    output_text = "".join(pieces)

    # Count only the tokens of the reply text itself; skip any special
    # tokens the tokenizer would otherwise add when encoding.
    num_tokens = len(tokenizer.encode(output_text, add_special_tokens=False))
    token_speed = num_tokens / elapsed_time if elapsed_time > 0 else 0

    # Token count and generation speed for the latest reply.
    token_info = (
        f"Token 数量: {num_tokens}\nToken 输出速度: {token_speed:.2f} tokens/sec"
    )

    # Append the model's reply to the conversation.
    history.append({"role": "assistant", "content": output_text.strip()})

    # Updated conversation, cleared input box, and token statistics.
    return history, "", token_info


# Build the chatbot interface with Gradio.
with gr.Blocks() as iface:
    with gr.Row():
        with gr.Column(scale=1):
            # Left column: sampling parameter controls.
            top_k_slider = gr.Slider(0, 100, value=8, step=1, label="Top-k")
            temp_slider = gr.Slider(
                0.1, 1.5, value=0.7, step=0.1, label="Temperature"
            )
            # Panel that displays the token statistics.
            token_info_box = gr.Markdown(TOKEN_INFO_PLACEHOLDER)

        with gr.Column(scale=3):
            # Right column: conversation area.
            gr.Markdown(
                "# Chat with AI\n这是一个简单的聊天模型界面,输入内容后模型将生成相应的回复。"
            )
            # The "messages" type stores the conversation as role/content dicts.
            chatbot = gr.Chatbot(type="messages")
            msg = gr.Textbox(label="Your Message")  # user input box
            with gr.Row():
                send_btn = gr.Button("Send Message")  # send button
                clear = gr.Button("Clear Chat")  # clear-history button

    # Wire up the interactions; the button and the Enter key share the same
    # handler, inputs, and outputs.
    chat_inputs = [chatbot, msg, top_k_slider, temp_slider]
    chat_outputs = [chatbot, msg, token_info_box]
    send_btn.click(chat_with_model, chat_inputs, chat_outputs)  # send on click
    msg.submit(chat_with_model, chat_inputs, chat_outputs)  # send on Enter
    clear.click(lambda: None, None, chatbot, queue=False)  # clear the chat

iface.launch()