"""Interactive Q&A Gradio demo for NVIDIA's NVLM-D-72B model."""
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer once at startup.
# NOTE(review): NVLM-D-72B publishes a custom architecture on the Hub, so
# loading requires trust_remote_code=True. It is also a 72B-parameter
# checkpoint — confirm the host has enough memory (consider
# device_map="auto" with `accelerate` installed).
model_name = "nvidia/NVLM-D-72B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",  # keep the checkpoint's native dtype instead of upcasting to fp32
    trust_remote_code=True,
)


def generate_response(prompt: str) -> str:
    """Generate a text completion for ``prompt`` and return the decoded string.

    Args:
        prompt: The user's question / input text.

    Returns:
        The model output decoded to a plain string (special tokens stripped).
    """
    # Encode the input and move the tensors to whatever device the model is on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Inference only: disable autograd bookkeeping to save memory.
    with torch.inference_mode():
        # Without max_new_tokens, generate() falls back to a ~20-token default
        # max_length and truncates answers almost immediately.
        outputs = model.generate(**inputs, max_new_tokens=256)
    # Decode the generated sequence back to text.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Build the Gradio interface.
# gr.inputs.Textbox was deprecated in Gradio 3.x and removed in 4.x;
# components now live directly on the top-level gr namespace.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="输入你的问题..."),
    outputs="text",
    title="NVLM-D-72B 交互式问答",
    description="使用 NVIDIA 的 NVLM-D-72B 模型进行问答。",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()