import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import torch

# Load the model and processor
model_name = "microsoft/llava-med-v1.5-mistral-7b"
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_name)

def predict(image, question):
    # Pack the image and question into the model's input format,
    # moving tensors to whichever device the model was placed on
    inputs = processor(images=image, text=question, return_tensors="pt").to(model.device)
    # Generate the answer (cap the length so generation does not stop at the tiny default)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)
    # Decode the generated tokens into text
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return answer

# Build the Gradio interface
# (the gr.inputs.* namespace was removed in newer Gradio releases; use the top-level components)
interface = gr.Interface(
    fn=predict,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs="text",
    title="Medical Visual Question Answering",
)

if __name__ == "__main__":
    interface.launch()
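
# A minimal sketch of calling predict() directly, without launching the web UI.
# The file name and question below are illustrative assumptions, not part of
# the original example:
#
#   from PIL import Image
#   sample = Image.open("chest_xray.png")
#   print(predict(sample, "Is there any abnormality in this image?"))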