"""Gradio demo that captions an uploaded image with Salesforce's BLIP model."""

from typing import Optional

import gradio as gr
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Single source of truth for the checkpoint so processor and model cannot drift.
MODEL_NAME = "Salesforce/blip-image-captioning-large"

# Load the processor and model once at import time; every request reuses them.
processor = BlipProcessor.from_pretrained(MODEL_NAME)
model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)


def generate_caption(image: Optional["Image.Image"]) -> str:
    """Generate a text caption for *image* using the BLIP model.

    Args:
        image: The PIL image supplied by the Gradio ``Image`` input, or
            ``None`` when the user submitted the form without an image.

    Returns:
        The decoded caption with special tokens stripped, or an empty
        string when no image was provided.
    """
    # Guard against an empty submission instead of letting the processor
    # fail on a ``None`` input.
    if image is None:
        return ""

    # Convert the image into the tensor format the model expects.
    inputs = processor(image, return_tensors="pt")

    # Generate the caption token ids.
    out = model.generate(**inputs)

    # Decode the first (only) generated sequence into text.
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption


# Build the Gradio interface.
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title="Image Captioning with BLIP",
    description="上传一张图片,使用Salesforce的BLIP模型生成描述。",
)

# Launch the app only when executed as a script.
if __name__ == "__main__":
    interface.launch()