"""Streamlit visual question answering demo.

The user uploads an image and types a question; the Human_LLaVA
vision-language model generates an answer that is shown on the page.
"""

import streamlit as st
import torch
from PIL import Image
from transformers import AutoModelForPreTraining, AutoProcessor

# Model and processor settings.
MODEL_ID = "OpenFace-CQUPT/Human_LLaVA"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Explicit budget for the generated answer. Without it, generate() falls back
# to the generation config's default length limit, which is far smaller than a
# LLaVA prompt once the image placeholder is expanded.
MAX_NEW_TOKENS = 256

# LLaVA-style checkpoints expect an "<image>" placeholder marking where the
# image features are spliced into the text.
# NOTE(review): template follows the published Human_LLaVA usage example;
# confirm against the model card if the checkpoint is ever swapped.
PROMPT_TEMPLATE = "USER: <image>\n{question}\nASSISTANT:"


@st.cache_resource
def load_model_and_processor():
    """Load the processor and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    loading at module top level would reload the weights each time.

    Returns:
        A ``(processor, model)`` tuple; the model is moved to ``DEVICE`` and
        put in eval mode.
    """
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    # LLaVA configs are not registered with AutoModelForCausalLM; the
    # pretraining auto class resolves to the conditional-generation model.
    model = AutoModelForPreTraining.from_pretrained(MODEL_ID).to(DEVICE)
    model.eval()
    return processor, model


def answer_question(processor, model, image, question):
    """Generate an answer to *question* about *image*.

    Args:
        processor: Processor returned by ``load_model_and_processor``.
        model: Model returned by ``load_model_and_processor``.
        image: A ``PIL.Image.Image``; converted to RGB before preprocessing.
        question: The user's question as plain text.

    Returns:
        The decoded answer text with the prompt removed.
    """
    prompt = PROMPT_TEMPLATE.format(question=question)
    inputs = processor(
        images=image.convert("RGB"),
        text=prompt,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

    # generate() returns prompt tokens followed by new tokens; drop the prompt
    # so the UI shows only the answer rather than echoing the question.
    prompt_length = inputs["input_ids"].shape[1]
    generated = output[0][prompt_length:]
    return processor.decode(generated, skip_special_tokens=True).strip()


# Streamlit page setup.
st.title("Visual Question Answering App")
st.write("Upload an image and ask a question about it!")

# Image upload and question input.
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
question = st.text_input("Ask a question about the image:").strip()

# Process the inputs and produce the answer.
if uploaded_image is not None and question:
    image = Image.open(uploaded_image)

    # Show the image and the question.
    st.image(image, caption="Uploaded Image", use_column_width=True)
    st.write("Question:", question)

    # Run the model to generate the answer. The model is loaded lazily here so
    # the page renders before the (slow) first load.
    with st.spinner("Generating answer..."):
        processor, model = load_model_and_processor()
        answer = answer_question(processor, model, image, question)

    # Show the answer.
    st.write("Answer:", answer)