File size: 1,833 Bytes
d52c0fd
 
 
 
 
 
 
 
 
 
af4e81f
 
 
 
 
 
eb86d7b
b3847c9
eb86d7b
 
 
 
 
2ef1f88
35cb283
eb86d7b
 
 
d52c0fd
 
 
 
 
 
 
 
 
 
 
 
 
 
af4e81f
eb86d7b
f34a95b
d52c0fd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import streamlit as st
from transformers import ViltProcessor, ViltForQuestionAnswering, BlipForQuestionAnswering, AutoProcessor
from PIL import Image

# Registry of selectable VQA models, keyed by the name shown in the UI.
# Each value is (processor class, model class, checkpoint name); the
# checkpoint name is what gets passed to ``from_pretrained``.
models = {
    "ViLT": (
        ViltProcessor,
        ViltForQuestionAnswering,
        "dandelin/vilt-b32-finetuned-vqa",
    ),
    "BLIP": (
        AutoProcessor,
        BlipForQuestionAnswering,
        "Salesforce/blip-vqa-base",
    ),
}

def get_format_response(image, question, selected_model):
    """Answer a question about an image using the selected VQA model.

    Args:
        image: The image to query. A PIL image that is not already in RGB
            mode (e.g. RGBA, palette or grayscale) is converted to RGB
            before preprocessing.
        question: The question to ask about the image.
        selected_model: Key into the module-level ``models`` registry
            (``"ViLT"`` or ``"BLIP"``).

    Returns:
        The model's answer: for ViLT, the label of the highest-scoring
        class; for any other entry, the decoded generated text.

    Raises:
        KeyError: If ``selected_model`` is not a key of ``models``.
    """
    # Local import keeps the file's top-level imports untouched; torch is
    # needed here only to disable gradient tracking during inference.
    import torch

    processor_class, model_class, model_name = models[selected_model]
    # NOTE(review): processor and model are re-instantiated on every call.
    processor = processor_class.from_pretrained(model_name)
    model = model_class.from_pretrained(model_name)

    # Uploaded files may decode to non-RGB modes; normalise to three
    # channels so the image preprocessing does not fail on them.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    encoding = processor(image, question, return_tensors="pt")

    if selected_model == "ViLT":
        # ViLT is used as a classifier over a fixed answer vocabulary:
        # pick the best-scoring class and map it to its label.
        with torch.no_grad():
            outputs = model(**encoding)
        idx = outputs.logits.argmax(-1).item()
        return model.config.id2label[idx]

    # Remaining models produce the answer as generated token ids.
    outputs = model.generate(**encoding)
    return processor.decode(outputs[0], skip_special_tokens=True)
        

# Streamlit app: page header.
st.title("Simple VQA App 🤖🎈")
st.subheader("A demo app showcasing VQA models. ViLT and BLIP model.")
# Sidebar for model selection; options are the keys of the model registry.
selected_model = st.sidebar.selectbox("Select Model", list(models))

# Image and question input
uploaded_image = st.file_uploader("Upload Image")
question = st.text_input("Ask a Question about the Image")

# Run the model only once both an image and a question have been provided.
if uploaded_image and question:
    try:
        image = Image.open(uploaded_image)
        # Image.open is lazy; force decoding so a corrupt or truncated file
        # is reported here instead of failing later during display/inference.
        image.load()
    except OSError:
        # The uploader accepts any file type, so a non-image upload must be
        # reported to the user rather than crashing the app with a traceback.
        st.error("The uploaded file could not be read as an image.")
    else:
        st.image(image, caption="Uploaded Image")
        # Loading the model and running inference can take a while.
        with st.spinner("Thinking..."):
            answer = get_format_response(image, question, selected_model)
        # Display answer
        st.write(f"🤔 {selected_model} Answer: {answer} 🎉")
# Disclaimer
st.sidebar.markdown("This is a demo app showcasing VQA models. Actual performance may vary.")