Nikhil0987 committed on
Commit aef40c2 · verified · 1 Parent(s): 927ea7f

Update app.py

Files changed (1):
  1. app.py +30 -41
app.py CHANGED
@@ -1,43 +1,32 @@
 import requests
 from PIL import Image
-from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
-import streamlit as st
-
-
-image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
-image = Image.open(requests.get(image_url, stream=True).raw)
-
-model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
-processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
-
-question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-
-inputs = processor(images=image, text=question, return_tensors="pt")
-
-predictions = model.generate(**inputs, max_new_tokens=1000)
-# print(processor.decode(predictions[0], skip_special_tokens=True))
-
-
-def load_image():
-    with st.sidebar:
-        if img := st.text_input("Enter Image URL") or st.selectbox("Select Image", ("https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80", "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80")):
-            if st.button("Load Image"):
-                st.write("Image Uploaded!")
-                st.image(img)
-            else:
-                st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
-        return img
-
-
-def visual_qna():
-    st.title("Visual Q&A")
-    img = load_image()
-    if img:
-        if query := st.chat_input("Enter your message"):
-            response = model(question=query, image=img)
-            with st.chat_message("assistant"):
-                st.write(response)
-    else:
-        st.warning("Please enter an image URL and click 'Load Image' before asking a question.")

+import streamlit as st
 import requests
 from PIL import Image
+from transformers import BlipProcessor, BlipForQuestionAnswering
+
+# Model Loading
+processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
+model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
+
+# Streamlit App Structure
+st.title("Visual Question Answering")
+
+def get_image():
+    img_url = st.text_input("Enter Image URL", value='https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg')
+    if img_url:
+        raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+        st.image(raw_image)
+        return raw_image
+
+def process_vqa(image, question):
+    if image and question:
+        inputs = processor(image, question, return_tensors="pt")
+        output = model.generate(**inputs)
+        answer = processor.decode(output[0], skip_special_tokens=True)
+        st.write("Answer:", answer)
+
+# User Input
+image = get_image()
+question = st.text_input("Ask your question about the image:")
+
+# Process Question and Generate Answer
+process_vqa(image, question)
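
For reference, the BLIP VQA inference path that the updated app.py wraps in Streamlit can be exercised on its own. The snippet below is a minimal sketch, not part of the commit: it uses the same Salesforce/blip-vqa-capfilt-large checkpoint and the standard transformers processor/generate/decode calls, with an illustrative demo image URL and question.

# Minimal standalone sketch of the BLIP VQA flow in the new app.py.
# Assumes transformers, Pillow, and requests are installed; the image
# URL and question are placeholders, not values from the commit.
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering

processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")

# Fetch and normalize the image, as the app does in get_image().
img_url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg"
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")

# Encode image and question together, then generate and decode the answer.
question = "how many dogs are in the picture?"
inputs = processor(raw_image, question, return_tensors="pt")
output = model.generate(**inputs)
print(processor.decode(output[0], skip_special_tokens=True))

In the app itself the same flow is driven from the browser; launching it with `streamlit run app.py` serves the title, image-URL field, and question box defined above.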