Nikhil0987 committed on
Commit aef40c2 · verified · 1 Parent(s): 927ea7f

Update app.py

Files changed (1):
  1. app.py +30 -41
app.py CHANGED
@@ -1,43 +1,32 @@
 import requests
 from PIL import Image
-from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
-import streamlit as st
-
-
-image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
-image = Image.open(requests.get(image_url, stream=True).raw)
-
-model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
-processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
-
-question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-
-inputs = processor(images=image, text=question, return_tensors="pt")
-
-predictions = model.generate(**inputs, max_new_tokens=1000)
-# print(processor.decode(predictions[0], skip_special_tokens=True))
-
-
-def load_image():
-    with st.sidebar:
-        if img := st.text_input("Enter Image URL") or st.selectbox("Select Image", ("https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80", "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80")):
-            if st.button("Load Image"):
-                st.write("Image Uploaded!")
-                st.image(img)
-            else:
-                st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
-        return img
-
-
-def visual_qna():
-    st.title("Visual Q&A")
-    img = load_image()
-    if img:
-        if query := st.chat_input("Enter your message"):
-            response = model(question=query, image=img)
-            with st.chat_message("assistant"):
-                st.write(response)
-    else:
-        st.warning("Please enter an image URL and click 'Load Image' before asking a question.")

+import streamlit as st
 import requests
 from PIL import Image
+from transformers import BlipProcessor, BlipForQuestionAnswering
+
+# Model Loading
+processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
+model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
+
+# Streamlit App Structure
+st.title("Visual Question Answering")
+
+def get_image():
+    img_url = st.text_input("Enter Image URL", value='https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg')
+    if img_url:
+        raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+        st.image(raw_image)
+        return raw_image
+
+def process_vqa(image, question):
+    if image and question:
+        inputs = processor(image, question, return_tensors="pt")
+        output = model.generate(**inputs)
+        answer = processor.decode(output[0], skip_special_tokens=True)
+        st.write("Answer:", answer)
+
+# User Input
+image = get_image()
+question = st.text_input("Ask your question about the image:")
+
+# Process Question and Generate Answer
+process_vqa(image, question)
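
For reference, the BLIP VQA inference path that the updated app.py wraps in Streamlit can be exercised on its own. The snippet below is a minimal sketch, not part of the commit: it uses the same Salesforce/blip-vqa-capfilt-large checkpoint and the standard transformers processor/generate/decode calls, with an illustrative demo image URL and question.

# Minimal standalone sketch of the BLIP VQA flow in the new app.py.
# Assumes transformers, Pillow, and requests are installed; the image
# URL and question are placeholders, not values from the commit.
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering

processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")

# Fetch and normalize the image, as the app does in get_image().
img_url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg"
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")

# Encode image and question together, then generate and decode the answer.
question = "how many dogs are in the picture?"
inputs = processor(raw_image, question, return_tensors="pt")
output = model.generate(**inputs)
print(processor.decode(output[0], skip_special_tokens=True))

In the app itself the same flow is driven from the browser; launching it with `streamlit run app.py` serves the title, image-URL field, and question box defined above.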