greencatted committed on
Commit
bb28428
1 Parent(s): 452f58c

Back to BLIP

Browse files
Files changed (1) hide show
  1. app.py +12 -24
app.py CHANGED
@@ -1,37 +1,25 @@
1
  import streamlit as st
2
  from PIL import Image
3
-
4
  import torch
5
- from transformers import MllamaForConditionalGeneration, AutoProcessor
6
 
7
- model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 
 
8
 
9
- model = MllamaForConditionalGeneration.from_pretrained(
10
- model_id,
11
- torch_dtype=torch.bfloat16,
12
- device_map="auto",
13
  )
14
- processor = AutoProcessor.from_pretrained(model_id)
15
 
16
  enable = st.checkbox("Enable camera")
17
  picture = st.camera_input("Take a picture", disabled=not enable)
18
 
19
  if picture:
20
  image = Image.open(picture)
 
 
21
 
22
- messages = [
23
- {"role": "user", "content": [
24
- {"type": "image"},
25
- {"type": "text", "text": "Provide your best guess as to where this person is holding his online meeting. Just state your guess of location in your response."}
26
- ]}
27
- ]
28
- input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
29
- inputs = processor(
30
- image,
31
- input_text,
32
- add_special_tokens=False,
33
- return_tensors="pt"
34
- ).to(model.device)
35
-
36
- output = model.generate(**inputs, max_new_tokens=30)
37
- print(processor.decode(output[0]))
 
"""Streamlit app: ask BLIP-2 where a webcam snapshot was most likely taken."""

import streamlit as st
from PIL import Image
import torch

from transformers import Blip2Processor, Blip2ForConditionalGeneration

MODEL_ID = "Salesforce/blip2-opt-2.7b"

# Decide the device once and use it everywhere below, so the CPU fallback
# actually works instead of crashing on a hard-coded "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision only on GPU; the CPU path stays in float32.
dtype = torch.float16 if device == "cuda" else torch.float32


@st.cache_resource
def load_blip2():
    """Load the BLIP-2 processor and model once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids reloading the checkpoint on each rerun.

    Returns:
        A ``(processor, model)`` tuple.
    """
    blip_processor = Blip2Processor.from_pretrained(MODEL_ID)
    if device == "cuda":
        # 8-bit quantized weights pinned to GPU 0, as in the original setup.
        blip_model = Blip2ForConditionalGeneration.from_pretrained(
            MODEL_ID,
            load_in_8bit=True,
            device_map={"": 0},
            torch_dtype=torch.float16,
        )
    else:
        # 8-bit loading needs a GPU; fall back to a plain float32 model.
        blip_model = Blip2ForConditionalGeneration.from_pretrained(
            MODEL_ID,
            torch_dtype=dtype,
        )
    return blip_processor, blip_model


processor, model = load_blip2()

enable = st.checkbox("Enable camera")
picture = st.camera_input("Take a picture", disabled=not enable)

if picture:
    image = Image.open(picture)
    prompt = "Question: At what location is this person most likely attending this online meeting? Answer:"
    # Move inputs to the same device/precision the model was loaded with.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device=device, dtype=dtype)

    generated_ids = model.generate(**inputs)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    st.write(generated_text)