greencatted committed on
Commit
fb8d2a9
1 Parent(s): fdb4a11

using llama instead

Browse files
Files changed (1) hide show
  1. app.py +61 -15
app.py CHANGED
@@ -1,23 +1,69 @@
1
  import streamlit as st
2
- from PIL import Image
3
- import torch
4
 
5
- from transformers import Blip2Processor, Blip2ForConditionalGeneration
6
 
7
- processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
8
- model = Blip2ForConditionalGeneration.from_pretrained(
9
- "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  )
11
 
 
 
 
12
 
13
- enable = st.checkbox("Enable camera")
14
- picture = st.camera_input("Take a picture", disabled=not enable)
15
 
16
- if picture:
17
- image = Image.open(picture)
18
- prompt = "Question: At what location is this person most likely attending this online meeting? Answer:"
19
- inputs = processor(images=image, text=prompt, return_tensors="pt")
 
 
 
 
 
 
 
 
20
 
21
- generated_ids = model.generate(**inputs)
22
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
23
- st.write(generated_text)
 
1
  import streamlit as st
2
+ import base64
 
3
 
4
+ from huggingface_hub import InferenceClient
5
 
6
+ client = InferenceClient(api_key=st.secrets["HF_TOKEN"])
7
+
8
def classify_picture(image_url):
    """Ask the vision model where the pictured participant is attending from.

    Sends ``image_url`` together with a fixed instruction prompt to the
    module-level ``client`` as a single user chat message, streams the
    completion, and returns the concatenated reply text.

    Args:
        image_url: URL of the image to classify. Callers in this file pass
            either an ``https://`` link or a ``data:image/jpeg;base64,...``
            URI.

    Returns:
        The model's reply as one string; empty if the stream carried no text.
    """
    prompt = """Based upon the image, at what sort of location or in where is this participant most likely attending their online meeting? Be more specific rather than general. Answer in one sentence in the format of: "This participant is most likely attending their online meeting [location]."""
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    stream = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=messages,
        max_tokens=100,
        stream=True,
    )

    # Not every streamed chunk carries text: a chunk may have no choices or a
    # delta whose content is None. Concatenating those blindly raises, so
    # only real text fragments are collected.
    pieces = []
    for chunk in stream:
        if not chunk.choices:
            continue
        fragment = chunk.choices[0].delta.content
        if fragment:
            pieces.append(fragment)
    return "".join(pieces)
38
+
39
# Page flow: pick an image source (live camera or a canned demo picture),
# then classify it on demand with classify_picture().
st.title("Online Meeting Location Classifier")

# Demo pictures offered next to the live camera, keyed by their menu label.
_DEMO_IMAGE_URLS = {
    "Demo 1": "https://media.istockphoto.com/id/1353209475/video/happy-indian-business-man-talking-to-webcam-having-virtual-meeting-in-office.jpg?s=640x640&k=20&c=DUEzmtpErdZxDavYMOmLuKExEWIVWUusRUh9wYd20j0=",
    "Demo 2": "https://cdn.prod.website-files.com/62180286278929909e43b116/63c216577ab752f188142733_Using%20a%20Custom%20Zoom%20Background.jpg",
    "Demo 3": "https://media.istockphoto.com/id/1319103417/photo/inspired-female-teacher-in-headphones-meet-students-online-give-class.jpg?s=612x612&w=0&k=20&c=cSGIQIwPFpVi7m4MR2_SLlwDcpBELdnNnLNgdtH6Bak=",
    "Demo 4": "https://www.rivertribe.co.uk/wp-content/uploads/2020/04/EV1Twz9XgAExEJ5-e1587484451269.jpg",
}

option = st.selectbox(
    "Target Image",
    ("Camera", "Demo 1", "Demo 2", "Demo 3", "Demo 4"),
    index=None,
    placeholder="Select Image...",
)

# Stays None until there is an actual image to classify, so the button below
# can never run against an undefined URL.
image_url = None

if option == "Camera":
    # The checkbox gates the camera widget; previously its value was read
    # but never used, leaving the camera permanently enabled.
    enable = st.checkbox("Enable camera")
    picture_buffer = st.camera_input("Take a picture", disabled=not enable)

    if picture_buffer is not None:
        # Encode outside the f-string: reusing double quotes inside a
        # double-quoted f-string is a syntax error before Python 3.12.
        encoded_picture = base64.b64encode(picture_buffer.getvalue()).decode("utf-8")
        image_url = f"data:image/jpeg;base64,{encoded_picture}"
elif option is not None:
    image_url = _DEMO_IMAGE_URLS[option]
    st.image(image_url, use_column_width=True)

if image_url is not None and st.button("Generate Location"):
    st.write(classify_picture(image_url))