Spaces:

greencatted
/

ZoomLocation

Running

App Files Files Community

greencatted committed on Nov 6, 2024

Commit

fb8d2a9

•

1 Parent(s): fdb4a11

using llama instead

Browse files

Files changed (1) hide show

app.py +61 -15

app.py CHANGED Viewed

@@ -1,23 +1,69 @@
 import streamlit as st
-from PIL import Image
-import torch
-from transformers import Blip2Processor, Blip2ForConditionalGeneration
-processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-model = Blip2ForConditionalGeneration.from_pretrained(
-    "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
 )
-enable = st.checkbox("Enable camera")
-picture = st.camera_input("Take a picture", disabled=not enable)
-if picture:
-	image = Image.open(picture)
-	prompt = "Question: At what location is this person most likely attending this online meeting? Answer:"
-	inputs = processor(images=image, text=prompt, return_tensors="pt")
-	generated_ids = model.generate(**inputs)
-	generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-	st.write(generated_text)

 import streamlit as st
+import base64
+from huggingface_hub import InferenceClient
+# Shared Hugging Face inference client; its API key is read from the "HF_TOKEN" entry in Streamlit secrets.
+client = InferenceClient(api_key=st.secrets["HF_TOKEN"])
+def classify_picture(image_url):
+	messages = [
+		{
+			"role": "user",
+			"content": [
+				{
+					"type": "image_url",
+					"image_url": {
+						"url": image_url
+					}
+				},
+				{
+					"type": "text",
+					"text": """Based upon the image, at what sort of location or in where is this participant most likely attending their online meeting? Be more specific rather than general. Answer in one sentence in the format of: "This participant is most likely attending their online meeting [location]."""
+				},
+			]
+		}
+	]
+	stream = client.chat.completions.create(
+		model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+		messages=messages,
+		max_tokens=100,
+		stream=True
+	)
+	result = ""
+	for chunk in stream:
+		result += chunk.choices[0].delta.content
+	return result
+st.title("Online Meeting Location Classifier")
+option = st.selectbox(
+    "Target Image",
+    ("Camera", "Demo 1", "Demo 2", "Demo 3", "Demo 4"),
+	index=None,
+    placeholder="Select Image...",
 )
+if option == "Camera":
+	enable = st.checkbox("Enable camera")
+	picture_buffer = st.camera_input("Take a picture")
+	if picture_buffer is not None:
+		image_url = f"data:image/jpeg;base64,{base64.b64encode(picture_buffer.getvalue()).decode("utf-8")}"
+		if st.button("Generate Location"):
+			st.write(classify_picture(image_url))
+elif option is not None:
+	if option == "Demo 1":
+		image_url = "https://media.istockphoto.com/id/1353209475/video/happy-indian-business-man-talking-to-webcam-having-virtual-meeting-in-office.jpg?s=640x640&k=20&c=DUEzmtpErdZxDavYMOmLuKExEWIVWUusRUh9wYd20j0="
+	elif option == "Demo 2":
+		image_url = "https://cdn.prod.website-files.com/62180286278929909e43b116/63c216577ab752f188142733_Using%20a%20Custom%20Zoom%20Background.jpg"
+	elif option == "Demo 3":
+		image_url = "https://media.istockphoto.com/id/1319103417/photo/inspired-female-teacher-in-headphones-meet-students-online-give-class.jpg?s=612x612&w=0&k=20&c=cSGIQIwPFpVi7m4MR2_SLlwDcpBELdnNnLNgdtH6Bak="
+	elif option == "Demo 4":
+		image_url = "https://www.rivertribe.co.uk/wp-content/uploads/2020/04/EV1Twz9XgAExEJ5-e1587484451269.jpg"
+	st.image(image_url, use_column_width=True)
+	if st.button("Generate Location"):
+		st.write(classify_picture(image_url))