Simba committed on
Commit
2874322
1 Parent(s): d6c1203
Files changed (6)
  1. README.md +2 -2
  2. app.py +50 -0
  3. neovision/__init__.py +2 -0
  4. neovision/core.py +28 -0
  5. neovision/utils.py +62 -0
  6. requirements.txt +6 -0
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
- title: VisionB
- emoji: 💻
+ title: NeoVision
+ emoji: 🏢
  colorFrom: pink
  colorTo: pink
  sdk: gradio
app.py ADDED
@@ -0,0 +1,50 @@
+ import os
+ import cv2
+ import uuid
+
+ import gradio as gr
+ import numpy as np
+
+ import neovision
+
+ MARKDOWN = """
+ # neovision 💬 + 📸
+
+ This is a demo of neovision, a tool that allows you to chat with your webcam using GPT Vision.
+ """
+
+ connector = neovision.OpenAIConnector()
+
+
+ def save_image_to_drive(image: np.ndarray) -> str:
+     image_filename = f"{uuid.uuid4()}.jpeg"
+     image_directory = "data"
+     os.makedirs(image_directory, exist_ok=True)
+     image_path = os.path.join(image_directory, image_filename)
+     cv2.imwrite(image_path, image)
+     return image_path
+
+
+
+ def respond(image: np.ndarray, prompt: str, chat_history):
+     image = np.fliplr(image)
+     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+     image_path = save_image_to_drive(image)
+     response = connector.simple_prompt(image=image, prompt=prompt)
+     chat_history.append(((image_path,), None))
+     chat_history.append((prompt, response))
+     return "", chat_history
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown(MARKDOWN)
+     with gr.Row():
+         webcam = gr.Image(source="webcam", streaming=True)
+         with gr.Column():
+             chatbot = gr.Chatbot(height=500)
+             message = gr.Textbox()
+             clear_button = gr.ClearButton([message, chatbot])
+
+     message.submit(respond, [webcam, message, chatbot], [message, chatbot])
+
+ demo.launch(debug=False, show_error=True)
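Note on the chat flow: respond appends two entries per turn to the gr.Chatbot history, a tuple-wrapped file path with no bot reply (which the Gradio chatbot treats as an image message), followed by the prompt/response text pair. A minimal sketch of the history after one turn, with a purely illustrative path and text:

# Illustrative only: shape of chat_history after a single respond() call
chat_history = [
    (("data/<uuid>.jpeg",), None),                        # captured webcam frame, shown as an image
    ("What do you see?", "A person sitting at a desk."),  # prompt and model reply (hypothetical)
]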
neovision/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from neovision.utils import encode_image_to_base64, compose_payload
+ from neovision.core import OpenAIConnector
neovision/core.py ADDED
@@ -0,0 +1,28 @@
+ import os
+
+ import numpy as np
+ import requests
+
+ from neovision.utils import compose_payload
+
+
+ API_KEY = os.getenv('OPENAI_API_KEY')
+
+
+ class OpenAIConnector:
+
+     def __init__(self, api_key: str = API_KEY):
+         if api_key is None:
+             raise ValueError("API_KEY is not set")
+         self.api_key = api_key
+
+     def simple_prompt(self, image: np.ndarray, prompt: str) -> str:
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {self.api_key}"
+         }
+         payload = compose_payload(image=image, prompt=prompt)
+         response = requests.post("https://api.openai.com/v1/chat/completions",
+                                  headers=headers, json=payload).json()
+
+         return response['choices'][0]['message']['content']
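For context, a minimal usage sketch of the connector outside the Gradio app; it assumes OPENAI_API_KEY is exported in the environment and that frame.jpg is a hypothetical local image, neither of which is part of this commit:

import cv2
import neovision

connector = neovision.OpenAIConnector()   # raises ValueError if OPENAI_API_KEY is not set
frame = cv2.imread("frame.jpg")           # hypothetical test image, loaded as a BGR ndarray
print(connector.simple_prompt(image=frame, prompt="Describe this scene."))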
neovision/utils.py ADDED
@@ -0,0 +1,62 @@
+ import cv2
+ import base64
+ import numpy as np
+
+
+ def encode_image_to_base64(image: np.ndarray) -> str:
+     """
+     Encodes a given image represented as a NumPy array to a base64-encoded string.
+
+     Parameters:
+         image (np.ndarray): A NumPy array representing the image to be encoded.
+
+     Returns:
+         str: A base64-encoded string representing the input image in JPEG format.
+
+     Raises:
+         ValueError: If the image cannot be encoded to JPEG format.
+     """
+
+     success, buffer = cv2.imencode('.jpg', image)
+     if not success:
+         raise ValueError("Could not encode image to JPEG format.")
+
+     encoded_image = base64.b64encode(buffer).decode('utf-8')
+     return encoded_image
+
+
+ def compose_payload(image: np.ndarray, prompt: str) -> dict:
+     """
+     Composes a payload dictionary with a base64-encoded image and a text prompt for the GPT-4 Vision model.
+
+     Args:
+         image (np.ndarray): The image in the form of a NumPy array to encode and send.
+         prompt (str): The prompt text to accompany the image in the payload.
+
+     Returns:
+         dict: A dictionary structured as a payload for the GPT-4 Vision model, including the model name,
+             an array of messages each containing a role and content with text and the base64-encoded image,
+             and the maximum number of tokens to generate.
+     """
+     base64_image = encode_image_to_base64(image)
+     return {
+         "model": "gpt-4-vision-preview",
+         "messages": [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "text",
+                         "text": prompt
+                     },
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/jpeg;base64,{base64_image}"
+                         }
+                     }
+                 ]
+             }
+         ],
+         "max_tokens": 300
+     }
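As a quick sanity check of these helpers, the sketch below encodes a synthetic frame and inspects the payload structure without calling the API (the dummy array and prompt are illustrative):

import numpy as np
from neovision.utils import encode_image_to_base64, compose_payload

dummy = np.zeros((64, 64, 3), dtype=np.uint8)          # synthetic black frame
b64 = encode_image_to_base64(dummy)                    # JPEG-encoded, then base64 string
payload = compose_payload(image=dummy, prompt="What is in this image?")

print(payload["model"])                                # "gpt-4-vision-preview"
print(payload["messages"][0]["content"][0]["text"])    # the prompt text
print(payload["max_tokens"])                           # 300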
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ supervision
+ openai
+ opencv-python
+ numpy
+ requests
+ gradio