SkalskiP committed on
Commit
54c9770
·
1 Parent(s): f9e8ad9

Add number extraction feature and update instructions

Browse files

Added the 'extract_numbers_in_brackets' function to utils.py for efficient extraction of numbers enclosed in square brackets from a given string.

Files changed (3) hide show
  1. app.py +16 -7
  2. gpt4v.py +4 -1
  3. utils.py +18 -0
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- from typing import List, Dict
3
 
4
  import cv2
5
  import gradio as gr
@@ -16,6 +16,7 @@ HOME = os.getenv("HOME")
16
  DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
17
 
18
  SAM_CHECKPOINT = os.path.join(HOME, "app/weights/sam_vit_h_4b8939.pth")
 
19
  SAM_MODEL_TYPE = "vit_h"
20
 
21
  MARKDOWN = """
@@ -44,7 +45,7 @@ def inference(
44
  image_and_mask: Dict[str, np.ndarray],
45
  annotation_mode: List[str],
46
  mask_alpha: float
47
- ) -> np.ndarray:
48
  image = image_and_mask['image']
49
  mask = cv2.cvtColor(image_and_mask['mask'], cv2.COLOR_RGB2GRAY)
50
  is_interactive = not np.all(mask == 0)
@@ -68,7 +69,7 @@ def inference(
68
  with_mask="Mask" in annotation_mode,
69
  with_polygon="Polygon" in annotation_mode,
70
  with_label="Mark" in annotation_mode)
71
- return cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
72
 
73
 
74
  def prompt(message, history, image: np.ndarray, api_key: str) -> str:
@@ -83,6 +84,10 @@ def prompt(message, history, image: np.ndarray, api_key: str) -> str:
83
  )
84
 
85
 
 
 
 
 
86
  image_input = gr.Image(
87
  label="Input",
88
  type="numpy",
@@ -100,9 +105,8 @@ slider_mask_alpha = gr.Slider(
100
  maximum=1,
101
  value=0.05,
102
  label="Mask Alpha")
103
- image_output = gr.Image(
104
- label="SoM Visual Prompt",
105
- type="numpy")
106
  openai_api_key = gr.Textbox(
107
  show_label=False,
108
  placeholder="Before you start chatting, set your OpenAI API key here",
@@ -115,6 +119,7 @@ run_button = gr.Button("Run")
115
 
116
  with gr.Blocks() as demo:
117
  gr.Markdown(MARKDOWN)
 
118
  with gr.Row():
119
  with gr.Column():
120
  image_input.render()
@@ -139,6 +144,10 @@ with gr.Blocks() as demo:
139
  run_button.click(
140
  fn=inference,
141
  inputs=[image_input, checkbox_annotation_mode, slider_mask_alpha],
142
- outputs=image_output)
 
 
 
 
143
 
144
  demo.queue().launch(debug=False, show_error=True)
 
1
  import os
2
+ from typing import List, Dict, Tuple, Any
3
 
4
  import cv2
5
  import gradio as gr
 
16
  DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
17
 
18
  SAM_CHECKPOINT = os.path.join(HOME, "app/weights/sam_vit_h_4b8939.pth")
19
+ # SAM_CHECKPOINT = "weights/sam_vit_h_4b8939.pth"
20
  SAM_MODEL_TYPE = "vit_h"
21
 
22
  MARKDOWN = """
 
45
  image_and_mask: Dict[str, np.ndarray],
46
  annotation_mode: List[str],
47
  mask_alpha: float
48
+ ) -> Tuple[Tuple[np.ndarray, List[Any]], sv.Detections]:
49
  image = image_and_mask['image']
50
  mask = cv2.cvtColor(image_and_mask['mask'], cv2.COLOR_RGB2GRAY)
51
  is_interactive = not np.all(mask == 0)
 
69
  with_mask="Mask" in annotation_mode,
70
  with_polygon="Polygon" in annotation_mode,
71
  with_label="Mark" in annotation_mode)
72
+ return (cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB), []), detections
73
 
74
 
75
  def prompt(message, history, image: np.ndarray, api_key: str) -> str:
 
84
  )
85
 
86
 
87
+ def on_image_input_clear():
88
+ return None, None
89
+
90
+
91
  image_input = gr.Image(
92
  label="Input",
93
  type="numpy",
 
105
  maximum=1,
106
  value=0.05,
107
  label="Mask Alpha")
108
+ image_output = gr.AnnotatedImage(
109
+ label="SoM Visual Prompt")
 
110
  openai_api_key = gr.Textbox(
111
  show_label=False,
112
  placeholder="Before you start chatting, set your OpenAI API key here",
 
119
 
120
  with gr.Blocks() as demo:
121
  gr.Markdown(MARKDOWN)
122
+ detections_state = gr.State()
123
  with gr.Row():
124
  with gr.Column():
125
  image_input.render()
 
144
  run_button.click(
145
  fn=inference,
146
  inputs=[image_input, checkbox_annotation_mode, slider_mask_alpha],
147
+ outputs=[image_output, detections_state])
148
+ image_input.clear(
149
+ fn=on_image_input_clear,
150
+ outputs=[image_output, detections_state]
151
+ )
152
 
153
  demo.queue().launch(debug=False, show_error=True)
gpt4v.py CHANGED
@@ -6,7 +6,10 @@ import numpy as np
6
 
7
 
8
  META_PROMPT = '''
9
- - For any marks mentioned in your answer, please highlight them with [].
 
 
 
10
  '''
11
  API_URL = "https://api.openai.com/v1/chat/completions"
12
 
 
6
 
7
 
8
  META_PROMPT = '''
9
+ For any labels or markings on an image that you reference in your response, please
10
+ enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for
11
+ example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
12
+ numbers or letters and typically correspond to specific segments or parts of the image.
13
  '''
14
  API_URL = "https://api.openai.com/v1/chat/completions"
15
 
utils.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import cv2
2
 
3
  import numpy as np
@@ -205,3 +208,18 @@ def postprocess_masks(
205
  xyxy=sv.mask_to_xyxy(masks),
206
  mask=masks
207
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import List
3
+
4
  import cv2
5
 
6
  import numpy as np
 
208
  xyxy=sv.mask_to_xyxy(masks),
209
  mask=masks
210
  )
211
+
212
+
213
+ def extract_numbers_in_brackets(text: str) -> List[int]:
214
+ """
215
+ Extracts all numbers enclosed in square brackets from a given string.
216
+
217
+ Args:
218
+ text (str): The string to be searched.
219
+
220
+ Returns:
221
+ List[int]: A list of integers found within square brackets.
222
+ """
223
+ pattern = r'\[(\d+)\]'
224
+ numbers = [int(num) for num in re.findall(pattern, text)]
225
+ return numbers