capjamesg committed
Commit ebdc756
1 Parent(s): e24ec90
Files changed (2)
  1. app.py +63 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,63 @@
+ import gradio as gr
+ from autodistill_gpt_4v import GPT4V
+ from autodistill.detection import CaptionOntology
+ from autodistill_grounding_dino import GroundingDINO
+ from autodistill.utils import plot
+ import tempfile
+ import cv2
+
+ from autodistill.core.custom_detection_model import CustomDetectionModel
+
+ MARKDOWN = """
+ # DINO-GPT4V
+
+ Use Grounding DINO and GPT-4V to label specific objects.
+
+ Visit the [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments)
+ repository to find more OpenAI Vision API experiments or contribute your own."""
+
+ def respond(api_key, input_image, dino_prompt, gpt_prompt):
+     # Gradio supplies an RGB numpy array; convert to BGR so cv2.imwrite saves the colors correctly.
+     input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
+     cv2.imwrite("input.jpg", input_image)
+
+     # Grounding DINO proposes boxes for the single detection prompt;
+     # GPT-4V then labels each region using the comma-separated candidate classes.
+     DINOGPT = CustomDetectionModel(
+         detection_model=GroundingDINO(
+             CaptionOntology({dino_prompt: dino_prompt})
+         ),
+         classification_model=GPT4V(
+             CaptionOntology({k: k for k in gpt_prompt.split(", ")}),
+             api_key=api_key
+         )
+     )
+
+     results = DINOGPT.predict("input.jpg")
+
+     # Draw the detections on the saved image and return the annotated array.
+     result = plot(
+         image=cv2.imread("input.jpg"),
+         detections=results,
+         classes=gpt_prompt.split(", "),
+         raw=True
+     )
+
+     return result
+
+ with gr.Blocks() as demo:
+     gr.Markdown(MARKDOWN)
+     with gr.Row():
+         with gr.Column():
+             api_key_textbox = gr.Textbox(
+                 label="OpenAI API Key", type="password")
+             dino_prompt = gr.Textbox(label="Grounding DINO Prompt")
+             gpt_prompt = gr.Textbox(label="GPT-4V Prompt")
+             input_image = gr.Image(type="numpy", label="Input Image")
+         with gr.Column():
+             output_image = gr.Image(type="numpy", label="Output Image")
+             submit_button = gr.Button("Submit")
+
+     submit_button.click(
+         fn=respond,
+         inputs=[api_key_textbox, input_image, dino_prompt, gpt_prompt],
+         outputs=[output_image]
+     )
+
+ demo.launch()
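
For quick local testing, a minimal sketch of calling `respond` outside the Gradio UI (with the function above in scope) could look like the following; the image path, API key placeholder, and prompt strings are illustrative, not part of the committed app:

```python
import cv2

# Load any local test image; respond() expects an RGB array, as Gradio provides.
image = cv2.imread("street.jpg")                 # hypothetical test image
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

annotated = respond(
    api_key="sk-...",                            # placeholder OpenAI API key
    input_image=image,
    dino_prompt="vehicle",                       # what Grounding DINO should localize
    gpt_prompt="car, truck, bus",                # classes GPT-4V chooses among
)

# The annotated result comes back as a numpy array and can be written to disk.
cv2.imwrite("annotated.jpg", annotated)
```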
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ numpy
+ opencv-python
+ requests
+ gradio==3.50.2
+ autodistill
+ autodistill_grounding_dino
+ autodistill_gpt_4v