Files changed (1)
  1. app.py +140 -28
app.py CHANGED
@@ -65,31 +65,143 @@ def inference(raw_image, model_n , input_tag):
     return tag_1[0],'none',caption[0]


-inputs = [
-    gr.inputs.Image(type='pil'),
-    gr.inputs.Radio(choices=['Recognize Anything Model',"Tag2Text Model"],
-                    type="value",
-                    default="Recognize Anything Model",
-                    label="Select Model" ),
-    gr.inputs.Textbox(lines=2, label="User Specified Tags (Optional and Currently only Tag2Text is Supported, Enter with commas)")
-]
-
-outputs = [gr.outputs.Textbox(label="Tags"),gr.outputs.Textbox(label="标签"), gr.outputs.Textbox(label="Caption (currently only Tag2Text is supported)")]
-
-# title = "Recognize Anything Model"
-title = "<font size='10'> Recognize Anything Model</font>"
-
-description = "Welcome to the Recognize Anything Model (RAM) and Tag2Text Model demo! <li><b>Recognize Anything Model:</b> Upload your image to get the <b>English and Chinese outputs of the image tags</b>!</li><li><b>Tag2Text Model:</b> Upload your image to get the <b>tags</b> and <b>caption</b> of the image. Optional: You can also input specified tags to get the corresponding caption.</li> "
-
-
-article = "<p style='text-align: center'>RAM and Tag2Text is training on open-source datasets, and we are persisting in refining and iterating upon it.<br/><a href='https://recognize-anything.github.io/' target='_blank'>Recognize Anything: A Strong Image Tagging Model</a> | <a href='https://https://tag2text.github.io/' target='_blank'>Tag2Text: Guiding Language-Image Model via Image Tagging</a> | <a href='https://github.com/xinyu1205/Tag2Text' target='_blank'>Github Repo</a></p>"
-
-demo = gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=[
-    ['images/demo1.jpg',"Recognize Anything Model","none"],
-    ['images/demo2.jpg',"Recognize Anything Model","none"],
-    ['images/demo4.jpg',"Recognize Anything Model","none"],
-    ['images/demo4.jpg',"Tag2Text Model","power line"],
-    ['images/demo4.jpg',"Tag2Text Model","track, train"],
-])
-
-demo.launch(enable_queue=True)
+def build_gui():
+
+    description = """
+    <center><strong><font size='10'>Recognize Anything Model</font></strong></center>
+    <br>
+    Welcome to the Recognize Anything Model (RAM) and Tag2Text Model demo! <br><br>
+    <li>
+    <b>Recognize Anything Model:</b> Upload your image to get the <b>English and Chinese outputs of the image tags</b>!
+    </li>
+    <li>
+    <b>Tag2Text Model:</b> Upload your image to get the <b>tags</b> and <b>caption</b> of the image.
+    Optional: You can also input specified tags to get the corresponding caption.
+    </li>
+    """  # noqa
+
+    article = """
+    <p style='text-align: center'>
+    RAM and Tag2Text are trained on open-source datasets, and we are continually refining and iterating on them.<br/>
+    <a href='https://recognize-anything.github.io/' target='_blank'>Recognize Anything: A Strong Image Tagging Model</a>
+    |
+    <a href='https://tag2text.github.io/' target='_blank'>Tag2Text: Guiding Language-Image Model via Image Tagging</a>
+    |
+    <a href='https://github.com/xinyu1205/Tag2Text' target='_blank'>GitHub Repo</a>
+    </p>
+    """  # noqa
+
+    def inference_with_ram(img):
+        res = inference(img, "Recognize Anything Model", None)
+        return res[0], res[1]
+
+    def inference_with_t2t(img, input_tags):
+        res = inference(img, "Tag2Text Model", input_tags)
+        return res[0], res[2]
+
+    with gr.Blocks(title="Recognize Anything Model") as demo:
+        ###############
+        # components
+        ###############
+        gr.HTML(description)
+
+        with gr.Tab(label="Recognize Anything Model"):
+            with gr.Row():
+                with gr.Column():
+                    ram_in_img = gr.Image(type="pil")
+                    with gr.Row():
+                        ram_btn_run = gr.Button(value="Run")
+                        ram_btn_clear = gr.Button(value="Clear")
+                with gr.Column():
+                    ram_out_tag = gr.Textbox(label="Tags")
+                    ram_out_biaoqian = gr.Textbox(label="标签")
+            gr.Examples(
+                examples=[
+                    ["images/demo1.jpg"],
+                    ["images/demo2.jpg"],
+                    ["images/demo4.jpg"],
+                ],
+                fn=inference_with_ram,
+                inputs=[ram_in_img],
+                outputs=[ram_out_tag, ram_out_biaoqian],
+                cache_examples=True
+            )
+
+        with gr.Tab(label="Tag2Text Model"):
+            with gr.Row():
+                with gr.Column():
+                    t2t_in_img = gr.Image(type="pil")
+                    t2t_in_tag = gr.Textbox(label="User Specified Tags (Optional, separated by commas)")
+                    with gr.Row():
+                        t2t_btn_run = gr.Button(value="Run")
+                        t2t_btn_clear = gr.Button(value="Clear")
+                with gr.Column():
+                    t2t_out_tag = gr.Textbox(label="Tags")
+                    t2t_out_cap = gr.Textbox(label="Caption")
+            gr.Examples(
+                examples=[
+                    ["images/demo4.jpg", ""],
+                    ["images/demo4.jpg", "power line"],
+                    ["images/demo4.jpg", "track, train"],
+                ],
+                fn=inference_with_t2t,
+                inputs=[t2t_in_img, t2t_in_tag],
+                outputs=[t2t_out_tag, t2t_out_cap],
+                cache_examples=True
+            )
+
+        gr.HTML(article)
+
+        ###############
+        # events
+        ###############
+        # run inference
+        ram_btn_run.click(
+            fn=inference_with_ram,
+            inputs=[ram_in_img],
+            outputs=[ram_out_tag, ram_out_biaoqian]
+        )
+        t2t_btn_run.click(
+            fn=inference_with_t2t,
+            inputs=[t2t_in_img, t2t_in_tag],
+            outputs=[t2t_out_tag, t2t_out_cap]
+        )
+
+        # # keep the images of the two image panels in sync,
+        # # and clear old outputs when the image changes;
+        # # commented out: too slow due to network latency when deployed on Hugging Face
+        # def sync_img(v):
+        #     return [gr.update(value=v)] + [gr.update(value="")] * 4
+
+        # ram_in_img.upload(fn=sync_img, inputs=[ram_in_img], outputs=[
+        #     t2t_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+        # ram_in_img.clear(fn=sync_img, inputs=[ram_in_img], outputs=[
+        #     t2t_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+        # t2t_in_img.clear(fn=sync_img, inputs=[t2t_in_img], outputs=[
+        #     ram_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+        # t2t_in_img.upload(fn=sync_img, inputs=[t2t_in_img], outputs=[
+        #     ram_in_img, ram_out_tag, ram_out_biaoqian, t2t_out_tag, t2t_out_cap
+        # ])
+
+        # clear all
+        def clear_all():
+            return [gr.update(value=None)] * 2 + [gr.update(value="")] * 5
+
+        ram_btn_clear.click(fn=clear_all, inputs=[], outputs=[
+            ram_in_img, t2t_in_img,
+            ram_out_tag, ram_out_biaoqian, t2t_in_tag, t2t_out_tag, t2t_out_cap
+        ])
+        t2t_btn_clear.click(fn=clear_all, inputs=[], outputs=[
+            ram_in_img, t2t_in_img,
+            ram_out_tag, ram_out_biaoqian, t2t_in_tag, t2t_out_tag, t2t_out_cap
+        ])
+
+    return demo
+
+
+if __name__ == "__main__":
+    demo = build_gui()
+    demo.launch(enable_queue=True)
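
Note that `cache_examples=True` makes Gradio run `fn` on each example at startup and store the results, which is why `gr.Examples` is given `fn` and `outputs` here; clicking an example then returns the cached outputs instead of re-running inference.

The `clear_all` handler relies on Gradio's positional update mechanism: each `gr.update(...)` in the returned list is applied to the component at the same index of the event's `outputs` list. A minimal standalone sketch of that pattern, assuming the Gradio 3.x API generation this PR targets (component names here are illustrative, not from the PR):

```python
import gradio as gr

def clear_two():
    # Updates are matched to `outputs` by position:
    # value=None blanks the Image, value="" empties the Textbox.
    return [gr.update(value=None), gr.update(value="")]

with gr.Blocks() as demo:
    img = gr.Image(type="pil")
    txt = gr.Textbox(label="Tags")
    gr.Button("Clear").click(fn=clear_two, inputs=[], outputs=[img, txt])

if __name__ == "__main__":
    demo.launch()
```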