Update preprocessor_config.json
Browse files- preprocessor_config.json +36 -3
preprocessor_config.json
CHANGED
@@ -16,9 +16,6 @@
|
|
16 |
"data_format",
|
17 |
"input_data_format"
|
18 |
],
|
19 |
-
"auto_map": {
|
20 |
-
"AutoProcessor": "processing_florence2.Florence2Processor"
|
21 |
-
},
|
22 |
"crop_size": {
|
23 |
"height": 768,
|
24 |
"width": 768
|
@@ -46,5 +43,41 @@
|
|
46 |
"size": {
|
47 |
"height": 768,
|
48 |
"width": 768
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
}
|
50 |
}
|
|
|
16 |
"data_format",
|
17 |
"input_data_format"
|
18 |
],
|
|
|
|
|
|
|
19 |
"crop_size": {
|
20 |
"height": 768,
|
21 |
"width": 768
|
|
|
43 |
"size": {
|
44 |
"height": 768,
|
45 |
"width": 768
|
46 |
+
},
|
47 |
+
"tasks_answer_post_processing_type": {
|
48 |
+
"<OCR>": "pure_text",
|
49 |
+
"<OCR_WITH_REGION>": "ocr",
|
50 |
+
"<CAPTION>": "pure_text",
|
51 |
+
"<DETAILED_CAPTION>": "pure_text",
|
52 |
+
"<MORE_DETAILED_CAPTION>": "pure_text",
|
53 |
+
"<OD>": "description_with_bboxes",
|
54 |
+
"<DENSE_REGION_CAPTION>": "description_with_bboxes",
|
55 |
+
"<CAPTION_TO_PHRASE_GROUNDING>": "phrase_grounding",
|
56 |
+
"<REFERRING_EXPRESSION_SEGMENTATION>": "polygons",
|
57 |
+
"<REGION_TO_SEGMENTATION>": "polygons",
|
58 |
+
"<OPEN_VOCABULARY_DETECTION>": "description_with_bboxes_or_polygons",
|
59 |
+
"<REGION_TO_CATEGORY>": "pure_text",
|
60 |
+
"<REGION_TO_DESCRIPTION>": "pure_text",
|
61 |
+
"<REGION_TO_OCR>": "pure_text",
|
62 |
+
"<REGION_PROPOSAL>": "bboxes"
|
63 |
+
},
|
64 |
+
"task_prompts_without_inputs": {
|
65 |
+
"<OCR>": "What is the text in the image?",
|
66 |
+
"<OCR_WITH_REGION>": "What is the text in the image, with regions?",
|
67 |
+
"<CAPTION>": "What does the image describe?",
|
68 |
+
"<DETAILED_CAPTION>": "Describe in detail what is shown in the image.",
|
69 |
+
"<MORE_DETAILED_CAPTION>": "Describe with a paragraph what is shown in the image.",
|
70 |
+
"<OD>": "Locate the objects with category name in the image.",
|
71 |
+
"<DENSE_REGION_CAPTION>": "Locate the objects in the image, with their descriptions.",
|
72 |
+
"<REGION_PROPOSAL>": "Locate the region proposals in the image."
|
73 |
+
},
|
74 |
+
"task_prompts_with_input": {
|
75 |
+
"<CAPTION_TO_PHRASE_GROUNDING>": "Locate the phrases in the caption: {input}",
|
76 |
+
"<REFERRING_EXPRESSION_SEGMENTATION>": "Locate {input} in the image with mask",
|
77 |
+
"<REGION_TO_SEGMENTATION>": "What is the polygon mask of region {input}",
|
78 |
+
"<OPEN_VOCABULARY_DETECTION>": "Locate {input} in the image.",
|
79 |
+
"<REGION_TO_CATEGORY>": "What is the region {input}?",
|
80 |
+
"<REGION_TO_DESCRIPTION>": "What does the region {input} describe?",
|
81 |
+
"<REGION_TO_OCR>": "What text is in the region {input}?"
|
82 |
}
|
83 |
}
|