{ "_valid_processor_keys": [ "images", "do_resize", "size", "resample", "do_center_crop", "crop_size", "do_rescale", "rescale_factor", "do_normalize", "image_mean", "image_std", "do_convert_rgb", "return_tensors", "data_format", "input_data_format" ], "auto_map": { "AutoProcessor": "processing_florence2.Florence2Processor" }, "crop_size": { "height": 768, "width": 768 }, "do_center_crop": false, "do_convert_rgb": null, "do_normalize": true, "do_rescale": true, "do_resize": true, "image_mean": [ 0.485, 0.456, 0.406 ], "image_processor_type": "CLIPImageProcessor", "image_seq_length": 577, "image_std": [ 0.229, 0.224, 0.225 ], "processor_class": "Florence2Processor", "resample": 3, "rescale_factor": 0.00392156862745098, "size": { "height": 768, "width": 768 }, "tasks_answer_post_processing_type": { "<OCR>": "pure_text", "<OCR_WITH_REGION>": "ocr", "<CAPTION>": "pure_text", "<DETAILED_CAPTION>": "pure_text", "<MORE_DETAILED_CAPTION>": "pure_text", "<OD>": "description_with_bboxes", "<DENSE_REGION_CAPTION>": "description_with_bboxes", "<CAPTION_TO_PHRASE_GROUNDING>": "phrase_grounding", "<REFERRING_EXPRESSION_SEGMENTATION>": "polygons", "<REGION_TO_SEGMENTATION>": "polygons", "<OPEN_VOCABULARY_DETECTION>": "description_with_bboxes_or_polygons", "<REGION_TO_CATEGORY>": "pure_text", "<REGION_TO_DESCRIPTION>": "pure_text", "<REGION_TO_OCR>": "pure_text", "<REGION_PROPOSAL>": "bboxes" }, "task_prompts_without_inputs": { "<OCR>": "What is the text in the image?", "<OCR_WITH_REGION>": "What is the text in the image, with regions?", "<CAPTION>": "What does the image describe?", "<DETAILED_CAPTION>": "Describe in detail what is shown in the image.", "<MORE_DETAILED_CAPTION>": "Describe with a paragraph what is shown in the image.", "<OD>": "Locate the objects with category name in the image.", "<DENSE_REGION_CAPTION>": "Locate the objects in the image, with their descriptions.", "<REGION_PROPOSAL>": "Locate the region proposals in the image." }, "task_prompts_with_input": { "<CAPTION_TO_PHRASE_GROUNDING>": "Locate the phrases in the caption: {input}", "<REFERRING_EXPRESSION_SEGMENTATION>": "Locate {input} in the image with mask", "<REGION_TO_SEGMENTATION>": "What is the polygon mask of region {input}", "<OPEN_VOCABULARY_DETECTION>": "Locate {input} in the image.", "<REGION_TO_CATEGORY>": "What is the region {input}?", "<REGION_TO_DESCRIPTION>": "What does the region {input} describe?", "<REGION_TO_OCR>": "What text is in the region {input}?" } }