"""Gradio demo: object detection with DETR / YOLOS and attention-map views."""

import pathlib

import gradio as gr
import torch
from transformers import (
    AutoFeatureExtractor,
    DetrForObjectDetection,
    YolosForObjectDetection,
)

from constants import MODELS_NAMES, MODELS_REPO
from style import css, description, title
from visualization import visualize_attention_map, visualize_prediction


def make_prediction(img, feature_extractor, model):
    """Run ``model`` on ``img`` and return detections plus attention outputs.

    Args:
        img: Input image. Its ``size`` attribute is reversed to build the
            target size handed to post-processing (the UI supplies PIL
            images, whose ``size`` is ``(width, height)``).
        feature_extractor: Callable that preprocesses ``img`` into model
            inputs and exposes ``post_process``.
        model: Detection model called with the preprocessed inputs.

    Returns:
        A 4-tuple ``(detections, decoder_attn, encoder_attn, cross_attn)``.
        When the model output has a single ``"attentions"`` entry (the YOLOS
        case), that same entry is returned in all three attention slots.

    Raises:
        KeyError: If the output contains neither ``"attentions"`` nor the
            decoder/encoder/cross attention entries.
    """
    inputs = feature_extractor(img, return_tensors="pt")

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    img_size = torch.tensor([tuple(reversed(img.size))])
    processed_outputs = feature_extractor.post_process(outputs, img_size)

    # YOLOS exposes one "attentions" entry; reuse it for every slot so the
    # caller can treat both model families uniformly.
    if "attentions" in outputs:
        attentions = outputs["attentions"]
        return processed_outputs[0], attentions, attentions, attentions

    return (
        processed_outputs[0],
        outputs["decoder_attentions"],
        outputs["encoder_attentions"],
        outputs["cross_attentions"],
    )


def detect_objects(model_name, image_input, threshold):
    """Detect objects in ``image_input`` with the model named ``model_name``.

    Args:
        model_name: Key into ``MODELS_REPO``; must contain ``"DETR"`` or
            ``"YOLOS"`` so the matching model class can be chosen.
        image_input: Image to analyse (``None`` when nothing was uploaded).
        threshold: Prediction threshold forwarded to ``visualize_prediction``.

    Returns:
        A 5-tuple ``(detections_img, decoder_attn_img, encoder_attn_img,
        cross_attn_img, model_details)`` matching the order of the outputs
        wired to the detect button.

    Raises:
        ValueError: If no image was provided or the model family is unknown.
        KeyError: If ``model_name`` is not present in ``MODELS_REPO``.
    """
    if image_input is None:
        raise ValueError("No image provided: upload or select an image first.")

    repo = MODELS_REPO[model_name]
    feature_extractor = AutoFeatureExtractor.from_pretrained(repo)

    if "DETR" in model_name:
        model = DetrForObjectDetection.from_pretrained(repo)
        model_details = "DETR details"
    elif "YOLOS" in model_name:
        model = YolosForObjectDetection.from_pretrained(repo)
        # Previously left unset, which made the final return raise
        # UnboundLocalError for every YOLOS model.
        model_details = "YOLOS details"
    else:
        raise ValueError(f"Unsupported model: {model_name!r}")

    (
        processed_outputs,
        decoder_attention_map,
        encoder_attention_map,
        cross_attention_map,
    ) = make_prediction(image_input, feature_extractor, model)

    viz_img = visualize_prediction(
        image_input, processed_outputs, threshold, model.config.id2label
    )
    decoder_attention_map_img = visualize_attention_map(
        image_input, decoder_attention_map
    )
    encoder_attention_map_img = visualize_attention_map(
        image_input, encoder_attention_map
    )
    cross_attention_map_img = visualize_attention_map(
        image_input, cross_attention_map
    )

    return (
        viz_img,
        decoder_attention_map_img,
        encoder_attention_map_img,
        cross_attention_map_img,
        model_details,
    )


def set_example_image(example: list) -> dict:
    """Return an image update that shows the first entry of ``example``."""
    return gr.Image.update(value=example[0])


with gr.Blocks(css=css) as app:
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Tabs():
        with gr.TabItem("Image upload and detections visualization"):
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(type="pil")
                with gr.Column():
                    options = gr.Dropdown(
                        value=MODELS_NAMES[0],
                        choices=MODELS_NAMES,
                        label="Select an object detection model",
                        show_label=True,
                    )
                    slider_input = gr.Slider(
                        minimum=0.2,
                        maximum=1,
                        value=0.7,
                        label="Prediction threshold",
                    )
                    detect_button = gr.Button("Detect leukocytes")

            with gr.Row():
                # Clickable gallery of every PNG found under the test folder.
                example_images = gr.Dataset(
                    components=[img_input],
                    samples=[
                        [path.as_posix()]
                        for path in sorted(
                            pathlib.Path("cd45rb_test_imgs").rglob("*.png")
                        )
                    ],
                )

            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        """The selected image with detected bounding boxes by the model"""
                    )
                    img_output_from_upload = gr.Image(shape=(850, 850))

        with gr.TabItem("Attention maps visualization"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("""Encoder attentions""")
                    encoder_att_map_output = gr.Image(shape=(850, 850))
                with gr.Column():
                    gr.Markdown("""Decoder attentions""")
                    decoder_att_map_output = gr.Image(shape=(850, 850))
                with gr.Column():
                    gr.Markdown("""Cross attentions""")
                    cross_att_map_output = gr.Image(shape=(850, 850))

        with gr.TabItem("Model details"):
            with gr.Row():
                model_details = gr.Markdown(""" """)

    # Output order must match the tuple returned by detect_objects
    # (detections, decoder, encoder, cross, details).
    detect_button.click(
        detect_objects,
        inputs=[options, img_input, slider_input],
        outputs=[
            img_output_from_upload,
            decoder_att_map_output,
            encoder_att_map_output,
            cross_att_map_output,
            model_details,
        ],
        queue=True,
    )
    example_images.click(
        fn=set_example_image, inputs=[example_images], outputs=[img_input]
    )

app.launch(enable_queue=True)