from pathlib import Path
from typing import Any, Dict, Optional, Tuple

import gradio as gr
import numpy as np
from easydict import EasyDict as edict
from omegaconf import OmegaConf

from hloc import flush_logs, read_logs
from ui.sfm import SfmEngine
from ui.utils import (
    GRADIO_VERSION,
    gen_examples,
    generate_warp_images,
    get_matcher_zoo,
    load_config,
    ransac_zoo,
    run_matching,
    run_ransac,
    send_to_match,
)

# Markdown rendered next to the logo at the top of the matching tab.
DESCRIPTION = """
# Image Matching WebUI

This Space demonstrates [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui) by vincent qin. Feel free to play with it, or duplicate to run image matching without a queue!

🔎 For more details about supported local features and matchers, please refer to https://github.com/Vincentqyw/image-matching-webui

🚀 All algorithms run on CPU for inference, causing slow speeds and high latency. For faster inference, please download the [source code](https://github.com/Vincentqyw/image-matching-webui) for local deployment.

🐛 Your feedback is valuable to me. Please do not hesitate to report any bugs [here](https://github.com/Vincentqyw/image-matching-webui/issues).
"""

# Custom stylesheet handed to ``gr.Blocks``.
CSS = """
#warning {background-color: #FFCCCB}
.logs_class textarea {font-size: 12px !important}
"""


class ImageMatchingApp:
    """Gradio application for pairwise image matching.

    Building an instance loads the YAML configuration, resolves the matcher
    zoo and constructs the complete ``gr.Blocks`` interface; call
    :meth:`run` to start serving it.
    """

    def __init__(self, server_name="0.0.0.0", server_port=7860, **kwargs):
        """Load the configuration and build the interface.

        Args:
            server_name: Host/interface passed to ``launch``.
            server_port: Port passed to ``launch``.
            **kwargs: Only ``config`` is read: path of the configuration
                file. Defaults to ``config.yaml`` next to this module.
        """
        self.server_name = server_name
        self.server_port = server_port
        self.config_path = kwargs.get(
            "config", Path(__file__).parent / "config.yaml"
        )
        self.cfg = load_config(self.config_path)
        self.matcher_zoo = get_matcher_zoo(self.cfg["matcher_zoo"])
        self.app = None
        self.init_interface()

    def init_matcher_dropdown(self):
        """Return the names of all matchers that are not disabled.

        A matcher counts as enabled unless its configuration entry sets
        ``enable`` to a falsy value.
        """
        return [
            name
            for name, conf in self.cfg["matcher_zoo"].items()
            if conf.get("enable", True)
        ]

    def init_interface(self):
        """Build the ``gr.Blocks`` UI and wire up every callback.

        The result is stored in ``self.app``. Two tabs are created: the
        image matching tab and the (under development) SfM tab.
        """
        # NOTE(review): this is a lexicographic *string* comparison; it
        # presumably expects GRADIO_VERSION to be a major-version string
        # such as "3" or "4" -- confirm against ui.utils.
        newer_gradio = GRADIO_VERSION > "3"
        image_height = 300 if newer_gradio else None

        with gr.Blocks(css=CSS) as self.app:
            with gr.Tab("Image Matching"):
                # ---- header: logo + description ----
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Image(
                            str(
                                Path(__file__).parent.parent
                                / "assets/logo.webp"
                            ),
                            elem_id="logo-img",
                            show_label=False,
                            show_share_button=False,
                            show_download_button=False,
                        )
                    with gr.Column(scale=3):
                        gr.Markdown(DESCRIPTION)

                with gr.Row(equal_height=False):
                    # ---- left column: inputs and settings ----
                    with gr.Column():
                        with gr.Row():
                            matcher_list = gr.Dropdown(
                                choices=self.init_matcher_dropdown(),
                                value="disk+lightglue",
                                label="Matching Model",
                                interactive=True,
                            )
                            match_image_src = gr.Radio(
                                (
                                    ["upload", "webcam", "clipboard"]
                                    if newer_gradio
                                    else ["upload", "webcam", "canvas"]
                                ),
                                label="Image Source",
                                value="upload",
                            )
                        with gr.Row():
                            input_image0 = gr.Image(
                                label="Image 0",
                                type="numpy",
                                image_mode="RGB",
                                height=image_height,
                                interactive=True,
                            )
                            input_image1 = gr.Image(
                                label="Image 1",
                                type="numpy",
                                image_mode="RGB",
                                height=image_height,
                                interactive=True,
                            )

                        with gr.Row():
                            button_reset = gr.Button(value="Reset")
                            button_run = gr.Button(
                                value="Run Match", variant="primary"
                            )

                        with gr.Accordion("Advanced Setting", open=False):
                            with gr.Accordion("Image Setting", open=True):
                                with gr.Row():
                                    image_force_resize_cb = gr.Checkbox(
                                        label="Force Resize",
                                        value=False,
                                        interactive=True,
                                    )
                                    # Both sliders start hidden; they are
                                    # revealed by the checkbox callback.
                                    image_setting_height = gr.Slider(
                                        minimum=48,
                                        maximum=2048,
                                        step=16,
                                        label="Image Height",
                                        value=480,
                                        visible=False,
                                    )
                                    image_setting_width = gr.Slider(
                                        minimum=64,
                                        maximum=2048,
                                        step=16,
                                        label="Image Width",
                                        value=640,
                                        visible=False,
                                    )
                            with gr.Accordion("Matching Setting", open=True):
                                with gr.Row():
                                    match_setting_threshold = gr.Slider(
                                        minimum=0.0,
                                        maximum=1,
                                        step=0.001,
                                        label="Match threshold",
                                        value=0.1,
                                    )
                                    match_setting_max_keypoints = gr.Slider(
                                        minimum=10,
                                        maximum=10000,
                                        step=10,
                                        label="Max features",
                                        value=1000,
                                    )
                                # TODO: add line settings
                                with gr.Row():
                                    detect_keypoints_threshold = gr.Slider(
                                        minimum=0,
                                        maximum=1,
                                        step=0.001,
                                        label="Keypoint threshold",
                                        value=0.015,
                                    )
                                    # Displayed only; not part of `inputs`.
                                    detect_line_threshold = (  # noqa: F841
                                        gr.Slider(
                                            minimum=0.1,
                                            maximum=1,
                                            step=0.01,
                                            label="Line threshold",
                                            value=0.2,
                                        )
                                    )

                            with gr.Accordion("RANSAC Setting", open=True):
                                with gr.Row(equal_height=False):
                                    ransac_method = gr.Dropdown(
                                        # A concrete list rather than a
                                        # dict view.
                                        choices=list(ransac_zoo.keys()),
                                        value=self.cfg["defaults"][
                                            "ransac_method"
                                        ],
                                        label="RANSAC Method",
                                        interactive=True,
                                    )
                                ransac_reproj_threshold = gr.Slider(
                                    minimum=0.0,
                                    maximum=12,
                                    step=0.01,
                                    label="Ransac Reproj threshold",
                                    value=8.0,
                                )
                                ransac_confidence = gr.Slider(
                                    minimum=0.0,
                                    maximum=1,
                                    step=0.00001,
                                    label="Ransac Confidence",
                                    value=self.cfg["defaults"][
                                        "ransac_confidence"
                                    ],
                                )
                                ransac_max_iter = gr.Slider(
                                    minimum=0.0,
                                    maximum=100000,
                                    step=100,
                                    label="Ransac Iterations",
                                    value=self.cfg["defaults"][
                                        "ransac_max_iter"
                                    ],
                                )
                                button_ransac = gr.Button(
                                    value="Rerun RANSAC", variant="primary"
                                )
                            with gr.Accordion("Geometry Setting", open=False):
                                with gr.Row(equal_height=False):
                                    choice_geometry_type = gr.Radio(
                                        ["Fundamental", "Homography"],
                                        label="Reconstruct Geometry",
                                        value=self.cfg["defaults"][
                                            "setting_geometry"
                                        ],
                                    )

                        # Show/hide the resize sliders with the checkbox.
                        image_force_resize_cb.select(
                            fn=self._on_select_force_resize,
                            inputs=image_force_resize_cb,
                            outputs=[
                                image_setting_width,
                                image_setting_height,
                            ],
                        )

                        # Per-session cache filled by run_matching and read
                        # by run_ransac / send_to_match.
                        state_cache = gr.State({})

                        # Argument order expected by run_matching.
                        inputs = [
                            input_image0,
                            input_image1,
                            match_setting_threshold,
                            match_setting_max_keypoints,
                            detect_keypoints_threshold,
                            matcher_list,
                            ransac_method,
                            ransac_reproj_threshold,
                            ransac_confidence,
                            ransac_max_iter,
                            choice_geometry_type,
                            gr.State(self.matcher_zoo),
                            image_force_resize_cb,
                            image_setting_width,
                            image_setting_height,
                        ]

                        # Example inputs
                        with gr.Row():
                            with gr.Accordion(
                                "Open for More: Examples", open=True
                            ):
                                gr.Examples(
                                    examples=gen_examples(),
                                    inputs=inputs,
                                    outputs=[],
                                    fn=run_matching,
                                    cache_examples=False,
                                    label=(
                                        "Examples (click one of the images"
                                        " below to Run Match). Thx: WxBS"
                                    ),
                                )
                        with gr.Accordion("Supported Algorithms", open=False):
                            # Table of supported algorithms.
                            self.display_supported_algorithms()

                    # ---- right column: logs and results ----
                    with gr.Column():
                        with gr.Accordion("Open for More: Logs", open=False):
                            logs = gr.Textbox(
                                placeholder="\n" * 10,
                                label="Logs",
                                info=(
                                    "Verbose from inference will be"
                                    " displayed below."
                                ),
                                lines=10,
                                max_lines=10,
                                autoscroll=True,
                                elem_id="logs",
                                show_copy_button=True,
                                container=True,
                                elem_classes="logs_class",
                            )
                            # Refresh the log box once per second.
                            self.app.load(read_logs, None, logs, every=1)
                            btn_clear_logs = gr.Button(
                                "Clear logs", elem_id="logs-button"
                            )
                            btn_clear_logs.click(flush_logs, [], [])

                        with gr.Accordion(
                            "Open for More: Keypoints", open=True
                        ):
                            output_keypoints = gr.Image(
                                label="Keypoints", type="numpy"
                            )
                        with gr.Accordion(
                            "Open for More: Raw Matches", open=False
                        ):
                            output_matches_raw = gr.Image(
                                label="Raw Matches",
                                type="numpy",
                            )
                        with gr.Accordion(
                            "Open for More: RANSAC Matches", open=True
                        ):
                            output_matches_ransac = gr.Image(
                                label="Ransac Matches", type="numpy"
                            )
                        with gr.Accordion(
                            "Open for More: Matches Statistics", open=False
                        ):
                            output_pred = gr.File(
                                label="Outputs", elem_id="download"
                            )
                            matches_result_info = gr.JSON(
                                label="Matches Statistics"
                            )
                            matcher_info = gr.JSON(label="Match info")

                        with gr.Accordion(
                            "Open for More: Warped Image", open=True
                        ):
                            output_wrapped = gr.Image(
                                label="Wrapped Pair", type="numpy"
                            )
                            # Feed the warped pair back in as new inputs.
                            button_rerun = gr.Button(
                                value="Send to Input Match Pair",
                                variant="primary",
                            )
                        with gr.Accordion(
                            "Open for More: Geometry info", open=False
                        ):
                            geometry_result = gr.JSON(
                                label="Reconstructed Geometry"
                            )

                    # ---- callbacks ----
                    # Switching the image source updates both image boxes.
                    match_image_src.change(
                        fn=self.ui_change_imagebox,
                        inputs=match_image_src,
                        outputs=input_image0,
                    )
                    match_image_src.change(
                        fn=self.ui_change_imagebox,
                        inputs=match_image_src,
                        outputs=input_image1,
                    )

                    # Components receiving run_matching's return values.
                    outputs = [
                        output_keypoints,
                        output_matches_raw,
                        output_matches_ransac,
                        matches_result_info,
                        matcher_info,
                        geometry_result,
                        output_wrapped,
                        state_cache,
                        output_pred,
                    ]
                    button_run.click(
                        fn=run_matching, inputs=inputs, outputs=outputs
                    )

                    # Must stay in the same order as the tuple returned by
                    # ui_reset_state. The two image boxes appear twice:
                    # first to clear the value, then to restore the source.
                    reset_outputs = [
                        input_image0,
                        input_image1,
                        match_setting_threshold,
                        match_setting_max_keypoints,
                        detect_keypoints_threshold,
                        matcher_list,
                        input_image0,
                        input_image1,
                        match_image_src,
                        output_keypoints,
                        output_matches_raw,
                        output_matches_ransac,
                        matches_result_info,
                        matcher_info,
                        output_wrapped,
                        geometry_result,
                        ransac_method,
                        ransac_reproj_threshold,
                        ransac_confidence,
                        ransac_max_iter,
                        choice_geometry_type,
                        output_pred,
                        image_force_resize_cb,
                    ]
                    button_reset.click(
                        fn=self.ui_reset_state,
                        inputs=None,
                        outputs=reset_outputs,
                    )

                    # Re-run RANSAC on the cached matches.
                    button_ransac.click(
                        fn=run_ransac,
                        inputs=[
                            state_cache,
                            choice_geometry_type,
                            ransac_method,
                            ransac_reproj_threshold,
                            ransac_confidence,
                            ransac_max_iter,
                        ],
                        outputs=[
                            output_matches_ransac,
                            matches_result_info,
                            output_wrapped,
                            output_pred,
                        ],
                    )

                    # Send the warped images back to the input boxes.
                    button_rerun.click(
                        fn=send_to_match,
                        inputs=[state_cache],
                        outputs=[input_image0, input_image1],
                    )

                    # Re-generate the warped pair when the geometry type
                    # changes.
                    choice_geometry_type.change(
                        fn=generate_warp_images,
                        inputs=[
                            input_image0,
                            input_image1,
                            geometry_result,
                            choice_geometry_type,
                        ],
                        outputs=[output_wrapped, geometry_result],
                    )

            with gr.Tab("Structure from Motion(under-dev)"):
                sfm_ui = AppSfmUI(  # noqa: F841
                    {
                        **self.cfg,
                        "matcher_zoo": self.matcher_zoo,
                        "outputs": "experiments/sfm",
                    }
                )
                # Only the placeholder handler is wired while the SfM
                # pipeline is under development.
                sfm_ui.call_empty()

    def run(self):
        """Enable queuing and launch the app on the configured host/port."""
        self.app.queue().launch(
            server_name=self.server_name,
            server_port=self.server_port,
            share=False,
        )

    def ui_change_imagebox(self, choice):
        """Build an update payload that switches an image box's source.

        Args:
            choice: The selected image source. Callers in this file pass
                the value of the "Image Source" radio (e.g. ``"upload"``).

        Returns:
            dict: An update dictionary that clears the image value and sets
            ``sources`` (when ``GRADIO_VERSION > "3"``) or ``source``
            (otherwise) to ``choice``.
        """
        source_key = "sources" if GRADIO_VERSION > "3" else "source"
        return {
            "value": None,  # clear whatever image is currently shown
            "__type__": "update",  # marks the dict as a component update
            source_key: choice,
        }

    def _on_select_force_resize(self, visible: bool = False):
        """Return visibility updates for the width and height sliders."""
        return gr.update(visible=visible), gr.update(visible=visible)

    def ui_reset_state(
        self,
        *args: Any,
    ) -> Tuple[
        Optional[np.ndarray],  # image0
        Optional[np.ndarray],  # image1
        float,  # match threshold
        int,  # max keypoints
        float,  # keypoint threshold
        str,  # matcher
        Dict[str, Any],  # image0 source update
        Dict[str, Any],  # image1 source update
        str,  # image source
        Optional[np.ndarray],  # keypoints
        Optional[np.ndarray],  # raw matches
        Optional[np.ndarray],  # ransac matches
        Dict[str, Any],  # matches result info
        Dict[str, Any],  # matcher config
        Optional[np.ndarray],  # warped image
        Dict[str, Any],  # geometry result
        str,  # ransac method
        float,  # ransac reproj threshold
        float,  # ransac confidence
        int,  # ransac max iter
        str,  # geometry type
        None,  # predictions file
        bool,  # force resize
    ]:
        """Reset the UI to its initial state and flush the logs.

        Args:
            *args: Ignored; accepted so the method can be used as a
                callback regardless of the inputs wired to it.

        Returns:
            tuple: 23 values, one per component listed in
            ``reset_outputs`` in :meth:`init_interface`, in that order.
        """
        defaults = self.cfg["defaults"]
        # The matcher is reset to the first entry of the matcher zoo.
        key: str = list(self.matcher_zoo.keys())[0]
        flush_logs()
        return (
            None,  # image0
            None,  # image1
            defaults["match_threshold"],  # match threshold
            defaults["max_keypoints"],  # max keypoints
            defaults["keypoint_threshold"],  # keypoint threshold
            key,  # matcher
            self.ui_change_imagebox("upload"),  # image0 source update
            self.ui_change_imagebox("upload"),  # image1 source update
            "upload",  # image source
            None,  # keypoints
            None,  # raw matches
            None,  # ransac matches
            {},  # matches result info
            {},  # matcher config
            None,  # warped image
            {},  # geometry result
            defaults["ransac_method"],  # ransac method
            defaults["ransac_reproj_threshold"],  # ransac reproj threshold
            defaults["ransac_confidence"],  # ransac confidence
            defaults["ransac_max_iter"],  # ransac max iter
            defaults["setting_geometry"],  # geometry type
            None,  # predictions file
            False,  # force resize
        )

    def display_supported_algorithms(self, style="tab"):
        """Render the list of supported algorithms.

        Entries whose ``info.display`` flag is falsy are skipped; a missing
        flag counts as "display".

        Args:
            style: ``"tab"`` renders a ``gr.Dataframe``; ``"md"`` renders a
                markdown table via ``gr.Markdown``.

        Returns:
            The created component, or ``None`` for any other ``style``.
        """

        def get_link(link, tag="Link"):
            return f"[{tag}]({link})" if link is not None else "None"

        # Only the `info` section of each displayed matcher is needed.
        infos = [
            conf["info"]
            for conf in self.cfg["matcher_zoo"].values()
            if conf["info"].get("display", True)
        ]

        if style == "md":
            parts = [
                "| Algo. | Conference | Code | Project | Paper |\n",
                "| ----- | ---------- | ---- | ------- | ----- |\n",
            ]
            for info in infos:
                paper = info["paper"]
                # Label the paper link with the tail of its file name.
                paper_tag = (
                    Path(paper).name[-10:] if paper is not None else "Link"
                )
                parts.append(
                    "{}|{}|{}|{}|{}\n".format(
                        info["name"],  # display name
                        info["source"],
                        get_link(info["github"]),
                        get_link(info["project"]),
                        get_link(paper, paper_tag),
                    )
                )
            return gr.Markdown("".join(parts))
        elif style == "tab":
            data = [
                [
                    info["name"],
                    info["source"],
                    info["github"],
                    info["paper"],
                    info["project"],
                ]
                for info in infos
            ]
            return gr.Dataframe(
                headers=["Algo.", "Conference", "Code", "Paper", "Project"],
                datatype=["str", "str", "str", "str", "str"],
                col_count=(5, "fixed"),
                value=data,
            )


class AppBaseUI:
    """Base class for UI sections that keep their components in namespaces.

    Attributes are grouped in three ``EasyDict`` containers: ``inputs``,
    ``outputs`` and ``ui`` (other widgets such as buttons).
    """

    def __init__(self, cfg: Optional[Dict[str, Any]] = None):
        """Store the configuration and create empty component namespaces.

        Args:
            cfg: Configuration mapping; ``None`` means an empty config.
        """
        # Avoid a shared mutable default: build a fresh dict per instance.
        self.cfg = OmegaConf.create({} if cfg is None else cfg)
        self.inputs = edict({})
        self.outputs = edict({})
        self.ui = edict({})

    def _init_ui(self):
        """Create the components; must be overridden by subclasses."""
        raise NotImplementedError

    def call(self, **kwargs):
        """Wire the callbacks; must be overridden by subclasses."""
        raise NotImplementedError

    def info(self):
        """Show a notice that the SfM feature is not finished yet."""
        gr.Info("SFM is under construction.")


class AppSfmUI(AppBaseUI):
    """UI section for the Structure-from-Motion tab."""

    def __init__(self, cfg: Optional[Dict[str, Any]] = None):
        """Build the SfM widgets inside the currently active Gradio context.

        Args:
            cfg: Configuration mapping; it must contain ``matcher_zoo``.
                ``init_retrieval_dropdown`` additionally reads
                ``retrieval_zoo``.
        """
        super().__init__(cfg)
        assert "matcher_zoo" in self.cfg, "cfg must contain 'matcher_zoo'"
        self.matcher_zoo = self.cfg["matcher_zoo"]
        self.sfm_engine = SfmEngine(cfg)
        self._init_ui()

    def init_retrieval_dropdown(self):
        """Return the names of all retrieval methods that are not disabled."""
        return [
            name
            for name, conf in self.cfg["retrieval_zoo"].items()
            if conf.get("enable", True)
        ]

    def _update_options(self, option):
        """Return a visible textbox echoing the option, or ``"not set"``."""
        text = option if option in ("sparse", "dense") else "not set"
        return gr.Textbox(text, visible=True)

    def _on_select_custom_params(self, value: bool = False):
        """Return a visibility update for the camera-params textbox."""
        return gr.update(visible=value)

    def _init_ui(self):
        """Create every SfM widget and register it on the namespaces."""
        with gr.Row():
            # ---- left column: data, camera and matching settings ----
            with gr.Column():
                self.inputs.input_images = gr.File(
                    label="SfM",
                    interactive=True,
                    file_count="multiple",
                    min_width=300,
                )
                # camera setting
                with gr.Accordion("Camera Settings", open=True):
                    with gr.Column():
                        with gr.Row():
                            with gr.Column():
                                self.inputs.camera_model = gr.Dropdown(
                                    choices=[
                                        "PINHOLE",
                                        "SIMPLE_RADIAL",
                                        "OPENCV",
                                    ],
                                    value="PINHOLE",
                                    label="Camera Model",
                                    interactive=True,
                                )
                            with gr.Column():
                                gr.Checkbox(
                                    label="Shared Params",
                                    value=True,
                                    interactive=True,
                                )
                                camera_custom_params_cb = gr.Checkbox(
                                    label="Custom Params",
                                    value=False,
                                    interactive=True,
                                )
                        with gr.Row():
                            # Hidden until "Custom Params" is selected.
                            self.inputs.camera_params = gr.Textbox(
                                label="Camera Params",
                                value="0,0,0,0",
                                interactive=False,
                                visible=False,
                            )
                        camera_custom_params_cb.select(
                            fn=self._on_select_custom_params,
                            inputs=camera_custom_params_cb,
                            outputs=self.inputs.camera_params,
                        )

                with gr.Accordion("Matching Settings", open=True):
                    # feature extraction and matching setting
                    with gr.Row():
                        # matcher setting
                        self.inputs.matcher_key = gr.Dropdown(
                            # A concrete list rather than a keys view.
                            choices=list(self.matcher_zoo.keys()),
                            value="disk+lightglue",
                            label="Matching Model",
                            interactive=True,
                        )
                    with gr.Row():
                        with gr.Accordion("Advanced Settings", open=False):
                            with gr.Column():
                                with gr.Row():
                                    # matching setting
                                    self.inputs.max_keypoints = gr.Slider(
                                        label="Max Keypoints",
                                        minimum=100,
                                        maximum=10000,
                                        value=1000,
                                        interactive=True,
                                    )
                                    self.inputs.keypoint_threshold = gr.Slider(
                                        label="Keypoint Threshold",
                                        minimum=0,
                                        maximum=1,
                                        value=0.01,
                                    )
                                with gr.Row():
                                    self.inputs.match_threshold = gr.Slider(
                                        label="Match Threshold",
                                        minimum=0.01,
                                        maximum=12.0,
                                        value=0.2,
                                    )
                                    self.inputs.ransac_threshold = gr.Slider(
                                        label="Ransac Threshold",
                                        minimum=0.01,
                                        maximum=12.0,
                                        value=4.0,
                                        step=0.01,
                                        interactive=True,
                                    )
                                with gr.Row():
                                    self.inputs.ransac_confidence = gr.Slider(
                                        label="Ransac Confidence",
                                        minimum=0.01,
                                        maximum=1.0,
                                        value=0.9999,
                                        step=0.0001,
                                        interactive=True,
                                    )
                                    self.inputs.ransac_max_iter = gr.Slider(
                                        label="Ransac Max Iter",
                                        minimum=1,
                                        maximum=100,
                                        value=100,
                                        step=1,
                                        interactive=True,
                                    )

                with gr.Accordion("Scene Graph Settings", open=True):
                    # mapping setting
                    self.inputs.scene_graph = gr.Dropdown(
                        choices=["all", "swin", "oneref"],
                        value="all",
                        label="Scene Graph",
                        interactive=True,
                    )
                    # global feature setting
                    self.inputs.global_feature = gr.Dropdown(
                        choices=self.init_retrieval_dropdown(),
                        value="netvlad",
                        label="Global features",
                        interactive=True,
                    )
                    self.inputs.top_k = gr.Slider(
                        label="Number of Images per Image to Match",
                        minimum=1,
                        maximum=100,
                        value=10,
                        step=1,
                    )

            # ---- right column: mapping settings and results ----
            with gr.Column():
                with gr.Accordion("Mapping Settings", open=True):
                    with gr.Row():
                        with gr.Accordion("Buddle Settings", open=True):
                            with gr.Row():
                                self.inputs.mapper_refine_focal_length = (
                                    gr.Checkbox(
                                        label="Refine Focal Length",
                                        value=False,
                                        interactive=True,
                                    )
                                )
                                self.inputs.mapper_refine_principle_points = (
                                    gr.Checkbox(
                                        label="Refine Principle Points",
                                        value=False,
                                        interactive=True,
                                    )
                                )
                                self.inputs.mapper_refine_extra_params = (
                                    gr.Checkbox(
                                        label="Refine Extra Params",
                                        value=False,
                                        interactive=True,
                                    )
                                )
                    with gr.Accordion("Retriangluation Settings", open=True):
                        gr.Textbox(
                            label="Retriangluation Details",
                        )
                self.ui.button_sfm = gr.Button("Run SFM", variant="primary")
                self.outputs.model_3d = gr.Model3D(
                    interactive=True,
                )
                self.outputs.output_image = gr.Image(
                    label="SFM Visualize",
                    type="numpy",
                    image_mode="RGB",
                    interactive=False,
                )

    def call_empty(self):
        """Wire the SfM button to the "under construction" notice."""
        self.ui.button_sfm.click(fn=self.info, inputs=[], outputs=[])

    def call(self):
        """Wire the SfM button to ``self.sfm_engine.call``.

        The order of ``inputs`` is the positional argument order handed to
        the engine.
        """
        self.ui.button_sfm.click(
            fn=self.sfm_engine.call,
            inputs=[
                self.inputs.matcher_key,
                self.inputs.input_images,  # images
                self.inputs.camera_model,
                self.inputs.camera_params,
                self.inputs.max_keypoints,
                self.inputs.keypoint_threshold,
                self.inputs.match_threshold,
                self.inputs.ransac_threshold,
                self.inputs.ransac_confidence,
                self.inputs.ransac_max_iter,
                self.inputs.scene_graph,
                self.inputs.global_feature,
                self.inputs.top_k,
                self.inputs.mapper_refine_focal_length,
                self.inputs.mapper_refine_principle_points,
                self.inputs.mapper_refine_extra_params,
            ],
            outputs=[self.outputs.model_3d, self.outputs.output_image],
        )