Spaces:

Realcat
/

image-matching-webui

Running

App Files Files Community

Realcat committed on Oct 23, 2024

Commit

f77c97c

verified ·

1 Parent(s): 4f55d39

Update README.md (#2)

Browse files

- update: gradio to 5.1.0 (bffff04dc54c8f36fbfacec804fc614ff70f3fa8)
- Update README.md (a03efcfb6140a0132faf34afd01ec54e93088987)

Files changed (18) hide show

README.md +2 -2
api/__init__.py +0 -0
api/client.py +148 -70
api/server.py +393 -29
api/test/CMakeLists.txt +16 -0
api/test/build_and_run.sh +16 -0
api/test/client.cpp +84 -0
api/test/helper.h +410 -0
api/types.py +16 -0
requirements.txt +1 -2
test_app_cli.py +9 -8
ui/__init__.py +5 -0
ui/api.py +0 -293
ui/app_class.py +4 -20
ui/config.yaml +6 -2
ui/sfm.py +8 -2
ui/utils.py +4 -2
ui/viz.py +3 -0

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🤗
 colorFrom: red
 colorTo: yellow
 sdk: gradio
-sdk_version: 4.28.3
 app_file: app.py
 pinned: true
 license: apache-2.0
@@ -152,4 +152,4 @@ This code is built based on [Hierarchical-Localization](https://github.com/cvg/H
 [stars-shield]: https://img.shields.io/github/stars/Vincentqyw/image-matching-webui.svg?style=for-the-badge
 [stars-url]: https://github.com/Vincentqyw/image-matching-webui/stargazers
 [issues-shield]: https://img.shields.io/github/issues/Vincentqyw/image-matching-webui.svg?style=for-the-badge
-[issues-url]: https://github.com/Vincentqyw/image-matching-webui/issues

 colorFrom: red
 colorTo: yellow
 sdk: gradio
+sdk_version: 5.3.0
 app_file: app.py
 pinned: true
 license: apache-2.0
 [stars-shield]: https://img.shields.io/github/stars/Vincentqyw/image-matching-webui.svg?style=for-the-badge
 [stars-url]: https://github.com/Vincentqyw/image-matching-webui/stargazers
 [issues-shield]: https://img.shields.io/github/issues/Vincentqyw/image-matching-webui.svg?style=for-the-badge
+[issues-url]: https://github.com/Vincentqyw/image-matching-webui/issues

api/__init__.py ADDED Viewed

File without changes

api/client.py CHANGED Viewed

@@ -1,18 +1,102 @@
 import argparse
 import pickle
 import time
-from typing import Dict
 import numpy as np
 import requests
-from loguru import logger
-API_URL_MATCH = "http://127.0.0.1:8001/v1/match"
-API_URL_EXTRACT = "http://127.0.0.1:8001/v1/extract"
-API_URL_EXTRACT_V2 = "http://127.0.0.1:8001/v2/extract"
-def send_generate_request(path0: str, path1: str) -> Dict[str, np.ndarray]:
     """
     Send a request to the API to generate a match between two images.
@@ -28,6 +112,7 @@ def send_generate_request(path0: str, path1: str) -> Dict[str, np.ndarray]:
     """
     files = {"image0": open(path0, "rb"), "image1": open(path1, "rb")}
     try:
         response = requests.post(API_URL_MATCH, files=files)
         pred = {}
         if response.status_code == 200:
@@ -44,68 +129,56 @@ def send_generate_request(path0: str, path1: str) -> Dict[str, np.ndarray]:
     return pred
-def send_generate_request1(path0: str) -> Dict[str, np.ndarray]:
     """
     Send a request to the API to extract features from an image.
     Args:
-        path0 (str): The path to the image.
     Returns:
-        Dict[str, np.ndarray]: A dictionary containing the extracted features.
-            The keys are "keypoints", "descriptors", and "scores", and the
-            values are ndarrays of shape (N, 2), (N, 128), and (N,),
-            respectively.
     """
-    files = {"image": open(path0, "rb")}
-    try:
-        response = requests.post(API_URL_EXTRACT, files=files)
-        pred: Dict[str, np.ndarray] = {}
-        if response.status_code == 200:
-            pred = response.json()
-            for key in list(pred.keys()):
-                pred[key] = np.array(pred[key])
-        else:
-            print(
-                f"Error: Response code {response.status_code} - {response.text}"
             )
-    finally:
-        files["image"].close()
-    return pred
-def send_generate_request2(image_path: str) -> Dict[str, np.ndarray]:
-    """
-    Send a request to the API to extract features from an image.
-    Args:
-        image_path (str): The path to the image.
-    Returns:
-        Dict[str, np.ndarray]: A dictionary containing the extracted features.
-            The keys are "keypoints", "descriptors", and "scores", and the
-            values are ndarrays of shape (N, 2), (N, 128), and (N,), respectively.
-    """
-    data = {
-        "image_path": image_path,
-        "max_keypoints": 1024,
-        "reference_points": [[0.0, 0.0], [1.0, 1.0]],
-    }
-    pred = {}
     try:
-        response = requests.post(API_URL_EXTRACT_V2, json=data)
-        pred: Dict[str, np.ndarray] = {}
-        if response.status_code == 200:
-            pred = response.json()
-            for key in list(pred.keys()):
-                pred[key] = np.array(pred[key])
-        else:
-            print(
-                f"Error: Response code {response.status_code} - {response.text}"
-            )
     except Exception as e:
         print(f"An error occurred: {e}")
-    return pred
 if __name__ == "__main__":
@@ -116,32 +189,37 @@ if __name__ == "__main__":
         "--image0",
         required=False,
         help="Path for the file's melody",
-        default="../datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot45.jpg",
     )
     parser.add_argument(
         "--image1",
         required=False,
         help="Path for the file's melody",
-        default="../datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot90.jpg",
     )
     args = parser.parse_args()
-    for i in range(10):
-        t1 = time.time()
-        preds = send_generate_request(args.image0, args.image1)
-        t2 = time.time()
-        logger.info(f"Time cost1: {(t2 - t1)} seconds")
-    for i in range(10):
-        t1 = time.time()
-        preds = send_generate_request1(args.image0)
-        t2 = time.time()
-        logger.info(f"Time cost2: {(t2 - t1)} seconds")
     for i in range(10):
         t1 = time.time()
-        preds = send_generate_request2(args.image0)
         t2 = time.time()
-        logger.info(f"Time cost2: {(t2 - t1)} seconds")
     with open("preds.pkl", "wb") as f:
         pickle.dump(preds, f)

 import argparse
+import base64
+import os
 import pickle
 import time
+from typing import Dict, List
+import cv2
 import numpy as np
 import requests
+# Base URL of the matching service; REMOTE_URL_RAILWAY overrides the local default.
+ENDPOINT = os.environ.get("REMOTE_URL_RAILWAY", "http://127.0.0.1:8001")
+print(f"API ENDPOINT: {ENDPOINT}")
+# Route URLs derived from the base endpoint.
+API_VERSION = ENDPOINT + "/version"
+API_URL_MATCH = ENDPOINT + "/v1/match"
+API_URL_EXTRACT = ENDPOINT + "/v1/extract"
+def read_image(path: str) -> str:
+    """
+    Read an image from a file as grayscale, encode it as a PNG and then as a
+    base64 string.
+    Args:
+        path (str): The path to the image to read.
+    Returns:
+        str: The base64 encoded PNG image.
+    Raises:
+        ValueError: If the image cannot be read from ``path`` or cannot be
+            encoded as PNG.
+    """
+    # Read the image from the file; cv2.imread signals failure by returning
+    # None rather than raising, so check it explicitly
+    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
+    if img is None:
+        raise ValueError(f"Cannot read image (missing or unsupported): {path}")
+    # Encode the image as a png, NO COMPRESSION LOSS (PNG is lossless)
+    retval, buffer = cv2.imencode(".png", img)
+    if not retval:
+        raise ValueError(f"Cannot encode image as PNG: {path}")
+    # Encode the PNG bytes as a base64 string
+    b64img = base64.b64encode(buffer).decode("utf-8")
+    return b64img
+def do_api_requests(url=API_URL_EXTRACT, **kwargs):
+    """
+    POST a JSON request to the image matching service and return its reply.
+    Args:
+        url (str): The URL of the API endpoint to call. Defaults to the
+            feature extraction endpoint.
+        **kwargs: Fields that override or extend the default request body.
+    Returns:
+        The decoded JSON response when the server answers with status 200;
+        otherwise None, after the error has been printed.
+    """
+    # Default request body; every field can be overridden through kwargs
+    defaults = {
+        "data": [],  # image data, base64 encoded
+        "max_keypoints": [100, 100],  # max keypoints to extract per image
+        "timestamps": ["0", "1"],  # timestamp per image (not used?)
+        "grayscale": 0,  # whether to convert the images to grayscale
+        "image_hw": [[640, 480], [320, 240]],  # image height and width
+        "feature_type": 0,  # type of feature to extract
+        "rotates": [0.0, 0.0],  # rotation angle per image
+        "scales": [1.0, 1.0],  # scale factor per image
+        "reference_points": [[640, 480], [320, 240]],  # per image (not used)
+        "binarize": True,  # whether to binarize the descriptors
+    }
+    payload = {**defaults, **kwargs}
+    try:
+        reply = requests.post(url, json=payload)
+        if reply.status_code != 200:
+            # Report the failure and fall through to the None result
+            print(f"Error: Response code {reply.status_code} - {reply.text}")
+            return None
+        return reply.json()
+    except Exception as e:
+        # Transport and JSON decoding problems are reported, not raised
+        print(f"An error occurred: {e}")
+    return None
+def send_request_match(path0: str, path1: str) -> Dict[str, np.ndarray]:
     """
     Send a request to the API to generate a match between two images.
     """
     files = {"image0": open(path0, "rb"), "image1": open(path1, "rb")}
     try:
+        # TODO: replace files with post json
         response = requests.post(API_URL_MATCH, files=files)
         pred = {}
         if response.status_code == 200:
     return pred
+def send_request_extract(
+    input_images: str, viz: bool = False
+) -> List[Dict[str, np.ndarray]]:
     """
     Send a request to the API to extract features from an image.
     Args:
+        input_images (str): The path to the image.
+        viz (bool): If True, save a keypoint visualization to
+            "demo_match.jpg" (debug only; skipped when the response carries
+            no "image_orig" entry).
     Returns:
+        List[Dict[str, np.ndarray]]: A list of dictionaries containing the
+            extracted features. The keys are "keypoints", "descriptors", and
+            "scores", and the values are ndarrays of shape (N, 2), (N, 128),
+            and (N,), respectively.
+    Raises:
+        RuntimeError: If the API request failed and produced no response.
     """
+    image_data = read_image(input_images)
+    inputs = {
+        "data": [image_data],
+    }
+    response = do_api_requests(
+        url=API_URL_EXTRACT,
+        **inputs,
+    )
+    # The request helper yields None on failure; fail with a clear message
+    # instead of a TypeError when indexing the missing response
+    if not response:
+        raise RuntimeError("Feature extraction request returned no response")
+    print("Keypoints detected: {}".format(len(response[0]["keypoints"])))
+    # draw matching, debug only
+    if viz:
+        from hloc.utils.viz import plot_keypoints
+        from ui.viz import fig2im, plot_images
+        kpts = np.array(response[0]["keypoints_orig"])
+        if "image_orig" in response[0].keys():
+            # Use the image returned by the server, not the literal key name
+            img_orig = np.array(response[0]["image_orig"])
+            # NOTE(review): titles is a plain string here; confirm that
+            # plot_images accepts it
+            output_keypoints = plot_images([img_orig], titles="titles", dpi=300)
+            plot_keypoints([kpts])
+            output_keypoints = fig2im(output_keypoints)
+            cv2.imwrite(
+                "demo_match.jpg",
+                output_keypoints[:, :, ::-1].copy(),  # RGB -> BGR
             )
+    return response
+def get_api_version():
+    """
+    Query the service's version endpoint and print the reported version.
+    Any failure (request, JSON decoding, missing key) is printed, not raised.
+    """
     try:
+        payload = requests.get(API_VERSION).json()
+        version = payload["version"]
+        print(f"API VERSION: {version}")
     except Exception as e:
         print(f"An error occurred: {e}")
 if __name__ == "__main__":
         "--image0",
         required=False,
         help="Path for the file's melody",
+        default="datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot45.jpg",
     )
     parser.add_argument(
         "--image1",
         required=False,
         help="Path for the file's melody",
+        default="datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot90.jpg",
     )
     args = parser.parse_args()
+    # get api version
+    get_api_version()
+    # request match
+    # for i in range(10):
+    #     t1 = time.time()
+    #     preds = send_request_match(args.image0, args.image1)
+    #     t2 = time.time()
+    #     print(
+    #         "Time cost1: {} seconds, matched: {}".format(
+    #             (t2 - t1), len(preds["mmkeypoints0_orig"])
+    #         )
+    #     )
+    # request extract
     for i in range(10):
         t1 = time.time()
+        preds = send_request_extract(args.image0)
         t2 = time.time()
+        print(f"Time cost2: {(t2 - t1)} seconds")
+    # dump preds
     with open("preds.pkl", "wb") as f:
         pickle.dump(preds, f)

api/server.py CHANGED Viewed

@@ -1,73 +1,435 @@
 # server.py
 import sys
 from pathlib import Path
-from typing import Union
 import numpy as np
 import uvicorn
 from fastapi import FastAPI, File, UploadFile
 from fastapi.responses import JSONResponse
 from PIL import Image
-sys.path.append("..")
-from pydantic import BaseModel
-from ui.api import ImageMatchingAPI
-from ui.utils import DEVICE
-class ImageInfo(BaseModel):
-    image_path: str
-    max_keypoints: int
-    reference_points: list
 class ImageMatchingService:
     def __init__(self, conf: dict, device: str):
         self.api = ImageMatchingAPI(conf=conf, device=device)
         self.app = FastAPI()
         self.register_routes()
     def register_routes(self):
         @self.app.post("/v1/match")
         async def match(
             image0: UploadFile = File(...), image1: UploadFile = File(...)
         ):
             try:
                 image0_array = self.load_image(image0)
                 image1_array = self.load_image(image1)
                 output = self.api(image0_array, image1_array)
                 skip_keys = ["image0_orig", "image1_orig"]
-                pred = self.filter_output(output, skip_keys)
                 return JSONResponse(content=pred)
             except Exception as e:
                 return JSONResponse(content={"error": str(e)}, status_code=500)
         @self.app.post("/v1/extract")
-        async def extract(image: UploadFile = File(...)):
-            try:
-                image_array = self.load_image(image)
-                output = self.api.extract(image_array)
-                skip_keys = ["descriptors", "image", "image_orig"]
-                pred = self.filter_output(output, skip_keys)
-                return JSONResponse(content=pred)
-            except Exception as e:
-                return JSONResponse(content={"error": str(e)}, status_code=500)
-        @self.app.post("/v2/extract")
-        async def extract_v2(image_path: ImageInfo):
-            img_path = image_path.image_path
             try:
-                safe_path = Path(img_path).resolve(strict=False)
-                image_array = self.load_image(str(safe_path))
-                output = self.api.extract(image_array)
-                skip_keys = ["descriptors", "image", "image_orig"]
-                pred = self.filter_output(output, skip_keys)
-                return JSONResponse(content=pred)
             except Exception as e:
                 return JSONResponse(content={"error": str(e)}, status_code=500)
     def load_image(self, file_path: Union[str, UploadFile]) -> np.ndarray:
@@ -88,7 +450,9 @@ class ImageMatchingService:
             image_array = np.array(img)
         return image_array
-    def filter_output(self, output: dict, skip_keys: list) -> dict:
         pred = {}
         for key, value in output.items():
             if key in skip_keys:

 # server.py
+import base64
+import io
 import sys
+import warnings
 from pathlib import Path
+from typing import Any, Dict, Optional, Union
+import cv2
+import matplotlib.pyplot as plt
 import numpy as np
+import torch
 import uvicorn
 from fastapi import FastAPI, File, UploadFile
+from fastapi.exceptions import HTTPException
 from fastapi.responses import JSONResponse
 from PIL import Image
+sys.path.append(str(Path(__file__).parents[1]))
+from api.types import ImagesInput
+from hloc import DEVICE, extract_features, logger, match_dense, match_features
+from hloc.utils.viz import add_text, plot_keypoints
+from ui import get_version
+from ui.utils import filter_matches, get_feature_model, get_model
+from ui.viz import display_matches, fig2im, plot_images
+warnings.simplefilter("ignore")
+def decode_base64_to_image(encoding):
+    """
+    Decode a base64 string (optionally a "data:image/..." URI) into an image.
+    Args:
+        encoding (str): Base64 payload, with or without a data-URI header.
+    Returns:
+        The image opened with ``Image.open`` from the decoded bytes.
+    Raises:
+        HTTPException: With status 500 if decoding or opening the image fails.
+    """
+    has_uri_header = encoding.startswith("data:image/")
+    if has_uri_header:
+        # Drop the header: take the second ';'-separated field, then the
+        # part after its comma
+        after_mime = encoding.split(";")[1]
+        encoding = after_mime.split(",")[1]
+    try:
+        raw_bytes = base64.b64decode(encoding)
+        return Image.open(io.BytesIO(raw_bytes))
+    except Exception as err:
+        logger.warning(f"API cannot decode image: {err}")
+        raise HTTPException(
+            status_code=500, detail="Invalid encoded image"
+        ) from err
+def to_base64_nparray(encoding: str) -> np.ndarray:
+    """Decode a base64-encoded image into a uint8 NumPy array."""
+    image = decode_base64_to_image(encoding)
+    pixels = np.array(image)
+    return pixels.astype("uint8")
+class ImageMatchingAPI(torch.nn.Module):
+    """
+    Image matching pipeline packaged as a torch module.
+    Builds a matcher (and, for non-dense configurations, a feature extractor)
+    from a configuration dict, matches image pairs, optionally filters the
+    matches with RANSAC, and can render the results to image files.
+    """
+    # RANSAC settings used unless the caller's conf supplies its own "ransac"
+    # entry (a caller-supplied entry replaces this one wholesale).
+    default_conf = {
+        "ransac": {
+            "enable": True,
+            "estimator": "poselib",
+            "geometry": "homography",
+            "method": "RANSAC",
+            "reproj_threshold": 3,
+            "confidence": 0.9999,
+            "max_iter": 10000,
+        },
+    }
+    def __init__(
+        self,
+        conf: Optional[dict] = None,
+        device: str = "cpu",
+        detect_threshold: float = 0.015,
+        max_keypoints: int = 1024,
+        match_threshold: float = 0.2,
+    ) -> None:
+        """
+        Initializes an instance of the ImageMatchingAPI class.
+        Args:
+            conf (dict): A dictionary containing the configuration parameters.
+                It is copied, so the caller's dict is never modified.
+            device (str, optional): The device to use for computation. Defaults to "cpu".
+            detect_threshold (float, optional): The threshold for detecting keypoints. Defaults to 0.015.
+            max_keypoints (int, optional): The maximum number of keypoints to extract. Defaults to 1024.
+            match_threshold (float, optional): The threshold for matching keypoints. Defaults to 0.2.
+        Returns:
+            None
+        """
+        import copy
+        super().__init__()
+        self.device = device
+        # Deep-copy the merged configuration: _updata_config writes the
+        # thresholds into nested dicts, which must not leak into the caller's
+        # conf or into the class-level default_conf.
+        self.conf = copy.deepcopy({**self.default_conf, **(conf or {})})
+        self._updata_config(detect_threshold, max_keypoints, match_threshold)
+        self._init_models()
+        # Also covers indexed devices such as "cuda:0"
+        if str(device).startswith("cuda"):
+            memory_allocated = torch.cuda.memory_allocated(device)
+            memory_reserved = torch.cuda.memory_reserved(device)
+            logger.info(
+                f"GPU memory allocated: {memory_allocated / 1024**2:.3f} MB"
+            )
+            logger.info(
+                f"GPU memory reserved: {memory_reserved / 1024**2:.3f} MB"
+            )
+        # Result of the most recent forward() call; None until one has run
+        self.pred = None
+    def parse_match_config(self, conf):
+        """
+        Return a copy of ``conf`` whose "matcher" entry (and "feature" entry,
+        for non-dense configurations) is replaced by the configuration
+        registered under the corresponding model name.
+        """
+        if conf["dense"]:
+            return {
+                **conf,
+                "matcher": match_dense.confs.get(
+                    conf["matcher"]["model"]["name"]
+                ),
+                "dense": True,
+            }
+        else:
+            return {
+                **conf,
+                "feature": extract_features.confs.get(
+                    conf["feature"]["model"]["name"]
+                ),
+                "matcher": match_features.confs.get(
+                    conf["matcher"]["model"]["name"]
+                ),
+                "dense": False,
+            }
+    def _updata_config(
+        self,
+        detect_threshold: float = 0.015,
+        max_keypoints: int = 1024,
+        match_threshold: float = 0.2,
+    ):
+        """
+        Write the thresholds into ``self.conf`` and cache the sub-configs.
+        Dense configurations receive ``match_threshold`` on the matcher model;
+        other configurations receive ``max_keypoints`` and
+        ``detect_threshold`` on the feature model.
+        """
+        self.dense = self.conf["dense"]
+        if self.conf["dense"]:
+            try:
+                self.conf["matcher"]["model"][
+                    "match_threshold"
+                ] = match_threshold
+            except TypeError as e:
+                logger.error(e)
+        else:
+            self.conf["feature"]["model"]["max_keypoints"] = max_keypoints
+            self.conf["feature"]["model"][
+                "keypoint_threshold"
+            ] = detect_threshold
+            self.extract_conf = self.conf["feature"]
+        self.match_conf = self.conf["matcher"]
+    def _init_models(self):
+        """Instantiate the matcher and, unless dense, the feature extractor."""
+        # initialize matcher
+        self.matcher = get_model(self.match_conf)
+        # initialize extractor
+        if self.dense:
+            self.extractor = None
+        else:
+            self.extractor = get_feature_model(self.conf["feature"])
+    def _forward(self, img0, img1):
+        """Match two images with the dense matcher or extract-then-match."""
+        if self.dense:
+            pred = match_dense.match_images(
+                self.matcher,
+                img0,
+                img1,
+                self.match_conf["preprocessing"],
+                device=self.device,
+            )
+        else:
+            pred0 = extract_features.extract(
+                self.extractor, img0, self.extract_conf["preprocessing"]
+            )
+            pred1 = extract_features.extract(
+                self.extractor, img1, self.extract_conf["preprocessing"]
+            )
+            pred = match_features.match_images(self.matcher, pred0, pred1)
+        return pred
+    @torch.inference_mode()
+    def extract(self, img0: np.ndarray, **kwargs) -> Dict[str, np.ndarray]:
+        """Extract features from a single image.
+        Args:
+            img0 (np.ndarray): image
+            **kwargs: Optional "max_keypoints" (default 512),
+                "keypoint_threshold" (default 0.0) and "binarize"
+                (default False).
+        Returns:
+            Dict[str, np.ndarray]: feature dict
+        Raises:
+            RuntimeError: If the API was configured with a dense matcher,
+                which has no feature extractor.
+            KeyError: If binarization is requested but the extractor returned
+                no descriptors.
+        """
+        if self.extractor is None:
+            raise RuntimeError(
+                "extract() requires a feature extractor; dense matchers have none"
+            )
+        # setting params
+        self.extractor.conf["max_keypoints"] = kwargs.get("max_keypoints", 512)
+        self.extractor.conf["keypoint_threshold"] = kwargs.get(
+            "keypoint_threshold", 0.0
+        )
+        pred = extract_features.extract(
+            self.extractor, img0, self.extract_conf["preprocessing"]
+        )
+        # Move tensors to CPU and drop the leading dimension
+        pred = {
+            k: v.cpu().detach()[0].numpy() if isinstance(v, torch.Tensor) else v
+            for k, v in pred.items()
+        }
+        # back to origin scale
+        s0 = pred["original_size"] / pred["size"]
+        pred["keypoints_orig"] = (
+            match_features.scale_keypoints(pred["keypoints"] + 0.5, s0) - 0.5
+        )
+        # TODO: rotate back
+        if kwargs.get("binarize", False):
+            # Explicit check instead of assert, which is stripped under -O
+            if "descriptors" not in pred:
+                raise KeyError("descriptors")
+            pred["descriptors"] = (pred["descriptors"] > 0).astype(np.uint8)
+            pred["descriptors"] = pred["descriptors"].T  # N x DIM
+        return pred
+    @torch.inference_mode()
+    def forward(
+        self,
+        img0: np.ndarray,
+        img1: np.ndarray,
+    ) -> Dict[str, np.ndarray]:
+        """
+        Forward pass of the image matching API.
+        Args:
+            img0: A 3D NumPy array of shape (H, W, C) representing the first image.
+                  Values are in the range [0, 1] and are in RGB mode.
+            img1: A 3D NumPy array of shape (H, W, C) representing the second image.
+                  Values are in the range [0, 1] and are in RGB mode.
+        Returns:
+            A dictionary containing the following keys:
+            - image0_orig: The original image 0.
+            - image1_orig: The original image 1.
+            - keypoints0_orig: The keypoints detected in image 0.
+            - keypoints1_orig: The keypoints detected in image 1.
+            - mkeypoints0_orig: The raw matches between image 0 and image 1.
+            - mkeypoints1_orig: The raw matches between image 1 and image 0.
+            - mmkeypoints0_orig: The RANSAC inliers in image 0.
+            - mmkeypoints1_orig: The RANSAC inliers in image 1.
+            - mconf: The confidence scores for the raw matches.
+            - mmconf: The confidence scores for the RANSAC inliers.
+        """
+        # Take as input a pair of images (not a batch)
+        assert isinstance(img0, np.ndarray)
+        assert isinstance(img1, np.ndarray)
+        self.pred = self._forward(img0, img1)
+        if self.conf["ransac"]["enable"]:
+            self.pred = self._geometry_check(self.pred)
+        return self.pred
+    def _geometry_check(
+        self,
+        pred: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """
+        Filter matches using RANSAC. If keypoints are available, filter by keypoints.
+        If lines are available, filter by lines. If both keypoints and lines are
+        available, filter by keypoints.
+        Args:
+            pred (Dict[str, Any]): dict of matches, including original keypoints.
+                                  See :func:`filter_matches` for the expected keys.
+        Returns:
+            Dict[str, Any]: filtered matches
+        """
+        pred = filter_matches(
+            pred,
+            ransac_method=self.conf["ransac"]["method"],
+            ransac_reproj_threshold=self.conf["ransac"]["reproj_threshold"],
+            ransac_confidence=self.conf["ransac"]["confidence"],
+            ransac_max_iter=self.conf["ransac"]["max_iter"],
+        )
+        return pred
+    def visualize(
+        self,
+        log_path: Optional[Path] = None,
+    ) -> None:
+        """
+        Visualize the matches.
+        Args:
+            log_path (Path, optional): The directory to save the images. Defaults to None.
+        Returns:
+            None
+        Raises:
+            RuntimeError: If called before forward() has produced a result.
+        """
+        if self.pred is None:
+            raise RuntimeError("visualize() called before any match was computed")
+        if self.conf["dense"]:
+            postfix = str(self.conf["matcher"]["model"]["name"])
+        else:
+            postfix = "{}_{}".format(
+                str(self.conf["feature"]["model"]["name"]),
+                str(self.conf["matcher"]["model"]["name"]),
+            )
+        titles = [
+            "Image 0 - Keypoints",
+            "Image 1 - Keypoints",
+        ]
+        pred: Dict[str, Any] = self.pred
+        image0: np.ndarray = pred["image0_orig"]
+        image1: np.ndarray = pred["image1_orig"]
+        output_keypoints: np.ndarray = plot_images(
+            [image0, image1], titles=titles, dpi=300
+        )
+        if (
+            "keypoints0_orig" in pred.keys()
+            and "keypoints1_orig" in pred.keys()
+        ):
+            plot_keypoints([pred["keypoints0_orig"], pred["keypoints1_orig"]])
+            text: str = (
+                f"# keypoints0: {len(pred['keypoints0_orig'])} \n"
+                + f"# keypoints1: {len(pred['keypoints1_orig'])}"
+            )
+            add_text(0, text, fs=15)
+        output_keypoints = fig2im(output_keypoints)
+        # plot images with raw matches
+        titles = [
+            "Image 0 - Raw matched keypoints",
+            "Image 1 - Raw matched keypoints",
+        ]
+        output_matches_raw, num_matches_raw = display_matches(
+            pred, titles=titles, tag="KPTS_RAW"
+        )
+        # plot images with ransac matches
+        titles = [
+            "Image 0 - Ransac matched keypoints",
+            "Image 1 - Ransac matched keypoints",
+        ]
+        output_matches_ransac, num_matches_ransac = display_matches(
+            pred, titles=titles, tag="KPTS_RANSAC"
+        )
+        if log_path is not None:
+            img_keypoints_path: Path = log_path / f"img_keypoints_{postfix}.png"
+            img_matches_raw_path: Path = (
+                log_path / f"img_matches_raw_{postfix}.png"
+            )
+            img_matches_ransac_path: Path = (
+                log_path / f"img_matches_ransac_{postfix}.png"
+            )
+            cv2.imwrite(
+                str(img_keypoints_path),
+                output_keypoints[:, :, ::-1].copy(),  # RGB -> BGR
+            )
+            cv2.imwrite(
+                str(img_matches_raw_path),
+                output_matches_raw[:, :, ::-1].copy(),  # RGB -> BGR
+            )
+            cv2.imwrite(
+                str(img_matches_ransac_path),
+                output_matches_ransac[:, :, ::-1].copy(),  # RGB -> BGR
+            )
+            plt.close("all")
 class ImageMatchingService:
     def __init__(self, conf: dict, device: str):
+        self.conf = conf
         self.api = ImageMatchingAPI(conf=conf, device=device)
         self.app = FastAPI()
         self.register_routes()
     def register_routes(self):
+        @self.app.get("/version")
+        async def version():
+            return {"version": get_version()}
         @self.app.post("/v1/match")
         async def match(
             image0: UploadFile = File(...), image1: UploadFile = File(...)
         ):
+            """
+            Handle the image matching request and return the processed result.
+            Args:
+                image0 (UploadFile): The first image file for matching.
+                image1 (UploadFile): The second image file for matching.
+            Returns:
+                JSONResponse: A JSON response containing the filtered match results
+                              or an error message in case of failure.
+            """
             try:
+                # Load the images from the uploaded files
                 image0_array = self.load_image(image0)
                 image1_array = self.load_image(image1)
+                # Perform image matching using the API
                 output = self.api(image0_array, image1_array)
+                # Keys to skip in the output
                 skip_keys = ["image0_orig", "image1_orig"]
+                # Postprocess the output to filter unwanted data
+                pred = self.postprocess(output, skip_keys)
+                # Return the filtered prediction as a JSON response
                 return JSONResponse(content=pred)
             except Exception as e:
+                # Return an error message with status code 500 in case of exception
                 return JSONResponse(content={"error": str(e)}, status_code=500)
         @self.app.post("/v1/extract")
+        async def extract(input_info: ImagesInput):
+            """
+            Extract keypoints and descriptors from images.
+            Args:
+                input_info: An object containing the image data and options.
+            Returns:
+                A list of dictionaries containing the keypoints and descriptors.
+            """
             try:
+                preds = []
+                for i, input_image in enumerate(input_info.data):
+                    # Load the image from the input data
+                    image_array = to_base64_nparray(input_image)
+                    # Extract keypoints and descriptors
+                    output = self.api.extract(
+                        image_array,
+                        max_keypoints=input_info.max_keypoints[i],
+                        binarize=input_info.binarize,
+                    )
+                    # Do not return the original image and image_orig
+                    # skip_keys = ["image", "image_orig"]
+                    skip_keys = []
+                    # Postprocess the output
+                    pred = self.postprocess(output, skip_keys)
+                    preds.append(pred)
+                # Return the list of extracted features
+                return JSONResponse(content=preds)
             except Exception as e:
+                # Return an error message if an exception occurs
                 return JSONResponse(content={"error": str(e)}, status_code=500)
     def load_image(self, file_path: Union[str, UploadFile]) -> np.ndarray:
             image_array = np.array(img)
         return image_array
+    def postprocess(
+        self, output: dict, skip_keys: list, binarize: bool = True
+    ) -> dict:
         pred = {}
         for key, value in output.items():
             if key in skip_keys:

api/test/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+cmake_minimum_required(VERSION 3.10)
+project(imatchui)
+set(OpenCV_DIR /usr/include/opencv4)
+find_package(OpenCV REQUIRED)
+find_package(Boost REQUIRED COMPONENTS system)
+add_executable(client client.cpp)
+# Header search paths: Boost_INCLUDE_DIRS (Boost_LIBRARIES lists library
+# files, not include directories) plus OpenCV's include directories.
+target_include_directories(client PRIVATE ${Boost_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS})
+# Link the requested Boost component together with the other libraries.
+target_link_libraries(client PRIVATE curl jsoncpp b64 ${Boost_LIBRARIES} ${OpenCV_LIBS})

api/test/build_and_run.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+# g++ main.cpp -I/usr/include/opencv4 -lcurl -ljsoncpp -lb64 -lopencv_core -lopencv_imgcodecs -o main
+# sudo apt-get update
+# sudo apt-get install libboost-all-dev -y
+# sudo apt-get install libcurl4-openssl-dev libjsoncpp-dev libb64-dev libopencv-dev -y
+# Create the build directory on a fresh checkout so "cd build" cannot fail
+mkdir -p build
+cd build
+# Run the demo only when configure and build both succeed, so a stale or
+# missing binary is never executed
+cmake .. && make -j12 && {
+    echo " ======== RUN DEMO ========"
+    ./client
+    echo " ======== END DEMO ========"
+}
+cd ..

api/test/client.cpp ADDED Viewed

	@@ -0,0 +1,84 @@

+#include <curl/curl.h>
+#include <opencv2/opencv.hpp>
+#include "helper.h"
+/**
+ * @brief Demo client for the feature-extraction endpoint.
+ *
+ * Loads a test image, verifies that it survives a base64 round trip, then
+ * posts it as JSON to the /v1/extract endpoint and decodes the response.
+ *
+ * @return 0 on success, -1 when the round-trip check or the HTTP request fails
+ *
+ * @throws std::runtime_error if the test image cannot be read or decoded
+ */
+int main() {
+    std::string img_path = "../../../datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot45.jpg";
+    cv::Mat original_img = cv::imread(img_path, cv::IMREAD_GRAYSCALE);
+    if (original_img.empty()) {
+        throw std::runtime_error("Failed to decode image");
+    }
+    // Convert the image to Base64
+    std::string base64_img = image_to_base64(original_img);
+    // Convert the Base64 back to an image
+    cv::Mat decoded_img = base64_to_image(base64_img);
+    cv::imwrite("decoded_image.jpg", decoded_img);
+    cv::imwrite("original_img.jpg", original_img);
+    // The images should be identical
+    if (cv::countNonZero(original_img != decoded_img) != 0) {
+        std::cerr << "The images are not identical" << std::endl;
+        return -1;
+    } else {
+        std::cout << "The images are identical!" << std::endl;
+    }
+    // construct params
+    APIParams params{
+        .data = {base64_img},
+        .max_keypoints = {100, 100},
+        .timestamps = {"0", "1"},
+        .grayscale = {0},
+        .image_hw = {{480, 640}, {240, 320}},
+        .feature_type = 0,
+        .rotates = {0.0f, 0.0f},
+        .scales = {1.0f, 1.0f},
+        .reference_points = {
+            {1.23e+2f, 1.2e+1f},
+            {5.0e-1f, 3.0e-1f},
+            {2.3e+2f, 2.2e+1f},
+            {6.0e-1f, 4.0e-1f}
+        },
+        .binarize = {1}
+    };
+    KeyPointResults kpts_results;
+    // Convert the parameters to JSON
+    Json::Value jsonData = paramsToJson(params);
+    std::string url = "http://127.0.0.1:8001/v1/extract";
+    Json::StreamWriterBuilder writer;
+    std::string output = Json::writeString(writer, jsonData);
+    CURL* curl;
+    CURLcode res;
+    std::string readBuffer;
+    // Exit status reported to the shell; flipped to -1 on any request failure
+    int exit_code = 0;
+    curl_global_init(CURL_GLOBAL_DEFAULT);
+    curl = curl_easy_init();
+    if (curl) {
+        struct curl_slist* hs = NULL;
+        hs = curl_slist_append(hs, "Content-Type: application/json");
+        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hs);
+        curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, output.c_str());
+        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
+        curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
+        res = curl_easy_perform(curl);
+        if (res != CURLE_OK) {
+            fprintf(stderr, "curl_easy_perform() failed: %s\n",
+                    curl_easy_strerror(res));
+            exit_code = -1;
+        } else {
+            // std::cout << "Response from server: " << readBuffer << std::endl;
+            kpts_results = decode_response(readBuffer);
+        }
+        // The header list is owned by the caller and must outlive the
+        // transfer; release it only now that the request has finished.
+        curl_slist_free_all(hs);
+        curl_easy_cleanup(curl);
+    } else {
+        std::cerr << "curl_easy_init() failed" << std::endl;
+        exit_code = -1;
+    }
+    curl_global_cleanup();
+    return exit_code;
+}

api/test/helper.h ADDED Viewed

	@@ -0,0 +1,410 @@

+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include <b64/encode.h>
+#include <jsoncpp/json/json.h>
+#include <opencv2/opencv.hpp>
+// base64 to image
+#include <boost/archive/iterators/binary_from_base64.hpp>
+#include <boost/archive/iterators/transform_width.hpp>
+#include <boost/archive/iterators/base64_from_binary.hpp>
+/// Request parameters of the API; paramsToJson() turns them into the JSON body, one key per field
+struct APIParams {
+    /// The images to process, each one a base64 encoded string
+    std::vector<std::string> data;
+    /// The maximum number of keypoints to detect, one entry per image
+    std::vector<int> max_keypoints;
+    /// The timestamps of the images, stored as strings
+    std::vector<std::string> timestamps;
+    /// Whether to convert the images to grayscale (a single flag for the whole request)
+    bool grayscale;
+    /// The height and width of each image, one inner vector per image
+    std::vector<std::vector<int>> image_hw;
+    /// The type of feature detector to use (the meaning of each value is defined by the server - TODO confirm)
+    int feature_type;
+    /// The rotations of the images (unit not specified here - TODO confirm)
+    std::vector<double> rotates;
+    /// The scales of the images
+    std::vector<double> scales;
+    /// The reference points of the images (layout of the inner vectors is not specified here - TODO confirm)
+    std::vector<std::vector<float>> reference_points;
+    /// Whether to binarize the descriptors (a single flag for the whole request)
+    bool binarize;
+};
+/**
+ * @brief Contains the results of a keypoint detector.
+ *
+ * @details Stores the keypoints and descriptors for each image. Entry i of
+ *          the keypoint list and entry i of the descriptor list are meant to
+ *          describe the same image; the class does not enforce this.
+ */
+class KeyPointResults {
+public:
+    /// Creates an empty result set.
+    KeyPointResults() {}
+    /**
+     * @brief Constructor.
+     *
+     * @param kp The keypoints for each image.
+     * @param desc The descriptors for each image.
+     */
+    KeyPointResults(const std::vector<std::vector<cv::KeyPoint>>& kp,
+                    const std::vector<cv::Mat>& desc)
+        : keypoints(kp), descriptors(desc) {}
+    /**
+     * @brief Append keypoints to the result.
+     *
+     * @param kpts The keypoints of one image; they are copied.
+     */
+    inline void append_keypoints(const std::vector<cv::KeyPoint>& kpts) {
+        keypoints.emplace_back(kpts);
+    }
+    /**
+     * @brief Append descriptors to the result.
+     *
+     * @param desc The descriptors of one image. Copying a cv::Mat copies
+     *             only its header, so the stored matrix shares its buffer
+     *             with @p desc.
+     */
+    inline void append_descriptors(const cv::Mat& desc) {
+        descriptors.emplace_back(desc);
+    }
+    /**
+     * @brief Get the keypoints.
+     *
+     * @return A copy of the keypoints of every image.
+     */
+    inline std::vector<std::vector<cv::KeyPoint>> get_keypoints() const {
+        return keypoints;
+    }
+    /**
+     * @brief Get the descriptors.
+     *
+     * @return A copy of the descriptor list (matrix buffers are shared).
+     */
+    inline std::vector<cv::Mat> get_descriptors() const {
+        return descriptors;
+    }
+private:
+    std::vector<std::vector<cv::KeyPoint>> keypoints;
+    std::vector<cv::Mat> descriptors;
+    // Not read or written by any member function yet.
+    std::vector<std::vector<float>> scores;
+};
+/**
+ * @brief Decodes a base64 encoded string.
+ *
+ * Trailing whitespace is ignored. Trailing '=' padding is accepted and does
+ * not contribute any bytes to the result.
+ *
+ * @param base64 The base64 encoded string to decode.
+ * @return The decoded bytes; empty if the input is empty or only whitespace.
+ */
+std::string base64_decode(const std::string& base64) {
+    using namespace boost::archive::iterators;
+    using It = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
+    // Find the position of the last non-whitespace character
+    const auto last = base64.find_last_not_of(" \t\n\r");
+    if (last == std::string::npos) {
+        // Nothing to decode; begin() + npos would be undefined behaviour
+        return std::string();
+    }
+    // Move one past the last non-whitespace character
+    const std::string::size_type end = last + 1;
+    // Count the trailing '=' padding characters (a valid encoding has at most two)
+    std::string::size_type padding = 0;
+    while (padding < 2 && padding < end && base64[end - 1 - padding] == '=') {
+        ++padding;
+    }
+    // Decode the base64 string
+    std::string decoded(It(base64.begin()), It(base64.begin() + end));
+    // The Boost decoder treats '=' as zero bits, so every padding character
+    // yields one extra zero byte at the end that was not in the source data
+    if (padding <= decoded.size()) {
+        decoded.erase(decoded.size() - padding);
+    }
+    return decoded;
+}
+/**
+ * @brief Turns a base64 string back into a grayscale OpenCV image
+ *
+ * The string is first decoded to raw bytes, which are then handed to the
+ * OpenCV image decoder.
+ *
+ * @param base64 The base64 encoded image file
+ * @return The decoded image, read with cv::IMREAD_GRAYSCALE
+ *
+ * @throws std::runtime_error if OpenCV cannot decode the bytes
+ */
+cv::Mat base64_to_image(const std::string& base64) {
+    // Raw bytes of the encoded image file
+    const std::string raw = base64_decode(base64);
+    std::vector<uchar> bytes(raw.begin(), raw.end());
+    cv::Mat decoded = cv::imdecode(bytes, cv::IMREAD_GRAYSCALE);
+    // An empty matrix is how imdecode reports failure
+    if (decoded.empty()) {
+        throw std::runtime_error("Failed to decode image");
+    }
+    return decoded;
+}
+/**
+ * @brief Serializes an OpenCV image as a base64 string
+ *
+ * The image is written to an in-memory PNG file and the bytes of that file
+ * are base64 encoded, with '=' padding appended so that the length of the
+ * result is a multiple of four.
+ *
+ * @param img The OpenCV image
+ * @return The base64 encoded PNG
+ *
+ * @throws std::runtime_error if the image is empty or encoding fails
+ */
+std::string image_to_base64(cv::Mat &img) {
+    if (img.empty()) {
+        throw std::runtime_error("Failed to read image");
+    }
+    // In-memory PNG file
+    std::vector<uchar> png_bytes;
+    const bool png_ok = cv::imencode(".png", img, png_bytes);
+    if (!png_ok) {
+        throw std::runtime_error("Failed to encode image");
+    }
+    // Re-group the 8-bit bytes into 6-bit units and map them to base64 characters
+    using namespace boost::archive::iterators;
+    using Base64Iter = base64_from_binary<transform_width<std::vector<uchar>::const_iterator, 6, 8>>;
+    std::string encoded_text(Base64Iter(png_bytes.begin()), Base64Iter(png_bytes.end()));
+    // One '=' per byte missing from the final 3-byte group
+    const size_t leftover = png_bytes.size() % 3;
+    if (leftover != 0) {
+        encoded_text.append(3 - leftover, '=');
+    }
+    return encoded_text;
+}
+/**
+ * @brief Callback function for libcurl to write data to a string
+ *
+ * libcurl calls this function for every chunk of the response body; the
+ * chunk is appended to the string supplied through CURLOPT_WRITEDATA.
+ *
+ * @param contents The data to write (not null-terminated)
+ * @param size The size of one data member
+ * @param nmemb The number of members in the data
+ * @param s The string to append the data to
+ * @return The number of bytes consumed; 0 if the data could not be stored,
+ *         which differs from the chunk size and so signals an error to libcurl
+ */
+size_t WriteCallback(void* contents, size_t size, size_t nmemb, std::string* s) {
+    const size_t newLength = size * nmemb;
+    try {
+        // Grow the string and copy the chunk in a single step
+        s->append(static_cast<const char*>(contents), newLength);
+    } catch (...) {
+        // No exception may escape into libcurl's C call stack, whatever its
+        // type (allocation failure, length error, ...)
+        return 0;
+    }
+    return newLength;
+}
+// Helper functions
+/**
+ * @brief Wraps a single value in a Json::Value
+ *
+ * Thin convenience wrapper around the Json::Value constructor, so that
+ * scalar fields can be converted with the same call style as vectors.
+ *
+ * @param val The value to convert
+ * @return The Json::Value constructed from @p val
+ */
+template <typename T>
+Json::Value toJson(const T& val) {
+    Json::Value converted(val);
+    return converted;
+}
+/**
+ * @brief Builds a JSON array from a vector
+ *
+ * The elements are appended in order, so element i of the vector becomes
+ * element i of the array. An empty vector gives an empty array.
+ *
+ * @param vec The vector to convert
+ * @return The JSON array holding the elements of @p vec
+ */
+template <typename T>
+Json::Value vectorToJson(const std::vector<T>& vec) {
+    Json::Value array(Json::arrayValue);
+    for (auto it = vec.begin(); it != vec.end(); ++it) {
+        array.append(*it);
+    }
+    return array;
+}
+/**
+ * @brief Builds a JSON array of arrays from a vector of vectors
+ *
+ * Every inner vector is converted with vectorToJson() and appended, in
+ * order, to the outer array.
+ *
+ * @param vec The nested vector to convert
+ * @return The JSON array whose elements are the converted inner vectors
+ */
+template <typename T>
+Json::Value nestedVectorToJson(const std::vector<std::vector<T>>& vec) {
+    Json::Value outer(Json::arrayValue);
+    for (auto row = vec.begin(); row != vec.end(); ++row) {
+        Json::Value inner = vectorToJson(*row);
+        outer.append(inner);
+    }
+    return outer;
+}
+/**
+ * @brief Serializes an APIParams struct into a JSON object
+ *
+ * Every field of the struct is stored under a key of the same name:
+ * - data: array of base64 encoded images
+ * - max_keypoints: array of integers, max number of keypoints for each image
+ * - timestamps: array of timestamps, one for each image
+ * - grayscale: boolean, whether to convert images to grayscale
+ * - image_hw: array of arrays, each holding the height and width of an image
+ * - feature_type: integer, the type of feature detector to use
+ * - rotates: array of doubles, the rotation of each image
+ * - scales: array of doubles, the scale of each image
+ * - reference_points: array of arrays, the reference points of the images
+ * - binarize: boolean, whether to binarize the descriptors
+ *
+ * @param params The APIParams struct to convert
+ * @return The JSON object representing @p params
+ */
+Json::Value paramsToJson(const APIParams& params) {
+    Json::Value body;
+    // Scalar fields
+    body["grayscale"] = toJson(params.grayscale);
+    body["feature_type"] = toJson(params.feature_type);
+    body["binarize"] = toJson(params.binarize);
+    // Flat per-image lists
+    body["data"] = vectorToJson(params.data);
+    body["max_keypoints"] = vectorToJson(params.max_keypoints);
+    body["timestamps"] = vectorToJson(params.timestamps);
+    body["rotates"] = vectorToJson(params.rotates);
+    body["scales"] = vectorToJson(params.scales);
+    // Nested lists
+    body["image_hw"] = nestedVectorToJson(params.image_hw);
+    body["reference_points"] = nestedVectorToJson(params.reference_points);
+    return body;
+}
+/**
+ * @brief Converts a two-level JSON array into a single-channel cv::Mat
+ *
+ * The outer array gives the rows and the first inner array gives the number
+ * of columns. Every element is read with asInt() and cast to T.
+ *
+ * @tparam T The element type of the matrix (e.g. uchar)
+ * @param json The JSON array of arrays to convert
+ * @return A matrix that owns its data; an empty matrix if @p json is not an
+ *         array, has no rows, or its first row has no columns
+ */
+template<typename T>
+cv::Mat jsonToMat(Json::Value json) {
+    if (!json.isArray() || json.empty()) {
+        return cv::Mat();
+    }
+    int rows = json.size();
+    int cols = json[0].size();
+    if (cols == 0) {
+        return cv::Mat();
+    }
+    // Create a single array to hold all the data.
+    std::vector<T> data;
+    data.reserve(rows * cols);
+    for (int i = 0; i < rows; i++) {
+        for (int j = 0; j < cols; j++) {
+            data.push_back(static_cast<T>(json[i][j].asInt()));
+        }
+    }
+    // This header only points at the local buffer; the element type is
+    // derived from T so that it always matches the buffer's layout.
+    cv::Mat view(rows, cols, cv::traits::Type<T>::value, data.data());
+    // Deep-copy before returning: `data` is destroyed at the end of this
+    // function, and a matrix still pointing at it would dangle.
+    return view.clone();
+}
+/**
+ * @brief Decodes the response of the server into keypoints and descriptors
+ *
+ * The response is expected to be a JSON array with one object per image.
+ * For every image the first ten entries of "keypoints_orig" are printed,
+ * all of them are converted to cv::KeyPoint, and "descriptors" is converted
+ * to a cv::Mat. Any other payload (for example the server's
+ * {"error": ...} object) is reported and yields an empty result.
+ *
+ * @param response The response of the server
+ * @param viz If true and an item contains "image_orig", the image is written
+ *            to viz_image_orig.jpg and, with the keypoints drawn on it, to
+ *            viz_image_orig_keypoints.jpg (overwritten for every item)
+ * @return The keypoints and descriptors; empty if the response could not be
+ *         parsed or is not an array
+ */
+KeyPointResults decode_response(const std::string& response, bool viz=true) {
+    Json::CharReaderBuilder builder;
+    // Owned by a smart pointer so the reader is released on every exit path
+    std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
+    Json::Value jsonData;
+    std::string errors;
+    // Parse the JSON response
+    const bool parsingSuccessful = reader->parse(response.c_str(),
+        response.c_str() + response.size(), &jsonData, &errors);
+    if (!parsingSuccessful) {
+        // Handle error
+        std::cout << "Failed to parse the JSON, errors:" << std::endl;
+        std::cout << errors << std::endl;
+        return KeyPointResults();
+    }
+    if (!jsonData.isArray()) {
+        // On failure the server answers with an object instead of the
+        // per-image array; indexing its members as images would fail
+        std::cout << "Unexpected response from the server:" << std::endl;
+        std::cout << jsonData.toStyledString() << std::endl;
+        return KeyPointResults();
+    }
+    KeyPointResults kpts_results;
+    // Iterate over the images
+    for (const auto& jsonItem : jsonData) {
+        if (!jsonItem.isObject()) {
+            // Not a per-image result; there is nothing to read from it
+            continue;
+        }
+        const Json::Value& jkeypoints_orig = jsonItem["keypoints_orig"];
+        std::vector<cv::KeyPoint> vkeypoints;
+        // Iterate over the keypoints
+        int counter = 0;
+        for (const auto& keypoint : jkeypoints_orig) {
+            if (counter < 10) {
+                // Print the first 10 keypoints
+                std::cout << keypoint[0].asFloat() << ", "
+                    << keypoint[1].asFloat() << std::endl;
+            }
+            counter++;
+            // Convert the Json::Value to a cv::KeyPoint
+            vkeypoints.emplace_back(cv::KeyPoint(keypoint[0].asFloat(),
+                keypoint[1].asFloat(), 0.0));
+        }
+        if (viz && jsonItem.isMember("image_orig")) {
+            auto jimg_orig = jsonItem["image_orig"];
+            cv::Mat img = jsonToMat<uchar>(jimg_orig);
+            cv::imwrite("viz_image_orig.jpg", img);
+            // Draw keypoints on the image
+            cv::Mat imgWithKeypoints;
+            cv::drawKeypoints(img, vkeypoints,
+                imgWithKeypoints, cv::Scalar(0, 0, 255));
+            // Write the image with keypoints
+            std::string filename = "viz_image_orig_keypoints.jpg";
+            cv::imwrite(filename, imgWithKeypoints);
+        }
+        // Convert the descriptors
+        cv::Mat descriptors = jsonToMat<uchar>(jsonItem["descriptors"]);
+        kpts_results.append_keypoints(vkeypoints);
+        kpts_results.append_descriptors(descriptors);
+    }
+    return kpts_results;
+}

api/types.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from typing import List
+from pydantic import BaseModel
+class ImagesInput(BaseModel):
+    """Pydantic model declaring the fields of an image-processing request.
+
+    The field names and types mirror the JSON keys written by
+    ``paramsToJson`` in ``api/test/helper.h``. Every field has a default,
+    so a request may omit any of them.
+    """
+
+    # Images to process; presumably base64-encoded strings, as sent by the
+    # C++ test client - TODO confirm against the server.
+    data: List[str] = []
+    # Maximum number of keypoints; a list, presumably one entry per image.
+    max_keypoints: List[int] = []
+    # Timestamps of the images, as strings.
+    timestamps: List[str] = []
+    # Single flag for the whole request.
+    grayscale: bool = False
+    # NOTE(review): the default holds two empty lists - presumably one
+    # [height, width] pair per image is expected; confirm.
+    image_hw: List[List[int]] = [[], []]
+    # Integer selector; the meaning of each value is not defined here.
+    feature_type: int = 0
+    rotates: List[float] = []
+    scales: List[float] = []
+    reference_points: List[List[float]] = []
+    # Single flag for the whole request.
+    binarize: bool = False

requirements.txt CHANGED Viewed

@@ -2,8 +2,7 @@ e2cnn
 einops
 easydict
 gdown
-gradio==4.44.0
-gradio_client==1.3.0
 h5py
 huggingface_hub
 imageio

 einops
 easydict
 gdown
+gradio==5.1.0
 h5py
 huggingface_hub
 imageio

test_app_cli.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import cv2
 from hloc import logger
-from ui.utils import (
-    get_matcher_zoo,
-    load_config,
-    DEVICE,
-    ROOT,
-)
-from ui.api import ImageMatchingAPI
 def test_all(config: dict = None):
@@ -68,7 +69,7 @@ def test_one():
         "dense": False,
     }
     api = ImageMatchingAPI(conf=conf, device=DEVICE)
-    pred = api(image0, image1)
     log_path = ROOT / "experiments" / "one"
     log_path.mkdir(exist_ok=True, parents=True)
     api.visualize(log_path=log_path)

+import sys
+from pathlib import Path
 import cv2
 from hloc import logger
+from ui.utils import DEVICE, ROOT, get_matcher_zoo, load_config
+sys.path.append(str(Path(__file__).parents[1]))
+from api.server import ImageMatchingAPI
 def test_all(config: dict = None):
         "dense": False,
     }
     api = ImageMatchingAPI(conf=conf, device=DEVICE)
+    api(image0, image1)
     log_path = ROOT / "experiments" / "one"
     log_path.mkdir(exist_ok=True, parents=True)
     api.visualize(log_path=log_path)

ui/__init__.py CHANGED Viewed

	@@ -0,0 +1,5 @@

+__version__ = "1.0.1"
+def get_version() -> str:
+    """Return the package version string stored in ``__version__``."""
+    return __version__

ui/api.py DELETED Viewed

@@ -1,293 +0,0 @@
-import warnings
-from pathlib import Path
-from typing import Any, Dict, Optional
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-import torch
-from hloc import extract_features, logger, match_dense, match_features
-from hloc.utils.viz import add_text, plot_keypoints
-from .utils import (
-    ROOT,
-    filter_matches,
-    get_feature_model,
-    get_model,
-    load_config,
-)
-from .viz import display_matches, fig2im, plot_images
-warnings.simplefilter("ignore")
-class ImageMatchingAPI(torch.nn.Module):
-    default_conf = {
-        "ransac": {
-            "enable": True,
-            "estimator": "poselib",
-            "geometry": "homography",
-            "method": "RANSAC",
-            "reproj_threshold": 3,
-            "confidence": 0.9999,
-            "max_iter": 10000,
-        },
-    }
-    def __init__(
-        self,
-        conf: dict = {},
-        device: str = "cpu",
-        detect_threshold: float = 0.015,
-        max_keypoints: int = 1024,
-        match_threshold: float = 0.2,
-    ) -> None:
-        """
-        Initializes an instance of the ImageMatchingAPI class.
-        Args:
-            conf (dict): A dictionary containing the configuration parameters.
-            device (str, optional): The device to use for computation. Defaults to "cpu".
-            detect_threshold (float, optional): The threshold for detecting keypoints. Defaults to 0.015.
-            max_keypoints (int, optional): The maximum number of keypoints to extract. Defaults to 1024.
-            match_threshold (float, optional): The threshold for matching keypoints. Defaults to 0.2.
-        Returns:
-            None
-        """
-        super().__init__()
-        self.device = device
-        self.conf = {**self.default_conf, **conf}
-        self._updata_config(detect_threshold, max_keypoints, match_threshold)
-        self._init_models()
-        if device == "cuda":
-            memory_allocated = torch.cuda.memory_allocated(device)
-            memory_reserved = torch.cuda.memory_reserved(device)
-            logger.info(
-                f"GPU memory allocated: {memory_allocated / 1024**2:.3f} MB"
-            )
-            logger.info(
-                f"GPU memory reserved: {memory_reserved / 1024**2:.3f} MB"
-            )
-        self.pred = None
-    def parse_match_config(self, conf):
-        if conf["dense"]:
-            return {
-                **conf,
-                "matcher": match_dense.confs.get(
-                    conf["matcher"]["model"]["name"]
-                ),
-                "dense": True,
-            }
-        else:
-            return {
-                **conf,
-                "feature": extract_features.confs.get(
-                    conf["feature"]["model"]["name"]
-                ),
-                "matcher": match_features.confs.get(
-                    conf["matcher"]["model"]["name"]
-                ),
-                "dense": False,
-            }
-    def _updata_config(
-        self,
-        detect_threshold: float = 0.015,
-        max_keypoints: int = 1024,
-        match_threshold: float = 0.2,
-    ):
-        self.dense = self.conf["dense"]
-        if self.conf["dense"]:
-            try:
-                self.conf["matcher"]["model"][
-                    "match_threshold"
-                ] = match_threshold
-            except TypeError as e:
-                logger.error(e)
-        else:
-            self.conf["feature"]["model"]["max_keypoints"] = max_keypoints
-            self.conf["feature"]["model"][
-                "keypoint_threshold"
-            ] = detect_threshold
-            self.extract_conf = self.conf["feature"]
-        self.match_conf = self.conf["matcher"]
-    def _init_models(self):
-        # initialize matcher
-        self.matcher = get_model(self.match_conf)
-        # initialize extractor
-        if self.dense:
-            self.extractor = None
-        else:
-            self.extractor = get_feature_model(self.conf["feature"])
-    def _forward(self, img0, img1):
-        if self.dense:
-            pred = match_dense.match_images(
-                self.matcher,
-                img0,
-                img1,
-                self.match_conf["preprocessing"],
-                device=self.device,
-            )
-            last_fixed = "{}".format(  # noqa: F841
-                self.match_conf["model"]["name"]
-            )
-        else:
-            pred0 = extract_features.extract(
-                self.extractor, img0, self.extract_conf["preprocessing"]
-            )
-            pred1 = extract_features.extract(
-                self.extractor, img1, self.extract_conf["preprocessing"]
-            )
-            pred = match_features.match_images(self.matcher, pred0, pred1)
-        return pred
-    @torch.inference_mode()
-    def forward(
-        self,
-        img0: np.ndarray,
-        img1: np.ndarray,
-    ) -> Dict[str, np.ndarray]:
-        """
-        Forward pass of the image matching API.
-        Args:
-            img0: A 3D NumPy array of shape (H, W, C) representing the first image.
-                  Values are in the range [0, 1] and are in RGB mode.
-            img1: A 3D NumPy array of shape (H, W, C) representing the second image.
-                  Values are in the range [0, 1] and are in RGB mode.
-        Returns:
-            A dictionary containing the following keys:
-            - image0_orig: The original image 0.
-            - image1_orig: The original image 1.
-            - keypoints0_orig: The keypoints detected in image 0.
-            - keypoints1_orig: The keypoints detected in image 1.
-            - mkeypoints0_orig: The raw matches between image 0 and image 1.
-            - mkeypoints1_orig: The raw matches between image 1 and image 0.
-            - mmkeypoints0_orig: The RANSAC inliers in image 0.
-            - mmkeypoints1_orig: The RANSAC inliers in image 1.
-            - mconf: The confidence scores for the raw matches.
-            - mmconf: The confidence scores for the RANSAC inliers.
-        """
-        # Take as input a pair of images (not a batch)
-        assert isinstance(img0, np.ndarray)
-        assert isinstance(img1, np.ndarray)
-        self.pred = self._forward(img0, img1)
-        if self.conf["ransac"]["enable"]:
-            self.pred = self._geometry_check(self.pred)
-        return self.pred
-    def _geometry_check(
-        self,
-        pred: Dict[str, Any],
-    ) -> Dict[str, Any]:
-        """
-        Filter matches using RANSAC. If keypoints are available, filter by keypoints.
-        If lines are available, filter by lines. If both keypoints and lines are
-        available, filter by keypoints.
-        Args:
-            pred (Dict[str, Any]): dict of matches, including original keypoints.
-                                  See :func:`filter_matches` for the expected keys.
-        Returns:
-            Dict[str, Any]: filtered matches
-        """
-        pred = filter_matches(
-            pred,
-            ransac_method=self.conf["ransac"]["method"],
-            ransac_reproj_threshold=self.conf["ransac"]["reproj_threshold"],
-            ransac_confidence=self.conf["ransac"]["confidence"],
-            ransac_max_iter=self.conf["ransac"]["max_iter"],
-        )
-        return pred
-    def visualize(
-        self,
-        log_path: Optional[Path] = None,
-    ) -> None:
-        """
-        Visualize the matches.
-        Args:
-            log_path (Path, optional): The directory to save the images. Defaults to None.
-        Returns:
-            None
-        """
-        if self.conf["dense"]:
-            postfix = str(self.conf["matcher"]["model"]["name"])
-        else:
-            postfix = "{}_{}".format(
-                str(self.conf["feature"]["model"]["name"]),
-                str(self.conf["matcher"]["model"]["name"]),
-            )
-        titles = [
-            "Image 0 - Keypoints",
-            "Image 1 - Keypoints",
-        ]
-        pred: Dict[str, Any] = self.pred
-        image0: np.ndarray = pred["image0_orig"]
-        image1: np.ndarray = pred["image1_orig"]
-        output_keypoints: np.ndarray = plot_images(
-            [image0, image1], titles=titles, dpi=300
-        )
-        if (
-            "keypoints0_orig" in pred.keys()
-            and "keypoints1_orig" in pred.keys()
-        ):
-            plot_keypoints([pred["keypoints0_orig"], pred["keypoints1_orig"]])
-            text: str = (
-                f"# keypoints0: {len(pred['keypoints0_orig'])} \n"
-                + f"# keypoints1: {len(pred['keypoints1_orig'])}"
-            )
-            add_text(0, text, fs=15)
-        output_keypoints = fig2im(output_keypoints)
-        # plot images with raw matches
-        titles = [
-            "Image 0 - Raw matched keypoints",
-            "Image 1 - Raw matched keypoints",
-        ]
-        output_matches_raw, num_matches_raw = display_matches(
-            pred, titles=titles, tag="KPTS_RAW"
-        )
-        # plot images with ransac matches
-        titles = [
-            "Image 0 - Ransac matched keypoints",
-            "Image 1 - Ransac matched keypoints",
-        ]
-        output_matches_ransac, num_matches_ransac = display_matches(
-            pred, titles=titles, tag="KPTS_RANSAC"
-        )
-        if log_path is not None:
-            img_keypoints_path: Path = log_path / f"img_keypoints_{postfix}.png"
-            img_matches_raw_path: Path = (
-                log_path / f"img_matches_raw_{postfix}.png"
-            )
-            img_matches_ransac_path: Path = (
-                log_path / f"img_matches_ransac_{postfix}.png"
-            )
-            cv2.imwrite(
-                str(img_keypoints_path),
-                output_keypoints[:, :, ::-1].copy(),  # RGB -> BGR
-            )
-            cv2.imwrite(
-                str(img_matches_raw_path),
-                output_matches_raw[:, :, ::-1].copy(),  # RGB -> BGR
-            )
-            cv2.imwrite(
-                str(img_matches_ransac_path),
-                output_matches_ransac[:, :, ::-1].copy(),  # RGB -> BGR
-            )
-            plt.close("all")
-if __name__ == "__main__":
-    config = load_config(ROOT / "ui/config.yaml")
-    api = ImageMatchingAPI(config)

ui/app_class.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple
@@ -6,7 +7,8 @@ import numpy as np
 from easydict import EasyDict as edict
 from omegaconf import OmegaConf
-from hloc import flush_logs, read_logs
 from ui.sfm import SfmEngine
 from ui.utils import (
     GRADIO_VERSION,
@@ -272,24 +274,6 @@ class ImageMatchingApp:
                             self.display_supported_algorithms()
                     with gr.Column():
-                        with gr.Accordion("Open for More: Logs", open=False):
-                            logs = gr.Textbox(
-                                placeholder="\n" * 10,
-                                label="Logs",
-                                info="Verbose from inference will be displayed below.",
-                                lines=10,
-                                max_lines=10,
-                                autoscroll=True,
-                                elem_id="logs",
-                                show_copy_button=True,
-                                container=True,
-                                elem_classes="logs_class",
-                            )
-                            self.app.load(read_logs, None, logs, every=1)
-                            btn_clear_logs = gr.Button(
-                                "Clear logs", elem_id="logs-button"
-                            )
-                            btn_clear_logs.click(flush_logs, [], [])
                         with gr.Accordion(
                             "Open for More: Keypoints", open=True
@@ -523,7 +507,7 @@ class ImageMatchingApp:
         key: str = list(self.matcher_zoo.keys())[
             0
         ]  # Get the first key from matcher_zoo
-        flush_logs()
         return (
             None,  # image0: Optional[np.ndarray]
             None,  # image1: Optional[np.ndarray]

+import sys
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple
 from easydict import EasyDict as edict
 from omegaconf import OmegaConf
+sys.path.append(str(Path(__file__).parents[1]))
 from ui.sfm import SfmEngine
 from ui.utils import (
     GRADIO_VERSION,
                             self.display_supported_algorithms()
                     with gr.Column():
                         with gr.Accordion(
                             "Open for More: Keypoints", open=True
         key: str = list(self.matcher_zoo.keys())[
             0
         ]  # Get the first key from matcher_zoo
+        # flush_logs()
         return (
             None,  # image0: Optional[np.ndarray]
             None,  # image1: Optional[np.ndarray]

ui/config.yaml CHANGED Viewed

@@ -41,6 +41,7 @@ matcher_zoo:
   DUSt3R:
     # TODO: duster is under development
     enable: true
     matcher: duster
     dense: true
     info:
@@ -52,6 +53,7 @@ matcher_zoo:
       display: true
   GIM(dkm):
     enable: true
     matcher: gim(dkm)
     dense: true
     info:
@@ -63,6 +65,7 @@ matcher_zoo:
       display: true
   RoMa:
     matcher: roma
     dense: true
     info:
       name: RoMa #display name
@@ -73,6 +76,7 @@ matcher_zoo:
       display: true
   dkm:
     matcher: dkm
     dense: true
     info:
       name: DKM #display name
@@ -398,9 +402,9 @@ matcher_zoo:
       display: true
   sfd2+imp:
     matcher: imp
     feature: sfd2
-    enable: true
     dense: false
     info:
       name: SFD2+IMP #display name
@@ -411,9 +415,9 @@ matcher_zoo:
       display: true
   sfd2+mnn:
     matcher: NN-mutual
     feature: sfd2
-    enable: true
     dense: false
     info:
       name: SFD2+MNN # display name

   DUSt3R:
     # TODO: duster is under development
     enable: true
+    # skip_ci: true
     matcher: duster
     dense: true
     info:
       display: true
   GIM(dkm):
     enable: true
+    # skip_ci: true
     matcher: gim(dkm)
     dense: true
     info:
       display: true
   RoMa:
     matcher: roma
+    skip_ci: true
     dense: true
     info:
       name: RoMa # display name
       display: true
   dkm:
     matcher: dkm
+    skip_ci: true
     dense: true
     info:
       name: DKM # display name
       display: true
   sfd2+imp:
+    enable: true
     matcher: imp
     feature: sfd2
     dense: false
     info:
       name: SFD2+IMP # display name
       display: true
   sfd2+mnn:
+    enable: true
     matcher: NN-mutual
     feature: sfd2
     dense: false
     info:
       name: SFD2+MNN # display name

ui/sfm.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import shutil
 import tempfile
 from pathlib import Path
 from typing import Any, Dict, List
-import pycolmap
 from hloc import (
     extract_features,
@@ -14,7 +15,12 @@ from hloc import (
     visualization,
 )
-from .viz import fig2im
 class SfmEngine:

 import shutil
+import sys
 import tempfile
 from pathlib import Path
 from typing import Any, Dict, List
+sys.path.append(str(Path(__file__).parents[1]))
 from hloc import (
     extract_features,
     visualization,
 )
+try:
+    import pycolmap
+except ImportError:
+    logger.warning("pycolmap not installed, some features may not work")
+from ui.viz import fig2im
 class SfmEngine:

ui/utils.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import pickle
 import random
 import shutil
 import time
 import warnings
 from itertools import combinations
@@ -16,6 +17,8 @@ import poselib
 import psutil
 from PIL import Image
 from hloc import (
     DEVICE,
     extract_features,
@@ -26,8 +29,7 @@ from hloc import (
     matchers,
 )
 from hloc.utils.base_model import dynamic_load
-from .viz import display_keypoints, display_matches, fig2im, plot_images
 warnings.simplefilter("ignore")

 import pickle
 import random
 import shutil
+import sys
 import time
 import warnings
 from itertools import combinations
 import psutil
 from PIL import Image
+sys.path.append(str(Path(__file__).parents[1]))
 from hloc import (
     DEVICE,
     extract_features,
     matchers,
 )
 from hloc.utils.base_model import dynamic_load
+from ui.viz import display_keypoints, display_matches, fig2im, plot_images
 warnings.simplefilter("ignore")

ui/viz.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import typing
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Union
@@ -8,6 +9,8 @@ import matplotlib.pyplot as plt
 import numpy as np
 import seaborn as sns
 from hloc.utils.viz import add_text, plot_keypoints
 np.random.seed(1995)

+import sys
 import typing
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Union
 import numpy as np
 import seaborn as sns
+sys.path.append(str(Path(__file__).parents[1]))
 from hloc.utils.viz import add_text, plot_keypoints
 np.random.seed(1995)