|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path |
|
import argparse |
|
import cv2 |
|
import matplotlib.cm as cm |
|
import torch |
|
|
|
from models.matching import Matching |
|
from models.utils import (AverageTimer, VideoStreamer, |
|
make_matching_plot_fast, frame2tensor) |
|
|
|
# This script only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)


if __name__ == '__main__':
    # Script entry point: parse the options, build the matcher, then match
    # every incoming frame against an "anchor" frame and visualise/save the
    # result until the stream ends or the user quits.

    # ---- Command-line options -------------------------------------------
    parser = argparse.ArgumentParser(
        description='SuperGlue demo',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--input', type=str, default='0',
        help='ID of a USB webcam, URL of an IP camera, '
             'or path to an image directory or movie file')
    parser.add_argument(
        '--output_dir', type=str, default=None,
        help='Directory where to write output frames (If None, no output)')

    parser.add_argument(
        '--image_glob', type=str, nargs='+',
        default=['*.png', '*.jpg', '*.jpeg'],
        help='Glob if a directory of images is specified')
    parser.add_argument(
        '--skip', type=int, default=1,
        help='Images to skip if input is a movie or directory')
    parser.add_argument(
        '--max_length', type=int, default=1000000,
        help='Maximum length if input is a movie or directory')
    parser.add_argument(
        '--resize', type=int, nargs='+', default=[640, 480],
        help='Resize the input image before running inference. If two numbers, '
             'resize to the exact dimensions, if one number, resize the max '
             'dimension, if -1, do not resize')

    # A list (not a set) keeps the order shown in --help deterministic.
    parser.add_argument(
        '--superglue', choices=['indoor', 'outdoor'], default='indoor',
        help='SuperGlue weights')
    parser.add_argument(
        '--max_keypoints', type=int, default=-1,
        help='Maximum number of keypoints detected by Superpoint'
             ' (\'-1\' keeps all keypoints)')
    parser.add_argument(
        '--keypoint_threshold', type=float, default=0.005,
        help='SuperPoint keypoint detector confidence threshold')
    parser.add_argument(
        '--nms_radius', type=int, default=4,
        help='SuperPoint Non Maximum Suppression (NMS) radius'
             ' (Must be positive)')
    parser.add_argument(
        '--sinkhorn_iterations', type=int, default=20,
        help='Number of Sinkhorn iterations performed by SuperGlue')
    parser.add_argument(
        '--match_threshold', type=float, default=0.2,
        help='SuperGlue match threshold')

    parser.add_argument(
        '--show_keypoints', action='store_true',
        help='Show the detected keypoints')
    parser.add_argument(
        '--no_display', action='store_true',
        help='Do not display images to screen. Useful if running remotely')
    parser.add_argument(
        '--force_cpu', action='store_true',
        help='Force pytorch to run in CPU mode.')

    opt = parser.parse_args()
    print(opt)

    # ---- Normalise and report the --resize request ----------------------
    # "W -1" is treated the same as the single value "W".
    if len(opt.resize) == 2 and opt.resize[1] == -1:
        opt.resize = opt.resize[0:1]
    if len(opt.resize) == 2:
        print('Will resize to {}x{} (WxH)'.format(
            opt.resize[0], opt.resize[1]))
    elif len(opt.resize) == 1 and opt.resize[0] > 0:
        print('Will resize max dimension to {}'.format(opt.resize[0]))
    elif len(opt.resize) == 1:
        print('Will not resize images')
    else:
        raise ValueError('Cannot specify more than two integers for --resize')

    # ---- Model setup ----------------------------------------------------
    device = 'cuda' if torch.cuda.is_available() and not opt.force_cpu else 'cpu'
    print('Running inference on device \"{}\"'.format(device))
    config = {
        'superpoint': {
            'nms_radius': opt.nms_radius,
            'keypoint_threshold': opt.keypoint_threshold,
            'max_keypoints': opt.max_keypoints
        },
        'superglue': {
            'weights': opt.superglue,
            'sinkhorn_iterations': opt.sinkhorn_iterations,
            'match_threshold': opt.match_threshold,
        }
    }
    matching = Matching(config).eval().to(device)
    # SuperPoint outputs that are cached for the anchor frame and re-used.
    keys = ['keypoints', 'scores', 'descriptors']

    vs = VideoStreamer(opt.input, opt.resize, opt.skip,
                       opt.image_glob, opt.max_length)
    # From here on the streamer is always released, including when an
    # exception or Ctrl-C interrupts the run.
    try:
        frame, ret = vs.next_frame()
        if not ret:
            # Explicit raise instead of `assert`, which `python -O` strips.
            raise RuntimeError(
                'Error when reading the first frame (try different --input?)')

        # The first frame becomes the initial anchor; its SuperPoint outputs
        # are stored under '<name>0' keys so they can be passed straight to
        # the matcher together with each new 'image1'.
        frame_tensor = frame2tensor(frame, device)
        last_data = matching.superpoint({'image': frame_tensor})
        last_data = {k+'0': last_data[k] for k in keys}
        last_data['image0'] = frame_tensor
        last_frame = frame
        last_image_id = 0

        if opt.output_dir is not None:
            print('==> Will write outputs to {}'.format(opt.output_dir))
            # parents=True so a nested output path does not fail.
            Path(opt.output_dir).mkdir(parents=True, exist_ok=True)

        # Create the window for display.
        if not opt.no_display:
            cv2.namedWindow('SuperGlue matches', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('SuperGlue matches', 640*2, 480)
        else:
            print('Skipping visualization, will not show a GUI.')

        # Print the keyboard help menu. The wording follows the handlers
        # below: 'e'/'d' lower a threshold, 'r'/'f' raise it.
        print('==> Keyboard control:\n'
              '\tn: select the current frame as the anchor\n'
              '\te/r: decrease/increase the keypoint confidence threshold\n'
              '\td/f: decrease/increase the match filtering threshold\n'
              '\tk: toggle the visualization of keypoints\n'
              '\tq: quit')

        timer = AverageTimer()

        while True:
            frame, ret = vs.next_frame()
            if not ret:
                print('Finished demo_superglue.py')
                break
            timer.update('data')
            # Ids of the anchor frame and of the frame that was just read.
            stem0, stem1 = last_image_id, vs.i - 1

            frame_tensor = frame2tensor(frame, device)
            pred = matching({**last_data, 'image1': frame_tensor})
            kpts0 = last_data['keypoints0'][0].cpu().numpy()
            kpts1 = pred['keypoints1'][0].cpu().numpy()
            matches = pred['matches0'][0].cpu().numpy()
            confidence = pred['matching_scores0'][0].cpu().numpy()
            timer.update('forward')

            # Entries greater than -1 are used as indices into kpts1; the
            # remaining anchor keypoints are left out of the plot.
            valid = matches > -1
            mkpts0 = kpts0[valid]
            mkpts1 = kpts1[matches[valid]]
            color = cm.jet(confidence[valid])
            text = [
                'SuperGlue',
                'Keypoints: {}:{}'.format(len(kpts0), len(kpts1)),
                'Matches: {}'.format(len(mkpts0))
            ]
            k_thresh = matching.superpoint.config['keypoint_threshold']
            m_thresh = matching.superglue.config['match_threshold']
            small_text = [
                'Keypoint Threshold: {:.4f}'.format(k_thresh),
                'Match Threshold: {:.2f}'.format(m_thresh),
                'Image Pair: {:06}:{:06}'.format(stem0, stem1),
            ]
            out = make_matching_plot_fast(
                last_frame, frame, kpts0, kpts1, mkpts0, mkpts1, color, text,
                path=None, show_keypoints=opt.show_keypoints,
                small_text=small_text)

            if not opt.no_display:
                cv2.imshow('SuperGlue matches', out)
                key = chr(cv2.waitKey(1) & 0xFF)
                if key == 'q':
                    # Cleanup happens in the `finally` clause below.
                    print('Exiting (via q) demo_superglue.py')
                    break
                elif key == 'n':
                    # Promote the current frame (the '...1' predictions) to
                    # be the new anchor (the '...0' inputs).
                    last_data = {k+'0': pred[k+'1'] for k in keys}
                    last_data['image0'] = frame_tensor
                    last_frame = frame
                    last_image_id = (vs.i - 1)
                elif key in ['e', 'r']:
                    # Scale the keypoint threshold by -10% ('e') or +10%
                    # ('r') per keypress, clamped to [0.0001, 1].
                    d = 0.1 * (-1 if key == 'e' else 1)
                    sp_config = matching.superpoint.config
                    sp_config['keypoint_threshold'] = min(max(
                        0.0001, sp_config['keypoint_threshold']*(1+d)), 1)
                    print('\nChanged the keypoint threshold to {:.4f}'.format(
                        sp_config['keypoint_threshold']))
                elif key in ['d', 'f']:
                    # Shift the match threshold by -0.05 ('d') or +0.05
                    # ('f') per keypress, clamped to [0.05, 0.95].
                    d = 0.05 * (-1 if key == 'd' else 1)
                    sg_config = matching.superglue.config
                    sg_config['match_threshold'] = min(max(
                        0.05, sg_config['match_threshold']+d), .95)
                    print('\nChanged the match threshold to {:.2f}'.format(
                        sg_config['match_threshold']))
                elif key == 'k':
                    opt.show_keypoints = not opt.show_keypoints

            timer.update('viz')
            timer.print()

            if opt.output_dir is not None:
                stem = 'matches_{:06}_{:06}'.format(stem0, stem1)
                out_file = str(Path(opt.output_dir, stem + '.png'))
                print('\nWriting image to {}'.format(out_file))
                cv2.imwrite(out_file, out)
    finally:
        # Only touch the GUI layer when a window may have been created;
        # OpenCV builds without GUI support can raise on this call.
        if not opt.no_display:
            cv2.destroyAllWindows()
        vs.cleanup()
|
|