import argparse

import numpy as np
import imageio
import torch
from tqdm import tqdm

import scipy.io
# Pillow stands in for scipy.misc.imresize, which was removed in scipy 1.3.
from PIL import Image

from lib.model_test import D2Net
from lib.utils import preprocess_image
from lib.pyramid import process_multiscale

# CUDA setup: run on the GPU when one is available, else fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
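

# Bilinear resize helper replacing the removed scipy.misc.imresize
# (deprecated in scipy 1.0, removed in scipy 1.3); like imresize, it
# takes a fractional scale and returns a float array of values in [0, 255].
def resize_image(image, scale):
    height, width = image.shape[:2]
    resized = Image.fromarray(image.astype('uint8')).resize(
        (int(width * scale), int(height * scale)), Image.BILINEAR
    )
    return np.array(resized).astype('float')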


parser = argparse.ArgumentParser(description='Feature extraction script')

parser.add_argument(
    '--image_list_file', type=str, required=True,
    help='path to a file containing a list of images to process'
)

parser.add_argument(
    '--preprocessing', type=str, default='caffe',
    help='image preprocessing (caffe or torch)'
)
parser.add_argument(
    '--model_file', type=str, default='models/d2_tf.pth',
    help='path to the full model'
)

parser.add_argument(
    '--max_edge', type=int, default=1600,
    help='maximum image size at network input'
)
parser.add_argument(
    '--max_sum_edges', type=int, default=2800,
    help='maximum sum of image sizes at network input'
)

parser.add_argument(
    '--output_extension', type=str, default='.d2-net',
    help='extension for the output'
)
parser.add_argument(
    '--output_type', type=str, default='npz',
    help='output file type (npz or mat)'
)

parser.add_argument(
    '--multiscale', dest='multiscale', action='store_true',
    help='extract multiscale features'
)
parser.set_defaults(multiscale=False)

parser.add_argument(
    '--no-relu', dest='use_relu', action='store_false',
    help='remove ReLU after the dense feature extraction module'
)
parser.set_defaults(use_relu=True)

args = parser.parse_args()

print(args)
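
# Example invocation (the script and list file names are hypothetical):
#   python extract_features.py --image_list_file image_list.txt --multiscale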

# Create the D2-Net model and load the weights.
model = D2Net(
    model_file=args.model_file,
    use_relu=args.use_relu,
    use_cuda=use_cuda
)
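
# Process every image in the list.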
with open(args.image_list_file, 'r') as f:
    lines = f.readlines()
for line in tqdm(lines, total=len(lines)):
    path = line.strip()

    image = imageio.imread(path)
    # Grayscale input: replicate the single channel to get a 3-channel image.
    if len(image.shape) == 2:
        image = image[:, :, np.newaxis]
        image = np.repeat(image, 3, -1)

    # Downscale so that the longest edge and the sum of the two edges
    # stay within the configured limits.
    resized_image = image
    if max(resized_image.shape) > args.max_edge:
        resized_image = resize_image(
            resized_image,
            args.max_edge / max(resized_image.shape)
        )
    if sum(resized_image.shape[:2]) > args.max_sum_edges:
        resized_image = resize_image(
            resized_image,
            args.max_sum_edges / sum(resized_image.shape[:2])
        )
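
    # Scale factors that map keypoints detected on the resized image
    # back to coordinates in the original image.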
    fact_i = image.shape[0] / resized_image.shape[0]
    fact_j = image.shape[1] / resized_image.shape[1]

    input_image = preprocess_image(
        resized_image,
        preprocessing=args.preprocessing
    )
    with torch.no_grad():
        # Add a batch dimension and move the image to the target device.
        image_tensor = torch.tensor(
            input_image[np.newaxis, :, :, :].astype(np.float32),
            device=device
        )
        if args.multiscale:
            keypoints, scores, descriptors = process_multiscale(
                image_tensor,
                model
            )
        else:
            # Single-scale extraction: restrict the pyramid to scale 1.
            keypoints, scores, descriptors = process_multiscale(
                image_tensor,
                model,
                scales=[1]
            )
    keypoints[:, 0] *= fact_i
    keypoints[:, 1] *= fact_j
    keypoints = keypoints[:, [1, 0, 2]]

    # Save the features next to the image, as .npz or .mat.
    if args.output_type == 'npz':
        with open(path + args.output_extension, 'wb') as output_file:
            np.savez(
                output_file,
                keypoints=keypoints,
                scores=scores,
                descriptors=descriptors
            )
    elif args.output_type == 'mat':
        with open(path + args.output_extension, 'wb') as output_file:
            scipy.io.savemat(
                output_file,
                {
                    'keypoints': keypoints,
                    'scores': scores,
                    'descriptors': descriptors
                }
            )
    else:
        raise ValueError('Unknown output type.')