Huiwenshi committed on
Commit
c4e8c30
1 Parent(s): 525400c

Delete folder ./third_party/weights with huggingface_hub

Browse files
third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt/README.md DELETED
@@ -1,117 +0,0 @@
1
- ---
2
- tags:
3
- - vision
4
- ---
5
-
6
- ## DUSt3R
7
-
8
- # Model info
9
-
10
- Project page: https://dust3r.europe.naverlabs.com/
11
-
12
- # How to use
13
-
14
- Here's how to load the model (after [installing](https://github.com/naver/dust3r?tab=readme-ov-file#installation) the dust3r package):
15
-
16
- ```python
17
- from dust3r.model import AsymmetricCroCo3DStereo
18
- import torch
19
-
20
- model = AsymmetricCroCo3DStereo.from_pretrained("nielsr/DUSt3R_ViTLarge_BaseDecoder_512_dpt")
21
-
22
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
- model.to(device)
24
- ```
25
-
26
- Next, one can run inference as follows:
27
-
28
- ```python
29
- from dust3r.inference import inference
30
- from dust3r.utils.image import load_images
31
- from dust3r.image_pairs import make_pairs
32
- from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
33
-
34
- if __name__ == '__main__':
35
- batch_size = 1
36
- schedule = 'cosine'
37
- lr = 0.01
38
- niter = 300
39
-
40
- # load_images can take a list of images or a directory
41
- images = load_images(['croco/assets/Chateau1.png', 'croco/assets/Chateau2.png'], size=512)
42
- pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
43
- output = inference(pairs, model, device, batch_size=batch_size)
44
-
45
- # at this stage, you have the raw dust3r predictions
46
- view1, pred1 = output['view1'], output['pred1']
47
- view2, pred2 = output['view2'], output['pred2']
48
- # here, view1, pred1, view2, pred2 are dicts of lists of len(2)
49
- # -> because we symmetrize we have (im1, im2) and (im2, im1) pairs
50
- # in each view you have:
51
- # an integer image identifier: view1['idx'] and view2['idx']
52
- # the img: view1['img'] and view2['img']
53
- # the image shape: view1['true_shape'] and view2['true_shape']
54
- # an instance string output by the dataloader: view1['instance'] and view2['instance']
55
- # pred1 and pred2 contain the confidence values: pred1['conf'] and pred2['conf']
56
- # pred1 contains 3D points for view1['img'] in view1['img'] space: pred1['pts3d']
57
- # pred2 contains 3D points for view2['img'] in view1['img'] space: pred2['pts3d_in_other_view']
58
-
59
- # next we'll use the global_aligner to align the predictions
60
- # depending on your task, you may be fine with the raw output and not need it
61
- # with only two input images, you could use GlobalAlignerMode.PairViewer: it would just convert the output
62
- # if using GlobalAlignerMode.PairViewer, no need to run compute_global_alignment
63
- scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PointCloudOptimizer)
64
- loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr)
65
-
66
- # retrieve useful values from scene:
67
- imgs = scene.imgs
68
- focals = scene.get_focals()
69
- poses = scene.get_im_poses()
70
- pts3d = scene.get_pts3d()
71
- confidence_masks = scene.get_masks()
72
-
73
- # visualize reconstruction
74
- scene.show()
75
-
76
- # find 2D-2D matches between the two images
77
- from dust3r.utils.geometry import find_reciprocal_matches, xy_grid
78
- pts2d_list, pts3d_list = [], []
79
- for i in range(2):
80
- conf_i = confidence_masks[i].cpu().numpy()
81
- pts2d_list.append(xy_grid(*imgs[i].shape[:2][::-1])[conf_i]) # imgs[i].shape[:2] = (H, W)
82
- pts3d_list.append(pts3d[i].detach().cpu().numpy()[conf_i])
83
- reciprocal_in_P2, nn2_in_P1, num_matches = find_reciprocal_matches(*pts3d_list)
84
- print(f'found {num_matches} matches')
85
- matches_im1 = pts2d_list[1][reciprocal_in_P2]
86
- matches_im0 = pts2d_list[0][nn2_in_P1][reciprocal_in_P2]
87
-
88
- # visualize a few matches
89
- import numpy as np
90
- from matplotlib import pyplot as pl
91
- n_viz = 10
92
- match_idx_to_viz = np.round(np.linspace(0, num_matches-1, n_viz)).astype(int)
93
- viz_matches_im0, viz_matches_im1 = matches_im0[match_idx_to_viz], matches_im1[match_idx_to_viz]
94
-
95
- H0, W0, H1, W1 = *imgs[0].shape[:2], *imgs[1].shape[:2]
96
- img0 = np.pad(imgs[0], ((0, max(H1 - H0, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
97
- img1 = np.pad(imgs[1], ((0, max(H0 - H1, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
98
- img = np.concatenate((img0, img1), axis=1)
99
- pl.figure()
100
- pl.imshow(img)
101
- cmap = pl.get_cmap('jet')
102
- for i in range(n_viz):
103
- (x0, y0), (x1, y1) = viz_matches_im0[i].T, viz_matches_im1[i].T
104
- pl.plot([x0, x1 + W0], [y0, y1], '-+', color=cmap(i / (n_viz - 1)), scalex=False, scaley=False)
105
- pl.show(block=True)
106
-
107
- ```
108
-
109
- ### BibTeX entry and citation info
110
-
111
- ```bibtex
112
- @article{dust3r2023,
113
- title={{DUSt3R: Geometric 3D Vision Made Easy}},
114
- author={Wang, Shuzhe and Leroy, Vincent and Cabon, Yohann and Chidlovskii, Boris and Revaud, Jerome},
115
- journal={arXiv preprint 2312.14132},
116
- year={2023}}
117
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt/config.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "output_mode": "pts3d",
3
- "head_type": "dpt",
4
- "depth_mode": [
5
- "exp",
6
- -Infinity,
7
- Infinity
8
- ],
9
- "conf_mode": [
10
- "exp",
11
- 1,
12
- Infinity
13
- ],
14
- "freeze": "none",
15
- "landscape_only": false,
16
- "patch_embed_cls": "PatchEmbedDust3R",
17
- "enc_depth": 24,
18
- "dec_depth": 12,
19
- "enc_embed_dim": 1024,
20
- "dec_embed_dim": 768,
21
- "enc_num_heads": 16,
22
- "dec_num_heads": 12,
23
- "pos_embed": "RoPE100",
24
- "img_size": [
25
- 512,
26
- 512
27
- ]
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdbd4c6d7e91df3f3dc3551a0aadc7983bc85ed9e02794fba633eb1ed10174b5
3
- size 2284790056