hocherie committed
Commit e150a4c · 1 Parent(s): a293d60
upload files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- Dockerfile +35 -0
- LICENSE +433 -0
- app.py +147 -0
- config.yaml +36 -0
- get_weights.sh +9 -0
- mapper/__init__.py +30 -0
- mapper/callbacks.py +105 -0
- mapper/conf/data/kitti.yaml +40 -0
- mapper/conf/data/mia.yaml +44 -0
- mapper/conf/data/nuscenes.yaml +38 -0
- mapper/conf/mapper_kitti.yaml +23 -0
- mapper/conf/mapper_nuscenes.yaml +26 -0
- mapper/conf/model/image_encoder/dino.yaml +5 -0
- mapper/conf/model/image_encoder/resnet.yaml +12 -0
- mapper/conf/model/mapper.yaml +15 -0
- mapper/conf/pretrain.yaml +24 -0
- mapper/conf/pretrain_resnet.yaml +26 -0
- mapper/conf/training.yaml +30 -0
- mapper/data/__init__.py +7 -0
- mapper/data/base.py +19 -0
- mapper/data/image.py +140 -0
- mapper/data/kitti/data_module.py +32 -0
- mapper/data/kitti/dataset.py +317 -0
- mapper/data/kitti/transform.py +149 -0
- mapper/data/mapillary/data_module.py +317 -0
- mapper/data/mapillary/dataset.py +255 -0
- mapper/data/module.py +64 -0
- mapper/data/nuscenes/data_module.py +33 -0
- mapper/data/nuscenes/dataset.py +207 -0
- mapper/data/nuscenes/splits_roddick.py +197 -0
- mapper/data/nuscenes/utils.py +214 -0
- mapper/data/schema.py +75 -0
- mapper/data/sequential.py +45 -0
- mapper/data/torch.py +102 -0
- mapper/data/utils.py +21 -0
- mapper/mapper.py +112 -0
- mapper/models/__init__.py +28 -0
- mapper/models/base.py +59 -0
- mapper/models/bev_projection.py +95 -0
- mapper/models/dinov2/__init__.py +6 -0
- mapper/models/dinov2/configs/__init__.py +22 -0
- mapper/models/dinov2/configs/eval/vitb14_pretrain.yaml +6 -0
- mapper/models/dinov2/configs/eval/vitg14_pretrain.yaml +7 -0
- mapper/models/dinov2/configs/eval/vitl14_pretrain.yaml +6 -0
- mapper/models/dinov2/configs/eval/vits14_pretrain.yaml +6 -0
- mapper/models/dinov2/configs/eval/vits14_reg4_pretrain.yaml +9 -0
- mapper/models/dinov2/configs/ssl_default_config.yaml +118 -0
- mapper/models/dinov2/configs/train/vitg14.yaml +26 -0
- mapper/models/dinov2/configs/train/vitl14.yaml +26 -0
- mapper/models/dinov2/configs/train/vitl16_short.yaml +6 -0
Dockerfile
ADDED
@@ -0,0 +1,35 @@
FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime

# Install dependencies
RUN apt-get update && apt-get install -y \
    git \
    wget \
    unzip \
    vim \
    ffmpeg \
    libsm6 \
    libxext6

RUN apt-get clean && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/mapper

RUN pip install --no-cache-dir gradio[oauth]==4.44.0 "uvicorn>=0.14.0" spaces

COPY --chown=user . $HOME/mapper

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install fastapi==0.115.0
# Get Weights
RUN bash get_weights.sh

# Start the app
CMD ["python", "app.py"]
LICENSE
ADDED
@@ -0,0 +1,433 @@
Attribution-ShareAlike 4.0 International License

Copyright (c) 2024,
Cherie Ho* · Jiaye (Tony) Zou* · Omar Alama*
Sai Mitheran Jagadesh Kumar · Benjamin Chiang · Taneesh Gupta · Chen Wang
Nikhil Keetha · Katia Sycara · Sebastian Scherer
Carnegie Mellon University

=======================================================================

[Remainder of the file: the unmodified legal code of the Creative Commons
Attribution-ShareAlike 4.0 International Public License, Sections 1-8
(Definitions through Interpretation), as published at creativecommons.org.]
app.py
ADDED
@@ -0,0 +1,147 @@
import gradio as gr
from matplotlib import pyplot as plt
from mapper.utils.io import read_image
from mapper.utils.exif import EXIF
from mapper.utils.wrappers import Camera
from mapper.data.image import rectify_image, pad_image, resize_image
from mapper.utils.viz_2d import one_hot_argmax_to_rgb, plot_images
from mapper.module import GenericModule
from perspective2d import PerspectiveFields
import torch
import numpy as np
from typing import Optional, Tuple
from omegaconf import OmegaConf

description = """
<h1 align="center">
<ins>MapItAnywhere (MIA) </ins>
<br>
Empowering Bird’s Eye View Mapping using Large-scale Public Data
<br>
<h3 align="center">
<a href="https://mapitanywhere.github.io" target="_blank">Project Page</a> |
<a href="https://arxiv.org/abs/2109.08203" target="_blank">Paper</a> |
<a href="https://github.com/MapItAnywhere/MapItAnywhere" target="_blank">Code</a>
</h3>
<p align="center">
Mapper generates birds-eye-view maps from in-the-wild monocular first-person view images. You can try our demo by uploading your images or using the examples provided. Tip: You can also try out images across the world using <a href="https://www.mapillary.com/app" target="_blank">Mapillary</a> 😉 Also try out some examples that are taken in cities we have not trained on!
</p>
"""

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cfg = OmegaConf.load("config.yaml")

class ImageCalibrator(PerspectiveFields):
    def __init__(self, version: str = "Paramnet-360Cities-edina-centered"):
        super().__init__(version)
        self.eval()

    def run(
        self,
        image_rgb: np.ndarray,
        focal_length: Optional[float] = None,
        exif: Optional[EXIF] = None,
    ) -> Tuple[Tuple[float, float], Camera]:
        h, w, *_ = image_rgb.shape
        if focal_length is None and exif is not None:
            _, focal_ratio = exif.extract_focal()
            if focal_ratio != 0:
                focal_length = focal_ratio * max(h, w)
        calib = self.inference(img_bgr=image_rgb[..., ::-1])
        roll_pitch = (calib["pred_roll"].item(), calib["pred_pitch"].item())
        if focal_length is None:
            vfov = calib["pred_vfov"].item()
            focal_length = h / 2 / np.tan(np.deg2rad(vfov) / 2)

        camera = Camera.from_dict(
            {
                "model": "SIMPLE_PINHOLE",
                "width": w,
                "height": h,
                "params": [focal_length, w / 2 + 0.5, h / 2 + 0.5],
            }
        )
        return roll_pitch, camera

def preprocess_pipeline(image, roll_pitch, camera):
    image = torch.from_numpy(image).float() / 255
    image = image.permute(2, 0, 1).to(device)
    camera = camera.to(device)

    image, valid = rectify_image(image, camera.float(), -roll_pitch[0], -roll_pitch[1])

    roll_pitch *= 0

    image, _, camera, valid = resize_image(
        image=image,
        size=512,
        camera=camera,
        fn=max,
        valid=valid
    )

    # image, valid, camera = pad_image(
    #     image, 512, camera, valid
    # )

    camera = torch.stack([camera])

    return {
        "image": image.unsqueeze(0).to(device),
        "valid": valid.unsqueeze(0).to(device),
        "camera": camera.float().to(device),
    }


calibrator = ImageCalibrator().to(device)
model = GenericModule(cfg)
model = model.load_from_checkpoint("trained_weights/mapper-excl-ood.ckpt", strict=False, cfg=cfg)
model = model.to(device)
model = model.eval()

def run(input_img):
    image_path = input_img.name

    image = read_image(image_path)
    with open(image_path, "rb") as fid:
        exif = EXIF(fid, lambda: image.shape[:2])

    gravity, camera = calibrator.run(image, exif=exif)

    data = preprocess_pipeline(image, gravity, camera)
    res = model(data)

    prediction = res['output']
    rgb_prediction = one_hot_argmax_to_rgb(prediction, 6).squeeze(0).permute(1, 2, 0).cpu().long().numpy()
    valid = res['valid_bev'].squeeze(0)[..., :-1]
    rgb_prediction[~valid.cpu().numpy()] = 255

    # TODO: add legend here

    plot_images([image, rgb_prediction], titles=["Input Image", "Top-Down Prediction"], pad=2, adaptive=True)

    return plt.gcf()


examples = [
    ["examples/left_crossing.jpg"],
    ["examples/crossing.jpg"],
    ["examples/two_roads.jpg"],
    ["examples/japan_narrow_road.jpeg"],
    ["examples/zurich_crossing.jpg"],
    ["examples/night_road.jpg"],
    ["examples/night_crossing.jpg"],
]

demo = gr.Interface(
    fn=run,
    inputs=[
        gr.File(file_types=["image"], label="Input Image")
    ],
    outputs=[
        gr.Plot(label="Prediction", format="png"),
    ],
    description=description,
    examples=examples)
demo.launch(share=True, server_name="0.0.0.0")
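For reference, a small worked check of the focal-length fallback used in ImageCalibrator.run above, f = h / (2 * tan(vfov / 2)); the numbers are purely illustrative.

import numpy as np

h = 1080                                              # image height in pixels
vfov = 60.0                                           # predicted vertical field of view, degrees
focal_length = h / 2 / np.tan(np.deg2rad(vfov) / 2)   # same formula as in ImageCalibrator.run
print(round(focal_length, 1))                         # ~935.3 px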
config.yaml
ADDED
@@ -0,0 +1,36 @@
model:
  image_encoder:
    backbone:
      pretrained: true
      frozen: true
      output_dim: 128
    name: feature_extractor_DPT
  segmentation_head:
    dropout_rate: 0.2
  name: map_perception_net
  num_classes: 6
  latent_dim: 128
  z_max: 50
  x_max: 25
  pixel_per_meter: 2
  num_scale_bins: 32
  loss:
    num_classes: 6
    xent_weight: 1.0
    dice_weight: 1.0
    focal_loss: false
    focal_loss_gamma: 2.0
    requires_frustrum: true
    requires_flood_mask: false
    class_weights:
    - 1.00351229
    - 4.34782609
    - 1.00110121
    - 1.03124678
    - 6.69792364
    - 7.55857899
    label_smoothing: 0.1
  scale_range:
  - 0
  - 9
  z_min: null
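A minimal sketch of how this file is consumed (app.py above does the same): OmegaConf loads it into a dot-accessible config object. The nesting assumed here matches the reconstruction above.

from omegaconf import OmegaConf

cfg = OmegaConf.load("config.yaml")
print(cfg.model.num_classes)                 # 6
print(list(cfg.model.loss.class_weights))    # per-class loss weights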
get_weights.sh
ADDED
@@ -0,0 +1,9 @@
#!/bin/bash

# URL of the file to download
ood_weights="https://huggingface.co/mapitanywhere/mapper/resolve/main/weights/mapper-excl-ood/model.ckpt"

mkdir -p trained_weights

# Download the file using wget
wget $ood_weights -O trained_weights/mapper-excl-ood.ckpt
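An alternative sketch, not part of the commit, that fetches the same checkpoint through huggingface_hub instead of wget; repo id and filename are taken from the URL in get_weights.sh, and the file would still need to be moved to trained_weights/mapper-excl-ood.ckpt where app.py expects it.

from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="mapitanywhere/mapper",
    filename="weights/mapper-excl-ood/model.ckpt",
    local_dir="trained_weights",
)
print(ckpt_path)  # note: nested under local_dir, unlike the wget -O target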
mapper/__init__.py
ADDED
@@ -0,0 +1,30 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
import os, sys

sys.path.append(os.path.dirname(os.path.realpath(__file__)))
from pathlib import Path
import logging

import pytorch_lightning  # noqa: F401


formatter = logging.Formatter(
    fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
handler.setLevel(logging.INFO)

logger = logging.getLogger("mapper")
logger.setLevel(logging.INFO)
logger.addHandler(handler)
logger.propagate = False

pl_logger = logging.getLogger("pytorch_lightning")
if len(pl_logger.handlers):
    pl_logger.handlers[0].setFormatter(formatter)

repo_dir = Path(__file__).parent.parent
EXPERIMENTS_PATH = repo_dir / "experiments/"
DATASETS_PATH = repo_dir / "datasets/"
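A tiny usage sketch: importing the package runs the logging setup above and exposes the repo-relative paths it defines.

from mapper import logger, EXPERIMENTS_PATH, DATASETS_PATH

logger.info("experiments at %s, datasets at %s", EXPERIMENTS_PATH, DATASETS_PATH)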
mapper/callbacks.py
ADDED
@@ -0,0 +1,105 @@
import torch
import pytorch_lightning as pl
from pathlib import Path
from typing import Any
import torchvision
import wandb


class EvalSaveCallback(pl.Callback):

    def __init__(self, save_dir: Path) -> None:
        super().__init__()
        self.save_dir = save_dir

    def save(self, outputs, batch, batch_idx):
        name = batch['name']

        filename = self.save_dir / f"{batch_idx:06d}_{name[0]}.pt"
        torch.save({
            "fpv": batch['image'],
            "seg_masks": batch['seg_masks'],
            'name': name,
            "output": outputs["output"],
            "valid_bev": outputs["valid_bev"],
        }, filename)

    def on_test_batch_end(self, trainer: pl.Trainer,
                          pl_module: pl.LightningModule,
                          outputs: torch.Tensor | Any | None,
                          batch: Any,
                          batch_idx: int,
                          dataloader_idx: int = 0) -> None:
        if not outputs:
            return

        self.save(outputs, batch, batch_idx)

    def on_validation_batch_end(self, trainer: pl.Trainer,
                                pl_module: pl.LightningModule,
                                outputs: torch.Tensor | Any | None,
                                batch: Any,
                                batch_idx: int,
                                dataloader_idx: int = 0) -> None:
        if not outputs:
            return

        self.save(outputs, batch, batch_idx)


class ImageLoggerCallback(pl.Callback):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

    def log_image(self, trainer, pl_module, outputs, batch, batch_idx, mode="train"):
        fpv_rgb = batch["image"]
        fpv_grid = torchvision.utils.make_grid(
            fpv_rgb, nrow=8, normalize=False)
        images = [
            wandb.Image(fpv_grid, caption="fpv")
        ]

        pred = outputs['output'].permute(0, 2, 3, 1)
        pred[outputs["valid_bev"][..., :-1] == 0] = 0
        pred = (pred > 0.5).float()
        pred = pred.permute(0, 3, 1, 2)

        for i in range(self.num_classes):
            gt_class_i = batch['seg_masks'][..., i]
            gt_class_i_grid = torchvision.utils.make_grid(
                gt_class_i.unsqueeze(1), nrow=8, normalize=False, pad_value=0)
            pred_class_i = pred[:, i]
            pred_class_i_grid = torchvision.utils.make_grid(
                pred_class_i.unsqueeze(1), nrow=8, normalize=False, pad_value=0)

            images += [
                wandb.Image(gt_class_i_grid, caption=f"gt_class_{i}"),
                wandb.Image(pred_class_i_grid, caption=f"pred_class_{i}")
            ]

        trainer.logger.experiment.log(
            {
                "{}/images".format(mode): images
            }
        )

    def on_validation_batch_end(self, trainer, pl_module: pl.LightningModule, outputs, batch, batch_idx):
        if batch_idx == 0:
            with torch.no_grad():
                outputs = pl_module(batch)
            self.log_image(trainer, pl_module, outputs,
                           batch, batch_idx, mode="val")

    def on_train_batch_end(self, trainer, pl_module: pl.LightningModule, outputs, batch, batch_idx):
        if batch_idx == 0:
            pl_module.eval()

            with torch.no_grad():
                outputs = pl_module(batch)

            self.log_image(trainer, pl_module, outputs,
                           batch, batch_idx, mode="train")

            pl_module.train()
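A minimal sketch, not taken from the commit, of attaching the two callbacks above to a PyTorch Lightning trainer; the trainer arguments and save directory are illustrative.

from pathlib import Path
import pytorch_lightning as pl
from mapper.callbacks import EvalSaveCallback, ImageLoggerCallback

callbacks = [
    EvalSaveCallback(save_dir=Path("eval_results")),  # dumps per-batch predictions to .pt files
    ImageLoggerCallback(num_classes=6),               # logs FPV / GT / prediction grids to wandb
]
trainer = pl.Trainer(accelerator="gpu", devices=1, callbacks=callbacks)
# trainer.fit(model, datamodule=...)  # model and datamodule come from the rest of the repo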
mapper/conf/data/kitti.yaml
ADDED
@@ -0,0 +1,40 @@
name: kitti
seam_root_dir: /path/to/generated/seam
dataset_root_dir: /path/to/kitti/dataset
bev_percentage: 100
pixel_per_meter: 2
crop_size_meters: 50
target_focal_length: 256
resize_image: null
pad_to_multiple: 14
num_classes: 8
loading:
  train:
    batch_size: 32
    num_workers: 32
  val:
    batch_size: 32
    num_workers: 32
  test:
    batch_size: 32
    num_workers: 32
pad_to_square: true
rectify_pitch: true
gravity_align: false
class_mapping: [0, 0, 1, 2, 0, 3]
augmentations:
  enabled: True
  brightness: 0.5
  contrast: 0.5
  saturation: 0.5
  random_flip: 0.5
  hue: 0.5
  random_resized_crop: False
  gaussian_noise:
    enabled: False
    mean: 0.0
    std: 0.1
  brightness_contrast:
    enabled: True
    brightness_factor: 0.2
    contrast_factor: 0.2
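A short sketch of one way a class_mapping like the [0, 0, 1, 2, 0, 3] entry above can be read: as a lookup table that maps each source label index to a target class index. How the repo applies it internally is an assumption here; the arrays are illustrative.

import numpy as np

class_mapping = np.array([0, 0, 1, 2, 0, 3])
labels = np.array([0, 1, 2, 3, 4, 5])      # hypothetical per-pixel source labels
print(class_mapping[labels])               # -> [0 0 1 2 0 3]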
mapper/conf/data/mia.yaml
ADDED
@@ -0,0 +1,44 @@
name: mapillary
scenes:
  - chicago
  - new_york
  - los_angeles
  - san_francisco
split: /path/to/split/file
data_dir: /path/to/mia/dataset
loading:
  train:
    batch_size: 128
    num_workers: 30
  val:
    batch_size: 128
    num_workers: 30
  test:
    batch_size: 1
    num_workers: 0
  testsmall:
    batch_size: 1
    num_workers: 0
num_classes: 6
pixel_per_meter: 2
crop_size_meters: 64
resize_image: 512
pad_to_square: true
rectify_pitch: true
gravity_align: true
augmentations:
  enabled: True
  brightness: 0.5
  contrast: 0.5
  saturation: 0.5
  random_flip: 0.5
  hue: 0.5
  random_resized_crop: False
  gaussian_noise:
    enabled: False
    mean: 0.0
    std: 0.1
  brightness_contrast:
    enabled: True
    brightness_factor: 0.2
    contrast_factor: 0.2
mapper/conf/data/nuscenes.yaml
ADDED
@@ -0,0 +1,38 @@
name: nuscenes
data_dir: /path/to/nuscenes/data
map_dir: /path/to/generated/maps
version: v1.0-trainval
pixel_per_meter: 2
crop_size_meters: 50
resize_image: 512
percentage: 1.0
class_mapping: [0, 1, 2, 0, 0, 3]
num_classes: 14
loading:
  train:
    batch_size: 128
    num_workers: 10
  val:
    batch_size: 128
    num_workers: 10
  test:
    batch_size: 128
    num_workers: 10
pad_to_square: true
rectify_pitch: true
gravity_align: true
augmentations:
  enabled: True
  brightness: 0.5
  contrast: 0.5
  saturation: 0.5
  hue: 0.5
  random_resized_crop: False
  gaussian_noise:
    enabled: False
    mean: 0.0
    std: 0.1
  brightness_contrast:
    enabled: True
    brightness_factor: 0.2
    contrast_factor: 0.2
mapper/conf/mapper_kitti.yaml
ADDED
@@ -0,0 +1,23 @@
defaults:
  - schema/data: kitti
  - data: kitti
  - model: mapper
  - training
  - _self_

experiment:
  name: MIA_DINOv2_Mapper_KITTI

model:
  loss:
    xent_weight: 1.0
    dice_weight: 1.0
    focal_loss: false
    focal_loss_gamma: 2.0
    requires_frustrum: true
    requires_flood_mask: true
    class_weights: null
    label_smoothing: 0.1

training:
  checkpoint: /path/to/checkpoint
mapper/conf/mapper_nuscenes.yaml
ADDED
@@ -0,0 +1,26 @@
defaults:
  - schema/data: nuscenes
  - data: nuscenes
  - model: mapper
  - training
  - _self_

experiment:
  name: MIA_DINOv2_Mapper_NuScenes

model:
  loss:
    xent_weight: 1.0
    dice_weight: 1.0
    focal_loss: false
    focal_loss_gamma: 2.0
    class_weights: [1.00060036, 1.85908161, 1.0249052, 0., 0., 2.57267816]
    requires_frustrum: true
    label_smoothing: 0.1

training:
  checkpoint: /path/to/checkpoint
  finetune: true
  lr: 0.0001
  trainer:
    max_epochs: 50
mapper/conf/model/image_encoder/dino.yaml
ADDED
@@ -0,0 +1,5 @@
name: feature_extractor_DPT
backbone:
  pretrained: true
  frozen: true
  output_dim: ${model.latent_dim} # Match Latent Dimension
mapper/conf/model/image_encoder/resnet.yaml
ADDED
@@ -0,0 +1,12 @@
name: feature_extractor_resnet
backbone:
  pretrained: true
  frozen: true
  output_dim: ${model.latent_dim} # Match Latent Dimension
  input_dim: 3
  encoder: resnet50
  num_downsample: null
  remove_stride_from_first_conv: false
  decoder_norm: "nn.BatchNorm2d"
  do_average_pooling: false
  checkpointed: false
mapper/conf/model/mapper.yaml
ADDED
@@ -0,0 +1,15 @@
defaults:
  - schema/backbone: dino
  - image_encoder: dino

segmentation_head:
  dropout_rate: 0.2
name: map_perception_net
num_classes: 6
latent_dim: 128
z_max: 50
x_max: 25
pixel_per_meter: ${data.pixel_per_meter}
num_scale_bins: 32
loss:
  num_classes: ${..num_classes}
mapper/conf/pretrain.yaml
ADDED
@@ -0,0 +1,24 @@
defaults:
  - schema/data: mia
  - data: mia
  - model: mapper
  - training
  - _self_

experiment:
  name: MIA_DINOv2_Pretrain

model:
  loss:
    xent_weight: 1.0
    dice_weight: 1.0
    focal_loss: false
    focal_loss_gamma: 2.0
    requires_frustrum: true
    class_weights: [ 1.00351229, 4.34782609, 1.00110121, 1.03124678,
                     6.69792364, 7.55857899 ]
    label_smoothing: 0.1

training:
  trainer:
    max_epochs: 15
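A minimal sketch of composing this Hydra config the way a training entry point would; the config_path and the override are assumptions for illustration, not commands taken from the repo.

from hydra import initialize, compose

with initialize(version_base=None, config_path="mapper/conf"):
    cfg = compose(config_name="pretrain", overrides=["data.data_dir=/data/mia"])
print(cfg.experiment.name)           # MIA_DINOv2_Pretrain
print(cfg.model.loss.class_weights)  # per-class weights from the block above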
mapper/conf/pretrain_resnet.yaml
ADDED
@@ -0,0 +1,26 @@
defaults:
  - schema/data: mia
  - data: mia
  - model: mapper
  - training
  - _self_
  - override model/schema/backbone: resnet
  - override model/image_encoder: resnet

experiment:
  name: MIA_DINOv2_Pretrain

model:
  loss:
    xent_weight: 1.0
    dice_weight: 1.0
    focal_loss: false
    focal_loss_gamma: 2.0
    requires_frustrum: true
    class_weights: [ 1.00351229, 4.34782609, 1.00110121, 1.03124678,
                     6.69792364, 7.55857899 ]

training:
  trainer:
    max_steps: 10
    max_epochs: 15
mapper/conf/training.yaml
ADDED
@@ -0,0 +1,30 @@
experiment:
  name: MGL_DINOv2_v4-baseline-less-class
  seed: 42
training:
  num_classes: ${model.num_classes}
  lr: 0.001
  lr_scheduler:
    name: "CosineAnnealingLR"
    args:
      T_max: $total_epochs
      eta_min: 0.0000001
  checkpoint: null
  finetune: false
  eval: false
  save_dir: eval_results
  trainer:
    # val_check_interval: 250
    # log_every_n_steps: 100
    # limit_val_batches: 0
    # max_steps: 500000
    # num_epochs: 15
    precision: bf16-mixed
    accelerator: gpu
    strategy: ddp_find_unused_parameters_true
  checkpointing:
    dirpath: checkpoints/
    monitor: val/total/loss
    save_top_k: -1
    mode: min
    save_last: True
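A sketch of how the lr and lr_scheduler entries above map onto PyTorch objects; the stand-in module, the choice of Adam, and the epoch count are placeholders, not values from the repo.

import torch

model = torch.nn.Linear(8, 2)                               # stand-in module
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # training.lr
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=15, eta_min=1e-7                       # T_max: $total_epochs, eta_min
)
for _ in range(15):
    optimizer.step()
    scheduler.step()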
mapper/data/__init__.py
ADDED
@@ -0,0 +1,7 @@
from .mapillary.data_module import MapillaryDataModule
from .nuscenes.data_module import NuScenesData

modules = {
    "mapillary": MapillaryDataModule,
    "nuscenes": NuScenesData
}
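A small sketch of the registry pattern above: the data-module class is looked up by dataset name, typically the name field of the active data config; the lookup key here is illustrative.

from mapper.data import modules

data_name = "mapillary"                 # e.g. cfg.data.name
data_module_cls = modules[data_name]    # -> MapillaryDataModule
print(data_module_cls.__name__)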
mapper/data/base.py
ADDED
@@ -0,0 +1,19 @@
from abc import abstractmethod
from typing import Optional


class DataBase():
    def __init__(self) -> None:
        raise NotImplementedError

    @abstractmethod
    def prepare_data(self) -> None:
        raise NotImplementedError

    @abstractmethod
    def setup(self, stage: Optional[str] = None):
        raise NotImplementedError

    @abstractmethod
    def dataset(self, stage: str):
        raise NotImplementedError
mapper/data/image.py
ADDED
@@ -0,0 +1,140 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

from typing import Callable, Optional, Union, Sequence

import numpy as np
import torch
import torchvision.transforms.functional as tvf
import collections
from scipy.spatial.transform import Rotation

from ..utils.geometry import from_homogeneous, to_homogeneous
from ..utils.wrappers import Camera


def rectify_image(
    image: torch.Tensor,
    cam: Camera,
    roll: float,
    pitch: Optional[float] = None,
    valid: Optional[torch.Tensor] = None,
):
    *_, h, w = image.shape
    grid = torch.meshgrid(
        [torch.arange(w, device=image.device), torch.arange(h, device=image.device)],
        indexing="xy",
    )
    grid = torch.stack(grid, -1).to(image.dtype)

    if pitch is not None:
        args = ("ZX", (roll, pitch))
    else:
        args = ("Z", roll)
    R = Rotation.from_euler(*args, degrees=True).as_matrix()
    R = torch.from_numpy(R).to(image)

    grid_rect = to_homogeneous(cam.normalize(grid)) @ R.T
    grid_rect = cam.denormalize(from_homogeneous(grid_rect))
    grid_norm = (grid_rect + 0.5) / grid.new_tensor([w, h]) * 2 - 1
    rectified = torch.nn.functional.grid_sample(
        image[None],
        grid_norm[None],
        align_corners=False,
        mode="bilinear",
    ).squeeze(0)
    if valid is None:
        valid = torch.all((grid_norm >= -1) & (grid_norm <= 1), -1)
    else:
        valid = (
            torch.nn.functional.grid_sample(
                valid[None, None].float(),
                grid_norm[None],
                align_corners=False,
                mode="nearest",
            )[0, 0]
            > 0
        )
    return rectified, valid


def resize_image(
    image: torch.Tensor,
    size: Union[int, Sequence, np.ndarray],
    fn: Optional[Callable] = None,
    camera: Optional[Camera] = None,
    valid: np.ndarray = None,
):
    """Resize an image to a fixed size, or according to max or min edge."""
    *_, h, w = image.shape
    if fn is not None:
        assert isinstance(size, int)
        scale = size / fn(h, w)
        h_new, w_new = int(round(h * scale)), int(round(w * scale))
        scale = (scale, scale)
    else:
        if isinstance(size, (collections.abc.Sequence, np.ndarray)):
            w_new, h_new = size
        elif isinstance(size, int):
            w_new = h_new = size
        else:
            raise ValueError(f"Incorrect new size: {size}")
        scale = (w_new / w, h_new / h)
    if (w, h) != (w_new, h_new):
        mode = tvf.InterpolationMode.BILINEAR
        image = tvf.resize(image, (int(h_new), int(w_new)), interpolation=mode, antialias=True)
        image.clip_(0, 1)
        if camera is not None:
            camera = camera.scale(scale)
    if valid is not None:
        valid = tvf.resize(
            valid.unsqueeze(0),
            (int(h_new), int(w_new)),
            interpolation=tvf.InterpolationMode.NEAREST,
        ).squeeze(0)
    ret = [image, scale]
    if camera is not None:
        ret.append(camera)
    if valid is not None:
        ret.append(valid)
    return ret


def pad_image(
    image: torch.Tensor,
    size: Union[int, Sequence, np.ndarray],
    camera: Optional[Camera] = None,
    valid: torch.Tensor = None,
    crop_and_center: bool = False,
):
    if isinstance(size, int):
        w_new = h_new = size
    elif isinstance(size, (collections.abc.Sequence, np.ndarray)):
        w_new, h_new = size
    else:
        raise ValueError(f"Incorrect new size: {size}")
    *c, h, w = image.shape
    if crop_and_center:
        diff = np.array([w - w_new, h - h_new])
        left, top = left_top = np.round(diff / 2).astype(int)
        right, bottom = diff - left_top
    else:
        assert h <= h_new
        assert w <= w_new
        top = bottom = left = right = 0
    slice_out = np.s_[..., : min(h, h_new), : min(w, w_new)]
    slice_in = np.s_[
        ..., max(top, 0) : h - max(bottom, 0), max(left, 0) : w - max(right, 0)
    ]
    if (w, h) == (w_new, h_new):
        out = image
    else:
        out = torch.zeros((*c, h_new, w_new), dtype=image.dtype)
    out[slice_out] = image[slice_in]
    if camera is not None:
        camera = camera.crop((max(left, 0), max(top, 0)), (w_new, h_new))
    out_valid = torch.zeros((h_new, w_new), dtype=torch.bool)
    out_valid[slice_out] = True if valid is None else valid[slice_in]
    if camera is not None:
        return out, out_valid, camera
    else:
        return out, out_valid
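A minimal sketch of chaining resize_image and pad_image above on a dummy tensor, with no Camera object since both camera arguments are optional; the input resolution is arbitrary.

import torch
from mapper.data.image import resize_image, pad_image

image = torch.rand(3, 720, 1280)                       # C, H, W in [0, 1]
image, scale = resize_image(image, size=512, fn=max)   # longest edge -> 512
print(image.shape, scale)                              # torch.Size([3, 288, 512]) (0.4, 0.4)
padded, valid = pad_image(image, size=512)             # zero-pad to 512 x 512
print(padded.shape, valid.shape)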
mapper/data/kitti/data_module.py
ADDED
@@ -0,0 +1,32 @@
from ..base import DataBase
from .dataset import BEVKitti360Dataset
from ..schema import KITTIDataConfiguration

class BEVKitti360Data(DataBase):
    def __init__(self, cfg: KITTIDataConfiguration) -> None:
        self.cfg = cfg
        self._dataset = {}

    def prepare_data(self) -> None:
        return

    def setup(self, stage: str) -> None:
        split = {
            'fit': 'train',
            'val': 'val',
            'validate': 'val',
            'test': 'val',
            "train": "train"
        }[stage]

        self._dataset[stage] = BEVKitti360Dataset(
            cfg=self.cfg,
            split_name=split
        )

    def dataset(self, stage: str):
        if self._dataset.get(stage) is None:
            self.setup(stage)

        return self._dataset[stage]
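A hedged sketch of the expected call order for this wrapper (not part of the commit); `cfg` stands in for a populated `KITTIDataConfiguration` with valid dataset paths:

# Hedged sketch: datasets are built lazily per stage.
data = BEVKitti360Data(cfg)      # cfg: a filled-in KITTIDataConfiguration
data.prepare_data()              # no-op for KITTI-360
data.setup("val")
val_set = data.dataset("val")    # setup() is re-run lazily if the stage is missing
print(len(val_set))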
mapper/data/kitti/dataset.py
ADDED
@@ -0,0 +1,317 @@
import os
import numpy as np
import torch.utils.data as data
import umsgpack
from PIL import Image
import json
import torchvision.transforms as tvf

from .transform import BEVTransform
from ..schema import KITTIDataConfiguration

class BEVKitti360Dataset(data.Dataset):
    _IMG_DIR = "img"
    _BEV_MSK_DIR = "bev_msk"
    _BEV_PLABEL_DIR = "bev_plabel_dynamic"
    _FV_MSK_DIR = "front_msk_seam"
    _BEV_DIR = "bev_ortho"
    _LST_DIR = "split"
    _PERCENTAGES_DIR = "percentages"
    _BEV_METADATA_FILE = "metadata_ortho.bin"
    _FV_METADATA_FILE = "metadata_front.bin"

    def __init__(self, cfg: KITTIDataConfiguration, split_name="train"):
        super(BEVKitti360Dataset, self).__init__()
        self.cfg = cfg
        self.seam_root_dir = cfg.seam_root_dir  # Directory of seamless data
        self.kitti_root_dir = cfg.dataset_root_dir  # Directory of the KITTI360 data
        self.split_name = split_name

        self.rgb_cameras = ['front']
        if cfg.bev_percentage < 1:
            self.bev_percentage = cfg.bev_percentage
        else:
            self.bev_percentage = int(cfg.bev_percentage)

        # Folders
        self._img_dir = os.path.join(self.seam_root_dir, BEVKitti360Dataset._IMG_DIR)
        self._bev_msk_dir = os.path.join(self.seam_root_dir, BEVKitti360Dataset._BEV_MSK_DIR, BEVKitti360Dataset._BEV_DIR)
        self._bev_plabel_dir = os.path.join(self.seam_root_dir, BEVKitti360Dataset._BEV_PLABEL_DIR, BEVKitti360Dataset._BEV_DIR)
        self._fv_msk_dir = os.path.join(self.seam_root_dir, BEVKitti360Dataset._FV_MSK_DIR, "front")
        self._lst_dir = os.path.join(self.seam_root_dir, BEVKitti360Dataset._LST_DIR)
        self._percentages_dir = os.path.join(self.seam_root_dir, BEVKitti360Dataset._LST_DIR, BEVKitti360Dataset._PERCENTAGES_DIR)

        # Load meta-data and split
        self._bev_meta, self._bev_images, self._bev_images_all, self._fv_meta, self._fv_images, self._fv_images_all,\
            self._img_map, self.bev_percent_split = self._load_split()

        self.tfs = self.get_augmentations() if split_name == "train" else tvf.Compose([])
        self.transform = BEVTransform(cfg, self.tfs)

    def get_augmentations(self):

        print(f"Augmentation!", "\n" * 10)
        augmentations = [
            tvf.ColorJitter(
                brightness=self.cfg.augmentations.brightness,
                contrast=self.cfg.augmentations.contrast,
                saturation=self.cfg.augmentations.saturation,
                hue=self.cfg.augmentations.hue,
            )
        ]

        if self.cfg.augmentations.random_resized_crop:
            augmentations.append(
                tvf.RandomResizedCrop(scale=(0.8, 1.0))
            )  # RandomResizedCrop

        if self.cfg.augmentations.gaussian_noise.enabled:
            augmentations.append(
                tvf.GaussianNoise(
                    mean=self.cfg.augmentations.gaussian_noise.mean,
                    std=self.cfg.augmentations.gaussian_noise.std,
                )
            )  # Gaussian noise

        if self.cfg.augmentations.brightness_contrast.enabled:
            augmentations.append(
                tvf.ColorJitter(
                    brightness=self.cfg.augmentations.brightness_contrast.brightness_factor,
                    contrast=self.cfg.augmentations.brightness_contrast.contrast_factor,
                    saturation=0,  # Keep saturation at 0 for brightness and contrast adjustment
                    hue=0,
                )
            )  # Brightness and contrast adjustment

        return tvf.Compose(augmentations)

    # Load the train or the validation split
    def _load_split(self):
        with open(os.path.join(self.seam_root_dir, BEVKitti360Dataset._BEV_METADATA_FILE), "rb") as fid:
            bev_metadata = umsgpack.unpack(fid, encoding="utf-8")

        with open(os.path.join(self.seam_root_dir, BEVKitti360Dataset._FV_METADATA_FILE), 'rb') as fid:
            fv_metadata = umsgpack.unpack(fid, encoding="utf-8")

        # Read the files for this split
        with open(os.path.join(self._lst_dir, self.split_name + ".txt"), "r") as fid:
            lst = fid.readlines()
            lst = [line.strip() for line in lst]

        if self.split_name == "train":
            # Get all the frames in the train dataset. This will be used for generating samples for temporal consistency.
            with open(os.path.join(self._lst_dir, "{}_all.txt".format(self.split_name)), 'r') as fid:
                lst_all = fid.readlines()
                lst_all = [line.strip() for line in lst_all]

            # Get all the samples for which the BEV plabels have to be loaded.
            percentage_file = os.path.join(self._percentages_dir, "{}_{}.txt".format(self.split_name, self.bev_percentage))
            print("Loading {}% file".format(self.bev_percentage))
            with open(percentage_file, 'r') as fid:
                lst_percent = fid.readlines()
                lst_percent = [line.strip() for line in lst_percent]
        else:
            lst_all = lst
            lst_percent = lst

        # Remove elements from lst if they are not in _FRONT_MSK_DIR
        fv_msk_frames = os.listdir(self._fv_msk_dir)
        fv_msk_frames = [frame.split(".")[0] for frame in fv_msk_frames]
        fv_msk_frames_exist_map = {entry: True for entry in fv_msk_frames}  # This is to speed-up the dataloader
        lst = [entry for entry in lst if entry in fv_msk_frames_exist_map]
        lst_all = [entry for entry in lst_all if entry in fv_msk_frames_exist_map]

        # Filter based on the samples plabels
        if self.bev_percentage < 100:
            lst_filt = [entry for entry in lst if entry in lst_percent]
            lst = lst_filt

        # Remove any potential duplicates
        lst = set(lst)
        lst_percent = set(lst_percent)

        img_map = {}
        for camera in self.rgb_cameras:
            with open(os.path.join(self._img_dir, "{}.json".format(camera))) as fp:
                map_list = json.load(fp)
                map_dict = {k: v for d in map_list for k, v in d.items()}
                img_map[camera] = map_dict

        bev_meta = bev_metadata["meta"]
        bev_images = [img_desc for img_desc in bev_metadata["images"] if img_desc["id"] in lst]
        fv_meta = fv_metadata["meta"]
        fv_images = [img_desc for img_desc in fv_metadata['images'] if img_desc['id'] in lst]

        # Check for inconsistency due to inconsistencies in the input files or dataset
        bev_images_ids = [bev_img["id"] for bev_img in bev_images]
        fv_images_ids = [fv_img["id"] for fv_img in fv_images]
        assert set(bev_images_ids) == set(fv_images_ids) and len(bev_images_ids) == len(fv_images_ids), 'Inconsistency between fv_images and bev_images detected'

        if lst_all is not None:
            bev_images_all = [img_desc for img_desc in bev_metadata['images'] if img_desc['id'] in lst_all]
            fv_images_all = [img_desc for img_desc in fv_metadata['images'] if img_desc['id'] in lst_all]
        else:
            bev_images_all, fv_images_all = None, None

        return bev_meta, bev_images, bev_images_all, fv_meta, fv_images, fv_images_all, img_map, lst_percent

    def _find_index(self, list, key, value):
        for i, dic in enumerate(list):
            if dic[key] == value:
                return i
        return None

    def _load_item(self, item_idx):
        # Find the index of the element in the list containing all elements
        all_idx = self._find_index(self._fv_images_all, "id", self._fv_images[item_idx]['id'])
        if all_idx is None:
            raise IOError("Required index not found!")

        bev_img_desc = self._bev_images[item_idx]
        fv_img_desc = self._fv_images[item_idx]

        scene, frame_id = self._bev_images[item_idx]["id"].split(";")

        # Get the RGB file names
        img_file = os.path.join(
            self.kitti_root_dir,
            self._img_map["front"]["{}.png"
                                   .format(bev_img_desc['id'])]
        )

        if not os.path.exists(img_file):
            raise IOError(
                "RGB image not found! Scene: {}, Frame: {}".format(scene, frame_id)
            )

        # Load the images
        img = Image.open(img_file).convert(mode="RGB")

        # Load the BEV mask
        bev_msk_file = os.path.join(
            self._bev_msk_dir,
            "{}.png".format(bev_img_desc['id'])
        )
        bev_msk = Image.open(bev_msk_file)
        bev_plabel = None

        # Load the front mask
        fv_msk_file = os.path.join(
            self._fv_msk_dir,
            "{}.png".format(fv_img_desc['id'])
        )
        fv_msk = Image.open(fv_msk_file)

        bev_weights_msk_combined = None

        # Get the other information
        bev_cat = bev_img_desc["cat"]
        bev_iscrowd = bev_img_desc["iscrowd"]
        fv_cat = fv_img_desc['cat']
        fv_iscrowd = fv_img_desc['iscrowd']
        fv_intrinsics = fv_img_desc["cam_intrinsic"]
        ego_pose = fv_img_desc['ego_pose']  # This loads the cam0 pose

        # Get the ids of all the frames
        frame_ids = bev_img_desc["id"]

        return img, bev_msk, bev_plabel, fv_msk, bev_weights_msk_combined, bev_cat, \
            bev_iscrowd, fv_cat, fv_iscrowd, fv_intrinsics, ego_pose, frame_ids

    @property
    def fv_categories(self):
        """Category names"""
        return self._fv_meta["categories"]

    @property
    def fv_num_categories(self):
        """Number of categories"""
        return len(self.fv_categories)

    @property
    def fv_num_stuff(self):
        """Number of "stuff" categories"""
        return self._fv_meta["num_stuff"]

    @property
    def fv_num_thing(self):
        """Number of "thing" categories"""
        return self.fv_num_categories - self.fv_num_stuff

    @property
    def bev_categories(self):
        """Category names"""
        return self._bev_meta["categories"]

    @property
    def bev_num_categories(self):
        """Number of categories"""
        return len(self.bev_categories)

    @property
    def bev_num_stuff(self):
        """Number of "stuff" categories"""
        return self._bev_meta["num_stuff"]

    @property
    def bev_num_thing(self):
        """Number of "thing" categories"""
        return self.bev_num_categories - self.bev_num_stuff

    @property
    def original_ids(self):
        """Original class id of each category"""
        return self._fv_meta["original_ids"]

    @property
    def palette(self):
        """Default palette to be used when color-coding semantic labels"""
        return np.array(self._fv_meta["palette"], dtype=np.uint8)

    @property
    def img_sizes(self):
        """Size of each image of the dataset"""
        return [img_desc["size"] for img_desc in self._fv_images]

    @property
    def img_categories(self):
        """Categories present in each image of the dataset"""
        return [img_desc["cat"] for img_desc in self._fv_images]

    @property
    def dataset_name(self):
        return "Kitti360"

    def __len__(self):
        if self.cfg.percentage < 1:
            return int(len(self._fv_images) * self.cfg.percentage)

        return len(self._fv_images)

    def __getitem__(self, item):
        img, bev_msk, bev_plabel, fv_msk, bev_weights_msk, bev_cat, bev_iscrowd, fv_cat, fv_iscrowd, fv_intrinsics, ego_pose, idx = self._load_item(item)

        rec = self.transform(img=img, bev_msk=bev_msk, bev_plabel=bev_plabel, fv_msk=fv_msk, bev_weights_msk=bev_weights_msk, bev_cat=bev_cat,
                             bev_iscrowd=bev_iscrowd, fv_cat=fv_cat, fv_iscrowd=fv_iscrowd, fv_intrinsics=fv_intrinsics,
                             ego_pose=ego_pose)
        size = (img.size[1], img.size[0])

        # Close the file
        img.close()
        bev_msk.close()
        fv_msk.close()

        rec["index"] = idx
        rec["size"] = size
        rec['name'] = idx

        return rec

    def get_image_desc(self, idx):
        """Look up an image descriptor given the id"""
        matching = [img_desc for img_desc in self._images if img_desc["id"] == idx]
        if len(matching) == 1:
            return matching[0]
        else:
            raise ValueError("No image found with id %s" % idx)
mapper/data/kitti/transform.py
ADDED
@@ -0,0 +1,149 @@
import numpy as np
import torch
from torchvision.transforms import functional as tfn
import torchvision.transforms.functional as tvf

from ..utils import decompose_rotmat
from ..image import pad_image, rectify_image, resize_image
from ...utils.wrappers import Camera
from ..schema import KITTIDataConfiguration


class BEVTransform:
    def __init__(self,
                 cfg: KITTIDataConfiguration, augmentations):
        self.cfg = cfg
        self.augmentations = augmentations

    @staticmethod
    def _compact_labels(msk, cat, iscrowd):
        ids = np.unique(msk)
        if 0 not in ids:
            ids = np.concatenate((np.array([0], dtype=np.int32), ids), axis=0)

        ids_to_compact = np.zeros((ids.max() + 1,), dtype=np.int32)
        ids_to_compact[ids] = np.arange(0, ids.size, dtype=np.int32)

        msk = ids_to_compact[msk]
        cat = cat[ids]
        iscrowd = iscrowd[ids]

        return msk, cat, iscrowd

    def __call__(self, img, bev_msk=None, bev_plabel=None, fv_msk=None, bev_weights_msk=None,
                 bev_cat=None, bev_iscrowd=None, fv_cat=None, fv_iscrowd=None,
                 fv_intrinsics=None, ego_pose=None):
        # Wrap in np.array
        if bev_cat is not None:
            bev_cat = np.array(bev_cat, dtype=np.int32)
        if bev_iscrowd is not None:
            bev_iscrowd = np.array(bev_iscrowd, dtype=np.uint8)

        if ego_pose is not None:
            ego_pose = np.array(ego_pose, dtype=np.float32)

        roll, pitch, yaw = decompose_rotmat(ego_pose[:3, :3])

        # Image transformations
        img = tfn.to_tensor(img)
        # img = [self._normalize_image(rgb) for rgb in img]
        fx = fv_intrinsics[0][0]
        fy = fv_intrinsics[1][1]
        cx = fv_intrinsics[0][2]
        cy = fv_intrinsics[1][2]
        width = img.shape[2]
        height = img.shape[1]

        cam = Camera(torch.tensor(
            [width, height, fx, fy, cx - 0.5, cy - 0.5])).float()

        if not self.cfg.gravity_align:
            # Turn off gravity alignment
            roll = 0.0
            pitch = 0.0
            img, valid = rectify_image(img, cam, roll, pitch)
        else:
            img, valid = rectify_image(
                img, cam, roll, pitch if self.cfg.rectify_pitch else None
            )
            roll = 0.0
            if self.cfg.rectify_pitch:
                pitch = 0.0

        if self.cfg.target_focal_length is not None:
            # Resize to a canonical focal length
            factor = self.cfg.target_focal_length / cam.f.numpy()
            size = (np.array(img.shape[-2:][::-1]) * factor).astype(int)
            img, _, cam, valid = resize_image(img, size, camera=cam, valid=valid)
            size_out = self.cfg.resize_image
            if size_out is None:
                # Round the edges up such that they are multiple of a factor
                stride = self.cfg.pad_to_multiple
                size_out = (np.ceil((size / stride)) * stride).astype(int)
            # Crop or pad such that both edges are of the given size
            img, valid, cam = pad_image(
                img, size_out, cam, valid, crop_and_center=False
            )
        elif self.cfg.resize_image is not None:
            img, _, cam, valid = resize_image(
                img, self.cfg.resize_image, fn=max, camera=cam, valid=valid
            )
            if self.cfg.pad_to_square:
                # Pad such that both edges are of the given size
                img, valid, cam = pad_image(img, self.cfg.resize_image, cam, valid)

        # Label transformations,
        if bev_msk is not None:
            bev_msk = np.expand_dims(
                np.array(bev_msk, dtype=np.int32, copy=False),
                axis=0
            )
            bev_msk, bev_cat, bev_iscrowd = self._compact_labels(
                bev_msk, bev_cat, bev_iscrowd
            )

            bev_msk = torch.from_numpy(bev_msk)
            bev_cat = torch.from_numpy(bev_cat)

            rotated_mask = torch.rot90(bev_msk, dims=(1, 2))
            cropped_mask = rotated_mask[:, :672, (rotated_mask.size(2) - 672) // 2:-(rotated_mask.size(2) - 672) // 2]

            bev_msk = cropped_mask.squeeze(0)
            seg_masks = bev_cat[bev_msk]

            seg_masks_onehot = seg_masks.clone()
            seg_masks_onehot[seg_masks_onehot == 255] = 0
            seg_masks_onehot = torch.nn.functional.one_hot(
                seg_masks_onehot.to(torch.int64),
                num_classes=self.cfg.num_classes
            )
            seg_masks_onehot[seg_masks == 255] = 0

            seg_masks_onehot = seg_masks_onehot.permute(2, 0, 1)

            seg_masks_down = tvf.resize(seg_masks_onehot, (100, 100))

            seg_masks_down = seg_masks_down.permute(1, 2, 0)

            if self.cfg.class_mapping is not None:
                seg_masks_down = seg_masks_down[:, :, self.cfg.class_mapping]

        img = self.augmentations(img)
        flood_masks = torch.all(seg_masks_down == 0, dim=2).float()

        ret = {
            "image": img,
            "valid": valid,
            "camera": cam,
            "seg_masks": (seg_masks_down).float().contiguous(),
            "flood_masks": flood_masks,
            "roll_pitch_yaw": torch.tensor((roll, pitch, yaw)).float(),
            "confidence_map": flood_masks,
        }

        for key, value in ret.items():
            if isinstance(value, np.ndarray):
                ret[key] = torch.from_numpy(value)

        return ret
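The label path above (compact ids, category lookup, one-hot with a 255 "void" value) can be illustrated in isolation; this sketch is not from the commit and only mirrors the pattern used in `__call__`:

# Hedged illustration: per-pixel category lookup followed by void-aware one-hot.
import torch

msk = torch.tensor([[0, 1], [2, 2]])   # compacted instance ids per pixel
cat = torch.tensor([255, 3, 1])        # category of each instance, 255 = void
seg = cat[msk]                         # per-pixel category
seg_oh = seg.clone()
seg_oh[seg_oh == 255] = 0              # temporarily map void to class 0 so one_hot is valid
onehot = torch.nn.functional.one_hot(seg_oh.to(torch.int64), num_classes=5)
onehot[seg == 255] = 0                 # void pixels contribute to no class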
mapper/data/mapillary/data_module.py
ADDED
@@ -0,0 +1,317 @@
import json
from collections import defaultdict
import os
import shutil
import tarfile
from pathlib import Path
from typing import Optional

import numpy as np
import pytorch_lightning as pl
import torch
import torch.utils.data as torchdata
from omegaconf import DictConfig

from ... import logger
from .dataset import MapLocDataset
from ..sequential import chunk_sequence
from ..torch import collate, worker_init_fn
from ..schema import MIADataConfiguration

def pack_dump_dict(dump):
    for per_seq in dump.values():
        if "points" in per_seq:
            for chunk in list(per_seq["points"]):
                points = per_seq["points"].pop(chunk)
                if points is not None:
                    per_seq["points"][chunk] = np.array(
                        per_seq["points"][chunk], np.float64
                    )
        for view in per_seq["views"].values():
            for k in ["R_c2w", "roll_pitch_yaw"]:
                view[k] = np.array(view[k], np.float32)
            for k in ["chunk_id"]:
                if k in view:
                    view.pop(k)
            if "observations" in view:
                view["observations"] = np.array(view["observations"])
        for camera in per_seq["cameras"].values():
            for k in ["params"]:
                camera[k] = np.array(camera[k], np.float32)
    return dump


class MapillaryDataModule(pl.LightningDataModule):
    dump_filename = "dump.json"
    images_archive = "images.tar.gz"
    images_dirname = "images/"
    semantic_masks_dirname = "semantic_masks/"
    flood_dirname = "flood_fill/"

    def __init__(self, cfg: MIADataConfiguration):
        super().__init__()
        self.cfg = cfg
        self.root = self.cfg.data_dir
        self.local_dir = None

    def prepare_data(self):
        for scene in self.cfg.scenes:
            dump_dir = self.root / scene
            assert (dump_dir / self.dump_filename).exists(), dump_dir
            # assert (dump_dir / self.cfg.tiles_filename).exists(), dump_dir
            if self.local_dir is None:
                assert (dump_dir / self.images_dirname).exists(), dump_dir
                continue
            assert (dump_dir / self.semantic_masks_dirname).exists(), dump_dir
            assert (dump_dir / self.flood_dirname).exists(), dump_dir
            # Cache the folder of images locally to speed up reading
            local_dir = self.local_dir / scene
            if local_dir.exists():
                shutil.rmtree(local_dir)
            local_dir.mkdir(exist_ok=True, parents=True)
            images_archive = dump_dir / self.images_archive
            logger.info("Extracting the image archive %s.", images_archive)
            with tarfile.open(images_archive) as fp:
                fp.extractall(local_dir)

    def setup(self, stage: Optional[str] = None):
        self.dumps = {}
        # self.tile_managers = {}
        self.image_dirs = {}
        self.seg_masks_dir = {}
        self.flood_masks_dir = {}
        names = []

        for scene in self.cfg.scenes:
            logger.info("Loading scene %s.", scene)
            dump_dir = self.root / scene

            logger.info("Loading dump json file %s.", self.dump_filename)
            with (dump_dir / self.dump_filename).open("r") as fp:
                self.dumps[scene] = pack_dump_dict(json.load(fp))
            for seq, per_seq in self.dumps[scene].items():
                for cam_id, cam_dict in per_seq["cameras"].items():
                    if cam_dict["model"] != "PINHOLE":
                        raise ValueError(
                            f"Unsupported camera model: {cam_dict['model']} for {scene},{seq},{cam_id}"
                        )

            self.image_dirs[scene] = (
                (self.local_dir or self.root) / scene / self.images_dirname
            )
            assert self.image_dirs[scene].exists(), self.image_dirs[scene]

            self.seg_masks_dir[scene] = (
                (self.local_dir or self.root) / scene / self.semantic_masks_dirname
            )
            assert self.seg_masks_dir[scene].exists(), self.seg_masks_dir[scene]

            self.flood_masks_dir[scene] = (
                (self.local_dir or self.root) / scene / self.flood_dirname
            )
            assert self.flood_masks_dir[scene].exists(), self.flood_masks_dir[scene]

            images = set(x.split('.')[0] for x in os.listdir(self.image_dirs[scene]))
            flood_masks = set(x.split('.')[0] for x in os.listdir(self.flood_masks_dir[scene]))
            semantic_masks = set(x.split('.')[0] for x in os.listdir(self.seg_masks_dir[scene]))

            for seq, data in self.dumps[scene].items():
                for name in data["views"]:
                    if name in images and name.split("_")[0] in flood_masks and name.split("_")[0] in semantic_masks:
                        names.append((scene, seq, name))

        self.parse_splits(self.cfg.split, names)
        if self.cfg.filter_for is not None:
            self.filter_elements()
        self.pack_data()

    def pack_data(self):
        # We pack the data into compact tensors that can be shared across processes without copy
        exclude = {
            "compass_angle",
            "compass_accuracy",
            "gps_accuracy",
            "chunk_key",
            "panorama_offset",
        }
        cameras = {
            scene: {seq: per_seq["cameras"] for seq, per_seq in per_scene.items()}
            for scene, per_scene in self.dumps.items()
        }
        points = {
            scene: {
                seq: {
                    i: torch.from_numpy(p) for i, p in per_seq.get("points", {}).items()
                }
                for seq, per_seq in per_scene.items()
            }
            for scene, per_scene in self.dumps.items()
        }
        self.data = {}

        # TODO: remove
        if self.cfg.split == "splits_MGL_13loc.json":
            # Use Last 20% as Val
            num_samples_to_move = int(len(self.splits['train']) * 0.2)
            samples_to_move = self.splits['train'][-num_samples_to_move:]
            self.splits['val'].extend(samples_to_move)
            self.splits['train'] = self.splits['train'][:-num_samples_to_move]
            print(f"Dataset Len: {len(self.splits['train']), len(self.splits['val'])}\n\n\n\n")
        elif self.cfg.split == "splits_MGL_soma_70k_mappred_random.json":
            for stage, names in self.splits.items():
                print("Length of splits {}: ".format(stage), len(self.splits[stage]))
        for stage, names in self.splits.items():
            view = self.dumps[names[0][0]][names[0][1]]["views"][names[0][2]]
            data = {k: [] for k in view.keys() - exclude}
            for scene, seq, name in names:
                for k in data:
                    data[k].append(self.dumps[scene][seq]["views"][name].get(k, None))
            for k in data:
                v = np.array(data[k])
                if np.issubdtype(v.dtype, np.integer) or np.issubdtype(
                    v.dtype, np.floating
                ):
                    v = torch.from_numpy(v)
                data[k] = v
            data["cameras"] = cameras
            data["points"] = points
            self.data[stage] = data
            self.splits[stage] = np.array(names)

    def filter_elements(self):
        for stage, names in self.splits.items():
            names_select = []
            for scene, seq, name in names:
                view = self.dumps[scene][seq]["views"][name]
                if self.cfg.filter_for == "ground_plane":
                    if not (1.0 <= view["height"] <= 3.0):
                        continue
                    planes = self.dumps[scene][seq].get("plane")
                    if planes is not None:
                        inliers = planes[str(view["chunk_id"])][-1]
                        if inliers < 10:
                            continue
                    if self.cfg.filter_by_ground_angle is not None:
                        plane = np.array(view["plane_params"])
                        normal = plane[:3] / np.linalg.norm(plane[:3])
                        angle = np.rad2deg(np.arccos(np.abs(normal[-1])))
                        if angle > self.cfg.filter_by_ground_angle:
                            continue
                elif self.cfg.filter_for == "pointcloud":
                    if len(view["observations"]) < self.cfg.min_num_points:
                        continue
                elif self.cfg.filter_for is not None:
                    raise ValueError(f"Unknown filtering: {self.cfg.filter_for}")
                names_select.append((scene, seq, name))
            logger.info(
                "%s: Keep %d/%d images after filtering for %s.",
                stage,
                len(names_select),
                len(names),
                self.cfg.filter_for,
            )
            self.splits[stage] = names_select

    def parse_splits(self, split_arg, names):
        if split_arg is None:
            self.splits = {
                "train": names,
                "val": names,
            }
        elif isinstance(split_arg, int):
            names = np.random.RandomState(self.cfg.seed).permutation(names).tolist()
            self.splits = {
                "train": names[split_arg:],
                "val": names[:split_arg],
            }
        elif isinstance(split_arg, float):
            names = np.random.RandomState(self.cfg.seed).permutation(names).tolist()
            self.splits = {
                "train": names[int(split_arg * len(names)) :],
                "val": names[: int(split_arg * len(names))],
            }
        elif isinstance(split_arg, DictConfig):
            scenes_val = set(split_arg.val)
            scenes_train = set(split_arg.train)
            assert len(scenes_val - set(self.cfg.scenes)) == 0
            assert len(scenes_train - set(self.cfg.scenes)) == 0
            self.splits = {
                "train": [n for n in names if n[0] in scenes_train],
                "val": [n for n in names if n[0] in scenes_val],
            }
        elif isinstance(split_arg, str):

            if "/" in split_arg:
                split_path = self.root / split_arg
            else:
                split_path = Path(split_arg)

            with split_path.open("r") as fp:
                splits = json.load(fp)
            splits = {
                k: {loc: set(ids) for loc, ids in split.items()}
                for k, split in splits.items()
            }
            self.splits = {}

            for k, split in splits.items():
                self.splits[k] = [
                    n
                    for n in names
                    if n[0] in split and int(n[-1].rsplit("_", 1)[0]) in split[n[0]]
                ]
        else:
            raise ValueError(split_arg)

    def dataset(self, stage: str):
        return MapLocDataset(
            stage,
            self.cfg,
            self.splits[stage],
            self.data[stage],
            self.image_dirs,
            self.seg_masks_dir,
            self.flood_masks_dir,
            image_ext=".jpg",
        )

    def sequence_dataset(self, stage: str, **kwargs):
        keys = self.splits[stage]
        seq2indices = defaultdict(list)
        for index, (_, seq, _) in enumerate(keys):
            seq2indices[seq].append(index)
        # chunk the sequences to the required length
        chunk2indices = {}
        for seq, indices in seq2indices.items():
            chunks = chunk_sequence(self.data[stage], indices, **kwargs)
            for i, sub_indices in enumerate(chunks):
                chunk2indices[seq, i] = sub_indices
        # store the index of each chunk in its sequence
        chunk_indices = torch.full((len(keys),), -1)
        for (_, chunk_index), idx in chunk2indices.items():
            chunk_indices[idx] = chunk_index
        self.data[stage]["chunk_index"] = chunk_indices
        dataset = self.dataset(stage)
        return dataset, chunk2indices

    def sequence_dataloader(self, stage: str, shuffle: bool = False, **kwargs):
        dataset, chunk2idx = self.sequence_dataset(stage, **kwargs)
        chunk_keys = sorted(chunk2idx)
        if shuffle:
            perm = torch.randperm(len(chunk_keys))
            chunk_keys = [chunk_keys[i] for i in perm]
        key_indices = [i for key in chunk_keys for i in chunk2idx[key]]
        num_workers = self.cfg.loading[stage]["num_workers"]
        loader = torchdata.DataLoader(
            dataset,
            batch_size=None,
            sampler=key_indices,
            num_workers=num_workers,
            shuffle=False,
            pin_memory=True,
            persistent_workers=num_workers > 0,
            worker_init_fn=worker_init_fn,
            collate_fn=collate,
        )
        return loader, chunk_keys, chunk2idx
mapper/data/mapillary/dataset.py
ADDED
@@ -0,0 +1,255 @@
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, List

import numpy as np
import torch
import torch.utils.data as torchdata
import torchvision.transforms as tvf
from PIL import Image

from ...models.utils import deg2rad, rotmat2d
from ...utils.io import read_image
from ...utils.wrappers import Camera
from ..image import pad_image, rectify_image, resize_image
from ..utils import decompose_rotmat
from ..schema import MIADataConfiguration


class MapLocDataset(torchdata.Dataset):
    def __init__(
        self,
        stage: str,
        cfg: MIADataConfiguration,
        names: List[str],
        data: Dict[str, Any],
        image_dirs: Dict[str, Path],
        seg_mask_dirs: Dict[str, Path],
        flood_masks_dirs: Dict[str, Path],
        image_ext: str = "",
    ):
        self.stage = stage
        self.cfg = deepcopy(cfg)
        self.data = data
        self.image_dirs = image_dirs
        self.seg_mask_dirs = seg_mask_dirs
        self.flood_masks_dirs = flood_masks_dirs
        self.names = names
        self.image_ext = image_ext

        tfs = []
        self.tfs = tvf.Compose(tfs)
        self.augmentations = self.get_augmentations()

    def __len__(self):
        return len(self.names)

    def __getitem__(self, idx):
        if self.stage == "train" and self.cfg.random:
            seed = None
        else:
            seed = [self.cfg.seed, idx]
        (seed,) = np.random.SeedSequence(seed).generate_state(1)

        scene, seq, name = self.names[idx]

        view = self.get_view(
            idx, scene, seq, name, seed
        )

        return view

    def get_augmentations(self):
        if self.stage != "train" or not self.cfg.augmentations.enabled:
            print(f"No Augmentation!", "\n" * 10)
            self.cfg.augmentations.random_flip = 0.0
            return tvf.Compose([])

        print(f"Augmentation!", "\n" * 10)
        augmentations = [
            tvf.ColorJitter(
                brightness=self.cfg.augmentations.brightness,
                contrast=self.cfg.augmentations.contrast,
                saturation=self.cfg.augmentations.saturation,
                hue=self.cfg.augmentations.hue,
            )
        ]

        if self.cfg.augmentations.random_resized_crop:
            augmentations.append(
                tvf.RandomResizedCrop(scale=(0.8, 1.0))
            )  # RandomResizedCrop

        if self.cfg.augmentations.gaussian_noise.enabled:
            augmentations.append(
                tvf.GaussianNoise(
                    mean=self.cfg.augmentations.gaussian_noise.mean,
                    std=self.cfg.augmentations.gaussian_noise.std,
                )
            )  # Gaussian noise

        if self.cfg.augmentations.brightness_contrast.enabled:
            augmentations.append(
                tvf.ColorJitter(
                    brightness=self.cfg.augmentations.brightness_contrast.brightness_factor,
                    contrast=self.cfg.augmentations.brightness_contrast.contrast_factor,
                    saturation=0,  # Keep saturation at 0 for brightness and contrast adjustment
                    hue=0,
                )
            )  # Brightness and contrast adjustment

        return tvf.Compose(augmentations)

    def random_flip(self, image, cam, valid, seg_mask, flood_mask, conf_mask):
        if torch.rand(1) < self.cfg.augmentations.random_flip:
            image = torch.flip(image, [-1])
            cam = cam.flip()
            valid = torch.flip(valid, [-1])
            seg_mask = torch.flip(seg_mask, [1])
            flood_mask = torch.flip(flood_mask, [-1])
            conf_mask = torch.flip(conf_mask, [-1])

        return image, cam, valid, seg_mask, flood_mask, conf_mask

    def get_view(self, idx, scene, seq, name, seed):
        data = {
            "index": idx,
            "name": name,
            "scene": scene,
            "sequence": seq,
        }
        cam_dict = self.data["cameras"][scene][seq][self.data["camera_id"][idx]]
        cam = Camera.from_dict(cam_dict).float()

        if "roll_pitch_yaw" in self.data:
            roll, pitch, yaw = self.data["roll_pitch_yaw"][idx].numpy()
        else:
            roll, pitch, yaw = decompose_rotmat(
                self.data["R_c2w"][idx].numpy())

        image = read_image(self.image_dirs[scene] / (name + self.image_ext))
        image = Image.fromarray(image)
        image = self.augmentations(image)
        image = np.array(image)

        if "plane_params" in self.data:
            # transform the plane parameters from world to camera frames
            plane_w = self.data["plane_params"][idx]
            data["ground_plane"] = torch.cat(
                [rotmat2d(deg2rad(torch.tensor(yaw)))
                 @ plane_w[:2], plane_w[2:]]
            )

        image, valid, cam, roll, pitch = self.process_image(
            image, cam, roll, pitch, seed
        )

        if "chunk_index" in self.data:  # TODO: (cherie) do we need this?
            data["chunk_id"] = (scene, seq, self.data["chunk_index"][idx])

        # Semantic map extraction
        seg_mask_path = self.seg_mask_dirs[scene] / \
            (name.split("_")[0] + ".npy")
        seg_masks_ours = np.load(seg_mask_path)
        mask_center = (
            seg_masks_ours.shape[0] // 2, seg_masks_ours.shape[1] // 2)

        seg_masks_ours = seg_masks_ours[mask_center[0] -
                                        100:mask_center[0], mask_center[1] - 50: mask_center[1] + 50]

        if self.cfg.num_classes == 6:
            seg_masks_ours = seg_masks_ours[..., [0, 1, 2, 4, 6, 7]]

        flood_mask_path = self.flood_masks_dirs[scene] / \
            (name.split("_")[0] + ".npy")
        flood_mask = np.load(flood_mask_path)

        flood_mask = flood_mask[mask_center[0]-100:mask_center[0],
                                mask_center[1] - 50: mask_center[1] + 50]

        confidence_map = flood_mask.copy()
        confidence_map = (confidence_map - confidence_map.min()) / \
            (confidence_map.max() - confidence_map.min() + 1e-6)

        seg_masks_ours = torch.from_numpy(seg_masks_ours).float()
        flood_mask = torch.from_numpy(flood_mask).float()
        confidence_map = torch.from_numpy(confidence_map).float()

        # Map Augmentations
        with torch.random.fork_rng(devices=[]):
            torch.manual_seed(seed)
            image, cam, valid, seg_masks_ours, flood_mask, confidence_map = self.random_flip(
                image, cam, valid, seg_masks_ours, flood_mask, confidence_map)

        return {
            **data,
            "image": image,
            "valid": valid,
            "camera": cam,
            "seg_masks": seg_masks_ours,
            "flood_masks": flood_mask,
            "roll_pitch_yaw": torch.tensor((roll, pitch, yaw)).float(),
            "confidence_map": confidence_map
            # "pixels_per_meter": torch.tensor(canvas.ppm).float(),
        }

    def process_image(self, image, cam, roll, pitch, seed):
        image = (
            torch.from_numpy(np.ascontiguousarray(image))
            .permute(2, 0, 1)
            .float()
            .div_(255)
        )

        if not self.cfg.gravity_align:
            # Turn off gravity alignment
            roll = 0.0
            pitch = 0.0
            image, valid = rectify_image(image, cam, roll, pitch)
        else:
            image, valid = rectify_image(
                image, cam, roll, pitch if self.cfg.rectify_pitch else None
            )
            roll = 0.0
            if self.cfg.rectify_pitch:
                pitch = 0.0

        if self.cfg.target_focal_length is not None:
            # Resize to a canonical focal length
            factor = self.cfg.target_focal_length / cam.f.numpy()
            size = (np.array(image.shape[-2:][::-1]) * factor).astype(int)
            image, _, cam, valid = resize_image(
                image, size, camera=cam, valid=valid)
            size_out = self.cfg.resize_image
            if size_out is None:
                # Round the edges up such that they are multiple of a factor
                stride = self.cfg.pad_to_multiple
                size_out = (np.ceil((size / stride)) * stride).astype(int)
            # Crop or pad such that both edges are of the given size
            image, valid, cam = pad_image(
                image, size_out, cam, valid, crop_and_center=True
            )
        elif self.cfg.resize_image is not None:
            image, _, cam, valid = resize_image(
                image, self.cfg.resize_image, fn=max, camera=cam, valid=valid
            )
            if self.cfg.pad_to_square:
                # Pad such that both edges are of the given size
                image, valid, cam = pad_image(
                    image, self.cfg.resize_image, cam, valid)

        if self.cfg.reduce_fov is not None:
            h, w = image.shape[-2:]
            f = float(cam.f[0])
            fov = np.arctan(w / f / 2)
            w_new = round(2 * f * np.tan(self.cfg.reduce_fov * fov))
            image, valid, cam = pad_image(
                image, (w_new, h), cam, valid, crop_and_center=True
            )

        with torch.random.fork_rng(devices=[]):
            torch.manual_seed(seed)
            image = self.tfs(image)

        return image, valid, cam, roll, pitch
mapper/data/module.py
ADDED
@@ -0,0 +1,64 @@
from typing import Optional
from omegaconf import DictConfig
import pytorch_lightning as L
import torch.utils.data as torchdata
from .torch import collate, worker_init_fn


def get_dataset(name):
    if name == "mapillary":
        from .mapillary.data_module import MapillaryDataModule
        return MapillaryDataModule
    elif name == "nuscenes":
        from .nuscenes.data_module import NuScenesData
        return NuScenesData
    elif name == "kitti":
        from .kitti.data_module import BEVKitti360Data
        return BEVKitti360Data
    else:
        raise NotImplementedError(f"Dataset {name} not implemented.")


class GenericDataModule(L.LightningDataModule):
    def __init__(self, cfg: DictConfig):
        super().__init__()
        self.cfg = cfg
        self.data_module = get_dataset(cfg.name)(cfg)

    def prepare_data(self) -> None:
        self.data_module.prepare_data()

    def setup(self, stage: Optional[str] = None):
        self.data_module.setup(stage)

    def dataloader(
        self,
        stage: str,
        shuffle: bool = False,
        num_workers: int = None,
        sampler: Optional[torchdata.Sampler] = None,
    ):
        dataset = self.data_module.dataset(stage)
        cfg = self.cfg["loading"][stage]
        num_workers = cfg["num_workers"] if num_workers is None else num_workers
        loader = torchdata.DataLoader(
            dataset,
            batch_size=cfg["batch_size"],
            num_workers=num_workers,
            shuffle=shuffle or (stage == "train"),
            pin_memory=True,
            persistent_workers=num_workers > 0,
            worker_init_fn=worker_init_fn,
            collate_fn=collate,
            sampler=sampler,
        )
        return loader

    def train_dataloader(self, **kwargs):
        return self.dataloader("train", **kwargs)

    def val_dataloader(self, **kwargs):
        return self.dataloader("val", **kwargs)

    def test_dataloader(self, **kwargs):
        return self.dataloader("test", **kwargs)
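A hedged sketch (not part of the commit) of pulling one training batch through this wrapper; `cfg` stands in for a populated data config carrying `name`, `loading.train`, `loading.val`, and the dataset-specific fields:

# Hedged sketch: instantiate the generic module and fetch a batch.
dm = GenericDataModule(cfg)
dm.prepare_data()
dm.setup("fit")
batch = next(iter(dm.train_dataloader()))
print(batch["image"].shape)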
mapper/data/nuscenes/data_module.py
ADDED
@@ -0,0 +1,33 @@
from ..base import DataBase
from .dataset import NuScenesDataset
from ..schema import NuScenesDataConfiguration

class NuScenesData(DataBase):
    def __init__(self, cfg: NuScenesDataConfiguration):
        self.cfg = cfg
        self._dataset = {}

    def prepare_data(self):
        pass

    def setup(self, stage):
        if stage is None:
            stage = 'fit'

        split = {
            'fit': 'train',
            'val': 'val',
            'validate': 'val',
            'test': 'test'
        }[stage]

        self._dataset[split] = NuScenesDataset(
            split=split,
            cfg=self.cfg
        )

    def dataset(self, stage):
        if self._dataset.get(stage) is None:
            self.setup(stage)

        return self._dataset[stage]
mapper/data/nuscenes/dataset.py
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
from pyquaternion import Quaternion
|
5 |
+
from nuscenes.nuscenes import NuScenes
|
6 |
+
from itertools import chain
|
7 |
+
from PIL import Image
|
8 |
+
from torchvision import transforms as T
|
9 |
+
import torchvision.transforms as tvf
|
10 |
+
from torchvision.transforms.functional import to_tensor
|
11 |
+
|
12 |
+
from .splits_roddick import create_splits_scenes_roddick
|
13 |
+
from ..image import pad_image, rectify_image, resize_image
|
14 |
+
from .utils import decode_binary_labels
|
15 |
+
from ..utils import decompose_rotmat
|
16 |
+
from ...utils.io import read_image
|
17 |
+
from ...utils.wrappers import Camera
|
18 |
+
from ..schema import NuScenesDataConfiguration
|
19 |
+
|
20 |
+
|
21 |
+
class NuScenesDataset(torch.utils.data.Dataset):
|
22 |
+
def __init__(self, cfg: NuScenesDataConfiguration, split="train"):
|
23 |
+
|
24 |
+
self.cfg = cfg
|
25 |
+
self.nusc = NuScenes(version=cfg.version, dataroot=str(cfg.data_dir))
|
26 |
+
self.map_data_root = cfg.map_dir
|
27 |
+
self.split = split
|
28 |
+
|
29 |
+
self.scenes = create_splits_scenes_roddick() # custom based on Roddick et al.
|
30 |
+
|
31 |
+
scene_split = {
|
32 |
+
'v1.0-trainval': {'train': 'train', 'val': 'val', 'test': 'val'},
|
33 |
+
'v1.0-mini': {'train': 'mini_train', 'val': 'mini_val'},
|
34 |
+
}[cfg.version][split]
|
35 |
+
self.scenes = self.scenes[scene_split]
|
36 |
+
self.sample = list(filter(lambda sample: self.nusc.get(
|
37 |
+
'scene', sample['scene_token'])['name'] in self.scenes, self.nusc.sample))
|
38 |
+
|
39 |
+
self.tfs = self.get_augmentations() if split == "train" else T.Compose([])
|
40 |
+
|
41 |
+
data_tokens = []
|
42 |
+
for sample in self.sample:
|
43 |
+
data_token = sample['data']
|
44 |
+
data_token = [v for k,v in data_token.items() if k == "CAM_FRONT"]
|
45 |
+
|
46 |
+
data_tokens.append(data_token)
|
47 |
+
|
48 |
+
data_tokens = list(chain.from_iterable(data_tokens))
|
49 |
+
data = [self.nusc.get('sample_data', token) for token in data_tokens]
|
50 |
+
|
51 |
+
self.data = []
|
52 |
+
for d in data:
|
53 |
+
sample = self.nusc.get('sample', d['sample_token'])
|
54 |
+
scene = self.nusc.get('scene', sample['scene_token'])
|
55 |
+
location = self.nusc.get('log', scene['log_token'])['location']
|
56 |
+
|
57 |
+
file_name = d['filename']
|
58 |
+
ego_pose = self.nusc.get('ego_pose', d['ego_pose_token'])
|
59 |
+
calibrated_sensor = self.nusc.get(
|
60 |
+
"calibrated_sensor", d['calibrated_sensor_token'])
|
61 |
+
|
62 |
+
ego2global = np.eye(4).astype(np.float32)
|
63 |
+
ego2global[:3, :3] = Quaternion(ego_pose['rotation']).rotation_matrix
|
64 |
+
ego2global[:3, 3] = ego_pose['translation']
|
65 |
+
|
66 |
+
sensor2ego = np.eye(4).astype(np.float32)
|
67 |
+
sensor2ego[:3, :3] = Quaternion(
|
68 |
+
calibrated_sensor['rotation']).rotation_matrix
|
69 |
+
sensor2ego[:3, 3] = calibrated_sensor['translation']
|
70 |
+
|
71 |
+
sensor2global = ego2global @ sensor2ego
|
72 |
+
|
73 |
+
rotation = sensor2global[:3, :3]
|
74 |
+
roll, pitch, yaw = decompose_rotmat(rotation)
|
75 |
+
|
76 |
+
fx = calibrated_sensor['camera_intrinsic'][0][0]
|
77 |
+
fy = calibrated_sensor['camera_intrinsic'][1][1]
|
78 |
+
cx = calibrated_sensor['camera_intrinsic'][0][2]
|
79 |
+
cy = calibrated_sensor['camera_intrinsic'][1][2]
|
80 |
+
width = d['width']
|
81 |
            height = d['height']

            cam = Camera(torch.tensor(
                [width, height, fx, fy, cx - 0.5, cy - 0.5])).float()
            self.data.append({
                'filename': file_name,
                'yaw': yaw,
                'pitch': pitch,
                'roll': roll,
                'cam': cam,
                'sensor2global': sensor2global,
                'token': d['token'],
                'sample_token': d['sample_token'],
                'location': location
            })

        if self.cfg.percentage < 1.0 and split == "train":
            self.data = self.data[:int(len(self.data) * self.cfg.percentage)]

    def get_augmentations(self):

        print(f"Augmentation!", "\n" * 10)
        augmentations = [
            tvf.ColorJitter(
                brightness=self.cfg.augmentations.brightness,
                contrast=self.cfg.augmentations.contrast,
                saturation=self.cfg.augmentations.saturation,
                hue=self.cfg.augmentations.hue,
            )
        ]

        if self.cfg.augmentations.random_resized_crop:
            augmentations.append(
                tvf.RandomResizedCrop(scale=(0.8, 1.0))
            )  # RandomResizedCrop

        if self.cfg.augmentations.gaussian_noise.enabled:
            augmentations.append(
                tvf.GaussianNoise(
                    mean=self.cfg.augmentations.gaussian_noise.mean,
                    std=self.cfg.augmentations.gaussian_noise.std,
                )
            )  # Gaussian noise

        if self.cfg.augmentations.brightness_contrast.enabled:
            augmentations.append(
                tvf.ColorJitter(
                    brightness=self.cfg.augmentations.brightness_contrast.brightness_factor,
                    contrast=self.cfg.augmentations.brightness_contrast.contrast_factor,
                    saturation=0,  # Keep saturation at 0 for brightness and contrast adjustment
                    hue=0,
                )
            )  # Brightness and contrast adjustment

        return tvf.Compose(augmentations)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        d = self.data[idx]

        image = read_image(os.path.join(self.nusc.dataroot, d['filename']))
        image = np.array(image)
        cam = d['cam']
        roll = d['roll']
        pitch = d['pitch']
        yaw = d['yaw']

        with Image.open(self.map_data_root / f"{d['token']}.png") as semantic_image:
            semantic_mask = to_tensor(semantic_image)

        semantic_mask = decode_binary_labels(semantic_mask, self.cfg.num_classes + 1)
        semantic_mask = torch.nn.functional.max_pool2d(semantic_mask.float(), (2, 2), stride=2)  # 2 times downsample
        semantic_mask = semantic_mask.permute(1, 2, 0)
        semantic_mask = torch.flip(semantic_mask, [0])

        visibility_mask = semantic_mask[..., -1]
        semantic_mask = semantic_mask[..., :-1]

        if self.cfg.class_mapping is not None:
            semantic_mask = semantic_mask[..., self.cfg.class_mapping]

        image = (
            torch.from_numpy(np.ascontiguousarray(image))
            .permute(2, 0, 1)
            .float()
            .div_(255)
        )

        if not self.cfg.gravity_align:
            # Turn off gravity alignment
            roll = 0.0
            pitch = 0.0
            image, valid = rectify_image(image, cam, roll, pitch)

        else:
            image, valid = rectify_image(
                image, cam, roll, pitch if self.cfg.rectify_pitch else None
            )
            roll = 0.0
            if self.cfg.rectify_pitch:
                pitch = 0.0
        if self.cfg.resize_image is not None:
            image, _, cam, valid = resize_image(
                image, self.cfg.resize_image, fn=max, camera=cam, valid=valid
            )
            if self.cfg.pad_to_square:
                image, valid, cam = pad_image(image, self.cfg.resize_image, cam, valid)
        image = self.tfs(image)

        confidence_map = visibility_mask.clone().float()
        confidence_map = (confidence_map - confidence_map.min()) / (confidence_map.max() - confidence_map.min())

        return {
            "image": image,
            "roll_pitch_yaw": torch.tensor([roll, pitch, yaw]).float(),
            "camera": cam,
            "valid": valid,
            "seg_masks": semantic_mask.float(),
            "token": d['token'],
            "sample_token": d['sample_token'],
            'location': d['location'],
            'flood_masks': visibility_mask.float(),
            "confidence_map": confidence_map,
            'name': d['sample_token']
        }
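Note: the confidence-map normalization above divides by `confidence_map.max() - confidence_map.min()`. A minimal guarded variant (a sketch, not part of the commit) for the case where the visibility mask is constant:

    # Hypothetical guard, not part of the committed __getitem__:
    rng = confidence_map.max() - confidence_map.min()
    if rng > 0:
        confidence_map = (confidence_map - confidence_map.min()) / rng
    else:
        confidence_map = torch.zeros_like(confidence_map)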
mapper/data/nuscenes/splits_roddick.py
ADDED
@@ -0,0 +1,197 @@
def create_splits_scenes_roddick():
    train_roddick_scenes = [
        "scene-0002", "scene-0003", "scene-0004", "scene-0005", "scene-0006",
        "scene-0007", "scene-0008", "scene-0009", "scene-0012", "scene-0013",
        "scene-0014", "scene-0015", "scene-0016", "scene-0017", "scene-0018",
        "scene-0019", "scene-0021", "scene-0022", "scene-0023", "scene-0024",
        "scene-0025", "scene-0026", "scene-0027", "scene-0028", "scene-0029",
        "scene-0030", "scene-0031", "scene-0032", "scene-0033", "scene-0034",
        "scene-0035", "scene-0036", "scene-0039", "scene-0042", "scene-0043",
        "scene-0044", "scene-0045", "scene-0046", "scene-0047", "scene-0048",
        "scene-0049", "scene-0050", "scene-0051", "scene-0052", "scene-0055",
        "scene-0056", "scene-0057", "scene-0058", "scene-0059", "scene-0060",
        "scene-0061", "scene-0062", "scene-0063", "scene-0064", "scene-0065",
        "scene-0066", "scene-0067", "scene-0068", "scene-0069", "scene-0070",
        "scene-0071", "scene-0072", "scene-0073", "scene-0074", "scene-0075",
        "scene-0076", "scene-0092", "scene-0093", "scene-0094", "scene-0095",
        "scene-0096", "scene-0097", "scene-0098", "scene-0099", "scene-0100",
        "scene-0101", "scene-0102", "scene-0103", "scene-0104", "scene-0105",
        "scene-0106", "scene-0107", "scene-0108", "scene-0109", "scene-0110",
        "scene-0120", "scene-0123", "scene-0124", "scene-0125", "scene-0126",
        "scene-0127", "scene-0128", "scene-0129", "scene-0130", "scene-0131",
        "scene-0132", "scene-0133", "scene-0134", "scene-0135", "scene-0138",
        "scene-0149", "scene-0150", "scene-0151", "scene-0154", "scene-0155",
        "scene-0157", "scene-0158", "scene-0159", "scene-0161", "scene-0162",
        "scene-0163", "scene-0164", "scene-0165", "scene-0166", "scene-0167",
        "scene-0168", "scene-0170", "scene-0171", "scene-0172", "scene-0173",
        "scene-0174", "scene-0175", "scene-0176", "scene-0177", "scene-0178",
        "scene-0179", "scene-0180", "scene-0181", "scene-0182", "scene-0183",
        "scene-0185", "scene-0187", "scene-0188", "scene-0190", "scene-0191",
        "scene-0192", "scene-0193", "scene-0194", "scene-0195", "scene-0196",
        "scene-0199", "scene-0200", "scene-0202", "scene-0203", "scene-0204",
        "scene-0206", "scene-0207", "scene-0208", "scene-0209", "scene-0210",
        "scene-0211", "scene-0212", "scene-0213", "scene-0214", "scene-0218",
        "scene-0219", "scene-0220", "scene-0221", "scene-0222", "scene-0224",
        "scene-0225", "scene-0226", "scene-0227", "scene-0228", "scene-0229",
        "scene-0230", "scene-0231", "scene-0232", "scene-0233", "scene-0234",
        "scene-0235", "scene-0236", "scene-0237", "scene-0238", "scene-0239",
        "scene-0240", "scene-0241", "scene-0242", "scene-0243", "scene-0244",
        "scene-0245", "scene-0246", "scene-0247", "scene-0248", "scene-0249",
        "scene-0250", "scene-0251", "scene-0252", "scene-0253", "scene-0254",
        "scene-0255", "scene-0256", "scene-0257", "scene-0258", "scene-0259",
        "scene-0260", "scene-0261", "scene-0262", "scene-0263", "scene-0264",
        "scene-0268", "scene-0270", "scene-0271", "scene-0272", "scene-0273",
        "scene-0274", "scene-0275", "scene-0276", "scene-0277", "scene-0278",
        "scene-0283", "scene-0284", "scene-0285", "scene-0286", "scene-0287",
        "scene-0288", "scene-0289", "scene-0290", "scene-0291", "scene-0292",
        "scene-0293", "scene-0294", "scene-0295", "scene-0296", "scene-0297",
        "scene-0298", "scene-0299", "scene-0300", "scene-0301", "scene-0302",
        "scene-0303", "scene-0304", "scene-0305", "scene-0306", "scene-0315",
        "scene-0316", "scene-0317", "scene-0318", "scene-0321", "scene-0323",
        "scene-0324", "scene-0328", "scene-0329", "scene-0330", "scene-0331",
        "scene-0332", "scene-0344", "scene-0345", "scene-0346", "scene-0349",
        "scene-0350", "scene-0351", "scene-0352", "scene-0353", "scene-0354",
        "scene-0355", "scene-0356", "scene-0357", "scene-0358", "scene-0359",
        "scene-0360", "scene-0361", "scene-0362", "scene-0363", "scene-0364",
        "scene-0365", "scene-0367", "scene-0370", "scene-0371", "scene-0372",
        "scene-0373", "scene-0374", "scene-0375", "scene-0376", "scene-0377",
        "scene-0379", "scene-0380", "scene-0381", "scene-0382", "scene-0383",
        "scene-0384", "scene-0385", "scene-0386", "scene-0388", "scene-0399",
        "scene-0400", "scene-0401", "scene-0402", "scene-0403", "scene-0405",
        "scene-0406", "scene-0407", "scene-0408", "scene-0420", "scene-0421",
        "scene-0422", "scene-0423", "scene-0424", "scene-0425", "scene-0426",
        "scene-0427", "scene-0428", "scene-0429", "scene-0430", "scene-0431",
        "scene-0432", "scene-0433", "scene-0434", "scene-0435", "scene-0436",
        "scene-0437", "scene-0438", "scene-0439", "scene-0440", "scene-0441",
        "scene-0442", "scene-0443", "scene-0444", "scene-0445", "scene-0446",
        "scene-0447", "scene-0448", "scene-0449", "scene-0450", "scene-0451",
        "scene-0452", "scene-0453", "scene-0454", "scene-0455", "scene-0456",
        "scene-0457", "scene-0458", "scene-0459", "scene-0461", "scene-0462",
        "scene-0463", "scene-0464", "scene-0465", "scene-0467", "scene-0468",
        "scene-0469", "scene-0471", "scene-0472", "scene-0474", "scene-0475",
        "scene-0476", "scene-0477", "scene-0478", "scene-0479", "scene-0480",
        "scene-0499", "scene-0500", "scene-0501", "scene-0502", "scene-0504",
        "scene-0505", "scene-0506", "scene-0507", "scene-0508", "scene-0509",
        "scene-0510", "scene-0511", "scene-0512", "scene-0513", "scene-0514",
        "scene-0515", "scene-0517", "scene-0518", "scene-0519", "scene-0520",
        "scene-0521", "scene-0522", "scene-0523", "scene-0524", "scene-0552",
        "scene-0553", "scene-0554", "scene-0555", "scene-0559", "scene-0560",
        "scene-0561", "scene-0562", "scene-0563", "scene-0564", "scene-0565",
        "scene-0584", "scene-0585", "scene-0586", "scene-0587", "scene-0588",
        "scene-0589", "scene-0590", "scene-0591", "scene-0592", "scene-0593",
        "scene-0594", "scene-0595", "scene-0596", "scene-0597", "scene-0598",
        "scene-0599", "scene-0600", "scene-0625", "scene-0626", "scene-0627",
        "scene-0629", "scene-0630", "scene-0632", "scene-0633", "scene-0634",
        "scene-0635", "scene-0636", "scene-0637", "scene-0638", "scene-0639",
        "scene-0640", "scene-0652", "scene-0653", "scene-0654", "scene-0655",
        "scene-0656", "scene-0657", "scene-0658", "scene-0659", "scene-0660",
        "scene-0661", "scene-0662", "scene-0663", "scene-0664", "scene-0665",
        "scene-0666", "scene-0667", "scene-0668", "scene-0669", "scene-0670",
        "scene-0671", "scene-0672", "scene-0673", "scene-0674", "scene-0675",
        "scene-0676", "scene-0677", "scene-0678", "scene-0679", "scene-0681",
        "scene-0683", "scene-0684", "scene-0685", "scene-0686", "scene-0687",
        "scene-0688", "scene-0689", "scene-0695", "scene-0696", "scene-0697",
        "scene-0698", "scene-0700", "scene-0701", "scene-0703", "scene-0704",
        "scene-0705", "scene-0706", "scene-0707", "scene-0708", "scene-0709",
        "scene-0710", "scene-0711", "scene-0712", "scene-0713", "scene-0714",
        "scene-0715", "scene-0716", "scene-0717", "scene-0718", "scene-0719",
        "scene-0726", "scene-0727", "scene-0728", "scene-0730", "scene-0731",
        "scene-0733", "scene-0734", "scene-0735", "scene-0736", "scene-0737",
        "scene-0738", "scene-0780", "scene-0781", "scene-0782", "scene-0783",
        "scene-0784", "scene-0786", "scene-0787", "scene-0789", "scene-0790",
        "scene-0791", "scene-0792", "scene-0802", "scene-0806", "scene-0808",
        "scene-0809", "scene-0810", "scene-0811", "scene-0812", "scene-0813",
        "scene-0815", "scene-0816", "scene-0817", "scene-0819", "scene-0820",
        "scene-0821", "scene-0822", "scene-0847", "scene-0848", "scene-0849",
        "scene-0850", "scene-0851", "scene-0852", "scene-0853", "scene-0854",
        "scene-0855", "scene-0856", "scene-0858", "scene-0860", "scene-0861",
        "scene-0862", "scene-0863", "scene-0864", "scene-0865", "scene-0866",
        "scene-0868", "scene-0869", "scene-0870", "scene-0871", "scene-0872",
        "scene-0873", "scene-0875", "scene-0876", "scene-0877", "scene-0878",
        "scene-0880", "scene-0882", "scene-0883", "scene-0884", "scene-0885",
        "scene-0886", "scene-0887", "scene-0888", "scene-0889", "scene-0890",
        "scene-0891", "scene-0892", "scene-0893", "scene-0894", "scene-0895",
        "scene-0896", "scene-0897", "scene-0898", "scene-0899", "scene-0900",
        "scene-0901", "scene-0902", "scene-0903", "scene-0904", "scene-0905",
        "scene-0906", "scene-0907", "scene-0908", "scene-0909", "scene-0916",
        "scene-0917", "scene-0921", "scene-0922", "scene-0923", "scene-0925",
        "scene-0926", "scene-0927", "scene-0928", "scene-0929", "scene-0930",
        "scene-0931", "scene-0945", "scene-0947", "scene-0949", "scene-0952",
        "scene-0953", "scene-0955", "scene-0956", "scene-0957", "scene-0958",
        "scene-0959", "scene-0960", "scene-0961", "scene-0966", "scene-0967",
        "scene-0968", "scene-0969", "scene-0971", "scene-0972", "scene-0975",
        "scene-0976", "scene-0977", "scene-0978", "scene-0979", "scene-0980",
        "scene-0981", "scene-0982", "scene-0983", "scene-0984", "scene-0988",
        "scene-0989", "scene-0990", "scene-0991", "scene-0992", "scene-0994",
        "scene-0995", "scene-0996", "scene-0997", "scene-0998", "scene-0999",
        "scene-1000", "scene-1001", "scene-1004", "scene-1005", "scene-1006",
        "scene-1007", "scene-1008", "scene-1009", "scene-1010", "scene-1011",
        "scene-1012", "scene-1013", "scene-1014", "scene-1015", "scene-1019",
        "scene-1020", "scene-1021", "scene-1022", "scene-1023", "scene-1024",
        "scene-1025", "scene-1044", "scene-1045", "scene-1046", "scene-1047",
        "scene-1048", "scene-1049", "scene-1050", "scene-1051", "scene-1052",
        "scene-1053", "scene-1054", "scene-1064", "scene-1065", "scene-1066",
        "scene-1067", "scene-1068", "scene-1069", "scene-1070", "scene-1071",
        "scene-1072", "scene-1073", "scene-1074", "scene-1075", "scene-1076",
        "scene-1077", "scene-1078", "scene-1079", "scene-1080", "scene-1081",
        "scene-1082", "scene-1083", "scene-1084", "scene-1085", "scene-1086",
        "scene-1087", "scene-1088", "scene-1089", "scene-1090", "scene-1091",
        "scene-1092", "scene-1093", "scene-1094", "scene-1095", "scene-1096",
        "scene-1097", "scene-1098", "scene-1099", "scene-1100", "scene-1101",
        "scene-1102", "scene-1104", "scene-1105", "scene-1106", "scene-1107",
        "scene-1108", "scene-1109", "scene-1110"]

    val_roddick_scenes = [
        "scene-0001", "scene-0010", "scene-0011", "scene-0020", "scene-0038",
        "scene-0041", "scene-0053", "scene-0054", "scene-0121", "scene-0122",
        "scene-0139", "scene-0152", "scene-0160", "scene-0184", "scene-0269",
        "scene-0347", "scene-0348", "scene-0366", "scene-0368", "scene-0369",
        "scene-0378", "scene-0389", "scene-0390", "scene-0391", "scene-0392",
        "scene-0393", "scene-0394", "scene-0395", "scene-0396", "scene-0397",
        "scene-0398", "scene-0411", "scene-0412", "scene-0413", "scene-0414",
        "scene-0415", "scene-0416", "scene-0417", "scene-0418", "scene-0419",
        "scene-0525", "scene-0526", "scene-0527", "scene-0528", "scene-0529",
        "scene-0530", "scene-0531", "scene-0532", "scene-0533", "scene-0534",
        "scene-0535", "scene-0536", "scene-0537", "scene-0538", "scene-0539",
        "scene-0541", "scene-0542", "scene-0543", "scene-0544", "scene-0545",
        "scene-0546", "scene-0556", "scene-0557", "scene-0558", "scene-0566",
        "scene-0568", "scene-0570", "scene-0571", "scene-0572", "scene-0573",
        "scene-0574", "scene-0575", "scene-0576", "scene-0577", "scene-0578",
        "scene-0580", "scene-0582", "scene-0583", "scene-0642", "scene-0643",
        "scene-0644", "scene-0645", "scene-0646", "scene-0647", "scene-0648",
        "scene-0649", "scene-0650", "scene-0651", "scene-0739", "scene-0740",
        "scene-0741", "scene-0744", "scene-0746", "scene-0747", "scene-0749",
        "scene-0750", "scene-0751", "scene-0752", "scene-0757", "scene-0758",
        "scene-0759", "scene-0760", "scene-0761", "scene-0762", "scene-0763",
        "scene-0764", "scene-0765", "scene-0767", "scene-0768", "scene-0769",
        "scene-0770", "scene-0771", "scene-0775", "scene-0777", "scene-0778",
        "scene-0794", "scene-0795", "scene-0796", "scene-0797", "scene-0798",
        "scene-0799", "scene-0800", "scene-0803", "scene-0804", "scene-0911",
        "scene-0912", "scene-0913", "scene-0914", "scene-0915", "scene-0919",
        "scene-0920", "scene-0924", "scene-0962", "scene-0963", "scene-1002",
        "scene-1003", "scene-1016", "scene-1017", "scene-1018", "scene-1055",
        "scene-1056", "scene-1057", "scene-1058", "scene-1059", "scene-1060",
        "scene-1061", "scene-1062", "scene-1063"]


    calibration_roddick_scenes = [
        "scene-0852", "scene-0429", "scene-0956", "scene-0194", "scene-0811",
        "scene-1110", "scene-1107", "scene-0294", "scene-0900", "scene-0596",
        "scene-0296", "scene-0885", "scene-0866", "scene-0105", "scene-0782",
        "scene-0191", "scene-0876", "scene-0133", "scene-0231", "scene-0847",
        "scene-0363", "scene-0026", "scene-0791", "scene-0909", "scene-0002",
        "scene-0283", "scene-0007", "scene-0251", "scene-1100", "scene-0668",
        "scene-0584", "scene-0287", "scene-0260", "scene-0171", "scene-0789",
        "scene-0108", "scene-0190", "scene-0206", "scene-0635", "scene-0815",
        "scene-0058", "scene-0710", "scene-0302", "scene-0639", "scene-0166",
        "scene-0094", "scene-0735", "scene-0321", "scene-1091", "scene-0344"
    ]


    scenes_dict = {
        "train": train_roddick_scenes,
        "val": val_roddick_scenes,
        "calibration": calibration_roddick_scenes
    }

    return scenes_dict
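Note: a minimal usage sketch (not part of the commit; assumes a nuScenes devkit object `nusc` is already constructed):

    splits = create_splits_scenes_roddick()
    val_names = set(splits["val"])
    # keep only samples whose parent scene belongs to the Roddick validation split
    val_samples = [s for s in nusc.sample
                   if nusc.get("scene", s["scene_token"])["name"] in val_names]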
mapper/data/nuscenes/utils.py
ADDED
@@ -0,0 +1,214 @@
import os
import numpy as np
from shapely import geometry, affinity
from pyquaternion import Quaternion
import cv2

from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.eval.detection.constants import DETECTION_NAMES
from nuscenes.utils.data_classes import LidarPointCloud

from nuscenes.map_expansion.map_api import NuScenesMap
from shapely.strtree import STRtree
from collections import OrderedDict
import torch

def decode_binary_labels(labels, nclass):
    bits = torch.pow(2, torch.arange(nclass))
    return (labels & bits.view(-1, 1, 1)) > 0

def transform_polygon(polygon, affine):
    """
    Transform a 2D polygon
    """
    a, b, tx, c, d, ty = affine.flatten()[:6]
    return affinity.affine_transform(polygon, [a, b, c, d, tx, ty])


def render_polygon(mask, polygon, extents, resolution, value=1):
    if len(polygon) == 0:
        return
    polygon = (polygon - np.array(extents[:2])) / resolution
    polygon = np.ascontiguousarray(polygon).round().astype(np.int32)
    cv2.fillConvexPoly(mask, polygon, value)

def transform(matrix, vectors):
    vectors = np.dot(matrix[:-1, :-1], vectors.T)
    vectors = vectors.T + matrix[:-1, -1]
    return vectors

CAMERA_NAMES = ['CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
                'CAM_BACK_LEFT', 'CAM_BACK_RIGHT', 'CAM_BACK']

NUSCENES_CLASS_NAMES = [
    'drivable_area', 'ped_crossing', 'walkway', 'carpark', 'car', 'truck',
    'bus', 'trailer', 'construction_vehicle', 'pedestrian', 'motorcycle',
    'bicycle', 'traffic_cone', 'barrier'
]

STATIC_CLASSES = ['drivable_area', 'ped_crossing', 'walkway', 'carpark_area']

LOCATIONS = ['boston-seaport', 'singapore-onenorth', 'singapore-queenstown',
             'singapore-hollandvillage']

def load_map_data(dataroot, location):

    # Load the NuScenes map object
    nusc_map = NuScenesMap(dataroot, location)

    map_data = OrderedDict()
    for layer in STATIC_CLASSES:

        # Retrieve all data associated with the current layer
        records = getattr(nusc_map, layer)
        polygons = list()

        # Drivable area records can contain multiple polygons
        if layer == 'drivable_area':
            for record in records:

                # Convert each entry in the record into a shapely object
                for token in record['polygon_tokens']:
                    poly = nusc_map.extract_polygon(token)
                    if poly.is_valid:
                        polygons.append(poly)
        else:
            for record in records:

                # Convert each entry in the record into a shapely object
                poly = nusc_map.extract_polygon(record['polygon_token'])
                if poly.is_valid:
                    polygons.append(poly)

        # Store as an R-Tree for fast intersection queries
        map_data[layer] = STRtree(polygons)

    return map_data

def iterate_samples(nuscenes, start_token):
    sample_token = start_token
    while sample_token != '':
        sample = nuscenes.get('sample', sample_token)
        yield sample
        sample_token = sample['next']


def get_map_masks(nuscenes, map_data, sample_data, extents, resolution):

    # Render each layer sequentially
    layers = [get_layer_mask(nuscenes, polys, sample_data, extents,
                             resolution) for layer, polys in map_data.items()]

    return np.stack(layers, axis=0)


def get_layer_mask(nuscenes, polygons, sample_data, extents, resolution):

    # Get the 2D affine transform from bev coords to map coords
    tfm = get_sensor_transform(nuscenes, sample_data)[[0, 1, 3]][:, [0, 2, 3]]
    inv_tfm = np.linalg.inv(tfm)

    # Create a patch representing the birds-eye-view region in map coordinates
    map_patch = geometry.box(*extents)
    map_patch = transform_polygon(map_patch, tfm)

    # Initialise the map mask
    x1, z1, x2, z2 = extents
    mask = np.zeros((int((z2 - z1) / resolution), int((x2 - x1) / resolution)),
                    dtype=np.uint8)

    # Find all polygons which intersect with the area of interest
    for polygon in polygons.query(map_patch):

        polygon = polygon.intersection(map_patch)

        # Transform into map coordinates
        polygon = transform_polygon(polygon, inv_tfm)

        # Render the polygon to the mask
        render_shapely_polygon(mask, polygon, extents, resolution)

    return mask


def get_object_masks(nuscenes, sample_data, extents, resolution):

    # Initialize object masks
    nclass = len(DETECTION_NAMES) + 1
    grid_width = int((extents[2] - extents[0]) / resolution)
    grid_height = int((extents[3] - extents[1]) / resolution)
    masks = np.zeros((nclass, grid_height, grid_width), dtype=np.uint8)

    # Get the 2D affine transform from bev coords to map coords
    tfm = get_sensor_transform(nuscenes, sample_data)[[0, 1, 3]][:, [0, 2, 3]]
    inv_tfm = np.linalg.inv(tfm)

    for box in nuscenes.get_boxes(sample_data['token']):

        # Get the index of the class
        det_name = category_to_detection_name(box.name)
        if det_name not in DETECTION_NAMES:
            class_id = -1
        else:
            class_id = DETECTION_NAMES.index(det_name)

        # Get bounding box coordinates in the grid coordinate frame
        bbox = box.bottom_corners()[:2]
        local_bbox = np.dot(inv_tfm[:2, :2], bbox).T + inv_tfm[:2, 2]

        # Render the rotated bounding box to the mask
        render_polygon(masks[class_id], local_bbox, extents, resolution)

    return masks.astype(np.bool)


def get_sensor_transform(nuscenes, sample_data):

    # Load sensor transform data
    sensor = nuscenes.get(
        'calibrated_sensor', sample_data['calibrated_sensor_token'])
    sensor_tfm = make_transform_matrix(sensor)

    # Load ego pose data
    pose = nuscenes.get('ego_pose', sample_data['ego_pose_token'])
    pose_tfm = make_transform_matrix(pose)

    return np.dot(pose_tfm, sensor_tfm)


def load_point_cloud(nuscenes, sample_data):

    # Load point cloud
    lidar_path = os.path.join(nuscenes.dataroot, sample_data['filename'])
    pcl = LidarPointCloud.from_file(lidar_path)
    return pcl.points[:3, :].T


def make_transform_matrix(record):
    """
    Create a 4x4 transform matrix from a calibrated_sensor or ego_pose record
    """
    transform = np.eye(4)
    transform[:3, :3] = Quaternion(record['rotation']).rotation_matrix
    transform[:3, 3] = np.array(record['translation'])
    return transform


def render_shapely_polygon(mask, polygon, extents, resolution):

    if polygon.geom_type == 'Polygon':

        # Render exteriors
        render_polygon(mask, polygon.exterior.coords, extents, resolution, 1)

        # Render interiors
        for hole in polygon.interiors:
            render_polygon(mask, hole.coords, extents, resolution, 0)

    # Handle the case of compound shapes
    else:
        for poly in polygon:
            render_shapely_polygon(mask, poly, extents, resolution)
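Note: `decode_binary_labels` unpacks bit-packed class masks; a toy check (not part of the commit):

    import torch
    labels = torch.tensor([[[5]]], dtype=torch.long)   # (1, H, W); 5 = bits 0 and 2 set
    masks = decode_binary_labels(labels, nclass=3)     # -> (3, H, W) booleans
    # masks[:, 0, 0] == tensor([True, False, True])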
mapper/data/schema.py
ADDED
@@ -0,0 +1,75 @@
from dataclasses import dataclass
from typing import Optional, Any, Dict
from pathlib import Path

@dataclass
class AugmentationConfiguration:
    gaussian_noise: dict
    brightness_contrast: dict

    enabled: bool = False
    brightness: float = 0.5
    contrast: float = 0.5
    saturation: float = 0.5
    hue: float = 0.5
    random_resized_crop: Any = False
    random_flip: float = 0.5


@dataclass(kw_only=True)
class DataConfiguration:
    augmentations: AugmentationConfiguration

    loading: Dict[str, Dict[str, Any]]

    target_focal_length: Optional[int] = None
    reduce_fov: Optional[bool] = None
    resize_image: Optional[Any] = None
    pad_to_square: Optional[bool] = None
    pad_to_multiple: Optional[int] = None
    gravity_align: Optional[bool] = None
    rectify_pitch: Optional[bool] = True
    num_classes: int

    name: str
    seed: Optional[int] = 0
    random: Optional[bool] = True
    num_threads: Optional[int] = None

@dataclass(kw_only=True)
class MIADataConfiguration(DataConfiguration):

    scenes: list[str]
    split: Any
    data_dir: Path
    pixel_per_meter: int
    crop_size_meters: int

    name: str = "mapillary"
    filter_for: Optional[str] = None
    filter_by_ground_angle: Optional[float] = None
    min_num_points: int = 0

@dataclass(kw_only=True)
class KITTIDataConfiguration(DataConfiguration):
    seam_root_dir: Path
    dataset_root_dir: Path
    bev_percentage: float

    pixel_per_meter: int
    crop_size_meters: int

    class_mapping: Optional[Any] = None
    percentage: float = 1.0

@dataclass(kw_only=True)
class NuScenesDataConfiguration(DataConfiguration):
    data_dir: Path
    map_dir: Path
    pixel_per_meter: int
    crop_size_meters: int

    percentage: float = 1.0
    class_mapping: Optional[Any] = None
    version: str = "v1.0-trainval"
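Note: a minimal instantiation sketch with hypothetical values (in practice these dataclasses are populated from the Hydra YAML configs, not built by hand):

    from pathlib import Path
    cfg = NuScenesDataConfiguration(
        augmentations=AugmentationConfiguration(gaussian_noise={"enabled": False},
                                                brightness_contrast={"enabled": False}),
        loading={"train": {"batch_size": 4}},
        num_classes=14,
        name="nuscenes",
        data_dir=Path("/data/nuscenes"),          # hypothetical path
        map_dir=Path("/data/nuscenes_maps"),      # hypothetical path
        pixel_per_meter=2,
        crop_size_meters=50,
    )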
mapper/data/sequential.py
ADDED
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import numpy as np
import torch


def chunk_sequence(
    data,
    indices,
    *,
    names=None,
    max_length=100,
    min_length=1,
    max_delay_s=None,
    max_inter_dist=None,
    max_total_dist=None,
):
    sort_array = data.get("capture_time", data.get("index"))
    if sort_array is None:
        sort_array = indices if names is None else names
    indices = sorted(indices, key=lambda i: sort_array[i].tolist())
    centers = torch.stack([data["t_c2w"][i][:2] for i in indices]).numpy()
    dists = np.linalg.norm(np.diff(centers, axis=0), axis=-1)
    if "capture_time" in data:
        times = torch.stack([data["capture_time"][i] for i in indices])
        times = times.double() / 1e3  # ms to s
        delays = np.diff(times, axis=0)
    else:
        delays = np.zeros_like(dists)
    chunks = [[indices[0]]]
    dist_total = 0
    for dist, delay, idx in zip(dists, delays, indices[1:]):
        dist_total += dist
        if (
            (max_inter_dist is not None and dist > max_inter_dist)
            or (max_total_dist is not None and dist_total > max_total_dist)
            or (max_delay_s is not None and delay > max_delay_s)
            or len(chunks[-1]) >= max_length
        ):
            chunks.append([])
            dist_total = 0
        chunks[-1].append(idx)
    chunks = list(filter(lambda c: len(c) >= min_length, chunks))
    chunks = sorted(chunks, key=len, reverse=True)
    return chunks
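Note: a toy sketch of `chunk_sequence` (not part of the commit): four poses on a line, split whenever consecutive poses are more than 1.5 m apart:

    import torch
    data = {"index": torch.arange(4),
            "t_c2w": torch.tensor([[0., 0., 0.], [1., 0., 0.], [5., 0., 0.], [6., 0., 0.]])}
    chunks = chunk_sequence(data, [0, 1, 2, 3], max_inter_dist=1.5)
    # -> [[0, 1], [2, 3]]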
mapper/data/torch.py
ADDED
@@ -0,0 +1,102 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import collections
import os

import torch
from torch.utils.data import get_worker_info
from torch.utils.data._utils.collate import (
    default_collate_err_msg_format,
    np_str_obj_array_pattern,
)
from lightning_fabric.utilities.seed import pl_worker_init_function

def collate(batch):
    """Difference with PyTorch default_collate: it can stack other tensor-like objects.
    Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich
    https://github.com/cvg/pixloc
    Released under the Apache License 2.0
    """
    if not isinstance(batch, list):  # no batching
        return batch

    # Filter None Elements
    batch = [elem for elem in batch if elem is not None]
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum(x.numel() for x in batch)
            storage = elem.storage()._new_shared(numel, device=elem.device)
            out = elem.new(storage).resize_(len(batch), *list(elem.size()))
        return torch.stack(batch, 0, out=out)
    elif (
        elem_type.__module__ == "numpy"
        and elem_type.__name__ != "str_"
        and elem_type.__name__ != "string_"
    ):
        if elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap":
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))

            return collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int):
        return torch.tensor(batch)
    elif isinstance(elem, (str, bytes)):
        return batch
    elif isinstance(elem, collections.abc.Mapping):
        return {key: collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, "_fields"):  # namedtuple
        return elem_type(*(collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, collections.abc.Sequence):
        # check to make sure that the elements in batch have consistent size
        it = iter(batch)
        elem_size = len(next(it))
        if not all(len(elem) == elem_size for elem in it):
            raise RuntimeError("each element in list of batch should be of equal size")
        transposed = zip(*batch)
        return [collate(samples) for samples in transposed]
    else:
        # try to stack anyway in case the object implements stacking.
        try:
            return torch.stack(batch, 0)
        except TypeError as e:
            if "expected Tensor as element" in str(e):
                return batch
            else:
                raise e


def set_num_threads(nt):
    """Force numpy and other libraries to use a limited number of threads."""
    try:
        import mkl
    except ImportError:
        pass
    else:
        mkl.set_num_threads(nt)
    torch.set_num_threads(1)
    os.environ["IPC_ENABLE"] = "1"
    for o in [
        "OPENBLAS_NUM_THREADS",
        "NUMEXPR_NUM_THREADS",
        "OMP_NUM_THREADS",
        "MKL_NUM_THREADS",
    ]:
        os.environ[o] = str(nt)


def worker_init_fn(i):
    info = get_worker_info()
    pl_worker_init_function(info.id)
    num_threads = info.dataset.cfg.get("num_threads")
    if num_threads is not None:
        set_num_threads(num_threads)
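Note: a minimal usage sketch (not part of the commit; `dataset` is a placeholder and must expose a `cfg` mapping, since `worker_init_fn` reads `dataset.cfg.get("num_threads")`):

    from torch.utils.data import DataLoader
    loader = DataLoader(dataset, batch_size=4, num_workers=2,
                        collate_fn=collate, worker_init_fn=worker_init_fn)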
mapper/data/utils.py
ADDED
@@ -0,0 +1,21 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import numpy as np
from scipy.spatial.transform import Rotation


def crop_map(raster, xy, size, seed=None):
    h, w = raster.shape[-2:]
    state = np.random.RandomState(seed)
    top = state.randint(0, h - size + 1)
    left = state.randint(0, w - size + 1)
    raster = raster[..., top : top + size, left : left + size]
    xy -= np.array([left, top])
    return raster, xy


def decompose_rotmat(R_c2w):
    R_cv2xyz = Rotation.from_euler("X", -90, degrees=True)
    rot_w2c = R_cv2xyz * Rotation.from_matrix(R_c2w).inv()
    roll, pitch, yaw = rot_w2c.as_euler("YXZ", degrees=True)
    return roll, pitch, yaw
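Note: a toy sketch of `crop_map` (not part of the commit); the `xy` array is shifted in place into crop coordinates:

    import numpy as np
    raster = np.zeros((3, 256, 256))
    xy = np.array([200.0, 40.0])
    crop, xy_local = crop_map(raster, xy, size=128, seed=0)
    # crop.shape == (3, 128, 128); xy_local == xy - [left, top] of the random crop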
mapper/mapper.py
ADDED
@@ -0,0 +1,112 @@
import time
import torch
import hydra
import pytorch_lightning as pl
from typing import Any

from hydra.core.config_store import ConfigStore
from omegaconf import OmegaConf
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

from pathlib import Path
from dataclasses import dataclass

from .module import GenericModule
from .data.module import GenericDataModule
from .callbacks import EvalSaveCallback, ImageLoggerCallback
from .models.schema import ModelConfiguration, DINOConfiguration, ResNetConfiguration
from .data.schema import MIADataConfiguration, KITTIDataConfiguration, NuScenesDataConfiguration


@dataclass
class ExperimentConfiguration:
    name: str

@dataclass
class Configuration:
    model: ModelConfiguration
    experiment: ExperimentConfiguration
    data: Any
    training: Any


cs = ConfigStore.instance()

# Store root configuration schema
cs.store(name="pretrain", node=Configuration)
cs.store(name="mapper_nuscenes", node=Configuration)
cs.store(name="mapper_kitti", node=Configuration)

# Store data configuration schema
cs.store(group="schema/data", name="mia",
         node=MIADataConfiguration, package="data")
cs.store(group="schema/data", name="kitti", node=KITTIDataConfiguration, package="data")
cs.store(group="schema/data", name="nuscenes", node=NuScenesDataConfiguration, package="data")

cs.store(group="model/schema/backbone", name="dino", node=DINOConfiguration, package="model.image_encoder.backbone")
cs.store(group="model/schema/backbone", name="resnet", node=ResNetConfiguration, package="model.image_encoder.backbone")


@hydra.main(version_base=None, config_path="conf", config_name="pretrain")
def train(cfg: Configuration):
    OmegaConf.resolve(cfg)

    dm = GenericDataModule(cfg.data)

    model = GenericModule(cfg)

    exp_name_with_time = cfg.experiment.name + \
        "_" + time.strftime("%Y-%m-%d_%H-%M-%S")

    callbacks: list[pl.Callback]

    if cfg.training.eval:
        save_dir = Path(cfg.training.save_dir)
        save_dir.mkdir(parents=True, exist_ok=True)

        callbacks = [
            EvalSaveCallback(save_dir=save_dir)
        ]

        logger = None
    else:
        callbacks = [
            ImageLoggerCallback(num_classes=cfg.training.num_classes),
            ModelCheckpoint(
                monitor=cfg.training.checkpointing.monitor,
                save_last=cfg.training.checkpointing.save_last,
                save_top_k=cfg.training.checkpointing.save_top_k,
            )
        ]

        logger = WandbLogger(
            name=exp_name_with_time,
            id=exp_name_with_time,
            entity="mappred-large",
            project="map-pred-full-v3",
        )

        logger.watch(model, log="all", log_freq=500)

    if cfg.training.checkpoint is not None:
        state_dict = torch.load(cfg.training.checkpoint)['state_dict']
        model.load_state_dict(state_dict, strict=False)

    trainer_args = OmegaConf.to_container(cfg.training.trainer)
    trainer_args['callbacks'] = callbacks
    trainer_args['logger'] = logger

    trainer = pl.Trainer(**trainer_args)

    if cfg.training.eval:
        trainer.test(model, datamodule=dm)
    else:
        trainer.fit(model, datamodule=dm)


if __name__ == "__main__":
    pl.seed_everything(42)
    torch.set_float32_matmul_precision("high")

    train()
mapper/models/__init__.py
ADDED
@@ -0,0 +1,28 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich
# https://github.com/cvg/pixloc
# Released under the Apache License 2.0

import inspect

from .base import BaseModel


def get_class(mod_name, base_path, BaseClass):
    """Get the class object which inherits from BaseClass and is defined in
    the module named mod_name, child of base_path.
    """
    mod_path = "{}.{}".format(base_path, mod_name)
    mod = __import__(mod_path, fromlist=[""])
    classes = inspect.getmembers(mod, inspect.isclass)
    # Filter classes defined in the module
    classes = [c for c in classes if c[1].__module__ == mod_path]
    # Filter classes inherited from BaseModel
    classes = [c for c in classes if issubclass(c[1], BaseClass)]
    assert len(classes) == 1, classes
    return classes[0][1]


def get_model(name):
    return get_class(name, __name__, BaseModel)
mapper/models/base.py
ADDED
@@ -0,0 +1,59 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# Adapted from PixLoc, Paul-Edouard Sarlin, ETH Zurich
# https://github.com/cvg/pixloc
# Released under the Apache License 2.0

"""
Base class for trainable models.
"""

from abc import ABCMeta, abstractmethod
from copy import copy

from omegaconf import OmegaConf
from torch import nn


class BaseModel(nn.Module, metaclass=ABCMeta):

    required_data_keys = []
    strict_conf = True

    def __init__(self, conf):
        """Perform some logic and call the _init method of the child model."""
        super().__init__()
        self.conf = conf
        OmegaConf.set_readonly(conf, True)
        OmegaConf.set_struct(conf, True)
        self.required_data_keys = copy(self.required_data_keys)
        self._init(conf)

    def forward(self, data):
        """Check the data and call the _forward method of the child model."""

        def recursive_key_check(expected, given):
            for key in expected:
                assert key in given, f"Missing key {key} in data"
                if isinstance(expected, dict):
                    recursive_key_check(expected[key], given[key])

        recursive_key_check(self.required_data_keys, data)
        return self._forward(data)

    @abstractmethod
    def _init(self, conf):
        """To be implemented by the child class."""
        raise NotImplementedError

    @abstractmethod
    def _forward(self, data):
        """To be implemented by the child class."""
        raise NotImplementedError

    def loss(self, pred, data):
        """To be implemented by the child class."""
        raise NotImplementedError

    def metrics(self):
        return {}  # no metrics
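Note: a minimal child-model sketch (hypothetical `TinyModel`, not part of the commit) showing the `_init`/`_forward` contract:

    import torch
    from omegaconf import OmegaConf

    class TinyModel(BaseModel):
        def _init(self, conf):
            self.linear = torch.nn.Linear(conf.dim_in, conf.dim_out)

        def _forward(self, data):
            return {"out": self.linear(data["x"])}

    model = TinyModel(OmegaConf.create({"dim_in": 4, "dim_out": 2}))
    pred = model({"x": torch.randn(1, 4)})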
mapper/models/bev_projection.py
ADDED
@@ -0,0 +1,95 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

import torch
from torch.nn.functional import grid_sample

from ..utils.geometry import from_homogeneous
from .utils import make_grid


class PolarProjectionDepth(torch.nn.Module):
    def __init__(self, z_max, ppm, scale_range, z_min=None):
        super().__init__()
        self.z_max = z_max
        self.Δ = Δ = 1 / ppm
        self.z_min = z_min = Δ if z_min is None else z_min
        self.scale_range = scale_range
        z_steps = torch.arange(z_min, z_max + Δ, Δ)
        self.register_buffer("depth_steps", z_steps, persistent=False)

    def sample_depth_scores(self, pixel_scales, camera):
        scale_steps = camera.f[..., None, 1] / self.depth_steps.flip(-1)
        log_scale_steps = torch.log2(scale_steps)
        scale_min, scale_max = self.scale_range
        log_scale_norm = (log_scale_steps - scale_min) / \
            (scale_max - scale_min)
        log_scale_norm = log_scale_norm * 2 - 1  # in [-1, 1]

        values = pixel_scales.flatten(1, 2).unsqueeze(-1)
        indices = log_scale_norm.unsqueeze(-1)
        indices = torch.stack([torch.zeros_like(indices), indices], -1)
        depth_scores = grid_sample(values, indices, align_corners=True)
        depth_scores = depth_scores.reshape(
            pixel_scales.shape[:-1] + (len(self.depth_steps),)
        )
        return depth_scores

    def forward(
        self,
        image,
        pixel_scales,
        camera,
        return_total_score=False,
    ):
        depth_scores = self.sample_depth_scores(pixel_scales, camera)
        depth_prob = torch.softmax(depth_scores, dim=1)
        image_polar = torch.einsum("...dhw,...hwz->...dzw", image, depth_prob)
        if return_total_score:
            cell_score = torch.logsumexp(depth_scores, dim=1, keepdim=True)
            return image_polar, cell_score.squeeze(1)
        return image_polar


class CartesianProjection(torch.nn.Module):
    def __init__(self, z_max, x_max, ppm, z_min=None):
        super().__init__()
        self.z_max = z_max
        self.x_max = x_max
        self.Δ = Δ = 1 / ppm
        self.z_min = z_min = Δ if z_min is None else z_min

        grid_xz = make_grid(
            x_max * 2 + Δ, z_max, step_y=Δ, step_x=Δ, orig_y=Δ, orig_x=-x_max, y_up=True
        )
        self.register_buffer("grid_xz", grid_xz, persistent=False)

    def grid_to_polar(self, cam):
        f, c = cam.f[..., 0][..., None, None], cam.c[..., 0][..., None, None]
        u = from_homogeneous(self.grid_xz).squeeze(-1) * f + c
        z_idx = (self.grid_xz[..., 1] - self.z_min) / \
            self.Δ  # convert z value to index
        z_idx = z_idx[None].expand_as(u)
        grid_polar = torch.stack([u, z_idx], -1)
        return grid_polar

    def sample_from_polar(self, image_polar, valid_polar, grid_uz):
        size = grid_uz.new_tensor(image_polar.shape[-2:][::-1])
        grid_uz_norm = (grid_uz + 0.5) / size * 2 - 1
        grid_uz_norm = grid_uz_norm * \
            grid_uz.new_tensor([1, -1])  # y axis is up
        image_bev = grid_sample(image_polar, grid_uz_norm, align_corners=False)

        if valid_polar is None:
            valid = torch.ones_like(image_polar[..., :1, :, :])
        else:
            valid = valid_polar.to(image_polar)[:, None]
        valid = grid_sample(valid, grid_uz_norm, align_corners=False)
        valid = valid.squeeze(1) > (1 - 1e-4)

        return image_bev, valid

    def forward(self, image_polar, valid_polar, cam):
        grid_uz = self.grid_to_polar(cam)
        image, valid = self.sample_from_polar(
            image_polar, valid_polar, grid_uz)
        return image, valid, grid_uz
mapper/models/dinov2/__init__.py
ADDED
@@ -0,0 +1,6 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Apache License, Version 2.0
# found in the LICENSE file in the root directory of this source tree.

__version__ = "0.0.1"
mapper/models/dinov2/configs/__init__.py
ADDED
@@ -0,0 +1,22 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Apache License, Version 2.0
# found in the LICENSE file in the root directory of this source tree.

import pathlib

from omegaconf import OmegaConf


def load_config(config_name: str):
    config_filename = config_name + ".yaml"
    return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename)


dinov2_default_config = load_config("ssl_default_config")


def load_and_merge_config(config_name: str):
    default_config = OmegaConf.create(dinov2_default_config)
    loaded_config = load_config(config_name)
    return OmegaConf.merge(default_config, loaded_config)
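Note: a minimal usage sketch, merging the default SSL config with one of the eval presets shipped in this folder:

    cfg = load_and_merge_config("eval/vits14_pretrain")
    print(cfg.student.arch, cfg.student.patch_size)  # vit_small 14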
mapper/models/dinov2/configs/eval/vitb14_pretrain.yaml
ADDED
@@ -0,0 +1,6 @@
student:
  arch: vit_base
  patch_size: 14
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98
mapper/models/dinov2/configs/eval/vitg14_pretrain.yaml
ADDED
@@ -0,0 +1,7 @@
student:
  arch: vit_giant2
  patch_size: 14
  ffn_layer: swiglufused
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98
mapper/models/dinov2/configs/eval/vitl14_pretrain.yaml
ADDED
@@ -0,0 +1,6 @@
student:
  arch: vit_large
  patch_size: 14
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98
mapper/models/dinov2/configs/eval/vits14_pretrain.yaml
ADDED
@@ -0,0 +1,6 @@
student:
  arch: vit_small
  patch_size: 14
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98
mapper/models/dinov2/configs/eval/vits14_reg4_pretrain.yaml
ADDED
@@ -0,0 +1,9 @@
student:
  arch: vit_small
  patch_size: 14
  num_register_tokens: 4
  interpolate_antialias: true
  interpolate_offset: 0.0
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98
mapper/models/dinov2/configs/ssl_default_config.yaml
ADDED
@@ -0,0 +1,118 @@
MODEL:
  WEIGHTS: ''
compute_precision:
  grad_scaler: true
  teacher:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
  student:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
dino:
  loss_weight: 1.0
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
  koleo_loss_weight: 0.1
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  mask_ratio_min_max:
  - 0.1
  - 0.5
  separate_head: false
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
train:
  batch_size_per_gpu: 64
  dataset_path: ImageNet:split=TRAIN
  output_dir: .
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  OFFICIAL_EPOCH_LENGTH: 1250
  cache_dataset: true
  centering: "centering" # or "sinkhorn_knopp"
student:
  arch: vit_large
  patch_size: 16
  drop_path_rate: 0.3
  layerscale: 1.0e-05
  drop_path_uniform: true
  pretrained_weights: ''
  ffn_layer: "mlp"
  block_chunks: 0
  qkv_bias: true
  proj_bias: true
  ffn_bias: true
  num_register_tokens: 0
  interpolate_antialias: false
  interpolate_offset: 0.1
teacher:
  momentum_teacher: 0.992
  final_momentum_teacher: 1
  warmup_teacher_temp: 0.04
  teacher_temp: 0.07
  warmup_teacher_temp_epochs: 30
optim:
  epochs: 100
  weight_decay: 0.04
  weight_decay_end: 0.4
  base_lr: 0.004 # learning rate for a batch size of 1024
  lr: 0. # will be set after applying scaling rule
  warmup_epochs: 10
  min_lr: 1.0e-06
  clip_grad: 3.0
  freeze_last_layer_epochs: 1
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  layerwise_decay: 0.9
  adamw_beta1: 0.9
  adamw_beta2: 0.999
crops:
  global_crops_scale:
  - 0.32
  - 1.0
  local_crops_number: 8
  local_crops_scale:
  - 0.05
  - 0.32
  global_crops_size: 224
  local_crops_size: 96
evaluation:
  eval_period_iterations: 12500
mapper/models/dinov2/configs/train/vitg14.yaml
ADDED
@@ -0,0 +1,26 @@
dino:
  head_n_prototypes: 131072
  head_bottleneck_dim: 384
ibot:
  separate_head: true
  head_n_prototypes: 131072
train:
  batch_size_per_gpu: 12
  dataset_path: ImageNet22k
  centering: sinkhorn_knopp
student:
  arch: vit_giant2
  patch_size: 14
  drop_path_rate: 0.4
  ffn_layer: swiglufused
  block_chunks: 4
teacher:
  momentum_teacher: 0.994
optim:
  epochs: 500
  weight_decay_end: 0.2
  base_lr: 2.0e-04 # learning rate for a batch size of 1024
  warmup_epochs: 80
  layerwise_decay: 1.0
crops:
  local_crops_size: 98
mapper/models/dinov2/configs/train/vitl14.yaml
ADDED
@@ -0,0 +1,26 @@
dino:
  head_n_prototypes: 131072
  head_bottleneck_dim: 384
ibot:
  separate_head: true
  head_n_prototypes: 131072
train:
  batch_size_per_gpu: 32
  dataset_path: ImageNet22k
  centering: sinkhorn_knopp
student:
  arch: vit_large
  patch_size: 14
  drop_path_rate: 0.4
  ffn_layer: swiglufused
  block_chunks: 4
teacher:
  momentum_teacher: 0.994
optim:
  epochs: 500
  weight_decay_end: 0.2
  base_lr: 2.0e-04 # learning rate for a batch size of 1024
  warmup_epochs: 80
  layerwise_decay: 1.0
crops:
  local_crops_size: 98
mapper/models/dinov2/configs/train/vitl16_short.yaml
ADDED
@@ -0,0 +1,6 @@
# this corresponds to the default config
train:
  dataset_path: ImageNet:split=TRAIN
  batch_size_per_gpu: 64
student:
  block_chunks: 4