File size: 11,349 Bytes
c2ca15f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# Settings for the PrimitiveHead instance running in primitive_mode 'z'
# (num_dims=2). Plain dict literals are used throughout; key order is
# significant for dumping/iteration and is kept stable.
primitive_z_cfg = {
    'type': 'PrimitiveHead',
    'num_dims': 2,
    'num_classes': 18,
    'primitive_mode': 'z',
    'upper_thresh': 100.0,
    'surface_thresh': 0.5,
    # Vote module: one vote and one ground-truth target per seed.
    'vote_module_cfg': {
        'in_channels': 256,
        'vote_per_seed': 1,
        'gt_per_seed': 1,
        'conv_channels': (256, 256),
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d'},
        'norm_feats': True,
        'vote_loss': {
            'type': 'ChamferDistance',
            'mode': 'l1',
            'reduction': 'none',
            'loss_dst_weight': 10.0,
        },
    },
    # Aggregation of votes through a PointSAModule.
    'vote_aggregation_cfg': {
        'type': 'PointSAModule',
        'num_point': 1024,
        'radius': 0.3,
        'num_sample': 16,
        'mlp_channels': [256, 128, 128, 128],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'feat_channels': (128, 128),
    'conv_cfg': {'type': 'Conv1d'},
    'norm_cfg': {'type': 'BN1d'},
    # Loss terms.
    'objectness_loss': {
        'type': 'mmdet.CrossEntropyLoss',
        'class_weight': [0.4, 0.6],
        'reduction': 'mean',
        'loss_weight': 30.0,
    },
    'center_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_reg_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_cls_loss': {
        'type': 'mmdet.CrossEntropyLoss',
        'reduction': 'sum',
        'loss_weight': 1.0,
    },
    # Training samples with 'vote' mode, testing with 'seed' mode.
    'train_cfg': {
        'sample_mode': 'vote',
        'dist_thresh': 0.2,
        'var_thresh': 1e-2,
        'lower_thresh': 1e-6,
        'num_point': 100,
        'num_point_line': 10,
        'line_thresh': 0.2,
    },
    'test_cfg': {'sample_mode': 'seed'},
}

# Settings for the PrimitiveHead instance running in primitive_mode 'xy'
# (num_dims=1). Plain dict literals are used throughout; key order is
# significant for dumping/iteration and is kept stable.
primitive_xy_cfg = {
    'type': 'PrimitiveHead',
    'num_dims': 1,
    'num_classes': 18,
    'primitive_mode': 'xy',
    'upper_thresh': 100.0,
    'surface_thresh': 0.5,
    # Vote module: one vote and one ground-truth target per seed.
    'vote_module_cfg': {
        'in_channels': 256,
        'vote_per_seed': 1,
        'gt_per_seed': 1,
        'conv_channels': (256, 256),
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d'},
        'norm_feats': True,
        'vote_loss': {
            'type': 'ChamferDistance',
            'mode': 'l1',
            'reduction': 'none',
            'loss_dst_weight': 10.0,
        },
    },
    # Aggregation of votes through a PointSAModule.
    'vote_aggregation_cfg': {
        'type': 'PointSAModule',
        'num_point': 1024,
        'radius': 0.3,
        'num_sample': 16,
        'mlp_channels': [256, 128, 128, 128],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'feat_channels': (128, 128),
    'conv_cfg': {'type': 'Conv1d'},
    'norm_cfg': {'type': 'BN1d'},
    # Loss terms.
    'objectness_loss': {
        'type': 'mmdet.CrossEntropyLoss',
        'class_weight': [0.4, 0.6],
        'reduction': 'mean',
        'loss_weight': 30.0,
    },
    'center_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_reg_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_cls_loss': {
        'type': 'mmdet.CrossEntropyLoss',
        'reduction': 'sum',
        'loss_weight': 1.0,
    },
    # Training samples with 'vote' mode, testing with 'seed' mode.
    'train_cfg': {
        'sample_mode': 'vote',
        'dist_thresh': 0.2,
        'var_thresh': 1e-2,
        'lower_thresh': 1e-6,
        'num_point': 100,
        'num_point_line': 10,
        'line_thresh': 0.2,
    },
    'test_cfg': {'sample_mode': 'seed'},
}

# Settings for the PrimitiveHead instance running in primitive_mode 'line'
# (num_dims=0). Compared with the 'z' and 'xy' configs in this file, the
# center/semantic-regression loss weights are 1.0 (not 0.5) and the semantic
# classification loss weight is 2.0 (not 1.0).
primitive_line_cfg = {
    'type': 'PrimitiveHead',
    'num_dims': 0,
    'num_classes': 18,
    'primitive_mode': 'line',
    'upper_thresh': 100.0,
    'surface_thresh': 0.5,
    # Vote module: one vote and one ground-truth target per seed.
    'vote_module_cfg': {
        'in_channels': 256,
        'vote_per_seed': 1,
        'gt_per_seed': 1,
        'conv_channels': (256, 256),
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d'},
        'norm_feats': True,
        'vote_loss': {
            'type': 'ChamferDistance',
            'mode': 'l1',
            'reduction': 'none',
            'loss_dst_weight': 10.0,
        },
    },
    # Aggregation of votes through a PointSAModule.
    'vote_aggregation_cfg': {
        'type': 'PointSAModule',
        'num_point': 1024,
        'radius': 0.3,
        'num_sample': 16,
        'mlp_channels': [256, 128, 128, 128],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'feat_channels': (128, 128),
    'conv_cfg': {'type': 'Conv1d'},
    'norm_cfg': {'type': 'BN1d'},
    # Loss terms.
    'objectness_loss': {
        'type': 'mmdet.CrossEntropyLoss',
        'class_weight': [0.4, 0.6],
        'reduction': 'mean',
        'loss_weight': 30.0,
    },
    'center_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 1.0,
        'loss_dst_weight': 1.0,
    },
    'semantic_reg_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 1.0,
        'loss_dst_weight': 1.0,
    },
    'semantic_cls_loss': {
        'type': 'mmdet.CrossEntropyLoss',
        'reduction': 'sum',
        'loss_weight': 2.0,
    },
    # Training samples with 'vote' mode, testing with 'seed' mode.
    'train_cfg': {
        'sample_mode': 'vote',
        'dist_thresh': 0.2,
        'var_thresh': 1e-2,
        'lower_thresh': 1e-6,
        'num_point': 100,
        'num_point_line': 10,
        'line_thresh': 0.2,
    },
    'test_cfg': {'sample_mode': 'seed'},
}

# Top-level H3DNet model definition: data preprocessor, a four-stream
# MultiBackbone, a VoteHead used as rpn_head, and an H3DRoIHead that consumes
# the three primitive head configs defined earlier in this file.
model = {
    'type': 'H3DNet',
    'data_preprocessor': {'type': 'Det3DDataPreprocessor'},
    # Four backbone streams ('net0'..'net3') built from one shared
    # PointNet2SASSG description.
    'backbone': {
        'type': 'MultiBackbone',
        'num_streams': 4,
        'suffixes': ['net0', 'net1', 'net2', 'net3'],
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d', 'eps': 1e-5, 'momentum': 0.01},
        'act_cfg': {'type': 'ReLU'},
        'backbones': {
            'type': 'PointNet2SASSG',
            'in_channels': 4,
            'num_points': (2048, 1024, 512, 256),
            'radius': (0.2, 0.4, 0.8, 1.2),
            'num_samples': (64, 32, 16, 16),
            'sa_channels': (
                (64, 64, 128),
                (128, 128, 256),
                (128, 128, 256),
                (128, 128, 256),
            ),
            'fp_channels': ((256, 256), (256, 256)),
            'norm_cfg': {'type': 'BN2d'},
            'sa_cfg': {
                'type': 'PointSAModule',
                'pool_mod': 'max',
                'use_xyz': True,
                'normalize_xyz': True,
            },
        },
    },
    # First-stage proposal head.
    'rpn_head': {
        'type': 'VoteHead',
        'vote_module_cfg': {
            'in_channels': 256,
            'vote_per_seed': 1,
            'gt_per_seed': 3,
            'conv_channels': (256, 256),
            'conv_cfg': {'type': 'Conv1d'},
            'norm_cfg': {'type': 'BN1d'},
            'norm_feats': True,
            'vote_loss': {
                'type': 'ChamferDistance',
                'mode': 'l1',
                'reduction': 'none',
                'loss_dst_weight': 10.0,
            },
        },
        'vote_aggregation_cfg': {
            'type': 'PointSAModule',
            'num_point': 256,
            'radius': 0.3,
            'num_sample': 16,
            'mlp_channels': [256, 128, 128, 128],
            'use_xyz': True,
            'normalize_xyz': True,
        },
        'pred_layer_cfg': {
            'in_channels': 128,
            'shared_conv_channels': (128, 128),
            'bias': True,
        },
        'objectness_loss': {
            'type': 'mmdet.CrossEntropyLoss',
            'class_weight': [0.2, 0.8],
            'reduction': 'sum',
            'loss_weight': 5.0,
        },
        'center_loss': {
            'type': 'ChamferDistance',
            'mode': 'l2',
            'reduction': 'sum',
            'loss_src_weight': 10.0,
            'loss_dst_weight': 10.0,
        },
        'dir_class_loss': {
            'type': 'mmdet.CrossEntropyLoss',
            'reduction': 'sum',
            'loss_weight': 1.0,
        },
        'dir_res_loss': {
            'type': 'mmdet.SmoothL1Loss',
            'reduction': 'sum',
            'loss_weight': 10.0,
        },
        'size_class_loss': {
            'type': 'mmdet.CrossEntropyLoss',
            'reduction': 'sum',
            'loss_weight': 1.0,
        },
        'size_res_loss': {
            'type': 'mmdet.SmoothL1Loss',
            'reduction': 'sum',
            'loss_weight': 10.0,
        },
        'semantic_loss': {
            'type': 'mmdet.CrossEntropyLoss',
            'reduction': 'sum',
            'loss_weight': 1.0,
        },
    },
    # Second-stage head: primitive heads plus the box refinement head.
    'roi_head': {
        'type': 'H3DRoIHead',
        'primitive_list': [
            primitive_z_cfg,
            primitive_xy_cfg,
            primitive_line_cfg,
        ],
        'bbox_head': {
            'type': 'H3DBboxHead',
            'gt_per_seed': 3,
            'num_proposal': 256,
            # NOTE(review): the key is spelled 'suface' (not 'surface');
            # presumably it must match the consuming head's parameter name
            # -- confirm before renaming.
            'suface_matching_cfg': {
                'type': 'PointSAModule',
                'num_point': 256 * 6,
                'radius': 0.5,
                'num_sample': 32,
                'mlp_channels': [128 + 6, 128, 64, 32],
                'use_xyz': True,
                'normalize_xyz': True,
            },
            'line_matching_cfg': {
                'type': 'PointSAModule',
                'num_point': 256 * 12,
                'radius': 0.5,
                'num_sample': 32,
                'mlp_channels': [128 + 12, 128, 64, 32],
                'use_xyz': True,
                'normalize_xyz': True,
            },
            'primitive_refine_channels': [128, 128, 128],
            'upper_thresh': 100.0,
            'surface_thresh': 0.5,
            'line_thresh': 0.5,
            'conv_cfg': {'type': 'Conv1d'},
            'norm_cfg': {'type': 'BN1d'},
            'objectness_loss': {
                'type': 'mmdet.CrossEntropyLoss',
                'class_weight': [0.2, 0.8],
                'reduction': 'sum',
                'loss_weight': 5.0,
            },
            'center_loss': {
                'type': 'ChamferDistance',
                'mode': 'l2',
                'reduction': 'sum',
                'loss_src_weight': 10.0,
                'loss_dst_weight': 10.0,
            },
            'dir_class_loss': {
                'type': 'mmdet.CrossEntropyLoss',
                'reduction': 'sum',
                'loss_weight': 0.1,
            },
            'dir_res_loss': {
                'type': 'mmdet.SmoothL1Loss',
                'reduction': 'sum',
                'loss_weight': 10.0,
            },
            'size_class_loss': {
                'type': 'mmdet.CrossEntropyLoss',
                'reduction': 'sum',
                'loss_weight': 0.1,
            },
            'size_res_loss': {
                'type': 'mmdet.SmoothL1Loss',
                'reduction': 'sum',
                'loss_weight': 10.0,
            },
            'semantic_loss': {
                'type': 'mmdet.CrossEntropyLoss',
                'reduction': 'sum',
                'loss_weight': 0.1,
            },
            'cues_objectness_loss': {
                'type': 'mmdet.CrossEntropyLoss',
                'class_weight': [0.3, 0.7],
                'reduction': 'mean',
                'loss_weight': 5.0,
            },
            'cues_semantic_loss': {
                'type': 'mmdet.CrossEntropyLoss',
                'class_weight': [0.3, 0.7],
                'reduction': 'mean',
                'loss_weight': 5.0,
            },
            'proposal_objectness_loss': {
                'type': 'mmdet.CrossEntropyLoss',
                'class_weight': [0.2, 0.8],
                'reduction': 'none',
                'loss_weight': 5.0,
            },
            'primitive_center_loss': {
                'type': 'mmdet.MSELoss',
                'reduction': 'none',
                'loss_weight': 1.0,
            },
        },
    },
    # Model training and testing settings.
    'train_cfg': {
        'rpn': {
            'pos_distance_thr': 0.3,
            'neg_distance_thr': 0.6,
            'sample_mode': 'vote',
        },
        'rpn_proposal': {'use_nms': False},
        'rcnn': {
            'pos_distance_thr': 0.3,
            'neg_distance_thr': 0.6,
            'sample_mode': 'vote',
            'far_threshold': 0.6,
            'near_threshold': 0.3,
            'mask_surface_threshold': 0.3,
            'label_surface_threshold': 0.3,
            'mask_line_threshold': 0.3,
            'label_line_threshold': 0.3,
        },
    },
    'test_cfg': {
        'rpn': {
            'sample_mode': 'seed',
            'nms_thr': 0.25,
            'score_thr': 0.05,
            'per_class_proposal': True,
            'use_nms': False,
        },
        'rcnn': {
            'sample_mode': 'seed',
            'nms_thr': 0.25,
            'score_thr': 0.05,
            'per_class_proposal': True,
        },
    },
}