File size: 13,201 Bytes
76b12fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
callbacks:
  rollout_lh:
    tasks:
      _target_: calvin_env.envs.tasks.Tasks
      tasks:
        rotate_red_block_right:
        - rotate_object
        - block_red
        - -60
        rotate_red_block_left:
        - rotate_object
        - block_red
        - 60
        rotate_blue_block_right:
        - rotate_object
        - block_blue
        - -60
        rotate_blue_block_left:
        - rotate_object
        - block_blue
        - 60
        rotate_pink_block_right:
        - rotate_object
        - block_pink
        - -60
        rotate_pink_block_left:
        - rotate_object
        - block_pink
        - 60
        push_red_block_right:
        - push_object
        - block_red
        - 0.1
        - 0
        push_red_block_left:
        - push_object
        - block_red
        - -0.1
        - 0
        push_blue_block_right:
        - push_object
        - block_blue
        - 0.1
        - 0
        push_blue_block_left:
        - push_object
        - block_blue
        - -0.1
        - 0
        push_pink_block_right:
        - push_object
        - block_pink
        - 0.1
        - 0
        push_pink_block_left:
        - push_object
        - block_pink
        - -0.1
        - 0
        move_slider_left:
        - move_door_rel
        - base__slide
        - 0.15
        move_slider_right:
        - move_door_rel
        - base__slide
        - -0.15
        open_drawer:
        - move_door_rel
        - base__drawer
        - 0.12
        close_drawer:
        - move_door_rel
        - base__drawer
        - -0.12
        lift_red_block_table:
        - lift_object
        - block_red
        - 0.05
        - table
        - base_link
        lift_red_block_slider:
        - lift_object
        - block_red
        - 0.03
        - table
        - plank_link
        lift_red_block_drawer:
        - lift_object
        - block_red
        - 0.05
        - table
        - drawer_link
        lift_blue_block_table:
        - lift_object
        - block_blue
        - 0.05
        - table
        - base_link
        lift_blue_block_slider:
        - lift_object
        - block_blue
        - 0.03
        - table
        - plank_link
        lift_blue_block_drawer:
        - lift_object
        - block_blue
        - 0.05
        - table
        - drawer_link
        lift_pink_block_table:
        - lift_object
        - block_pink
        - 0.05
        - table
        - base_link
        lift_pink_block_slider:
        - lift_object
        - block_pink
        - 0.03
        - table
        - plank_link
        lift_pink_block_drawer:
        - lift_object
        - block_pink
        - 0.05
        - table
        - drawer_link
        place_in_slider:
        - place_object
        - table
        - plank_link
        place_in_drawer:
        - place_object
        - table
        - drawer_link
        stack_block:
        - stack_objects
        unstack_block:
        - unstack_objects
        turn_on_lightbulb:
        - toggle_light
        - lightbulb
        - 0
        - 1
        turn_off_lightbulb:
        - toggle_light
        - lightbulb
        - 1
        - 0
        turn_on_led:
        - toggle_light
        - led
        - 0
        - 1
        turn_off_led:
        - toggle_light
        - led
        - 1
        - 0
        push_into_drawer:
        - push_object_into
        - - block_red
          - block_blue
          - block_pink
        - table
        - base_link
        - table
        - drawer_link
    val_annotations:
      rotate_red_block_right:
      - take the red block and rotate it to the right
      rotate_red_block_left:
      - take the red block and rotate it to the left
      rotate_blue_block_right:
      - take the blue block and rotate it to the right
      rotate_blue_block_left:
      - take the blue block and rotate it to the left
      rotate_pink_block_right:
      - take the pink block and rotate it to the right
      rotate_pink_block_left:
      - take the pink block and rotate it to the left
      push_red_block_right:
      - go push the red block right
      push_red_block_left:
      - go push the red block left
      push_blue_block_right:
      - go push the blue block right
      push_blue_block_left:
      - go push the blue block left
      push_pink_block_right:
      - go push the pink block right
      push_pink_block_left:
      - go push the pink block left
      move_slider_left:
      - push the sliding door to the left side
      move_slider_right:
      - push the sliding door to the right side
      open_drawer:
      - pull the handle to open the drawer
      close_drawer:
      - push the handle to close the drawer
      lift_red_block_table:
      - grasp and lift the red block
      lift_blue_block_table:
      - grasp and lift the blue block
      lift_pink_block_table:
      - grasp and lift the pink block
      lift_red_block_slider:
      - lift the red block from the sliding cabinet
      lift_blue_block_slider:
      - lift the blue block from the sliding cabinet
      lift_pink_block_slider:
      - lift the pink block from the sliding cabinet
      lift_red_block_drawer:
      - Take the red block from the drawer
      lift_blue_block_drawer:
      - Take the blue block from the drawer
      lift_pink_block_drawer:
      - Take the pink block from the drawer
      place_in_slider:
      - store the grasped block in the sliding cabinet
      place_in_drawer:
      - store the grasped block in the drawer
      push_into_drawer:
      - slide the block that it falls into the drawer
      stack_block:
      - stack the grasped block
      unstack_block:
      - remove the stacked block
      turn_on_lightbulb:
      - use the switch to turn on the light bulb
      turn_off_lightbulb:
      - use the switch to turn off the light bulb
      turn_on_led:
      - press the button to turn on the led light
      turn_off_led:
      - press the button to turn off the led light
    _target_: mode.rollout.rollout_long_horizon.RolloutLongHorizon
    _recursive_: false
    env_cfg:
      _target_: mode.wrappers.hulc_wrapper.HulcWrapper
    skip_epochs: ${rollout_lh_skip_epochs}
    rollout_freq: 5
    num_videos: 0
    num_sequences: 1000
    replan_freq: 30
    ep_len: 360
    empty_cache: false
    log_video_to_file: false
    save_dir: ./videos
    lang_folder: ${lang_folder}
    debug: false
  ema:
    _target_: mode.callbacks.ema.EMA
    decay: 0.999
    start_step: 0
    save_ema_weights_in_callback_state: true
    evaluate_ema_weights_instead: true
    power: 0.6666666666666666
    inv_gamma: 1.0
    min_value: 0.0
    max_value: 0.9999
  checkpoint:
    _target_: pytorch_lightning.callbacks.ModelCheckpoint
    save_top_k: 1
    verbose: true
    monitor: eval_lh/avg_seq_len
    mode: max
    dirpath: saved_models
    filename: '{epoch:02d}_{eval_lh/avg_seq_len:.2f}'
    every_n_epochs: ${callbacks.rollout_lh.rollout_freq}
datamodule:
  transforms:
    train:
      rgb_static:
      - _target_: torchvision.transforms.Resize
        size: 224
        antialias: true
      - _target_: mode.utils.transforms.RandomShiftsAug
        pad: 10
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      rgb_gripper:
      - _target_: torchvision.transforms.Resize
        size: 112
        antialias: true
      - _target_: mode.utils.transforms.RandomShiftsAug
        pad: 4
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      robot_obs:
      - _target_: mode.utils.transforms.NormalizeVector
      scene_obs:
      - _target_: mode.utils.transforms.NormalizeVector
    val:
      rgb_static:
      - _target_: torchvision.transforms.Resize
        size: 224
        antialias: true
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      rgb_gripper:
      - _target_: torchvision.transforms.Resize
        size: 112
        antialias: true
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      robot_obs:
      - _target_: mode.utils.transforms.NormalizeVector
      scene_obs:
      - _target_: mode.utils.transforms.NormalizeVector
  _target_: mode.datasets.hulc_data_module.HulcDataModule
  _recursive_: false
  root_data_dir: ${root_data_dir}
  action_space: 7
  action_max:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  action_min:
  - -1.0
  - -1.0
  - -1.0
  - -1.0
  - -1.0
  - -1.0
  - -1
  shuffle_val: false
  observation_space:
    rgb_obs:
    - rgb_static
    - rgb_gripper
    depth_obs: []
    state_obs:
    - robot_obs
    actions:
    - rel_actions
    language:
    - language
  proprioception_dims:
    n_state_obs: 8
    keep_indices:
    - - 0
      - 7
    - - 14
      - 15
    robot_orientation_idx:
    - 3
    - 6
    normalize: true
    normalize_robot_orientation: true
  datasets:
    lang_dataset:
      _target_: mode.datasets.disk_dataset.ExtendedDiskDataset
      key: lang
      save_format: npz
      batch_size: ${batch_size}
      min_window_size: ${act_seq_len}
      max_window_size: ${act_seq_len}
      proprio_state: ${datamodule.proprioception_dims}
      obs_space: ${datamodule.observation_space}
      skip_frames: 1
      pad: false
      lang_folder: ${lang_folder}
      aux_lang_loss_window: 8
      num_workers: ${num_workers}
      action_seq_len: ${act_seq_len}
      obs_seq_len: ${obs_seq_len}
      future_range: 1
      use_extracted_rel_actions: ${use_extracted_rel_actions}
model:
  _target_: mode.models.mode_agent.MoDEAgent
  _recursive_: false
  multistep: ${multistep}
  use_lr_scheduler: true
  entropy_gamma: 0.0
  router_z_delta: 0.0
  use_proprio: false
  seed: ${seed}
  sampler_type: ddim
  num_sampling_steps: 10
  sigma_data: 0.5
  sigma_min: 0.001
  sigma_max: 80
  noise_scheduler: exponential
  sigma_sample_density_type: loglogistic
  ckpt_path: /home/hk-project-sustainebot/ft4740/code/MoDE_Diffusion_Policy/pretrained_models/300k
  start_from_pretrained: true
  act_window_size: ${act_seq_len}
  latent_dim: 1024
  obs_enc_dim: ${obs_dim}
  cond_dim: 512
  resnet_type: '50'
  optimizer:
    _target_: torch.optim.AdamW
    transformer_weight_decay: 0.05
    obs_encoder_weight_decay: 0.05
    learning_rate: 0.0001
    betas:
    - 0.9
    - 0.95
  lr_scheduler:
    lr_scheduler:
      init_lr: 0.0001
      init_lr_scale: 0.1
      final_lr_scale: 1.0e-06
      total_steps: 45000
      phase_ratio: (0.02, 0.08, 0.9)
      lr: 0.0001
  model:
    _target_: mode.models.edm_diffusion.score_wrappers.GCDenoiser
    _recursive_: false
    sigma_data: ${model.sigma_data}
    inner_model:
      _target_: mode.models.networks.modedit.MoDeDiT
      action_dim: ${datamodule.action_space}
      goal_dim: ${model.cond_dim}
      obs_dim: 2048
      goal_conditioned: true
      causal: true
      use_custom_attn_mask: false
      use_proprio: ${model.use_proprio}
      state_dim: ${proprio_dims}
      embed_dim: ${model.latent_dim}
      n_layers: 12
      goal_seq_len: 1
      obs_seq_len: ${obs_seq_len}
      action_seq_len: ${act_seq_len}
      embed_pdrob: 0
      goal_drop: 0.1
      attn_pdrop: 0.3
      mlp_pdrop: 0.1
      n_heads: 8
      device: ${device}
      linear_output: true
      cond_router: true
      num_experts: 4
      top_k: 2
      router_normalize: true
      use_goal_in_routing: false
      use_argmax: false
      use_shared_expert: false
      use_noise_token_as_input: true
      init_style: olmoe
  language_goal:
    _target_: mode.models.networks.clip_lang_encoder.LangClip
    _recursive_: false
    model_name: ${clip_lang_model_name}
root_data_dir: /hkfs/work/workspace/scratch/ft4740-play3/data/task_ABC_D
lang_folder: lang_clip_resnet50
vis_clip_model_name: ViT-B/16
clip_lang_model_name: ViT-B/32
log_dir: ./logs
slurm: false
seed: 242
device: cuda
batch_size: 64
devices: 4
act_dim: 7
proprio_dims: 7
obs_dim: 512
goal_dim: 512
obs_seq_len: 1
act_seq_len: 10
multistep: 10
p_last_state: 0
gen_img_res: 112
max_epochs: 15
rollout_lh_skip_epochs: 4
num_workers: 12
benchmark_name: calvin_abc
use_extracted_rel_actions: true
trainer:
  devices: ${devices}
  precision: 16
  max_epochs: ${max_epochs}
  sync_batchnorm: true
  accelerator: gpu
  strategy: ddp
  limit_train_batches: 1000
  limit_val_batches: 4
logger:
  _target_: pytorch_lightning.loggers.WandbLogger
  save_dir: .
  name: logger
  group: mode
  log_model: false
  project: ${benchmark_name}
  entity: bennoq
  id: ???