imstevenpmwork (HF staff) committed
Commit f6ababa (verified) · 1 Parent(s): bff7190

Upload folder using huggingface_hub

Files changed (2):
  1. config.json +31 -46
  2. train_config.json +56 -80
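The commit message indicates these files were pushed with huggingface_hub's upload_folder. For reference, a minimal sketch of that call, assuming a local folder holding the two JSON files; the repo id and folder path below are hypothetical placeholders, not taken from this commit:

from huggingface_hub import upload_folder

# Push a local folder containing config.json and train_config.json to the Hub.
# Both repo_id and folder_path are placeholder values for illustration only.
upload_folder(
    repo_id="user/repo",
    folder_path="path/to/pretrained_model",
    commit_message="Upload folder using huggingface_hub",
)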
config.json CHANGED
@@ -1,24 +1,26 @@
 {
-    "type": "vqbet",
-    "n_obs_steps": 5,
+    "type": "act",
+    "n_obs_steps": 1,
+    "device": "cuda",
+    "use_amp": false,
     "normalization_mapping": {
-        "VISUAL": "IDENTITY",
-        "STATE": "MIN_MAX",
-        "ACTION": "MIN_MAX"
+        "VISUAL": "MEAN_STD",
+        "STATE": "MEAN_STD",
+        "ACTION": "MEAN_STD"
     },
     "input_features": {
-        "observation.image": {
+        "observation.images.top": {
             "type": "VISUAL",
             "shape": [
                 3,
-                96,
-                96
+                480,
+                640
             ]
         },
         "observation.state": {
             "type": "STATE",
             "shape": [
-                2
+                14
             ]
         }
     },
@@ -26,46 +28,29 @@
         "action": {
             "type": "ACTION",
             "shape": [
-                2
+                14
             ]
         }
     },
-    "n_action_pred_token": 3,
-    "action_chunk_size": 5,
+    "chunk_size": 100,
+    "n_action_steps": 100,
     "vision_backbone": "resnet18",
-    "crop_shape": [
-        84,
-        84
-    ],
-    "crop_is_random": true,
-    "pretrained_backbone_weights": null,
-    "use_group_norm": true,
-    "spatial_softmax_num_keypoints": 32,
-    "n_vqvae_training_steps": 20000,
-    "vqvae_n_embed": 16,
-    "vqvae_embedding_dim": 256,
-    "vqvae_enc_hidden_dim": 128,
-    "gpt_block_size": 500,
-    "gpt_input_dim": 512,
-    "gpt_output_dim": 512,
-    "gpt_n_layer": 8,
-    "gpt_n_head": 8,
-    "gpt_hidden_dim": 512,
+    "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+    "replace_final_stride_with_dilation": false,
+    "pre_norm": false,
+    "dim_model": 512,
+    "n_heads": 8,
+    "dim_feedforward": 3200,
+    "feedforward_activation": "relu",
+    "n_encoder_layers": 4,
+    "n_decoder_layers": 1,
+    "use_vae": true,
+    "latent_dim": 32,
+    "n_vae_encoder_layers": 4,
+    "temporal_ensemble_coeff": null,
     "dropout": 0.1,
-    "mlp_hidden_dim": 1024,
-    "offset_loss_weight": 10000.0,
-    "primary_code_loss_weight": 5.0,
-    "secondary_code_loss_weight": 0.5,
-    "bet_softmax_temperature": 0.1,
-    "sequentially_select": false,
-    "optimizer_lr": 0.0001,
-    "optimizer_betas": [
-        0.95,
-        0.999
-    ],
-    "optimizer_eps": 1e-08,
-    "optimizer_weight_decay": 1e-06,
-    "optimizer_vqvae_lr": 0.001,
-    "optimizer_vqvae_weight_decay": 0.0001,
-    "scheduler_warmup_steps": 500
+    "kl_weight": 10.0,
+    "optimizer_lr": 1e-05,
+    "optimizer_weight_decay": 0.0001,
+    "optimizer_lr_backbone": 1e-05
 }
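The updated config.json describes an ACT policy that takes a single 480x640 RGB camera plus a 14-dimensional state and predicts 14-dimensional action chunks of length 100. A minimal sketch for sanity-checking those fields with the standard library, assuming the file has been downloaded to the working directory:

import json

# Load the new ACT policy config shown above and print the key shapes.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["type"])                                                # act
print(cfg["input_features"]["observation.images.top"]["shape"])  # [3, 480, 640]
print(cfg["input_features"]["observation.state"]["shape"])       # [14]
print(cfg["chunk_size"], cfg["n_action_steps"])                  # 100 100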
train_config.json CHANGED
@@ -1,6 +1,6 @@
 {
     "dataset": {
-        "repo_id": "lerobot/pusht",
+        "repo_id": "lerobot/aloha_sim_insertion_human",
         "episodes": null,
         "image_transforms": {
             "enable": false,
@@ -64,27 +64,27 @@
         "video_backend": "pyav"
     },
     "env": {
-        "type": "pusht",
-        "task": "PushT-v0",
-        "fps": 10,
+        "type": "aloha",
+        "task": "AlohaInsertion-v0",
+        "fps": 50,
         "features": {
             "action": {
                 "type": "ACTION",
                 "shape": [
-                    2
+                    14
                 ]
             },
             "agent_pos": {
                 "type": "STATE",
                 "shape": [
-                    2
+                    14
                 ]
            },
-            "pixels": {
+            "pixels/top": {
                 "type": "VISUAL",
                 "shape": [
-                    384,
-                    384,
+                    480,
+                    640,
                     3
                 ]
             }
@@ -92,36 +92,36 @@
         "features_map": {
             "action": "action",
             "agent_pos": "observation.state",
-            "environment_state": "observation.environment_state",
-            "pixels": "observation.image"
+            "top": "observation.image.top",
+            "pixels/top": "observation.images.top"
         },
-        "episode_length": 300,
+        "episode_length": 400,
         "obs_type": "pixels_agent_pos",
-        "render_mode": "rgb_array",
-        "visualization_width": 384,
-        "visualization_height": 384
+        "render_mode": "rgb_array"
     },
     "policy": {
-        "type": "vqbet",
-        "n_obs_steps": 5,
+        "type": "act",
+        "n_obs_steps": 1,
+        "device": "cuda",
+        "use_amp": false,
         "normalization_mapping": {
-            "VISUAL": "IDENTITY",
-            "STATE": "MIN_MAX",
-            "ACTION": "MIN_MAX"
+            "VISUAL": "MEAN_STD",
+            "STATE": "MEAN_STD",
+            "ACTION": "MEAN_STD"
         },
         "input_features": {
-            "observation.image": {
+            "observation.images.top": {
                 "type": "VISUAL",
                 "shape": [
                     3,
-                    96,
-                    96
+                    480,
+                    640
                 ]
             },
             "observation.state": {
                 "type": "STATE",
                 "shape": [
-                    2
+                    14
                 ]
             }
         },
@@ -129,80 +129,56 @@
             "action": {
                 "type": "ACTION",
                 "shape": [
-                    2
+                    14
                 ]
             }
         },
-        "n_action_pred_token": 3,
-        "action_chunk_size": 5,
+        "chunk_size": 100,
+        "n_action_steps": 100,
         "vision_backbone": "resnet18",
-        "crop_shape": [
-            84,
-            84
-        ],
-        "crop_is_random": true,
-        "pretrained_backbone_weights": null,
-        "use_group_norm": true,
-        "spatial_softmax_num_keypoints": 32,
-        "n_vqvae_training_steps": 20000,
-        "vqvae_n_embed": 16,
-        "vqvae_embedding_dim": 256,
-        "vqvae_enc_hidden_dim": 128,
-        "gpt_block_size": 500,
-        "gpt_input_dim": 512,
-        "gpt_output_dim": 512,
-        "gpt_n_layer": 8,
-        "gpt_n_head": 8,
-        "gpt_hidden_dim": 512,
+        "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+        "replace_final_stride_with_dilation": false,
+        "pre_norm": false,
+        "dim_model": 512,
+        "n_heads": 8,
+        "dim_feedforward": 3200,
+        "feedforward_activation": "relu",
+        "n_encoder_layers": 4,
+        "n_decoder_layers": 1,
+        "use_vae": true,
+        "latent_dim": 32,
+        "n_vae_encoder_layers": 4,
+        "temporal_ensemble_coeff": null,
         "dropout": 0.1,
-        "mlp_hidden_dim": 1024,
-        "offset_loss_weight": 10000.0,
-        "primary_code_loss_weight": 5.0,
-        "secondary_code_loss_weight": 0.5,
-        "bet_softmax_temperature": 0.1,
-        "sequentially_select": false,
-        "optimizer_lr": 0.0001,
-        "optimizer_betas": [
-            0.95,
-            0.999
-        ],
-        "optimizer_eps": 1e-08,
-        "optimizer_weight_decay": 1e-06,
-        "optimizer_vqvae_lr": 0.001,
-        "optimizer_vqvae_weight_decay": 0.0001,
-        "scheduler_warmup_steps": 500
+        "kl_weight": 10.0,
+        "optimizer_lr": 1e-05,
+        "optimizer_weight_decay": 0.0001,
+        "optimizer_lr_backbone": 1e-05
     },
-    "output_dir": "outputs/train/2025-01-25/20-30-40_pusht_vqbet",
-    "job_name": "pusht_vqbet",
+    "output_dir": "outputs/train/2025-01-25/17-37-25_aloha_act",
+    "job_name": "aloha_act",
     "resume": false,
-    "device": "cuda",
-    "use_amp": false,
-    "seed": 100000,
+    "seed": 1000,
     "num_workers": 4,
-    "batch_size": 64,
-    "steps": 250000,
-    "eval_freq": 25000,
+    "batch_size": 8,
+    "steps": 100000,
+    "eval_freq": 20000,
     "log_freq": 200,
     "save_checkpoint": true,
-    "save_freq": 25000,
+    "save_freq": 20000,
     "use_policy_training_preset": true,
     "optimizer": {
-        "type": "adam",
-        "lr": 0.0001,
+        "type": "adamw",
+        "lr": 1e-05,
         "betas": [
-            0.95,
+            0.9,
             0.999
         ],
         "eps": 1e-08,
-        "weight_decay": 1e-06,
+        "weight_decay": 0.0001,
         "grad_clip_norm": 10.0
     },
-    "scheduler": {
-        "type": "vqbet",
-        "num_warmup_steps": 500,
-        "num_vqvae_training_steps": 20000,
-        "num_cycles": 0.5
-    },
+    "scheduler": null,
     "eval": {
         "n_episodes": 50,
         "batch_size": 50,