# Model configuration for MotoGPT.
# Each `_target_` names the class or callable that the nested mapping configures
# (presumably consumed via Hydra-style instantiation -- confirm with the loader).
#
# NOTE(review): the original file had lost its indentation and carried stray
# `|` characters on every line. The nesting below is reconstructed from the
# repeated `_target_` / `pretrained_model_name_or_path` keys, which can only be
# valid if they live in separate sub-mappings. Verify the boundaries against the
# constructors named in each `_target_`.
_target_: moto_gpt.src.models.moto_gpt.MotoGPT

# Language encoder.
model_lang:
  _target_: transformers.T5EncoderModel.from_pretrained
  pretrained_model_name_or_path: "t5-base"

# Vision encoder.
model_vision:
  _target_: moto_gpt.src.models.mae_model.MaeEncoder
  use_obs_feature: true
  pretrained_model_name_or_path: "facebook/vit-mae-large"

# Causal transformer backbone and its nested config object.
model_causal_transformer:
  _target_: moto_gpt.src.models.trajectory_gpt2.GPT2Model
  config:
    _target_: moto_gpt.src.models.trajectory_gpt2.GPT2Config
    vocab_size: 1
    n_embd: 768
    n_layer: 12
    n_head: 12
    activation_function: "relu"
    dropout: 0.1
    n_positions: 1024

# Top-level MotoGPT arguments.
# NOTE(review): assumed to start here (after `n_positions`); confirm that none
# of these belong to GPT2Config instead.
act_dim: 7
hidden_size: 768
sequence_length: 2
chunk_size: 5
per_latent_motion_len: 8
latent_motion_codebook_size: 128
latent_motion_pred: true
act_pred: false
img_feat_dim: 1024
patch_feat_dim: 1024
lang_feat_dim: 768
mask_latent_motion_probability: 0.5
freeze_lang: true
freeze_vision: true