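# Model-zoo configuration for the Optimus text VAE: a BERT encoder and a
# GPT-2 decoder joined through a 768-d latent, used as the text VAE under
# core/models/latent_diffusion/vae. The entries below inherit shared
# settings from the base `optimus` block via super_cfg.

# Base entry: shared flags for every optimus_* model in this file. args is
# left empty here and filled in by each child entry.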
optimus:
  symbol: optimus
  find_unused_parameters: false
  args: {}
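
# BERT-side encoder connector that maps input text to the VAE latent.
# The config block mirrors a standard bert-base-cased setup (12 layers,
# 12 heads, hidden size 768, cased vocabulary of 28996 tokens);
# latent_size sets the dimensionality of the latent the encoder produces.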
optimus_bert_encoder:
  super_cfg: optimus
  type: optimus_bert_connector
  args:
    config:
      architectures:
        - BertForMaskedLM
      attention_probs_dropout_prob: 0.1
      finetuning_task: null
      hidden_act: gelu
      hidden_dropout_prob: 0.1
      hidden_size: 768
      initializer_range: 0.02
      intermediate_size: 3072
      layer_norm_eps: 1.e-12
      max_position_embeddings: 512
      num_attention_heads: 12
      num_hidden_layers: 12
      num_labels: 2
      output_attentions: false
      output_hidden_states: false
      pruned_heads: {}
      torchscript: false
      type_vocab_size: 2
      vocab_size: 28996
    latent_size: 768
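
# Tokenizer paired with the BERT encoder: cased (do_lower_case: false),
# 512-token maximum length, using the bundled bert-base-cased vocabulary.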
optimus_bert_tokenizer:
  super_cfg: optimus
  type: optimus_bert_tokenizer
  args:
    do_lower_case: false
    max_len: 512
    vocab_file: core/models/latent_diffusion/vae/optimus_modules/vocab/bert-base-cased-vocab.txt
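
# GPT-2-side decoder connector that reconstructs text from the 768-d latent.
# The config matches a standard GPT-2 small setup (12 layers, 12 heads,
# n_embd 768, 1024-token context). vocab_size is 50260 rather than GPT-2's
# usual 50257, presumably to make room for added special tokens.
# pretrained_config_archive_map lists legacy huggingface.co S3 URLs from
# older transformers releases; they are kept verbatim for checkpoint
# compatibility.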
optimus_gpt2_decoder:
  super_cfg: optimus
  type: optimus_gpt2_connector
  args:
    config:
      architectures:
        - GPT2LMHeadModel
      attn_pdrop: 0.1
      embd_pdrop: 0.1
      finetuning_task: null
      hidden_size: 768
      initializer_range: 0.02
      latent_size: 768
      layer_norm_epsilon: 1.e-05
      max_position_embeddings: 1024
      n_ctx: 1024
      n_embd: 768
      n_head: 12
      n_layer: 12
      n_positions: 1024
      num_attention_heads: 12
      num_hidden_layers: 12
      num_labels: 1
      output_attentions: false
      output_hidden_states: false
      pretrained_config_archive_map:
        gpt2: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json
        gpt2-medium: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-config.json
        gpt2-large: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-config.json
      pruned_heads: {}
      resid_pdrop: 0.1
      summary_activation: null
      summary_first_dropout: 0.1
      summary_proj_to_labels: true
      summary_type: cls_index
      summary_use_proj: true
      torchscript: false
      vocab_size: 50260
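
# Tokenizer paired with the GPT-2 decoder: byte-pair encoding defined by the
# bundled vocab/merges files, with a 1024-token maximum length to match
# n_positions above.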
optimus_gpt2_tokenizer:
  super_cfg: optimus
  type: optimus_gpt2_tokenizer
  args:
    do_lower_case: false
    max_len: 1024
    vocab_file: core/models/latent_diffusion/vae/optimus_modules/vocab/gpt2-vocab.json
    merges_file: core/models/latent_diffusion/vae/optimus_modules/vocab/gpt2-merges.txt
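
# Top-level VAE entry that wires the four parts together. The MODEL(<name>)
# values appear to be macros resolved by the config loader into instances of
# the entries defined above; pth points at the pretrained checkpoint to load.
# In the inner args block, beta scales the KL term of the VAE objective,
# fb_mode appears to select the free-bits variant (with dim_target_kl as the
# per-dimension KL threshold used by those modes), and length_weighted_loss
# appears to toggle weighting the reconstruction loss by sequence length;
# these names follow the original Optimus training code.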
optimus_vae:
  super_cfg: optimus
  type: optimus_vae
  pth: pretrained/optimus-vae.pth
  args:
    encoder: MODEL(optimus_bert_encoder)
    decoder: MODEL(optimus_gpt2_decoder)
    tokenizer_encoder: MODEL(optimus_bert_tokenizer)
    tokenizer_decoder: MODEL(optimus_gpt2_tokenizer)
    args:
      latent_size: 768
      beta: 1.0
      fb_mode: 0
      length_weighted_loss: false
      dim_target_kl: 3.0