|
/content/drive/.shortcut-targets-by-id/1--J_BDKYgok4-kOhiE9x_VOKDgdv_GoS/Thesis/brio_project-main/BRIO/main_fix.py |
|
Namespace(
    cuda=True, gpuid=[0], evaluate=False, do_reranking=False, do_generation=False,
    log=True, port=12355, model_pt='', config='', batch_size=1, epoch=1,
    report_freq=100, accumulate_step=8, margin=0.001, gold_margin=0, gold_weight=0,
    mle_weight=0.1, rank_weight=10, model_type='vinai/bartpho-word-base',
    warmup_steps=20000, normalize=True, grad_norm=0, seed=970903, no_gold=False,
    pretrained='./finetuned_model/eval_bartpho_final', max_lr=5e-05, scale=0.5,
    score_mode='log', datatype='diverse', dataset='cookingVN', max_len=128,
    max_num=6, smooth=0.09, total_len=512, length_penalty=2.0, do_sample=True,
    gen_max_len=1024, gen_min_len=55, is_pegasus=False, adding=0,
    eval_interval=1000, num_beams=6)
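The margin, mle_weight, and rank_weight options above drive a BRIO-style multi-task objective: candidate summaries are scored by length-normalized log-likelihood (score_mode='log' with length_penalty=2.0), and a pairwise margin-ranking loss over the candidates is mixed with the usual MLE loss. The sketch below is a minimal illustration of that objective, not the project's exact code; tensor shapes and function names are assumptions.

import torch

def ranking_loss(scores: torch.Tensor, margin: float = 0.001) -> torch.Tensor:
    # scores: (batch, num_cand) candidate scores, sorted best-first
    # (e.g. by ROUGE against the reference), as in BRIO-style training.
    loss = scores.new_zeros(())
    n = scores.size(1)
    for gap in range(1, n):
        pos, neg = scores[:, :-gap], scores[:, gap:]
        # the required margin grows with the rank difference `gap`
        loss = loss + torch.clamp(neg - pos + gap * margin, min=0).mean()
    return loss

def brio_loss(mle_loss: torch.Tensor, cand_scores: torch.Tensor) -> torch.Tensor:
    # mle_weight=0.1 and rank_weight=10, taken from the Namespace above
    return 0.1 * mle_loss + 10 * ranking_loss(cand_scores, margin=0.001)

With max_num=6 candidates per example, ranking_loss compares all 15 ordered candidate pairs for each batch element.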
|
BRIO(
  (model): MBartScorer(
    (model): CustomMBartModel(
      (shared): Embedding(64001, 768, padding_idx=1)
      (encoder): MBartEncoder(
        (embed_tokens): Embedding(64001, 768, padding_idx=1)
        (embed_positions): MBartLearnedPositionalEmbedding(1026, 768)
        (layers): ModuleList(
          (0-5): 6 x MBartEncoderLayer(
            (self_attn): MBartAttention(
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
        )
        (layernorm_embedding): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
      (decoder): MBartDecoder(
        (embed_tokens): Embedding(64001, 768, padding_idx=1)
        (embed_positions): MBartLearnedPositionalEmbedding(1026, 768)
        (layers): ModuleList(
          (0-5): 6 x MBartDecoderLayer(
            (self_attn): MBartAttention(
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (activation_fn): GELUActivation()
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (encoder_attn): MBartAttention(
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (encoder_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
        )
        (layernorm_embedding): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
    )
    (lm_head): Linear(in_features=768, out_features=64001, bias=False)
  )
)
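The dump above shows the vinai/bartpho-word-base backbone (6 encoder and 6 decoder MBart layers, d_model=768, FFN size 3072, a 64001-token vocabulary, and an lm_head projecting back to it) wrapped in the project-specific BRIO, MBartScorer, and CustomMBartModel classes. A minimal sketch for reproducing the backbone's module tree with Hugging Face transformers follows; because it uses the stock class, its printout starts at MBartForConditionalGeneration rather than BRIO.

from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained('vinai/bartpho-word-base')
print(model)  # MBart encoder/decoder stack matching the dump above

cfg = model.config
print(cfg.vocab_size, cfg.d_model, cfg.encoder_layers, cfg.decoder_layers)
# expected from the dump: 64001 768 6 6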
|