File size: 1,493 Bytes
8f3277d d455c34 2d25e55 d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d d455c34 8f3277d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
{
"amp": 1,
"architectures": [
"FlaubertWithLMHeadModel"
],
"asm": false,
"attention_dropout": 0.1,
"bos_index": 0,
"bos_token_id": 0,
"bptt": 512,
"causal": false,
"clip_grad_norm": 5,
"dropout": 0.1,
"emb_dim": 512,
"embed_init_std": 0.02209708691207961,
"encoder_only": true,
"end_n_top": 5,
"eos_index": 1,
"fp16": true,
"gelu_activation": true,
"group_by_size": true,
"id2lang": {
"0": "fr"
},
"init_std": 0.02,
"is_encoder": true,
"lang2id": {
"fr": 0
},
"lang_id": 0,
"langs": [
"fr"
],
"layer_norm_eps": 1e-06,
"layerdrop": 0.2,
"lg_sampling_factor": -1,
"lgs": "fr",
"mask_index": 5,
"mask_token_id": 0,
"max_batch_size": 0,
"max_position_embeddings": 512,
"max_vocab": -1,
"mlm_steps": [
[
"fr",
null
]
],
"model_type": "flaubert",
"n_heads": 8,
"n_langs": 1,
"n_layers": 6,
"pad_index": 2,
"pad_token_id": 2,
"pre_norm": true,
"sample_alpha": 0,
"share_inout_emb": true,
"sinusoidal_embeddings": false,
"start_n_top": 5,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "first",
"summary_use_proj": true,
"tokens_per_batch": -1,
"unk_index": 3,
"use_lang_emb": true,
"vocab_size": 68729,
"word_blank": 0,
"word_dropout": 0,
"word_keep": 0.1,
"word_mask": 0.8,
"word_mask_keep_rand": "0.8,0.1,0.1",
"word_pred": 0.15,
"word_rand": 0.1,
"word_shuffle": 0
}
|