{
  "_name_or_path": "MCG-NJU/videomae-base",
  "architectures": [
    "VideoMAEForVideoClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "decoder_hidden_size": 384,
  "decoder_intermediate_size": 1536,
  "decoder_num_attention_heads": 6,
  "decoder_num_hidden_layers": 4,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "A\u0306n",
    "1": "Ban nga\u0300y",
    "2": "Ban \u0111e\u0302m",
    "3": "Ba\u0323n",
    "4": "Bo\u0302\u0301",
    "5": "Ca\u0309m o\u031bn",
    "6": "Cho\u031bi",
    "7": "Cu\u031bo\u031b\u0300i",
    "8": "Kho\u0301c",
    "9": "La\u0300m vie\u0323\u0302c",
    "10": "Me\u0323",
    "11": "Mo\u0302\u0303i nga\u0300y",
    "12": "Sa\u0301ch",
    "13": "To\u0302i",
    "14": "Vie\u0302\u0301t",
    "15": "Xem",
    "16": "Xin cha\u0300o",
    "17": "Xin lo\u0302\u0303i",
    "18": "\u0110i",
    "19": "\u0110i ho\u0323c"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "A\u0306n": 0,
    "Ban nga\u0300y": 1,
    "Ban \u0111e\u0302m": 2,
    "Ba\u0323n": 3,
    "Bo\u0302\u0301": 4,
    "Ca\u0309m o\u031bn": 5,
    "Cho\u031bi": 6,
    "Cu\u031bo\u031b\u0300i": 7,
    "Kho\u0301c": 8,
    "La\u0300m vie\u0323\u0302c": 9,
    "Me\u0323": 10,
    "Mo\u0302\u0303i nga\u0300y": 11,
    "Sa\u0301ch": 12,
    "To\u0302i": 13,
    "Vie\u0302\u0301t": 14,
    "Xem": 15,
    "Xin cha\u0300o": 16,
    "Xin lo\u0302\u0303i": 17,
    "\u0110i": 18,
    "\u0110i ho\u0323c": 19
  },
  "layer_norm_eps": 1e-12,
  "model_type": "videomae",
  "norm_pix_loss": true,
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_frames": 16,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "problem_type": "single_label_classification",
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.30.0",
  "tubelet_size": 2,
  "use_mean_pooling": false
}