asmaa1 committed on
Commit
b638f62
1 Parent(s): 3bd0ea7

Training in progress, epoch 0

Browse files
Files changed (4) hide show
  1. config.json +241 -0
  2. preprocessor_config.json +28 -0
  3. pytorch_model.bin +3 -0
  4. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/xclip-base-patch32",
3
+ "architectures": [
4
+ "VideoMAEForVideoClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "decoder_hidden_size": 384,
8
+ "decoder_intermediate_size": 1536,
9
+ "decoder_num_attention_heads": 6,
10
+ "decoder_num_hidden_layers": 4,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.0,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "\u0627\u0628\u0631\u0627\u0647\u064a\u0645",
16
+ "1": "\u0627\u062d\u062a\u0645\u0627\u0644",
17
+ "2": "\u0627\u0633\u062a\u0641\u0627\u062f\u0629",
18
+ "3": "\u0627\u0635\u063a\u0631 \u0645\u0646",
19
+ "4": "\u0627\u0639\u0627\u0642\u0629",
20
+ "5": "\u0627\u0644 \u062c\u064a",
21
+ "6": "\u0627\u0644\u062e\u0644\u064a\u062c \u0627\u0644\u0639\u0631\u0628\u064a",
22
+ "7": "\u0627\u0644\u0630\u0627\u0643\u0631\u0629",
23
+ "8": "\u0627\u0654\u0628\u0639\u0627\u062f",
24
+ "9": "\u0627\u0654\u0628\u0646 \u0627\u0644\u0627\u0654\u062e",
25
+ "10": "\u0627\u0654\u062d\u0627\u0637",
26
+ "11": "\u0627\u0654\u062e\u062a",
27
+ "12": "\u0627\u0654\u062e\u0635\u0627\u064a\u0654\u064a\u0629",
28
+ "13": "\u0627\u0655\u0639\u0644\u0627\u0645",
29
+ "14": "\u0628\u0646\u0641\u0633\u062c\u064a",
30
+ "15": "\u0628\u0648\u0646\u062a \u0633\u0646\u062a\u0631",
31
+ "16": "\u062a\u0627\u0654\u0645\u064a\u0646 \u0634\u0627\u0645\u0644",
32
+ "17": "\u062e\u0627\u0635",
33
+ "18": "\u0645\u0628\u0631\u0648\u0643 \u0627\u0644\u0645\u0648\u0644\u0648\u062f",
34
+ "19": "\u0645\u062a\u0631 \u0645\u0631\u0628\u0639 - \u06452"
35
+ },
36
+ "image_size": 224,
37
+ "initializer_factor": 1.0,
38
+ "initializer_range": 0.02,
39
+ "intermediate_size": 3072,
40
+ "label2id": {
41
+ "\u0627\u0628\u0631\u0627\u0647\u064a\u0645": 0,
42
+ "\u0627\u062d\u062a\u0645\u0627\u0644": 1,
43
+ "\u0627\u0633\u062a\u0641\u0627\u062f\u0629": 2,
44
+ "\u0627\u0635\u063a\u0631 \u0645\u0646": 3,
45
+ "\u0627\u0639\u0627\u0642\u0629": 4,
46
+ "\u0627\u0644 \u062c\u064a": 5,
47
+ "\u0627\u0644\u062e\u0644\u064a\u062c \u0627\u0644\u0639\u0631\u0628\u064a": 6,
48
+ "\u0627\u0644\u0630\u0627\u0643\u0631\u0629": 7,
49
+ "\u0627\u0654\u0628\u0639\u0627\u062f": 8,
50
+ "\u0627\u0654\u0628\u0646 \u0627\u0644\u0627\u0654\u062e": 9,
51
+ "\u0627\u0654\u062d\u0627\u0637": 10,
52
+ "\u0627\u0654\u062e\u062a": 11,
53
+ "\u0627\u0654\u062e\u0635\u0627\u064a\u0654\u064a\u0629": 12,
54
+ "\u0627\u0655\u0639\u0644\u0627\u0645": 13,
55
+ "\u0628\u0646\u0641\u0633\u062c\u064a": 14,
56
+ "\u0628\u0648\u0646\u062a \u0633\u0646\u062a\u0631": 15,
57
+ "\u062a\u0627\u0654\u0645\u064a\u0646 \u0634\u0627\u0645\u0644": 16,
58
+ "\u062e\u0627\u0635": 17,
59
+ "\u0645\u0628\u0631\u0648\u0643 \u0627\u0644\u0645\u0648\u0644\u0648\u062f": 18,
60
+ "\u0645\u062a\u0631 \u0645\u0631\u0628\u0639 - \u06452": 19
61
+ },
62
+ "layer_norm_eps": 1e-12,
63
+ "logit_scale_init_value": 2.6592,
64
+ "model_type": "videomae",
65
+ "norm_pix_loss": true,
66
+ "num_attention_heads": 12,
67
+ "num_channels": 3,
68
+ "num_frames": 16,
69
+ "num_hidden_layers": 12,
70
+ "patch_size": 16,
71
+ "problem_type": "single_label_classification",
72
+ "projection_dim": 512,
73
+ "prompt_alpha": 0.1,
74
+ "prompt_attention_dropout": 0.0,
75
+ "prompt_hidden_act": "quick_gelu",
76
+ "prompt_layers": 2,
77
+ "prompt_num_attention_heads": 8,
78
+ "prompt_projection_dropout": 0.0,
79
+ "qkv_bias": true,
80
+ "text_config": {
81
+ "_name_or_path": "",
82
+ "add_cross_attention": false,
83
+ "architectures": null,
84
+ "attention_dropout": 0.0,
85
+ "bad_words_ids": null,
86
+ "bos_token_id": 0,
87
+ "chunk_size_feed_forward": 0,
88
+ "cross_attention_hidden_size": null,
89
+ "decoder_start_token_id": null,
90
+ "diversity_penalty": 0.0,
91
+ "do_sample": false,
92
+ "dropout": 0.0,
93
+ "early_stopping": false,
94
+ "encoder_no_repeat_ngram_size": 0,
95
+ "eos_token_id": 2,
96
+ "exponential_decay_length_penalty": null,
97
+ "finetuning_task": null,
98
+ "forced_bos_token_id": null,
99
+ "forced_eos_token_id": null,
100
+ "hidden_act": "quick_gelu",
101
+ "hidden_size": 512,
102
+ "id2label": {
103
+ "0": "LABEL_0",
104
+ "1": "LABEL_1"
105
+ },
106
+ "initializer_factor": 1.0,
107
+ "initializer_range": 0.02,
108
+ "intermediate_size": 2048,
109
+ "is_decoder": false,
110
+ "is_encoder_decoder": false,
111
+ "label2id": {
112
+ "LABEL_0": 0,
113
+ "LABEL_1": 1
114
+ },
115
+ "layer_norm_eps": 1e-05,
116
+ "length_penalty": 1.0,
117
+ "max_length": 20,
118
+ "max_position_embeddings": 77,
119
+ "min_length": 0,
120
+ "model_type": "xclip_text_model",
121
+ "no_repeat_ngram_size": 0,
122
+ "num_attention_heads": 8,
123
+ "num_beam_groups": 1,
124
+ "num_beams": 1,
125
+ "num_hidden_layers": 12,
126
+ "num_return_sequences": 1,
127
+ "output_attentions": false,
128
+ "output_hidden_states": false,
129
+ "output_scores": false,
130
+ "pad_token_id": 1,
131
+ "prefix": null,
132
+ "problem_type": null,
133
+ "pruned_heads": {},
134
+ "remove_invalid_values": false,
135
+ "repetition_penalty": 1.0,
136
+ "return_dict": true,
137
+ "return_dict_in_generate": false,
138
+ "sep_token_id": null,
139
+ "task_specific_params": null,
140
+ "temperature": 1.0,
141
+ "tf_legacy_loss": false,
142
+ "tie_encoder_decoder": false,
143
+ "tie_word_embeddings": true,
144
+ "tokenizer_class": null,
145
+ "top_k": 50,
146
+ "top_p": 1.0,
147
+ "torch_dtype": null,
148
+ "torchscript": false,
149
+ "transformers_version": "4.22.0.dev0",
150
+ "typical_p": 1.0,
151
+ "use_bfloat16": false,
152
+ "vocab_size": 49408
153
+ },
154
+ "text_config_dict": null,
155
+ "torch_dtype": "float32",
156
+ "transformers_version": "4.33.2",
157
+ "tubelet_size": 2,
158
+ "use_mean_pooling": true,
159
+ "vision_config": {
160
+ "_name_or_path": "",
161
+ "add_cross_attention": false,
162
+ "architectures": null,
163
+ "attention_dropout": 0.0,
164
+ "bad_words_ids": null,
165
+ "bos_token_id": null,
166
+ "chunk_size_feed_forward": 0,
167
+ "cross_attention_hidden_size": null,
168
+ "decoder_start_token_id": null,
169
+ "diversity_penalty": 0.0,
170
+ "do_sample": false,
171
+ "drop_path_rate": 0.0,
172
+ "dropout": 0.0,
173
+ "early_stopping": false,
174
+ "encoder_no_repeat_ngram_size": 0,
175
+ "eos_token_id": null,
176
+ "exponential_decay_length_penalty": null,
177
+ "finetuning_task": null,
178
+ "forced_bos_token_id": null,
179
+ "forced_eos_token_id": null,
180
+ "hidden_act": "quick_gelu",
181
+ "hidden_size": 768,
182
+ "id2label": {
183
+ "0": "LABEL_0",
184
+ "1": "LABEL_1"
185
+ },
186
+ "image_size": 224,
187
+ "initializer_factor": 1.0,
188
+ "initializer_range": 0.02,
189
+ "intermediate_size": 3072,
190
+ "is_decoder": false,
191
+ "is_encoder_decoder": false,
192
+ "label2id": {
193
+ "LABEL_0": 0,
194
+ "LABEL_1": 1
195
+ },
196
+ "layer_norm_eps": 1e-05,
197
+ "length_penalty": 1.0,
198
+ "max_length": 20,
199
+ "min_length": 0,
200
+ "mit_hidden_size": 512,
201
+ "mit_intermediate_size": 2048,
202
+ "mit_num_attention_heads": 8,
203
+ "mit_num_hidden_layers": 1,
204
+ "model_type": "xclip_vision_model",
205
+ "no_repeat_ngram_size": 0,
206
+ "num_attention_heads": 12,
207
+ "num_beam_groups": 1,
208
+ "num_beams": 1,
209
+ "num_channels": 3,
210
+ "num_frames": 8,
211
+ "num_hidden_layers": 12,
212
+ "num_return_sequences": 1,
213
+ "output_attentions": false,
214
+ "output_hidden_states": false,
215
+ "output_scores": false,
216
+ "pad_token_id": null,
217
+ "patch_size": 32,
218
+ "prefix": null,
219
+ "problem_type": null,
220
+ "pruned_heads": {},
221
+ "remove_invalid_values": false,
222
+ "repetition_penalty": 1.0,
223
+ "return_dict": true,
224
+ "return_dict_in_generate": false,
225
+ "sep_token_id": null,
226
+ "task_specific_params": null,
227
+ "temperature": 1.0,
228
+ "tf_legacy_loss": false,
229
+ "tie_encoder_decoder": false,
230
+ "tie_word_embeddings": true,
231
+ "tokenizer_class": null,
232
+ "top_k": 50,
233
+ "top_p": 1.0,
234
+ "torch_dtype": null,
235
+ "torchscript": false,
236
+ "transformers_version": "4.22.0.dev0",
237
+ "typical_p": 1.0,
238
+ "use_bfloat16": false
239
+ },
240
+ "vision_config_dict": null
241
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "feature_extractor_type": "VideoMAEFeatureExtractor",
11
+ "image_mean": [
12
+ 0.485,
13
+ 0.456,
14
+ 0.406
15
+ ],
16
+ "image_processor_type": "VideoMAEImageProcessor",
17
+ "image_std": [
18
+ 0.229,
19
+ 0.224,
20
+ 0.225
21
+ ],
22
+ "processor_class": "XCLIPProcessor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "shortest_edge": 224
27
+ }
28
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5574508ba2a891601b66755309835c2300c2b7064d0a9648f948cd8bab6fa7e1
3
+ size 345034293
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171abff4c6c516ceebdad71d3f09cd61f01ef44559bcdf4d97e3e8898d1a9727
3
+ size 4091