tkuye committed on
Commit
9c8bebd
1 Parent(s): 92db525

Upload my model weights.

Browse files
added_tokens.json ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</s_$birth_comment>": 57853,
3
+ "</s_$comment>": 57851,
4
+ "</s_$schema>": 57563,
5
+ "</s_DGPA>": 57923,
6
+ "</s_Linkedin>": 57685,
7
+ "</s_Research Title>": 57641,
8
+ "</s_StartDate>": 57899,
9
+ "</s__description>": 57863,
10
+ "</s__endDate>": 57869,
11
+ "</s__highlights>": 57861,
12
+ "</s__id>": 57659,
13
+ "</s__not_essential>": 57661,
14
+ "</s__startDate>": 57867,
15
+ "</s__summary>": 57865,
16
+ "</s__url>": 57859,
17
+ "</s__username>": 57857,
18
+ "</s_about>": 57715,
19
+ "</s_accent>": 57897,
20
+ "</s_activities>": 57739,
21
+ "</s_additional>": 57773,
22
+ "</s_additionalWork>": 57881,
23
+ "</s_address 2>": 57791,
24
+ "</s_address>": 57569,
25
+ "</s_advantage>": 57783,
26
+ "</s_ancv>": 57871,
27
+ "</s_area>": 57583,
28
+ "</s_ascii_only>": 57875,
29
+ "</s_authors>": 57769,
30
+ "</s_avatar>": 57713,
31
+ "</s_awarder>": 57635,
32
+ "</s_awards>": 57591,
33
+ "</s_basics>": 57525,
34
+ "</s_benefit>": 57785,
35
+ "</s_birth>": 57807,
36
+ "</s_blog>": 57731,
37
+ "</s_breakBefore>": 57911,
38
+ "</s_canonical>": 57619,
39
+ "</s_caption>": 57893,
40
+ "</s_certificates>": 57593,
41
+ "</s_cgpa>": 57763,
42
+ "</s_characterClass>": 57803,
43
+ "</s_citations>": 57687,
44
+ "</s_city>": 57539,
45
+ "</s_comment-location>": 57777,
46
+ "</s_comment-phone>": 57775,
47
+ "</s_company>": 57553,
48
+ "</s_concluded>": 57887,
49
+ "</s_content>": 57909,
50
+ "</s_countryCode>": 57541,
51
+ "</s_courses>": 57589,
52
+ "</s_credit>": 57895,
53
+ "</s_date>": 57597,
54
+ "</s_day>": 57705,
55
+ "</s_dec31_as_year>": 57877,
56
+ "</s_degree>": 57653,
57
+ "</s_description>": 57643,
58
+ "</s_descrption>": 57771,
59
+ "</s_desktop>": 57823,
60
+ "</s_displayName>": 57689,
61
+ "</s_education>": 57579,
62
+ "</s_email>": 57535,
63
+ "</s_email_url>": 57837,
64
+ "</s_end>": 57707,
65
+ "</s_endDate>": 57559,
66
+ "</s_entity>": 57647,
67
+ "</s_experience>": 57917,
68
+ "</s_fab>": 57779,
69
+ "</s_feature>": 57781,
70
+ "</s_fluency>": 57611,
71
+ "</s_followers>": 57735,
72
+ "</s_following>": 57737,
73
+ "</s_fullDate>": 57741,
74
+ "</s_fullReleaseDate>": 57761,
75
+ "</s_githubUrl>": 57695,
76
+ "</s_gpa>": 57631,
77
+ "</s_headings>": 57879,
78
+ "</s_headline>": 57663,
79
+ "</s_height>": 57819,
80
+ "</s_hideReferences>": 57915,
81
+ "</s_highlight>": 57757,
82
+ "</s_highlights>": 57549,
83
+ "</s_icon>": 57675,
84
+ "</s_iconColor>": 57677,
85
+ "</s_id>": 57717,
86
+ "</s_image>": 57531,
87
+ "</s_images>": 57709,
88
+ "</s_img>": 57795,
89
+ "</s_institution>": 57581,
90
+ "</s_interests>": 57561,
91
+ "</s_isCurrentRole>": 57671,
92
+ "</s_issuer>": 57595,
93
+ "</s_karma>": 57733,
94
+ "</s_keywords>": 57605,
95
+ "</s_label>": 57529,
96
+ "</s_labels>": 57727,
97
+ "</s_lang$comment>": 57833,
98
+ "</s_lang>": 57831,
99
+ "</s_language>": 57609,
100
+ "</s_languages>": 57607,
101
+ "</s_lastModified>": 57657,
102
+ "</s_lastmodified>": 57793,
103
+ "</s_leadership>": 57683,
104
+ "</s_level>": 57603,
105
+ "</s_levelNum>": 57913,
106
+ "</s_levelnum>": 57799,
107
+ "</s_libraries>": 57693,
108
+ "</s_link>": 57829,
109
+ "</s_locale>": 57719,
110
+ "</s_location>": 57537,
111
+ "</s_locationAsString>": 57667,
112
+ "</s_media>": 57891,
113
+ "</s_meta>": 57615,
114
+ "</s_micro>": 57813,
115
+ "</s_mobile>": 57797,
116
+ "</s_module>": 57903,
117
+ "</s_modules>": 57901,
118
+ "</s_month>": 57703,
119
+ "</s_name>": 57527,
120
+ "</s_network>": 57573,
121
+ "</s_note>": 57673,
122
+ "</s_order>": 57905,
123
+ "</s_organisation>": 57885,
124
+ "</s_organization >": 57925,
125
+ "</s_organization>": 57629,
126
+ "</s_other>": 57927,
127
+ "</s_period>": 57889,
128
+ "</s_phone>": 57565,
129
+ "</s_phone_url>": 57839,
130
+ "</s_phonetic_name>": 57721,
131
+ "</s_photo>": 57759,
132
+ "</s_picture$comment>": 57835,
133
+ "</s_picture>": 57625,
134
+ "</s_pinned>": 57655,
135
+ "</s_place>": 57809,
136
+ "</s_position>": 57555,
137
+ "</s_postalCode>": 57627,
138
+ "</s_preview>": 57729,
139
+ "</s_primaryLanguage>": 57691,
140
+ "</s_profiles>": 57571,
141
+ "</s_project>": 57801,
142
+ "</s_projects>": 57613,
143
+ "</s_pronouns$comment>": 57849,
144
+ "</s_pronouns>": 57847,
145
+ "</s_province>": 57855,
146
+ "</s_publications>": 57599,
147
+ "</s_publisher>": 57637,
148
+ "</s_rating>": 57669,
149
+ "</s_reference>": 57545,
150
+ "</s_references>": 57543,
151
+ "</s_region>": 57623,
152
+ "</s_releaseDate>": 57639,
153
+ "</s_remote>": 57679,
154
+ "</s_repo>": 57725,
155
+ "</s_repositoryUrl>": 57697,
156
+ "</s_resolutions>": 57811,
157
+ "</s_resume>": 57919,
158
+ "</s_role>": 57651,
159
+ "</s_roles>": 57645,
160
+ "</s_score>": 57587,
161
+ "</s_sections>": 57787,
162
+ "</s_size>": 57815,
163
+ "</s_skill>": 57743,
164
+ "</s_skills>": 57601,
165
+ "</s_slot>": 57907,
166
+ "</s_source>": 57825,
167
+ "</s_sourceId>": 57827,
168
+ "</s_specialization>": 57805,
169
+ "</s_sport>": 57789,
170
+ "</s_stack>": 57921,
171
+ "</s_start>": 57699,
172
+ "</s_startDate>": 57557,
173
+ "</s_state>": 57765,
174
+ "</s_studyType>": 57585,
175
+ "</s_summary>": 57533,
176
+ "</s_technologies>": 57767,
177
+ "</s_template>": 57873,
178
+ "</s_theme>": 57621,
179
+ "</s_thesis>": 57723,
180
+ "</s_thumbnail>": 57821,
181
+ "</s_title>": 57633,
182
+ "</s_type>": 57649,
183
+ "</s_url$comment2>": 57843,
184
+ "</s_url$comment3>": 57845,
185
+ "</s_url$comment>": 57841,
186
+ "</s_url2>": 57883,
187
+ "</s_url>": 57567,
188
+ "</s_username>": 57575,
189
+ "</s_version>": 57617,
190
+ "</s_videos>": 57711,
191
+ "</s_volunteer>": 57577,
192
+ "</s_webbsite>": 57681,
193
+ "</s_website>": 57551,
194
+ "</s_whatever>": 57745,
195
+ "</s_width>": 57817,
196
+ "</s_work>": 57547,
197
+ "</s_x>": 57747,
198
+ "</s_y>": 57749,
199
+ "</s_year>": 57701,
200
+ "</s_yearsOfExperience>": 57665,
201
+ "</s_z1>": 57753,
202
+ "</s_z2>": 57755,
203
+ "</s_z>": 57751,
204
+ "<s_$birth_comment>": 57854,
205
+ "<s_$comment>": 57852,
206
+ "<s_$schema>": 57564,
207
+ "<s_DGPA>": 57924,
208
+ "<s_Linkedin>": 57686,
209
+ "<s_Research Title>": 57642,
210
+ "<s_StartDate>": 57900,
211
+ "<s__description>": 57864,
212
+ "<s__endDate>": 57870,
213
+ "<s__highlights>": 57862,
214
+ "<s__id>": 57660,
215
+ "<s__not_essential>": 57662,
216
+ "<s__startDate>": 57868,
217
+ "<s__summary>": 57866,
218
+ "<s__url>": 57860,
219
+ "<s__username>": 57858,
220
+ "<s_about>": 57716,
221
+ "<s_accent>": 57898,
222
+ "<s_activities>": 57740,
223
+ "<s_additional>": 57774,
224
+ "<s_additionalWork>": 57882,
225
+ "<s_address 2>": 57792,
226
+ "<s_address>": 57570,
227
+ "<s_advantage>": 57784,
228
+ "<s_ancv>": 57872,
229
+ "<s_area>": 57584,
230
+ "<s_ascii_only>": 57876,
231
+ "<s_authors>": 57770,
232
+ "<s_avatar>": 57714,
233
+ "<s_awarder>": 57636,
234
+ "<s_awards>": 57592,
235
+ "<s_basics>": 57526,
236
+ "<s_benefit>": 57786,
237
+ "<s_birth>": 57808,
238
+ "<s_blog>": 57732,
239
+ "<s_breakBefore>": 57912,
240
+ "<s_canonical>": 57620,
241
+ "<s_caption>": 57894,
242
+ "<s_certificates>": 57594,
243
+ "<s_cgpa>": 57764,
244
+ "<s_characterClass>": 57804,
245
+ "<s_citations>": 57688,
246
+ "<s_city>": 57540,
247
+ "<s_comment-location>": 57778,
248
+ "<s_comment-phone>": 57776,
249
+ "<s_company>": 57554,
250
+ "<s_concluded>": 57888,
251
+ "<s_content>": 57910,
252
+ "<s_countryCode>": 57542,
253
+ "<s_courses>": 57590,
254
+ "<s_credit>": 57896,
255
+ "<s_date>": 57598,
256
+ "<s_day>": 57706,
257
+ "<s_dec31_as_year>": 57878,
258
+ "<s_degree>": 57654,
259
+ "<s_description>": 57644,
260
+ "<s_descrption>": 57772,
261
+ "<s_desktop>": 57824,
262
+ "<s_displayName>": 57690,
263
+ "<s_education>": 57580,
264
+ "<s_email>": 57536,
265
+ "<s_email_url>": 57838,
266
+ "<s_end>": 57708,
267
+ "<s_endDate>": 57560,
268
+ "<s_entity>": 57648,
269
+ "<s_experience>": 57918,
270
+ "<s_fab>": 57780,
271
+ "<s_feature>": 57782,
272
+ "<s_fluency>": 57612,
273
+ "<s_followers>": 57736,
274
+ "<s_following>": 57738,
275
+ "<s_fullDate>": 57742,
276
+ "<s_fullReleaseDate>": 57762,
277
+ "<s_githubUrl>": 57696,
278
+ "<s_gpa>": 57632,
279
+ "<s_headings>": 57880,
280
+ "<s_headline>": 57664,
281
+ "<s_height>": 57820,
282
+ "<s_hideReferences>": 57916,
283
+ "<s_highlight>": 57758,
284
+ "<s_highlights>": 57550,
285
+ "<s_icon>": 57676,
286
+ "<s_iconColor>": 57678,
287
+ "<s_id>": 57718,
288
+ "<s_iitcdip>": 57523,
289
+ "<s_image>": 57532,
290
+ "<s_images>": 57710,
291
+ "<s_img>": 57796,
292
+ "<s_institution>": 57582,
293
+ "<s_interests>": 57562,
294
+ "<s_isCurrentRole>": 57672,
295
+ "<s_issuer>": 57596,
296
+ "<s_karma>": 57734,
297
+ "<s_keywords>": 57606,
298
+ "<s_label>": 57530,
299
+ "<s_labels>": 57728,
300
+ "<s_lang$comment>": 57834,
301
+ "<s_lang>": 57832,
302
+ "<s_language>": 57610,
303
+ "<s_languages>": 57608,
304
+ "<s_lastModified>": 57658,
305
+ "<s_lastmodified>": 57794,
306
+ "<s_leadership>": 57684,
307
+ "<s_level>": 57604,
308
+ "<s_levelNum>": 57914,
309
+ "<s_levelnum>": 57800,
310
+ "<s_libraries>": 57694,
311
+ "<s_link>": 57830,
312
+ "<s_locale>": 57720,
313
+ "<s_location>": 57538,
314
+ "<s_locationAsString>": 57668,
315
+ "<s_media>": 57892,
316
+ "<s_meta>": 57616,
317
+ "<s_micro>": 57814,
318
+ "<s_mobile>": 57798,
319
+ "<s_module>": 57904,
320
+ "<s_modules>": 57902,
321
+ "<s_month>": 57704,
322
+ "<s_name>": 57528,
323
+ "<s_network>": 57574,
324
+ "<s_note>": 57674,
325
+ "<s_order>": 57906,
326
+ "<s_organisation>": 57886,
327
+ "<s_organization >": 57926,
328
+ "<s_organization>": 57630,
329
+ "<s_other>": 57928,
330
+ "<s_period>": 57890,
331
+ "<s_phone>": 57566,
332
+ "<s_phone_url>": 57840,
333
+ "<s_phonetic_name>": 57722,
334
+ "<s_photo>": 57760,
335
+ "<s_picture$comment>": 57836,
336
+ "<s_picture>": 57626,
337
+ "<s_pinned>": 57656,
338
+ "<s_place>": 57810,
339
+ "<s_position>": 57556,
340
+ "<s_postalCode>": 57628,
341
+ "<s_preview>": 57730,
342
+ "<s_primaryLanguage>": 57692,
343
+ "<s_profiles>": 57572,
344
+ "<s_project>": 57802,
345
+ "<s_projects>": 57614,
346
+ "<s_pronouns$comment>": 57850,
347
+ "<s_pronouns>": 57848,
348
+ "<s_province>": 57856,
349
+ "<s_publications>": 57600,
350
+ "<s_publisher>": 57638,
351
+ "<s_rating>": 57670,
352
+ "<s_reference>": 57546,
353
+ "<s_references>": 57544,
354
+ "<s_region>": 57624,
355
+ "<s_releaseDate>": 57640,
356
+ "<s_remote>": 57680,
357
+ "<s_repo>": 57726,
358
+ "<s_repositoryUrl>": 57698,
359
+ "<s_resolutions>": 57812,
360
+ "<s_resume>": 57920,
361
+ "<s_resuparse>": 57929,
362
+ "<s_role>": 57652,
363
+ "<s_roles>": 57646,
364
+ "<s_score>": 57588,
365
+ "<s_sections>": 57788,
366
+ "<s_size>": 57816,
367
+ "<s_skill>": 57744,
368
+ "<s_skills>": 57602,
369
+ "<s_slot>": 57908,
370
+ "<s_source>": 57826,
371
+ "<s_sourceId>": 57828,
372
+ "<s_specialization>": 57806,
373
+ "<s_sport>": 57790,
374
+ "<s_stack>": 57922,
375
+ "<s_start>": 57700,
376
+ "<s_startDate>": 57558,
377
+ "<s_state>": 57766,
378
+ "<s_studyType>": 57586,
379
+ "<s_summary>": 57534,
380
+ "<s_synthdog>": 57524,
381
+ "<s_technologies>": 57768,
382
+ "<s_template>": 57874,
383
+ "<s_theme>": 57622,
384
+ "<s_thesis>": 57724,
385
+ "<s_thumbnail>": 57822,
386
+ "<s_title>": 57634,
387
+ "<s_type>": 57650,
388
+ "<s_url$comment2>": 57844,
389
+ "<s_url$comment3>": 57846,
390
+ "<s_url$comment>": 57842,
391
+ "<s_url2>": 57884,
392
+ "<s_url>": 57568,
393
+ "<s_username>": 57576,
394
+ "<s_version>": 57618,
395
+ "<s_videos>": 57712,
396
+ "<s_volunteer>": 57578,
397
+ "<s_webbsite>": 57682,
398
+ "<s_website>": 57552,
399
+ "<s_whatever>": 57746,
400
+ "<s_width>": 57818,
401
+ "<s_work>": 57548,
402
+ "<s_x>": 57748,
403
+ "<s_y>": 57750,
404
+ "<s_year>": 57702,
405
+ "<s_yearsOfExperience>": 57666,
406
+ "<s_z1>": 57754,
407
+ "<s_z2>": 57756,
408
+ "<s_z>": 57752,
409
+ "<sep/>": 57522
410
+ }
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "naver-clova-ix/donut-base",
3
+ "align_long_axis": true,
4
+ "architectures": [
5
+ "DonutModel"
6
+ ],
7
+ "decoder_layer": 4,
8
+ "encoder_layer": [
9
+ 2,
10
+ 2,
11
+ 14,
12
+ 2
13
+ ],
14
+ "input_size": [
15
+ 2560,
16
+ 1920
17
+ ],
18
+ "max_length": 768,
19
+ "max_position_embeddings": 768,
20
+ "model_type": "donut",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.23.1",
23
+ "window_size": 10
24
+ }
config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ resume_from_checkpoint_path: None
2
+ result_path: '/workspace/result'
3
+ pretrained_model_name_or_path: 'naver-clova-ix/donut-base'
4
+ dataset_name_or_paths:
5
+ - '/workspace/resuparse'
6
+ sort_json_key: False
7
+ train_batch_sizes:
8
+ - 1
9
+ val_batch_sizes:
10
+ - 1
11
+ input_size:
12
+ - 2560
13
+ - 1920
14
+ max_length: 768
15
+ align_long_axis: True
16
+ num_nodes: 1
17
+ seed: 2022
18
+ lr: 3e-05
19
+ warmup_steps: 300
20
+ num_training_samples_per_epoch: 800
21
+ max_epochs: 30
22
+ max_steps: -1
23
+ num_workers: 8
24
+ val_check_interval: 1.0
25
+ check_val_every_n_epoch: 3
26
+ gradient_clip_val: 1.0
27
+ verbose: True
28
+ exp_name: 'train_resuparse'
29
+ exp_version: '20221025_223907'
hparams.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcd92274d09562eb1dd67246399a7163c0a3402418cee3df4b4c5efa158f854
3
+ size 1016949955
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
3
+ size 1296245
special_tokens_map.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s_resuparse>"
4
+ ],
5
+ "bos_token": "<s>",
6
+ "cls_token": "<s>",
7
+ "eos_token": "</s>",
8
+ "mask_token": {
9
+ "content": "<mask>",
10
+ "lstrip": true,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ },
15
+ "pad_token": "<pad>",
16
+ "sep_token": "</s>",
17
+ "unk_token": "<unk>"
18
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "from_slow": true,
6
+ "mask_token": {
7
+ "__type": "AddedToken",
8
+ "content": "<mask>",
9
+ "lstrip": true,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "name_or_path": "naver-clova-ix/donut-base",
15
+ "pad_token": "<pad>",
16
+ "processor_class": "DonutProcessor",
17
+ "sep_token": "</s>",
18
+ "sp_model_kwargs": {},
19
+ "special_tokens_map_file": null,
20
+ "tokenizer_class": "XLMRobertaTokenizer",
21
+ "unk_token": "<unk>"
22
+ }