hugo-albert committed
Commit 897e7e3
1 Parent(s): df34f62

Training in progress epoch 0

Files changed (7)
  1. config.json +130 -130
  2. merges.txt +0 -0
  3. model.safetensors +1 -1
  4. special_tokens_map.json +25 -11
  5. tokenizer.json +0 -0
  6. tokenizer_config.json +26 -25
  7. vocab.json +0 -0
config.json CHANGED
@@ -12,142 +12,142 @@
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "id2label": {
- "0": "Z",
- "1": "vsp",
- "2": "vms",
- "3": "vsg",
- "4": "vss",
- "5": "Fc",
- "6": "vmn",
- "7": "Fpt",
- "8": "rn",
- "9": "dp",
- "10": "vas",
- "11": "sp",
- "12": "vsm",
  "13": "vam",
- "14": "Fs",
- "15": "vai",
- "16": "vag",
- "17": "np",
- "18": "Fd",
- "19": "pi",
- "20": "Fp",
- "21": "pr",
- "22": "Zm",
- "23": "vap",
- "24": "dt",
- "25": "I",
- "26": "ao",
- "27": "Fh",
- "28": "i",
- "29": "de",
- "30": "dn",
- "31": "pe",
- "32": "van",
- "33": "aq",
- "34": "nc",
- "35": "dd",
- "36": "da",
- "37": "vmm",
- "38": "vsn",
- "39": "px",
- "40": "vmi",
- "41": "cc",
- "42": "Fat",
- "43": "pd",
- "44": "pn",
- "45": "Fe",
- "46": "vmp",
- "47": "Fz",
- "48": "pp",
- "49": "Faa",
- "50": "Fpa",
- "51": "W",
- "52": "Fit",
- "53": "Fx",
- "54": "cs",
- "55": "di",
- "56": "Fg",
- "57": "vmg",
- "58": "Y",
- "59": "rg",
- "60": "p0",
- "61": "Zp",
- "62": "X",
- "63": "Fia",
- "64": "vsi",
- "65": "pt"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
- "Faa": 49,
- "Fat": 42,
- "Fc": 5,
- "Fd": 18,
- "Fe": 45,
- "Fg": 56,
- "Fh": 27,
- "Fia": 63,
- "Fit": 52,
- "Fp": 20,
- "Fpa": 50,
- "Fpt": 7,
- "Fs": 14,
- "Fx": 53,
- "Fz": 47,
- "I": 25,
- "W": 51,
- "X": 62,
- "Y": 58,
- "Z": 0,
- "Zm": 22,
- "Zp": 61,
- "ao": 26,
- "aq": 33,
- "cc": 41,
- "cs": 54,
- "da": 36,
- "dd": 35,
- "de": 29,
- "di": 55,
- "dn": 30,
- "dp": 9,
- "dt": 24,
- "i": 28,
- "nc": 34,
- "np": 17,
- "p0": 60,
- "pd": 43,
- "pe": 31,
- "pi": 19,
- "pn": 44,
- "pp": 48,
- "pr": 21,
- "pt": 65,
- "px": 39,
- "rg": 59,
- "rn": 8,
- "sp": 11,
- "vag": 16,
- "vai": 15,
  "vam": 13,
- "van": 32,
- "vap": 23,
- "vas": 10,
- "vmg": 57,
- "vmi": 40,
- "vmm": 37,
- "vmn": 6,
- "vmp": 46,
- "vms": 2,
- "vsg": 3,
- "vsi": 64,
- "vsm": 12,
- "vsn": 38,
- "vsp": 1,
- "vss": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
 
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "id2label": {
+ "0": "X",
+ "1": "vai",
+ "2": "vap",
+ "3": "Zm",
+ "4": "vas",
+ "5": "Fat",
+ "6": "Fp",
+ "7": "ao",
+ "8": "W",
+ "9": "Fg",
+ "10": "sp",
+ "11": "vsg",
+ "12": "dt",
  "13": "vam",
+ "14": "pi",
+ "15": "dp",
+ "16": "vmm",
+ "17": "pn",
+ "18": "vmg",
+ "19": "Fpt",
+ "20": "rg",
+ "21": "Fs",
+ "22": "i",
+ "23": "Zp",
+ "24": "aq",
+ "25": "Fd",
+ "26": "vms",
+ "27": "vmp",
+ "28": "Fz",
+ "29": "dn",
+ "30": "vag",
+ "31": "vsn",
+ "32": "nc",
+ "33": "vss",
+ "34": "vsm",
+ "35": "Fh",
+ "36": "I",
+ "37": "de",
+ "38": "px",
+ "39": "Fc",
+ "40": "Faa",
+ "41": "pe",
+ "42": "Z",
+ "43": "vmi",
+ "44": "pp",
+ "45": "da",
+ "46": "cs",
+ "47": "rn",
+ "48": "vsi",
+ "49": "Fit",
+ "50": "Fe",
+ "51": "p0",
+ "52": "Fx",
+ "53": "Y",
+ "54": "cc",
+ "55": "van",
+ "56": "Fpa",
+ "57": "pr",
+ "58": "dd",
+ "59": "pt",
+ "60": "vmn",
+ "61": "di",
+ "62": "np",
+ "63": "vsp",
+ "64": "Fia",
+ "65": "pd"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
+ "Faa": 40,
+ "Fat": 5,
+ "Fc": 39,
+ "Fd": 25,
+ "Fe": 50,
+ "Fg": 9,
+ "Fh": 35,
+ "Fia": 64,
+ "Fit": 49,
+ "Fp": 6,
+ "Fpa": 56,
+ "Fpt": 19,
+ "Fs": 21,
+ "Fx": 52,
+ "Fz": 28,
+ "I": 36,
+ "W": 8,
+ "X": 0,
+ "Y": 53,
+ "Z": 42,
+ "Zm": 3,
+ "Zp": 23,
+ "ao": 7,
+ "aq": 24,
+ "cc": 54,
+ "cs": 46,
+ "da": 45,
+ "dd": 58,
+ "de": 37,
+ "di": 61,
+ "dn": 29,
+ "dp": 15,
+ "dt": 12,
+ "i": 22,
+ "nc": 32,
+ "np": 62,
+ "p0": 51,
+ "pd": 65,
+ "pe": 41,
+ "pi": 14,
+ "pn": 17,
+ "pp": 44,
+ "pr": 57,
+ "pt": 59,
+ "px": 38,
+ "rg": 20,
+ "rn": 47,
+ "sp": 10,
+ "vag": 30,
+ "vai": 1,
  "vam": 13,
+ "van": 55,
+ "vap": 2,
+ "vas": 4,
+ "vmg": 18,
+ "vmi": 43,
+ "vmm": 16,
+ "vmn": 60,
+ "vmp": 27,
+ "vms": 26,
+ "vsg": 11,
+ "vsi": 48,
+ "vsm": 34,
+ "vsn": 31,
+ "vsp": 63,
+ "vss": 33
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8016e124680e5d17f08af502ad581978dc381a70defa07fc36dd4577df19455a
  size 1417546912

  version https://git-lfs.github.com/spec/v1
+ oid sha256:dc4b917fde90db6334cb3c9da523e6c0ebd33e7b8744a79c1de5d5c5cac953e2
  size 1417546912
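
Only the Git LFS pointer is versioned here: the sha256 oid changed while the size stayed at exactly 1417546912 bytes, which is what you expect when weight values are updated inside an unchanged architecture. A quick sketch, assuming a locally downloaded copy of the file (the path is an assumption, not part of the commit), for checking a download against the pointer's oid:

    import hashlib

    # Assumed local path to the downloaded weights file.
    path = "model.safetensors"

    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Stream in 1 MiB chunks so the ~1.4 GB file never sits fully in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    # Should print the oid recorded in the pointer after this commit:
    # dc4b917fde90db6334cb3c9da523e6c0ebd33e7b8744a79c1de5d5c5cac953e2
    print(digest.hexdigest())
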
special_tokens_map.json CHANGED
@@ -1,36 +1,50 @@
  {
  "cls_token": {
- "content": "[CLS]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false
  },
- "mask_token": {
- "content": "[MASK]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false
  },
  "pad_token": {
- "content": "[PAD]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false
  },
  "sep_token": {
- "content": "[SEP]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false
  },
  "unk_token": {
- "content": "[UNK]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false
  }
 
  {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
  "cls_token": {
+ "content": "<s>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  },
+ "eos_token": {
+ "content": "</s>",
  "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  },
  "pad_token": {
+ "content": "<pad>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  },
  "sep_token": {
+ "content": "</s>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  },
  "unk_token": {
+ "content": "<unk>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  }
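
The special-token inventory moves from BERT-style bracket tokens ([CLS], [SEP], [PAD], [UNK], [MASK]) to the RoBERTa convention (<s>, </s>, <pad>, <unk>, <mask>), adds explicit bos/eos entries, and flips normalized to true. Note "lstrip": true on <mask> alone, so the mask token absorbs the space before it, as RoBERTa expects. A small sketch to confirm what a loaded tokenizer reports, using the same placeholder repo id as above:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("hugo-albert/pos-tagger")  # placeholder

    # Expect the RoBERTa-style inventory after this commit, e.g.
    # {'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>',
    #  'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>',
    #  'mask_token': '<mask>'}
    print(tok.special_tokens_map)
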
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,57 +1,58 @@
  {
  "added_tokens_decoder": {
  "0": {
- "content": "[MASK]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
  "1": {
- "content": "[PAD]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
- "3": {
- "content": "[UNK]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
- "4": {
- "content": "[CLS]",
  "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
- "5": {
- "content": "[SEP]",
- "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false,
  "special": true
  }
  },
  "clean_up_tokenization_spaces": true,
- "cls_token": "[CLS]",
- "do_basic_tokenize": true,
- "do_lower_case": false,
- "mask_token": "[MASK]",
  "model_max_length": 512,
- "never_split": null,
- "pad_token": "[PAD]",
- "sep_token": "[SEP]",
- "strip_accents": false,
- "tokenize_chinese_chars": true,
- "tokenizer_class": "BertTokenizer",
- "unk_token": "[UNK]"
  }
 
  {
+ "add_prefix_space": true,
  "added_tokens_decoder": {
  "0": {
+ "content": "<s>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
  "1": {
+ "content": "<pad>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
+ "2": {
+ "content": "</s>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
+ "3": {
+ "content": "<unk>",
  "lstrip": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false,
  "special": true
  },
+ "4": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": true,
  "rstrip": false,
  "single_word": false,
  "special": true
  }
  },
+ "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "errors": "replace",
+ "mask_token": "<mask>",
+ "max_len": 512,
  "model_max_length": 512,
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "tokenizer_class": "RobertaTokenizer",
+ "trim_offsets": true,
+ "unk_token": "<unk>"
  }
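
The tokenizer backend itself changes from BertTokenizer (whose WordPiece options do_lower_case, strip_accents, never_split, and tokenize_chinese_chars are dropped) to RobertaTokenizer, with the byte-level BPE options add_prefix_space, errors, and trim_offsets added and the ids in added_tokens_decoder renumbered to the RoBERTa layout 0-4. For a token-classification model, add_prefix_space=true matters because it allows pre-split input, as in this brief sketch under the same placeholder repo id:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("hugo-albert/pos-tagger")  # placeholder

    # With tokenizer.json present, AutoTokenizer resolves to the fast variant.
    print(type(tok).__name__)  # e.g. RobertaTokenizerFast

    # add_prefix_space=true lets one-word-per-item input tokenize the same
    # way as words inside running text (required for is_split_into_words).
    enc = tok(["El", "perro", "ladra"], is_split_into_words=True)
    print(tok.convert_ids_to_tokens(enc["input_ids"]))
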
vocab.json ADDED
The diff for this file is too large to render. See raw diff