rdecoupes committed
Commit 122371a (1 parent: ff48425)

End of training
.gitattributes ADDED
@@ -0,0 +1,2 @@
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+ training_args.bin filter=lfs diff=lfs merge=lfs -text
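These two attribute lines are standard Git LFS tracking rules (the form `git lfs track <file>` writes): the two binaries are stored as LFS objects rather than in the Git history itself. A minimal sketch of fetching the resolved weights with the huggingface_hub client; the repo id below is a placeholder, since the actual repository name is not shown on this page:

```python
# Minimal sketch, assuming the huggingface_hub client library.
# "rdecoupes/model" is a placeholder repo id, not the real repository name.
from huggingface_hub import hf_hub_download

# Downloads the actual weight file, transparently resolving the LFS pointer.
path = hf_hub_download(repo_id="rdecoupes/model", filename="pytorch_model.bin")
print(path)
```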
.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
config.json CHANGED
@@ -37,7 +37,7 @@
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
- "transformers_version": "4.20.1",
+ "transformers_version": "4.30.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 32005
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:333761c1fb1556877821522040d3716b3fc7bb26930791d08f9f4474f75a1382
+ size 1342611565
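What Git actually versions here is only this three-line LFS pointer; the sha256 oid identifies the ~1.34 GB weight blob held on the LFS server. A self-contained sketch of parsing such a pointer:

```python
# Minimal sketch: parse the three-line Git LFS pointer shown above.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:333761c1fb1556877821522040d3716b3fc7bb26930791d08f9f4474f75a1382\n"
    "size 1342611565"
)
info = parse_lfs_pointer(pointer)
print(f"{int(info['size']) / 1e9:.2f} GB")  # ~1.34 GB of float32 weights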
sentencepiece.bpe.model CHANGED
Binary files a/sentencepiece.bpe.model and b/sentencepiece.bpe.model differ
 
tokenizer.json CHANGED
@@ -5,66 +5,66 @@
  "added_tokens": [
    {
      "id": 0,
-     "special": true,
      "content": "<s>NOTUSED",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": false
+     "normalized": false,
+     "special": true
    },
    {
      "id": 1,
-     "special": true,
      "content": "<pad>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": false
+     "normalized": false,
+     "special": true
    },
    {
      "id": 2,
-     "special": true,
      "content": "</s>NOTUSED",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": false
+     "normalized": false,
+     "special": true
    },
    {
      "id": 3,
-     "special": true,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": false
+     "normalized": false,
+     "special": true
    },
    {
      "id": 5,
-     "special": true,
      "content": "<s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": false
+     "normalized": false,
+     "special": true
    },
    {
      "id": 6,
-     "special": true,
      "content": "</s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": false
+     "normalized": false,
+     "special": true
    },
    {
      "id": 32004,
-     "special": true,
      "content": "<mask>",
      "single_word": false,
      "lstrip": true,
      "rstrip": false,
-     "normalized": true
+     "normalized": true,
+     "special": true
    }
  ],
  "normalizer": {
tokenizer_config.json CHANGED
@@ -4,6 +4,7 @@
    "</s>NOTUSED"
  ],
  "bos_token": "<s>",
+ "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": {
@@ -14,11 +15,10 @@
    "rstrip": false,
    "single_word": false
  },
- "name_or_path": "camembert/camembert-large",
+ "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "sp_model_kwargs": {},
- "special_tokens_map_file": null,
  "tokenizer_class": "CamembertTokenizer",
  "unk_token": "<unk>"
 }
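The huge model_max_length value is not corruption: it is transformers' VERY_LARGE_INTEGER sentinel, int(1e30), used when no real maximum length is recorded for the tokenizer. The odd trailing digits come from float64 rounding:

```python
# The sentinel is int(1e30); 1e30 is a float64, so the nearest representable
# value leaks into the integer conversion.
print(int(1e30))  # 1000000000000000019884624838656
```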
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:03cc59526da8166036f5dd656c548647fedc1d2cf71889d9698b66d9c8e62f11
- size 3375
+ oid sha256:e91693c446d53b538d917698d366e08655a0cb5fc7b011ba2ec7bb464ee1ad8f
+ size 4027
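training_args.bin is the pickled TrainingArguments object that transformers' Trainer saves alongside the model, so the new oid/size simply reflects a fresh pickle from this training run. A minimal sketch of inspecting it (transformers must be installed, since unpickling imports the class):

```python
# Minimal sketch: inspect the pickled TrainingArguments.
import torch

# PyTorch >= 2.6 defaults to weights_only=True, which rejects pickled Python
# objects, so it is passed explicitly here.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs)
```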