empty-michael committed on
Commit
ce61d09
1 Parent(s): 540279c

Training in progress, step 500

Browse files
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "GPTNeoCodebookModel"
4
+ ],
5
+ "codebook_at": [
6
+ "attn",
7
+ "mlp"
8
+ ],
9
+ "codebook_kwargs": {},
10
+ "codebook_type": [
11
+ "vanilla",
12
+ "vanilla"
13
+ ],
14
+ "k_codebook": [
15
+ 16,
16
+ 16
17
+ ],
18
+ "kmeans_init": false,
19
+ "kmeans_init_examples": 1000,
20
+ "kmeans_kwargs": {
21
+ "batch_size": 24576,
22
+ "n_init": "auto"
23
+ },
24
+ "kmeans_path": "/.cache/cb_volume/huggingface/kmeans_embeddings.pt",
25
+ "layers_to_snap": [
26
+ 0
27
+ ],
28
+ "loss": "aeloss",
29
+ "model_type": "codebook",
30
+ "num_codebooks": [
31
+ 1,
32
+ 1
33
+ ],
34
+ "num_codes": [
35
+ 10000,
36
+ 10000
37
+ ],
38
+ "replace_codes": false,
39
+ "similarity_metric": "inner_product",
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.35.2"
42
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4960d699700eed121e8e126bd990da04491799d73bb84cf09812efcf475cda1c
3
+ size 346561224
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 2048,
19
+ "pad_token": null,
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4affbe5acc6d739ec10ec63e7415f2041c30813db9f74a83ae6d1f1aa5841a07
3
+ size 4856
vocab.json ADDED
The diff for this file is too large to render. See raw diff