l3utterfly committed
Commit 30339f8
1 Parent(s): bfaa298

fixed eos token

Files changed (2):
  1. README.md +147 -1
  2. pytorch_model.bin +1 -1
README.md CHANGED
@@ -1,3 +1,149 @@
  ---
- license: mit
  ---
  ---
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: out
+   results: []
  ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.0`
+ ```yaml
+ base_model: /home/layla/src/text-generation-webui/models/phi-2
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: /home/layla/src/Layla-datasets/datasets_formatted/base/dailydialog.topicalchat.teatime.openhermes.jsonl
+     ds_type: json # see other options below
+     type: sharegpt
+     conversation: vicuna_v1.1
+
+ # datasets:
+ #   - path: /home/layla/src/Layla-datasets/datasets_formatted/airoboros_alpaca.jsonl
+ #     type: alpaca
+
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./out
+
+ sequence_len: 2048
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 1
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0000005
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: true
+
+ gradient_checkpointing: true
+ gradient_checkpointing_kwargs:
+   use_reentrant: True
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_ratio: 0.05
+ eval_steps: 0.1
+ eval_sample_packing: true
+ save_steps: 300
+ debug:
+ deepspeed: /home/layla/src/Layla-datasets/axolotl/configs/deepspeed/zero2.json # multi-gpu only
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ resize_token_embeddings_to_32x: true
+ special_tokens:
+   bos_token: "<|endoftext|>"
+   eos_token: "<|endoftext|>"
+   unk_token: "<|endoftext|>"
+   pad_token: "<|endoftext|>"
+ ```
+
+ </details><br>
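
Since the commit message is "fixed eos token", a quick way to confirm the special-token setup matches the `special_tokens` block in the config above is to load the tokenizer and inspect it. This is a minimal sketch, not part of the commit: it assumes the repository has been downloaded to the hypothetical local path `./phi-2-layla` and uses only standard `transformers` calls.

```python
from transformers import AutoConfig, AutoTokenizer

# Hypothetical local path to this repository; substitute the actual model id or directory.
model_path = "./phi-2-layla"

tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)

# The axolotl config maps bos/eos/unk/pad to "<|endoftext|>" (phi-2's default special token).
print("eos token:", tokenizer.eos_token, "id:", tokenizer.eos_token_id)
print("bos token:", tokenizer.bos_token, "id:", tokenizer.bos_token_id)
print("pad token:", tokenizer.pad_token, "id:", tokenizer.pad_token_id)

# Generation stops when the model emits the eos id, so it should agree with the model config.
print("model config eos_token_id:", config.eos_token_id)
```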
+
+ # out
+
+ This model is a full fine-tune of a local copy of phi-2 (see the axolotl config above) on the conversation dataset named there.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.8072
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 2
+ - eval_batch_size: 2
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 5
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 40 (see the sketch after this list)
+ - total_eval_batch_size: 10
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 17
+ - num_epochs: 1
+
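
The reported totals follow from the settings above: each optimizer step sees micro_batch_size × gradient_accumulation_steps × num_devices examples. A minimal sketch of that arithmetic (plain Python, no dependencies; the variable names are only for illustration):

```python
# Effective batch sizes implied by the hyperparameters above.
micro_batch_size = 2              # per-device train batch size (train_batch_size)
gradient_accumulation_steps = 4   # gradients accumulated before each optimizer step
num_devices = 5                   # GPUs used for training (multi-GPU via DeepSpeed ZeRO-2)

total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
print(total_train_batch_size)     # 2 * 4 * 5 = 40, matching total_train_batch_size

eval_batch_size = 2               # per-device eval batch size
total_eval_batch_size = eval_batch_size * num_devices
print(total_eval_batch_size)      # 2 * 5 = 10 (no gradient accumulation at eval time)
```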
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 0.9616        | 0.0   | 1    | 1.0031          |
+ | 0.9489        | 0.1   | 372  | 0.8825          |
+ | 0.987         | 0.2   | 744  | 0.8487          |
+ | 0.818         | 0.3   | 1116 | 0.8313          |
+ | 0.8389        | 0.4   | 1488 | 0.8212          |
+ | 0.9015        | 0.5   | 1860 | 0.8146          |
+ | 0.8237        | 0.6   | 2232 | 0.8108          |
+ | 0.7562        | 0.7   | 2604 | 0.8088          |
+ | 0.8776        | 0.8   | 2976 | 0.8078          |
+ | 0.8703        | 0.9   | 3348 | 0.8072          |
+
+
+ ### Framework versions
+
+ - Transformers 4.39.0.dev0
+ - Pytorch 2.2.0
+ - Datasets 2.17.1
+ - Tokenizers 0.15.0
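
Because the training data was formatted with `conversation: vicuna_v1.1`, inference presumably works best with the same prompt template. Below is a minimal generation sketch, not part of the commit: it assumes the weights live at the hypothetical local path `./phi-2-layla` and that the standard Vicuna v1.1 system prompt with `USER:`/`ASSISTANT:` turn markers is the right template; only standard `transformers` calls are used.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./phi-2-layla"  # hypothetical local path; substitute the actual model id

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.bfloat16, device_map="auto"
)

# Vicuna v1.1-style prompt, matching the `conversation: vicuna_v1.1` setting used in training.
prompt = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions. "
    "USER: Hello, who are you? ASSISTANT:"
)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    eos_token_id=tokenizer.eos_token_id,  # the token this commit fixes
)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```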
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:024f478c456c314750cefb9bd9090614652abbddbd7b1ad4854e32c70c005f75
+ oid sha256:5971ffb17474e6612e8a47694bde37416a78f1505e53fd7f9fa7d4f7fa09ebaa
  size 5559427324
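
Since `pytorch_model.bin` is stored via Git LFS, the pointer above only records the blob's SHA-256 and size. A small sketch for checking that a downloaded copy matches the new pointer (Python standard library only; the file is assumed to be the fully downloaded weights in the current directory, not the pointer file):

```python
import hashlib
from pathlib import Path

# New LFS pointer values from this commit.
expected_oid = "5971ffb17474e6612e8a47694bde37416a78f1505e53fd7f9fa7d4f7fa09ebaa"
expected_size = 5559427324

path = Path("pytorch_model.bin")

# Hash the file in 1 MiB chunks so the ~5.6 GB blob never has to fit in memory.
sha256 = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

print("size ok:", path.stat().st_size == expected_size)
print("oid ok:", sha256.hexdigest() == expected_oid)
```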