mtasic85 committed on
Commit
cb71291
1 Parent(s): 463b478
Files changed (1) hide show
  1. scripts/TRAIN.md +15 -31
scripts/TRAIN.md CHANGED
@@ -19,28 +19,15 @@ python -B train_tokenizer.py
19
 
20
  ```bash
21
  python -B prepare_pretrain_dataset.py
22
- ```
23
-
24
- ```python
25
- from litdata import StreamingDataset, StreamingDataLoader, TokensLoader
26
-
27
- dataset = StreamingDataset(
28
- input_dir='../pretrain-data/',
29
- item_loader=TokensLoader(block_size=2048 + 1),
30
- )
31
-
32
- print(len(dataset))
33
  ```
34
 
35
  ## Model
36
 
37
- ### Pretrain
38
 
39
  ```bash
40
  litgpt pretrain --config ./pretrain-model.yaml
41
- ```
42
-
43
- ```bash
44
  litgpt convert_from_litgpt out/pretrain/final/ out/converted_pretrain
45
  cp config.json out/pretrain/final/
46
  cp config.json out/converted_pretrain/
@@ -58,25 +45,22 @@ save_file(state_dict, 'out/converted_pretrain/model.safetensors')
58
 
59
  ```bash
60
  litgpt convert_pretrained_checkpoint out/pretrain/final/ out/pretrain_checkpoint/final/
 
 
61
  litgpt pretrain --config ./contrain-model.yaml
 
 
62
  ```
63
 
64
- ## Evaluate
65
-
66
- ```bash
67
- litgpt evaluate --tasks 'hellaswag,gsm8k,truthfulqa_mc2,mmlu,winogrande,arc_challenge' --out_dir 'evaluate-quick/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
68
-
69
- litgpt evaluate --tasks 'leaderboard' --out_dir 'evaluate-leaderboard/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
70
-
71
- litgpt evaluate --tasks 'bbh_zeroshot,bbh_fewshot,bbh_cot_fewshot,bbh_cot_zeroshot' --out_dir 'evaluate-bigbenchhard/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
72
-
73
- litgpt evaluate --tasks 'mmlu,mmlu_pro' --out_dir 'evaluate-mmlu/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
74
-
75
- litgpt evaluate --tasks 'arc_challenge,boolq,gpqa,hellaswag,openbookqa,piqa,truthfulqa_mc2,winogrande' --out_dir 'evaluate-reasoning/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
76
-
77
- litgpt evaluate --tasks 'mmlu_multilingual,mgsm' --out_dir 'evaluate-multilinguals/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
78
 
79
- litgpt evaluate --tasks 'gsm8k,mathqa' --out_dir 'evaluate-math/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
 
 
80
 
81
- litgpt evaluate --tasks 'qasper' --out_dir 'evaluate-long/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
 
 
82
  ```
 
19
 
20
  ```bash
21
  python -B prepare_pretrain_dataset.py
22
+ python -B prepare_contrain_dataset.py
 
 
 
 
 
 
 
 
 
 
23
  ```
24
 
25
  ## Model
26
 
27
+ ### Pretraining
28
 
29
  ```bash
30
  litgpt pretrain --config ./pretrain-model.yaml
 
 
 
31
  litgpt convert_from_litgpt out/pretrain/final/ out/converted_pretrain
32
  cp config.json out/pretrain/final/
33
  cp config.json out/converted_pretrain/
 
45
 
46
  ```bash
47
  litgpt convert_pretrained_checkpoint out/pretrain/final/ out/pretrain_checkpoint/final/
48
+ cp config.json out/pretrain_checkpoint/final/
49
+
50
  litgpt pretrain --config ./contrain-model.yaml
51
+ litgpt convert_from_litgpt out/contrain/final/ out/converted_contrain
52
+ cp config.json out/converted_contrain/
53
  ```
54
 
55
+ ```python
56
+ import torch
57
+ from safetensors.torch import save_file
 
 
 
 
 
 
 
 
 
 
 
58
 
59
+ state_dict = torch.load('out/converted_contrain/model.pth', map_location='cpu')
60
+ save_file(state_dict, 'out/converted_contrain/model.safetensors')
61
+ ```
62
 
63
+ ```bash
64
+ cp out/converted_contrain/model.pth ./
65
+ cp out/converted_contrain/model.safetensors ./
66
  ```