uukuguy committed
Commit f431c04
1 parent: c423f8a
This view is limited to 50 files because it contains too many changes. See the raw diff.
Files changed (50)
  1. .gitattributes +3 -0
  2. README.md +103 -0
  3. added_tokens.json +6 -0
  4. config.json +28 -0
  5. generation_config.json +6 -0
  6. pytorch_model-00001-of-00002.bin +3 -0
  7. pytorch_model-00002-of-00002.bin +3 -0
  8. pytorch_model.bin.index.json +298 -0
  9. special_tokens_map.json +6 -0
  10. tasks/speechless-tora-code-7b-v1.0/merge_peft_adapters.sh +13 -0
  11. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/all_results.json +11 -0
  12. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/README.md +21 -0
  13. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_config.json +26 -0
  14. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model.bin +3 -0
  15. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model/README.md +34 -0
  16. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model/adapter_config.json +26 -0
  17. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model/adapter_model.bin +3 -0
  18. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/added_tokens.json +6 -0
  19. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/optimizer.pt +3 -0
  20. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/rng_state_0.pth +3 -0
  21. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/rng_state_1.pth +3 -0
  22. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/scheduler.pt +3 -0
  23. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/special_tokens_map.json +6 -0
  24. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/tokenizer.model +3 -0
  25. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/tokenizer_config.json +52 -0
  26. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/trainer_state.json +0 -0
  27. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/training_args.bin +3 -0
  28. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3092/adapter_model/README.md +34 -0
  29. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3092/adapter_model/adapter_config.json +26 -0
  30. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3092/adapter_model/adapter_model.bin +3 -0
  31. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/completed +0 -0
  32. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/eval_results.json +7 -0
  33. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/logs/finetune_20231009_061922.log +24 -0
  34. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/logs/finetune_20231009_122312.log +88 -0
  35. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/metrics.json +1 -0
  36. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/runs/Oct09_06-19-22_I156f1f3f410070163e/events.out.tfevents.1696832467.I156f1f3f410070163e.7536.0 +3 -0
  37. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/runs/Oct09_12-23-12_I156f1f3f410070163e/events.out.tfevents.1696854295.I156f1f3f410070163e.1701.0 +3 -0
  38. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/runs/Oct09_12-23-12_I156f1f3f410070163e/events.out.tfevents.1696924201.I156f1f3f410070163e.1701.1 +3 -0
  39. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/train_results.json +7 -0
  40. tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/trainer_state.json +0 -0
  41. tasks/speechless-tora-code-7b-v1.0/run_api_server.sh +11 -0
  42. tasks/speechless-tora-code-7b-v1.0/run_finetune.sh +59 -0
  43. tasks/speechless-tora-code-7b-v1.0/task.env +47 -0
  44. tasks/speechless-tora-code-7b-v1.0/task_a100_40gx2.env +49 -0
  45. tasks/speechless-tora-code-7b-v1.0/task_a100_40gx4.env +49 -0
  46. tasks/speechless-tora-code-7b-v1.0/task_a100_80gx2.env +47 -0
  47. tasks/speechless-tora-code-7b-v1.0/task_a40_48gx2.env +47 -0
  48. tasks/speechless-tora-code-7b-v1.0/task_a40_48gx4.env +47 -0
  49. tokenizer.json +0 -0
  50. tokenizer.model +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ pytorch_model-00001-of-00002.bin filter=lfs diff=lfs merge=lfs -text
+ pytorch_model-00002-of-00002.bin filter=lfs diff=lfs merge=lfs -text
+ tokenizer.model filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,106 @@
  ---
+ language:
+ - en
+ library_name: transformers
+ pipeline_tag: text-generation
+ datasets:
+ - jondurbin/airoboros-2.2
+ - Open-Orca/OpenOrca
+ - garage-bAInd/Open-Platypus
+ - WizardLM/WizardLM_evol_instruct_V2_196k
+ - TokenBender/python_eval_instruct_51k
+ tags:
+ - llama-2
+ - code
  license: llama2
+ model-index:
+ - name: SpeechlessCoder
+   results:
+   - task:
+       type: text-generation
+     dataset:
+       type: openai_humaneval
+       name: HumanEval
+     metrics:
+     - name: pass@1
+       type: pass@1
+       value: 0.0
+       verified: false
  ---
+
+ <p><h1> speechless-tora-code-7b-v1.0 </h1></p>
+
+ The following datasets were used to fine-tune llm_agents/tora-code-7b-v1.0 in order to improve the model's reasoning and planning abilities.
+
+ 201,981 samples in total:
+ - jondurbin/airoboros-2.2: filtered to the categories related to coding, reasoning, and planning; 23,462 samples.
+ - Open-Orca/OpenOrca: filtered to the 'cot' category of the 1M-GPT4 dataset; 74,440 samples.
+ - garage-bAInd/Open-Platypus: 100%; 24,926 samples.
+ - WizardLM/WizardLM_evol_instruct_V2_196k: the coding-conversation part; 30,185 samples.
+ - TokenBender/python_eval_instruct_51k: samples with "python" in the output; 40,309 samples.
+ - Spider: 8,659 samples.
+
+ | Hyperparameter | Value |
+ |------ | ------ |
+ | lr | 2e-4 |
+ | lr_scheduler_type | cosine |
+ | weight_decay | 0.0 |
+ | optim | paged_adamw_8bit |
+ | flash_attention | True |
+ | rerope | False |
+ | max_new_tokens | 4096 |
+ | num_train_epochs | 2 |
+ | bits | 4 |
+ | lora_r | 64 |
+ | lora_alpha | 16 |
+ | lora_dropout | 0.05 |
+ | double_quant | True |
+ | quant_type | nf4 |
+ | dataset_format | airoboros |
+ | mini_batch_size | 2 |
+ | gradient_accumulation_steps | 32 |
+ | bf16 | True |
+
+ Trained on A100-40G x 4.
+
+ | Metric | Value |
+ |------ | ------ |
+ | epoch | 2.0 |
+ | train_loss | 0.4708 |
+ | train_runtime | 12:12:53.64 |
+ | train_samples_per_second | 9.002 |
+ | train_steps_per_second | 0.07 |
+ | eval_loss | 0.4851 |
+ | eval_runtime | 0:00:10.31 |
+ | eval_samples_per_second | 19.385 |
+ | eval_steps_per_second | 4.846 |
+
+ | Metric | Value |
+ | --- | --- |
+ | humaneval-python | |
+
+ [Big Code Models Leaderboard](https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard)
+
+ CodeLlama-34B-Python: 53.29
+
+ CodeLlama-34B-Instruct: 50.79
+
+ CodeLlama-13B-Instruct: 50.6
+
+ CodeLlama-34B: 45.11
+
+ CodeLlama-13B-Python: 42.89
+
+ CodeLlama-13B: 35.07
+
+ [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+
+ | Metric | Value |
+ | --- | --- |
+ | ARC | |
+ | HellaSwag | |
+ | MMLU | |
+ | TruthfulQA | |
+ | Average | |
+
+
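The card above stops short of an inference example. As a minimal sketch, the merged fp16 weights in this commit should load as a standard `LlamaForCausalLM`; the Hub repo id and the prompt below are assumptions, and the card does not specify a prompt template:

```python
# Minimal usage sketch; the repo id is an assumption, not stated in this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "uukuguy/speechless-tora-code-7b-v1.0"  # assumed Hub id for this repo
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # matches "torch_dtype": "float16" in config.json
    device_map="auto",
)

prompt = "Write a Python function that returns the n-th Fibonacci number."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```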
added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "</s>": 2,
+   "<pad>": 32000,
+   "<s>": 1,
+   "<unk>": 0
+ }
config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 16384,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 1000000,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.34.0",
+   "use_cache": true,
+   "vocab_size": 32001
+ }
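Two details in this config are worth flagging: `vocab_size` is 32001 because the fine-tune added a dedicated `<pad>` token (id 32000, see `added_tokens.json`), and `rope_theta`/`max_position_embeddings` carry over the long-context RoPE settings of the CodeLlama-derived base model. A quick consistency check, as a sketch assuming a local clone of this repo:

```python
# Sketch: cross-check config.json against added_tokens.json from this commit.
import json

with open("config.json") as f:
    config = json.load(f)
with open("added_tokens.json") as f:
    added_tokens = json.load(f)

assert config["vocab_size"] == 32001
# "<pad>" is the one token beyond the base 32000-entry Llama vocabulary.
assert added_tokens["<pad>"] == 32000
print("pad token id fits inside the declared vocab")
```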
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.34.0"
+ }
pytorch_model-00001-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34de0eeb73499dff4312178870a24cc62d15caab73c13dcade8b0472e54dbe68
+ size 9976628314
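What the diff stores here is a Git LFS pointer, not the ~10 GB shard itself; `git lfs pull` fetches the real file. As a sketch, a downloaded shard can be verified against the pointer's `oid` line:

```python
# Sketch: verify a downloaded shard against the sha256 recorded in its LFS pointer.
import hashlib

EXPECTED = "34de0eeb73499dff4312178870a24cc62d15caab73c13dcade8b0472e54dbe68"

digest = hashlib.sha256()
with open("pytorch_model-00001-of-00002.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED, "shard is corrupt or still an LFS pointer"
```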
pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0426b2042db9f55a1bcac302cc6e7b491eaeb1c9250e16749a1ec5529a48cd3b
+ size 3500318979
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 13476847616
+   },
+   "weight_map": {
+     "lm_head.weight": "pytorch_model-00002-of-00002.bin",
+     "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.norm.weight": "pytorch_model-00002-of-00002.bin"
+   }
+ }
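This index is what lets `transformers` resolve each tensor to its shard when loading the split checkpoint. A sketch for inspecting it from a local clone:

```python
# Sketch: summarize the shard layout described by pytorch_model.bin.index.json.
import json
from collections import Counter

with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

for shard, n_tensors in sorted(Counter(index["weight_map"].values()).items()):
    print(f"{shard}: {n_tensors} tensors")

# total_size counts bytes across all tensors: 13,476,847,616 bytes at fp16
# (2 bytes per parameter) is roughly 6.74B parameters, as expected for a
# Llama-7B-class model.
print(index["metadata"]["total_size"])
```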
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>"
+ }
tasks/speechless-tora-code-7b-v1.0/merge_peft_adapters.sh ADDED
@@ -0,0 +1,13 @@
+ #!/usr/bin/env bash
+ # From speechless-code-mistral-7b-v1.0
+ SCRIPT_PATH=$(cd $(dirname ${BASH_SOURCE[0]}); pwd)
+ PARENT_PATH=$(cd "${SCRIPT_PATH}/.." ; pwd)
+
+ source ${SCRIPT_PATH}/task.env
+
+ CHECKPOINT_DIR=${TASK_CHECKPOINT_DIR}/checkpoint-3092/adapter_model
+
+ python ${SCRIPT_PATH}/../../scripts/merge_peft_adapters.py \
+     --base_model_name_or_path ${BASE_MODEL_PATH} \
+     --peft_model_path ${CHECKPOINT_DIR} \
+     --merged_model_name_or_path ${TEST_MODEL_PATH} \
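`scripts/merge_peft_adapters.py` itself is not part of this diff; the usual shape of such a merge with `peft` is sketched below. This is an illustration of the technique, not the repo's actual script; the paths mirror the wrapper's variables and the output directory is an assumption:

```python
# Sketch of a LoRA-adapter merge; merge_peft_adapters.py is not included in this commit.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model_name_or_path = "/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0"
peft_model_path = "outputs/speechless-tora-code-7b-v1.0/checkpoint-3092/adapter_model"
merged_model_name_or_path = "speechless-tora-code-7b-v1.0"  # assumed output directory

base = AutoModelForCausalLM.from_pretrained(base_model_name_or_path, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(base, peft_model_path)
model = model.merge_and_unload()  # fold the LoRA deltas into the base weights

# max_shard_size matches the training args dumped in the logs below ('5GB').
model.save_pretrained(merged_model_name_or_path, max_shard_size="5GB")
```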
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/all_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "epoch": 2.0,
+   "eval_loss": 0.5871890783309937,
+   "eval_runtime": 15.5979,
+   "eval_samples_per_second": 12.822,
+   "eval_steps_per_second": 6.411,
+   "train_loss": 0.5890884618112218,
+   "train_runtime": 69889.4305,
+   "train_samples_per_second": 5.664,
+   "train_steps_per_second": 0.044
+ }
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/README.md ADDED
@@ -0,0 +1,21 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+ ### Framework versions
+
+
+ - PEFT 0.5.0
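Expressed as a `transformers` `BitsAndBytesConfig`, the settings listed above amount to the following (a reconstruction for reference, not a file from this commit):

```python
# Sketch: BitsAndBytesConfig equivalent to the quantization settings listed above.
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_4bit: True
    bnb_4bit_quant_type="nf4",              # bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant=True,         # bnb_4bit_use_double_quant: True
    bnb_4bit_compute_dtype=torch.bfloat16,  # bnb_4bit_compute_dtype: bfloat16
)
```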
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16.0,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "revision": null,
+   "target_modules": [
+     "down_proj",
+     "up_proj",
+     "q_proj",
+     "o_proj",
+     "gate_proj",
+     "v_proj",
+     "k_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
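The same adapter expressed as a `peft.LoraConfig` (a sketch; each argument maps one-to-one onto a field of the JSON above). Adapting all seven attention and MLP projections follows the QLoRA recipe of targeting every linear layer:

```python
# Sketch: LoraConfig mirroring adapter_config.json above.
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,                  # "r": 64
    lora_alpha=16.0,       # "lora_alpha": 16.0
    lora_dropout=0.05,     # "lora_dropout": 0.05
    bias="none",           # "bias": "none"
    task_type="CAUSAL_LM",
    target_modules=[       # every attention and MLP projection in each layer
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```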
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe53a63399829bb762760aac51f487a74255439a8ba937191ccecf3a72040093
+ size 639792909
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+ ### Framework versions
+
+ - PEFT 0.5.0
+
+ - PEFT 0.5.0
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16.0,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "revision": null,
+   "target_modules": [
+     "down_proj",
+     "up_proj",
+     "q_proj",
+     "o_proj",
+     "gate_proj",
+     "v_proj",
+     "k_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe53a63399829bb762760aac51f487a74255439a8ba937191ccecf3a72040093
+ size 639792909
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "</s>": 2,
+   "<pad>": 32000,
+   "<s>": 1,
+   "<unk>": 0
+ }
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ded5b21ac4123d95f741097b6ba2f10e14abf2fea6dc83011548234db36631c8
+ size 320970527
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b465ff783be6837c8cdf3aa9aa36366d7695d1fdb6b4888ec0c8a7238379af77
+ size 15735
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f79c2a3250c7c5ce53d25ba72962b46362a9f39b3282d1958c6bee70bd711b2e
+ size 15735
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:403f7d4ddb29af77fdfd9e883d0c8935063872d5e73dea699394bc703f947ea8
+ size 627
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>"
+ }
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32000": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "padding_side": "left",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "tokenizer_file": null,
+   "unk_token": "<unk>",
+   "use_default_system_prompt": true
+ }
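Two settings here matter at inference time: `padding_side` is `left`, which decoder-only models need for correct batched generation, and `<pad>` (id 32000) is the one token added on top of the base Llama vocabulary, matching `vocab_size: 32001` in `config.json`. A quick check, as a sketch against a local checkpoint directory:

```python
# Sketch: confirm padding setup from this checkpoint's tokenizer files.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-3000")  # assumed local path
assert tok.padding_side == "left"
assert tok.pad_token == "<pad>" and tok.pad_token_id == 32000
print(len(tok))  # 32001: the base Llama vocab plus <pad>
```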
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88c746833b5a7f3b81a2dbfe4fd0d18678e9740460c1a97584c75375f13a6c11
+ size 4667
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3092/adapter_model/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+ ### Framework versions
+
+ - PEFT 0.5.0
+
+ - PEFT 0.5.0
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3092/adapter_model/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16.0,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "gate_proj",
+     "down_proj",
+     "v_proj",
+     "k_proj",
+     "o_proj",
+     "up_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/checkpoint-3092/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064bb620173b89e327154ada8f72dbd1eeea1d16d1ce351c2240385268dc3dac
+ size 639792909
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/completed ADDED
File without changes
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/eval_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "epoch": 2.0,
+   "eval_loss": 0.5871890783309937,
+   "eval_runtime": 15.5979,
+   "eval_samples_per_second": 12.822,
+   "eval_steps_per_second": 6.411
+ }
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/logs/finetune_20231009_061922.log ADDED
@@ -0,0 +1,24 @@
+ 2023-10-09 06:19:22.839 | INFO | __main__:train:918 - args=Namespace(model_name_or_path='/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0', trust_remote_code=False, force_remove_overlength_samples=False, eval_dataset_size=0.005, max_train_samples=None, max_eval_samples=200, model_max_len=4096, dataset='/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl', dataset_format='airoboros', output_dir='./outputs/speechless-tora-code-7b-v1.0', overwrite_output_dir=False, do_train=True, do_eval=True, do_predict=False, evaluation_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=8, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=8, eval_accumulation_steps=None, eval_delay=0, learning_rate=0.0002, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.3, num_train_epochs=2, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, warmup_ratio=0.005, warmup_steps=20, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='./outputs/speechless-tora-code-7b-v1.0/runs/Oct09_06-19-22_I156f1f3f410070163e', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=False, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<IntervalStrategy.STEPS: 'steps'>, save_steps=100, save_total_limit=1, save_safetensors=False, save_on_each_node=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=10042, data_seed=10042, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=100, dataloader_num_workers=3, past_index=-1, run_name='20231009-061916', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, ignore_data_skip=False, sharded_ddp=[], fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, deepspeed=None, label_smoothing_factor=0.0, optim=<OptimizerNames.PAGED_ADAMW_8BIT: 'paged_adamw_8bit'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=False, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=False, hub_always_push=False, gradient_checkpointing=True, include_inputs_for_metrics=False, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, include_tokens_per_second=False, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, task_name='speechless-tora-code-7b-v1.0', flash_attention=True, rerope=False, rerope_window=None, wandb=None, sample_packing=False, cache_dir=None, 
full_finetune=False, adam8bit=False, double_quant=True, quant_type='nf4', bits=4, lora_r=64, lora_alpha=16.0, lora_dropout=0.05, max_memory_MB=70000, mpt=False, max_shard_size='5GB', repeat_steps=0, distributed_state=Distributed environment: MULTI_GPU Backend: nccl
+ Num processes: 2
+ Process index: 0
+ Local process index: 0
+ Device: cuda:0
+ , _n_gpu=1, __cached__setup_devices=device(type='cuda', index=0), deepspeed_plugin=None)
+ 2023-10-09 06:19:22.840 | INFO | __main__:get_accelerate_model:303 - loading base model /opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0...
+ 2023-10-09 06:19:22.848 | INFO | __main__:train:918 - args=Namespace(model_name_or_path='/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0', trust_remote_code=False, force_remove_overlength_samples=False, eval_dataset_size=0.005, max_train_samples=None, max_eval_samples=200, model_max_len=4096, dataset='/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl', dataset_format='airoboros', output_dir='./outputs/speechless-tora-code-7b-v1.0', overwrite_output_dir=False, do_train=True, do_eval=True, do_predict=False, evaluation_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=8, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=8, eval_accumulation_steps=None, eval_delay=0, learning_rate=0.0002, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.3, num_train_epochs=2, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, warmup_ratio=0.005, warmup_steps=20, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='./outputs/speechless-tora-code-7b-v1.0/runs/Oct09_06-19-22_I156f1f3f410070163e', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=False, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<IntervalStrategy.STEPS: 'steps'>, save_steps=100, save_total_limit=1, save_safetensors=False, save_on_each_node=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=10042, data_seed=10042, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=1, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=100, dataloader_num_workers=3, past_index=-1, run_name='20231009-061916', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, ignore_data_skip=False, sharded_ddp=[], fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, deepspeed=None, label_smoothing_factor=0.0, optim=<OptimizerNames.PAGED_ADAMW_8BIT: 'paged_adamw_8bit'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=False, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=False, hub_always_push=False, gradient_checkpointing=True, include_inputs_for_metrics=False, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, include_tokens_per_second=False, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, task_name='speechless-tora-code-7b-v1.0', flash_attention=True, rerope=False, rerope_window=None, wandb=None, sample_packing=False, cache_dir=None, 
full_finetune=False, adam8bit=False, double_quant=True, quant_type='nf4', bits=4, lora_r=64, lora_alpha=16.0, lora_dropout=0.05, max_memory_MB=70000, mpt=False, max_shard_size='5GB', repeat_steps=0, distributed_state=Distributed environment: MULTI_GPU Backend: nccl
+ Num processes: 2
+ Process index: 1
+ Local process index: 1
+ Device: cuda:1
+ , _n_gpu=1, __cached__setup_devices=device(type='cuda', index=1), deepspeed_plugin=None)
+ 2023-10-09 06:19:22.849 | INFO | __main__:get_accelerate_model:303 - loading base model /opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0...
+ 2023-10-09 06:19:51.853 | INFO | __main__:get_accelerate_model:350 - adding LoRA modules...
+ 2023-10-09 06:19:53.297 | INFO | __main__:get_accelerate_model:350 - adding LoRA modules...
+ 2023-10-09 06:21:03.048 | INFO | __main__:train:942 - loaded model
+ 2023-10-09 06:21:03.150 | INFO | __main__:format_dataset:639 - ---------- Formatting dataset for Airoboros. ----------
+ 2023-10-09 06:21:03.160 | INFO | __main__:make_data_module:734 - ---------- Splitting dataset into train/eval ----------
+ 2023-10-09 06:21:03.205 | INFO | __main__:train:985 - *** Train ***
+ 2023-10-09 06:21:06.112 | INFO | __main__:train:942 - loaded model
+ 2023-10-09 06:21:06.212 | INFO | __main__:format_dataset:639 - ---------- Formatting dataset for Airoboros. ----------
+ 2023-10-09 06:21:06.222 | INFO | __main__:make_data_module:734 - ---------- Splitting dataset into train/eval ----------
+ 2023-10-09 06:21:06.265 | INFO | __main__:train:985 - *** Train ***
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/logs/finetune_20231009_122312.log ADDED
@@ -0,0 +1,88 @@
+ 2023-10-09 12:23:12.104 | INFO | __main__:train:918 - args=Namespace(model_name_or_path='/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0', trust_remote_code=False, force_remove_overlength_samples=False, eval_dataset_size=0.005, max_train_samples=None, max_eval_samples=200, model_max_len=4096, dataset='/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl', dataset_format='airoboros', output_dir='./outputs/speechless-tora-code-7b-v1.0', overwrite_output_dir=False, do_train=True, do_eval=True, do_predict=False, evaluation_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=4, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=16, eval_accumulation_steps=None, eval_delay=0, learning_rate=0.0002, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.3, num_train_epochs=2, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, warmup_ratio=0.005, warmup_steps=20, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='./outputs/speechless-tora-code-7b-v1.0/runs/Oct09_12-23-12_I156f1f3f410070163e', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=False, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<IntervalStrategy.STEPS: 'steps'>, save_steps=100, save_total_limit=1, save_safetensors=False, save_on_each_node=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=10042, data_seed=10042, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=1, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=100, dataloader_num_workers=3, past_index=-1, run_name='20231009-122305', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, ignore_data_skip=False, sharded_ddp=[], fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, deepspeed=None, label_smoothing_factor=0.0, optim=<OptimizerNames.PAGED_ADAMW_8BIT: 'paged_adamw_8bit'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=False, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=False, hub_always_push=False, gradient_checkpointing=True, include_inputs_for_metrics=False, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, include_tokens_per_second=False, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, task_name='speechless-tora-code-7b-v1.0', flash_attention=True, rerope=False, rerope_window=None, wandb=None, sample_packing=False, cache_dir=None, 
full_finetune=False, adam8bit=False, double_quant=True, quant_type='nf4', bits=4, lora_r=64, lora_alpha=16.0, lora_dropout=0.05, max_memory_MB=32000, mpt=False, max_shard_size='5GB', repeat_steps=0, distributed_state=Distributed environment: MULTI_GPU Backend: nccl
+ Num processes: 2
+ Process index: 1
+ Local process index: 1
+ Device: cuda:1
+ , _n_gpu=1, __cached__setup_devices=device(type='cuda', index=1), deepspeed_plugin=None)
+ 2023-10-09 12:23:12.105 | INFO | __main__:get_accelerate_model:303 - loading base model /opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0...
+ 2023-10-09 12:23:12.105 | INFO | __main__:train:918 - args=Namespace(model_name_or_path='/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0', trust_remote_code=False, force_remove_overlength_samples=False, eval_dataset_size=0.005, max_train_samples=None, max_eval_samples=200, model_max_len=4096, dataset='/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl', dataset_format='airoboros', output_dir='./outputs/speechless-tora-code-7b-v1.0', overwrite_output_dir=False, do_train=True, do_eval=True, do_predict=False, evaluation_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=4, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=16, eval_accumulation_steps=None, eval_delay=0, learning_rate=0.0002, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.3, num_train_epochs=2, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, warmup_ratio=0.005, warmup_steps=20, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='./outputs/speechless-tora-code-7b-v1.0/runs/Oct09_12-23-12_I156f1f3f410070163e', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=False, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<IntervalStrategy.STEPS: 'steps'>, save_steps=100, save_total_limit=1, save_safetensors=False, save_on_each_node=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=10042, data_seed=10042, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=100, dataloader_num_workers=3, past_index=-1, run_name='20231009-122305', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, ignore_data_skip=False, sharded_ddp=[], fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, deepspeed=None, label_smoothing_factor=0.0, optim=<OptimizerNames.PAGED_ADAMW_8BIT: 'paged_adamw_8bit'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=False, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=False, hub_always_push=False, gradient_checkpointing=True, include_inputs_for_metrics=False, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, include_tokens_per_second=False, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, task_name='speechless-tora-code-7b-v1.0', flash_attention=True, rerope=False, rerope_window=None, wandb=None, sample_packing=False, cache_dir=None, 
full_finetune=False, adam8bit=False, double_quant=True, quant_type='nf4', bits=4, lora_r=64, lora_alpha=16.0, lora_dropout=0.05, max_memory_MB=32000, mpt=False, max_shard_size='5GB', repeat_steps=0, distributed_state=Distributed environment: MULTI_GPU Backend: nccl
+ Num processes: 2
+ Process index: 0
+ Local process index: 0
+ Device: cuda:0
+ , _n_gpu=1, __cached__setup_devices=device(type='cuda', index=0), deepspeed_plugin=None)
+ 2023-10-09 12:23:12.105 | INFO | __main__:get_accelerate_model:303 - loading base model /opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0...
+ 2023-10-09 12:23:40.070 | INFO | __main__:get_accelerate_model:350 - adding LoRA modules...
+ 2023-10-09 12:23:40.822 | INFO | __main__:get_accelerate_model:350 - adding LoRA modules...
+ 2023-10-09 12:24:52.367 | INFO | __main__:train:942 - loaded model
+ 2023-10-09 12:24:52.467 | INFO | __main__:format_dataset:639 - ---------- Formatting dataset for Airoboros. ----------
+ 2023-10-09 12:24:52.478 | INFO | __main__:make_data_module:734 - ---------- Splitting dataset into train/eval ----------
+ 2023-10-09 12:24:52.523 | INFO | __main__:train:985 - *** Train ***
+ 2023-10-09 12:24:53.833 | INFO | __main__:train:942 - loaded model
+ 2023-10-09 12:24:53.932 | INFO | __main__:format_dataset:639 - ---------- Formatting dataset for Airoboros. ----------
+ 2023-10-09 12:24:53.942 | INFO | __main__:make_data_module:734 - ---------- Splitting dataset into train/eval ----------
+ 2023-10-09 12:24:53.987 | INFO | __main__:train:985 - *** Train ***
+ 2023-10-09 13:02:59.195 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 13:03:00.511 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 13:40:57.489 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 13:40:59.007 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 14:18:56.508 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 14:18:57.522 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 14:56:40.535 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 14:56:41.789 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 15:34:06.992 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 15:34:08.286 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 16:12:02.503 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 16:12:03.732 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 16:49:21.174 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 16:49:22.504 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 17:26:54.676 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 17:26:55.668 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 18:04:46.740 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 18:04:47.949 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 18:42:04.644 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 18:42:05.914 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 19:19:37.187 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 19:19:38.502 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 19:56:57.213 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 19:56:58.406 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 20:34:41.712 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 20:34:43.045 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 21:12:11.382 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 21:12:12.669 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 21:49:45.432 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 21:49:46.721 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 22:27:13.362 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 22:27:14.775 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 23:04:40.350 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 23:04:41.734 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 23:43:00.849 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-09 23:43:01.897 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 00:20:56.083 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 00:20:57.315 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 00:58:45.672 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 00:58:46.917 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 01:36:16.111 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 01:36:17.107 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 02:14:04.076 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 02:14:05.116 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 02:51:35.732 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 02:51:37.116 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 03:29:10.549 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 03:29:11.807 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 04:07:10.597 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 04:07:12.121 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 04:45:13.080 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 04:45:14.388 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 05:22:29.236 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 05:22:30.313 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 06:00:01.444 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 06:00:02.697 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 06:37:56.658 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 06:37:57.703 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 07:15:19.303 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 07:15:20.460 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 07:49:44.958 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 07:49:44.959 | INFO | __main__:save_model:262 - Saving PEFT checkpoint...
+ 2023-10-10 07:49:45.497 | INFO | __main__:train:996 - *** Evaluate ***
+ 2023-10-10 07:49:45.564 | INFO | __main__:train:996 - *** Evaluate ***
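The run's wall-clock time and checkpoint cadence can both be read off these timestamps: training starts at the last *** Train *** marker, ends at the *** Evaluate *** markers, and PEFT checkpoints land roughly every 38 minutes, which is what save_steps=100 implies at the observed 0.044 steps/s. A quick cross-check in plain Python (timestamps copied from the log above; metrics from metrics.json below):

    from datetime import datetime

    fmt = "%Y-%m-%d %H:%M:%S.%f"
    start = datetime.strptime("2023-10-09 12:24:53.987", fmt)  # last "*** Train ***"
    end = datetime.strptime("2023-10-10 07:49:45.564", fmt)    # last "*** Evaluate ***"
    print((end - start).total_seconds())  # ~69891.6 s, vs. train_runtime=69889.4305

    # save_steps=100 at train_steps_per_second=0.044 predicts the checkpoint interval:
    print(100 / 0.044 / 60)  # ~37.9 minutes, matching the gaps between save_model lines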
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/metrics.json ADDED
@@ -0,0 +1 @@
+ {"run_name": "20231009-122305", "train_runtime": 69889.4305, "train_samples_per_second": 5.664, "train_steps_per_second": 0.044, "train_loss": 0.5890884618112218, "epoch": 2.0, "eval_loss": 0.5871890783309937, "eval_runtime": 15.5979, "eval_samples_per_second": 12.822, "eval_steps_per_second": 6.411}
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/runs/Oct09_06-19-22_I156f1f3f410070163e/events.out.tfevents.1696832467.I156f1f3f410070163e.7536.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2462cbdd451f1620c45fdc00446bef4939780801d8fc5937b775dd515dee8c8
+ size 7869
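These three lines are a Git LFS pointer, not the TensorBoard event file itself: a spec version, the sha256 of the real blob, and its size in bytes. The other event files and tokenizer.model below follow the same scheme. A small illustrative parser (not part of this repo; requires Python 3.9+ for removeprefix):

    def parse_lfs_pointer(text: str) -> dict:
        # Split each "key value" line of the pointer into a field.
        fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
        return {"version": fields["version"],
                "sha256": fields["oid"].removeprefix("sha256:"),
                "size_bytes": int(fields["size"])}

    pointer = ("version https://git-lfs.github.com/spec/v1\n"
               "oid sha256:b2462cbdd451f1620c45fdc00446bef4939780801d8fc5937b775dd515dee8c8\n"
               "size 7869\n")
    print(parse_lfs_pointer(pointer))  # size_bytes=7869, i.e. an ~8 KB event file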
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/runs/Oct09_12-23-12_I156f1f3f410070163e/events.out.tfevents.1696854295.I156f1f3f410070163e.1701.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2ed40796c7e6086dc6fed30704e34ee1f93696be1510cda6f81cd0b162e7b03c
+ size 498794
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/runs/Oct09_12-23-12_I156f1f3f410070163e/events.out.tfevents.1696924201.I156f1f3f410070163e.1701.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be1c5672e1dd6fd08655f555c268b13123ff736c6281a9bbef2fc2dd916bb156
+ size 359
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/train_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 2.0,
+ "train_loss": 0.5890884618112218,
+ "train_runtime": 69889.4305,
+ "train_samples_per_second": 5.664,
+ "train_steps_per_second": 0.044
+ }
tasks/speechless-tora-code-7b-v1.0/outputs/speechless-tora-code-7b-v1.0/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
tasks/speechless-tora-code-7b-v1.0/run_api_server.sh ADDED
@@ -0,0 +1,11 @@
+ #!/usr/bin/env bash
+ # From speechless-code-mistral-7b-v1.0
+ SCRIPT_PATH=$(cd $(dirname ${BASH_SOURCE[0]}); pwd)
+ PARENT_PATH=$(cd "${SCRIPT_PATH}/.." ; pwd)
+
+ source ${SCRIPT_PATH}/task.env
+
+ PYTHONPATH=${SCRIPT_PATH}/../../.. \
+ python ../../api/server.py \
+ --model_name_or_path=${TEST_MODEL_PATH} \
+ --model_family vllm
tasks/speechless-tora-code-7b-v1.0/run_finetune.sh ADDED
@@ -0,0 +1,59 @@
+ #!/bin/bash
+ # From speechless-code-mistral-7b-v1.0
+ SCRIPT_PATH=$(cd $(dirname ${BASH_SOURCE[0]}); pwd)
+ PARENT_PATH=$(cd "${SCRIPT_PATH}/.." ; pwd)
+
+ source ${SCRIPT_PATH}/task.env
+
+ PYTHONPATH=${PWD}/../.. \
+ torchrun --nnodes=1 --nproc_per_node=${NUM_GPUS} \
+ ../../finetune.py \
+ ${DEEPSPEED_STAGE2} \
+ --task_name ${TASK_NAME} \
+ --run_name $(date +%Y%m%d-%H%M%S) \
+ --model_name_or_path ${BASE_MODEL_PATH} \
+ --output_dir ${OUTPUT_DIR} \
+ --num_train_epochs ${NUM_TRAIN_EPOCHS} \
+ --data_seed 10042 \
+ --save_strategy steps \
+ --save_total_limit 1 \
+ --evaluation_strategy steps \
+ --eval_dataset_size ${EVAL_DATASET_SIZE} \
+ --save_steps 100 \
+ --eval_steps 100 \
+ --warmup_steps 20 \
+ --max_eval_samples 200 \
+ --dataloader_num_workers 3 \
+ --logging_strategy steps \
+ --logging_steps 1 \
+ --report_to tensorboard \
+ --remove_unused_columns False \
+ --do_train \
+ --max_memory_MB ${MAX_MEMORY_MB} \
+ --bits 4 \
+ --lora_r ${LORA_R} \
+ --lora_alpha 16 \
+ --lora_dropout 0.05 \
+ --lora_modules all \
+ --double_quant \
+ --quant_type nf4 \
+ --bf16 \
+ --dataset ${DATASET} \
+ --dataset_format ${DATASET_FORMAT} \
+ --max_new_tokens 4096 \
+ --model_max_len 4096 \
+ --per_device_train_batch_size ${PER_DEVICE_TRAIN_BATCH_SIZE} \
+ --gradient_accumulation_steps ${GRADIENT_ACCUMULATION_STEPS} \
+ --per_device_eval_batch_size 1 \
+ --learning_rate ${LEARNING_RATE} \
+ --lr_scheduler_type cosine \
+ --weight_decay 0.0 \
+ --seed 10042 \
+ --optim paged_adamw_8bit \
+ --gradient_checkpointing True \
+ --group_by_length ${GROUP_BY_LENGTH} \
+ --ddp_find_unused_parameters False \
+ --force_remove_overlength_samples False \
+ --flash_attention True \
+ --rerope False \
+ --repeat_steps 0
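Taken together, the flags above (--bits 4, --quant_type nf4, --double_quant, --bf16, --lora_r/--lora_alpha/--lora_dropout, --optim paged_adamw_8bit) describe a standard QLoRA recipe: a 4-bit NF4 base model with bf16 compute and rank-64 adapters. A minimal sketch of how these options conventionally map onto transformers/peft objects — an assumption about what finetune.py does internally, not a copy of it:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig
    from peft import LoraConfig, get_peft_model

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # --bits 4
        bnb_4bit_quant_type="nf4",              # --quant_type nf4
        bnb_4bit_use_double_quant=True,         # --double_quant
        bnb_4bit_compute_dtype=torch.bfloat16,  # --bf16
    )
    model = AutoModelForCausalLM.from_pretrained(
        "/opt/local/llm_models/huggingface.co/llm_agents/tora-code-7b-v1.0",
        quantization_config=bnb_config,
        device_map="auto",
    )
    lora_config = LoraConfig(
        r=64, lora_alpha=16, lora_dropout=0.05,  # --lora_r / --lora_alpha / --lora_dropout
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, lora_config)   # the "adding LoRA modules..." log lines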
tasks/speechless-tora-code-7b-v1.0/task.env ADDED
@@ -0,0 +1,47 @@
+ # From speechless-code-mistral-7b-v1.0
+ # -------------------- Model --------------------
+ export MODELS_ROOT_DIR=/opt/local/llm_models/huggingface.co
+ # FIXME
+ export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/llm_agents/tora-code-7b-v1.0
+ # FIXME
+ export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-tora-code-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-nl2sql-mistral-7b-v0.1
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-mistral-7b-v0.1
+
+ # -------------------- Dataset --------------------
+ # FIXME
+ # export DATASET=/opt/local/datasets/jondurbin/airoboros-2.2/instructions-clean.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/airoboros-orca-platypus-instructions.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/speechless-spider.jsonl
+ export DATASET=/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl
+
+ # -------------------- Environment --------------------
+ export OUTPUT_DIR=./outputs
+ # export TORCH_DISTRIBUTED_DEBUG=DETAIL
+ export RAY_memory_monitor_refresh_ms=0
+
+ # -------------------- Task --------------------
+ # FIXME
+ export TASK_NAME=$(basename ${TEST_MODEL_PATH})
+ export TASK_CHECKPOINT_DIR=${OUTPUT_DIR}/${TASK_NAME}
+ export WANDB_PROJECT=${TASK_NAME}
+
+ # -------------------- Task --------------------
+ export DATASET_FORMAT=airoboros
+ export NUM_TRAIN_EPOCHS=2
+ export EVAL_DATASET_SIZE=0.005
+ export LEARNING_RATE=2e-4
+ export LORA_R=64
+ export GROUP_BY_LENGTH=False
+
+ #export DEEPSPEED_STAGE2="--deepspeed deepspeed-stage2.json"
+
+ export NUM_GPUS=2
+ # 2 gpus: 40GB: 2 x 32, 80GB: 4 x 16
+ # 4 gpus: 40GB: 2 x 16, 80GB: 4 x 8
+ export PER_DEVICE_TRAIN_BATCH_SIZE=4
+ export GRADIENT_ACCUMULATION_STEPS=16
+
+ # No more than 85% VRAM.
+ # A100(40GB) 34000, A40(48GB) 40000, A100(80GB) 70000
+ export MAX_MEMORY_MB=32000
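The batch-geometry comments above all encode the same invariant: NUM_GPUS × PER_DEVICE_TRAIN_BATCH_SIZE × GRADIENT_ACCUMULATION_STEPS is held at 128 samples per optimizer step, so switching hardware only redistributes the same effective global batch. A quick check over the presets listed in these task*.env files:

    presets = {  # (NUM_GPUS, PER_DEVICE_TRAIN_BATCH_SIZE, GRADIENT_ACCUMULATION_STEPS)
        "2 gpus, 40GB": (2, 2, 32),
        "2 gpus, 80GB": (2, 4, 16),
        "4 gpus, 40GB": (4, 2, 16),
        "4 gpus, 80GB": (4, 4, 8),
        "this task.env": (2, 4, 16),
    }
    for name, (gpus, per_device, accum) in presets.items():
        print(name, gpus * per_device * accum)  # 128 in every case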
tasks/speechless-tora-code-7b-v1.0/task_a100_40gx2.env ADDED
@@ -0,0 +1,49 @@
+ # From speechless-code-mistral-orca-7b-v1.0
+ # -------------------- Model --------------------
+ export MODELS_ROOT_DIR=/opt/local/llm_models/huggingface.co
+ # FIXME
+ export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/Open-Orca/Mistral-7B-OpenOrca
+ # export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/mistralai/Mistral-7B-v0.1
+ # FIXME
+ export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-code-mistral-orca-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-code-mistral-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-nl2sql-mistral-7b-v0.1
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-mistral-7b-v0.1
+
+ # -------------------- Dataset --------------------
+ # FIXME
+ # export DATASET=/opt/local/datasets/jondurbin/airoboros-2.2/instructions-clean.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/airoboros-orca-platypus-instructions.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/speechless-spider.jsonl
+ export DATASET=/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl
+
+ # -------------------- Environment --------------------
+ export OUTPUT_DIR=./outputs
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
+ export RAY_memory_monitor_refresh_ms=0
+
+ # -------------------- Task --------------------
+ # FIXME
+ export TASK_NAME=$(basename ${TEST_MODEL_PATH})
+ export TASK_CHECKPOINT_DIR=${OUTPUT_DIR}/${TASK_NAME}
+ export WANDB_PROJECT=${TASK_NAME}
+
+ # -------------------- Task --------------------
+ export DATASET_FORMAT=airoboros
+ export NUM_TRAIN_EPOCHS=2
+ export EVAL_DATASET_SIZE=0.005
+ export LEARNING_RATE=2e-4
+ export LORA_R=64
+ export GROUP_BY_LENGTH=False
+
+ #export DEEPSPEED_STAGE2="--deepspeed deepspeed-stage2.json"
+
+ export NUM_GPUS=2
+ # 2 gpus: 40GB: 2 x 32, 80GB: 4 x 16
+ # 4 gpus: 40GB: 2 x 16, 80GB: 4 x 8
+ export PER_DEVICE_TRAIN_BATCH_SIZE=2
+ export GRADIENT_ACCUMULATION_STEPS=32
+
+ # No more than 85% VRAM.
+ # A100(40GB) 34000, A40(48GB) 40000, A100(80GB) 70000
+ export MAX_MEMORY_MB=34000
tasks/speechless-tora-code-7b-v1.0/task_a100_40gx4.env ADDED
@@ -0,0 +1,49 @@
+ # From speechless-code-mistral-orca-7b-v1.0
+ # -------------------- Model --------------------
+ export MODELS_ROOT_DIR=/opt/local/llm_models/huggingface.co
+ # FIXME
+ export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/Open-Orca/Mistral-7B-OpenOrca
+ # export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/mistralai/Mistral-7B-v0.1
+ # FIXME
+ export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-code-mistral-orca-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-code-mistral-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-nl2sql-mistral-7b-v0.1
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-mistral-7b-v0.1
+
+ # -------------------- Dataset --------------------
+ # FIXME
+ # export DATASET=/opt/local/datasets/jondurbin/airoboros-2.2/instructions-clean.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/airoboros-orca-platypus-instructions.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/speechless-spider.jsonl
+ export DATASET=/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl
+
+ # -------------------- Environment --------------------
+ export OUTPUT_DIR=./outputs
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
+ export RAY_memory_monitor_refresh_ms=0
+
+ # -------------------- Task --------------------
+ # FIXME
+ export TASK_NAME=$(basename ${TEST_MODEL_PATH})
+ export TASK_CHECKPOINT_DIR=${OUTPUT_DIR}/${TASK_NAME}
+ export WANDB_PROJECT=${TASK_NAME}
+
+ # -------------------- Task --------------------
+ export DATASET_FORMAT=airoboros
+ export NUM_TRAIN_EPOCHS=2
+ export EVAL_DATASET_SIZE=0.005
+ export LEARNING_RATE=2e-4
+ export LORA_R=64
+ export GROUP_BY_LENGTH=False
+
+ #export DEEPSPEED_STAGE2="--deepspeed deepspeed-stage2.json"
+
+ export NUM_GPUS=4
+ # 2 gpus: 40GB: 2 x 32, 80GB: 4 x 16
+ # 4 gpus: 40GB: 2 x 16, 80GB: 4 x 8
+ export PER_DEVICE_TRAIN_BATCH_SIZE=2
+ export GRADIENT_ACCUMULATION_STEPS=16
+
+ # No more than 85% VRAM.
+ # A100(40GB) 34000, A40(48GB) 40000, A100(80GB) 70000
+ export MAX_MEMORY_MB=34000
tasks/speechless-tora-code-7b-v1.0/task_a100_80gx2.env ADDED
@@ -0,0 +1,47 @@
+ # From speechless-code-mistral-7b-v1.0
+ # -------------------- Model --------------------
+ export MODELS_ROOT_DIR=/opt/local/llm_models/huggingface.co
+ # FIXME
+ export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/mistralai/Mistral-7B-v0.1
+ # FIXME
+ export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-code-mistral-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-nl2sql-mistral-7b-v0.1
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-mistral-7b-v0.1
+
+ # -------------------- Dataset --------------------
+ # FIXME
+ # export DATASET=/opt/local/datasets/jondurbin/airoboros-2.2/instructions-clean.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/airoboros-orca-platypus-instructions.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/speechless-spider.jsonl
+ export DATASET=/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl
+
+ # -------------------- Environment --------------------
+ export OUTPUT_DIR=./outputs
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
+ export RAY_memory_monitor_refresh_ms=0
+
+ # -------------------- Task --------------------
+ # FIXME
+ export TASK_NAME=$(basename ${TEST_MODEL_PATH})
+ export TASK_CHECKPOINT_DIR=${OUTPUT_DIR}/${TASK_NAME}
+ export WANDB_PROJECT=${TASK_NAME}
+
+ # -------------------- Task --------------------
+ export DATASET_FORMAT=airoboros
+ export NUM_TRAIN_EPOCHS=2
+ export EVAL_DATASET_SIZE=0.005
+ export LEARNING_RATE=2e-4
+ export LORA_R=64
+ export GROUP_BY_LENGTH=False
+
+ #export DEEPSPEED_STAGE2="--deepspeed deepspeed-stage2.json"
+
+ export NUM_GPUS=2
+ # 2 gpus: 40GB: 2 x 32, 80GB: 4 x 16
+ # 4 gpus: 40GB: 2 x 16, 80GB: 4 x 8
+ export PER_DEVICE_TRAIN_BATCH_SIZE=2
+ export GRADIENT_ACCUMULATION_STEPS=32
+
+ # No more than 85% VRAM.
+ # A100(40GB) 34000, A40(48GB) 40000, A100(80GB) 70000
+ export MAX_MEMORY_MB=70000
tasks/speechless-tora-code-7b-v1.0/task_a40_48gx2.env ADDED
@@ -0,0 +1,47 @@
+ # From speechless-code-mistral-7b-v1.0
+ # -------------------- Model --------------------
+ export MODELS_ROOT_DIR=/opt/local/llm_models/huggingface.co
+ # FIXME
+ export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/mistralai/Mistral-7B-v0.1
+ # FIXME
+ export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-code-mistral-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-nl2sql-mistral-7b-v0.1
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-mistral-7b-v0.1
+
+ # -------------------- Dataset --------------------
+ # FIXME
+ # export DATASET=/opt/local/datasets/jondurbin/airoboros-2.2/instructions-clean.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/airoboros-orca-platypus-instructions.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/speechless-spider.jsonl
+ export DATASET=/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl
+
+ # -------------------- Environment --------------------
+ export OUTPUT_DIR=./outputs
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
+ export RAY_memory_monitor_refresh_ms=0
+
+ # -------------------- Task --------------------
+ # FIXME
+ export TASK_NAME=$(basename ${TEST_MODEL_PATH})
+ export TASK_CHECKPOINT_DIR=${OUTPUT_DIR}/${TASK_NAME}
+ export WANDB_PROJECT=${TASK_NAME}
+
+ # -------------------- Task --------------------
+ export DATASET_FORMAT=airoboros
+ export NUM_TRAIN_EPOCHS=2
+ export EVAL_DATASET_SIZE=0.005
+ export LEARNING_RATE=2e-4
+ export LORA_R=64
+ export GROUP_BY_LENGTH=False
+
+ #export DEEPSPEED_STAGE2="--deepspeed deepspeed-stage2.json"
+
+ export NUM_GPUS=2
+ # 2 gpus: 40GB: 2 x 32, 80GB: 4 x 16
+ # 4 gpus: 40GB: 2 x 16, 80GB: 4 x 8
+ export PER_DEVICE_TRAIN_BATCH_SIZE=2
+ export GRADIENT_ACCUMULATION_STEPS=32
+
+ # No more than 85% VRAM.
+ # A100(40GB) 34000, A40(48GB) 40000, A100(80GB) 70000
+ export MAX_MEMORY_MB=40000
tasks/speechless-tora-code-7b-v1.0/task_a40_48gx4.env ADDED
@@ -0,0 +1,47 @@
+ # From speechless-code-mistral-7b-v1.0
+ # -------------------- Model --------------------
+ export MODELS_ROOT_DIR=/opt/local/llm_models/huggingface.co
+ # FIXME
+ export BASE_MODEL_PATH=${MODELS_ROOT_DIR}/mistralai/Mistral-7B-v0.1
+ # FIXME
+ export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-code-mistral-7b-v1.0
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-nl2sql-mistral-7b-v0.1
+ # export TEST_MODEL_PATH=${MODELS_ROOT_DIR}/speechlessai/speechless-mistral-7b-v0.1
+
+ # -------------------- Dataset --------------------
+ # FIXME
+ # export DATASET=/opt/local/datasets/jondurbin/airoboros-2.2/instructions-clean.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/airoboros-orca-platypus-instructions.jsonl
+ # export DATASET=/opt/local/datasets/Speechless/speechless-spider.jsonl
+ export DATASET=/opt/local/datasets/Speechless/speechless-thoughts-200k.jsonl
+
+ # -------------------- Environment --------------------
+ export OUTPUT_DIR=./outputs
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
+ export RAY_memory_monitor_refresh_ms=0
+
+ # -------------------- Task --------------------
+ # FIXME
+ export TASK_NAME=$(basename ${TEST_MODEL_PATH})
+ export TASK_CHECKPOINT_DIR=${OUTPUT_DIR}/${TASK_NAME}
+ export WANDB_PROJECT=${TASK_NAME}
+
+ # -------------------- Task --------------------
+ export DATASET_FORMAT=airoboros
+ export NUM_TRAIN_EPOCHS=2
+ export EVAL_DATASET_SIZE=0.005
+ export LEARNING_RATE=2e-4
+ export LORA_R=64
+ export GROUP_BY_LENGTH=False
+
+ #export DEEPSPEED_STAGE2="--deepspeed deepspeed-stage2.json"
+
+ export NUM_GPUS=4
+ # 2 gpus: 40GB: 2 x 32, 80GB: 4 x 16
+ # 4 gpus: 40GB: 2 x 16, 80GB: 4 x 8
+ export PER_DEVICE_TRAIN_BATCH_SIZE=2
+ export GRADIENT_ACCUMULATION_STEPS=16
+
+ # No more than 85% VRAM.
+ # A100(40GB) 34000, A40(48GB) 40000, A100(80GB) 70000
+ export MAX_MEMORY_MB=40000
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723