inflaton committed
Commit 3d8ab56
1 parent: 55b5cbe

finished finetuning of phi-3.5-mini

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete change set.
Files changed (50)
  1. llama-factory/saves/Phi-3.5-mini-instruct/README.md +69 -0
  2. llama-factory/saves/Phi-3.5-mini-instruct/adapter_config.json +31 -0
  3. llama-factory/saves/Phi-3.5-mini-instruct/adapter_model.safetensors +3 -0
  4. llama-factory/saves/Phi-3.5-mini-instruct/added_tokens.json +13 -0
  5. llama-factory/saves/Phi-3.5-mini-instruct/all_results.json +12 -0
  6. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/README.md +202 -0
  7. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/adapter_config.json +31 -0
  8. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/adapter_model.safetensors +3 -0
  9. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/added_tokens.json +13 -0
  10. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/optimizer.pt +3 -0
  11. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/rng_state.pth +3 -0
  12. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/scheduler.pt +3 -0
  13. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/special_tokens_map.json +30 -0
  14. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/tokenizer.json +0 -0
  15. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/tokenizer.model +3 -0
  16. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/tokenizer_config.json +132 -0
  17. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/trainer_state.json +204 -0
  18. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/training_args.bin +3 -0
  19. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/README.md +202 -0
  20. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/adapter_config.json +31 -0
  21. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/adapter_model.safetensors +3 -0
  22. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/added_tokens.json +13 -0
  23. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/optimizer.pt +3 -0
  24. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/rng_state.pth +3 -0
  25. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/scheduler.pt +3 -0
  26. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/special_tokens_map.json +30 -0
  27. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/tokenizer.json +0 -0
  28. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/tokenizer.model +3 -0
  29. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/tokenizer_config.json +132 -0
  30. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/trainer_state.json +261 -0
  31. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/training_args.bin +3 -0
  32. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/README.md +202 -0
  33. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/adapter_config.json +31 -0
  34. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/adapter_model.safetensors +3 -0
  35. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/added_tokens.json +13 -0
  36. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/optimizer.pt +3 -0
  37. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/rng_state.pth +3 -0
  38. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/scheduler.pt +3 -0
  39. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/special_tokens_map.json +30 -0
  40. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/tokenizer.json +0 -0
  41. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/tokenizer.model +3 -0
  42. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/tokenizer_config.json +132 -0
  43. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/trainer_state.json +318 -0
  44. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/training_args.bin +3 -0
  45. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/README.md +202 -0
  46. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/adapter_config.json +31 -0
  47. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/adapter_model.safetensors +3 -0
  48. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/added_tokens.json +13 -0
  49. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/optimizer.pt +3 -0
  50. llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/rng_state.pth +3 -0
llama-factory/saves/Phi-3.5-mini-instruct/README.md ADDED
@@ -0,0 +1,69 @@
+ ---
+ base_model: microsoft/Phi-3.5-mini-instruct
+ library_name: peft
+ license: other
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ model-index:
+ - name: Phi-3.5-mini-instruct
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # Phi-3.5-mini-instruct
+
+ This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on the alpaca_mac dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.6644
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 16
+ - eval_batch_size: 1
+ - seed: 42
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 128
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 6.0
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:------:|:----:|:---------------:|
+ | 1.9221 | 0.9964 | 35 | 1.9123 |
+ | 1.7521 | 1.9929 | 70 | 1.7631 |
+ | 1.6831 | 2.9893 | 105 | 1.6914 |
+ | 1.5566 | 3.9858 | 140 | 1.6649 |
+ | 1.562 | 4.9822 | 175 | 1.6638 |
+ | 1.5573 | 5.9786 | 210 | 1.6644 |
+
+
+ ### Framework versions
+
+ - PEFT 0.11.1
+ - Transformers 4.43.3
+ - Pytorch 2.4.0+cu121
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
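The card above records a LoRA adapter trained with LLaMA-Factory on top of Phi-3.5-mini-instruct; note the effective batch size of 128 = train_batch_size 16 × gradient_accumulation_steps 8. As a minimal sketch of how such an adapter could be loaded for inference with PEFT — the local adapter path and the prompt are illustrative assumptions, not part of this commit:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE = "microsoft/Phi-3.5-mini-instruct"
ADAPTER = "llama-factory/saves/Phi-3.5-mini-instruct"  # hypothetical local checkout path

# Load the frozen base model, then attach the rank-8 LoRA adapter on top of it.
tokenizer = AutoTokenizer.from_pretrained(BASE)
model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype=torch.bfloat16, device_map="auto")
model = PeftModel.from_pretrained(model, ADAPTER)
model.eval()

messages = [{"role": "user", "content": "Give me three facts about macOS."}]  # illustrative prompt
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(inputs, max_new_tokens=128)
print(tokenizer.decode(out[0][inputs.shape[-1]:], skip_special_tokens=True))
```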
llama-factory/saves/Phi-3.5-mini-instruct/adapter_config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0.0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "qkv_proj",
+     "down_proj",
+     "gate_up_proj",
+     "o_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
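For reference, the same adapter settings could be reproduced in a training script with PEFT's `LoraConfig` — a sketch of the equivalent configuration, not the exact LLaMA-Factory invocation:

```python
from peft import LoraConfig

# Mirrors adapter_config.json above: rank-8 LoRA (alpha 16, no dropout) on the
# fused attention projection (qkv_proj), the output projection (o_proj), and
# the fused MLP projections (gate_up_proj, down_proj) of Phi-3.5.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
)
```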
llama-factory/saves/Phi-3.5-mini-instruct/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2fef064b69976e06a1f7d030039fda5b1893feb02f1d92877cfa71069226acb
+ size 50365768
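The adapter weights themselves live in Git LFS; what the repository stores is the three-line pointer above (spec version, SHA-256 object ID, byte size). A small sketch for verifying a downloaded blob against its pointer — the local file path is an assumption:

```python
import hashlib

def verify_lfs_blob(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in an LFS pointer."""
    h = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# oid and size copied from the pointer above; the local path is hypothetical.
ok = verify_lfs_blob(
    "adapter_model.safetensors",
    "c2fef064b69976e06a1f7d030039fda5b1893feb02f1d92877cfa71069226acb",
    50365768,
)
print("checksum OK" if ok else "checksum mismatch")
```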
llama-factory/saves/Phi-3.5-mini-instruct/added_tokens.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "<|assistant|>": 32001,
+   "<|endoftext|>": 32000,
+   "<|end|>": 32007,
+   "<|placeholder1|>": 32002,
+   "<|placeholder2|>": 32003,
+   "<|placeholder3|>": 32004,
+   "<|placeholder4|>": 32005,
+   "<|placeholder5|>": 32008,
+   "<|placeholder6|>": 32009,
+   "<|system|>": 32006,
+   "<|user|>": 32010
+ }
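These are the Phi-3 chat-role and placeholder tokens appended after the 32,000-token SentencePiece base vocabulary. A quick sanity check against the tokenizer files shipped in this commit — the local path is an assumption:

```python
from transformers import AutoTokenizer

# Hypothetical local path; the checkpoint directories in this commit carry
# the same tokenizer files.
tok = AutoTokenizer.from_pretrained(
    "llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105"
)
assert tok.convert_tokens_to_ids("<|end|>") == 32007
assert tok.convert_tokens_to_ids("<|user|>") == 32010
print(tok.eos_token, tok.pad_token)  # <|end|> <|endoftext|> per tokenizer_config.json
```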
llama-factory/saves/Phi-3.5-mini-instruct/all_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "epoch": 5.9786476868327405,
+   "eval_loss": 1.6644203662872314,
+   "eval_runtime": 2.6994,
+   "eval_samples_per_second": 17.041,
+   "eval_steps_per_second": 17.041,
+   "total_flos": 1.7533662523283866e+17,
+   "train_loss": 1.8585307757059732,
+   "train_runtime": 2890.6103,
+   "train_samples_per_second": 9.303,
+   "train_steps_per_second": 0.073
+ }
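Since eval_loss is mean token cross-entropy in nats, the corresponding perplexity is its exponential — roughly exp(1.6644) ≈ 5.28 on the evaluation set:

```python
import math

eval_loss = 1.6644203662872314   # from all_results.json above
train_loss = 1.8585307757059732  # mean over all 6 epochs, so higher than the final-epoch loss

print(f"eval perplexity:  {math.exp(eval_loss):.2f}")   # ~5.28
print(f"train perplexity: {math.exp(train_loss):.2f}")  # ~6.41
```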
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: microsoft/Phi-3.5-mini-instruct
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.11.1
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/adapter_config.json ADDED
@@ -0,0 +1,31 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/adapter_config.json above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a4606819c7f656a215e71778f3af33eec0e6b70d2413844063095cc60ce703b
+ size 50365768
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/added_tokens.json ADDED
@@ -0,0 +1,13 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/added_tokens.json above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62cd92b5ca96c038b57c5ebe85141fdd3bda27b0900eff9cf01aa255235d05d6
+ size 100878458
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
+ size 14244
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f884e1907aa5818c277b83d03af573102a4aa6924b1853efad0972d9cae963a5
+ size 1064
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/tokenizer_config.json ADDED
@@ -0,0 +1,132 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": false
+     },
+     "32000": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32001": {
+       "content": "<|assistant|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32002": {
+       "content": "<|placeholder1|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32003": {
+       "content": "<|placeholder2|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32004": {
+       "content": "<|placeholder3|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32005": {
+       "content": "<|placeholder4|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32006": {
+       "content": "<|system|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32007": {
+       "content": "<|end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32008": {
+       "content": "<|placeholder5|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32009": {
+       "content": "<|placeholder6|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32010": {
+       "content": "<|user|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|system|>\n' + system_message + '<|end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + content + '<|end|>\n<|assistant|>\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end|>' + '\n' }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end|>",
+   "legacy": false,
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "split_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
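The `chat_template` field above is the Jinja template that `apply_chat_template` renders at inference time; it wraps each turn in the role tokens defined in added_tokens.json. A sketch of the rendered prompt — the local path and message text are illustrative:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105"  # hypothetical local path
)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello."},
]
print(tok.apply_chat_template(messages, tokenize=False))
# Expected rendering, per the template above:
# <s><|system|>
# You are a helpful assistant.<|end|>
# <|user|>
# Say hello.<|end|>
# <|assistant|>
```

Note that this template appends `<|assistant|>\n` as part of every user turn, so the rendered prompt already ends ready for generation.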
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/trainer_state.json ADDED
@@ -0,0 +1,204 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.9893238434163703,
+   "eval_steps": 35,
+   "global_step": 105,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.1423487544483986,
+       "grad_norm": 3.915436267852783,
+       "learning_rate": 2.380952380952381e-05,
+       "loss": 4.6057,
+       "step": 5
+     },
+     {
+       "epoch": 0.2846975088967972,
+       "grad_norm": 4.926669597625732,
+       "learning_rate": 4.761904761904762e-05,
+       "loss": 4.3259,
+       "step": 10
+     },
+     {
+       "epoch": 0.42704626334519574,
+       "grad_norm": 3.071687936782837,
+       "learning_rate": 7.142857142857143e-05,
+       "loss": 3.5782,
+       "step": 15
+     },
+     {
+       "epoch": 0.5693950177935944,
+       "grad_norm": 1.5628787279129028,
+       "learning_rate": 9.523809523809524e-05,
+       "loss": 2.542,
+       "step": 20
+     },
+     {
+       "epoch": 0.7117437722419929,
+       "grad_norm": 0.6052073240280151,
+       "learning_rate": 9.988952191691925e-05,
+       "loss": 2.0288,
+       "step": 25
+     },
+     {
+       "epoch": 0.8540925266903915,
+       "grad_norm": 0.4768967032432556,
+       "learning_rate": 9.944154131125642e-05,
+       "loss": 1.9283,
+       "step": 30
+     },
+     {
+       "epoch": 0.99644128113879,
+       "grad_norm": 0.3471652865409851,
+       "learning_rate": 9.865224352899119e-05,
+       "loss": 1.9221,
+       "step": 35
+     },
+     {
+       "epoch": 0.99644128113879,
+       "eval_loss": 1.9122635126113892,
+       "eval_runtime": 2.7209,
+       "eval_samples_per_second": 16.906,
+       "eval_steps_per_second": 16.906,
+       "step": 35
+     },
+     {
+       "epoch": 1.1387900355871885,
+       "grad_norm": 0.3238619863986969,
+       "learning_rate": 9.752707744739145e-05,
+       "loss": 1.8485,
+       "step": 40
+     },
+     {
+       "epoch": 1.281138790035587,
+       "grad_norm": 0.3222252130508423,
+       "learning_rate": 9.607381059352038e-05,
+       "loss": 1.8223,
+       "step": 45
+     },
+     {
+       "epoch": 1.4234875444839858,
+       "grad_norm": 0.30783718824386597,
+       "learning_rate": 9.430247552150673e-05,
+       "loss": 1.7991,
+       "step": 50
+     },
+     {
+       "epoch": 1.5658362989323842,
+       "grad_norm": 0.33385950326919556,
+       "learning_rate": 9.22253005533154e-05,
+       "loss": 1.7761,
+       "step": 55
+     },
+     {
+       "epoch": 1.708185053380783,
+       "grad_norm": 0.3235575258731842,
+       "learning_rate": 8.985662536114613e-05,
+       "loss": 1.7781,
+       "step": 60
+     },
+     {
+       "epoch": 1.8505338078291815,
+       "grad_norm": 0.31781575083732605,
+       "learning_rate": 8.721280197423258e-05,
+       "loss": 1.7457,
+       "step": 65
+     },
+     {
+       "epoch": 1.99288256227758,
+       "grad_norm": 0.3225061297416687,
+       "learning_rate": 8.43120818934367e-05,
+       "loss": 1.7521,
+       "step": 70
+     },
+     {
+       "epoch": 1.99288256227758,
+       "eval_loss": 1.7631458044052124,
+       "eval_runtime": 2.707,
+       "eval_samples_per_second": 16.993,
+       "eval_steps_per_second": 16.993,
+       "step": 70
+     },
+     {
+       "epoch": 2.135231316725979,
+       "grad_norm": 0.3239762783050537,
+       "learning_rate": 8.117449009293668e-05,
+       "loss": 1.6471,
+       "step": 75
+     },
+     {
+       "epoch": 2.277580071174377,
+       "grad_norm": 0.348884642124176,
+       "learning_rate": 7.782168677883206e-05,
+       "loss": 1.7,
+       "step": 80
+     },
+     {
+       "epoch": 2.419928825622776,
+       "grad_norm": 0.3774774372577667,
+       "learning_rate": 7.427681785900761e-05,
+       "loss": 1.7183,
+       "step": 85
+     },
+     {
+       "epoch": 2.562277580071174,
+       "grad_norm": 0.39107823371887207,
+       "learning_rate": 7.056435515653059e-05,
+       "loss": 1.6825,
+       "step": 90
+     },
+     {
+       "epoch": 2.704626334519573,
+       "grad_norm": 0.5160859823226929,
+       "learning_rate": 6.670992746965938e-05,
+       "loss": 1.664,
+       "step": 95
+     },
+     {
+       "epoch": 2.8469750889679717,
+       "grad_norm": 0.4094185531139374,
+       "learning_rate": 6.274014364473274e-05,
+       "loss": 1.6662,
+       "step": 100
+     },
+     {
+       "epoch": 2.9893238434163703,
+       "grad_norm": 0.4102707803249359,
+       "learning_rate": 5.868240888334653e-05,
+       "loss": 1.6831,
+       "step": 105
+     },
+     {
+       "epoch": 2.9893238434163703,
+       "eval_loss": 1.691434621810913,
+       "eval_runtime": 2.7669,
+       "eval_samples_per_second": 16.625,
+       "eval_steps_per_second": 16.625,
+       "step": 105
+     }
+   ],
+   "logging_steps": 5,
+   "max_steps": 210,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 6,
+   "save_steps": 35,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 8.782483083883315e+16,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
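`trainer_state.json` carries the full training log: one entry every `logging_steps` (5) optimizer steps, plus an eval entry every `eval_steps` (35). Those eval rows are what the README's results table summarizes. A sketch that extracts them — the local path is an assumption:

```python
import json

with open("checkpoint-105/trainer_state.json") as f:  # hypothetical local path
    state = json.load(f)

# Entries with an "eval_loss" key are evaluation passes; the rest are
# periodic training-loss logs.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']:>3}  epoch {entry['epoch']:.2f}  "
              f"eval_loss {entry['eval_loss']:.4f}")
# step  35  epoch 1.00  eval_loss 1.9123
# step  70  epoch 1.99  eval_loss 1.7631
# step 105  epoch 2.99  eval_loss 1.6914
```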
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2bc05afc45cc2260e917f0e545fdbc53c8c1a9def7f26bb3cc9459a74257487
+ size 5368
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/README.md ADDED
@@ -0,0 +1,202 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/README.md above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/adapter_config.json ADDED
@@ -0,0 +1,31 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/adapter_config.json above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51269624a2f8ae34127497ef0a4bb36779c73fdebfeb97c1b2a486bd8be8b071
+ size 50365768
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/added_tokens.json ADDED
@@ -0,0 +1,13 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/added_tokens.json above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0450d108083f8443f5d1e2111c29c7aa6ef4db50061a445b86cb02b796acbb53
+ size 100878458
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6
+ size 14244
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c79c4e0ef5874a405d3a7ead058cbb211b801c0b67fe721ad82a960e96528bf4
+ size 1064
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/special_tokens_map.json above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/tokenizer_config.json ADDED
@@ -0,0 +1,132 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/tokenizer_config.json above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/trainer_state.json ADDED
@@ -0,0 +1,261 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 3.98576512455516,
+   "eval_steps": 35,
+   "global_step": 140,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.1423487544483986,
+       "grad_norm": 3.915436267852783,
+       "learning_rate": 2.380952380952381e-05,
+       "loss": 4.6057,
+       "step": 5
+     },
+     {
+       "epoch": 0.2846975088967972,
+       "grad_norm": 4.926669597625732,
+       "learning_rate": 4.761904761904762e-05,
+       "loss": 4.3259,
+       "step": 10
+     },
+     {
+       "epoch": 0.42704626334519574,
+       "grad_norm": 3.071687936782837,
+       "learning_rate": 7.142857142857143e-05,
+       "loss": 3.5782,
+       "step": 15
+     },
+     {
+       "epoch": 0.5693950177935944,
+       "grad_norm": 1.5628787279129028,
+       "learning_rate": 9.523809523809524e-05,
+       "loss": 2.542,
+       "step": 20
+     },
+     {
+       "epoch": 0.7117437722419929,
+       "grad_norm": 0.6052073240280151,
+       "learning_rate": 9.988952191691925e-05,
+       "loss": 2.0288,
+       "step": 25
+     },
+     {
+       "epoch": 0.8540925266903915,
+       "grad_norm": 0.4768967032432556,
+       "learning_rate": 9.944154131125642e-05,
+       "loss": 1.9283,
+       "step": 30
+     },
+     {
+       "epoch": 0.99644128113879,
+       "grad_norm": 0.3471652865409851,
+       "learning_rate": 9.865224352899119e-05,
+       "loss": 1.9221,
+       "step": 35
+     },
+     {
+       "epoch": 0.99644128113879,
+       "eval_loss": 1.9122635126113892,
+       "eval_runtime": 2.7209,
+       "eval_samples_per_second": 16.906,
+       "eval_steps_per_second": 16.906,
+       "step": 35
+     },
+     {
+       "epoch": 1.1387900355871885,
+       "grad_norm": 0.3238619863986969,
+       "learning_rate": 9.752707744739145e-05,
+       "loss": 1.8485,
+       "step": 40
+     },
+     {
+       "epoch": 1.281138790035587,
+       "grad_norm": 0.3222252130508423,
+       "learning_rate": 9.607381059352038e-05,
+       "loss": 1.8223,
+       "step": 45
+     },
+     {
+       "epoch": 1.4234875444839858,
+       "grad_norm": 0.30783718824386597,
+       "learning_rate": 9.430247552150673e-05,
+       "loss": 1.7991,
+       "step": 50
+     },
+     {
+       "epoch": 1.5658362989323842,
+       "grad_norm": 0.33385950326919556,
+       "learning_rate": 9.22253005533154e-05,
+       "loss": 1.7761,
+       "step": 55
+     },
+     {
+       "epoch": 1.708185053380783,
+       "grad_norm": 0.3235575258731842,
+       "learning_rate": 8.985662536114613e-05,
+       "loss": 1.7781,
+       "step": 60
+     },
+     {
+       "epoch": 1.8505338078291815,
+       "grad_norm": 0.31781575083732605,
+       "learning_rate": 8.721280197423258e-05,
+       "loss": 1.7457,
+       "step": 65
+     },
+     {
+       "epoch": 1.99288256227758,
+       "grad_norm": 0.3225061297416687,
+       "learning_rate": 8.43120818934367e-05,
+       "loss": 1.7521,
+       "step": 70
+     },
+     {
+       "epoch": 1.99288256227758,
+       "eval_loss": 1.7631458044052124,
+       "eval_runtime": 2.707,
+       "eval_samples_per_second": 16.993,
+       "eval_steps_per_second": 16.993,
+       "step": 70
+     },
+     {
+       "epoch": 2.135231316725979,
+       "grad_norm": 0.3239762783050537,
+       "learning_rate": 8.117449009293668e-05,
+       "loss": 1.6471,
+       "step": 75
+     },
+     {
+       "epoch": 2.277580071174377,
+       "grad_norm": 0.348884642124176,
+       "learning_rate": 7.782168677883206e-05,
+       "loss": 1.7,
+       "step": 80
+     },
+     {
+       "epoch": 2.419928825622776,
+       "grad_norm": 0.3774774372577667,
+       "learning_rate": 7.427681785900761e-05,
+       "loss": 1.7183,
+       "step": 85
+     },
+     {
+       "epoch": 2.562277580071174,
+       "grad_norm": 0.39107823371887207,
+       "learning_rate": 7.056435515653059e-05,
+       "loss": 1.6825,
+       "step": 90
+     },
+     {
+       "epoch": 2.704626334519573,
+       "grad_norm": 0.5160859823226929,
+       "learning_rate": 6.670992746965938e-05,
+       "loss": 1.664,
+       "step": 95
+     },
+     {
+       "epoch": 2.8469750889679717,
+       "grad_norm": 0.4094185531139374,
+       "learning_rate": 6.274014364473274e-05,
+       "loss": 1.6662,
+       "step": 100
+     },
+     {
+       "epoch": 2.9893238434163703,
+       "grad_norm": 0.4102707803249359,
+       "learning_rate": 5.868240888334653e-05,
+       "loss": 1.6831,
+       "step": 105
+     },
+     {
+       "epoch": 2.9893238434163703,
+       "eval_loss": 1.691434621810913,
+       "eval_runtime": 2.7669,
+       "eval_samples_per_second": 16.625,
+       "eval_steps_per_second": 16.625,
+       "step": 105
+     },
+     {
+       "epoch": 3.131672597864769,
+       "grad_norm": 0.41169798374176025,
+       "learning_rate": 5.456473555193242e-05,
+       "loss": 1.6085,
+       "step": 110
+     },
+     {
+       "epoch": 3.2740213523131674,
+       "grad_norm": 0.5937510132789612,
+       "learning_rate": 5.041554979980486e-05,
+       "loss": 1.6105,
+       "step": 115
+     },
+     {
+       "epoch": 3.416370106761566,
+       "grad_norm": 0.4580947160720825,
+       "learning_rate": 4.626349532067879e-05,
+       "loss": 1.6227,
+       "step": 120
+     },
+     {
+       "epoch": 3.5587188612099645,
+       "grad_norm": 0.4503268599510193,
+       "learning_rate": 4.213723561238074e-05,
+       "loss": 1.6202,
+       "step": 125
+     },
+     {
+       "epoch": 3.701067615658363,
+       "grad_norm": 0.5091418623924255,
+       "learning_rate": 3.806525609984312e-05,
+       "loss": 1.6178,
+       "step": 130
+     },
+     {
+       "epoch": 3.8434163701067616,
+       "grad_norm": 0.5050191283226013,
+       "learning_rate": 3.4075667487415785e-05,
+       "loss": 1.6023,
+       "step": 135
+     },
+     {
+       "epoch": 3.98576512455516,
+       "grad_norm": 0.502037525177002,
+       "learning_rate": 3.019601169804216e-05,
+       "loss": 1.5566,
+       "step": 140
+     },
+     {
+       "epoch": 3.98576512455516,
+       "eval_loss": 1.6648945808410645,
+       "eval_runtime": 2.7182,
+       "eval_samples_per_second": 16.923,
+       "eval_steps_per_second": 16.923,
+       "step": 140
+     }
+   ],
+   "logging_steps": 5,
+   "max_steps": 210,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 6,
+   "save_steps": 35,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.1700499098451968e+17,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
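The `learning_rate` values logged above are consistent with linear warmup over the first 21 of 210 steps (warmup_ratio 0.1) followed by cosine decay to zero. A sketch reproducing the schedule:

```python
import math

def lr_at(step: int, peak: float = 1e-4, total: int = 210, warmup: int = 21) -> float:
    """Linear warmup to `peak`, then cosine decay to zero -- the schedule
    recorded in these trainer_state logs."""
    if step < warmup:
        return peak * step / warmup
    progress = (step - warmup) / (total - warmup)
    return 0.5 * peak * (1.0 + math.cos(math.pi * progress))

print(lr_at(5))    # ~2.3810e-05, matches the step-5 log entry
print(lr_at(25))   # ~9.9890e-05, matches the step-25 log entry
print(lr_at(140))  # ~3.0196e-05, matches the step-140 log entry
```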
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-140/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2bc05afc45cc2260e917f0e545fdbc53c8c1a9def7f26bb3cc9459a74257487
+ size 5368
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/README.md ADDED
@@ -0,0 +1,202 @@
(Identical to llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-105/README.md above.)
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/adapter_config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0.0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "qkv_proj",
+     "down_proj",
+     "gate_up_proj",
+     "o_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
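
The config above defines a rank-8 LoRA adapter (alpha 16) over the attention and MLP projections of `microsoft/Phi-3.5-mini-instruct`. Below is a minimal loading sketch, assuming the checkpoint directory committed here; it is illustrative and not part of the committed files.

```python
# Minimal sketch of loading the LoRA adapter described by the
# adapter_config.json above. The local path is an assumption based on
# this repo's layout; this snippet is not part of the commit.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-mini-instruct")
model = PeftModel.from_pretrained(
    base, "llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175"
)
model = model.merge_and_unload()  # optional: fold LoRA deltas into the base weights
```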
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f40ed6cbfc252c788230cd22d274978577d1ac65c5118e3a032380cd4c7cb592
+ size 50365768
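
The three lines above are a Git LFS pointer: the repository tracks only the object's sha256 and byte size, while the ~50 MB adapter weights live in LFS storage. A small sketch, assuming the weights file has already been pulled locally, of checking a download against the pointer's oid:

```python
# Verify a downloaded adapter_model.safetensors against the LFS pointer above.
# Illustrative sketch only; the local filename is an assumption.
import hashlib

h = hashlib.sha256()
with open("adapter_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)
print(h.hexdigest() == "f40ed6cbfc252c788230cd22d274978577d1ac65c5118e3a032380cd4c7cb592")
```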
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/added_tokens.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "<|assistant|>": 32001,
+   "<|endoftext|>": 32000,
+   "<|end|>": 32007,
+   "<|placeholder1|>": 32002,
+   "<|placeholder2|>": 32003,
+   "<|placeholder3|>": 32004,
+   "<|placeholder4|>": 32005,
+   "<|placeholder5|>": 32008,
+   "<|placeholder6|>": 32009,
+   "<|system|>": 32006,
+   "<|user|>": 32010
+ }
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:22b824711115ea3cae98bd98854e2c2767ee4ed6906fd914f876eeaf397b7971
+ size 100878458
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
+ size 14244
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:069778854988beebfb3f91f36fc6b48d3790c61557f2d344cec2869edb6cb5d9
+ size 1064
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/tokenizer_config.json ADDED
@@ -0,0 +1,132 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": false
+     },
+     "32000": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32001": {
+       "content": "<|assistant|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32002": {
+       "content": "<|placeholder1|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32003": {
+       "content": "<|placeholder2|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32004": {
+       "content": "<|placeholder3|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32005": {
+       "content": "<|placeholder4|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32006": {
+       "content": "<|system|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32007": {
+       "content": "<|end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32008": {
+       "content": "<|placeholder5|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32009": {
+       "content": "<|placeholder6|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32010": {
+       "content": "<|user|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|system|>\n' + system_message + '<|end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + content + '<|end|>\n<|assistant|>\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end|>' + '\n' }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end|>",
+   "legacy": false,
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "split_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
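
The `chat_template` field above encodes the Phi-3 conversation format: `<|system|>`, `<|user|>`, and `<|assistant|>` turns, each terminated by `<|end|>`. A minimal rendering sketch, assuming the committed checkpoint directory (illustrative, not committed code):

```python
# Render a conversation with the chat template committed above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175"  # assumed local path
)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tok.apply_chat_template(messages, tokenize=False)
# Expected output per the template (note the template itself appends the
# trailing <|assistant|> turn after each user message):
# <s><|system|>\nYou are a helpful assistant.<|end|>\n<|user|>\nHello!<|end|>\n<|assistant|>\n
print(prompt)
```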
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/trainer_state.json ADDED
@@ -0,0 +1,318 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 4.98220640569395,
+   "eval_steps": 35,
+   "global_step": 175,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.1423487544483986,
+       "grad_norm": 3.915436267852783,
+       "learning_rate": 2.380952380952381e-05,
+       "loss": 4.6057,
+       "step": 5
+     },
+     {
+       "epoch": 0.2846975088967972,
+       "grad_norm": 4.926669597625732,
+       "learning_rate": 4.761904761904762e-05,
+       "loss": 4.3259,
+       "step": 10
+     },
+     {
+       "epoch": 0.42704626334519574,
+       "grad_norm": 3.071687936782837,
+       "learning_rate": 7.142857142857143e-05,
+       "loss": 3.5782,
+       "step": 15
+     },
+     {
+       "epoch": 0.5693950177935944,
+       "grad_norm": 1.5628787279129028,
+       "learning_rate": 9.523809523809524e-05,
+       "loss": 2.542,
+       "step": 20
+     },
+     {
+       "epoch": 0.7117437722419929,
+       "grad_norm": 0.6052073240280151,
+       "learning_rate": 9.988952191691925e-05,
+       "loss": 2.0288,
+       "step": 25
+     },
+     {
+       "epoch": 0.8540925266903915,
+       "grad_norm": 0.4768967032432556,
+       "learning_rate": 9.944154131125642e-05,
+       "loss": 1.9283,
+       "step": 30
+     },
+     {
+       "epoch": 0.99644128113879,
+       "grad_norm": 0.3471652865409851,
+       "learning_rate": 9.865224352899119e-05,
+       "loss": 1.9221,
+       "step": 35
+     },
+     {
+       "epoch": 0.99644128113879,
+       "eval_loss": 1.9122635126113892,
+       "eval_runtime": 2.7209,
+       "eval_samples_per_second": 16.906,
+       "eval_steps_per_second": 16.906,
+       "step": 35
+     },
+     {
+       "epoch": 1.1387900355871885,
+       "grad_norm": 0.3238619863986969,
+       "learning_rate": 9.752707744739145e-05,
+       "loss": 1.8485,
+       "step": 40
+     },
+     {
+       "epoch": 1.281138790035587,
+       "grad_norm": 0.3222252130508423,
+       "learning_rate": 9.607381059352038e-05,
+       "loss": 1.8223,
+       "step": 45
+     },
+     {
+       "epoch": 1.4234875444839858,
+       "grad_norm": 0.30783718824386597,
+       "learning_rate": 9.430247552150673e-05,
+       "loss": 1.7991,
+       "step": 50
+     },
+     {
+       "epoch": 1.5658362989323842,
+       "grad_norm": 0.33385950326919556,
+       "learning_rate": 9.22253005533154e-05,
+       "loss": 1.7761,
+       "step": 55
+     },
+     {
+       "epoch": 1.708185053380783,
+       "grad_norm": 0.3235575258731842,
+       "learning_rate": 8.985662536114613e-05,
+       "loss": 1.7781,
+       "step": 60
+     },
+     {
+       "epoch": 1.8505338078291815,
+       "grad_norm": 0.31781575083732605,
+       "learning_rate": 8.721280197423258e-05,
+       "loss": 1.7457,
+       "step": 65
+     },
+     {
+       "epoch": 1.99288256227758,
+       "grad_norm": 0.3225061297416687,
+       "learning_rate": 8.43120818934367e-05,
+       "loss": 1.7521,
+       "step": 70
+     },
+     {
+       "epoch": 1.99288256227758,
+       "eval_loss": 1.7631458044052124,
+       "eval_runtime": 2.707,
+       "eval_samples_per_second": 16.993,
+       "eval_steps_per_second": 16.993,
+       "step": 70
+     },
+     {
+       "epoch": 2.135231316725979,
+       "grad_norm": 0.3239762783050537,
+       "learning_rate": 8.117449009293668e-05,
+       "loss": 1.6471,
+       "step": 75
+     },
+     {
+       "epoch": 2.277580071174377,
+       "grad_norm": 0.348884642124176,
+       "learning_rate": 7.782168677883206e-05,
+       "loss": 1.7,
+       "step": 80
+     },
+     {
+       "epoch": 2.419928825622776,
+       "grad_norm": 0.3774774372577667,
+       "learning_rate": 7.427681785900761e-05,
+       "loss": 1.7183,
+       "step": 85
+     },
+     {
+       "epoch": 2.562277580071174,
+       "grad_norm": 0.39107823371887207,
+       "learning_rate": 7.056435515653059e-05,
+       "loss": 1.6825,
+       "step": 90
+     },
+     {
+       "epoch": 2.704626334519573,
+       "grad_norm": 0.5160859823226929,
+       "learning_rate": 6.670992746965938e-05,
+       "loss": 1.664,
+       "step": 95
+     },
+     {
+       "epoch": 2.8469750889679717,
+       "grad_norm": 0.4094185531139374,
+       "learning_rate": 6.274014364473274e-05,
+       "loss": 1.6662,
+       "step": 100
+     },
+     {
+       "epoch": 2.9893238434163703,
+       "grad_norm": 0.4102707803249359,
+       "learning_rate": 5.868240888334653e-05,
+       "loss": 1.6831,
+       "step": 105
+     },
+     {
+       "epoch": 2.9893238434163703,
+       "eval_loss": 1.691434621810913,
+       "eval_runtime": 2.7669,
+       "eval_samples_per_second": 16.625,
+       "eval_steps_per_second": 16.625,
+       "step": 105
+     },
+     {
+       "epoch": 3.131672597864769,
+       "grad_norm": 0.41169798374176025,
+       "learning_rate": 5.456473555193242e-05,
+       "loss": 1.6085,
+       "step": 110
+     },
+     {
+       "epoch": 3.2740213523131674,
+       "grad_norm": 0.5937510132789612,
+       "learning_rate": 5.041554979980486e-05,
+       "loss": 1.6105,
+       "step": 115
+     },
+     {
+       "epoch": 3.416370106761566,
+       "grad_norm": 0.4580947160720825,
+       "learning_rate": 4.626349532067879e-05,
+       "loss": 1.6227,
+       "step": 120
+     },
+     {
+       "epoch": 3.5587188612099645,
+       "grad_norm": 0.4503268599510193,
+       "learning_rate": 4.213723561238074e-05,
+       "loss": 1.6202,
+       "step": 125
+     },
+     {
+       "epoch": 3.701067615658363,
+       "grad_norm": 0.5091418623924255,
+       "learning_rate": 3.806525609984312e-05,
+       "loss": 1.6178,
+       "step": 130
+     },
+     {
+       "epoch": 3.8434163701067616,
+       "grad_norm": 0.5050191283226013,
+       "learning_rate": 3.4075667487415785e-05,
+       "loss": 1.6023,
+       "step": 135
+     },
+     {
+       "epoch": 3.98576512455516,
+       "grad_norm": 0.502037525177002,
+       "learning_rate": 3.019601169804216e-05,
+       "loss": 1.5566,
+       "step": 140
+     },
+     {
+       "epoch": 3.98576512455516,
+       "eval_loss": 1.6648945808410645,
+       "eval_runtime": 2.7182,
+       "eval_samples_per_second": 16.923,
+       "eval_steps_per_second": 16.923,
+       "step": 140
+     },
+     {
+       "epoch": 4.128113879003559,
+       "grad_norm": 0.4845784604549408,
+       "learning_rate": 2.645307173898901e-05,
+       "loss": 1.5459,
+       "step": 145
+     },
+     {
+       "epoch": 4.270462633451958,
+       "grad_norm": 0.4947628378868103,
+       "learning_rate": 2.2872686806712035e-05,
+       "loss": 1.5934,
+       "step": 150
+     },
+     {
+       "epoch": 4.412811387900356,
+       "grad_norm": 0.5146717429161072,
+       "learning_rate": 1.947957390727185e-05,
+       "loss": 1.543,
+       "step": 155
+     },
+     {
+       "epoch": 4.555160142348754,
+       "grad_norm": 0.4881040155887604,
+       "learning_rate": 1.629715722373423e-05,
+       "loss": 1.568,
+       "step": 160
+     },
+     {
+       "epoch": 4.697508896797153,
+       "grad_norm": 0.514570951461792,
+       "learning_rate": 1.3347406408508695e-05,
+       "loss": 1.5337,
+       "step": 165
+     },
+     {
+       "epoch": 4.839857651245552,
+       "grad_norm": 0.5478077530860901,
+       "learning_rate": 1.0650684916965559e-05,
+       "loss": 1.5442,
+       "step": 170
+     },
+     {
+       "epoch": 4.98220640569395,
+       "grad_norm": 0.47540614008903503,
+       "learning_rate": 8.225609429353187e-06,
+       "loss": 1.562,
+       "step": 175
+     },
+     {
+       "epoch": 4.98220640569395,
+       "eval_loss": 1.6638323068618774,
+       "eval_runtime": 2.7058,
+       "eval_samples_per_second": 17.001,
+       "eval_steps_per_second": 17.001,
+       "step": 175
+     }
+   ],
+   "logging_steps": 5,
+   "max_steps": 210,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 6,
+   "save_steps": 35,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.4619841842511872e+17,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
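
Per the `log_history` above, the eval loss falls from 1.912 at step 35 to 1.664 at step 175 (210 max steps over 6 epochs). A small sketch, assuming the committed path, for extracting those eval points from the state file:

```python
# Print each recorded (step, eval_loss) pair from the committed trainer state.
import json

path = "llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/trainer_state.json"
with open(path) as f:
    state = json.load(f)

for record in state["log_history"]:
    if "eval_loss" in record:  # eval records carry eval_loss; train records carry loss
        print(record["step"], round(record["eval_loss"], 4))
# -> 35 1.9123, 70 1.7631, 105 1.6914, 140 1.6649, 175 1.6638
```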
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-175/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2bc05afc45cc2260e917f0e545fdbc53c8c1a9def7f26bb3cc9459a74257487
+ size 5368
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: microsoft/Phi-3.5-mini-instruct
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.11.1
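
The README's "How to Get Started with the Model" section above is still a template placeholder. As a hedged end-to-end inference sketch for these LoRA checkpoints, where the local path, dtype, and generation settings are assumptions rather than committed code:

```python
# End-to-end inference sketch for the LoRA checkpoints in this commit.
# Paths and generation settings are illustrative assumptions.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210"  # assumed local path
tok = AutoTokenizer.from_pretrained(ckpt)
base = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct", torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, ckpt)

messages = [{"role": "user", "content": "Summarize what LoRA fine-tuning does."}]
inputs = tok.apply_chat_template(messages, return_tensors="pt").to(model.device)
out = model.generate(inputs, max_new_tokens=128, eos_token_id=tok.eos_token_id)
print(tok.decode(out[0][inputs.shape[-1]:], skip_special_tokens=True))
```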
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/adapter_config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0.0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "qkv_proj",
+     "down_proj",
+     "gate_up_proj",
+     "o_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2fef064b69976e06a1f7d030039fda5b1893feb02f1d92877cfa71069226acb
+ size 50365768
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/added_tokens.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "<|assistant|>": 32001,
+   "<|endoftext|>": 32000,
+   "<|end|>": 32007,
+   "<|placeholder1|>": 32002,
+   "<|placeholder2|>": 32003,
+   "<|placeholder3|>": 32004,
+   "<|placeholder4|>": 32005,
+   "<|placeholder5|>": 32008,
+   "<|placeholder6|>": 32009,
+   "<|system|>": 32006,
+   "<|user|>": 32010
+ }
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca02a94b921baf8ee6e3a2a6d2080baa2cc64f8ef4c23f8b8051086eefc02da4
+ size 100878458
llama-factory/saves/Phi-3.5-mini-instruct/checkpoint-210/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b
+ size 14244