danielpark
committed on
Commit
•
38160f3
1
Parent(s):
c29cd20
Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,61 @@
| **Basic Training Settings** |                                 |
|-----------------------------|---------------------------------|
| local_rank                  | -1                              |
| per_device_train_batch_size | 4                               |
| per_device_eval_batch_size  | 1                               |
| gradient_accumulation_steps | 4                               |
| learning_rate               | 2e-4                            |
| max_grad_norm               | 0.3                             |
| weight_decay                | 0.001                           |
| max_seq_length              | 2048                            |
| num_train_epochs            | 1                               |
| max_steps                   | 100000                          |
| warmup_ratio                | 0.03                            |
| save_steps                  | 500000                          |
| logging_steps               | 10000                           |

| **4-bit Precision Settings** |                                |
|-----------------------------|---------------------------------|
| use_4bit                    | True                            |
| use_nested_quant            | False                           |
| bnb_4bit_compute_dtype      | "bfloat16"                      |
| bnb_4bit_quant_type         | "nf4"                           |

| **LoRA Settings**           |                                 |
|-----------------------------|---------------------------------|
| lora_alpha                  | 16                              |
| lora_dropout                | 0.1                             |
| lora_r                      | 64                              |

| **Advanced Training Flags** |                                 |
|-----------------------------|---------------------------------|
| fp16                        | False                           |
| bf16                        | False                           |
| packing                     | False                           |
| gradient_checkpointing      | True                            |
| optim                       | "paged_adamw_32bit"             |
| lr_scheduler_type           | "constant"                      |
| group_by_length             | True                            |

| **GPU Configuration**       |                                 |
|-----------------------------|---------------------------------|
| device_map                  | {"": 0}                         |

| **System Information** |            |            |
|------------------------|------------|------------|
|                        | **Used**   | **Total**  |
| System RAM             | 5.8 GB     | 83.5 GB    |
| GPU RAM                | 26.6 GB    | 40.0 GB    |
| Disk                   | 74.0 GB    | 166.8 GB   |

| **Training Process**                         |                               |
|----------------------------------------------|-------------------------------|
| Tokenizer Used                               | LlamaTokenizerFast            |
| Training Progress (Epoch 3.15/16)            |                               |
| Step                                         | 19740/100000                  |
| Google Colab Resource Usage                  | 150 tokens used               |