Sao10K committed on
Commit
0cf0388
·
verified ·
1 Parent(s): 02eaa7a

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +49 -0
README.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Training Details:
3
 + Trained at 8K context, then extended to 32K context via PoSE (Positional Skip-wise) training.
4
+
5
+ Dataset Modifications:
6
+ - Further Cleaned up Roleplaying Samples
7
+ - Removed Low Quality Samples from Manual Check
8
+ - More Creative Writing Samples
9
+ - Remade and Refined Detailed Instruct Data
10
+
11
+ Needle in a Haystack Results:
12
+
13
+
14
+ ```
15
+ sequence_len: 8192
16
+ use_pose: true
17
+ pose_max_context_len: 32768
18
+
19
+ overrides_of_model_config:
20
+ rope_theta: 2000000.0
21
+ max_position_embeddings: 32768
22
+
23
+ # peft_use_dora: true
24
+ adapter: lora
25
+ peft_use_rslora: true
26
+ lora_model_dir:
27
+ lora_r: 256
28
+ lora_alpha: 256
29
+ lora_dropout: 0.1
30
+ lora_target_linear: true
31
+ lora_target_modules:
32
+ - gate_proj
33
+ - down_proj
34
+ - up_proj
35
+ - q_proj
36
+ - v_proj
37
+ - k_proj
38
+ - o_proj
39
+
40
+ warmup_steps: 80
41
+ gradient_accumulation_steps: 6
42
+ micro_batch_size: 1
43
+ num_epochs: 2
44
+ optimizer: adamw_bnb_8bit
45
+ lr_scheduler: cosine_with_min_lr
46
+ learning_rate: 0.00004
47
+ lr_scheduler_kwargs:
48
+ min_lr: 0.000004
49
+ ```