Text Generation
Transformers
PyTorch
English
mistral
conversational
Inference Endpoints
text-generation-inference
Crystalcareai commited on
Commit
0470c5b
1 Parent(s): 2584bd2

Create axolotl.yml

Browse files
Files changed (1) hide show
  1. axolotl.yml +118 -0
axolotl.yml ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
base_model: unsloth/Phi-3-medium-4k-instruct
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

trust_remote_code: true

# load_in_8bit: true
load_in_4bit: true
# strict: false

datasets:
  - path: /workspace/datasets/dolphin-2.9.2/dolphin201-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/dolphin-coder-codegen-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/dolphin-coder-translate-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/m-a-p_Code-Feedback-sharegpt-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/not_samantha_norefusals.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/openhermes200k_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/Orca-Math-resort-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/SystemChat_sharegpt.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/toolbench_instruct_j1s1_3k_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/toolbench_negative_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/toolbench_react_10p_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/toolbench_tflan_cot_30p_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9.2/agent_instruct_react_unfiltered.jsonl
    type: sharegpt
    conversation: chatml


chat_template: chatml
dataset_prepared_path: phi-14-data
val_set_size: 0.01
output_dir: phi-14

sequence_len: 4096 # supports up to 8192
sample_packing: true
pad_to_sequence_len: true

adapter: qlora
lora_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_modules_to_save: [embed_tokens, lm_head]
lora_target_linear: true
lora_fan_in_fan_out:

wandb_project: Phi-3-14b
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 4
num_epochs: 3
optimizer: adamw_torch_fused
lr_scheduler: cosine
learning_rate: 4e-4

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
# evals_per_epoch: 2
# eval_table_size:
# eval_max_new_tokens: 128
saves_per_epoch: 2
save_total_limit: 1
debug:
deepspeed: /workspace/axolotl/deepspeed_configs/zero3_bf16.json
weight_decay: 0.1
fsdp:
fsdp_config:
special_tokens:
  pad_token: "<|placeholder6|>"
  eos_token: "<|im_end|>"
  bos_token: "<s>"
  unk_token: "<unk>"
tokens:
  - <|im_start|>