winglian committed
Commit a0df24c
1 Parent(s): a12ba4c

Delete configs/minotaur.yml

Files changed (1)
  1. configs/minotaur.yml +0 -143
configs/minotaur.yml DELETED
@@ -1,143 +0,0 @@
-base_model: huggyllama/llama-13b
-base_model_config: huggyllama/llama-13b
-model_type: LlamaForCausalLM
-tokenizer_type: LlamaTokenizer
-load_in_8bit: false
-load_in_4bit: false
-gptq: false
-strict: false
-push_dataset_to_hub:
-hf_use_auth_token: true
-datasets:
-  - path: winglian/evals
-    data_files:
-      - hf/ARC-Challenge.jsonl
-      - hf/ARC-Easy.jsonl
-      - hf/riddle_sense.jsonl
-      - hf/piqa.jsonl
-    type: explainchoice:chat
-  - path: winglian/evals
-    data_files:
-      - hf/gsm8k.jsonl
-      - hf/winogrande.jsonl
-    type: alpaca_chat.load_qa
-  - path: winglian/evals
-    data_files:
-      - custom/n_task.jsonl
-      - custom/misconceptions.jsonl
-      - custom/context_insensitivity.jsonl
-    type: alpaca_chat
-  - path: camel-ai/math
-    type: alpaca_chat.load_camel_ai
-  - path: camel-ai/biology
-    type: alpaca_chat.load_camel_ai
-  - path: camel-ai/physics
-    type: alpaca_chat.load_camel_ai
-  - path: camel-ai/chemistry
-    type: alpaca_chat.load_camel_ai
-  - path: winglian/evals
-    data_files:
-      - custom/in_context_qa.jsonl
-    type: context_qa
-  - path: winglian/evals
-    data_files:
-      - custom/in_context_qa.jsonl
-    type: context_qa.load_404
-  - path: winglian/evals
-    data_files:
-      - custom/jokes_explained_500up.jsonl
-    type: sharegpt_jokes
-  - path: winglian/evals
-    data_files:
-      - custom/classify-self-chat.sharegpt.jsonl
-      - custom/coding-self-chat.sharegpt.jsonl
-      - custom/prose-gpt4.sharegpt.jsonl
-      - custom/prose-rewrite-gpt4.sharegpt.jsonl
-    type: sharegpt_simple.load_role
-  - path: winglian/evals
-    data_files:
-      - openai/tldr.jsonl
-    type: summarizetldr:chat
-  - path: winglian/evals
-    data_files:
-      - hellaswag/hellaswag.jsonl
-    type: explainchoice:chat
-  - path: metaeval/ScienceQA_text_only
-    type: concisechoice:chat
-  - path: teknium/GPT4-LLM-Cleaned
-    type: alpaca_chat
-  - path: teknium/GPTeacher-General-Instruct
-    data_files: gpt4-instruct-similarity-0.6-dataset.json
-    type: gpteacher:chat
-  - path: QingyiSi/Alpaca-CoT
-    data_files:
-      - Chain-of-Thought/formatted_cot_data/aqua_train.json
-      - Chain-of-Thought/formatted_cot_data/creak_train.json
-      - Chain-of-Thought/formatted_cot_data/ecqa_train.json
-      - Chain-of-Thought/formatted_cot_data/esnli_train.json
-      - Chain-of-Thought/formatted_cot_data/qasc_train.json
-      - Chain-of-Thought/formatted_cot_data/qed_train.json
-      - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
-      - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
-      - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
-    type: alpaca_chat
-  - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
-    type: alpaca_chat
-  - path: ehartford/wizard_vicuna_70k_unfiltered
-    type: sharegpt:chat
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.004
-adapter:
-lora_model_dir:
-sequence_len: 2048
-max_packed_sequence_len: 2048
-lora_r:
-lora_alpha:
-lora_dropout:
-lora_target_modules:
-lora_target_linear: true
-lora_fan_in_fan_out:
-wandb_project: minotaur-13b
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./minotaur-13b
-gradient_accumulation_steps: 1
-micro_batch_size: 12
-num_epochs: 3
-optimizer: adamw_bnb_8bit
-torchdistx_path:
-lr_scheduler: cosine
-learning_rate: 0.00013
-train_on_inputs: false
-group_by_length: true
-bf16: true
-fp16: false
-tf32: true
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention: true
-flash_attention:
-gptq_groupsize:
-gptq_model_v1:
-warmup_steps: 100
-eval_steps: 20
-save_steps: 51
-load_best_model_at_end: false
-debug:
-deepspeed:
-weight_decay: 0.1
-fsdp:
-  - full_shard
-  - auto_wrap
-fsdp_config:
-  fsdp_offload_params: true
-  fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
-special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
-
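For reference, the deleted file was an ordinary axolotl training config. Below is a minimal sketch, not part of this commit, of parsing such a config to inspect its dataset mix; it assumes PyYAML is installed and that a local copy of configs/minotaur.yml has been restored from an earlier revision.

import yaml

# Load the training config and summarize which datasets it mixed together.
with open("configs/minotaur.yml") as f:
    cfg = yaml.safe_load(f)

print("base model:", cfg["base_model"])        # huggyllama/llama-13b
print("dataset entries:", len(cfg["datasets"]))
for ds in cfg["datasets"]:
    # each entry pairs a Hub path with an axolotl prompt-format "type"
    print(f"  {ds['path']} -> {ds.get('type')}")

Note that keys left blank in the YAML (e.g. adapter:, lora_r:) parse to None with yaml.safe_load, effectively leaving them unset.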