Text Generation
Transformers
PyTorch
English
llama
Inference Endpoints
text-generation-inference
winglian committed on
Commit
3acb895
1 Parent(s): 3ec8859

Create configs/hippogriff.yml

Browse files
Files changed (1) hide show
  1. configs/hippogriff.yml +127 -0
configs/hippogriff.yml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: huggyllama/llama-30b
2
+ base_model_config: huggyllama/llama-30b
3
+ model_type: LlamaForCausalLM
4
+ tokenizer_type: LlamaTokenizer
5
+ load_in_8bit: false
6
+ load_in_4bit: false
7
+ strict: false
8
+ push_dataset_to_hub: winglian
9
+ hf_use_auth_token: true
10
+ datasets:
11
+ - path: winglian/pygmalion-cleaned
12
+ data_files:
13
+ - v13_no_ai.cleaned.jsonl
14
+ type: pygmalion
15
+ - path: winglian/evals
16
+ data_files:
17
+ - hf/ARC-Challenge.jsonl
18
+ - hf/ARC-Easy.jsonl
19
+ - hf/riddle_sense.jsonl
20
+ type: explainchoice:chat
21
+ - path: winglian/evals
22
+ data_files:
23
+ - hf/gsm8k.jsonl
24
+ - custom/logic_inference_oa.jsonl
25
+ type: alpaca_chat.load_qa
26
+ - path: winglian/evals
27
+ data_files:
28
+ - custom/in_context_qa.jsonl
29
+ type: context_qa
30
+ - path: winglian/evals
31
+ data_files:
32
+ - custom/in_context_qa.jsonl
33
+ type: context_qa.load_404
34
+ - path: winglian/evals
35
+ data_files:
36
+ - custom/jokes_explained_500up.jsonl
37
+ type: sharegpt_jokes
38
+ - path: winglian/evals
39
+ data_files:
40
+ - custom/classify-self-chat.sharegpt.jsonl
41
+ - custom/coding-self-chat.sharegpt.jsonl
42
+ - custom/prose-gpt4.sharegpt.jsonl
43
+ - custom/prose-rewrite-gpt4.sharegpt.jsonl
44
+ type: sharegpt_simple
45
+ - path: winglian/evals
46
+ data_files:
47
+ - custom/guanaco-cleaned.en.jsonl
48
+ type: sharegpt_simple.load_guanaco
49
+ - path: winglian/evals
50
+ data_files:
51
+ - openai/tldr.jsonl
52
+ type: summarizetldr:chat
53
+ - path: winglian/evals
54
+ data_files:
55
+ - hellaswag/hellaswag.jsonl
56
+ type: explainchoice:chat
57
+ shards: 60
58
+ - path: metaeval/ScienceQA_text_only
59
+ type: concisechoice:chat
60
+ shards: 13
61
+ - path: teknium/GPTeacher-General-Instruct
62
+ data_files: gpt4-instruct-similarity-0.6-dataset.json
63
+ type: gpteacher:chat
64
+ - path: QingyiSi/Alpaca-CoT
65
+ data_files:
66
+ - Chain-of-Thought/formatted_cot_data/aqua_train.json
67
+ - Chain-of-Thought/formatted_cot_data/creak_train.json
68
+ - Chain-of-Thought/formatted_cot_data/ecqa_train.json
69
+ - Chain-of-Thought/formatted_cot_data/esnli_train.json
70
+ - Chain-of-Thought/formatted_cot_data/gsm8k_train.json
71
+ - Chain-of-Thought/formatted_cot_data/qasc_train.json
72
+ - Chain-of-Thought/formatted_cot_data/qed_train.json
73
+ - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
74
+ - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
75
+ - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
76
+ type: alpaca_chat
77
+ dataset_prepared_path: /workspace/mnt/last_run_prepared
78
+ val_set_size: 0.02
79
+ adapter:
80
+ lora_model_dir:
81
+ sequence_len: 2048
82
+ max_packed_sequence_len: 2048
83
+ lora_r: 64
84
+ lora_alpha: 32
85
+ lora_dropout: 0.000001
86
+ lora_target_modules:
87
+ lora_target_linear: true
88
+ lora_fan_in_fan_out:
89
+ wandb_project: hippogriff-30b-chat
90
+ wandb_watch:
91
+ wandb_run_id:
92
+ wandb_log_model:
93
+ output_dir: /workspace/mnt/hippogriff-30b-chat
94
+ gradient_accumulation_steps: 1
95
+ micro_batch_size: 20
96
+ num_epochs: 2
97
+ optimizer:
98
+ torchdistx_path:
99
+ lr_scheduler:
100
+ learning_rate: 0.00003
101
+ train_on_inputs: false
102
+ group_by_length: true
103
+ bf16: true
104
+ fp16: false
105
+ float16: false
106
+ tf32: true
107
+ gradient_checkpointing: true
108
+ early_stopping_patience:
109
+ resume_from_checkpoint:
110
+ local_rank:
111
+ logging_steps: 1
112
+ xformers_attention: true
113
+ flash_attention:
114
+ gptq_groupsize:
115
+ gptq_model_v1:
116
+ warmup_steps: 200
117
+ eval_steps: 20
118
+ save_steps: 50
119
+ debug:
120
+ deepspeed:
121
+ weight_decay: 0.000003
122
+ fsdp:
123
+ fsdp_config:
124
+ special_tokens:
125
+ bos_token: "<s>"
126
+ eos_token: "</s>"
127
+ unk_token: "<unk>"