samvelkoch committed on
Commit
ee581d8
·
verified ·
1 Parent(s): 8dd8294

Upload cfg.yaml

Browse files
Files changed (1) hide show
  1. cfg.yaml +227 -0
cfg.yaml ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
architecture:
    backbone_dtype: float32
    gradient_checkpointing: true
    intermediate_dropout: 0.0
    pretrained: true
    pretrained_weights: ''
augmentation:
    neftune_noise_alpha: 0.0
    random_parent_probability: 0.0
    skip_parent_probability: 0.0
    token_mask_probability: 0.1
dataset:
    add_eos_token_to_answer: false
    add_eos_token_to_prompt: false
    add_eos_token_to_system: false
    add_prompt_answer_tokens: false
    answer_column: score
    chatbot_author: H2O.ai
    chatbot_name: h2oGPT
    data_sample: 1.0
    data_sample_choice:
    - Train
    - Validation
    limit_chained_samples: false
    mask_prompt_labels: true
    num_classes: 6
    parent_id_column: None
    personalize: false
    prompt_column:
    - full_text
    - count_sentence
    - count_word
    - count_paragraph
    - count_symbol
    - count_punctuation
    - count_stop_words
    - count_ner
    - CARDINAL
    - DATE
    - EVENT
    - FAC
    - GPE
    - LANGUAGE
    - LAW
    - LOC
    - MONEY
    - NORP
    - ORDINAL
    - ORG
    - PERCENT
    - PERSON
    - PRODUCT
    - QUANTITY
    - TIME
    - WORK_OF_ART
    - mean_ner_sentence
    - len_ner
    - mean_ner
    - proc_ner_per_text
    - max_ner_per_sentence
    - independent_clauses
    - dependent_clauses
    - formality_level
    - simple_sentence_count
    - complex_sentence_count
    - has_intro
    - count_intro
    - has_repetitions
    - count_repetitions
    - coherence
    - -LRB-
    - -RRB-
    - ADD
    - AFX
    - CC
    - CD
    - DT
    - EX
    - FW
    - HYPH
    - IN
    - JJ
    - JJR
    - JJS
    - LS
    - MD
    - NFP
    - NN
    - NNP
    - NNPS
    - NNS
    - PDT
    - POS
    - PRP
    - PRP$
    - RB
    - RBR
    - RBS
    - RP
    - SYM
    - TO
    - UH
    - VB
    - VBD
    - VBG
    - VBN
    - VBP
    - VBZ
    - WDT
    - WP
    - WP$
    - WRB
    - XX
    - _SP
    - ROOT
    - acl
    - acomp
    - advcl
    - advmod
    - agent
    - amod
    - appos
    - attr
    - aux
    - auxpass
    - case
    - cc
    - ccomp
    - compound
    - conj
    - csubj
    - csubjpass
    - dative
    - dep
    - det
    - dobj
    - expl
    - intj
    - mark
    - meta
    - neg
    - nmod
    - npadvmod
    - nsubj
    - nsubjpass
    - nummod
    - oprd
    - parataxis
    - pcomp
    - pobj
    - poss
    - preconj
    - predet
    - prep
    - prt
    - punct
    - quantmod
    - relcl
    - xcomp
    system_column: None
    text_answer_separator: ''
    text_prompt_start: ''
    text_system_start: ''
    train_dataframe: /root/h2o-llmstudio/data/user/train_spacy_mamba/train_spacy_mamba.csv
    validation_dataframe: None
    validation_size: 0.01
    validation_strategy: automatic
environment:
    compile_model: false
    deepspeed_allgather_bucket_size: 1000000
    deepspeed_method: ZeRO2
    deepspeed_reduce_bucket_size: 1000000
    deepspeed_stage3_param_persistence_threshold: 1000000
    deepspeed_stage3_prefetch_bucket_size: 1000000
    find_unused_parameters: false
    gpus:
    - '0'
    huggingface_branch: main
    mixed_precision: true
    mixed_precision_dtype: bfloat16
    number_of_workers: 8
    seed: -1
    trust_remote_code: true
    use_deepspeed: false
experiment_name: spacy-mamba
llm_backbone: h2oai/h2ogpt-4096-llama2-7b
logging:
    logger: Neptune
    neptune_project: samvelkoch/essay
output_directory: /root/h2o-llmstudio/output/user/spacy-mamba/
prediction:
    batch_size_inference: 0
    metric: Accuracy
problem_type: text_causal_classification_modeling
tokenizer:
    add_prompt_answer_tokens: false
    max_length: 10240
    padding_quantile: 1.0
    tokenizer_kwargs: '{"use_fast": true, "add_prefix_space": false}'
training:
    batch_size: 2
    differential_learning_rate: 1.0e-05
    differential_learning_rate_layers:
    - classification_head
    drop_last_batch: true
    epochs: 1
    evaluate_before_training: false
    evaluation_epochs: 1.0
    freeze_layers: []
    grad_accumulation: 1
    gradient_clip: 0.0
    learning_rate: 0.0001
    lora: true
    lora_alpha: 16
    lora_dropout: 0.05
    lora_r: 4
    lora_target_modules: ''
    lora_unfreeze_layers: []
    loss_function: CrossEntropyLoss
    optimizer: AdamW
    save_checkpoint: last
    schedule: Cosine
    train_validation_data: false
    use_dora: false
    use_flash_attention_2: true
    warmup_epochs: 0.0
    weight_decay: 0.0