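# H2O LLM Studio experiment configuration (cfg.yaml) for the spacy-mamba run:
# causal-LM classification (problem_type: text_causal_classification_modeling) of
# essay text into 6 score classes, fine-tuning h2oai/h2ogpt-4096-llama2-7b with LoRA.

# architecture: backbone precision and memory settings; gradient checkpointing trades
# extra forward compute for lower activation memory, and intermediate_dropout: 0.0
# adds no extra dropout inside the backbone.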
architecture:
    backbone_dtype: float32
    gradient_checkpointing: true
    intermediate_dropout: 0.0
    pretrained: true
    pretrained_weights: ''
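# augmentation: training-time input perturbations. token_mask_probability: 0.1 randomly
# masks input tokens with probability 0.1 per token; neftune_noise_alpha: 0.0 keeps
# NEFTune embedding noise disabled.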
augmentation:
    neftune_noise_alpha: 0.0
    random_parent_probability: 0.0
    skip_parent_probability: 0.0
    token_mask_probability: 0.1
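# dataset: the target is the `score` column with num_classes: 6; the listed prompt
# columns are concatenated into the model prompt, so the backbone sees the raw essay
# text (full_text) alongside engineered features: surface counts, what appear to be
# spaCy named-entity-type counts (CARDINAL through WORK_OF_ART), fine-grained POS tag
# counts (-LRB- through _SP), and dependency-label counts (ROOT, acl through xcomp).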
dataset:
    add_eos_token_to_answer: false
    add_eos_token_to_prompt: false
    add_eos_token_to_system: false
    add_prompt_answer_tokens: false
    answer_column: score
    chatbot_author: H2O.ai
    chatbot_name: h2oGPT
    data_sample: 1.0
    data_sample_choice:
    - Train
    - Validation
    limit_chained_samples: false
    mask_prompt_labels: true
    num_classes: 6
    parent_id_column: None
    personalize: false
    prompt_column:
    - full_text
    - count_sentence
    - count_word
    - count_paragraph
    - count_symbol
    - count_punctuation
    - count_stop_words
    - count_ner
    - CARDINAL
    - DATE
    - EVENT
    - FAC
    - GPE
    - LANGUAGE
    - LAW
    - LOC
    - MONEY
    - NORP
    - ORDINAL
    - ORG
    - PERCENT
    - PERSON
    - PRODUCT
    - QUANTITY
    - TIME
    - WORK_OF_ART
    - mean_ner_sentence
    - len_ner
    - mean_ner
    - proc_ner_per_text
    - max_ner_per_sentence
    - independent_clauses
    - dependent_clauses
    - formality_level
    - simple_sentence_count
    - complex_sentence_count
    - has_intro
    - count_intro
    - has_repetitions
    - count_repetitions
    - coherence
    - -LRB-
    - -RRB-
    - ADD
    - AFX
    - CC
    - CD
    - DT
    - EX
    - FW
    - HYPH
    - IN
    - JJ
    - JJR
    - JJS
    - LS
    - MD
    - NFP
    - NN
    - NNP
    - NNPS
    - NNS
    - PDT
    - POS
    - PRP
    - PRP$
    - RB
    - RBR
    - RBS
    - RP
    - SYM
    - TO
    - UH
    - VB
    - VBD
    - VBG
    - VBN
    - VBP
    - VBZ
    - WDT
    - WP
    - WP$
    - WRB
    - XX
    - _SP
    - ROOT
    - acl
    - acomp
    - advcl
    - advmod
    - agent
    - amod
    - appos
    - attr
    - aux
    - auxpass
    - case
    - cc
    - ccomp
    - compound
    - conj
    - csubj
    - csubjpass
    - dative
    - dep
    - det
    - dobj
    - expl
    - intj
    - mark
    - meta
    - neg
    - nmod
    - npadvmod
    - nsubj
    - nsubjpass
    - nummod
    - oprd
    - parataxis
    - pcomp
    - pobj
    - poss
    - preconj
    - predet
    - prep
    - prt
    - punct
    - quantmod
    - relcl
    - xcomp
    system_column: None
    text_answer_separator: ''
    text_prompt_start: ''
    text_system_start: ''
    train_dataframe: /root/h2o-llmstudio/data/user/train_spacy_mamba/train_spacy_mamba.csv
    validation_dataframe: None
    validation_size: 0.01
    validation_strategy: automatic
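# environment: single training GPU ('0'), bfloat16 mixed precision, 8 dataloader
# workers, and seed: -1 (LLM Studio draws a random seed in that case). The DeepSpeed
# bucket sizes are present but unused since use_deepspeed is false.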
environment:
    compile_model: false
    deepspeed_allgather_bucket_size: 1000000
    deepspeed_method: ZeRO2
    deepspeed_reduce_bucket_size: 1000000
    deepspeed_stage3_param_persistence_threshold: 1000000
    deepspeed_stage3_prefetch_bucket_size: 1000000
    find_unused_parameters: false
    gpus:
    - '0'
    huggingface_branch: main
    mixed_precision: true
    mixed_precision_dtype: bfloat16
    number_of_workers: 8
    seed: -1
    trust_remote_code: true
    use_deepspeed: false
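# Experiment name and backbone: a Llama-2 7B checkpoint published by H2O.ai
# (4096-token context, going by the repository name).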
experiment_name: spacy-mamba
llm_backbone: h2oai/h2ogpt-4096-llama2-7b
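# logging: training and validation metrics are sent to the Neptune project below.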
logging:
    logger: Neptune
    neptune_project: samvelkoch/essay
output_directory: /root/h2o-llmstudio/output/user/spacy-mamba/
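# prediction: Accuracy on the held-out split is the tracked metric;
# batch_size_inference: 0 usually means the training batch size is reused at inference.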
prediction:
    batch_size_inference: 0
    metric: Accuracy
problem_type: text_causal_classification_modeling
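# tokenizer: fast tokenizer without a forced prefix space; tokenized samples are
# capped at max_length: 10240 tokens.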
tokenizer:
    add_prompt_answer_tokens: false
    max_length: 10240
    padding_quantile: 1.0
    tokenizer_kwargs: '{"use_fast": true, "add_prefix_space": false}'
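# training: LoRA fine-tuning (r: 4, alpha: 16, dropout: 0.05); lora_target_modules is
# empty, which LLM Studio resolves to its defaults for the backbone. The
# classification_head is trained at a differential learning rate of 1.0e-05 while the
# rest uses 1.0e-04. One epoch, batch size 2, no gradient accumulation, cosine
# schedule, AdamW, CrossEntropyLoss, FlashAttention 2 enabled.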
training:
    batch_size: 2
    differential_learning_rate: 1.0e-05
    differential_learning_rate_layers:
    - classification_head
    drop_last_batch: true
    epochs: 1
    evaluate_before_training: false
    evaluation_epochs: 1.0
    freeze_layers: []
    grad_accumulation: 1
    gradient_clip: 0.0
    learning_rate: 0.0001
    lora: true
    lora_alpha: 16
    lora_dropout: 0.05
    lora_r: 4
    lora_target_modules: ''
    lora_unfreeze_layers: []
    loss_function: CrossEntropyLoss
    optimizer: AdamW
    save_checkpoint: last
    schedule: Cosine
    train_validation_data: false
    use_dora: false
    use_flash_attention_2: true
    warmup_epochs: 0.0
    weight_decay: 0.0