# en-nb-7b / cfg.yaml
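# H2O LLM Studio experiment configuration for a LoRA fine-tune of Mistral-7B-v0.1,
# apparently targeting English -> Norwegian Bokmål (en-nb) translation.
# architecture: how the pretrained backbone is loaded. int4 keeps the frozen
# backbone weights in 4-bit precision; gradient checkpointing trades extra
# compute for lower GPU memory use.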
architecture:
    backbone_dtype: int4
    force_embedding_gradients: false
    gradient_checkpointing: true
    intermediate_dropout: 0.0
    pretrained: true
    pretrained_weights: ''
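# augmentation: 5% of input tokens are randomly masked during training;
# parent-sample augmentations are left disabled.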
augmentation:
    random_parent_probability: 0.0
    skip_parent_probability: 0.0
    token_mask_probability: 0.05
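# dataset: prompt_column holds the English source and answer_column the Norwegian
# target; the long strings below appear to be one full example pair carried in the
# config. mask_prompt_labels restricts the loss to answer tokens, and 4% of the
# data is split off automatically for validation.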
dataset:
    add_eos_token_to_answer: true
    add_eos_token_to_prompt: true
    add_eos_token_to_system: true
    answer_column: "Kontekst: informasjonsteknologi, tagging, databaseadministrasjon,\
        \ søk\nOversettelse:\nDefinisjon: (Wikipedia, 2008-08-07). Arbeide med\
        \ koder på factline-plattformen: Hvis systemet eller plattformadministratoren\
        \ har aktivert dette, har du muligheten til å opprette koder. Koder er\
        \ organisert som mapper. 1) Det er mulig å knytte faktene dine til så\
        \ mange koder du ønsker. 2) Søk etter koder med 'factlist & search'.\
        \ Innholdet som tilhører de tilknyttede kodene vil bli vist. 3) Du kan\
        \ også søke ved å bruke mer enn én kode ved å separere dem\
        \ med komma (,).\nMer naturlig:\nDefinisjon: (Wikipedia, 2008-08-07). Arbeid\
        \ med koder på factline-plattformen: Hvis systemet eller plattformadministratoren\
        \ har aktivert denne funksjonen, har du muligheten til å opprette koder.\
        \ Koder er organisert som mapper. 1) Du kan knytte faktene dine til så\
        \ mange koder du ønsker. 2) Søk etter koder med 'factlist & search'.\
        \ Innholdet som er knyttet til kodene vil bli vist. 3) Du kan også søke\
        \ ved å bruke flere koder samtidig ved å separere dem med komma (,).\r"
    chatbot_author: H2O.ai
    chatbot_name: h2oGPT
    data_sample: 1.0
    data_sample_choice:
    - Train
    - Validation
    limit_chained_samples: false
    mask_prompt_labels: true
    parent_id_column: None
    personalize: false
    prompt_column:
    - 'Oversett til Norsk:
        Definition:. (Wikipedia, 2008-08-07). Working with Tags on the factline-platform:.
        If your system or platform administrator activated this , you have the possibility
        to create tags.. In fact tags they are organised like folders.. 1) It is possible
        to link your facts to as many tags you want.. 2) Search for tags with "factlist
        & search". The content belonging to the linked tags will be shown.. 3) Also
        search using more than one tag by separating them with a comma (,).'
    system_column: None
    text_answer_separator: <|answer|>
    text_prompt_start: <|prompt|>
    text_system_start: <|system|>
    train_dataframe: /fp/projects01/ec281/h2o-llmstudio/data/user/en-nb-15k/en-nb-15k.csv
    validation_dataframe: None
    validation_size: 0.04
    validation_strategy: automatic
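# environment: single-GPU run (device '0') with mixed precision; DeepSpeed and
# model compilation are disabled, and seed -1 picks a random seed per run.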
environment:
    compile_model: false
    deepspeed_reduce_bucket_size: 1000000
    deepspeed_stage3_param_persistence_threshold: 1000000
    deepspeed_stage3_prefetch_bucket_size: 1000000
    find_unused_parameters: false
    gpus:
    - '0'
    huggingface_branch: main
    mixed_precision: true
    number_of_workers: 8
    seed: -1
    trust_remote_code: true
    use_deepspeed: false
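# Experiment name and base model: "mist-lang" fine-tunes mistralai/Mistral-7B-v0.1.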
experiment_name: mist-lang
llm_backbone: mistralai/Mistral-7B-v0.1
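# logging: no external experiment tracker (logger None, empty Neptune project);
# experiment outputs go to the directory below.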
logging:
    logger: None
    neptune_project: ''
output_directory: /fp/projects01/ec281/h2o-llmstudio/output/user/mist-lang/
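# prediction: greedy decoding for validation (do_sample false, temperature 0.0,
# num_beams 1) with repetition penalty 1.2, scored by Perplexity.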
prediction:
    batch_size_inference: 0
    do_sample: false
    max_length_inference: 256
    metric: Perplexity
    metric_gpt_model: gpt-3.5-turbo-0301
    min_length_inference: 2
    num_beams: 1
    num_history: 4
    repetition_penalty: 1.2
    stop_tokens: ''
    temperature: 0.0
    top_k: 0
    top_p: 1.0
problem_type: text_causal_language_modeling
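# tokenizer: fast tokenizer with a 2048-token total budget, capped at 1024
# tokens each for prompt and answer.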
tokenizer:
    add_prefix_space: false
    add_prompt_answer_tokens: false
    max_length: 2048
    max_length_answer: 1024
    max_length_prompt: 1024
    padding_quantile: 1.0
    use_fast: true
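# training: LoRA adapters (r=64, alpha=16, dropout 0.05) on all attention and
# MLP projections; 4 epochs at batch size 6, AdamW with a cosine schedule,
# learning rate 1e-4, 0.1 warmup epochs, token-averaged cross-entropy loss.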
training:
    batch_size: 6
    differential_learning_rate: 1.0e-05
    differential_learning_rate_layers: []
    drop_last_batch: true
    epochs: 4
    evaluate_before_training: false
    evaluation_epochs: 1.0
    grad_accumulation: 1
    gradient_clip: 0.0
    learning_rate: 0.0001
    lora: true
    lora_alpha: 16
    lora_dropout: 0.05
    lora_r: 64
    lora_target_modules: q_proj,k_proj,down_proj,v_proj,o_proj,gate_proj,up_proj
    loss_function: TokenAveragedCrossEntropy
    optimizer: AdamW
    save_best_checkpoint: true
    schedule: Cosine
    train_validation_data: false
    warmup_epochs: 0.1
    weight_decay: 0.0