---
license: other
---

- [wandb](https://wandb.ai/open-assistant/supervised-finetuning/runs/2jfazjt9) (still internal, needs to be moved to public-sft)
- checkpoint: 3319 steps

## Note

To load this model, you need to install a pre-release version of the Hugging Face `transformers` library.
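Since no specific version is pinned above, a minimal sketch is to install straight from the `transformers` main branch and load the checkpoint with the standard `Auto` classes. The repo id below is a placeholder assumption, not taken from this card:

```
# Assumption: the pre-release is installed directly from source, e.g.
#   pip install git+https://github.com/huggingface/transformers.git

from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder repo id; substitute the actual hub id or local checkpoint path.
model_id = "OpenAssistant/llama2-13b-orca-8k-3319"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
```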

## Model Configuration

```
llama2_13b_orca_8k:
  rng_seed: 0xe1291f1a
  use_custom_sampler: true
  sort_by_length: false
  dtype: fp16
  log_dir: "llama2_log_13b_orca_8k"
  learning_rate: 1e-5
  model_name: /mnt/data/llama2/Llama-2-13b-hf/
  output_dir: llama2_13b_orca_8k
  deepspeed_config: configs/zero_config_pretrain.json
  weight_decay: 0.0
  max_length: 8192
  warmup_steps: 100
  use_flash_attention: true
  gradient_checkpointing: true
  gradient_accumulation_steps: 8
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 1
  residual_dropout: 0.0
  eval_steps: 200
  save_steps: 1000 # (total steps: 3319)
  num_train_epochs: 1
  save_total_limit: 4
  superhot: true
  superhot_config:
    type: linear
    scale: 2
  datasets:
    # Dataset Composition:
    # Train (sampled):
    #   orca-chat: 100.00% (188842)
    #   fanfics: 100.00% (47760)
    #   red_pajama: 25.00% (188262)
    # Valid:
    #   orca-chat: 5000 (71.43%)
    #   fanfics: 1000 (14.29%)
    #   red_pajama: 1000 (14.29%)
    - orca-chat:
        max_val_set: 5000
    - fanfics:
        max_chunk_size: 65535
        max_val_set: 1000
    - red_pajama:
        fraction: 0.25
        max_val_set: 1000
        max_chunk_size: 65535
  peft_model: false
```
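
The `superhot` block above enables linear RoPE position interpolation with a scale of 2, stretching Llama 2's native 4096-token context to the 8192 tokens used for `max_length`. (With `per_device_train_batch_size: 2` and `gradient_accumulation_steps: 8`, each device also processes an effective batch of 16 sequences per optimizer step.) As a rough sketch, an equivalent scaling can be applied at load time via the `rope_scaling` option in recent `transformers` releases; the exact minimum version is an assumption, and the hub id below stands in for the local `model_name` path:

```
from transformers import AutoConfig, AutoModelForCausalLM

base = "meta-llama/Llama-2-13b-hf"  # hub equivalent of the local model_name path

# Linear position interpolation: a factor of 2 stretches the 4096-token
# RoPE range to 8192 tokens, matching max_length in the config above.
config = AutoConfig.from_pretrained(base)
config.rope_scaling = {"type": "linear", "factor": 2.0}  # assumption: transformers >= 4.31

model = AutoModelForCausalLM.from_pretrained(base, config=config, torch_dtype="auto")
```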