amigov1 / docs /index.html
asach's picture
Upload folder using huggingface_hub
d727a17
raw
history blame
4.47 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Training Commands</title>
<style>
  /* Page-wide defaults: monospace text with a small gutter around the content. */
  body {
    margin: 0;
    padding: 8px;
    font-family: monospace;
  }

  /* Box that displays the currently selected training command. */
  #command {
    display: inline-block;
    padding: 8px;
    border: 1px solid #d1d5da;
    border-radius: 3px;
    background-color: #f6f8fa;
    /* Keep the command's own line breaks and spaces, but still wrap lines that are too long. */
    white-space: pre-wrap;
    /* Allow a break between any two characters so long tokens fit the container width. */
    word-break: break-all;
  }

  /* Model selector buttons. */
  button {
    cursor: pointer;
    margin-left: 8px;
  }
</style>
</head>
<body>
<!-- Training command shown when the page loads. changeCommand() in the script below replaces this element's text when a model button is clicked. The initial text matches the first entry of the `commands` array, with < and > written as HTML entities. Line breaks inside the element are significant because #command uses white-space: pre-wrap. -->
<div id="command">torchrun --nproc_per_node=8 --master_port=&lt;YOUR PORT&gt; train.py \
--model_name_or_path "facebook/opt-6.7b" \
--data_path medalpaca_small.json \
--bf16 True \
--output_dir models \
--num_train_epochs 3 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 8 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 2000 \
--save_total_limit 1 \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--fsdp "full_shard auto_wrap" \
--fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \
--tf32 True</div>
<!-- Model selector. Each button passes the index of its entry in the `commands` array defined in the script below: 0 = facebook/opt-6.7b, 1 = facebook/opt-13b, 2 = LLaMA weights (Alpaca 7B). -->
<div>
  <button type="button" onclick="changeCommand(0)">OPT 6.7B</button>
  <button type="button" onclick="changeCommand(1)">OPT 13B</button>
  <button type="button" onclick="changeCommand(2)">Alpaca 7B</button>
</div>
<script>
// Training commands that can be shown in the #command element.
// Order: facebook/opt-6.7b, facebook/opt-13b, then the LLaMA-weights variant.
// The variants differ only in model path, per-device batch size, gradient
// accumulation steps, and the transformer layer class wrapped by FSDP, so each
// one is described by those four settings and expanded through one flag list.
const commands = [
  { model: '"facebook/opt-6.7b"', batchSize: 4, accumulationSteps: 8, layerClass: "OPTDecoderLayer" },
  { model: '"facebook/opt-13b"', batchSize: 2, accumulationSteps: 16, layerClass: "OPTDecoderLayer" },
  { model: "<PATH_TO_LLAMA_WEIGHTS>", batchSize: 4, accumulationSteps: 8, layerClass: "LlamaDecoderLayer" },
].map(({ model, batchSize, accumulationSteps, layerClass }) =>
  [
    "torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py",
    `--model_name_or_path ${model}`,
    "--data_path medalpaca_small.json",
    "--bf16 True",
    "--output_dir models",
    "--num_train_epochs 3",
    `--per_device_train_batch_size ${batchSize}`,
    `--per_device_eval_batch_size ${batchSize}`,
    `--gradient_accumulation_steps ${accumulationSteps}`,
    '--evaluation_strategy "no"',
    '--save_strategy "steps"',
    "--save_steps 2000",
    "--save_total_limit 1",
    "--learning_rate 2e-5",
    "--weight_decay 0.",
    "--warmup_ratio 0.03",
    '--lr_scheduler_type "cosine"',
    "--logging_steps 1",
    '--fsdp "full_shard auto_wrap"',
    `--fsdp_transformer_layer_cls_to_wrap '${layerClass}'`,
    "--tf32 True",
    // Every flag line except the last ends with a shell line continuation:
    // a space, a backslash, then the newline.
  ].join(" \\\n")
);
/**
 * Show one of the stored training commands in the #command element.
 *
 * @param {number} appIndex - Index into the `commands` array.
 *   An index with no string entry is ignored, so the page keeps its current
 *   command instead of displaying the literal text "undefined".
 */
function changeCommand(appIndex) {
  const command = commands[appIndex];
  if (typeof command !== "string") {
    return;
  }
  document.getElementById("command").innerText = command;
}
</script>
</body>
</html>