"""Config keys and the model/dataset catalogs for LLM fine-tuning."""

from typing import List, Optional

# Config keys for model and model-version selection.
MODEL_SELECTION_ID: str = "model_selection"
MODEL_VERSION_SELECTION_ID: str = "model_version_selection"

# Config keys for 4-bit (bitsandbytes) quantization; the strings match
# transformers.BitsAndBytesConfig field names.
LOAD_IN_4_BIT_ID: str = "load_in_4bit"
BNB_4BIT_QUANT_TYPE: str = "bnb_4bit_quant_type"
BNB_4BIT_COMPUTE_DTYPE: str = "bnb_4bit_compute_dtype"
BNB_4BIT_USE_DOUBLE_QUANT: str = "bnb_4bit_use_double_quant"
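
# Hedged sketch (not part of the original wiring): the four keys above mirror
# transformers.BitsAndBytesConfig one-to-one, so a config dict keyed by these
# IDs can be mapped straight through. The helper name and the `config` dict
# shape are assumptions for illustration.
def build_quantization_config(config: dict):
    import torch
    from transformers import BitsAndBytesConfig  # assumes `transformers` is installed

    return BitsAndBytesConfig(
        load_in_4bit=config[LOAD_IN_4_BIT_ID],
        bnb_4bit_quant_type=config[BNB_4BIT_QUANT_TYPE],  # e.g. "nf4"
        bnb_4bit_compute_dtype=getattr(torch, config[BNB_4BIT_COMPUTE_DTYPE]),  # e.g. "bfloat16"
        bnb_4bit_use_double_quant=config[BNB_4BIT_USE_DOUBLE_QUANT],
    )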

# Config keys for dataset selection and shuffling.
DATASET_SELECTION_ID: str = "dataset_selection"
DATASET_SHUFFLING_SEED: str = "dataset_seed"

# Config key for the Flash Attention toggle.
FLASH_ATTENTION_ID: str = "flash_attention"

# Config keys for tokenizer padding.
PAD_SIDE_ID: str = "pad_side"
PAD_VALUE_ID: str = "pad_value"
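
# Hedged sketch: one plausible reading of the padding keys, assuming
# PAD_SIDE_ID selects the tokenizer's padding side and PAD_VALUE_ID holds the
# literal pad token string. Helper name and `config` shape are assumptions.
def apply_padding_config(tokenizer, config: dict):
    tokenizer.padding_side = config[PAD_SIDE_ID]  # "left" or "right"
    tokenizer.pad_token = config[PAD_VALUE_ID]    # e.g. "</s>" (assumed meaning)
    return tokenizer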

# Config keys for LoRA hyperparameters.
LORA_R_ID: str = "lora_r"
LORA_ALPHA_ID: str = "lora_alpha"
LORA_DROPOUT_ID: str = "lora_dropout"
LORA_BIAS_ID: str = "lora_bias"
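
# Hedged sketch: the LoRA keys map directly onto peft.LoraConfig fields.
# `task_type="CAUSAL_LM"` and the helper name are assumptions for illustration.
def build_lora_config(config: dict):
    from peft import LoraConfig  # assumes `peft` is installed

    return LoraConfig(
        r=config[LORA_R_ID],               # rank of the update matrices
        lora_alpha=config[LORA_ALPHA_ID],  # scaling factor
        lora_dropout=config[LORA_DROPOUT_ID],
        bias=config[LORA_BIAS_ID],         # "none", "all", or "lora_only"
        task_type="CAUSAL_LM",
    )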

# Config keys for training hyperparameters; the strings match
# transformers.TrainingArguments parameter names.
NUM_TRAIN_EPOCHS_ID: str = "num_train_epochs"
MAX_STEPS_ID: str = "max_steps"
LOGGING_STEPS_ID: str = "logging_steps"
PER_DEVICE_TRAIN_BATCH_SIZE: str = "per_device_train_batch_size"
SAVE_STRATEGY_ID: str = "save_strategy"
GRADIENT_ACCUMULATION_STEPS_ID: str = "gradient_accumulation_steps"
GRADIENT_CHECKPOINTING_ID: str = "gradient_checkpointing"
LEARNING_RATE_ID: str = "learning_rate"
MAX_GRAD_NORM_ID: str = "max_grad_norm"
WARMUP_RATIO_ID: str = "warmup_ratio"
LR_SCHEDULER_TYPE_ID: str = "lr_scheduler_type"

# Config key for the output directory.
OUTPUT_DIR_ID: str = "output_dir"

# Config key for the push-to-Hub toggle.
PUSH_TO_HUB_ID: str = "push_to_hub"

# Config key for the target Hub repository.
REPOSITORY_NAME_ID: str = "repo_id"

# Config key for the experiment-tracking backend.
REPORT_TO_ID: str = "report_to"

# Config key for the generated model card / README.
README_ID: str = "readme"

# Config keys for sequence length and example packing (SFT-specific).
MAX_SEQ_LENGTH_ID: str = "max_seq_length"
PACKING_ID: str = "packing"
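
# Hedged sketch: MAX_SEQ_LENGTH_ID and PACKING_ID belong to trl's SFTTrainer
# rather than TrainingArguments. This assumes a trl release in which
# SFTTrainer still accepts `max_seq_length` and `packing` directly (newer
# releases move them onto SFTConfig); the helper itself is illustrative.
def build_sft_trainer(model, tokenizer, train_dataset, training_args, config: dict):
    from trl import SFTTrainer  # assumes `trl` is installed

    return SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        args=training_args,
        max_seq_length=config[MAX_SEQ_LENGTH_ID],
        packing=config[PACKING_ID],
    )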

# Config keys for optimizer settings; the strings match
# transformers.TrainingArguments parameter names.
OPTIMIZER_ID: str = "optim"
BETA1_ID: str = "adam_beta1"
BETA2_ID: str = "adam_beta2"
EPSILON_ID: str = "adam_epsilon"
WEIGHT_DECAY_ID: str = "weight_decay"
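
# Hedged sketch: since each training/optimizer key above stores the exact
# TrainingArguments parameter name, a config dict keyed by these IDs can be
# filtered and unpacked directly. The key list and helper name are
# assumptions; UI-only keys (REPOSITORY_NAME_ID, README_ID, MAX_SEQ_LENGTH_ID,
# PACKING_ID) are deliberately left out because they are not
# TrainingArguments parameters.
def build_training_arguments(config: dict):
    from transformers import TrainingArguments  # assumes `transformers` is installed

    keys = [
        NUM_TRAIN_EPOCHS_ID, MAX_STEPS_ID, LOGGING_STEPS_ID,
        PER_DEVICE_TRAIN_BATCH_SIZE, SAVE_STRATEGY_ID,
        GRADIENT_ACCUMULATION_STEPS_ID, GRADIENT_CHECKPOINTING_ID,
        LEARNING_RATE_ID, MAX_GRAD_NORM_ID, WARMUP_RATIO_ID,
        LR_SCHEDULER_TYPE_ID, OUTPUT_DIR_ID, PUSH_TO_HUB_ID, REPORT_TO_ID,
        OPTIMIZER_ID, BETA1_ID, BETA2_ID, EPSILON_ID, WEIGHT_DECAY_ID,
    ]
    return TrainingArguments(**{k: config[k] for k in keys if k in config})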


class FTDataSet:
    """A fine-tuning dataset on the Hugging Face Hub: repository path plus optional split."""

    def __init__(self, path: str, dataset_split: Optional[str] = None):
        self.path = path
        self.dataset_split = dataset_split

    def __str__(self):
        return self.path


# Datasets offered for fine-tuning.
deita_dataset = FTDataSet(path="HuggingFaceH4/deita-10k-v0-sft", dataset_split="train_sft")
dolly = FTDataSet(path="philschmid/dolly-15k-oai-style", dataset_split="train")
ultrachat_200k = FTDataSet(path="HuggingFaceH4/ultrachat_200k", dataset_split="train_sft")
ft_datasets = [deita_dataset, dolly, ultrachat_200k]
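
# Hedged sketch: how an FTDataSet entry might be materialized with the
# `datasets` library, reshuffled with the seed stored under
# DATASET_SHUFFLING_SEED. Helper name and `config` shape are assumptions.
def load_ft_dataset(ft_dataset: FTDataSet, config: dict):
    from datasets import load_dataset  # assumes `datasets` is installed

    ds = load_dataset(ft_dataset.path, split=ft_dataset.dataset_split)
    return ds.shuffle(seed=config[DATASET_SHUFFLING_SEED])
# For example, load_ft_dataset(dolly, {DATASET_SHUFFLING_SEED: 42}) yields a
# shuffled "train" split of philschmid/dolly-15k-oai-style.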


class Model:
    """A base model on the Hugging Face Hub: a repository-name prefix plus its available version suffixes."""

    def __init__(self, name: str, versions: List[str]):
        self.name = name
        self.versions = versions

    def __str__(self):
        return self.name


# Models offered for fine-tuning, each with its selectable versions.
models: List[Model] = []

gemma = Model(name="google/gemma", versions=["7b", "2b"])
models.append(gemma)

falcon = Model(name="tiiuae/falcon", versions=["7b"])
models.append(falcon)

phi = Model(name="microsoft/phi", versions=["1_5", "1", "2"])
models.append(phi)

llama = Model(name="meta-llama/Llama-2", versions=["7b", "7b-hf"])
models.append(llama)

mistral = Model(name="mistralai/Mistral", versions=["7B-v0.1"])
models.append(mistral)

tiny_llama = Model(
    name="TinyLlama/TinyLlama-1.1B",
    versions=[
        "intermediate-step-1431k-3T",
        "step-50K-105b",
        "intermediate-step-240k-503b",
        "intermediate-step-715k-1.5T",
        "intermediate-step-1195k-token-2.5T",
    ],
)
models.append(tiny_llama)
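
# Hedged sketch: joining name and version with "-" reproduces the Hub repo ids
# these entries abbreviate, e.g. full_model_id(gemma, "7b") == "google/gemma-7b"
# and full_model_id(llama, "7b-hf") == "meta-llama/Llama-2-7b-hf". The loader
# below, which combines the earlier sketches with FLASH_ATTENTION_ID, is an
# assumption for illustration, not the original wiring.
def full_model_id(model: Model, version: str) -> str:
    return f"{model.name}-{version}"


def load_model(model: Model, version: str, config: dict):
    from transformers import AutoModelForCausalLM  # assumes `transformers` is installed

    return AutoModelForCausalLM.from_pretrained(
        full_model_id(model, version),
        quantization_config=build_quantization_config(config),
        attn_implementation="flash_attention_2" if config[FLASH_ATTENTION_ID] else "eager",
    )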