# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Strategy / device / logging switches; these are plain values defined here
# and not consumed anywhere within this section.
DEEPSPEED_STRAT = "deepspeed_stage_1"
GPU_DEVICES = "auto"
ENABLE_WANDB = True

# Model size values embedded in the run-name prefixes below.
LAYER_COUNT = 6
EMBED_DIM = 2048

# Embedding scale, plus a variant with "." replaced by "_" so the value can be
# embedded in a filename (0.1 -> "0_1").
EMBED_SCALE = 0.1
EMBED_SCALE_LABEL = str(EMBED_SCALE).replace(".", "_")

# Run-name prefixes: the wandb one keeps the raw scale value, the filename one
# uses the "_"-separated label.
WANDB_PREFIX = f"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}"
FILENAME_PREFIX = f"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}"

# ---------------------------------------------------------------------------
# Directory layout
# ---------------------------------------------------------------------------

# NOTE: this import was previously fused onto the end of the FILENAME_PREFIX
# line, which is a syntax error; it must be a statement of its own.
import os

# "__file__" is deliberately kept as a quoted string literal: abspath() resolves
# it against the current working directory, so NOTEBOOK_DIR is the working
# directory at the time this runs (presumably because the real __file__ is not
# defined when running as a notebook -- confirm before changing).
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
CONFIG_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "../"))
PROJECT_DIR = os.path.abspath(os.path.join(CONFIG_DIR, "../../../../"))

# Both names currently resolve to the same "RWKV-v5" directory.
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))
INFERENCE_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))

# Name of the directory this runs from (last component of NOTEBOOK_DIR).
DIR_NAME = os.path.basename(NOTEBOOK_DIR)

# Enwiki Stage 2 : Basic Instruct Tuning
# Tune 2 : Low ctx size (512), memory training

- Tune 2: Low ctx size (512), training with instruction & input masked.