File size: 1,980 Bytes
002bd9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
#
# Launch SCA multitask training (src.train) via `accelerate launch` with the
# local DeepSpeed config, logging the run to Weights & Biases.
#
# Usage: $0 <wandb_name>
#   <wandb_name>  run name reported to wandb (wandb.name=...)
#
# Outputs:
#   ./exp/checkpoints/<timestamp>/  model checkpoints
#   ./exp/logs/<timestamp>/         training logs

set -euo pipefail

# Require exactly one argument: the wandb run name.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <wandb_name>" >&2
    exit 1
fi

WANDB_NAME="$1"

# Timestamp namespacing this run's output directories.
# (`date` suffices — no need to spawn a Python interpreter just to format it.)
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Create per-run output directories.
mkdir -p "./exp/checkpoints/${TIMESTAMP}"
mkdir -p "./exp/logs/${TIMESTAMP}"

# BUGFIX: a '#' comment line inside a backslash-continued command TERMINATES
# the command (the trailing '\' on the comment line is part of the comment).
# The original script had the commented-out gpt2-large alternative in the
# middle of the option list, so everything after it (learning_rate,
# weight_decay, warmup_*, compute_metrics, data_transforms,
# sam_model_name_or_path) ran as a separate command and never reached the
# trainer. Keep alternatives OUTSIDE the continued command, like here:
#   model.lm_head_model_name_or_path=gpt2-large

HYDRA_FULL_ERROR=1 \
accelerate launch --config_file amlt_configs/accelerate_deepspeed_config.local.yaml \
    -m src.train \
    train_data='[m3d_2d]' eval_data='[m3d_2d]' \
    +model=base_sca_multitask_v2 \
    training.do_train=True \
    training.do_eval=True \
    training.do_inference=True \
    +data.streaming=False \
    training.max_eval_samples=800 \
    training.max_steps=100000 \
    training.fp16=True \
    training.output_dir="./exp/checkpoints/${TIMESTAMP}" \
    training.output_log_dir="./exp/logs/${TIMESTAMP}" \
    model.cache_dir=/mnt/blob/weights/.model.cache/ \
    training.save_strategy=steps \
    training.save_steps=5000 \
    training.save_total_limit=3 \
    training.optim=adamw_torch \
    training.evaluate_before_train=True \
    training.per_device_train_batch_size=1 \
    training.evaluation_strategy=steps \
    training.eval_steps=5000 \
    training.logging_steps=500 \
    training.logging_first_step=True \
    training.dataloader_num_workers=4 \
    training.num_masks_per_sample=16 \
    wandb.project=train_script \
    wandb.name="${WANDB_NAME}" \
    model.num_caption_tokens=8 \
    model.additional_num_hidden_layers=12 \
    model.num_task_tokens=6 \
    training.lr_scheduler_type=cosine \
    model.lm_head_model_name_or_path=StanfordAIMI/RadLLaMA-7b \
    training.learning_rate=1e-4 \
    training.weight_decay=1e-4 \
    training.warmup_steps=200 \
    training.warmup_ratio=0.33333333 \
    training.compute_metrics=True \
    +data_transforms=lsj-1_0-1_0 \
    model.sam_model_name_or_path=facebook/sam-vit-base