# torchrun --nproc-per-node 1 train.py --cfg-path train_configs/minigptv2_finetune.yaml
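# Fine-tunes a MiniGPT-v2 stage-3 checkpoint on medical imaging datasets
# (SLAKE grounding, MIMIC-CXR, RadVQA, NLST); launch command shown above.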
model:
  arch: minigpt_v2
  model_type: pretrain
  max_txt_len: 1024
  image_size: 448
  end_sym: "</s>"
  llama_model: "llama-2-7b-chat-hf"
  ckpt: "/MiniGPT4-v2/checkpoints/checkpoint_stage3.pth"
  use_grad_checkpoint: True
  chat_template: True
  lora_r: 64
  lora_alpha: 16
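  # lora_r/lora_alpha configure the LoRA adapters trained on top of the
  # otherwise frozen LLaMA backbone; use_grad_checkpoint trades extra
  # compute for lower memory at the 448px input resolution.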

datasets:
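  # Every dataset below shares the same recipe: a per-dataset batch_size,
  # the BLIP-2 train-time image processor at 448px, and the BLIP caption
  # text processor; only the dataset key differs.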
  grounding_SLAKE:   
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"

  mimic_cxr:  
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"

  radvqa:   
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"

  # rsna:  
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"

  # refer_rsna:  
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"

  # identify_rsna:  
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
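  # The three RSNA variants above (rsna, refer_rsna, identify_rsna) are
  # disabled for this run; uncomment them to add the dataset back in.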

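  # NLST appears in three task variants below (nlst, refer_nlst,
  # identify_nlst), mirroring the commented-out RSNA set.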
  nlst:   
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
        
  refer_nlst:  
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"

  identify_nlst:  
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
      

run:
  task: image_text_pretrain
  lr_sched: "linear_warmup_cosine_lr"
  init_lr: 1e-5
  min_lr: 8e-6              # floor of the cosine decay (must stay below init_lr)
  warmup_lr: 1e-6

  weight_decay: 0.05
  max_epoch: 100
  num_workers: 6
  warmup_steps: 1000
  iters_per_epoch: 1000
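  # With linear_warmup_cosine_lr, the first 1000 steps ramp from warmup_lr
  # to init_lr, then the rate follows a cosine curve down to min_lr across
  # max_epoch x iters_per_epoch = 100,000 total iterations.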
  
  seed: 42
  output_dir: "experiments_folder"

  amp: True
  resume_ckpt_path: null

  evaluate: False 
  train_splits: ["train"]

  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: True

  wandb_log: True
  job_name: minigptv2_finetune_final