# Model section: selects the architecture and configures the ImageBind,
# Q-Former and Vicuna components, plus the generation prompt.
model:
  # Architecture identifier. Presumably looked up by name in the consuming
  # project's model registry -- TODO confirm.
  arch: mm_gpt4

  # ImageBind
  # NOTE(review): by its name this keeps the ImageBind weights fixed during
  # training -- verify against the model code.
  freeze_imagebind: true

  # Q-Former
  # NOTE(review): by its name this keeps the Q-Former weights fixed during
  # training -- verify against the model code.
  freeze_qformer: true
  # Relative path to a `.pth` checkpoint file; presumably resolved against
  # the process working directory -- confirm with the loader.
  q_former_model: "checkpoints/blip2_pretrained_flant5xxl.pth"
  # Integer count of query tokens (per the key name).
  num_query_token: 32

  # Vicuna
  # Looks like a Hugging Face Hub repo id (`owner/name`), but could also be a
  # relative local path -- confirm how the loader resolves it.
  llama_model: "magicr/vicuna-7b"

  # Generation configs
  # Explicit empty string (not null); presumably means "no default prompt".
  prompt: ""

# Preprocessing section: names the visual and text processors used for the
# train and eval splits. The `name` values are presumably keys in the
# consuming project's processor registry -- TODO confirm.
preprocess:
  # Visual processors. Both splits use the same image_size.
  vis_processor:
    train:
      name: "imagebind_vision_train"
      # Presumably the side length in pixels of the model's input image --
      # confirm against the processor implementation.
      image_size: 224
    eval:
      name: "imagebind_vision_eval"
      image_size: 224
  # Text processors. Train and eval reference the same processor name.
  text_processor:
    train:
      name: "imagebind_caption"
    eval:
      name: "imagebind_caption"