# File: BuboGPT/eval_configs/mmgpt4_eval.yaml
# Revision: e4bd7f9 ("update", ikuinen99)
# Evaluation configuration: model definition, per-modality preprocessing,
# and run settings.
# NOTE(review): the nesting below was reconstructed from key order and the
# empty-valued parent keys; confirm it against the schema the loader expects.
model:
  arch: mm_gpt4
  model_type: pretrain_vicuna
  freeze_imagebind: true
  # Model-level Q-Former freeze flag; joiner_cfg.vision sets its own
  # freeze_qformer value further down.
  freeze_qformer: false
  max_txt_len: 160
  end_sym: "###"
  low_resource: false
  prompt_path: "prompts/alignment.txt"
  # The trailing space after "Assistant:" is part of the template value.
  prompt_template: '###Human: {} ###Assistant: '
  # Checkpoint list. Only the first entry is active; the commented entries
  # are kept as disabled alternatives.
  ckpt:
    - 'checkpoints/bubogpt_7b.pth'
    # - 'checkpoints/mmgpt2_stage1_audio.pth'
    # - 'checkpoints/mmgpt2_stage2_mm_5k.pth'
  with_bind_head: false
  use_blip_vision: true
  # Per-modality joiner settings, one sub-mapping per modality.
  joiner_cfg:
    # NOTE: uncomment below to share qformer across modalities
    # share_key: vision
    vision:
      feat_dim: 1408
      post_dims: [768]
      num_query_token: 32
      freeze_qformer: true
    audio:
      feat_dim: 768

# Evaluation-time processors for each input modality.
datasets:
  default:  # Double check
    vis_processor:
      eval:
        name: "imagebind_vision_eval"
        image_size: 224
    text_processor:
      eval:
        name: "imagebind_caption"
    audio_processor:
      eval:
        name: "imagebind_audio_eval"
        use_global: true
        clip_duration: 5
        clips_per_video: 6

run:
  task: image_text_pretrain
  evaluate: true