# NOTE(review): removed web-scrape artifacts ("Spaces:", "Runtime error" x2)
# that preceded the YAML content — they are not part of the configuration.
model:
  arch: mm_gpt4
  model_type: pretrain_vicuna
  freeze_imagebind: True
  freeze_qformer: False
  max_txt_len: 160
  end_sym: "###"
  low_resource: False
  prompt_path: "prompts/alignment.txt"
  prompt_template: '###Human: {} ###Assistant: '
  ckpt: [
    "bubogpt/output/mmgpt4_stage2_mm_blipvision_13b/20230701204/checkpoint_4.pth",
  ]
  with_bind_head: False
  use_blip_vision: True
  proj_model: "checkpoints/prerained_minigpt4_13b.pth"
  llama_model: "/mnt/bn/bykang/chixma/data/pretrained_models/vicuna-13b-v0/"
  # NOTE(review): joiner_cfg re-nested under `model:` — the scrape stripped the
  # leading indent from the model keys; verify against the upstream config.
  joiner_cfg:
    # NOTE: uncomment below to share qformer across modalities
    # share_key: vision
    vision:
      feat_dim: 1408
      post_dims: [768,]
      num_query_token: 32
      freeze_qformer: True
    audio:
      feat_dim: 768
datasets: | |
default: # Double check | |
vis_processor: | |
eval: | |
name: "imagebind_vision_eval" | |
image_size: 224 | |
text_processor: | |
eval: | |
name: "imagebind_caption" | |
audio_processor: | |
eval: | |
name: "imagebind_audio_eval" | |
# d2c18 | |
# clip_duration: 2 | |
# clips_per_video: 18 | |
# d5c6 | |
use_global: True | |
clip_duration: 5 | |
clips_per_video: 6 | |
run: | |
task: image_text_pretrain | |
evaluate: True | |