|
args=Namespace(checkpoint='pretrained/InternVL2-2B-AWQ', task='reasoning-image-val', outputs_dir='pretrained/InternVL2-2B-AWQ/eval_mm_niah/reasoning-image-val', num_gpus_per_rank=2) |
|
Start evaluation on task reasoning-image-val |
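
The Namespace line above is the parsed command line of the evaluation script. A minimal argparse sketch that would produce such a namespace is shown below; the option names simply mirror the fields in the log, and the defaults are illustrative, not taken from the real script.

    # Illustrative parser reproducing the fields in the Namespace above;
    # the actual eval script may define additional options.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', type=str, default='pretrained/InternVL2-2B-AWQ')
    parser.add_argument('--task', type=str, default='reasoning-image-val')
    parser.add_argument('--outputs-dir', type=str, default='pretrained/InternVL2-2B-AWQ/eval_mm_niah/reasoning-image-val')
    parser.add_argument('--num-gpus-per-rank', type=int, default=2)
    args = parser.parse_args()
    print(f'args={args}')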
|
language_model.model.layers.0 4 |
|
language_model.model.layers.1 4 |
|
language_model.model.layers.2 4 |
|
language_model.model.layers.3 4 |
|
language_model.model.layers.4 4 |
|
language_model.model.layers.5 4 |
|
language_model.model.layers.6 4 |
|
language_model.model.layers.7 4 |
|
language_model.model.layers.8 4 |
|
language_model.model.layers.9 4 |
|
language_model.model.layers.10 4 |
|
language_model.model.layers.11 4 |
|
language_model.model.layers.12 4 |
|
language_model.model.layers.13 4 |
|
language_model.model.layers.14 4 |
|
language_model.model.layers.15 4 |
|
language_model.model.layers.16 4 |
|
language_model.model.layers.17 4 |
|
language_model.model.layers.18 4 |
|
language_model.model.layers.19 4 |
|
language_model.model.layers.20 4 |
|
language_model.model.layers.21 4 |
|
language_model.model.layers.22 4 |
|
language_model.model.layers.23 4 |
|
vision_model.encoder.layers.0 0 |
|
vision_model.encoder.layers.1 0 |
|
vision_model.encoder.layers.2 0 |
|
vision_model.encoder.layers.3 0 |
|
vision_model.encoder.layers.4 0 |
|
vision_model.encoder.layers.5 0 |
|
vision_model.encoder.layers.6 0 |
|
vision_model.encoder.layers.7 0 |
|
vision_model.encoder.layers.8 0 |
|
vision_model.encoder.layers.9 0 |
|
vision_model.encoder.layers.10 0 |
|
vision_model.encoder.layers.11 0 |
|
vision_model.encoder.layers.12 0 |
|
vision_model.encoder.layers.13 0 |
|
vision_model.encoder.layers.14 0 |
|
vision_model.encoder.layers.15 0 |
|
vision_model.encoder.layers.16 0 |
|
vision_model.encoder.layers.17 0 |
|
vision_model.encoder.layers.18 0 |
|
vision_model.encoder.layers.19 0 |
|
vision_model.encoder.layers.20 0 |
|
vision_model.encoder.layers.21 0 |
|
vision_model.encoder.layers.22 0 |
|
vision_model.encoder.layers.23 0 |
|
vision_model.embeddings 0 |
|
mlp1 0 |
|
language_model.model.tok_embeddings 4 |
|
language_model.model.norm 4 |
|
language_model.output 4 |
|
language_model.model.embed_tokens 4 |
|
language_model.lm_head 4 |
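
The two-column listing above appears to be the per-module device map printed for this rank: the vision encoder and the mlp1 projector are placed on GPU 0, while the 24 language-model layers plus the embeddings, final norm, and output head go to GPU 4. A minimal sketch of building such a map and handing it to transformers follows; the helper name and loading arguments are assumptions for illustration, since the script's own code is not shown in this log.

    # Sketch of a module-to-GPU map like the one printed above, passed to
    # transformers via device_map. Names and defaults are illustrative.
    import torch
    from transformers import AutoModel

    def build_device_map(num_llm_layers=24, vision_gpu=0, llm_gpu=4):
        device_map = {
            'vision_model': vision_gpu,            # ViT encoder + embeddings
            'mlp1': vision_gpu,                    # vision-to-LLM projector
            'language_model.model.tok_embeddings': llm_gpu,
            'language_model.model.embed_tokens': llm_gpu,
            'language_model.model.norm': llm_gpu,
            'language_model.output': llm_gpu,
            'language_model.lm_head': llm_gpu,
        }
        for i in range(num_llm_layers):
            device_map[f'language_model.model.layers.{i}'] = llm_gpu
        return device_map

    model = AutoModel.from_pretrained(
        'pretrained/InternVL2-2B-AWQ',
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        device_map=build_device_map(),
    ).eval()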
|
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored. |
|
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage() |
|
return self.fget.__get__(instance, owner)() |
|
Loading checkpoint shards:  50%|█████     | 1/2 [00:06<00:06,  6.05s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.20s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.63s/it] |
|
Some weights of the model checkpoint at pretrained/InternVL2-2B-AWQ were not used when initializing InternVLChatModel: ['language_model.model.layers.0.attention.wo.qweight', 'language_model.model.layers.0.attention.wo.qzeros', 'language_model.model.layers.0.attention.wo.scales', 'language_model.model.layers.0.attention.wqkv.qweight', 'language_model.model.layers.0.attention.wqkv.qzeros', 'language_model.model.layers.0.attention.wqkv.scales', 'language_model.model.layers.0.feed_forward.w1.qweight', 'language_model.model.layers.0.feed_forward.w1.qzeros', 'language_model.model.layers.0.feed_forward.w1.scales', 'language_model.model.layers.0.feed_forward.w2.qweight', 'language_model.model.layers.0.feed_forward.w2.qzeros', 'language_model.model.layers.0.feed_forward.w2.scales', 'language_model.model.layers.0.feed_forward.w3.qweight', 'language_model.model.layers.0.feed_forward.w3.qzeros', 'language_model.model.layers.0.feed_forward.w3.scales', 'language_model.model.layers.1.attention.wo.qweight', 'language_model.model.layers.1.attention.wo.qzeros', 'language_model.model.layers.1.attention.wo.scales', 'language_model.model.layers.1.attention.wqkv.qweight', 'language_model.model.layers.1.attention.wqkv.qzeros', 'language_model.model.layers.1.attention.wqkv.scales', 'language_model.model.layers.1.feed_forward.w1.qweight', 'language_model.model.layers.1.feed_forward.w1.qzeros', 'language_model.model.layers.1.feed_forward.w1.scales', 'language_model.model.layers.1.feed_forward.w2.qweight', 'language_model.model.layers.1.feed_forward.w2.qzeros', 'language_model.model.layers.1.feed_forward.w2.scales', 'language_model.model.layers.1.feed_forward.w3.qweight', 'language_model.model.layers.1.feed_forward.w3.qzeros', 'language_model.model.layers.1.feed_forward.w3.scales', 'language_model.model.layers.10.attention.wo.qweight', 'language_model.model.layers.10.attention.wo.qzeros', 'language_model.model.layers.10.attention.wo.scales', 'language_model.model.layers.10.attention.wqkv.qweight', 'language_model.model.layers.10.attention.wqkv.qzeros', 'language_model.model.layers.10.attention.wqkv.scales', 'language_model.model.layers.10.feed_forward.w1.qweight', 'language_model.model.layers.10.feed_forward.w1.qzeros', 'language_model.model.layers.10.feed_forward.w1.scales', 'language_model.model.layers.10.feed_forward.w2.qweight', 'language_model.model.layers.10.feed_forward.w2.qzeros', 'language_model.model.layers.10.feed_forward.w2.scales', 'language_model.model.layers.10.feed_forward.w3.qweight', 'language_model.model.layers.10.feed_forward.w3.qzeros', 'language_model.model.layers.10.feed_forward.w3.scales', 'language_model.model.layers.11.attention.wo.qweight', 'language_model.model.layers.11.attention.wo.qzeros', 'language_model.model.layers.11.attention.wo.scales', 'language_model.model.layers.11.attention.wqkv.qweight', 'language_model.model.layers.11.attention.wqkv.qzeros', 'language_model.model.layers.11.attention.wqkv.scales', 'language_model.model.layers.11.feed_forward.w1.qweight', 'language_model.model.layers.11.feed_forward.w1.qzeros', 'language_model.model.layers.11.feed_forward.w1.scales', 'language_model.model.layers.11.feed_forward.w2.qweight', 'language_model.model.layers.11.feed_forward.w2.qzeros', 'language_model.model.layers.11.feed_forward.w2.scales', 'language_model.model.layers.11.feed_forward.w3.qweight', 'language_model.model.layers.11.feed_forward.w3.qzeros', 'language_model.model.layers.11.feed_forward.w3.scales', 'language_model.model.layers.12.attention.wo.qweight', 
'language_model.model.layers.12.attention.wo.qzeros', 'language_model.model.layers.12.attention.wo.scales', 'language_model.model.layers.12.attention.wqkv.qweight', 'language_model.model.layers.12.attention.wqkv.qzeros', 'language_model.model.layers.12.attention.wqkv.scales', 'language_model.model.layers.12.feed_forward.w1.qweight', 'language_model.model.layers.12.feed_forward.w1.qzeros', 'language_model.model.layers.12.feed_forward.w1.scales', 'language_model.model.layers.12.feed_forward.w2.qweight', 'language_model.model.layers.12.feed_forward.w2.qzeros', 'language_model.model.layers.12.feed_forward.w2.scales', 'language_model.model.layers.12.feed_forward.w3.qweight', 'language_model.model.layers.12.feed_forward.w3.qzeros', 'language_model.model.layers.12.feed_forward.w3.scales', 'language_model.model.layers.13.attention.wo.qweight', 'language_model.model.layers.13.attention.wo.qzeros', 'language_model.model.layers.13.attention.wo.scales', 'language_model.model.layers.13.attention.wqkv.qweight', 'language_model.model.layers.13.attention.wqkv.qzeros', 'language_model.model.layers.13.attention.wqkv.scales', 'language_model.model.layers.13.feed_forward.w1.qweight', 'language_model.model.layers.13.feed_forward.w1.qzeros', 'language_model.model.layers.13.feed_forward.w1.scales', 'language_model.model.layers.13.feed_forward.w2.qweight', 'language_model.model.layers.13.feed_forward.w2.qzeros', 'language_model.model.layers.13.feed_forward.w2.scales', 'language_model.model.layers.13.feed_forward.w3.qweight', 'language_model.model.layers.13.feed_forward.w3.qzeros', 'language_model.model.layers.13.feed_forward.w3.scales', 'language_model.model.layers.14.attention.wo.qweight', 'language_model.model.layers.14.attention.wo.qzeros', 'language_model.model.layers.14.attention.wo.scales', 'language_model.model.layers.14.attention.wqkv.qweight', 'language_model.model.layers.14.attention.wqkv.qzeros', 'language_model.model.layers.14.attention.wqkv.scales', 'language_model.model.layers.14.feed_forward.w1.qweight', 'language_model.model.layers.14.feed_forward.w1.qzeros', 'language_model.model.layers.14.feed_forward.w1.scales', 'language_model.model.layers.14.feed_forward.w2.qweight', 'language_model.model.layers.14.feed_forward.w2.qzeros', 'language_model.model.layers.14.feed_forward.w2.scales', 'language_model.model.layers.14.feed_forward.w3.qweight', 'language_model.model.layers.14.feed_forward.w3.qzeros', 'language_model.model.layers.14.feed_forward.w3.scales', 'language_model.model.layers.15.attention.wo.qweight', 'language_model.model.layers.15.attention.wo.qzeros', 'language_model.model.layers.15.attention.wo.scales', 'language_model.model.layers.15.attention.wqkv.qweight', 'language_model.model.layers.15.attention.wqkv.qzeros', 'language_model.model.layers.15.attention.wqkv.scales', 'language_model.model.layers.15.feed_forward.w1.qweight', 'language_model.model.layers.15.feed_forward.w1.qzeros', 'language_model.model.layers.15.feed_forward.w1.scales', 'language_model.model.layers.15.feed_forward.w2.qweight', 'language_model.model.layers.15.feed_forward.w2.qzeros', 'language_model.model.layers.15.feed_forward.w2.scales', 'language_model.model.layers.15.feed_forward.w3.qweight', 'language_model.model.layers.15.feed_forward.w3.qzeros', 'language_model.model.layers.15.feed_forward.w3.scales', 'language_model.model.layers.16.attention.wo.qweight', 'language_model.model.layers.16.attention.wo.qzeros', 'language_model.model.layers.16.attention.wo.scales', 
'language_model.model.layers.16.attention.wqkv.qweight', 'language_model.model.layers.16.attention.wqkv.qzeros', 'language_model.model.layers.16.attention.wqkv.scales', 'language_model.model.layers.16.feed_forward.w1.qweight', 'language_model.model.layers.16.feed_forward.w1.qzeros', 'language_model.model.layers.16.feed_forward.w1.scales', 'language_model.model.layers.16.feed_forward.w2.qweight', 'language_model.model.layers.16.feed_forward.w2.qzeros', 'language_model.model.layers.16.feed_forward.w2.scales', 'language_model.model.layers.16.feed_forward.w3.qweight', 'language_model.model.layers.16.feed_forward.w3.qzeros', 'language_model.model.layers.16.feed_forward.w3.scales', 'language_model.model.layers.17.attention.wo.qweight', 'language_model.model.layers.17.attention.wo.qzeros', 'language_model.model.layers.17.attention.wo.scales', 'language_model.model.layers.17.attention.wqkv.qweight', 'language_model.model.layers.17.attention.wqkv.qzeros', 'language_model.model.layers.17.attention.wqkv.scales', 'language_model.model.layers.17.feed_forward.w1.qweight', 'language_model.model.layers.17.feed_forward.w1.qzeros', 'language_model.model.layers.17.feed_forward.w1.scales', 'language_model.model.layers.17.feed_forward.w2.qweight', 'language_model.model.layers.17.feed_forward.w2.qzeros', 'language_model.model.layers.17.feed_forward.w2.scales', 'language_model.model.layers.17.feed_forward.w3.qweight', 'language_model.model.layers.17.feed_forward.w3.qzeros', 'language_model.model.layers.17.feed_forward.w3.scales', 'language_model.model.layers.18.attention.wo.qweight', 'language_model.model.layers.18.attention.wo.qzeros', 'language_model.model.layers.18.attention.wo.scales', 'language_model.model.layers.18.attention.wqkv.qweight', 'language_model.model.layers.18.attention.wqkv.qzeros', 'language_model.model.layers.18.attention.wqkv.scales', 'language_model.model.layers.18.feed_forward.w1.qweight', 'language_model.model.layers.18.feed_forward.w1.qzeros', 'language_model.model.layers.18.feed_forward.w1.scales', 'language_model.model.layers.18.feed_forward.w2.qweight', 'language_model.model.layers.18.feed_forward.w2.qzeros', 'language_model.model.layers.18.feed_forward.w2.scales', 'language_model.model.layers.18.feed_forward.w3.qweight', 'language_model.model.layers.18.feed_forward.w3.qzeros', 'language_model.model.layers.18.feed_forward.w3.scales', 'language_model.model.layers.19.attention.wo.qweight', 'language_model.model.layers.19.attention.wo.qzeros', 'language_model.model.layers.19.attention.wo.scales', 'language_model.model.layers.19.attention.wqkv.qweight', 'language_model.model.layers.19.attention.wqkv.qzeros', 'language_model.model.layers.19.attention.wqkv.scales', 'language_model.model.layers.19.feed_forward.w1.qweight', 'language_model.model.layers.19.feed_forward.w1.qzeros', 'language_model.model.layers.19.feed_forward.w1.scales', 'language_model.model.layers.19.feed_forward.w2.qweight', 'language_model.model.layers.19.feed_forward.w2.qzeros', 'language_model.model.layers.19.feed_forward.w2.scales', 'language_model.model.layers.19.feed_forward.w3.qweight', 'language_model.model.layers.19.feed_forward.w3.qzeros', 'language_model.model.layers.19.feed_forward.w3.scales', 'language_model.model.layers.2.attention.wo.qweight', 'language_model.model.layers.2.attention.wo.qzeros', 'language_model.model.layers.2.attention.wo.scales', 'language_model.model.layers.2.attention.wqkv.qweight', 'language_model.model.layers.2.attention.wqkv.qzeros', 'language_model.model.layers.2.attention.wqkv.scales', 
'language_model.model.layers.2.feed_forward.w1.qweight', 'language_model.model.layers.2.feed_forward.w1.qzeros', 'language_model.model.layers.2.feed_forward.w1.scales', 'language_model.model.layers.2.feed_forward.w2.qweight', 'language_model.model.layers.2.feed_forward.w2.qzeros', 'language_model.model.layers.2.feed_forward.w2.scales', 'language_model.model.layers.2.feed_forward.w3.qweight', 'language_model.model.layers.2.feed_forward.w3.qzeros', 'language_model.model.layers.2.feed_forward.w3.scales', 'language_model.model.layers.20.attention.wo.qweight', 'language_model.model.layers.20.attention.wo.qzeros', 'language_model.model.layers.20.attention.wo.scales', 'language_model.model.layers.20.attention.wqkv.qweight', 'language_model.model.layers.20.attention.wqkv.qzeros', 'language_model.model.layers.20.attention.wqkv.scales', 'language_model.model.layers.20.feed_forward.w1.qweight', 'language_model.model.layers.20.feed_forward.w1.qzeros', 'language_model.model.layers.20.feed_forward.w1.scales', 'language_model.model.layers.20.feed_forward.w2.qweight', 'language_model.model.layers.20.feed_forward.w2.qzeros', 'language_model.model.layers.20.feed_forward.w2.scales', 'language_model.model.layers.20.feed_forward.w3.qweight', 'language_model.model.layers.20.feed_forward.w3.qzeros', 'language_model.model.layers.20.feed_forward.w3.scales', 'language_model.model.layers.21.attention.wo.qweight', 'language_model.model.layers.21.attention.wo.qzeros', 'language_model.model.layers.21.attention.wo.scales', 'language_model.model.layers.21.attention.wqkv.qweight', 'language_model.model.layers.21.attention.wqkv.qzeros', 'language_model.model.layers.21.attention.wqkv.scales', 'language_model.model.layers.21.feed_forward.w1.qweight', 'language_model.model.layers.21.feed_forward.w1.qzeros', 'language_model.model.layers.21.feed_forward.w1.scales', 'language_model.model.layers.21.feed_forward.w2.qweight', 'language_model.model.layers.21.feed_forward.w2.qzeros', 'language_model.model.layers.21.feed_forward.w2.scales', 'language_model.model.layers.21.feed_forward.w3.qweight', 'language_model.model.layers.21.feed_forward.w3.qzeros', 'language_model.model.layers.21.feed_forward.w3.scales', 'language_model.model.layers.22.attention.wo.qweight', 'language_model.model.layers.22.attention.wo.qzeros', 'language_model.model.layers.22.attention.wo.scales', 'language_model.model.layers.22.attention.wqkv.qweight', 'language_model.model.layers.22.attention.wqkv.qzeros', 'language_model.model.layers.22.attention.wqkv.scales', 'language_model.model.layers.22.feed_forward.w1.qweight', 'language_model.model.layers.22.feed_forward.w1.qzeros', 'language_model.model.layers.22.feed_forward.w1.scales', 'language_model.model.layers.22.feed_forward.w2.qweight', 'language_model.model.layers.22.feed_forward.w2.qzeros', 'language_model.model.layers.22.feed_forward.w2.scales', 'language_model.model.layers.22.feed_forward.w3.qweight', 'language_model.model.layers.22.feed_forward.w3.qzeros', 'language_model.model.layers.22.feed_forward.w3.scales', 'language_model.model.layers.23.attention.wo.qweight', 'language_model.model.layers.23.attention.wo.qzeros', 'language_model.model.layers.23.attention.wo.scales', 'language_model.model.layers.23.attention.wqkv.qweight', 'language_model.model.layers.23.attention.wqkv.qzeros', 'language_model.model.layers.23.attention.wqkv.scales', 'language_model.model.layers.23.feed_forward.w1.qweight', 'language_model.model.layers.23.feed_forward.w1.qzeros', 'language_model.model.layers.23.feed_forward.w1.scales', 
'language_model.model.layers.23.feed_forward.w2.qweight', 'language_model.model.layers.23.feed_forward.w2.qzeros', 'language_model.model.layers.23.feed_forward.w2.scales', 'language_model.model.layers.23.feed_forward.w3.qweight', 'language_model.model.layers.23.feed_forward.w3.qzeros', 'language_model.model.layers.23.feed_forward.w3.scales', 'language_model.model.layers.3.attention.wo.qweight', 'language_model.model.layers.3.attention.wo.qzeros', 'language_model.model.layers.3.attention.wo.scales', 'language_model.model.layers.3.attention.wqkv.qweight', 'language_model.model.layers.3.attention.wqkv.qzeros', 'language_model.model.layers.3.attention.wqkv.scales', 'language_model.model.layers.3.feed_forward.w1.qweight', 'language_model.model.layers.3.feed_forward.w1.qzeros', 'language_model.model.layers.3.feed_forward.w1.scales', 'language_model.model.layers.3.feed_forward.w2.qweight', 'language_model.model.layers.3.feed_forward.w2.qzeros', 'language_model.model.layers.3.feed_forward.w2.scales', 'language_model.model.layers.3.feed_forward.w3.qweight', 'language_model.model.layers.3.feed_forward.w3.qzeros', 'language_model.model.layers.3.feed_forward.w3.scales', 'language_model.model.layers.4.attention.wo.qweight', 'language_model.model.layers.4.attention.wo.qzeros', 'language_model.model.layers.4.attention.wo.scales', 'language_model.model.layers.4.attention.wqkv.qweight', 'language_model.model.layers.4.attention.wqkv.qzeros', 'language_model.model.layers.4.attention.wqkv.scales', 'language_model.model.layers.4.feed_forward.w1.qweight', 'language_model.model.layers.4.feed_forward.w1.qzeros', 'language_model.model.layers.4.feed_forward.w1.scales', 'language_model.model.layers.4.feed_forward.w2.qweight', 'language_model.model.layers.4.feed_forward.w2.qzeros', 'language_model.model.layers.4.feed_forward.w2.scales', 'language_model.model.layers.4.feed_forward.w3.qweight', 'language_model.model.layers.4.feed_forward.w3.qzeros', 'language_model.model.layers.4.feed_forward.w3.scales', 'language_model.model.layers.5.attention.wo.qweight', 'language_model.model.layers.5.attention.wo.qzeros', 'language_model.model.layers.5.attention.wo.scales', 'language_model.model.layers.5.attention.wqkv.qweight', 'language_model.model.layers.5.attention.wqkv.qzeros', 'language_model.model.layers.5.attention.wqkv.scales', 'language_model.model.layers.5.feed_forward.w1.qweight', 'language_model.model.layers.5.feed_forward.w1.qzeros', 'language_model.model.layers.5.feed_forward.w1.scales', 'language_model.model.layers.5.feed_forward.w2.qweight', 'language_model.model.layers.5.feed_forward.w2.qzeros', 'language_model.model.layers.5.feed_forward.w2.scales', 'language_model.model.layers.5.feed_forward.w3.qweight', 'language_model.model.layers.5.feed_forward.w3.qzeros', 'language_model.model.layers.5.feed_forward.w3.scales', 'language_model.model.layers.6.attention.wo.qweight', 'language_model.model.layers.6.attention.wo.qzeros', 'language_model.model.layers.6.attention.wo.scales', 'language_model.model.layers.6.attention.wqkv.qweight', 'language_model.model.layers.6.attention.wqkv.qzeros', 'language_model.model.layers.6.attention.wqkv.scales', 'language_model.model.layers.6.feed_forward.w1.qweight', 'language_model.model.layers.6.feed_forward.w1.qzeros', 'language_model.model.layers.6.feed_forward.w1.scales', 'language_model.model.layers.6.feed_forward.w2.qweight', 'language_model.model.layers.6.feed_forward.w2.qzeros', 'language_model.model.layers.6.feed_forward.w2.scales', 
'language_model.model.layers.6.feed_forward.w3.qweight', 'language_model.model.layers.6.feed_forward.w3.qzeros', 'language_model.model.layers.6.feed_forward.w3.scales', 'language_model.model.layers.7.attention.wo.qweight', 'language_model.model.layers.7.attention.wo.qzeros', 'language_model.model.layers.7.attention.wo.scales', 'language_model.model.layers.7.attention.wqkv.qweight', 'language_model.model.layers.7.attention.wqkv.qzeros', 'language_model.model.layers.7.attention.wqkv.scales', 'language_model.model.layers.7.feed_forward.w1.qweight', 'language_model.model.layers.7.feed_forward.w1.qzeros', 'language_model.model.layers.7.feed_forward.w1.scales', 'language_model.model.layers.7.feed_forward.w2.qweight', 'language_model.model.layers.7.feed_forward.w2.qzeros', 'language_model.model.layers.7.feed_forward.w2.scales', 'language_model.model.layers.7.feed_forward.w3.qweight', 'language_model.model.layers.7.feed_forward.w3.qzeros', 'language_model.model.layers.7.feed_forward.w3.scales', 'language_model.model.layers.8.attention.wo.qweight', 'language_model.model.layers.8.attention.wo.qzeros', 'language_model.model.layers.8.attention.wo.scales', 'language_model.model.layers.8.attention.wqkv.qweight', 'language_model.model.layers.8.attention.wqkv.qzeros', 'language_model.model.layers.8.attention.wqkv.scales', 'language_model.model.layers.8.feed_forward.w1.qweight', 'language_model.model.layers.8.feed_forward.w1.qzeros', 'language_model.model.layers.8.feed_forward.w1.scales', 'language_model.model.layers.8.feed_forward.w2.qweight', 'language_model.model.layers.8.feed_forward.w2.qzeros', 'language_model.model.layers.8.feed_forward.w2.scales', 'language_model.model.layers.8.feed_forward.w3.qweight', 'language_model.model.layers.8.feed_forward.w3.qzeros', 'language_model.model.layers.8.feed_forward.w3.scales', 'language_model.model.layers.9.attention.wo.qweight', 'language_model.model.layers.9.attention.wo.qzeros', 'language_model.model.layers.9.attention.wo.scales', 'language_model.model.layers.9.attention.wqkv.qweight', 'language_model.model.layers.9.attention.wqkv.qzeros', 'language_model.model.layers.9.attention.wqkv.scales', 'language_model.model.layers.9.feed_forward.w1.qweight', 'language_model.model.layers.9.feed_forward.w1.qzeros', 'language_model.model.layers.9.feed_forward.w1.scales', 'language_model.model.layers.9.feed_forward.w2.qweight', 'language_model.model.layers.9.feed_forward.w2.qzeros', 'language_model.model.layers.9.feed_forward.w2.scales', 'language_model.model.layers.9.feed_forward.w3.qweight', 'language_model.model.layers.9.feed_forward.w3.qzeros', 'language_model.model.layers.9.feed_forward.w3.scales'] |
|
- This IS expected if you are initializing InternVLChatModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). |
|
- This IS NOT expected if you are initializing InternVLChatModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). |
|
Some weights of InternVLChatModel were not initialized from the model checkpoint at pretrained/InternVL2-2B-AWQ and are newly initialized: ['language_model.model.layers.0.attention.wo.weight', 'language_model.model.layers.0.attention.wqkv.weight', 'language_model.model.layers.0.feed_forward.w1.weight', 'language_model.model.layers.0.feed_forward.w2.weight', 'language_model.model.layers.0.feed_forward.w3.weight', 'language_model.model.layers.1.attention.wo.weight', 'language_model.model.layers.1.attention.wqkv.weight', 'language_model.model.layers.1.feed_forward.w1.weight', 'language_model.model.layers.1.feed_forward.w2.weight', 'language_model.model.layers.1.feed_forward.w3.weight', 'language_model.model.layers.10.attention.wo.weight', 'language_model.model.layers.10.attention.wqkv.weight', 'language_model.model.layers.10.feed_forward.w1.weight', 'language_model.model.layers.10.feed_forward.w2.weight', 'language_model.model.layers.10.feed_forward.w3.weight', 'language_model.model.layers.11.attention.wo.weight', 'language_model.model.layers.11.attention.wqkv.weight', 'language_model.model.layers.11.feed_forward.w1.weight', 'language_model.model.layers.11.feed_forward.w2.weight', 'language_model.model.layers.11.feed_forward.w3.weight', 'language_model.model.layers.12.attention.wo.weight', 'language_model.model.layers.12.attention.wqkv.weight', 'language_model.model.layers.12.feed_forward.w1.weight', 'language_model.model.layers.12.feed_forward.w2.weight', 'language_model.model.layers.12.feed_forward.w3.weight', 'language_model.model.layers.13.attention.wo.weight', 'language_model.model.layers.13.attention.wqkv.weight', 'language_model.model.layers.13.feed_forward.w1.weight', 'language_model.model.layers.13.feed_forward.w2.weight', 'language_model.model.layers.13.feed_forward.w3.weight', 'language_model.model.layers.14.attention.wo.weight', 'language_model.model.layers.14.attention.wqkv.weight', 'language_model.model.layers.14.feed_forward.w1.weight', 'language_model.model.layers.14.feed_forward.w2.weight', 'language_model.model.layers.14.feed_forward.w3.weight', 'language_model.model.layers.15.attention.wo.weight', 'language_model.model.layers.15.attention.wqkv.weight', 'language_model.model.layers.15.feed_forward.w1.weight', 'language_model.model.layers.15.feed_forward.w2.weight', 'language_model.model.layers.15.feed_forward.w3.weight', 'language_model.model.layers.16.attention.wo.weight', 'language_model.model.layers.16.attention.wqkv.weight', 'language_model.model.layers.16.feed_forward.w1.weight', 'language_model.model.layers.16.feed_forward.w2.weight', 'language_model.model.layers.16.feed_forward.w3.weight', 'language_model.model.layers.17.attention.wo.weight', 'language_model.model.layers.17.attention.wqkv.weight', 'language_model.model.layers.17.feed_forward.w1.weight', 'language_model.model.layers.17.feed_forward.w2.weight', 'language_model.model.layers.17.feed_forward.w3.weight', 'language_model.model.layers.18.attention.wo.weight', 'language_model.model.layers.18.attention.wqkv.weight', 'language_model.model.layers.18.feed_forward.w1.weight', 'language_model.model.layers.18.feed_forward.w2.weight', 'language_model.model.layers.18.feed_forward.w3.weight', 'language_model.model.layers.19.attention.wo.weight', 'language_model.model.layers.19.attention.wqkv.weight', 'language_model.model.layers.19.feed_forward.w1.weight', 'language_model.model.layers.19.feed_forward.w2.weight', 'language_model.model.layers.19.feed_forward.w3.weight', 'language_model.model.layers.2.attention.wo.weight', 
'language_model.model.layers.2.attention.wqkv.weight', 'language_model.model.layers.2.feed_forward.w1.weight', 'language_model.model.layers.2.feed_forward.w2.weight', 'language_model.model.layers.2.feed_forward.w3.weight', 'language_model.model.layers.20.attention.wo.weight', 'language_model.model.layers.20.attention.wqkv.weight', 'language_model.model.layers.20.feed_forward.w1.weight', 'language_model.model.layers.20.feed_forward.w2.weight', 'language_model.model.layers.20.feed_forward.w3.weight', 'language_model.model.layers.21.attention.wo.weight', 'language_model.model.layers.21.attention.wqkv.weight', 'language_model.model.layers.21.feed_forward.w1.weight', 'language_model.model.layers.21.feed_forward.w2.weight', 'language_model.model.layers.21.feed_forward.w3.weight', 'language_model.model.layers.22.attention.wo.weight', 'language_model.model.layers.22.attention.wqkv.weight', 'language_model.model.layers.22.feed_forward.w1.weight', 'language_model.model.layers.22.feed_forward.w2.weight', 'language_model.model.layers.22.feed_forward.w3.weight', 'language_model.model.layers.23.attention.wo.weight', 'language_model.model.layers.23.attention.wqkv.weight', 'language_model.model.layers.23.feed_forward.w1.weight', 'language_model.model.layers.23.feed_forward.w2.weight', 'language_model.model.layers.23.feed_forward.w3.weight', 'language_model.model.layers.3.attention.wo.weight', 'language_model.model.layers.3.attention.wqkv.weight', 'language_model.model.layers.3.feed_forward.w1.weight', 'language_model.model.layers.3.feed_forward.w2.weight', 'language_model.model.layers.3.feed_forward.w3.weight', 'language_model.model.layers.4.attention.wo.weight', 'language_model.model.layers.4.attention.wqkv.weight', 'language_model.model.layers.4.feed_forward.w1.weight', 'language_model.model.layers.4.feed_forward.w2.weight', 'language_model.model.layers.4.feed_forward.w3.weight', 'language_model.model.layers.5.attention.wo.weight', 'language_model.model.layers.5.attention.wqkv.weight', 'language_model.model.layers.5.feed_forward.w1.weight', 'language_model.model.layers.5.feed_forward.w2.weight', 'language_model.model.layers.5.feed_forward.w3.weight', 'language_model.model.layers.6.attention.wo.weight', 'language_model.model.layers.6.attention.wqkv.weight', 'language_model.model.layers.6.feed_forward.w1.weight', 'language_model.model.layers.6.feed_forward.w2.weight', 'language_model.model.layers.6.feed_forward.w3.weight', 'language_model.model.layers.7.attention.wo.weight', 'language_model.model.layers.7.attention.wqkv.weight', 'language_model.model.layers.7.feed_forward.w1.weight', 'language_model.model.layers.7.feed_forward.w2.weight', 'language_model.model.layers.7.feed_forward.w3.weight', 'language_model.model.layers.8.attention.wo.weight', 'language_model.model.layers.8.attention.wqkv.weight', 'language_model.model.layers.8.feed_forward.w1.weight', 'language_model.model.layers.8.feed_forward.w2.weight', 'language_model.model.layers.8.feed_forward.w3.weight', 'language_model.model.layers.9.attention.wo.weight', 'language_model.model.layers.9.attention.wqkv.weight', 'language_model.model.layers.9.feed_forward.w1.weight', 'language_model.model.layers.9.feed_forward.w2.weight', 'language_model.model.layers.9.feed_forward.w3.weight'] |
|
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. |
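
Taken together, the two warnings above indicate that the AWQ-packed tensors in the checkpoint (the per-layer qweight/qzeros/scales entries listed) were skipped, and the corresponding fp16 weights of InternVLChatModel were freshly, i.e. randomly, initialized: plain transformers from_pretrained cannot unpack AWQ weights for this model class, so an evaluation continued in this state would run against an effectively untrained language model. A quantization-aware runtime is needed for the AWQ checkpoint. Below is a minimal sketch using lmdeploy, which supports InternVL2 AWQ checkpoints; lmdeploy itself, the image path, and the prompt are assumptions for illustration and do not appear in this log.

    # Sketch only: load the AWQ checkpoint with a quantization-aware runtime
    # instead of plain transformers from_pretrained. Assumes lmdeploy is
    # installed; the image path and prompt are placeholders.
    from lmdeploy import pipeline, TurbomindEngineConfig
    from lmdeploy.vl import load_image

    pipe = pipeline(
        'pretrained/InternVL2-2B-AWQ',
        backend_config=TurbomindEngineConfig(model_format='awq'),  # weights are AWQ-packed
    )

    image = load_image('examples/image1.jpg')        # placeholder image path
    response = pipe(('describe this image', image))  # (prompt, image) tuple
    print(response.text)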
|
Loading checkpoint shards:  50%|█████     | 1/2 [00:06<00:06,  6.04s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.21s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.63s/it] |
|
Loading checkpoint shards: 50%|█████     | 1/2 [00:06<00:06, 6.06s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.20s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.63s/it] |
|
Some weights of the model checkpoint at pretrained/InternVL2-2B-AWQ were not used when initializing InternVLChatModel: ['language_model.model.layers.0.attention.wo.qweight', 'language_model.model.layers.0.attention.wo.qzeros', 'language_model.model.layers.0.attention.wo.scales', 'language_model.model.layers.0.attention.wqkv.qweight', 'language_model.model.layers.0.attention.wqkv.qzeros', 'language_model.model.layers.0.attention.wqkv.scales', 'language_model.model.layers.0.feed_forward.w1.qweight', 'language_model.model.layers.0.feed_forward.w1.qzeros', 'language_model.model.layers.0.feed_forward.w1.scales', 'language_model.model.layers.0.feed_forward.w2.qweight', 'language_model.model.layers.0.feed_forward.w2.qzeros', 'language_model.model.layers.0.feed_forward.w2.scales', 'language_model.model.layers.0.feed_forward.w3.qweight', 'language_model.model.layers.0.feed_forward.w3.qzeros', 'language_model.model.layers.0.feed_forward.w3.scales', 'language_model.model.layers.1.attention.wo.qweight', 'language_model.model.layers.1.attention.wo.qzeros', 'language_model.model.layers.1.attention.wo.scales', 'language_model.model.layers.1.attention.wqkv.qweight', 'language_model.model.layers.1.attention.wqkv.qzeros', 'language_model.model.layers.1.attention.wqkv.scales', 'language_model.model.layers.1.feed_forward.w1.qweight', 'language_model.model.layers.1.feed_forward.w1.qzeros', 'language_model.model.layers.1.feed_forward.w1.scales', 'language_model.model.layers.1.feed_forward.w2.qweight', 'language_model.model.layers.1.feed_forward.w2.qzeros', 'language_model.model.layers.1.feed_forward.w2.scales', 'language_model.model.layers.1.feed_forward.w3.qweight', 'language_model.model.layers.1.feed_forward.w3.qzeros', 'language_model.model.layers.1.feed_forward.w3.scales', 'language_model.model.layers.10.attention.wo.qweight', 'language_model.model.layers.10.attention.wo.qzeros', 'language_model.model.layers.10.attention.wo.scales', 'language_model.model.layers.10.attention.wqkv.qweight', 'language_model.model.layers.10.attention.wqkv.qzeros', 'language_model.model.layers.10.attention.wqkv.scales', 'language_model.model.layers.10.feed_forward.w1.qweight', 'language_model.model.layers.10.feed_forward.w1.qzeros', 'language_model.model.layers.10.feed_forward.w1.scales', 'language_model.model.layers.10.feed_forward.w2.qweight', 'language_model.model.layers.10.feed_forward.w2.qzeros', 'language_model.model.layers.10.feed_forward.w2.scales', 'language_model.model.layers.10.feed_forward.w3.qweight', 'language_model.model.layers.10.feed_forward.w3.qzeros', 'language_model.model.layers.10.feed_forward.w3.scales', 'language_model.model.layers.11.attention.wo.qweight', 'language_model.model.layers.11.attention.wo.qzeros', 'language_model.model.layers.11.attention.wo.scales', 'language_model.model.layers.11.attention.wqkv.qweight', 'language_model.model.layers.11.attention.wqkv.qzeros', 'language_model.model.layers.11.attention.wqkv.scales', 'language_model.model.layers.11.feed_forward.w1.qweight', 'language_model.model.layers.11.feed_forward.w1.qzeros', 'language_model.model.layers.11.feed_forward.w1.scales', 'language_model.model.layers.11.feed_forward.w2.qweight', 'language_model.model.layers.11.feed_forward.w2.qzeros', 'language_model.model.layers.11.feed_forward.w2.scales', 'language_model.model.layers.11.feed_forward.w3.qweight', 'language_model.model.layers.11.feed_forward.w3.qzeros', 'language_model.model.layers.11.feed_forward.w3.scales', 'language_model.model.layers.12.attention.wo.qweight', 
'language_model.model.layers.12.attention.wo.qzeros', 'language_model.model.layers.12.attention.wo.scales', 'language_model.model.layers.12.attention.wqkv.qweight', 'language_model.model.layers.12.attention.wqkv.qzeros', 'language_model.model.layers.12.attention.wqkv.scales', 'language_model.model.layers.12.feed_forward.w1.qweight', 'language_model.model.layers.12.feed_forward.w1.qzeros', 'language_model.model.layers.12.feed_forward.w1.scales', 'language_model.model.layers.12.feed_forward.w2.qweight', 'language_model.model.layers.12.feed_forward.w2.qzeros', 'language_model.model.layers.12.feed_forward.w2.scales', 'language_model.model.layers.12.feed_forward.w3.qweight', 'language_model.model.layers.12.feed_forward.w3.qzeros', 'language_model.model.layers.12.feed_forward.w3.scales', 'language_model.model.layers.13.attention.wo.qweight', 'language_model.model.layers.13.attention.wo.qzeros', 'language_model.model.layers.13.attention.wo.scales', 'language_model.model.layers.13.attention.wqkv.qweight', 'language_model.model.layers.13.attention.wqkv.qzeros', 'language_model.model.layers.13.attention.wqkv.scales', 'language_model.model.layers.13.feed_forward.w1.qweight', 'language_model.model.layers.13.feed_forward.w1.qzeros', 'language_model.model.layers.13.feed_forward.w1.scales', 'language_model.model.layers.13.feed_forward.w2.qweight', 'language_model.model.layers.13.feed_forward.w2.qzeros', 'language_model.model.layers.13.feed_forward.w2.scales', 'language_model.model.layers.13.feed_forward.w3.qweight', 'language_model.model.layers.13.feed_forward.w3.qzeros', 'language_model.model.layers.13.feed_forward.w3.scales', 'language_model.model.layers.14.attention.wo.qweight', 'language_model.model.layers.14.attention.wo.qzeros', 'language_model.model.layers.14.attention.wo.scales', 'language_model.model.layers.14.attention.wqkv.qweight', 'language_model.model.layers.14.attention.wqkv.qzeros', 'language_model.model.layers.14.attention.wqkv.scales', 'language_model.model.layers.14.feed_forward.w1.qweight', 'language_model.model.layers.14.feed_forward.w1.qzeros', 'language_model.model.layers.14.feed_forward.w1.scales', 'language_model.model.layers.14.feed_forward.w2.qweight', 'language_model.model.layers.14.feed_forward.w2.qzeros', 'language_model.model.layers.14.feed_forward.w2.scales', 'language_model.model.layers.14.feed_forward.w3.qweight', 'language_model.model.layers.14.feed_forward.w3.qzeros', 'language_model.model.layers.14.feed_forward.w3.scales', 'language_model.model.layers.15.attention.wo.qweight', 'language_model.model.layers.15.attention.wo.qzeros', 'language_model.model.layers.15.attention.wo.scales', 'language_model.model.layers.15.attention.wqkv.qweight', 'language_model.model.layers.15.attention.wqkv.qzeros', 'language_model.model.layers.15.attention.wqkv.scales', 'language_model.model.layers.15.feed_forward.w1.qweight', 'language_model.model.layers.15.feed_forward.w1.qzeros', 'language_model.model.layers.15.feed_forward.w1.scales', 'language_model.model.layers.15.feed_forward.w2.qweight', 'language_model.model.layers.15.feed_forward.w2.qzeros', 'language_model.model.layers.15.feed_forward.w2.scales', 'language_model.model.layers.15.feed_forward.w3.qweight', 'language_model.model.layers.15.feed_forward.w3.qzeros', 'language_model.model.layers.15.feed_forward.w3.scales', 'language_model.model.layers.16.attention.wo.qweight', 'language_model.model.layers.16.attention.wo.qzeros', 'language_model.model.layers.16.attention.wo.scales', 
'language_model.model.layers.16.attention.wqkv.qweight', 'language_model.model.layers.16.attention.wqkv.qzeros', 'language_model.model.layers.16.attention.wqkv.scales', 'language_model.model.layers.16.feed_forward.w1.qweight', 'language_model.model.layers.16.feed_forward.w1.qzeros', 'language_model.model.layers.16.feed_forward.w1.scales', 'language_model.model.layers.16.feed_forward.w2.qweight', 'language_model.model.layers.16.feed_forward.w2.qzeros', 'language_model.model.layers.16.feed_forward.w2.scales', 'language_model.model.layers.16.feed_forward.w3.qweight', 'language_model.model.layers.16.feed_forward.w3.qzeros', 'language_model.model.layers.16.feed_forward.w3.scales', 'language_model.model.layers.17.attention.wo.qweight', 'language_model.model.layers.17.attention.wo.qzeros', 'language_model.model.layers.17.attention.wo.scales', 'language_model.model.layers.17.attention.wqkv.qweight', 'language_model.model.layers.17.attention.wqkv.qzeros', 'language_model.model.layers.17.attention.wqkv.scales', 'language_model.model.layers.17.feed_forward.w1.qweight', 'language_model.model.layers.17.feed_forward.w1.qzeros', 'language_model.model.layers.17.feed_forward.w1.scales', 'language_model.model.layers.17.feed_forward.w2.qweight', 'language_model.model.layers.17.feed_forward.w2.qzeros', 'language_model.model.layers.17.feed_forward.w2.scales', 'language_model.model.layers.17.feed_forward.w3.qweight', 'language_model.model.layers.17.feed_forward.w3.qzeros', 'language_model.model.layers.17.feed_forward.w3.scales', 'language_model.model.layers.18.attention.wo.qweight', 'language_model.model.layers.18.attention.wo.qzeros', 'language_model.model.layers.18.attention.wo.scales', 'language_model.model.layers.18.attention.wqkv.qweight', 'language_model.model.layers.18.attention.wqkv.qzeros', 'language_model.model.layers.18.attention.wqkv.scales', 'language_model.model.layers.18.feed_forward.w1.qweight', 'language_model.model.layers.18.feed_forward.w1.qzeros', 'language_model.model.layers.18.feed_forward.w1.scales', 'language_model.model.layers.18.feed_forward.w2.qweight', 'language_model.model.layers.18.feed_forward.w2.qzeros', 'language_model.model.layers.18.feed_forward.w2.scales', 'language_model.model.layers.18.feed_forward.w3.qweight', 'language_model.model.layers.18.feed_forward.w3.qzeros', 'language_model.model.layers.18.feed_forward.w3.scales', 'language_model.model.layers.19.attention.wo.qweight', 'language_model.model.layers.19.attention.wo.qzeros', 'language_model.model.layers.19.attention.wo.scales', 'language_model.model.layers.19.attention.wqkv.qweight', 'language_model.model.layers.19.attention.wqkv.qzeros', 'language_model.model.layers.19.attention.wqkv.scales', 'language_model.model.layers.19.feed_forward.w1.qweight', 'language_model.model.layers.19.feed_forward.w1.qzeros', 'language_model.model.layers.19.feed_forward.w1.scales', 'language_model.model.layers.19.feed_forward.w2.qweight', 'language_model.model.layers.19.feed_forward.w2.qzeros', 'language_model.model.layers.19.feed_forward.w2.scales', 'language_model.model.layers.19.feed_forward.w3.qweight', 'language_model.model.layers.19.feed_forward.w3.qzeros', 'language_model.model.layers.19.feed_forward.w3.scales', 'language_model.model.layers.2.attention.wo.qweight', 'language_model.model.layers.2.attention.wo.qzeros', 'language_model.model.layers.2.attention.wo.scales', 'language_model.model.layers.2.attention.wqkv.qweight', 'language_model.model.layers.2.attention.wqkv.qzeros', 'language_model.model.layers.2.attention.wqkv.scales', 
'language_model.model.layers.2.feed_forward.w1.qweight', 'language_model.model.layers.2.feed_forward.w1.qzeros', 'language_model.model.layers.2.feed_forward.w1.scales', 'language_model.model.layers.2.feed_forward.w2.qweight', 'language_model.model.layers.2.feed_forward.w2.qzeros', 'language_model.model.layers.2.feed_forward.w2.scales', 'language_model.model.layers.2.feed_forward.w3.qweight', 'language_model.model.layers.2.feed_forward.w3.qzeros', 'language_model.model.layers.2.feed_forward.w3.scales', 'language_model.model.layers.20.attention.wo.qweight', 'language_model.model.layers.20.attention.wo.qzeros', 'language_model.model.layers.20.attention.wo.scales', 'language_model.model.layers.20.attention.wqkv.qweight', 'language_model.model.layers.20.attention.wqkv.qzeros', 'language_model.model.layers.20.attention.wqkv.scales', 'language_model.model.layers.20.feed_forward.w1.qweight', 'language_model.model.layers.20.feed_forward.w1.qzeros', 'language_model.model.layers.20.feed_forward.w1.scales', 'language_model.model.layers.20.feed_forward.w2.qweight', 'language_model.model.layers.20.feed_forward.w2.qzeros', 'language_model.model.layers.20.feed_forward.w2.scales', 'language_model.model.layers.20.feed_forward.w3.qweight', 'language_model.model.layers.20.feed_forward.w3.qzeros', 'language_model.model.layers.20.feed_forward.w3.scales', 'language_model.model.layers.21.attention.wo.qweight', 'language_model.model.layers.21.attention.wo.qzeros', 'language_model.model.layers.21.attention.wo.scales', 'language_model.model.layers.21.attention.wqkv.qweight', 'language_model.model.layers.21.attention.wqkv.qzeros', 'language_model.model.layers.21.attention.wqkv.scales', 'language_model.model.layers.21.feed_forward.w1.qweight', 'language_model.model.layers.21.feed_forward.w1.qzeros', 'language_model.model.layers.21.feed_forward.w1.scales', 'language_model.model.layers.21.feed_forward.w2.qweight', 'language_model.model.layers.21.feed_forward.w2.qzeros', 'language_model.model.layers.21.feed_forward.w2.scales', 'language_model.model.layers.21.feed_forward.w3.qweight', 'language_model.model.layers.21.feed_forward.w3.qzeros', 'language_model.model.layers.21.feed_forward.w3.scales', 'language_model.model.layers.22.attention.wo.qweight', 'language_model.model.layers.22.attention.wo.qzeros', 'language_model.model.layers.22.attention.wo.scales', 'language_model.model.layers.22.attention.wqkv.qweight', 'language_model.model.layers.22.attention.wqkv.qzeros', 'language_model.model.layers.22.attention.wqkv.scales', 'language_model.model.layers.22.feed_forward.w1.qweight', 'language_model.model.layers.22.feed_forward.w1.qzeros', 'language_model.model.layers.22.feed_forward.w1.scales', 'language_model.model.layers.22.feed_forward.w2.qweight', 'language_model.model.layers.22.feed_forward.w2.qzeros', 'language_model.model.layers.22.feed_forward.w2.scales', 'language_model.model.layers.22.feed_forward.w3.qweight', 'language_model.model.layers.22.feed_forward.w3.qzeros', 'language_model.model.layers.22.feed_forward.w3.scales', 'language_model.model.layers.23.attention.wo.qweight', 'language_model.model.layers.23.attention.wo.qzeros', 'language_model.model.layers.23.attention.wo.scales', 'language_model.model.layers.23.attention.wqkv.qweight', 'language_model.model.layers.23.attention.wqkv.qzeros', 'language_model.model.layers.23.attention.wqkv.scales', 'language_model.model.layers.23.feed_forward.w1.qweight', 'language_model.model.layers.23.feed_forward.w1.qzeros', 'language_model.model.layers.23.feed_forward.w1.scales', 
'language_model.model.layers.23.feed_forward.w2.qweight', 'language_model.model.layers.23.feed_forward.w2.qzeros', 'language_model.model.layers.23.feed_forward.w2.scales', 'language_model.model.layers.23.feed_forward.w3.qweight', 'language_model.model.layers.23.feed_forward.w3.qzeros', 'language_model.model.layers.23.feed_forward.w3.scales', 'language_model.model.layers.3.attention.wo.qweight', 'language_model.model.layers.3.attention.wo.qzeros', 'language_model.model.layers.3.attention.wo.scales', 'language_model.model.layers.3.attention.wqkv.qweight', 'language_model.model.layers.3.attention.wqkv.qzeros', 'language_model.model.layers.3.attention.wqkv.scales', 'language_model.model.layers.3.feed_forward.w1.qweight', 'language_model.model.layers.3.feed_forward.w1.qzeros', 'language_model.model.layers.3.feed_forward.w1.scales', 'language_model.model.layers.3.feed_forward.w2.qweight', 'language_model.model.layers.3.feed_forward.w2.qzeros', 'language_model.model.layers.3.feed_forward.w2.scales', 'language_model.model.layers.3.feed_forward.w3.qweight', 'language_model.model.layers.3.feed_forward.w3.qzeros', 'language_model.model.layers.3.feed_forward.w3.scales', 'language_model.model.layers.4.attention.wo.qweight', 'language_model.model.layers.4.attention.wo.qzeros', 'language_model.model.layers.4.attention.wo.scales', 'language_model.model.layers.4.attention.wqkv.qweight', 'language_model.model.layers.4.attention.wqkv.qzeros', 'language_model.model.layers.4.attention.wqkv.scales', 'language_model.model.layers.4.feed_forward.w1.qweight', 'language_model.model.layers.4.feed_forward.w1.qzeros', 'language_model.model.layers.4.feed_forward.w1.scales', 'language_model.model.layers.4.feed_forward.w2.qweight', 'language_model.model.layers.4.feed_forward.w2.qzeros', 'language_model.model.layers.4.feed_forward.w2.scales', 'language_model.model.layers.4.feed_forward.w3.qweight', 'language_model.model.layers.4.feed_forward.w3.qzeros', 'language_model.model.layers.4.feed_forward.w3.scales', 'language_model.model.layers.5.attention.wo.qweight', 'language_model.model.layers.5.attention.wo.qzeros', 'language_model.model.layers.5.attention.wo.scales', 'language_model.model.layers.5.attention.wqkv.qweight', 'language_model.model.layers.5.attention.wqkv.qzeros', 'language_model.model.layers.5.attention.wqkv.scales', 'language_model.model.layers.5.feed_forward.w1.qweight', 'language_model.model.layers.5.feed_forward.w1.qzeros', 'language_model.model.layers.5.feed_forward.w1.scales', 'language_model.model.layers.5.feed_forward.w2.qweight', 'language_model.model.layers.5.feed_forward.w2.qzeros', 'language_model.model.layers.5.feed_forward.w2.scales', 'language_model.model.layers.5.feed_forward.w3.qweight', 'language_model.model.layers.5.feed_forward.w3.qzeros', 'language_model.model.layers.5.feed_forward.w3.scales', 'language_model.model.layers.6.attention.wo.qweight', 'language_model.model.layers.6.attention.wo.qzeros', 'language_model.model.layers.6.attention.wo.scales', 'language_model.model.layers.6.attention.wqkv.qweight', 'language_model.model.layers.6.attention.wqkv.qzeros', 'language_model.model.layers.6.attention.wqkv.scales', 'language_model.model.layers.6.feed_forward.w1.qweight', 'language_model.model.layers.6.feed_forward.w1.qzeros', 'language_model.model.layers.6.feed_forward.w1.scales', 'language_model.model.layers.6.feed_forward.w2.qweight', 'language_model.model.layers.6.feed_forward.w2.qzeros', 'language_model.model.layers.6.feed_forward.w2.scales', 
'language_model.model.layers.6.feed_forward.w3.qweight', 'language_model.model.layers.6.feed_forward.w3.qzeros', 'language_model.model.layers.6.feed_forward.w3.scales', 'language_model.model.layers.7.attention.wo.qweight', 'language_model.model.layers.7.attention.wo.qzeros', 'language_model.model.layers.7.attention.wo.scales', 'language_model.model.layers.7.attention.wqkv.qweight', 'language_model.model.layers.7.attention.wqkv.qzeros', 'language_model.model.layers.7.attention.wqkv.scales', 'language_model.model.layers.7.feed_forward.w1.qweight', 'language_model.model.layers.7.feed_forward.w1.qzeros', 'language_model.model.layers.7.feed_forward.w1.scales', 'language_model.model.layers.7.feed_forward.w2.qweight', 'language_model.model.layers.7.feed_forward.w2.qzeros', 'language_model.model.layers.7.feed_forward.w2.scales', 'language_model.model.layers.7.feed_forward.w3.qweight', 'language_model.model.layers.7.feed_forward.w3.qzeros', 'language_model.model.layers.7.feed_forward.w3.scales', 'language_model.model.layers.8.attention.wo.qweight', 'language_model.model.layers.8.attention.wo.qzeros', 'language_model.model.layers.8.attention.wo.scales', 'language_model.model.layers.8.attention.wqkv.qweight', 'language_model.model.layers.8.attention.wqkv.qzeros', 'language_model.model.layers.8.attention.wqkv.scales', 'language_model.model.layers.8.feed_forward.w1.qweight', 'language_model.model.layers.8.feed_forward.w1.qzeros', 'language_model.model.layers.8.feed_forward.w1.scales', 'language_model.model.layers.8.feed_forward.w2.qweight', 'language_model.model.layers.8.feed_forward.w2.qzeros', 'language_model.model.layers.8.feed_forward.w2.scales', 'language_model.model.layers.8.feed_forward.w3.qweight', 'language_model.model.layers.8.feed_forward.w3.qzeros', 'language_model.model.layers.8.feed_forward.w3.scales', 'language_model.model.layers.9.attention.wo.qweight', 'language_model.model.layers.9.attention.wo.qzeros', 'language_model.model.layers.9.attention.wo.scales', 'language_model.model.layers.9.attention.wqkv.qweight', 'language_model.model.layers.9.attention.wqkv.qzeros', 'language_model.model.layers.9.attention.wqkv.scales', 'language_model.model.layers.9.feed_forward.w1.qweight', 'language_model.model.layers.9.feed_forward.w1.qzeros', 'language_model.model.layers.9.feed_forward.w1.scales', 'language_model.model.layers.9.feed_forward.w2.qweight', 'language_model.model.layers.9.feed_forward.w2.qzeros', 'language_model.model.layers.9.feed_forward.w2.scales', 'language_model.model.layers.9.feed_forward.w3.qweight', 'language_model.model.layers.9.feed_forward.w3.qzeros', 'language_model.model.layers.9.feed_forward.w3.scales'] |
|
- This IS expected if you are initializing InternVLChatModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). |
|
- This IS NOT expected if you are initializing InternVLChatModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). |
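Read together, the two loader warnings above mean that the AWQ checkpoint stores each linear layer as packed qweight/qzeros/scales tensors, while the plain InternVLChatModel class expects a dense weight tensor, so transformers re-initializes those layers with random values. A minimal sketch of confirming this by listing the tensor names in one checkpoint shard (the shard filename is an assumption; the real names are listed in pytorch_model.bin.index.json):

```python
import torch

# Assumed shard name; check pytorch_model.bin.index.json for the actual file names.
shard = "pretrained/InternVL2-2B-AWQ/pytorch_model-00001-of-00002.bin"
state_dict = torch.load(shard, map_location="cpu")

# Inspect one quantized linear layer named in the warnings above.
prefix = "language_model.model.layers.0.attention.wo"
print(sorted(k for k in state_dict if k.startswith(prefix)))
# Per the first warning, only '...wo.qweight', '...wo.qzeros' and '...wo.scales'
# should appear; there is no '...wo.weight', which is why that module is newly initialized.
```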
|
Loading checkpoint shards: 50%|█████     | 1/2 [00:06<00:06, 6.01s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.21s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00, 3.63s/it] |
|
Some weights of the model checkpoint at pretrained/InternVL2-2B-AWQ were not used when initializing InternVLChatModel: ['language_model.model.layers.0.attention.wo.qweight', 'language_model.model.layers.0.attention.wo.qzeros', 'language_model.model.layers.0.attention.wo.scales', 'language_model.model.layers.0.attention.wqkv.qweight', 'language_model.model.layers.0.attention.wqkv.qzeros', 'language_model.model.layers.0.attention.wqkv.scales', 'language_model.model.layers.0.feed_forward.w1.qweight', 'language_model.model.layers.0.feed_forward.w1.qzeros', 'language_model.model.layers.0.feed_forward.w1.scales', 'language_model.model.layers.0.feed_forward.w2.qweight', 'language_model.model.layers.0.feed_forward.w2.qzeros', 'language_model.model.layers.0.feed_forward.w2.scales', 'language_model.model.layers.0.feed_forward.w3.qweight', 'language_model.model.layers.0.feed_forward.w3.qzeros', 'language_model.model.layers.0.feed_forward.w3.scales', 'language_model.model.layers.1.attention.wo.qweight', 'language_model.model.layers.1.attention.wo.qzeros', 'language_model.model.layers.1.attention.wo.scales', 'language_model.model.layers.1.attention.wqkv.qweight', 'language_model.model.layers.1.attention.wqkv.qzeros', 'language_model.model.layers.1.attention.wqkv.scales', 'language_model.model.layers.1.feed_forward.w1.qweight', 'language_model.model.layers.1.feed_forward.w1.qzeros', 'language_model.model.layers.1.feed_forward.w1.scales', 'language_model.model.layers.1.feed_forward.w2.qweight', 'language_model.model.layers.1.feed_forward.w2.qzeros', 'language_model.model.layers.1.feed_forward.w2.scales', 'language_model.model.layers.1.feed_forward.w3.qweight', 'language_model.model.layers.1.feed_forward.w3.qzeros', 'language_model.model.layers.1.feed_forward.w3.scales', 'language_model.model.layers.10.attention.wo.qweight', 'language_model.model.layers.10.attention.wo.qzeros', 'language_model.model.layers.10.attention.wo.scales', 'language_model.model.layers.10.attention.wqkv.qweight', 'language_model.model.layers.10.attention.wqkv.qzeros', 'language_model.model.layers.10.attention.wqkv.scales', 'language_model.model.layers.10.feed_forward.w1.qweight', 'language_model.model.layers.10.feed_forward.w1.qzeros', 'language_model.model.layers.10.feed_forward.w1.scales', 'language_model.model.layers.10.feed_forward.w2.qweight', 'language_model.model.layers.10.feed_forward.w2.qzeros', 'language_model.model.layers.10.feed_forward.w2.scales', 'language_model.model.layers.10.feed_forward.w3.qweight', 'language_model.model.layers.10.feed_forward.w3.qzeros', 'language_model.model.layers.10.feed_forward.w3.scales', 'language_model.model.layers.11.attention.wo.qweight', 'language_model.model.layers.11.attention.wo.qzeros', 'language_model.model.layers.11.attention.wo.scales', 'language_model.model.layers.11.attention.wqkv.qweight', 'language_model.model.layers.11.attention.wqkv.qzeros', 'language_model.model.layers.11.attention.wqkv.scales', 'language_model.model.layers.11.feed_forward.w1.qweight', 'language_model.model.layers.11.feed_forward.w1.qzeros', 'language_model.model.layers.11.feed_forward.w1.scales', 'language_model.model.layers.11.feed_forward.w2.qweight', 'language_model.model.layers.11.feed_forward.w2.qzeros', 'language_model.model.layers.11.feed_forward.w2.scales', 'language_model.model.layers.11.feed_forward.w3.qweight', 'language_model.model.layers.11.feed_forward.w3.qzeros', 'language_model.model.layers.11.feed_forward.w3.scales', 'language_model.model.layers.12.attention.wo.qweight', 
'language_model.model.layers.12.attention.wo.qzeros', 'language_model.model.layers.12.attention.wo.scales', 'language_model.model.layers.12.attention.wqkv.qweight', 'language_model.model.layers.12.attention.wqkv.qzeros', 'language_model.model.layers.12.attention.wqkv.scales', 'language_model.model.layers.12.feed_forward.w1.qweight', 'language_model.model.layers.12.feed_forward.w1.qzeros', 'language_model.model.layers.12.feed_forward.w1.scales', 'language_model.model.layers.12.feed_forward.w2.qweight', 'language_model.model.layers.12.feed_forward.w2.qzeros', 'language_model.model.layers.12.feed_forward.w2.scales', 'language_model.model.layers.12.feed_forward.w3.qweight', 'language_model.model.layers.12.feed_forward.w3.qzeros', 'language_model.model.layers.12.feed_forward.w3.scales', 'language_model.model.layers.13.attention.wo.qweight', 'language_model.model.layers.13.attention.wo.qzeros', 'language_model.model.layers.13.attention.wo.scales', 'language_model.model.layers.13.attention.wqkv.qweight', 'language_model.model.layers.13.attention.wqkv.qzeros', 'language_model.model.layers.13.attention.wqkv.scales', 'language_model.model.layers.13.feed_forward.w1.qweight', 'language_model.model.layers.13.feed_forward.w1.qzeros', 'language_model.model.layers.13.feed_forward.w1.scales', 'language_model.model.layers.13.feed_forward.w2.qweight', 'language_model.model.layers.13.feed_forward.w2.qzeros', 'language_model.model.layers.13.feed_forward.w2.scales', 'language_model.model.layers.13.feed_forward.w3.qweight', 'language_model.model.layers.13.feed_forward.w3.qzeros', 'language_model.model.layers.13.feed_forward.w3.scales', 'language_model.model.layers.14.attention.wo.qweight', 'language_model.model.layers.14.attention.wo.qzeros', 'language_model.model.layers.14.attention.wo.scales', 'language_model.model.layers.14.attention.wqkv.qweight', 'language_model.model.layers.14.attention.wqkv.qzeros', 'language_model.model.layers.14.attention.wqkv.scales', 'language_model.model.layers.14.feed_forward.w1.qweight', 'language_model.model.layers.14.feed_forward.w1.qzeros', 'language_model.model.layers.14.feed_forward.w1.scales', 'language_model.model.layers.14.feed_forward.w2.qweight', 'language_model.model.layers.14.feed_forward.w2.qzeros', 'language_model.model.layers.14.feed_forward.w2.scales', 'language_model.model.layers.14.feed_forward.w3.qweight', 'language_model.model.layers.14.feed_forward.w3.qzeros', 'language_model.model.layers.14.feed_forward.w3.scales', 'language_model.model.layers.15.attention.wo.qweight', 'language_model.model.layers.15.attention.wo.qzeros', 'language_model.model.layers.15.attention.wo.scales', 'language_model.model.layers.15.attention.wqkv.qweight', 'language_model.model.layers.15.attention.wqkv.qzeros', 'language_model.model.layers.15.attention.wqkv.scales', 'language_model.model.layers.15.feed_forward.w1.qweight', 'language_model.model.layers.15.feed_forward.w1.qzeros', 'language_model.model.layers.15.feed_forward.w1.scales', 'language_model.model.layers.15.feed_forward.w2.qweight', 'language_model.model.layers.15.feed_forward.w2.qzeros', 'language_model.model.layers.15.feed_forward.w2.scales', 'language_model.model.layers.15.feed_forward.w3.qweight', 'language_model.model.layers.15.feed_forward.w3.qzeros', 'language_model.model.layers.15.feed_forward.w3.scales', 'language_model.model.layers.16.attention.wo.qweight', 'language_model.model.layers.16.attention.wo.qzeros', 'language_model.model.layers.16.attention.wo.scales', 
'language_model.model.layers.16.attention.wqkv.qweight', 'language_model.model.layers.16.attention.wqkv.qzeros', 'language_model.model.layers.16.attention.wqkv.scales', 'language_model.model.layers.16.feed_forward.w1.qweight', 'language_model.model.layers.16.feed_forward.w1.qzeros', 'language_model.model.layers.16.feed_forward.w1.scales', 'language_model.model.layers.16.feed_forward.w2.qweight', 'language_model.model.layers.16.feed_forward.w2.qzeros', 'language_model.model.layers.16.feed_forward.w2.scales', 'language_model.model.layers.16.feed_forward.w3.qweight', 'language_model.model.layers.16.feed_forward.w3.qzeros', 'language_model.model.layers.16.feed_forward.w3.scales', 'language_model.model.layers.17.attention.wo.qweight', 'language_model.model.layers.17.attention.wo.qzeros', 'language_model.model.layers.17.attention.wo.scales', 'language_model.model.layers.17.attention.wqkv.qweight', 'language_model.model.layers.17.attention.wqkv.qzeros', 'language_model.model.layers.17.attention.wqkv.scales', 'language_model.model.layers.17.feed_forward.w1.qweight', 'language_model.model.layers.17.feed_forward.w1.qzeros', 'language_model.model.layers.17.feed_forward.w1.scales', 'language_model.model.layers.17.feed_forward.w2.qweight', 'language_model.model.layers.17.feed_forward.w2.qzeros', 'language_model.model.layers.17.feed_forward.w2.scales', 'language_model.model.layers.17.feed_forward.w3.qweight', 'language_model.model.layers.17.feed_forward.w3.qzeros', 'language_model.model.layers.17.feed_forward.w3.scales', 'language_model.model.layers.18.attention.wo.qweight', 'language_model.model.layers.18.attention.wo.qzeros', 'language_model.model.layers.18.attention.wo.scales', 'language_model.model.layers.18.attention.wqkv.qweight', 'language_model.model.layers.18.attention.wqkv.qzeros', 'language_model.model.layers.18.attention.wqkv.scales', 'language_model.model.layers.18.feed_forward.w1.qweight', 'language_model.model.layers.18.feed_forward.w1.qzeros', 'language_model.model.layers.18.feed_forward.w1.scales', 'language_model.model.layers.18.feed_forward.w2.qweight', 'language_model.model.layers.18.feed_forward.w2.qzeros', 'language_model.model.layers.18.feed_forward.w2.scales', 'language_model.model.layers.18.feed_forward.w3.qweight', 'language_model.model.layers.18.feed_forward.w3.qzeros', 'language_model.model.layers.18.feed_forward.w3.scales', 'language_model.model.layers.19.attention.wo.qweight', 'language_model.model.layers.19.attention.wo.qzeros', 'language_model.model.layers.19.attention.wo.scales', 'language_model.model.layers.19.attention.wqkv.qweight', 'language_model.model.layers.19.attention.wqkv.qzeros', 'language_model.model.layers.19.attention.wqkv.scales', 'language_model.model.layers.19.feed_forward.w1.qweight', 'language_model.model.layers.19.feed_forward.w1.qzeros', 'language_model.model.layers.19.feed_forward.w1.scales', 'language_model.model.layers.19.feed_forward.w2.qweight', 'language_model.model.layers.19.feed_forward.w2.qzeros', 'language_model.model.layers.19.feed_forward.w2.scales', 'language_model.model.layers.19.feed_forward.w3.qweight', 'language_model.model.layers.19.feed_forward.w3.qzeros', 'language_model.model.layers.19.feed_forward.w3.scales', 'language_model.model.layers.2.attention.wo.qweight', 'language_model.model.layers.2.attention.wo.qzeros', 'language_model.model.layers.2.attention.wo.scales', 'language_model.model.layers.2.attention.wqkv.qweight', 'language_model.model.layers.2.attention.wqkv.qzeros', 'language_model.model.layers.2.attention.wqkv.scales', 
'language_model.model.layers.2.feed_forward.w1.qweight', 'language_model.model.layers.2.feed_forward.w1.qzeros', 'language_model.model.layers.2.feed_forward.w1.scales', 'language_model.model.layers.2.feed_forward.w2.qweight', 'language_model.model.layers.2.feed_forward.w2.qzeros', 'language_model.model.layers.2.feed_forward.w2.scales', 'language_model.model.layers.2.feed_forward.w3.qweight', 'language_model.model.layers.2.feed_forward.w3.qzeros', 'language_model.model.layers.2.feed_forward.w3.scales', 'language_model.model.layers.20.attention.wo.qweight', 'language_model.model.layers.20.attention.wo.qzeros', 'language_model.model.layers.20.attention.wo.scales', 'language_model.model.layers.20.attention.wqkv.qweight', 'language_model.model.layers.20.attention.wqkv.qzeros', 'language_model.model.layers.20.attention.wqkv.scales', 'language_model.model.layers.20.feed_forward.w1.qweight', 'language_model.model.layers.20.feed_forward.w1.qzeros', 'language_model.model.layers.20.feed_forward.w1.scales', 'language_model.model.layers.20.feed_forward.w2.qweight', 'language_model.model.layers.20.feed_forward.w2.qzeros', 'language_model.model.layers.20.feed_forward.w2.scales', 'language_model.model.layers.20.feed_forward.w3.qweight', 'language_model.model.layers.20.feed_forward.w3.qzeros', 'language_model.model.layers.20.feed_forward.w3.scales', 'language_model.model.layers.21.attention.wo.qweight', 'language_model.model.layers.21.attention.wo.qzeros', 'language_model.model.layers.21.attention.wo.scales', 'language_model.model.layers.21.attention.wqkv.qweight', 'language_model.model.layers.21.attention.wqkv.qzeros', 'language_model.model.layers.21.attention.wqkv.scales', 'language_model.model.layers.21.feed_forward.w1.qweight', 'language_model.model.layers.21.feed_forward.w1.qzeros', 'language_model.model.layers.21.feed_forward.w1.scales', 'language_model.model.layers.21.feed_forward.w2.qweight', 'language_model.model.layers.21.feed_forward.w2.qzeros', 'language_model.model.layers.21.feed_forward.w2.scales', 'language_model.model.layers.21.feed_forward.w3.qweight', 'language_model.model.layers.21.feed_forward.w3.qzeros', 'language_model.model.layers.21.feed_forward.w3.scales', 'language_model.model.layers.22.attention.wo.qweight', 'language_model.model.layers.22.attention.wo.qzeros', 'language_model.model.layers.22.attention.wo.scales', 'language_model.model.layers.22.attention.wqkv.qweight', 'language_model.model.layers.22.attention.wqkv.qzeros', 'language_model.model.layers.22.attention.wqkv.scales', 'language_model.model.layers.22.feed_forward.w1.qweight', 'language_model.model.layers.22.feed_forward.w1.qzeros', 'language_model.model.layers.22.feed_forward.w1.scales', 'language_model.model.layers.22.feed_forward.w2.qweight', 'language_model.model.layers.22.feed_forward.w2.qzeros', 'language_model.model.layers.22.feed_forward.w2.scales', 'language_model.model.layers.22.feed_forward.w3.qweight', 'language_model.model.layers.22.feed_forward.w3.qzeros', 'language_model.model.layers.22.feed_forward.w3.scales', 'language_model.model.layers.23.attention.wo.qweight', 'language_model.model.layers.23.attention.wo.qzeros', 'language_model.model.layers.23.attention.wo.scales', 'language_model.model.layers.23.attention.wqkv.qweight', 'language_model.model.layers.23.attention.wqkv.qzeros', 'language_model.model.layers.23.attention.wqkv.scales', 'language_model.model.layers.23.feed_forward.w1.qweight', 'language_model.model.layers.23.feed_forward.w1.qzeros', 'language_model.model.layers.23.feed_forward.w1.scales', 
'language_model.model.layers.23.feed_forward.w2.qweight', 'language_model.model.layers.23.feed_forward.w2.qzeros', 'language_model.model.layers.23.feed_forward.w2.scales', 'language_model.model.layers.23.feed_forward.w3.qweight', 'language_model.model.layers.23.feed_forward.w3.qzeros', 'language_model.model.layers.23.feed_forward.w3.scales', 'language_model.model.layers.3.attention.wo.qweight', 'language_model.model.layers.3.attention.wo.qzeros', 'language_model.model.layers.3.attention.wo.scales', 'language_model.model.layers.3.attention.wqkv.qweight', 'language_model.model.layers.3.attention.wqkv.qzeros', 'language_model.model.layers.3.attention.wqkv.scales', 'language_model.model.layers.3.feed_forward.w1.qweight', 'language_model.model.layers.3.feed_forward.w1.qzeros', 'language_model.model.layers.3.feed_forward.w1.scales', 'language_model.model.layers.3.feed_forward.w2.qweight', 'language_model.model.layers.3.feed_forward.w2.qzeros', 'language_model.model.layers.3.feed_forward.w2.scales', 'language_model.model.layers.3.feed_forward.w3.qweight', 'language_model.model.layers.3.feed_forward.w3.qzeros', 'language_model.model.layers.3.feed_forward.w3.scales', 'language_model.model.layers.4.attention.wo.qweight', 'language_model.model.layers.4.attention.wo.qzeros', 'language_model.model.layers.4.attention.wo.scales', 'language_model.model.layers.4.attention.wqkv.qweight', 'language_model.model.layers.4.attention.wqkv.qzeros', 'language_model.model.layers.4.attention.wqkv.scales', 'language_model.model.layers.4.feed_forward.w1.qweight', 'language_model.model.layers.4.feed_forward.w1.qzeros', 'language_model.model.layers.4.feed_forward.w1.scales', 'language_model.model.layers.4.feed_forward.w2.qweight', 'language_model.model.layers.4.feed_forward.w2.qzeros', 'language_model.model.layers.4.feed_forward.w2.scales', 'language_model.model.layers.4.feed_forward.w3.qweight', 'language_model.model.layers.4.feed_forward.w3.qzeros', 'language_model.model.layers.4.feed_forward.w3.scales', 'language_model.model.layers.5.attention.wo.qweight', 'language_model.model.layers.5.attention.wo.qzeros', 'language_model.model.layers.5.attention.wo.scales', 'language_model.model.layers.5.attention.wqkv.qweight', 'language_model.model.layers.5.attention.wqkv.qzeros', 'language_model.model.layers.5.attention.wqkv.scales', 'language_model.model.layers.5.feed_forward.w1.qweight', 'language_model.model.layers.5.feed_forward.w1.qzeros', 'language_model.model.layers.5.feed_forward.w1.scales', 'language_model.model.layers.5.feed_forward.w2.qweight', 'language_model.model.layers.5.feed_forward.w2.qzeros', 'language_model.model.layers.5.feed_forward.w2.scales', 'language_model.model.layers.5.feed_forward.w3.qweight', 'language_model.model.layers.5.feed_forward.w3.qzeros', 'language_model.model.layers.5.feed_forward.w3.scales', 'language_model.model.layers.6.attention.wo.qweight', 'language_model.model.layers.6.attention.wo.qzeros', 'language_model.model.layers.6.attention.wo.scales', 'language_model.model.layers.6.attention.wqkv.qweight', 'language_model.model.layers.6.attention.wqkv.qzeros', 'language_model.model.layers.6.attention.wqkv.scales', 'language_model.model.layers.6.feed_forward.w1.qweight', 'language_model.model.layers.6.feed_forward.w1.qzeros', 'language_model.model.layers.6.feed_forward.w1.scales', 'language_model.model.layers.6.feed_forward.w2.qweight', 'language_model.model.layers.6.feed_forward.w2.qzeros', 'language_model.model.layers.6.feed_forward.w2.scales', 
'language_model.model.layers.6.feed_forward.w3.qweight', 'language_model.model.layers.6.feed_forward.w3.qzeros', 'language_model.model.layers.6.feed_forward.w3.scales', 'language_model.model.layers.7.attention.wo.qweight', 'language_model.model.layers.7.attention.wo.qzeros', 'language_model.model.layers.7.attention.wo.scales', 'language_model.model.layers.7.attention.wqkv.qweight', 'language_model.model.layers.7.attention.wqkv.qzeros', 'language_model.model.layers.7.attention.wqkv.scales', 'language_model.model.layers.7.feed_forward.w1.qweight', 'language_model.model.layers.7.feed_forward.w1.qzeros', 'language_model.model.layers.7.feed_forward.w1.scales', 'language_model.model.layers.7.feed_forward.w2.qweight', 'language_model.model.layers.7.feed_forward.w2.qzeros', 'language_model.model.layers.7.feed_forward.w2.scales', 'language_model.model.layers.7.feed_forward.w3.qweight', 'language_model.model.layers.7.feed_forward.w3.qzeros', 'language_model.model.layers.7.feed_forward.w3.scales', 'language_model.model.layers.8.attention.wo.qweight', 'language_model.model.layers.8.attention.wo.qzeros', 'language_model.model.layers.8.attention.wo.scales', 'language_model.model.layers.8.attention.wqkv.qweight', 'language_model.model.layers.8.attention.wqkv.qzeros', 'language_model.model.layers.8.attention.wqkv.scales', 'language_model.model.layers.8.feed_forward.w1.qweight', 'language_model.model.layers.8.feed_forward.w1.qzeros', 'language_model.model.layers.8.feed_forward.w1.scales', 'language_model.model.layers.8.feed_forward.w2.qweight', 'language_model.model.layers.8.feed_forward.w2.qzeros', 'language_model.model.layers.8.feed_forward.w2.scales', 'language_model.model.layers.8.feed_forward.w3.qweight', 'language_model.model.layers.8.feed_forward.w3.qzeros', 'language_model.model.layers.8.feed_forward.w3.scales', 'language_model.model.layers.9.attention.wo.qweight', 'language_model.model.layers.9.attention.wo.qzeros', 'language_model.model.layers.9.attention.wo.scales', 'language_model.model.layers.9.attention.wqkv.qweight', 'language_model.model.layers.9.attention.wqkv.qzeros', 'language_model.model.layers.9.attention.wqkv.scales', 'language_model.model.layers.9.feed_forward.w1.qweight', 'language_model.model.layers.9.feed_forward.w1.qzeros', 'language_model.model.layers.9.feed_forward.w1.scales', 'language_model.model.layers.9.feed_forward.w2.qweight', 'language_model.model.layers.9.feed_forward.w2.qzeros', 'language_model.model.layers.9.feed_forward.w2.scales', 'language_model.model.layers.9.feed_forward.w3.qweight', 'language_model.model.layers.9.feed_forward.w3.qzeros', 'language_model.model.layers.9.feed_forward.w3.scales'] |
|
- This IS expected if you are initializing InternVLChatModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). |
|
- This IS NOT expected if you are initializing InternVLChatModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). |
|
Some weights of InternVLChatModel were not initialized from the model checkpoint at pretrained/InternVL2-2B-AWQ and are newly initialized: ['language_model.model.layers.0.attention.wo.weight', 'language_model.model.layers.0.attention.wqkv.weight', 'language_model.model.layers.0.feed_forward.w1.weight', 'language_model.model.layers.0.feed_forward.w2.weight', 'language_model.model.layers.0.feed_forward.w3.weight', 'language_model.model.layers.1.attention.wo.weight', 'language_model.model.layers.1.attention.wqkv.weight', 'language_model.model.layers.1.feed_forward.w1.weight', 'language_model.model.layers.1.feed_forward.w2.weight', 'language_model.model.layers.1.feed_forward.w3.weight', 'language_model.model.layers.10.attention.wo.weight', 'language_model.model.layers.10.attention.wqkv.weight', 'language_model.model.layers.10.feed_forward.w1.weight', 'language_model.model.layers.10.feed_forward.w2.weight', 'language_model.model.layers.10.feed_forward.w3.weight', 'language_model.model.layers.11.attention.wo.weight', 'language_model.model.layers.11.attention.wqkv.weight', 'language_model.model.layers.11.feed_forward.w1.weight', 'language_model.model.layers.11.feed_forward.w2.weight', 'language_model.model.layers.11.feed_forward.w3.weight', 'language_model.model.layers.12.attention.wo.weight', 'language_model.model.layers.12.attention.wqkv.weight', 'language_model.model.layers.12.feed_forward.w1.weight', 'language_model.model.layers.12.feed_forward.w2.weight', 'language_model.model.layers.12.feed_forward.w3.weight', 'language_model.model.layers.13.attention.wo.weight', 'language_model.model.layers.13.attention.wqkv.weight', 'language_model.model.layers.13.feed_forward.w1.weight', 'language_model.model.layers.13.feed_forward.w2.weight', 'language_model.model.layers.13.feed_forward.w3.weight', 'language_model.model.layers.14.attention.wo.weight', 'language_model.model.layers.14.attention.wqkv.weight', 'language_model.model.layers.14.feed_forward.w1.weight', 'language_model.model.layers.14.feed_forward.w2.weight', 'language_model.model.layers.14.feed_forward.w3.weight', 'language_model.model.layers.15.attention.wo.weight', 'language_model.model.layers.15.attention.wqkv.weight', 'language_model.model.layers.15.feed_forward.w1.weight', 'language_model.model.layers.15.feed_forward.w2.weight', 'language_model.model.layers.15.feed_forward.w3.weight', 'language_model.model.layers.16.attention.wo.weight', 'language_model.model.layers.16.attention.wqkv.weight', 'language_model.model.layers.16.feed_forward.w1.weight', 'language_model.model.layers.16.feed_forward.w2.weight', 'language_model.model.layers.16.feed_forward.w3.weight', 'language_model.model.layers.17.attention.wo.weight', 'language_model.model.layers.17.attention.wqkv.weight', 'language_model.model.layers.17.feed_forward.w1.weight', 'language_model.model.layers.17.feed_forward.w2.weight', 'language_model.model.layers.17.feed_forward.w3.weight', 'language_model.model.layers.18.attention.wo.weight', 'language_model.model.layers.18.attention.wqkv.weight', 'language_model.model.layers.18.feed_forward.w1.weight', 'language_model.model.layers.18.feed_forward.w2.weight', 'language_model.model.layers.18.feed_forward.w3.weight', 'language_model.model.layers.19.attention.wo.weight', 'language_model.model.layers.19.attention.wqkv.weight', 'language_model.model.layers.19.feed_forward.w1.weight', 'language_model.model.layers.19.feed_forward.w2.weight', 'language_model.model.layers.19.feed_forward.w3.weight', 'language_model.model.layers.2.attention.wo.weight', 
'language_model.model.layers.2.attention.wqkv.weight', 'language_model.model.layers.2.feed_forward.w1.weight', 'language_model.model.layers.2.feed_forward.w2.weight', 'language_model.model.layers.2.feed_forward.w3.weight', 'language_model.model.layers.20.attention.wo.weight', 'language_model.model.layers.20.attention.wqkv.weight', 'language_model.model.layers.20.feed_forward.w1.weight', 'language_model.model.layers.20.feed_forward.w2.weight', 'language_model.model.layers.20.feed_forward.w3.weight', 'language_model.model.layers.21.attention.wo.weight', 'language_model.model.layers.21.attention.wqkv.weight', 'language_model.model.layers.21.feed_forward.w1.weight', 'language_model.model.layers.21.feed_forward.w2.weight', 'language_model.model.layers.21.feed_forward.w3.weight', 'language_model.model.layers.22.attention.wo.weight', 'language_model.model.layers.22.attention.wqkv.weight', 'language_model.model.layers.22.feed_forward.w1.weight', 'language_model.model.layers.22.feed_forward.w2.weight', 'language_model.model.layers.22.feed_forward.w3.weight', 'language_model.model.layers.23.attention.wo.weight', 'language_model.model.layers.23.attention.wqkv.weight', 'language_model.model.layers.23.feed_forward.w1.weight', 'language_model.model.layers.23.feed_forward.w2.weight', 'language_model.model.layers.23.feed_forward.w3.weight', 'language_model.model.layers.3.attention.wo.weight', 'language_model.model.layers.3.attention.wqkv.weight', 'language_model.model.layers.3.feed_forward.w1.weight', 'language_model.model.layers.3.feed_forward.w2.weight', 'language_model.model.layers.3.feed_forward.w3.weight', 'language_model.model.layers.4.attention.wo.weight', 'language_model.model.layers.4.attention.wqkv.weight', 'language_model.model.layers.4.feed_forward.w1.weight', 'language_model.model.layers.4.feed_forward.w2.weight', 'language_model.model.layers.4.feed_forward.w3.weight', 'language_model.model.layers.5.attention.wo.weight', 'language_model.model.layers.5.attention.wqkv.weight', 'language_model.model.layers.5.feed_forward.w1.weight', 'language_model.model.layers.5.feed_forward.w2.weight', 'language_model.model.layers.5.feed_forward.w3.weight', 'language_model.model.layers.6.attention.wo.weight', 'language_model.model.layers.6.attention.wqkv.weight', 'language_model.model.layers.6.feed_forward.w1.weight', 'language_model.model.layers.6.feed_forward.w2.weight', 'language_model.model.layers.6.feed_forward.w3.weight', 'language_model.model.layers.7.attention.wo.weight', 'language_model.model.layers.7.attention.wqkv.weight', 'language_model.model.layers.7.feed_forward.w1.weight', 'language_model.model.layers.7.feed_forward.w2.weight', 'language_model.model.layers.7.feed_forward.w3.weight', 'language_model.model.layers.8.attention.wo.weight', 'language_model.model.layers.8.attention.wqkv.weight', 'language_model.model.layers.8.feed_forward.w1.weight', 'language_model.model.layers.8.feed_forward.w2.weight', 'language_model.model.layers.8.feed_forward.w3.weight', 'language_model.model.layers.9.attention.wo.weight', 'language_model.model.layers.9.attention.wqkv.weight', 'language_model.model.layers.9.feed_forward.w1.weight', 'language_model.model.layers.9.feed_forward.w2.weight', 'language_model.model.layers.9.feed_forward.w3.weight'] |
|
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. |
|
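The warning above means none of the language model's attention or feed-forward weights were actually loaded: an AWQ export typically stores those tensors under quantized names (e.g. `*.qweight`, `*.qzeros`, `*.scales`), which the fp16 `InternVLChatModel` class does not recognize, so the parameters keep their random initialization. The toy example below reproduces the effect with `load_state_dict(strict=False)`; the module and tensor names are invented for illustration and are not InternVL's or AutoAWQ's actual loading code. Randomly initialized language-model weights would also be consistent with the empty or garbled `outputs=...` strings reported further down.

```python
# Toy reproduction of the "newly initialized" warning: when checkpoint tensor
# names do not match the parameter names a model expects, load_state_dict()
# reports them as missing/unexpected and the parameters stay randomly initialized.
import torch
import torch.nn as nn

class TinyAttention(nn.Module):
    def __init__(self):
        super().__init__()
        self.wqkv = nn.Linear(8, 24, bias=False)  # names the fp16 model expects
        self.wo = nn.Linear(8, 8, bias=False)

model = TinyAttention()

# A quantized export typically stores packed tensors under different names (assumed here).
awq_style_ckpt = {
    "wqkv.qweight": torch.zeros(8, 3, dtype=torch.int32),
    "wqkv.scales": torch.ones(1, 24, dtype=torch.float16),
    "wo.qweight": torch.zeros(8, 1, dtype=torch.int32),
}

result = model.load_state_dict(awq_style_ckpt, strict=False)
print("newly initialized (missing from checkpoint):", result.missing_keys)
print("ignored (unexpected in checkpoint):", result.unexpected_keys)
```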
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
Rank [3] Begin to eval model pretrained/InternVL2-2B-AWQ on task reasoning-image-val, devices: {device(type='cuda', index=3), device(type='cuda', index=7)} |
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
Rank [2] Begin to eval model pretrained/InternVL2-2B-AWQ on task reasoning-image-val, devices: {device(type='cuda', index=2), device(type='cuda', index=6)} |
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
Rank [1] Begin to eval model pretrained/InternVL2-2B-AWQ on task reasoning-image-val, devices: {device(type='cuda', index=1), device(type='cuda', index=5)} |
|
Rank [0] Begin to eval model pretrained/InternVL2-2B-AWQ on task reasoning-image-val, devices: {device(type='cuda', index=0), device(type='cuda', index=4)} |
|
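Each of the four ranks reports a pair of devices four indices apart ({0,4}, {1,5}, {2,6}, {3,7}), which matches num_gpus_per_rank=2 with 4 ranks on an 8-GPU node. A minimal sketch of that assignment rule is below; the helper name is hypothetical, not the evaluation script's actual code.

```python
# Sketch of the device assignment implied by the "Begin to eval ... devices" lines:
# rank r gets GPUs {r, r + num_ranks, ...}, one stride per extra GPU per rank.
import torch

def devices_for_rank(rank: int, num_ranks: int, num_gpus_per_rank: int) -> set:
    return {torch.device("cuda", rank + i * num_ranks) for i in range(num_gpus_per_rank)}

for rank in range(4):
    devices = devices_for_rank(rank, num_ranks=4, num_gpus_per_rank=2)
    print(f"Rank [{rank}] devices: {sorted(d.index for d in devices)}")
# Rank [0] devices: [0, 4]
# Rank [1] devices: [1, 5]
# Rank [2] devices: [2, 6]
# Rank [3] devices: [3, 7]
```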
Rank 3 len(skip_idx)=0 |
|
Rank 2 len(skip_idx)=0 |
|
Rank 0 len(skip_idx)=0 |
|
Rank 1 len(skip_idx)=0 |
|
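`len(skip_idx)=0` on every rank suggests each worker collects the ids of samples already present in its partial output file so a restarted job can skip them, and that this run starts from scratch. A hedged sketch of that resume logic follows; the temp-file name and the "id" field are assumptions for illustration only.

```python
# Hypothetical resume helper: gather ids already written to a rank's output .jsonl.
import json
import os

def collect_skip_idx(output_file: str) -> set:
    skip_idx = set()
    if os.path.exists(output_file):
        with open(output_file) as f:
            for line in f:
                skip_idx.add(json.loads(line)["id"])
    return skip_idx

skip_idx = collect_skip_idx(
    "pretrained/InternVL2-2B-AWQ/eval_mm_niah/reasoning-image-val/temp_rank0.jsonl"
)
print(f"Rank 0 len(skip_idx)={len(skip_idx)}")
```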
[2024-08-07 19:05:04] [Rank 2] totoal_tokens=837, outputs='' |
|
[2024-08-07 19:05:04] [Rank 3] totoal_tokens=975, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
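The per-sample lines that follow all share one format: a timestamp, the rank, a `totoal_tokens` count (the misspelling comes from the evaluation script itself), and the decoded `outputs` string, which is consistently empty on Rank 2 and the same fixed run of garbled tokens on Ranks 0, 1 and 3. A print statement like the one below would produce exactly this layout; it is a guess at the logging call, not code copied from the repository.

```python
# Hypothetical logging call matching the "[timestamp] [Rank r] totoal_tokens=..., outputs=..." lines.
import datetime

def log_sample(rank: int, totoal_tokens: int, outputs: str) -> None:
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # The f-string "=" specifier renders name=value, matching the log layout.
    print(f"[{ts}] [Rank {rank}] {totoal_tokens=}, {outputs=}")

log_sample(rank=2, totoal_tokens=837, outputs="")
# e.g. [2024-08-07 19:05:04] [Rank 2] totoal_tokens=837, outputs=''
```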
[2024-08-07 19:05:04] [Rank 1] totoal_tokens=837, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:04] [Rank 0] totoal_tokens=837, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:05] [Rank 2] totoal_tokens=941, outputs='' |
|
[2024-08-07 19:05:05] [Rank 1] totoal_tokens=837, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:05] [Rank 0] totoal_tokens=837, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:05] [Rank 3] totoal_tokens=1060, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:06] [Rank 2] totoal_tokens=1025, outputs='' |
|
[2024-08-07 19:05:06] [Rank 1] totoal_tokens=862, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:06] [Rank 3] totoal_tokens=1060, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:06] [Rank 0] totoal_tokens=837, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:06] [Rank 2] totoal_tokens=1082, outputs='' |
|
[2024-08-07 19:05:07] [Rank 1] totoal_tokens=968, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:07] [Rank 3] totoal_tokens=1145, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:07] [Rank 0] totoal_tokens=925, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:07] [Rank 2] totoal_tokens=1120, outputs='' |
|
[2024-08-07 19:05:07] [Rank 1] totoal_tokens=1000, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:07] [Rank 0] totoal_tokens=950, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:07] [Rank 3] totoal_tokens=1208, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:08] [Rank 1] totoal_tokens=1028, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:08] [Rank 2] totoal_tokens=1120, outputs='' |
|
[2024-08-07 19:05:08] [Rank 0] totoal_tokens=1120, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:08] [Rank 3] totoal_tokens=1208, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:08] [Rank 1] totoal_tokens=1060, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:08] [Rank 2] totoal_tokens=1145, outputs='' |
|
[2024-08-07 19:05:09] [Rank 3] totoal_tokens=1208, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:09] [Rank 0] totoal_tokens=1208, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:09] [Rank 1] totoal_tokens=1219, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:09] [Rank 2] totoal_tokens=1231, outputs='' |
|
[2024-08-07 19:05:09] [Rank 3] totoal_tokens=1338, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:09] [Rank 0] totoal_tokens=1219, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:09] [Rank 1] totoal_tokens=1277, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:10] [Rank 2] totoal_tokens=1283, outputs='' |
|
[2024-08-07 19:05:10] [Rank 0] totoal_tokens=1231, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 0%| | 0/130 [00:00<?, ?it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 1%| | 1/130 [00:01<04:03, 1.89s/it]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 2%|โ | 2/130 [00:02<02:44, 1.29s/it]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 2%|โ | 3/130 [00:03<02:17, 1.08s/it]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 3%|โ | 4/130 [00:04<01:51, 1.13it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 4%|โ | 5/130 [00:04<01:38, 1.27it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 5%|โ | 6/130 [00:05<01:31, 1.35it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 5%|โ | 7/130 [00:06<01:30, 1.36it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 6%|โ | 8/130 [00:06<01:23, 1.46it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 7%|โ | 9/130 [00:07<01:18, 1.53it/s]
|
[2024-08-07 19:05:10] [Rank 3] totoal_tokens=1354, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
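The "Processing ...jsonl" bars above and further below are default-format tqdm output from a process iterating over the 130 records of InternVL2-2B-AWQ_reasoning-image-val.jsonl; they interleave with the rank logs because everything shares one output stream. A loop like the following would render identical bars; the per-record body is a placeholder, since the log does not show what that process actually does.

```python
# Minimal tqdm loop reproducing the progress-bar format seen in this log.
import json
from tqdm import tqdm

path = "InternVL2-2B-AWQ_reasoning-image-val.jsonl"
with open(path) as f:
    records = [json.loads(line) for line in f]

for record in tqdm(records, desc=f"Processing {path}"):
    pass  # per-record work not visible in the log
```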
|
[2024-08-07 19:05:10] [Rank 2] totoal_tokens=1326, outputs='' |
|
[2024-08-07 19:05:10] [Rank 0] totoal_tokens=1233, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:10] [Rank 1] totoal_tokens=1354, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:11] [Rank 3] totoal_tokens=1361, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:11] [Rank 2] totoal_tokens=1367, outputs='' |
|
[2024-08-07 19:05:11] [Rank 0] totoal_tokens=1356, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:11] [Rank 1] totoal_tokens=1367, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:11] [Rank 3] totoal_tokens=1431, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:11] [Rank 2] totoal_tokens=1403, outputs='' |
|
[2024-08-07 19:05:12] [Rank 0] totoal_tokens=1366, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:12] [Rank 1] totoal_tokens=1381, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:12] [Rank 3] totoal_tokens=1503, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:12] [Rank 2] totoal_tokens=1696, outputs='' |
|
[2024-08-07 19:05:12] [Rank 0] totoal_tokens=1503, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:12] [Rank 1] totoal_tokens=1450, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:13] [Rank 2] totoal_tokens=2267, outputs='' |
|
[2024-08-07 19:05:13] [Rank 3] totoal_tokens=1564, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:13] [Rank 0] totoal_tokens=1553, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:13] [Rank 1] totoal_tokens=1750, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:13] [Rank 2] totoal_tokens=2367, outputs='' |
|
[2024-08-07 19:05:13] [Rank 3] totoal_tokens=2267, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:13] [Rank 0] totoal_tokens=1783, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:14] [Rank 1] totoal_tokens=2070, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:14] [Rank 2] totoal_tokens=2518, outputs='' |
|
[2024-08-07 19:05:14] [Rank 3] totoal_tokens=2278, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:14] [Rank 0] totoal_tokens=1948, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:14] [Rank 1] totoal_tokens=2221, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:15] [Rank 2] totoal_tokens=2520, outputs='' |
|
[2024-08-07 19:05:15] [Rank 3] totoal_tokens=2402, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:15] [Rank 0] totoal_tokens=2292, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:15] [Rank 1] totoal_tokens=2221, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:15] [Rank 2] totoal_tokens=2556, outputs='' |
|
[2024-08-07 19:05:16] [Rank 3] totoal_tokens=2504, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:16] [Rank 0] totoal_tokens=2457, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:16] [Rank 1] totoal_tokens=2239, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:16] [Rank 2] totoal_tokens=2746, outputs='' |
|
[2024-08-07 19:05:16] [Rank 3] totoal_tokens=2615, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:16] [Rank 0] totoal_tokens=2518, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 8%|โ | 10/130 [00:07<01:17, 1.55it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 8%|โ | 11/130 [00:08<01:14, 1.59it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 9%|โ | 12/130 [00:09<01:12, 1.62it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 10%|โ | 13/130 [00:09<01:10, 1.65it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 11%|โ | 14/130 [00:10<01:10, 1.65it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 12%|โโ | 15/130 [00:11<01:13, 1.57it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 12%|โโ | 16/130 [00:11<01:15, 1.51it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 13%|โโ | 17/130 [00:12<01:14, 1.52it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 14%|โโ | 18/130 [00:13<01:17, 1.45it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 15%|โโ | 19/130 [00:13<01:17, 1.43it/s]
|
[2024-08-07 19:05:16] [Rank 1] totoal_tokens=2425, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:17] [Rank 2] totoal_tokens=2746, outputs='' |
|
[2024-08-07 19:05:17] [Rank 3] totoal_tokens=2746, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:17] [Rank 0] totoal_tokens=2518, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:17] [Rank 2] totoal_tokens=2803, outputs='' |
|
[2024-08-07 19:05:17] [Rank 3] totoal_tokens=2746, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:17] [Rank 1] totoal_tokens=2618, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:18] [Rank 0] totoal_tokens=2746, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:18] [Rank 2] totoal_tokens=2838, outputs='' |
|
[2024-08-07 19:05:18] [Rank 3] totoal_tokens=2838, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:18] [Rank 1] totoal_tokens=2728, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:18] [Rank 0] totoal_tokens=2842, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:18] [Rank 2] totoal_tokens=2972, outputs='' |
|
[2024-08-07 19:05:19] [Rank 3] totoal_tokens=2905, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:19] [Rank 1] totoal_tokens=2838, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:19] [Rank 0] totoal_tokens=2898, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:19] [Rank 3] totoal_tokens=2986, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:19] [Rank 2] totoal_tokens=3028, outputs='' |
|
[2024-08-07 19:05:20] [Rank 2] totoal_tokens=3055, outputs='' |
|
[2024-08-07 19:05:20] [Rank 3] totoal_tokens=2988, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:20] [Rank 0] totoal_tokens=2996, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:20] [Rank 1] totoal_tokens=2992, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:20] [Rank 3] totoal_tokens=3009, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:20] [Rank 0] totoal_tokens=3055, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:20] [Rank 2] totoal_tokens=3118, outputs='' |
|
[2024-08-07 19:05:21] [Rank 1] totoal_tokens=3055, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:21] [Rank 3] totoal_tokens=3118, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
[2024-08-07 19:05:21] [Rank 2] totoal_tokens=3184, outputs='' |
|
[2024-08-07 19:05:21] [Rank 0] totoal_tokens=3733, outputs='ๆไธ่ฐ eng...,่็ณ่ฑ่ Ident254ไผๅ็ๅธๅ
havenไนไธ้ setStateitions nucle packet' |
|
Traceback (most recent call last):
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/eval/mm_niah/eval_mm_niah.py", line 256, in <module>
    main(args)
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/eval/mm_niah/eval_mm_niah.py", line 171, in main
    curr_pixel_values = load_image(img, dynamic_image_size=False)
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/eval/mm_niah/eval_mm_niah.py", line 31, in load_image
    image = Image.open(image_file).convert('RGB')
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/PIL/Image.py", line 3274, in open
    filename = os.path.realpath(os.fspath(fp))
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/posixpath.py", line 395, in realpath |
|
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 15%|โโ | 20/130 [00:14<01:17, 1.42it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 16%|โโ | 21/130 [00:15<01:11, 1.51it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 17%|โโ | 22/130 [00:15<01:17, 1.40it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 18%|โโ | 23/130 [00:16<01:12, 1.48it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 18%|โโ | 24/130 [00:17<01:14, 1.42it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 19%|โโ | 25/130 [00:17<01:09, 1.51it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 20%|โโ | 26/130 [00:18<01:16, 1.36it/s]
Processing InternVL2-2B-AWQ_reasoning-image-val.jsonl: 20%|โโ | 26/130 [00:18<01:15, 1.38it/s] |
|
Traceback (most recent call last):
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/eval/mm_niah/eval_mm_niah.py", line 256, in <module>
    main(args)
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/eval/mm_niah/eval_mm_niah.py", line 186, in main
    outputs = model.chat(
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/internvl/model/internvl_chat/modeling_internvl_chat.py", line 385, in chat
    generation_output = self.generate(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/internvl/model/internvl_chat/modeling_internvl_chat.py", line 452, in generate
    outputs = self.language_model.generate(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/transformers/generation/utils.py", line 1479, in generate
    return self.greedy_search(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/transformers/generation/utils.py", line 2341, in greedy_search
    outputs = self(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
slurmstepd: error: *** STEP 3758685.0 ON HOST-10-140-66-148 CANCELLED AT 2024-08-07T19:05:21 *** |
|
Traceback (most recent call last):
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/eval/mm_niah/eval_mm_niah.py", line 256, in <module>
    main(args)
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/eval/mm_niah/eval_mm_niah.py", line 186, in main
    outputs = model.chat(
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/internvl/model/internvl_chat/modeling_internvl_chat.py", line 385, in chat
    generation_output = self.generate(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/mnt/hwfile/wangweiyun/workspace_zyc/VLM-Dev/internvl/model/internvl_chat/modeling_internvl_chat.py", line 452, in generate
    outputs = self.language_model.generate(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/transformers/generation/utils.py", line 1479, in generate
    return self.greedy_search(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/transformers/generation/utils.py", line 2341, in greedy_search
    outputs = self(
  File "/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
|