c01zaut committed
Commit 1bdd2f8
1 Parent(s): cf554eb

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ MiniCPM-V-2_6-rk3588-w8a8-opt-0-hybrid-ratio-0.0.rkllm filter=lfs diff=lfs merge=lfs -text
+ MiniCPM-V-2_6-rk3588-w8a8-opt-0-hybrid-ratio-0.5.rkllm filter=lfs diff=lfs merge=lfs -text
+ MiniCPM-V-2_6-rk3588-w8a8-opt-1-hybrid-ratio-0.0.rkllm filter=lfs diff=lfs merge=lfs -text
+ MiniCPM-V-2_6-rk3588-w8a8-opt-1-hybrid-ratio-0.5.rkllm filter=lfs diff=lfs merge=lfs -text
MiniCPM-V-2_6-rk3588-w8a8-opt-0-hybrid-ratio-0.0.rkllm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6405908be4df7e4060062228ea1585208ef5e0ecbe3087bbad232b5a4387ca56
+ size 8189403140
MiniCPM-V-2_6-rk3588-w8a8-opt-0-hybrid-ratio-0.5.rkllm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6405908be4df7e4060062228ea1585208ef5e0ecbe3087bbad232b5a4387ca56
+ size 8189403140
MiniCPM-V-2_6-rk3588-w8a8-opt-1-hybrid-ratio-0.0.rkllm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61e8b15a2bb02e47d967edfbca5f29b0d1d489bdd992e8dbc7741a339e22459f
+ size 8189403140
MiniCPM-V-2_6-rk3588-w8a8-opt-1-hybrid-ratio-0.5.rkllm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61e8b15a2bb02e47d967edfbca5f29b0d1d489bdd992e8dbc7741a339e22459f
+ size 8189403140
README.md ADDED
@@ -0,0 +1,365 @@
+ ---
+ datasets:
+ - openbmb/RLAIF-V-Dataset
+ language:
+ - multilingual
+ library_name: transformers
+ pipeline_tag: image-text-to-text
+ tags:
+ - minicpm-v
+ - vision
+ - ocr
+ - multi-image
+ - video
+ - custom_code
+ ---
+ # MiniCPM-V-2_6-RK3588-1.1.1
+
+ This version of MiniCPM-V-2_6 has been converted to run on the RK3588 NPU using w8a8 quantization.
+
+ This model has been optimized with the following LoRA:
+
+ Compatible with RKLLM version: 1.1.1
+
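+ The four `.rkllm` files in this repo differ in two export-time settings encoded in their filenames: `opt-{0,1}` is the RKLLM-Toolkit optimization level, and `hybrid-ratio-{0.0,0.5}` is the hybrid quantization ratio. As a rough sketch of how such a file is produced with RKLLM-Toolkit (the keyword names below, e.g. `hybrid_rate`, are assumptions and may differ between toolkit releases, so check the RKLLM docs):
+
+ ```python
+ # Hypothetical export sketch, assuming RKLLM-Toolkit 1.1.x on an x86-64 Linux host
+ from rkllm.api import RKLLM
+
+ llm = RKLLM()
+ # Load the original Hugging Face checkpoint
+ llm.load_huggingface(model='openbmb/MiniCPM-V-2_6')
+ # w8a8 quantization targeting the RK3588 NPU; optimization_level and the hybrid
+ # ratio correspond to the opt-* / hybrid-ratio-* parts of the filenames above
+ llm.build(do_quantization=True,
+           quantized_dtype='w8a8',
+           optimization_level=1,
+           target_platform='rk3588',
+           hybrid_rate=0.5)  # parameter name assumed; may vary by toolkit version
+ llm.export_rkllm('./MiniCPM-V-2_6-rk3588-w8a8-opt-1-hybrid-ratio-0.5.rkllm')
+ ```
+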
+ ### Useful links:
+ [Official RKLLM GitHub](https://github.com/airockchip/rknn-llm)
+
+ [RockchipNPU Reddit](https://reddit.com/r/RockchipNPU)
+
+ [EZRKNN-LLM](https://github.com/Pelochus/ezrknn-llm/)
+
+ Pretty much anything by these folks: [marty1885](https://github.com/marty1885) and [happyme531](https://huggingface.co/happyme531)
+
+ # Original Model Card for base model, MiniCPM-V-2_6, below:
+
+
+ <h1>A GPT-4V Level MLLM for Single Image, Multi Image and Video on Your Phone</h1>
+
+ [GitHub](https://github.com/OpenBMB/MiniCPM-V) | [Demo](http://120.92.209.146:8887/)
+
+
+ ## MiniCPM-V 2.6
+
+ **MiniCPM-V 2.6** is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters. It exhibits a significant performance improvement over MiniCPM-Llama3-V 2.5, and introduces new features for multi-image and video understanding. Notable features of MiniCPM-V 2.6 include:
+
+ - 🔥 **Leading Performance.**
+ MiniCPM-V 2.6 achieves an average score of 65.2 on the latest version of OpenCompass, a comprehensive evaluation over 8 popular benchmarks. **With only 8B parameters, it surpasses widely used proprietary models like GPT-4o mini, GPT-4V, Gemini 1.5 Pro, and Claude 3.5 Sonnet** for single image understanding.
+
+ - 🖼️ **Multi Image Understanding and In-context Learning.** MiniCPM-V 2.6 can also perform **conversation and reasoning over multiple images**. It achieves **state-of-the-art performance** on popular multi-image benchmarks such as Mantis-Eval, BLINK, Mathverse mv and Sciverse mv, and also shows promising in-context learning capability.
+
+ - 🎬 **Video Understanding.** MiniCPM-V 2.6 can also **accept video inputs**, performing conversation and providing dense captions for spatial-temporal information. It outperforms **GPT-4V, Claude 3.5 Sonnet and LLaVA-NeXT-Video-34B** on Video-MME with/without subtitles.
+
+ - 💪 **Strong OCR Capability and Others.**
+ MiniCPM-V 2.6 can process images with any aspect ratio and up to 1.8 million pixels (e.g., 1344x1344). It achieves **state-of-the-art performance on OCRBench, surpassing proprietary models such as GPT-4o, GPT-4V, and Gemini 1.5 Pro**.
+ Based on the latest [RLAIF-V](https://github.com/RLHF-V/RLAIF-V/) and [VisCPM](https://github.com/OpenBMB/VisCPM) techniques, it features **trustworthy behaviors**, with significantly lower hallucination rates than GPT-4o and GPT-4V on Object HalBench, and supports **multilingual capabilities** in English, Chinese, German, French, Italian, Korean, etc.
+
+ - 🚀 **Superior Efficiency.**
+ In addition to its friendly size, MiniCPM-V 2.6 also shows **state-of-the-art token density** (i.e., number of pixels encoded into each visual token). **It produces only 640 tokens when processing a 1.8M pixel image, which is 75% fewer than most models**. This directly improves the inference speed, first-token latency, memory usage, and power consumption. As a result, MiniCPM-V 2.6 can efficiently support **real-time video understanding** on end-side devices such as iPad.
+
+ - 💫 **Easy Usage.**
+ MiniCPM-V 2.6 can be easily used in various ways: (1) [llama.cpp](https://github.com/OpenBMB/llama.cpp/blob/minicpmv-main/examples/llava/README-minicpmv2.6.md) and [ollama](https://github.com/OpenBMB/ollama/tree/minicpm-v2.6) support for efficient CPU inference on local devices, (2) [int4](https://huggingface.co/openbmb/MiniCPM-V-2_6-int4) and [GGUF](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) format quantized models in 16 sizes, (3) [vLLM](https://github.com/OpenBMB/MiniCPM-V/tree/main?tab=readme-ov-file#inference-with-vllm) support for high-throughput and memory-efficient inference (a sketch follows this list), (4) fine-tuning on new domains and tasks, (5) quick local WebUI demo setup with [Gradio](https://github.com/OpenBMB/MiniCPM-V/tree/main?tab=readme-ov-file#chat-with-our-demo-on-gradio) and (6) online web [demo](http://120.92.209.146:8887).
+
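+ For example, a minimal vLLM sketch for option (3), adapted from the MiniCPM-V repo's vLLM example (the prompt placeholder and stop tokens are assumptions; verify against the linked instructions):
+
+ ```python
+ from PIL import Image
+ from transformers import AutoTokenizer
+ from vllm import LLM, SamplingParams
+
+ MODEL = 'openbmb/MiniCPM-V-2_6'
+ tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
+ llm = LLM(model=MODEL, trust_remote_code=True, max_model_len=2048)
+
+ # MiniCPM-V marks the image position in the prompt with an (<image>./</image>) placeholder
+ messages = [{'role': 'user', 'content': '(<image>./</image>)\nWhat is in the image?'}]
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+ stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in ('<|im_end|>', '<|endoftext|>')]
+ outputs = llm.generate(
+     {'prompt': prompt, 'multi_modal_data': {'image': Image.open('xx.jpg').convert('RGB')}},
+     SamplingParams(stop_token_ids=stop_token_ids, max_tokens=1024),
+ )
+ print(outputs[0].outputs[0].text)
+ ```
+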
+ ### Evaluation <!-- omit in toc -->
+ <div align="center">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/radar_final.png" width=66% />
+ </div>
+
+ Single image results on OpenCompass, MME, MMVet, OCRBench, MMMU, MathVista, MMB, AI2D, TextVQA, DocVQA, HallusionBench, Object HalBench:
+ <div align="center">
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64abc4aa6cadc7aca585dddf/QVl0iPtT5aUhlvViyEpgs.png)
+
+ </div>
+
+ <sup>*</sup> We evaluate this benchmark using chain-of-thought prompting.
+
+ <sup>+</sup> Token Density: number of pixels encoded into each visual token at maximum resolution, i.e., # pixels at maximum resolution / # visual tokens.
+
+ Note: For proprietary models, we calculate token density based on the image encoding charging strategy defined in the official API documentation, which provides an upper-bound estimation.
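+
+ For example, at MiniCPM-V 2.6's maximum input of roughly 1.8M pixels (e.g., 1344x1344) encoded into 640 visual tokens, the definition above works out to
+
+ $$\text{Token Density} = \frac{1344 \times 1344}{640} \approx 2822 \text{ pixels per visual token}$$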
+
+
+ <details>
+ <summary>Click to view multi-image results on Mantis Eval, BLINK Val, Mathverse mv, Sciverse mv, MIRB.</summary>
+ <div align="center">
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64abc4aa6cadc7aca585dddf/o6FGHytRhzeatmhxq0Dbi.png)
+
+ </div>
+ <sup>*</sup> We evaluate the officially released checkpoint by ourselves.
+ </details>
+
+ <details>
+ <summary>Click to view video results on Video-MME and Video-ChatGPT.</summary>
+ <div align="center">
+
+ <!-- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64abc4aa6cadc7aca585dddf/_T1mw5yhqNCqVdYRTQOGu.png) -->
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64abc4aa6cadc7aca585dddf/jmrjoRr8SFLkrstjDmpaV.png)
+
+ </div>
+
+ </details>
+
+
+ <details>
+ <summary>Click to view few-shot results on TextVQA, VizWiz, VQAv2, OK-VQA.</summary>
+ <div align="center">
+
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64abc4aa6cadc7aca585dddf/zXIuiCTTe-POqKGHszdn0.png)
+
+ </div>
+ * denotes zero image shot and two additional text shots following Flamingo.
+
+ <sup>+</sup> We evaluate the pretraining ckpt without SFT.
+ </details>
+
+ ### Examples <!-- omit in toc -->
+
+ <div style="display: flex; flex-direction: column; align-items: center;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/minicpmv2_6/multi_img-bike.png" alt="Bike" style="margin-bottom: -20px;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/minicpmv2_6/multi_img-menu.png" alt="Menu" style="margin-bottom: -20px;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/minicpmv2_6/multi_img-code.png" alt="Code" style="margin-bottom: -20px;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/minicpmv2_6/ICL-Mem.png" alt="Mem" style="margin-bottom: -20px;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/minicpmv2_6/multiling-medal.png" alt="medal" style="margin-bottom: 10px;">
+ </div>
+ <details>
+ <summary>Click to view more cases.</summary>
+ <div style="display: flex; flex-direction: column; align-items: center;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/minicpmv2_6/ICL-elec.png" alt="elec" style="margin-bottom: -20px;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/minicpmv2_6/multiling-olympic.png" alt="Olympic" style="margin-bottom: 10px;">
+ </div>
+ </details>
+
+ We deploy MiniCPM-V 2.6 on end devices. The demo video is a raw screen recording on an iPad Pro without editing.
+
+ <div style="display: flex; justify-content: center;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/gif_cases/ai.gif" width="48%" style="margin: 0 10px;"/>
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/gif_cases/beer.gif" width="48%" style="margin: 0 10px;"/>
+ </div>
+ <div style="display: flex; justify-content: center; margin-top: 20px;">
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/gif_cases/ticket.gif" width="48%" style="margin: 0 10px;"/>
+ <img src="https://github.com/OpenBMB/MiniCPM-V/raw/main/assets/gif_cases/wfh.gif" width="48%" style="margin: 0 10px;"/>
+ </div>
+
+ <div style="text-align: center;">
+ <video controls autoplay src="https://cdn-uploads.huggingface.co/production/uploads/64abc4aa6cadc7aca585dddf/mXAEFQFqNd4nnvPk7r5eX.mp4"></video>
+ <!-- <video controls autoplay src="https://cdn-uploads.huggingface.co/production/uploads/64abc4aa6cadc7aca585dddf/fEWzfHUdKnpkM7sdmnBQa.mp4"></video> -->
+
+ </div>
+
+
+
+ ## Demo
+ Click here to try the Demo of [MiniCPM-V 2.6](http://120.92.209.146:8887/).
+
+
+ ## Usage
+ Inference using Hugging Face transformers on NVIDIA GPUs. Requirements tested on Python 3.10:
+ ```
+ Pillow==10.1.0
+ torch==2.1.2
+ torchvision==0.16.2
+ transformers==4.40.0
+ sentencepiece==0.1.99
+ decord
+ ```
+
+ ```python
+ # test.py
+ import torch
+ from PIL import Image
+ from transformers import AutoModel, AutoTokenizer
+
+ model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
+     attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
+ model = model.eval().cuda()
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
+
+ image = Image.open('xx.jpg').convert('RGB')
+ question = 'What is in the image?'
+ msgs = [{'role': 'user', 'content': [image, question]}]
+
+ res = model.chat(
+     image=None,
+     msgs=msgs,
+     tokenizer=tokenizer
+ )
+ print(res)
+
+ ## if you want to use streaming, please make sure sampling=True and stream=True
+ ## the model.chat will return a generator
+ res = model.chat(
+     image=None,
+     msgs=msgs,
+     tokenizer=tokenizer,
+     sampling=True,
+     stream=True
+ )
+
+ generated_text = ""
+ for new_text in res:
+     generated_text += new_text
+     print(new_text, flush=True, end='')
+ ```
+
+ ### Chat with multiple images
+ <details>
+ <summary> Click to show Python code running MiniCPM-V 2.6 with multiple images input. </summary>
+
+ ```python
+ import torch
+ from PIL import Image
+ from transformers import AutoModel, AutoTokenizer
+
+ model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
+     attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
+ model = model.eval().cuda()
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
+
+ image1 = Image.open('image1.jpg').convert('RGB')
+ image2 = Image.open('image2.jpg').convert('RGB')
+ question = 'Compare image 1 and image 2, tell me about the differences between image 1 and image 2.'
+
+ msgs = [{'role': 'user', 'content': [image1, image2, question]}]
+
+ answer = model.chat(
+     image=None,
+     msgs=msgs,
+     tokenizer=tokenizer
+ )
+ print(answer)
+ ```
+ </details>
+
+ ### In-context few-shot learning
+ <details>
+ <summary> Click to view Python code running MiniCPM-V 2.6 with few-shot input. </summary>
+
+ ```python
+ import torch
+ from PIL import Image
+ from transformers import AutoModel, AutoTokenizer
+
+ model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
+     attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
+ model = model.eval().cuda()
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
+
+ question = "production date"
+ image1 = Image.open('example1.jpg').convert('RGB')
+ answer1 = "2023.08.04"
+ image2 = Image.open('example2.jpg').convert('RGB')
+ answer2 = "2007.04.24"
+ image_test = Image.open('test.jpg').convert('RGB')
+
+ msgs = [
+     {'role': 'user', 'content': [image1, question]}, {'role': 'assistant', 'content': [answer1]},
+     {'role': 'user', 'content': [image2, question]}, {'role': 'assistant', 'content': [answer2]},
+     {'role': 'user', 'content': [image_test, question]}
+ ]
+
+ answer = model.chat(
+     image=None,
+     msgs=msgs,
+     tokenizer=tokenizer
+ )
+ print(answer)
+ ```
+ </details>
+
+ ### Chat with video
+ <details>
+ <summary> Click to view Python code running MiniCPM-V 2.6 with video input. </summary>

+ ```python
+ import torch
+ from PIL import Image
+ from transformers import AutoModel, AutoTokenizer
+ from decord import VideoReader, cpu  # pip install decord
+
+ model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
+     attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
+ model = model.eval().cuda()
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
+
+ MAX_NUM_FRAMES = 64 # if cuda OOM set a smaller number
+
+ def encode_video(video_path):
+     def uniform_sample(l, n):
+         gap = len(l) / n
+         idxs = [int(i * gap + gap / 2) for i in range(n)]
+         return [l[i] for i in idxs]
+
+     vr = VideoReader(video_path, ctx=cpu(0))
+     sample_fps = round(vr.get_avg_fps() / 1)  # sample at 1 frame per second
+     frame_idx = [i for i in range(0, len(vr), sample_fps)]
+     if len(frame_idx) > MAX_NUM_FRAMES:
+         frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
+     frames = vr.get_batch(frame_idx).asnumpy()
+     frames = [Image.fromarray(v.astype('uint8')) for v in frames]
+     print('num frames:', len(frames))
+     return frames
+
+ video_path = "video_test.mp4"
+ frames = encode_video(video_path)
+ question = "Describe the video"
+ msgs = [
+     {'role': 'user', 'content': frames + [question]},
+ ]
+
+ # Set decode params for video
+ params = {}
+ params["use_image_id"] = False
+ params["max_slice_nums"] = 2 # use 1 if cuda OOM and video resolution > 448*448
+
+ answer = model.chat(
+     image=None,
+     msgs=msgs,
+     tokenizer=tokenizer,
+     **params
+ )
+ print(answer)
+ ```
+ </details>
+
+
+ Please look at [GitHub](https://github.com/OpenBMB/MiniCPM-V) for more details about usage.
+
+
+ ## Inference with llama.cpp<a id="llamacpp"></a>
+ MiniCPM-V 2.6 can run with llama.cpp. See our fork of [llama.cpp](https://github.com/OpenBMB/llama.cpp/tree/minicpm-v2.5/examples/minicpmv) for more details.
+
+
+ ## Int4 quantized version
+ Download the int4 quantized version for lower GPU memory (7GB) usage: [MiniCPM-V-2_6-int4](https://huggingface.co/openbmb/MiniCPM-V-2_6-int4).
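+
+ Loading the int4 checkpoint mirrors the bf16 usage example above, just with the int4 repo id; a minimal sketch (assuming the int4 build exposes the same `model.chat` interface and loads its quantized weights without a `torch_dtype` override):
+
+ ```python
+ from PIL import Image
+ from transformers import AutoModel, AutoTokenizer
+
+ # int4-quantized weights: no torch_dtype/.cuda() override here (assumption);
+ # GPU memory usage is roughly 7GB per the note above
+ model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
+ model.eval()
+
+ image = Image.open('xx.jpg').convert('RGB')
+ msgs = [{'role': 'user', 'content': [image, 'What is in the image?']}]
+ print(model.chat(image=None, msgs=msgs, tokenizer=tokenizer))
+ ```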
+
+
+ ## License
+ #### Model License
+ * The code in this repo is released under the [Apache-2.0](https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE) License.
+ * The usage of MiniCPM-V series model weights must strictly follow [MiniCPM Model License.md](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md).
+ * The models and weights of MiniCPM are completely free for academic research. After filling out a ["questionnaire"](https://modelbest.feishu.cn/share/base/form/shrcnpV5ZT9EJ6xYjh3Kx0J6v8g) for registration, MiniCPM-V 2.6 weights are also available for free commercial use.
+
+
+ #### Statement
+ * As an LMM, MiniCPM-V 2.6 generates content by learning from a large amount of multimodal corpora, but it cannot comprehend, express personal opinions, or make value judgments. Anything generated by MiniCPM-V 2.6 does not represent the views and positions of the model developers.
+ * We will not be liable for any problems arising from the use of the MiniCPM-V models, including but not limited to data security issues, risks of public opinion, or any risks and problems arising from the misguidance, misuse, or dissemination of the model.
+
+ ## Key Techniques and Other Multimodal Projects
+
+ 👏 Welcome to explore key techniques of MiniCPM-V 2.6 and other multimodal projects of our team:
+
+ [VisCPM](https://github.com/OpenBMB/VisCPM/tree/main) | [RLHF-V](https://github.com/RLHF-V/RLHF-V) | [LLaVA-UHD](https://github.com/thunlp/LLaVA-UHD) | [RLAIF-V](https://github.com/RLHF-V/RLAIF-V)
+
+ ## Citation
+
+ If you find our work helpful, please consider citing our papers 📝 and liking this project ❤️!
+
+ ```bib
+ @article{yao2024minicpm,
+   title={MiniCPM-V: A GPT-4V Level MLLM on Your Phone},
+   author={Yao, Yuan and Yu, Tianyu and Zhang, Ao and Wang, Chongyi and Cui, Junbo and Zhu, Hongji and Cai, Tianchi and Li, Haoyu and Zhao, Weilin and He, Zhihui and others},
+   journal={arXiv preprint arXiv:2408.01800},
+   year={2024}
+ }
+ ```
added_tokens.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "</box>": 151651,
+   "</image>": 151647,
+   "</image_id>": 151659,
+   "</point>": 151655,
+   "</quad>": 151653,
+   "</ref>": 151649,
+   "</slice>": 151657,
+   "<box>": 151650,
+   "<image>": 151646,
+   "<image_id>": 151658,
+   "<point>": 151654,
+   "<quad>": 151652,
+   "<ref>": 151648,
+   "<slice>": 151656,
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|reserved_special_token_0|>": 151660,
+   "<|reserved_special_token_1|>": 151661,
+   "<|reserved_special_token_2|>": 151662,
+   "<|reserved_special_token_3|>": 151663,
+   "<|reserved_special_token_4|>": 151664,
+   "<|reserved_special_token_5|>": 151665
+ }
config.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "_name_or_path": "openbmb/MiniCPM-V-2_6",
+   "version": 2.6,
+   "architectures": [
+     "MiniCPMV"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_minicpm.MiniCPMVConfig",
+     "AutoModel": "modeling_minicpmv.MiniCPMV",
+     "AutoModelForCausalLM": "modeling_minicpmv.MiniCPMV"
+   },
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 3584,
+   "initializer_range": 0.02,
+   "intermediate_size": 18944,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 28,
+   "num_attention_heads": 28,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 4,
+   "rms_norm_eps": 1e-06,
+   "rope_theta": 1000000.0,
+   "sliding_window": 131072,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.40.0",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 151666,
+   "batch_vision_input": true,
+   "drop_vision_last_layer": false,
+   "image_size": 448,
+   "model_type": "minicpmv",
+   "patch_size": 14,
+   "query_num": 64,
+   "slice_config": {
+     "max_slice_nums": 9,
+     "patch_size": 14,
+     "model_type": "minicpmv"
+   },
+   "slice_mode": true,
+   "vision_config": {
+     "hidden_size": 1152,
+     "image_size": 980,
+     "intermediate_size": 4304,
+     "model_type": "siglip",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 27,
+     "patch_size": 14
+   }
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "transformers_version": "4.40.0"
+ }
model.safetensors.index.json ADDED
@@ -0,0 +1,796 @@
+ {
+   "metadata": {
+     "total_size": 16198350304
+   },
+   "weight_map": {
+     "llm.lm_head.weight": "model-00004-of-00004.safetensors",
+     "llm.model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "llm.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "llm.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "llm.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "llm.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "llm.model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "llm.model.norm.weight": "model-00003-of-00004.safetensors",
+     "resampler.attn.in_proj_bias": "model-00004-of-00004.safetensors",
+     "resampler.attn.in_proj_weight": "model-00004-of-00004.safetensors",
+     "resampler.attn.out_proj.bias": "model-00004-of-00004.safetensors",
+     "resampler.attn.out_proj.weight": "model-00004-of-00004.safetensors",
+     "resampler.kv_proj.weight": "model-00004-of-00004.safetensors",
+     "resampler.ln_kv.bias": "model-00004-of-00004.safetensors",
+     "resampler.ln_kv.weight": "model-00004-of-00004.safetensors",
+     "resampler.ln_post.bias": "model-00004-of-00004.safetensors",
+     "resampler.ln_post.weight": "model-00004-of-00004.safetensors",
+     "resampler.ln_q.bias": "model-00004-of-00004.safetensors",
+     "resampler.ln_q.weight": "model-00004-of-00004.safetensors",
+     "resampler.proj": "model-00004-of-00004.safetensors",
+     "resampler.query": "model-00004-of-00004.safetensors",
+     "vpm.embeddings.patch_embedding.bias": "model-00004-of-00004.safetensors",
+     "vpm.embeddings.patch_embedding.weight": "model-00004-of-00004.safetensors",
+     "vpm.embeddings.position_embedding.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.layer_norm1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.layer_norm1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.layer_norm2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.layer_norm2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.mlp.fc1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.mlp.fc1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.mlp.fc2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.mlp.fc2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.0.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.layer_norm1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.layer_norm1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.layer_norm2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.layer_norm2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.mlp.fc1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.mlp.fc1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.mlp.fc2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.mlp.fc2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.1.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.layer_norm1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.layer_norm1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.layer_norm2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.layer_norm2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.mlp.fc1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.mlp.fc1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.mlp.fc2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.mlp.fc2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.10.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.layer_norm1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.layer_norm1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.layer_norm2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.layer_norm2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.mlp.fc1.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.mlp.fc1.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.mlp.fc2.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.mlp.fc2.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+     "vpm.encoder.layers.11.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
420
+ "vpm.encoder.layers.11.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
421
+ "vpm.encoder.layers.11.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
422
+ "vpm.encoder.layers.11.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
423
+ "vpm.encoder.layers.11.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
424
+ "vpm.encoder.layers.11.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
425
+ "vpm.encoder.layers.12.layer_norm1.bias": "model-00004-of-00004.safetensors",
426
+ "vpm.encoder.layers.12.layer_norm1.weight": "model-00004-of-00004.safetensors",
427
+ "vpm.encoder.layers.12.layer_norm2.bias": "model-00004-of-00004.safetensors",
428
+ "vpm.encoder.layers.12.layer_norm2.weight": "model-00004-of-00004.safetensors",
429
+ "vpm.encoder.layers.12.mlp.fc1.bias": "model-00004-of-00004.safetensors",
430
+ "vpm.encoder.layers.12.mlp.fc1.weight": "model-00004-of-00004.safetensors",
431
+ "vpm.encoder.layers.12.mlp.fc2.bias": "model-00004-of-00004.safetensors",
432
+ "vpm.encoder.layers.12.mlp.fc2.weight": "model-00004-of-00004.safetensors",
433
+ "vpm.encoder.layers.12.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
434
+ "vpm.encoder.layers.12.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
435
+ "vpm.encoder.layers.12.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
436
+ "vpm.encoder.layers.12.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
437
+ "vpm.encoder.layers.12.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
438
+ "vpm.encoder.layers.12.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
439
+ "vpm.encoder.layers.12.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
440
+ "vpm.encoder.layers.12.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
441
+ "vpm.encoder.layers.13.layer_norm1.bias": "model-00004-of-00004.safetensors",
442
+ "vpm.encoder.layers.13.layer_norm1.weight": "model-00004-of-00004.safetensors",
443
+ "vpm.encoder.layers.13.layer_norm2.bias": "model-00004-of-00004.safetensors",
444
+ "vpm.encoder.layers.13.layer_norm2.weight": "model-00004-of-00004.safetensors",
445
+ "vpm.encoder.layers.13.mlp.fc1.bias": "model-00004-of-00004.safetensors",
446
+ "vpm.encoder.layers.13.mlp.fc1.weight": "model-00004-of-00004.safetensors",
447
+ "vpm.encoder.layers.13.mlp.fc2.bias": "model-00004-of-00004.safetensors",
448
+ "vpm.encoder.layers.13.mlp.fc2.weight": "model-00004-of-00004.safetensors",
449
+ "vpm.encoder.layers.13.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
450
+ "vpm.encoder.layers.13.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
451
+ "vpm.encoder.layers.13.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
452
+ "vpm.encoder.layers.13.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
453
+ "vpm.encoder.layers.13.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
454
+ "vpm.encoder.layers.13.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
455
+ "vpm.encoder.layers.13.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
456
+ "vpm.encoder.layers.13.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
457
+ "vpm.encoder.layers.14.layer_norm1.bias": "model-00004-of-00004.safetensors",
458
+ "vpm.encoder.layers.14.layer_norm1.weight": "model-00004-of-00004.safetensors",
459
+ "vpm.encoder.layers.14.layer_norm2.bias": "model-00004-of-00004.safetensors",
460
+ "vpm.encoder.layers.14.layer_norm2.weight": "model-00004-of-00004.safetensors",
461
+ "vpm.encoder.layers.14.mlp.fc1.bias": "model-00004-of-00004.safetensors",
462
+ "vpm.encoder.layers.14.mlp.fc1.weight": "model-00004-of-00004.safetensors",
463
+ "vpm.encoder.layers.14.mlp.fc2.bias": "model-00004-of-00004.safetensors",
464
+ "vpm.encoder.layers.14.mlp.fc2.weight": "model-00004-of-00004.safetensors",
465
+ "vpm.encoder.layers.14.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
466
+ "vpm.encoder.layers.14.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
467
+ "vpm.encoder.layers.14.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
468
+ "vpm.encoder.layers.14.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
469
+ "vpm.encoder.layers.14.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
470
+ "vpm.encoder.layers.14.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
471
+ "vpm.encoder.layers.14.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
472
+ "vpm.encoder.layers.14.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
473
+ "vpm.encoder.layers.15.layer_norm1.bias": "model-00004-of-00004.safetensors",
474
+ "vpm.encoder.layers.15.layer_norm1.weight": "model-00004-of-00004.safetensors",
475
+ "vpm.encoder.layers.15.layer_norm2.bias": "model-00004-of-00004.safetensors",
476
+ "vpm.encoder.layers.15.layer_norm2.weight": "model-00004-of-00004.safetensors",
477
+ "vpm.encoder.layers.15.mlp.fc1.bias": "model-00004-of-00004.safetensors",
478
+ "vpm.encoder.layers.15.mlp.fc1.weight": "model-00004-of-00004.safetensors",
479
+ "vpm.encoder.layers.15.mlp.fc2.bias": "model-00004-of-00004.safetensors",
480
+ "vpm.encoder.layers.15.mlp.fc2.weight": "model-00004-of-00004.safetensors",
481
+ "vpm.encoder.layers.15.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
482
+ "vpm.encoder.layers.15.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
483
+ "vpm.encoder.layers.15.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
484
+ "vpm.encoder.layers.15.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
485
+ "vpm.encoder.layers.15.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
486
+ "vpm.encoder.layers.15.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
487
+ "vpm.encoder.layers.15.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
488
+ "vpm.encoder.layers.15.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
489
+ "vpm.encoder.layers.16.layer_norm1.bias": "model-00004-of-00004.safetensors",
490
+ "vpm.encoder.layers.16.layer_norm1.weight": "model-00004-of-00004.safetensors",
491
+ "vpm.encoder.layers.16.layer_norm2.bias": "model-00004-of-00004.safetensors",
492
+ "vpm.encoder.layers.16.layer_norm2.weight": "model-00004-of-00004.safetensors",
493
+ "vpm.encoder.layers.16.mlp.fc1.bias": "model-00004-of-00004.safetensors",
494
+ "vpm.encoder.layers.16.mlp.fc1.weight": "model-00004-of-00004.safetensors",
495
+ "vpm.encoder.layers.16.mlp.fc2.bias": "model-00004-of-00004.safetensors",
496
+ "vpm.encoder.layers.16.mlp.fc2.weight": "model-00004-of-00004.safetensors",
497
+ "vpm.encoder.layers.16.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
498
+ "vpm.encoder.layers.16.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
499
+ "vpm.encoder.layers.16.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
500
+ "vpm.encoder.layers.16.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
501
+ "vpm.encoder.layers.16.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
502
+ "vpm.encoder.layers.16.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
503
+ "vpm.encoder.layers.16.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
504
+ "vpm.encoder.layers.16.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
505
+ "vpm.encoder.layers.17.layer_norm1.bias": "model-00004-of-00004.safetensors",
506
+ "vpm.encoder.layers.17.layer_norm1.weight": "model-00004-of-00004.safetensors",
507
+ "vpm.encoder.layers.17.layer_norm2.bias": "model-00004-of-00004.safetensors",
508
+ "vpm.encoder.layers.17.layer_norm2.weight": "model-00004-of-00004.safetensors",
509
+ "vpm.encoder.layers.17.mlp.fc1.bias": "model-00004-of-00004.safetensors",
510
+ "vpm.encoder.layers.17.mlp.fc1.weight": "model-00004-of-00004.safetensors",
511
+ "vpm.encoder.layers.17.mlp.fc2.bias": "model-00004-of-00004.safetensors",
512
+ "vpm.encoder.layers.17.mlp.fc2.weight": "model-00004-of-00004.safetensors",
513
+ "vpm.encoder.layers.17.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
514
+ "vpm.encoder.layers.17.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
515
+ "vpm.encoder.layers.17.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
516
+ "vpm.encoder.layers.17.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
517
+ "vpm.encoder.layers.17.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
518
+ "vpm.encoder.layers.17.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
519
+ "vpm.encoder.layers.17.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
520
+ "vpm.encoder.layers.17.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
521
+ "vpm.encoder.layers.18.layer_norm1.bias": "model-00004-of-00004.safetensors",
522
+ "vpm.encoder.layers.18.layer_norm1.weight": "model-00004-of-00004.safetensors",
523
+ "vpm.encoder.layers.18.layer_norm2.bias": "model-00004-of-00004.safetensors",
524
+ "vpm.encoder.layers.18.layer_norm2.weight": "model-00004-of-00004.safetensors",
525
+ "vpm.encoder.layers.18.mlp.fc1.bias": "model-00004-of-00004.safetensors",
526
+ "vpm.encoder.layers.18.mlp.fc1.weight": "model-00004-of-00004.safetensors",
527
+ "vpm.encoder.layers.18.mlp.fc2.bias": "model-00004-of-00004.safetensors",
528
+ "vpm.encoder.layers.18.mlp.fc2.weight": "model-00004-of-00004.safetensors",
529
+ "vpm.encoder.layers.18.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
530
+ "vpm.encoder.layers.18.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
531
+ "vpm.encoder.layers.18.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
532
+ "vpm.encoder.layers.18.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
533
+ "vpm.encoder.layers.18.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
534
+ "vpm.encoder.layers.18.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
535
+ "vpm.encoder.layers.18.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
536
+ "vpm.encoder.layers.18.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
537
+ "vpm.encoder.layers.19.layer_norm1.bias": "model-00004-of-00004.safetensors",
538
+ "vpm.encoder.layers.19.layer_norm1.weight": "model-00004-of-00004.safetensors",
539
+ "vpm.encoder.layers.19.layer_norm2.bias": "model-00004-of-00004.safetensors",
540
+ "vpm.encoder.layers.19.layer_norm2.weight": "model-00004-of-00004.safetensors",
541
+ "vpm.encoder.layers.19.mlp.fc1.bias": "model-00004-of-00004.safetensors",
542
+ "vpm.encoder.layers.19.mlp.fc1.weight": "model-00004-of-00004.safetensors",
543
+ "vpm.encoder.layers.19.mlp.fc2.bias": "model-00004-of-00004.safetensors",
544
+ "vpm.encoder.layers.19.mlp.fc2.weight": "model-00004-of-00004.safetensors",
545
+ "vpm.encoder.layers.19.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
546
+ "vpm.encoder.layers.19.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
547
+ "vpm.encoder.layers.19.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
548
+ "vpm.encoder.layers.19.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
549
+ "vpm.encoder.layers.19.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
550
+ "vpm.encoder.layers.19.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
551
+ "vpm.encoder.layers.19.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
552
+ "vpm.encoder.layers.19.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
553
+ "vpm.encoder.layers.2.layer_norm1.bias": "model-00004-of-00004.safetensors",
554
+ "vpm.encoder.layers.2.layer_norm1.weight": "model-00004-of-00004.safetensors",
555
+ "vpm.encoder.layers.2.layer_norm2.bias": "model-00004-of-00004.safetensors",
556
+ "vpm.encoder.layers.2.layer_norm2.weight": "model-00004-of-00004.safetensors",
557
+ "vpm.encoder.layers.2.mlp.fc1.bias": "model-00004-of-00004.safetensors",
558
+ "vpm.encoder.layers.2.mlp.fc1.weight": "model-00004-of-00004.safetensors",
559
+ "vpm.encoder.layers.2.mlp.fc2.bias": "model-00004-of-00004.safetensors",
560
+ "vpm.encoder.layers.2.mlp.fc2.weight": "model-00004-of-00004.safetensors",
561
+ "vpm.encoder.layers.2.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
562
+ "vpm.encoder.layers.2.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
563
+ "vpm.encoder.layers.2.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
564
+ "vpm.encoder.layers.2.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
565
+ "vpm.encoder.layers.2.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
566
+ "vpm.encoder.layers.2.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
567
+ "vpm.encoder.layers.2.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
568
+ "vpm.encoder.layers.2.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
569
+ "vpm.encoder.layers.20.layer_norm1.bias": "model-00004-of-00004.safetensors",
570
+ "vpm.encoder.layers.20.layer_norm1.weight": "model-00004-of-00004.safetensors",
571
+ "vpm.encoder.layers.20.layer_norm2.bias": "model-00004-of-00004.safetensors",
572
+ "vpm.encoder.layers.20.layer_norm2.weight": "model-00004-of-00004.safetensors",
573
+ "vpm.encoder.layers.20.mlp.fc1.bias": "model-00004-of-00004.safetensors",
574
+ "vpm.encoder.layers.20.mlp.fc1.weight": "model-00004-of-00004.safetensors",
575
+ "vpm.encoder.layers.20.mlp.fc2.bias": "model-00004-of-00004.safetensors",
576
+ "vpm.encoder.layers.20.mlp.fc2.weight": "model-00004-of-00004.safetensors",
577
+ "vpm.encoder.layers.20.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
578
+ "vpm.encoder.layers.20.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
579
+ "vpm.encoder.layers.20.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
580
+ "vpm.encoder.layers.20.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
581
+ "vpm.encoder.layers.20.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
582
+ "vpm.encoder.layers.20.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
583
+ "vpm.encoder.layers.20.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
584
+ "vpm.encoder.layers.20.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
585
+ "vpm.encoder.layers.21.layer_norm1.bias": "model-00004-of-00004.safetensors",
586
+ "vpm.encoder.layers.21.layer_norm1.weight": "model-00004-of-00004.safetensors",
587
+ "vpm.encoder.layers.21.layer_norm2.bias": "model-00004-of-00004.safetensors",
588
+ "vpm.encoder.layers.21.layer_norm2.weight": "model-00004-of-00004.safetensors",
589
+ "vpm.encoder.layers.21.mlp.fc1.bias": "model-00004-of-00004.safetensors",
590
+ "vpm.encoder.layers.21.mlp.fc1.weight": "model-00004-of-00004.safetensors",
591
+ "vpm.encoder.layers.21.mlp.fc2.bias": "model-00004-of-00004.safetensors",
592
+ "vpm.encoder.layers.21.mlp.fc2.weight": "model-00004-of-00004.safetensors",
593
+ "vpm.encoder.layers.21.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
594
+ "vpm.encoder.layers.21.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
595
+ "vpm.encoder.layers.21.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
596
+ "vpm.encoder.layers.21.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
597
+ "vpm.encoder.layers.21.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
598
+ "vpm.encoder.layers.21.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
599
+ "vpm.encoder.layers.21.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
600
+ "vpm.encoder.layers.21.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
601
+ "vpm.encoder.layers.22.layer_norm1.bias": "model-00004-of-00004.safetensors",
602
+ "vpm.encoder.layers.22.layer_norm1.weight": "model-00004-of-00004.safetensors",
603
+ "vpm.encoder.layers.22.layer_norm2.bias": "model-00004-of-00004.safetensors",
604
+ "vpm.encoder.layers.22.layer_norm2.weight": "model-00004-of-00004.safetensors",
605
+ "vpm.encoder.layers.22.mlp.fc1.bias": "model-00004-of-00004.safetensors",
606
+ "vpm.encoder.layers.22.mlp.fc1.weight": "model-00004-of-00004.safetensors",
607
+ "vpm.encoder.layers.22.mlp.fc2.bias": "model-00004-of-00004.safetensors",
608
+ "vpm.encoder.layers.22.mlp.fc2.weight": "model-00004-of-00004.safetensors",
609
+ "vpm.encoder.layers.22.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
610
+ "vpm.encoder.layers.22.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
611
+ "vpm.encoder.layers.22.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
612
+ "vpm.encoder.layers.22.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
613
+ "vpm.encoder.layers.22.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
614
+ "vpm.encoder.layers.22.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
615
+ "vpm.encoder.layers.22.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
616
+ "vpm.encoder.layers.22.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
617
+ "vpm.encoder.layers.23.layer_norm1.bias": "model-00004-of-00004.safetensors",
618
+ "vpm.encoder.layers.23.layer_norm1.weight": "model-00004-of-00004.safetensors",
619
+ "vpm.encoder.layers.23.layer_norm2.bias": "model-00004-of-00004.safetensors",
620
+ "vpm.encoder.layers.23.layer_norm2.weight": "model-00004-of-00004.safetensors",
621
+ "vpm.encoder.layers.23.mlp.fc1.bias": "model-00004-of-00004.safetensors",
622
+ "vpm.encoder.layers.23.mlp.fc1.weight": "model-00004-of-00004.safetensors",
623
+ "vpm.encoder.layers.23.mlp.fc2.bias": "model-00004-of-00004.safetensors",
624
+ "vpm.encoder.layers.23.mlp.fc2.weight": "model-00004-of-00004.safetensors",
625
+ "vpm.encoder.layers.23.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
626
+ "vpm.encoder.layers.23.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
627
+ "vpm.encoder.layers.23.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
628
+ "vpm.encoder.layers.23.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
629
+ "vpm.encoder.layers.23.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
630
+ "vpm.encoder.layers.23.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
631
+ "vpm.encoder.layers.23.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
632
+ "vpm.encoder.layers.23.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
633
+ "vpm.encoder.layers.24.layer_norm1.bias": "model-00004-of-00004.safetensors",
634
+ "vpm.encoder.layers.24.layer_norm1.weight": "model-00004-of-00004.safetensors",
635
+ "vpm.encoder.layers.24.layer_norm2.bias": "model-00004-of-00004.safetensors",
636
+ "vpm.encoder.layers.24.layer_norm2.weight": "model-00004-of-00004.safetensors",
637
+ "vpm.encoder.layers.24.mlp.fc1.bias": "model-00004-of-00004.safetensors",
638
+ "vpm.encoder.layers.24.mlp.fc1.weight": "model-00004-of-00004.safetensors",
639
+ "vpm.encoder.layers.24.mlp.fc2.bias": "model-00004-of-00004.safetensors",
640
+ "vpm.encoder.layers.24.mlp.fc2.weight": "model-00004-of-00004.safetensors",
641
+ "vpm.encoder.layers.24.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
642
+ "vpm.encoder.layers.24.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
643
+ "vpm.encoder.layers.24.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
644
+ "vpm.encoder.layers.24.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
645
+ "vpm.encoder.layers.24.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
646
+ "vpm.encoder.layers.24.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
647
+ "vpm.encoder.layers.24.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
648
+ "vpm.encoder.layers.24.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
649
+ "vpm.encoder.layers.25.layer_norm1.bias": "model-00004-of-00004.safetensors",
650
+ "vpm.encoder.layers.25.layer_norm1.weight": "model-00004-of-00004.safetensors",
651
+ "vpm.encoder.layers.25.layer_norm2.bias": "model-00004-of-00004.safetensors",
652
+ "vpm.encoder.layers.25.layer_norm2.weight": "model-00004-of-00004.safetensors",
653
+ "vpm.encoder.layers.25.mlp.fc1.bias": "model-00004-of-00004.safetensors",
654
+ "vpm.encoder.layers.25.mlp.fc1.weight": "model-00004-of-00004.safetensors",
655
+ "vpm.encoder.layers.25.mlp.fc2.bias": "model-00004-of-00004.safetensors",
656
+ "vpm.encoder.layers.25.mlp.fc2.weight": "model-00004-of-00004.safetensors",
657
+ "vpm.encoder.layers.25.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
658
+ "vpm.encoder.layers.25.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
659
+ "vpm.encoder.layers.25.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
660
+ "vpm.encoder.layers.25.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
661
+ "vpm.encoder.layers.25.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
662
+ "vpm.encoder.layers.25.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
663
+ "vpm.encoder.layers.25.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
664
+ "vpm.encoder.layers.25.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
665
+ "vpm.encoder.layers.26.layer_norm1.bias": "model-00004-of-00004.safetensors",
666
+ "vpm.encoder.layers.26.layer_norm1.weight": "model-00004-of-00004.safetensors",
667
+ "vpm.encoder.layers.26.layer_norm2.bias": "model-00004-of-00004.safetensors",
668
+ "vpm.encoder.layers.26.layer_norm2.weight": "model-00004-of-00004.safetensors",
669
+ "vpm.encoder.layers.26.mlp.fc1.bias": "model-00004-of-00004.safetensors",
670
+ "vpm.encoder.layers.26.mlp.fc1.weight": "model-00004-of-00004.safetensors",
671
+ "vpm.encoder.layers.26.mlp.fc2.bias": "model-00004-of-00004.safetensors",
672
+ "vpm.encoder.layers.26.mlp.fc2.weight": "model-00004-of-00004.safetensors",
673
+ "vpm.encoder.layers.26.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
674
+ "vpm.encoder.layers.26.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
675
+ "vpm.encoder.layers.26.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
676
+ "vpm.encoder.layers.26.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
677
+ "vpm.encoder.layers.26.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
678
+ "vpm.encoder.layers.26.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
679
+ "vpm.encoder.layers.26.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
680
+ "vpm.encoder.layers.26.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
681
+ "vpm.encoder.layers.3.layer_norm1.bias": "model-00004-of-00004.safetensors",
682
+ "vpm.encoder.layers.3.layer_norm1.weight": "model-00004-of-00004.safetensors",
683
+ "vpm.encoder.layers.3.layer_norm2.bias": "model-00004-of-00004.safetensors",
684
+ "vpm.encoder.layers.3.layer_norm2.weight": "model-00004-of-00004.safetensors",
685
+ "vpm.encoder.layers.3.mlp.fc1.bias": "model-00004-of-00004.safetensors",
686
+ "vpm.encoder.layers.3.mlp.fc1.weight": "model-00004-of-00004.safetensors",
687
+ "vpm.encoder.layers.3.mlp.fc2.bias": "model-00004-of-00004.safetensors",
688
+ "vpm.encoder.layers.3.mlp.fc2.weight": "model-00004-of-00004.safetensors",
689
+ "vpm.encoder.layers.3.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
690
+ "vpm.encoder.layers.3.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
691
+ "vpm.encoder.layers.3.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
692
+ "vpm.encoder.layers.3.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
693
+ "vpm.encoder.layers.3.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
694
+ "vpm.encoder.layers.3.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
695
+ "vpm.encoder.layers.3.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
696
+ "vpm.encoder.layers.3.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
697
+ "vpm.encoder.layers.4.layer_norm1.bias": "model-00004-of-00004.safetensors",
698
+ "vpm.encoder.layers.4.layer_norm1.weight": "model-00004-of-00004.safetensors",
699
+ "vpm.encoder.layers.4.layer_norm2.bias": "model-00004-of-00004.safetensors",
700
+ "vpm.encoder.layers.4.layer_norm2.weight": "model-00004-of-00004.safetensors",
701
+ "vpm.encoder.layers.4.mlp.fc1.bias": "model-00004-of-00004.safetensors",
702
+ "vpm.encoder.layers.4.mlp.fc1.weight": "model-00004-of-00004.safetensors",
703
+ "vpm.encoder.layers.4.mlp.fc2.bias": "model-00004-of-00004.safetensors",
704
+ "vpm.encoder.layers.4.mlp.fc2.weight": "model-00004-of-00004.safetensors",
705
+ "vpm.encoder.layers.4.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
706
+ "vpm.encoder.layers.4.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
707
+ "vpm.encoder.layers.4.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
708
+ "vpm.encoder.layers.4.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
709
+ "vpm.encoder.layers.4.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
710
+ "vpm.encoder.layers.4.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
711
+ "vpm.encoder.layers.4.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
712
+ "vpm.encoder.layers.4.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
713
+ "vpm.encoder.layers.5.layer_norm1.bias": "model-00004-of-00004.safetensors",
714
+ "vpm.encoder.layers.5.layer_norm1.weight": "model-00004-of-00004.safetensors",
715
+ "vpm.encoder.layers.5.layer_norm2.bias": "model-00004-of-00004.safetensors",
716
+ "vpm.encoder.layers.5.layer_norm2.weight": "model-00004-of-00004.safetensors",
717
+ "vpm.encoder.layers.5.mlp.fc1.bias": "model-00004-of-00004.safetensors",
718
+ "vpm.encoder.layers.5.mlp.fc1.weight": "model-00004-of-00004.safetensors",
719
+ "vpm.encoder.layers.5.mlp.fc2.bias": "model-00004-of-00004.safetensors",
720
+ "vpm.encoder.layers.5.mlp.fc2.weight": "model-00004-of-00004.safetensors",
721
+ "vpm.encoder.layers.5.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
722
+ "vpm.encoder.layers.5.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
723
+ "vpm.encoder.layers.5.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
724
+ "vpm.encoder.layers.5.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
725
+ "vpm.encoder.layers.5.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
726
+ "vpm.encoder.layers.5.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
727
+ "vpm.encoder.layers.5.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
728
+ "vpm.encoder.layers.5.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
729
+ "vpm.encoder.layers.6.layer_norm1.bias": "model-00004-of-00004.safetensors",
730
+ "vpm.encoder.layers.6.layer_norm1.weight": "model-00004-of-00004.safetensors",
731
+ "vpm.encoder.layers.6.layer_norm2.bias": "model-00004-of-00004.safetensors",
732
+ "vpm.encoder.layers.6.layer_norm2.weight": "model-00004-of-00004.safetensors",
733
+ "vpm.encoder.layers.6.mlp.fc1.bias": "model-00004-of-00004.safetensors",
734
+ "vpm.encoder.layers.6.mlp.fc1.weight": "model-00004-of-00004.safetensors",
735
+ "vpm.encoder.layers.6.mlp.fc2.bias": "model-00004-of-00004.safetensors",
736
+ "vpm.encoder.layers.6.mlp.fc2.weight": "model-00004-of-00004.safetensors",
737
+ "vpm.encoder.layers.6.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
738
+ "vpm.encoder.layers.6.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
739
+ "vpm.encoder.layers.6.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
740
+ "vpm.encoder.layers.6.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
741
+ "vpm.encoder.layers.6.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
742
+ "vpm.encoder.layers.6.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
743
+ "vpm.encoder.layers.6.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
744
+ "vpm.encoder.layers.6.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
745
+ "vpm.encoder.layers.7.layer_norm1.bias": "model-00004-of-00004.safetensors",
746
+ "vpm.encoder.layers.7.layer_norm1.weight": "model-00004-of-00004.safetensors",
747
+ "vpm.encoder.layers.7.layer_norm2.bias": "model-00004-of-00004.safetensors",
748
+ "vpm.encoder.layers.7.layer_norm2.weight": "model-00004-of-00004.safetensors",
749
+ "vpm.encoder.layers.7.mlp.fc1.bias": "model-00004-of-00004.safetensors",
750
+ "vpm.encoder.layers.7.mlp.fc1.weight": "model-00004-of-00004.safetensors",
751
+ "vpm.encoder.layers.7.mlp.fc2.bias": "model-00004-of-00004.safetensors",
752
+ "vpm.encoder.layers.7.mlp.fc2.weight": "model-00004-of-00004.safetensors",
753
+ "vpm.encoder.layers.7.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
754
+ "vpm.encoder.layers.7.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
755
+ "vpm.encoder.layers.7.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
756
+ "vpm.encoder.layers.7.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
757
+ "vpm.encoder.layers.7.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
758
+ "vpm.encoder.layers.7.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
759
+ "vpm.encoder.layers.7.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
760
+ "vpm.encoder.layers.7.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
761
+ "vpm.encoder.layers.8.layer_norm1.bias": "model-00004-of-00004.safetensors",
762
+ "vpm.encoder.layers.8.layer_norm1.weight": "model-00004-of-00004.safetensors",
763
+ "vpm.encoder.layers.8.layer_norm2.bias": "model-00004-of-00004.safetensors",
764
+ "vpm.encoder.layers.8.layer_norm2.weight": "model-00004-of-00004.safetensors",
765
+ "vpm.encoder.layers.8.mlp.fc1.bias": "model-00004-of-00004.safetensors",
766
+ "vpm.encoder.layers.8.mlp.fc1.weight": "model-00004-of-00004.safetensors",
767
+ "vpm.encoder.layers.8.mlp.fc2.bias": "model-00004-of-00004.safetensors",
768
+ "vpm.encoder.layers.8.mlp.fc2.weight": "model-00004-of-00004.safetensors",
769
+ "vpm.encoder.layers.8.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
770
+ "vpm.encoder.layers.8.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
771
+ "vpm.encoder.layers.8.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
772
+ "vpm.encoder.layers.8.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
773
+ "vpm.encoder.layers.8.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
774
+ "vpm.encoder.layers.8.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
775
+ "vpm.encoder.layers.8.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
776
+ "vpm.encoder.layers.8.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
777
+ "vpm.encoder.layers.9.layer_norm1.bias": "model-00004-of-00004.safetensors",
778
+ "vpm.encoder.layers.9.layer_norm1.weight": "model-00004-of-00004.safetensors",
779
+ "vpm.encoder.layers.9.layer_norm2.bias": "model-00004-of-00004.safetensors",
780
+ "vpm.encoder.layers.9.layer_norm2.weight": "model-00004-of-00004.safetensors",
781
+ "vpm.encoder.layers.9.mlp.fc1.bias": "model-00004-of-00004.safetensors",
782
+ "vpm.encoder.layers.9.mlp.fc1.weight": "model-00004-of-00004.safetensors",
783
+ "vpm.encoder.layers.9.mlp.fc2.bias": "model-00004-of-00004.safetensors",
784
+ "vpm.encoder.layers.9.mlp.fc2.weight": "model-00004-of-00004.safetensors",
785
+ "vpm.encoder.layers.9.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
786
+ "vpm.encoder.layers.9.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
787
+ "vpm.encoder.layers.9.self_attn.out_proj.bias": "model-00004-of-00004.safetensors",
788
+ "vpm.encoder.layers.9.self_attn.out_proj.weight": "model-00004-of-00004.safetensors",
789
+ "vpm.encoder.layers.9.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
790
+ "vpm.encoder.layers.9.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
791
+ "vpm.encoder.layers.9.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
792
+ "vpm.encoder.layers.9.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
793
+ "vpm.post_layernorm.bias": "model-00004-of-00004.safetensors",
794
+ "vpm.post_layernorm.weight": "model-00004-of-00004.safetensors"
795
+ }
796
+ }
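
The weight map above keys every tensor name to the shard file that stores it, so a loader can fetch a single tensor without reading all four shards. A minimal sketch of that lookup, assuming the index and shard files have been downloaded locally (the tensor name is just one taken from the map above):

```python
import json
from safetensors import safe_open

# Find which shard holds a given tensor using the index shown above.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "vpm.post_layernorm.weight"
shard = index["weight_map"][name]  # -> "model-00004-of-00004.safetensors"

# Open only that shard and read only that tensor (lazy, memory-mapped).
with safe_open(shard, framework="pt", device="cpu") as f:
    tensor = f.get_tensor(name)

print(shard, tuple(tensor.shape))
```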
preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "image_processor_type": "MiniCPMVImageProcessor",
+ "auto_map": {
+ "AutoProcessor": "processing_minicpmv.MiniCPMVProcessor",
+ "AutoImageProcessor": "image_processing_minicpmv.MiniCPMVImageProcessor"
+ },
+ "processor_class": "MiniCPMVProcessor",
+ "max_slice_nums": 9,
+ "scale_resolution": 448,
+ "patch_size": 14,
+ "use_image_id": true,
+ "image_feature_size": 64,
+ "im_start": "<image>",
+ "im_end": "</image>",
+ "slice_start": "<slice>",
+ "slice_end": "</slice>",
+ "unk": "<unk>",
+ "im_id_start": "<image_id>",
+ "im_id_end": "</image_id>",
+ "slice_mode": true,
+ "norm_mean": [0.5, 0.5, 0.5],
+ "norm_std": [0.5, 0.5, 0.5],
+ "version": 2.6
+ }
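
This config drives the custom `MiniCPMVImageProcessor`: images are sliced into at most `max_slice_nums` crops around `scale_resolution` 448, cut into 14-pixel patches, and normalized with mean/std 0.5. A rough sketch of just the normalization step (the file name is illustrative; the full pipeline ships in `image_processing_minicpmv.py`):

```python
import numpy as np
from PIL import Image

# norm_mean/norm_std of 0.5 map pixel values from [0, 1] to roughly [-1, 1].
mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
std = np.array([0.5, 0.5, 0.5], dtype=np.float32)

img = np.asarray(Image.open("example.jpg").convert("RGB"), dtype=np.float32) / 255.0
normalized = (img - mean) / std  # broadcasts over H x W x 3
```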
special_tokens_map.json ADDED
@@ -0,0 +1,172 @@
+ {
+ "additional_special_tokens": [
+ {
+ "content": "<image>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</image>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<ref>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</ref>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<box>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</box>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<quad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</quad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<point>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</point>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<slice>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</slice>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<image_id>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</image_id>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|reserved_special_token_0|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|reserved_special_token_1|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|reserved_special_token_3|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|reserved_special_token_4|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|reserved_special_token_5|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ ],
+ "bos_token": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
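
Each tag above is registered as an atomic special token, so markers like `<image>…</image>` and `<slice>…</slice>` survive tokenization as single ids rather than being split into subwords. A quick way to confirm that, sketched against the base model repo (the custom tokenizer class requires `trust_remote_code=True`):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)
for t in ["<image>", "</image>", "<slice>", "</slice>"]:
    # Each special token resolves to a single reserved id.
    print(t, tok.convert_tokens_to_ids(t))
```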
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1,235 @@
+ {
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "128244": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<image>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "</image>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<ref>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "</ref>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<box>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "</box>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<quad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "</quad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<point>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "</point>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<slice>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "</slice>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151658": {
+ "content": "<image_id>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151659": {
+ "content": "</image_id>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151660": {
+ "content": "<|reserved_special_token_0|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151661": {
+ "content": "<|reserved_special_token_1|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151662": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151663": {
+ "content": "<|reserved_special_token_3|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151664": {
+ "content": "<|reserved_special_token_4|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151665": {
+ "content": "<|reserved_special_token_5|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<image>",
+ "</image>",
+ "<ref>",
+ "</ref>",
+ "<box>",
+ "</box>",
+ "<quad>",
+ "</quad>",
+ "<point>",
+ "</point>",
+ "<slice>",
+ "</slice>",
+ "<image_id>",
+ "</image_id>",
+ "<|reserved_special_token_0|>",
+ "<|reserved_special_token_1|>",
+ "<|reserved_special_token_2|>",
+ "<|reserved_special_token_3|>",
+ "<|reserved_special_token_4|>",
+ "<|reserved_special_token_5|>"
+ ],
+ "bos_token": "<|im_start|>",
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "auto_map": {
+ "AutoTokenizer": [
+ "tokenization_minicpmv_fast.MiniCPMVTokenizerFast",
+ null
+ ]
+ },
+ "tokenizer_class": "MiniCPMVTokenizerFast",
+ "unk_token": "<unk>"
+ }
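
The `chat_template` above is ChatML: each turn is wrapped in `<|im_start|>role … <|im_end|>`, a default system prompt is injected when the conversation does not start with one, and `add_generation_prompt` appends the opening of the assistant turn. A hedged sketch of applying it through the standard `apply_chat_template` API (loading from the base model repo for illustration):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)
messages = [{"role": "user", "content": "Describe this image."}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Describe this image.<|im_end|>
# <|im_start|>assistant
```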
vocab.json ADDED
The diff for this file is too large to render. See raw diff