finalf0 committed
Commit 9c09515 • Parent: fe77168

Update readme

README.md CHANGED
@@ -32,243 +32,11 @@ pipeline_tag: visual-question-answering
32
  <div align="center">
33
  <img src=/openbmb/MiniCPM-V-2.0/resolve/main/assets/minicpmv-2-peformance2.png width=100% />
34
  </div>
35
- <details>
36
- <summary>Click to view results on TextVQA, DocVQA, OCRBench, OpenCompass, MME, MMBench, MMMU, MathVista, LLaVA Bench, Object HalBench. </summary>
37
  <div align="center">
38
- <div align="center">
39
- <img src=/openbmb/MiniCPM-V-2.0/resolve/main/assets/minicpmv-2-benchmark2.png width=140% />
40
- </div>
41
-
42
- <table style="margin: 0px auto;">
43
- <thead>
44
- <tr>
45
- <th align="left">Model</th>
46
- <th>Size</th>
47
- <th>TextVQA val</th>
48
- <th>DocVQA test</th>
49
- <th>OCRBench</th>
50
- <th>OpenCompass</th>
51
- <th nowrap="nowrap" >MME</th>
52
- <th>MMB dev(en)</th>
53
- <th>MMB dev(zh)</th>
54
- <th>MMMU val</th>
55
- <th>MathVista</th>
56
- <th>LLaVA Bench</th>
57
- <th nowrap="nowrap">Object HalBench</th>
58
- </tr>
59
- </thead>
60
- <tbody align="center">
61
- <tr>
62
- <td colspan="12" align="left"><strong>Proprietary models</strong></td>
63
- </tr>
64
- <tr>
65
- <td nowrap="nowrap" align="left">Gemini Pro Vision</td>
66
- <td>- </td>
67
- <td>74.6</td>
68
- <td>88.1</td>
69
- <td>680</td>
70
- <td>63.8</td>
71
- <td>2148.9</td>
72
- <td>75.2</td>
73
- <td>74.0</td>
74
- <td>48.9</td>
75
- <td>45.8</td>
76
- <td>79.9</td>
77
- <td>- </td>
78
- </tr>
79
- <tr>
80
- <td nowrap="nowrap" align="left">GPT-4V</td>
81
- <td>- </td>
82
- <td>78.0</td>
83
- <td>88.4</td>
84
- <td>645</td>
85
- <td>63.2</td>
86
- <td>1771.5</td>
87
- <td>75.1</td>
88
- <td>75.0</td>
89
- <td>53.8</td>
90
- <td>47.8</td>
91
- <td>93.1</td>
92
- <td>86.4 / 92.7</td>
93
- </tr>
94
- <tr>
95
- <td colspan="12" align="left"><strong>Open-source models 6B~34B</strong></td>
96
- </tr>
97
- <tr>
98
- <td nowrap="nowrap" align="left" >Yi-VL-6B</td>
99
- <td align="right" >6.7B</td>
100
- <td>45.5*</td>
101
- <td>17.1*</td>
102
- <td>290</td>
103
- <td>49.3</td>
104
- <td>1915.1 </td>
105
- <td>68.6 </td>
106
- <td>68.3 </td>
107
- <td>40.3 </td>
108
- <td>28.8 </td>
109
- <td>51.9 </td>
110
- <td>- </td>
111
- </tr>
112
- <tr>
113
- <td nowrap="nowrap" align="left" >Qwen-VL-Chat</td>
114
- <td align="right" >9.6B</td>
115
- <td>61.5</td>
116
- <td>62.6</td>
117
- <td>488 </td>
118
- <td>52.1 </td>
119
- <td>1860.0 </td>
120
- <td>60.6 </td>
121
- <td>56.7 </td>
122
- <td>37.0 </td>
123
- <td>33.8 </td>
124
- <td>67.7 </td>
125
- <td>56.2 / 80.0</td>
126
- </tr>
127
- <tr>
128
- <td nowrap="nowrap" align="left" >Yi-VL-34B</td>
129
- <td align="right" >34B</td>
130
- <td>43.4*</td>
131
- <td>16.9*</td>
132
- <td>290</td>
133
- <td>52.6 </td>
134
- <td>2050.2</td>
135
- <td>71.1</td>
136
- <td>71.4</td>
137
- <td>45.1</td>
138
- <td>30.7</td>
139
- <td>62.3</td>
140
- <td>- </td>
141
- </tr>
142
- <tr>
143
- <td nowrap="nowrap" align="left" >DeepSeek-VL-7B</td>
144
- <td align="right" >7.3B</td>
145
- <td>64.7*</td>
146
- <td>47.0* </td>
147
- <td>435</td>
148
- <td>55.6 </td>
149
- <td>1765.4 </td>
150
- <td>74.1 </td>
151
- <td>72.8 </td>
152
- <td>38.3 </td>
153
- <td>36.8</td>
154
- <td>77.8 </td>
155
- <td>- </td>
156
- </tr>
157
- <tr>
158
- <td nowrap="nowrap" align="left" >TextMonkey</td>
159
- <td align="right" >9.7B</td>
160
- <td>64.3</td>
161
- <td>66.7 </td>
162
- <td>558</td>
163
- <td>- </td>
164
- <td>- </td>
165
- <td>- </td>
166
- <td>- </td>
167
- <td>- </td>
168
- <td>-</td>
169
- <td>- </td>
170
- <td>- </td>
171
- </tr>
172
- <tr>
173
- <td nowrap="nowrap" align="left" >CogVLM-Chat</td>
174
- <td align="right" >17.4B</td>
175
- <td>70.4</td>
176
- <td>33.3*</td>
177
- <td>590 </td>
178
- <td>52.5 </td>
179
- <td>1736.6 </td>
180
- <td>63.7 </td>
181
- <td>53.8 </td>
182
- <td>37.3 </td>
183
- <td>34.7 </td>
184
- <td>73.9 </td>
185
- <td>73.6 / 87.4 </td>
186
- </tr>
187
- <tr>
188
- <td colspan="12" align="left"><strong>Open-source models 1B~3B </strong></td>
189
- </tr>
190
- <tr>
191
- <td nowrap="nowrap" align="left" >DeepSeek-VL-1.3B</td>
192
- <td align="right" >1.7B</td>
193
- <td>58.4*</td>
194
- <td>37.9*</td>
195
- <td>413</td>
196
- <td>46.0 </td>
197
- <td>1531.6 </td>
198
- <td>64.0 </td>
199
- <td>61.2 </td>
200
- <td>33.8 </td>
201
- <td>29.4 </td>
202
- <td>51.1 </td>
203
- <td>- </td>
204
- </tr>
205
- <tr>
206
- <td nowrap="nowrap" align="left" >MobileVLM V2</td>
207
- <td align="right" >3.1B</td>
208
- <td>57.5</td>
209
- <td>19.4*</td>
210
- <td>-</td>
211
- <td>-</td>
212
- <td>1440.5(P) </td>
213
- <td>63.2 </td>
214
- <td>-</td>
215
- <td>-</td>
216
- <td>-</td>
217
- <td>-</td>
218
- <td>-</td>
219
- </tr>
220
- <tr>
221
- <td nowrap="nowrap" align="left" >Mini-Gemini</td>
222
- <td align="right" >2.2B</td>
223
- <td>56.2</td>
224
- <td>34.2*</td>
225
- <td>-</td>
226
- <td>-</td>
227
- <td>1653.0 </td>
228
- <td>59.8 </td>
229
- <td>- </td>
230
- <td>31.7 </td>
231
- <td>-</td>
232
- <td>- </td>
233
- <td>- </td>
234
- </tr>
235
- <tr>
236
- <td nowrap="nowrap" align="left" >MiniCPM-V</td>
237
- <td align="right" >2.8B </td>
238
- <td>60.6</td>
239
- <td>38.2 </td>
240
- <td>366</td>
241
- <td>47.6</td>
242
- <td>1650.2 </td>
243
- <td>67.9 </td>
244
- <td>65.3 </td>
245
- <td><strong>38.3</strong></td>
246
- <td>28.9</td>
247
- <td>51.3 </td>
248
- <td>78.4 / 88.5 </td>
249
- </tr>
250
- <tr>
251
- <td nowrap="nowrap" align="left" ><strong>MiniCPM-V 2.0</strong></td>
252
- <td align="right" >2.8B </td>
253
- <td><strong>74.1</strong></td>
254
- <td><strong>71.9</strong> </td>
255
- <td><strong>605</strong></td>
256
- <td><strong>55.0</strong></td>
257
- <td><strong>1808.6</strong> </td>
258
- <td><strong>69.6</strong> </td>
259
- <td><strong>68.1</strong> </td>
260
- <td>38.2 </td>
261
- <td><strong>38.7</strong></td>
262
- <td><strong>69.2</strong> </td>
263
- <td><strong>85.5 / 92.2 </strong></td>
264
- </tr>
265
- </tbody>
266
- </table>
267
-
268
  </div>
269
- * We evaluate the officially released checkpoint by ourselves.
270
 
271
- </details>
272
 
273
  ## Examples <!-- omit in toc -->
274
 
@@ -291,10 +59,10 @@ We deploy MiniCPM-V 2.0 on end devices. The demo video is the raw screen recordi
291
 
292
 
293
  ## Demo
294
- Click here to try out the Demo of [MiniCPM-V-2.0](http://120.92.209.146:80).
295
 
296
  ## Deployment on Mobile Phone
297
- MiniCPM-V-2.0 can be deployed on mobile phones with Android and Harmony operating systems. 🚀 Try it out [here](https://github.com/OpenBMB/mlc-MiniCPM).
298
 
299
 
300
  ## Usage
@@ -323,7 +91,7 @@ model = model.to(device='cuda', dtype=torch.bfloat16)
323
  # Run with `PYTORCH_ENABLE_MPS_FALLBACK=1 python test.py`
324
  #model = model.to(device='mps', dtype=torch.float16)
325
 
326
- tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V', trust_remote_code=True)
327
  model.eval()
328
 
329
  image = Image.open('xx.jpg').convert('RGB')
@@ -348,13 +116,12 @@ Please look at [GitHub](https://github.com/OpenBMB/OmniLMM) for more detail abou
348
  Please see the info about MiniCPM-V 1.0 [here](./minicpm_v1.md).
349
 
350
  ## License
351
-
352
  #### Model License
353
 * The code in this repo is released under [Apache-2.0](https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE)
354
- * The usage of MiniCPM-V's parameters is subject to ["General Model License Agreement - Source Notes - Publicity Restrictions - Commercial License"](https://github.com/OpenBMB/General-Model-License/blob/main/)
355
 * The parameters are fully open to academic research
356
 * Please contact cpm@modelbest.cn to obtain written authorization for commercial use. Free commercial use is also allowed after registration.
357
 
358
  #### Statement
359
- * As a LLM, MiniCPM-V generates contents by learning a large mount of texts, but it cannot comprehend, express personal opinions or make value judgement. Anything generated by MiniCPM-V does not represent the views and positions of the model developers
360
 * We will not be liable for any problems arising from the use of the MiniCPM-V open-source model, including but not limited to data security issues, risks of public opinion, or any risks and problems arising from the misdirection, misuse, or dissemination of the model.
 
32
  <div align="center">
33
  <img src=/openbmb/MiniCPM-V-2.0/resolve/main/assets/minicpmv-2-peformance2.png width=100% />
34
  </div>
35
+ Results on TextVQA, DocVQA, OCRBench, OpenCompass, MME, MMBench, MMMU, MathVista, LLaVA Bench, Object HalBench.
 
36
  <div align="center">
37
+ <img src=/openbmb/MiniCPM-V-2.0/resolve/main/assets/minicpmv-2-benchmark.png width=140% />
 
38
  </div>
 
39
 
 
40
 
41
  ## Examples <!-- omit in toc -->
42
 
 
59
 
60
 
61
  ## Demo
62
+ Click here to try out the Demo of [MiniCPM-V 2.0](http://120.92.209.146:80).
63
 
64
  ## Deployment on Mobile Phone
65
+ MiniCPM-V 2.0 can be deployed on mobile phones with Android and Harmony operating systems. 🚀 Try it out [here](https://github.com/OpenBMB/mlc-MiniCPM).
66
 
67
 
68
  ## Usage
 
91
  # Run with `PYTORCH_ENABLE_MPS_FALLBACK=1 python test.py`
92
  #model = model.to(device='mps', dtype=torch.float16)
93
 
94
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2.0', trust_remote_code=True)
95
  model.eval()
96
 
97
  image = Image.open('xx.jpg').convert('RGB')
 
116
  Please see the info about MiniCPM-V 1.0 [here](./minicpm_v1.md).
117
 
118
  ## License
 
119
  #### Model License
120
 * The code in this repo is released under [Apache-2.0](https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE)
121
+ * The usage of MiniCPM-V 2.0's parameters is subject to ["General Model License Agreement - Source Notes - Publicity Restrictions - Commercial License"](https://github.com/OpenBMB/General-Model-License/blob/main/)
122
 * The parameters are fully open to academic research
122
 * Please contact cpm@modelbest.cn to obtain written authorization for commercial use. Free commercial use is also allowed after registration.
124
 
125
  #### Statement
126
+ * As an LLM, MiniCPM-V 2.0 generates content by learning from a large amount of text, but it cannot comprehend or express personal opinions or make value judgements. Anything generated by MiniCPM-V 2.0 does not represent the views and positions of the model developers.
127
 * We will not be liable for any problems arising from the use of the MiniCPM-V open-source model, including but not limited to data security issues, risks of public opinion, or any risks and problems arising from the misdirection, misuse, or dissemination of the model.
assets/{minicpmv-2-benchmark2.png → minicpmv-2-benchmark.png} RENAMED
File without changes
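Note: the Usage hunks above show only a fragment of the README's Python example. Below is a minimal end-to-end sketch of how those lines fit together, assuming the `transformers` remote-code loading path and the `model.chat(...)` interface described in the project README; the exact `chat` arguments and return values are assumptions, not something this commit changes.

```python
# Minimal sketch of the MiniCPM-V 2.0 usage flow referenced in the Usage hunk.
# The model id matches the string written in this commit's diff; treat the
# exact model.chat() signature and return tuple as assumptions.
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

model_id = 'openbmb/MiniCPM-V-2.0'  # id as written in the updated README

# Load the model with its custom (remote) modeling code and move it to GPU in bf16.
model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.bfloat16)
model = model.to(device='cuda', dtype=torch.bfloat16)
# On Apple silicon, run with `PYTORCH_ENABLE_MPS_FALLBACK=1` and use instead:
# model = model.to(device='mps', dtype=torch.float16)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model.eval()

# Single-turn visual question answering on a local image.
image = Image.open('xx.jpg').convert('RGB')
question = 'What is in the image?'
msgs = [{'role': 'user', 'content': question}]

# In the upstream example, chat() returns the answer text plus the updated
# conversation context; the unpacking below reflects that assumption.
answer, context, _ = model.chat(
    image=image,
    msgs=msgs,
    context=None,
    tokenizer=tokenizer,
    sampling=True,
    temperature=0.7,
)
print(answer)
```

The sketch requires a CUDA GPU (or the commented MPS fallback) and network access to download the checkpoint on first run.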