me@hg.co commited on
Commit
b4e205e
1 Parent(s): 44fc622
Files changed (1) hide show
  1. ROCO-idefics3.ipynb +365 -66
ROCO-idefics3.ipynb CHANGED
@@ -22,7 +22,7 @@
22
  },
23
  {
24
  "cell_type": "code",
25
- "execution_count": null,
26
  "metadata": {
27
  "executionInfo": {
28
  "elapsed": 2,
@@ -36,7 +36,23 @@
36
  },
37
  "id": "F-zJG-uPIy3d"
38
  },
39
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  "source": [
41
  "try:\n",
42
  " import google.colab\n",
@@ -56,7 +72,7 @@
56
  },
57
  {
58
  "cell_type": "code",
59
- "execution_count": 1,
60
  "metadata": {
61
  "executionInfo": {
62
  "elapsed": 1459,
@@ -103,27 +119,21 @@
103
  },
104
  {
105
  "cell_type": "code",
106
- "execution_count": null,
107
  "metadata": {},
108
  "outputs": [
109
  {
110
- "name": "stderr",
111
  "output_type": "stream",
112
  "text": [
113
- "Token has not been saved to git credential helper.\n"
114
  ]
115
  },
116
  {
117
- "name": "stdout",
118
  "output_type": "stream",
119
  "text": [
120
- "\u001b[1m\u001b[31mCannot authenticate through git-credential as no helper is defined on your machine.\n",
121
- "You might have to re-authenticate when pushing to the Hugging Face Hub.\n",
122
- "Run the following command in your terminal in case you want to set the 'store' credential helper as default.\n",
123
- "\n",
124
- "git config --global credential.helper store\n",
125
- "\n",
126
- "Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.\u001b[0m\n"
127
  ]
128
  }
129
  ],
@@ -228,7 +238,7 @@
228
  },
229
  {
230
  "cell_type": "code",
231
- "execution_count": 3,
232
  "metadata": {
233
  "colab": {
234
  "base_uri": "https://localhost:8080/",
@@ -649,7 +659,245 @@
649
  {
650
  "data": {
651
  "application/vnd.jupyter.widget-view+json": {
652
- "model_id": "9371eea358754fe09d320c24e0ad42a6",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
  "version_major": 2,
654
  "version_minor": 0
655
  },
@@ -663,7 +911,7 @@
663
  {
664
  "data": {
665
  "application/vnd.jupyter.widget-view+json": {
666
- "model_id": "739e7749e5364b63aa0dd766fe0db69b",
667
  "version_major": 2,
668
  "version_minor": 0
669
  },
@@ -677,7 +925,7 @@
677
  {
678
  "data": {
679
  "application/vnd.jupyter.widget-view+json": {
680
- "model_id": "cf0aee0fbfed43788e5feebe465de1db",
681
  "version_major": 2,
682
  "version_minor": 0
683
  },
@@ -691,7 +939,7 @@
691
  {
692
  "data": {
693
  "application/vnd.jupyter.widget-view+json": {
694
- "model_id": "cfaca0fba2b94d1f992effbd930296c7",
695
  "version_major": 2,
696
  "version_minor": 0
697
  },
@@ -705,7 +953,7 @@
705
  {
706
  "data": {
707
  "application/vnd.jupyter.widget-view+json": {
708
- "model_id": "a0c12b32b24a4b878c666755463d97ca",
709
  "version_major": 2,
710
  "version_minor": 0
711
  },
@@ -719,7 +967,7 @@
719
  {
720
  "data": {
721
  "application/vnd.jupyter.widget-view+json": {
722
- "model_id": "8b455932b90e426b8356094b7465670e",
723
  "version_major": 2,
724
  "version_minor": 0
725
  },
@@ -733,7 +981,7 @@
733
  {
734
  "data": {
735
  "application/vnd.jupyter.widget-view+json": {
736
- "model_id": "54a181bd1f104a2f8d1fdee8495a546d",
737
  "version_major": 2,
738
  "version_minor": 0
739
  },
@@ -747,7 +995,7 @@
747
  {
748
  "data": {
749
  "application/vnd.jupyter.widget-view+json": {
750
- "model_id": "b8886eba98a540bc8e6fa675a0b181c6",
751
  "version_major": 2,
752
  "version_minor": 0
753
  },
@@ -761,7 +1009,7 @@
761
  {
762
  "data": {
763
  "application/vnd.jupyter.widget-view+json": {
764
- "model_id": "49cb40f2e63f44198927441543a17eb6",
765
  "version_major": 2,
766
  "version_minor": 0
767
  },
@@ -775,7 +1023,7 @@
775
  {
776
  "data": {
777
  "application/vnd.jupyter.widget-view+json": {
778
- "model_id": "31e1d0b1553946959846dfc5a098f3c7",
779
  "version_major": 2,
780
  "version_minor": 0
781
  },
@@ -789,7 +1037,7 @@
789
  {
790
  "data": {
791
  "application/vnd.jupyter.widget-view+json": {
792
- "model_id": "791f41cba3024fc2910974d8f1466c57",
793
  "version_major": 2,
794
  "version_minor": 0
795
  },
@@ -803,7 +1051,7 @@
803
  {
804
  "data": {
805
  "application/vnd.jupyter.widget-view+json": {
806
- "model_id": "e66e26e55af2424c8e262df2af686bba",
807
  "version_major": 2,
808
  "version_minor": 0
809
  },
@@ -817,7 +1065,7 @@
817
  {
818
  "data": {
819
  "application/vnd.jupyter.widget-view+json": {
820
- "model_id": "70a9c0769e2f4d1295cd8fbc75f59000",
821
  "version_major": 2,
822
  "version_minor": 0
823
  },
@@ -831,7 +1079,7 @@
831
  {
832
  "data": {
833
  "application/vnd.jupyter.widget-view+json": {
834
- "model_id": "a11e12a045c4469c81bd18dcf521e377",
835
  "version_major": 2,
836
  "version_minor": 0
837
  },
@@ -845,7 +1093,7 @@
845
  {
846
  "data": {
847
  "application/vnd.jupyter.widget-view+json": {
848
- "model_id": "f8f7e87560fb4db0a73a1c0abca661a8",
849
  "version_major": 2,
850
  "version_minor": 0
851
  },
@@ -859,7 +1107,7 @@
859
  {
860
  "data": {
861
  "application/vnd.jupyter.widget-view+json": {
862
- "model_id": "6ab2e09c185e42af9667e6149ca44031",
863
  "version_major": 2,
864
  "version_minor": 0
865
  },
@@ -873,7 +1121,7 @@
873
  {
874
  "data": {
875
  "application/vnd.jupyter.widget-view+json": {
876
- "model_id": "72dea4c919dd4b8d86538228f6291213",
877
  "version_major": 2,
878
  "version_minor": 0
879
  },
@@ -887,7 +1135,7 @@
887
  {
888
  "data": {
889
  "application/vnd.jupyter.widget-view+json": {
890
- "model_id": "ef3fbc6f624f44e1b3d01b33ecbd0e5f",
891
  "version_major": 2,
892
  "version_minor": 0
893
  },
@@ -901,7 +1149,7 @@
901
  {
902
  "data": {
903
  "application/vnd.jupyter.widget-view+json": {
904
- "model_id": "4bbdc7b9c1ca49cdba5c47b6ba9d5a8e",
905
  "version_major": 2,
906
  "version_minor": 0
907
  },
@@ -915,7 +1163,7 @@
915
  {
916
  "data": {
917
  "application/vnd.jupyter.widget-view+json": {
918
- "model_id": "479506696f244ce6b0c585e7da202f1c",
919
  "version_major": 2,
920
  "version_minor": 0
921
  },
@@ -929,7 +1177,7 @@
929
  {
930
  "data": {
931
  "application/vnd.jupyter.widget-view+json": {
932
- "model_id": "2870d5ecfed045a18c4e93eeb499da52",
933
  "version_major": 2,
934
  "version_minor": 0
935
  },
@@ -943,7 +1191,7 @@
943
  {
944
  "data": {
945
  "application/vnd.jupyter.widget-view+json": {
946
- "model_id": "0435bb18b438409bb003553a9cab29d1",
947
  "version_major": 2,
948
  "version_minor": 0
949
  },
@@ -957,7 +1205,7 @@
957
  {
958
  "data": {
959
  "application/vnd.jupyter.widget-view+json": {
960
- "model_id": "4302f869cd2f4e8ab88751e2772b374b",
961
  "version_major": 2,
962
  "version_minor": 0
963
  },
@@ -971,7 +1219,7 @@
971
  {
972
  "data": {
973
  "application/vnd.jupyter.widget-view+json": {
974
- "model_id": "7e912d183c534543bba1e9aa40ce1c27",
975
  "version_major": 2,
976
  "version_minor": 0
977
  },
@@ -985,7 +1233,7 @@
985
  {
986
  "data": {
987
  "application/vnd.jupyter.widget-view+json": {
988
- "model_id": "b5c2227341f94d7483756b410497c6c6",
989
  "version_major": 2,
990
  "version_minor": 0
991
  },
@@ -999,7 +1247,7 @@
999
  {
1000
  "data": {
1001
  "application/vnd.jupyter.widget-view+json": {
1002
- "model_id": "91428e23d51f403e8eba0f05443bafba",
1003
  "version_major": 2,
1004
  "version_minor": 0
1005
  },
@@ -1013,7 +1261,7 @@
1013
  {
1014
  "data": {
1015
  "application/vnd.jupyter.widget-view+json": {
1016
- "model_id": "cadd28d8df074bc79968fe53489d94d1",
1017
  "version_major": 2,
1018
  "version_minor": 0
1019
  },
@@ -1027,7 +1275,7 @@
1027
  {
1028
  "data": {
1029
  "application/vnd.jupyter.widget-view+json": {
1030
- "model_id": "55263a9a632b42a6af60995cf89441c4",
1031
  "version_major": 2,
1032
  "version_minor": 0
1033
  },
@@ -1041,7 +1289,7 @@
1041
  {
1042
  "data": {
1043
  "application/vnd.jupyter.widget-view+json": {
1044
- "model_id": "b56539a9023f4806ba7374f6a3e01731",
1045
  "version_major": 2,
1046
  "version_minor": 0
1047
  },
@@ -1055,7 +1303,7 @@
1055
  {
1056
  "data": {
1057
  "application/vnd.jupyter.widget-view+json": {
1058
- "model_id": "321ad27da22a48078d2b16f646f0a2f8",
1059
  "version_major": 2,
1060
  "version_minor": 0
1061
  },
@@ -1086,7 +1334,7 @@
1086
  },
1087
  {
1088
  "cell_type": "code",
1089
- "execution_count": 7,
1090
  "metadata": {
1091
  "colab": {
1092
  "base_uri": "https://localhost:8080/"
@@ -1114,7 +1362,7 @@
1114
  " 'cui': ['C0037005']}"
1115
  ]
1116
  },
1117
- "execution_count": 7,
1118
  "metadata": {},
1119
  "output_type": "execute_result"
1120
  }
@@ -1125,7 +1373,7 @@
1125
  },
1126
  {
1127
  "cell_type": "code",
1128
- "execution_count": 5,
1129
  "metadata": {
1130
  "colab": {
1131
  "base_uri": "https://localhost:8080/",
@@ -1154,7 +1402,7 @@
1154
  "<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1684x2294>"
1155
  ]
1156
  },
1157
- "execution_count": 5,
1158
  "metadata": {},
1159
  "output_type": "execute_result"
1160
  }
@@ -1174,7 +1422,7 @@
1174
  },
1175
  {
1176
  "cell_type": "code",
1177
- "execution_count": 6,
1178
  "metadata": {
1179
  "colab": {
1180
  "base_uri": "https://localhost:8080/",
@@ -1217,7 +1465,7 @@
1217
  {
1218
  "data": {
1219
  "application/vnd.jupyter.widget-view+json": {
1220
- "model_id": "0915ca58b5974a558d9a97e6e5b49601",
1221
  "version_major": 2,
1222
  "version_minor": 0
1223
  },
@@ -1231,7 +1479,7 @@
1231
  {
1232
  "data": {
1233
  "application/vnd.jupyter.widget-view+json": {
1234
- "model_id": "6ddf29177eda49eaa001b66c4406d91d",
1235
  "version_major": 2,
1236
  "version_minor": 0
1237
  },
@@ -1245,7 +1493,7 @@
1245
  {
1246
  "data": {
1247
  "application/vnd.jupyter.widget-view+json": {
1248
- "model_id": "81e9fd88f02b44a0bd0d361891a97b8d",
1249
  "version_major": 2,
1250
  "version_minor": 0
1251
  },
@@ -1259,7 +1507,7 @@
1259
  {
1260
  "data": {
1261
  "application/vnd.jupyter.widget-view+json": {
1262
- "model_id": "3731fd9db78c4e7bb71fcfabce3b5cf2",
1263
  "version_major": 2,
1264
  "version_minor": 0
1265
  },
@@ -1273,7 +1521,7 @@
1273
  {
1274
  "data": {
1275
  "application/vnd.jupyter.widget-view+json": {
1276
- "model_id": "892c80cdcfba41d4b7b7cac77c641a80",
1277
  "version_major": 2,
1278
  "version_minor": 0
1279
  },
@@ -1287,7 +1535,7 @@
1287
  {
1288
  "data": {
1289
  "application/vnd.jupyter.widget-view+json": {
1290
- "model_id": "2e4ba2c07973429492728624aa9f6676",
1291
  "version_major": 2,
1292
  "version_minor": 0
1293
  },
@@ -1301,7 +1549,7 @@
1301
  {
1302
  "data": {
1303
  "application/vnd.jupyter.widget-view+json": {
1304
- "model_id": "c9b31d5ca2d0424fa449f7301e0ec502",
1305
  "version_major": 2,
1306
  "version_minor": 0
1307
  },
@@ -1322,7 +1570,7 @@
1322
  {
1323
  "data": {
1324
  "application/vnd.jupyter.widget-view+json": {
1325
- "model_id": "d407949500224df2ad24fb1a6b9ed96a",
1326
  "version_major": 2,
1327
  "version_minor": 0
1328
  },
@@ -1336,7 +1584,7 @@
1336
  {
1337
  "data": {
1338
  "application/vnd.jupyter.widget-view+json": {
1339
- "model_id": "6f77cab1ed284f9097605b8f43f12755",
1340
  "version_major": 2,
1341
  "version_minor": 0
1342
  },
@@ -1350,7 +1598,7 @@
1350
  {
1351
  "data": {
1352
  "application/vnd.jupyter.widget-view+json": {
1353
- "model_id": "1495b59787e745928dc1512693389ed4",
1354
  "version_major": 2,
1355
  "version_minor": 0
1356
  },
@@ -1364,7 +1612,7 @@
1364
  {
1365
  "data": {
1366
  "application/vnd.jupyter.widget-view+json": {
1367
- "model_id": "bb657ddcc31e4d72b04b5ed8049ece8c",
1368
  "version_major": 2,
1369
  "version_minor": 0
1370
  },
@@ -1378,7 +1626,7 @@
1378
  {
1379
  "data": {
1380
  "application/vnd.jupyter.widget-view+json": {
1381
- "model_id": "1bd3d44ebe444cf6aa57757d04f5d4a5",
1382
  "version_major": 2,
1383
  "version_minor": 0
1384
  },
@@ -1392,7 +1640,7 @@
1392
  {
1393
  "data": {
1394
  "application/vnd.jupyter.widget-view+json": {
1395
- "model_id": "8eb46982436d419fafd125d9ecabde87",
1396
  "version_major": 2,
1397
  "version_minor": 0
1398
  },
@@ -1406,7 +1654,7 @@
1406
  {
1407
  "data": {
1408
  "application/vnd.jupyter.widget-view+json": {
1409
- "model_id": "ab006c8fef8642feb0c34d0710a9900f",
1410
  "version_major": 2,
1411
  "version_minor": 0
1412
  },
@@ -1420,7 +1668,7 @@
1420
  {
1421
  "data": {
1422
  "application/vnd.jupyter.widget-view+json": {
1423
- "model_id": "d6c8bc77933a44c4bf037f6541b6315b",
1424
  "version_major": 2,
1425
  "version_minor": 0
1426
  },
@@ -1715,6 +1963,57 @@
1715
  "source": [
1716
  "trainer.train()"
1717
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1718
  }
1719
  ],
1720
  "metadata": {
@@ -14400,5 +14699,5 @@
14400
  }
14401
  },
14402
  "nbformat": 4,
14403
- "nbformat_minor": 0
14404
  }
 
22
  },
23
  {
24
  "cell_type": "code",
25
+ "execution_count": 1,
26
  "metadata": {
27
  "executionInfo": {
28
  "elapsed": 2,
 
36
  },
37
  "id": "F-zJG-uPIy3d"
38
  },
39
+ "outputs": [
40
+ {
41
+ "ename": "Exception",
42
+ "evalue": "You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive",
43
+ "output_type": "error",
44
+ "traceback": [
45
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
46
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
47
+ "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgoogle\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcolab\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgoogle\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcolab\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m drive\n",
48
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'google'",
49
+ "\nDuring handling of the above exception, another exception occurred:\n",
50
+ "\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
51
+ "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m drive\u001b[38;5;241m.\u001b[39mmount(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/content/drive\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mModuleNotFoundError\u001b[39;00m:\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
52
+ "\u001b[0;31mException\u001b[0m: You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive"
53
+ ]
54
+ }
55
+ ],
56
  "source": [
57
  "try:\n",
58
  " import google.colab\n",
 
72
  },
73
  {
74
  "cell_type": "code",
75
+ "execution_count": 2,
76
  "metadata": {
77
  "executionInfo": {
78
  "elapsed": 1459,
 
119
  },
120
  {
121
  "cell_type": "code",
122
+ "execution_count": 3,
123
  "metadata": {},
124
  "outputs": [
125
  {
126
+ "name": "stdout",
127
  "output_type": "stream",
128
  "text": [
129
+ "Hugging Face token found in environment variable\n"
130
  ]
131
  },
132
  {
133
+ "name": "stderr",
134
  "output_type": "stream",
135
  "text": [
136
+ "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
 
 
 
 
 
 
137
  ]
138
  }
139
  ],
 
238
  },
239
  {
240
  "cell_type": "code",
241
+ "execution_count": 4,
242
  "metadata": {
243
  "colab": {
244
  "base_uri": "https://localhost:8080/",
 
659
  {
660
  "data": {
661
  "application/vnd.jupyter.widget-view+json": {
662
+ "model_id": "c681920ed6e24c35981eda639b1c4458",
663
+ "version_major": 2,
664
+ "version_minor": 0
665
+ },
666
+ "text/plain": [
667
+ "README.md: 0%| | 0.00/4.50k [00:00<?, ?B/s]"
668
+ ]
669
+ },
670
+ "metadata": {},
671
+ "output_type": "display_data"
672
+ },
673
+ {
674
+ "data": {
675
+ "application/vnd.jupyter.widget-view+json": {
676
+ "model_id": "c5c781c8755e4ee79f5c972bb786ddda",
677
+ "version_major": 2,
678
+ "version_minor": 0
679
+ },
680
+ "text/plain": [
681
+ "Resolving data files: 0%| | 0/27 [00:00<?, ?it/s]"
682
+ ]
683
+ },
684
+ "metadata": {},
685
+ "output_type": "display_data"
686
+ },
687
+ {
688
+ "data": {
689
+ "application/vnd.jupyter.widget-view+json": {
690
+ "model_id": "4ece62cf670f441181bce789dbabb38d",
691
+ "version_major": 2,
692
+ "version_minor": 0
693
+ },
694
+ "text/plain": [
695
+ "Resolving data files: 0%| | 0/27 [00:00<?, ?it/s]"
696
+ ]
697
+ },
698
+ "metadata": {},
699
+ "output_type": "display_data"
700
+ },
701
+ {
702
+ "data": {
703
+ "application/vnd.jupyter.widget-view+json": {
704
+ "model_id": "368b3734b6094270874bc9ed05220b43",
705
+ "version_major": 2,
706
+ "version_minor": 0
707
+ },
708
+ "text/plain": [
709
+ "Downloading data: 0%| | 0/27 [00:00<?, ?files/s]"
710
+ ]
711
+ },
712
+ "metadata": {},
713
+ "output_type": "display_data"
714
+ },
715
+ {
716
+ "data": {
717
+ "application/vnd.jupyter.widget-view+json": {
718
+ "model_id": "e002292c8f73473ba28b353594d047a0",
719
+ "version_major": 2,
720
+ "version_minor": 0
721
+ },
722
+ "text/plain": [
723
+ "train-00000-of-00027.parquet: 0%| | 0.00/497M [00:00<?, ?B/s]"
724
+ ]
725
+ },
726
+ "metadata": {},
727
+ "output_type": "display_data"
728
+ },
729
+ {
730
+ "data": {
731
+ "application/vnd.jupyter.widget-view+json": {
732
+ "model_id": "b6ec67734a7644c49c6d222db885d9fa",
733
+ "version_major": 2,
734
+ "version_minor": 0
735
+ },
736
+ "text/plain": [
737
+ "train-00001-of-00027.parquet: 0%| | 0.00/504M [00:00<?, ?B/s]"
738
+ ]
739
+ },
740
+ "metadata": {},
741
+ "output_type": "display_data"
742
+ },
743
+ {
744
+ "data": {
745
+ "application/vnd.jupyter.widget-view+json": {
746
+ "model_id": "d04c94230afb42c4b5488d1517c63fd6",
747
+ "version_major": 2,
748
+ "version_minor": 0
749
+ },
750
+ "text/plain": [
751
+ "train-00002-of-00027.parquet: 0%| | 0.00/490M [00:00<?, ?B/s]"
752
+ ]
753
+ },
754
+ "metadata": {},
755
+ "output_type": "display_data"
756
+ },
757
+ {
758
+ "data": {
759
+ "application/vnd.jupyter.widget-view+json": {
760
+ "model_id": "0fd069cace5146b8be5b44a89dae32dd",
761
+ "version_major": 2,
762
+ "version_minor": 0
763
+ },
764
+ "text/plain": [
765
+ "train-00003-of-00027.parquet: 0%| | 0.00/485M [00:00<?, ?B/s]"
766
+ ]
767
+ },
768
+ "metadata": {},
769
+ "output_type": "display_data"
770
+ },
771
+ {
772
+ "data": {
773
+ "application/vnd.jupyter.widget-view+json": {
774
+ "model_id": "e23e8d0ecb394513907c6ecb4a30783d",
775
+ "version_major": 2,
776
+ "version_minor": 0
777
+ },
778
+ "text/plain": [
779
+ "train-00004-of-00027.parquet: 0%| | 0.00/510M [00:00<?, ?B/s]"
780
+ ]
781
+ },
782
+ "metadata": {},
783
+ "output_type": "display_data"
784
+ },
785
+ {
786
+ "data": {
787
+ "application/vnd.jupyter.widget-view+json": {
788
+ "model_id": "6d0a484c03274c628585bfd3938dd5dd",
789
+ "version_major": 2,
790
+ "version_minor": 0
791
+ },
792
+ "text/plain": [
793
+ "train-00005-of-00027.parquet: 0%| | 0.00/498M [00:00<?, ?B/s]"
794
+ ]
795
+ },
796
+ "metadata": {},
797
+ "output_type": "display_data"
798
+ },
799
+ {
800
+ "data": {
801
+ "application/vnd.jupyter.widget-view+json": {
802
+ "model_id": "8a9d84a4ba4c4a9d8762845b466159dc",
803
+ "version_major": 2,
804
+ "version_minor": 0
805
+ },
806
+ "text/plain": [
807
+ "train-00006-of-00027.parquet: 0%| | 0.00/532M [00:00<?, ?B/s]"
808
+ ]
809
+ },
810
+ "metadata": {},
811
+ "output_type": "display_data"
812
+ },
813
+ {
814
+ "data": {
815
+ "application/vnd.jupyter.widget-view+json": {
816
+ "model_id": "ac978b853b8f4ce48b2240adad35813c",
817
+ "version_major": 2,
818
+ "version_minor": 0
819
+ },
820
+ "text/plain": [
821
+ "train-00007-of-00027.parquet: 0%| | 0.00/482M [00:00<?, ?B/s]"
822
+ ]
823
+ },
824
+ "metadata": {},
825
+ "output_type": "display_data"
826
+ },
827
+ {
828
+ "data": {
829
+ "application/vnd.jupyter.widget-view+json": {
830
+ "model_id": "dc39d2085a8748269438f3f518a936fa",
831
+ "version_major": 2,
832
+ "version_minor": 0
833
+ },
834
+ "text/plain": [
835
+ "train-00008-of-00027.parquet: 0%| | 0.00/497M [00:00<?, ?B/s]"
836
+ ]
837
+ },
838
+ "metadata": {},
839
+ "output_type": "display_data"
840
+ },
841
+ {
842
+ "data": {
843
+ "application/vnd.jupyter.widget-view+json": {
844
+ "model_id": "efd5dd9b5acf4a00b9fad256b239fe25",
845
+ "version_major": 2,
846
+ "version_minor": 0
847
+ },
848
+ "text/plain": [
849
+ "train-00009-of-00027.parquet: 0%| | 0.00/489M [00:00<?, ?B/s]"
850
+ ]
851
+ },
852
+ "metadata": {},
853
+ "output_type": "display_data"
854
+ },
855
+ {
856
+ "data": {
857
+ "application/vnd.jupyter.widget-view+json": {
858
+ "model_id": "0ee7a5cdf9fc44fb95a02a215db064b1",
859
+ "version_major": 2,
860
+ "version_minor": 0
861
+ },
862
+ "text/plain": [
863
+ "train-00010-of-00027.parquet: 0%| | 0.00/484M [00:00<?, ?B/s]"
864
+ ]
865
+ },
866
+ "metadata": {},
867
+ "output_type": "display_data"
868
+ },
869
+ {
870
+ "data": {
871
+ "application/vnd.jupyter.widget-view+json": {
872
+ "model_id": "d2bc061d040440d48bd1e068e7d0e754",
873
+ "version_major": 2,
874
+ "version_minor": 0
875
+ },
876
+ "text/plain": [
877
+ "train-00011-of-00027.parquet: 0%| | 0.00/508M [00:00<?, ?B/s]"
878
+ ]
879
+ },
880
+ "metadata": {},
881
+ "output_type": "display_data"
882
+ },
883
+ {
884
+ "data": {
885
+ "application/vnd.jupyter.widget-view+json": {
886
+ "model_id": "290a81df45914de6865a1fd538746d8b",
887
+ "version_major": 2,
888
+ "version_minor": 0
889
+ },
890
+ "text/plain": [
891
+ "train-00012-of-00027.parquet: 0%| | 0.00/490M [00:00<?, ?B/s]"
892
+ ]
893
+ },
894
+ "metadata": {},
895
+ "output_type": "display_data"
896
+ },
897
+ {
898
+ "data": {
899
+ "application/vnd.jupyter.widget-view+json": {
900
+ "model_id": "e15f56550122473eaf0b9d2c33defc7a",
901
  "version_major": 2,
902
  "version_minor": 0
903
  },
 
911
  {
912
  "data": {
913
  "application/vnd.jupyter.widget-view+json": {
914
+ "model_id": "219eeafe2fde471d9c524a8f03884678",
915
  "version_major": 2,
916
  "version_minor": 0
917
  },
 
925
  {
926
  "data": {
927
  "application/vnd.jupyter.widget-view+json": {
928
+ "model_id": "afb3a6d52a6e46268803195d3f7e0e8e",
929
  "version_major": 2,
930
  "version_minor": 0
931
  },
 
939
  {
940
  "data": {
941
  "application/vnd.jupyter.widget-view+json": {
942
+ "model_id": "fe952f2809c24da48e85d56b8de828d9",
943
  "version_major": 2,
944
  "version_minor": 0
945
  },
 
953
  {
954
  "data": {
955
  "application/vnd.jupyter.widget-view+json": {
956
+ "model_id": "09e7e2783b464c6197bebcbe45364e65",
957
  "version_major": 2,
958
  "version_minor": 0
959
  },
 
967
  {
968
  "data": {
969
  "application/vnd.jupyter.widget-view+json": {
970
+ "model_id": "f2142baf4df04c2e8fac7fda5cc4a4b4",
971
  "version_major": 2,
972
  "version_minor": 0
973
  },
 
981
  {
982
  "data": {
983
  "application/vnd.jupyter.widget-view+json": {
984
+ "model_id": "ce16d5ccf5a244279e29b26f6b9f159f",
985
  "version_major": 2,
986
  "version_minor": 0
987
  },
 
995
  {
996
  "data": {
997
  "application/vnd.jupyter.widget-view+json": {
998
+ "model_id": "d53b88d0ad5049e493578571a0d33740",
999
  "version_major": 2,
1000
  "version_minor": 0
1001
  },
 
1009
  {
1010
  "data": {
1011
  "application/vnd.jupyter.widget-view+json": {
1012
+ "model_id": "785e664dba3a40edb3858acccf6a07b0",
1013
  "version_major": 2,
1014
  "version_minor": 0
1015
  },
 
1023
  {
1024
  "data": {
1025
  "application/vnd.jupyter.widget-view+json": {
1026
+ "model_id": "ab300b08c57247d5a972d0d77acfddf4",
1027
  "version_major": 2,
1028
  "version_minor": 0
1029
  },
 
1037
  {
1038
  "data": {
1039
  "application/vnd.jupyter.widget-view+json": {
1040
+ "model_id": "00ab18dd02f34aa3a658b456d8bfe390",
1041
  "version_major": 2,
1042
  "version_minor": 0
1043
  },
 
1051
  {
1052
  "data": {
1053
  "application/vnd.jupyter.widget-view+json": {
1054
+ "model_id": "421d90d57c8e44a48479eef2eb40a479",
1055
  "version_major": 2,
1056
  "version_minor": 0
1057
  },
 
1065
  {
1066
  "data": {
1067
  "application/vnd.jupyter.widget-view+json": {
1068
+ "model_id": "60abcaa9df3d43a99ef1e07e9b7fbe11",
1069
  "version_major": 2,
1070
  "version_minor": 0
1071
  },
 
1079
  {
1080
  "data": {
1081
  "application/vnd.jupyter.widget-view+json": {
1082
+ "model_id": "79af8edddbfe4543a00e47d5f697866d",
1083
  "version_major": 2,
1084
  "version_minor": 0
1085
  },
 
1093
  {
1094
  "data": {
1095
  "application/vnd.jupyter.widget-view+json": {
1096
+ "model_id": "9d14e22b823d4c9e85b17e4dbd57fec6",
1097
  "version_major": 2,
1098
  "version_minor": 0
1099
  },
 
1107
  {
1108
  "data": {
1109
  "application/vnd.jupyter.widget-view+json": {
1110
+ "model_id": "ee5ba070e76b4854aadfac59b0237fbf",
1111
  "version_major": 2,
1112
  "version_minor": 0
1113
  },
 
1121
  {
1122
  "data": {
1123
  "application/vnd.jupyter.widget-view+json": {
1124
+ "model_id": "8ce96862fb234f6ea335071fc8114574",
1125
  "version_major": 2,
1126
  "version_minor": 0
1127
  },
 
1135
  {
1136
  "data": {
1137
  "application/vnd.jupyter.widget-view+json": {
1138
+ "model_id": "dd5860de108f49e5bcc609bb11a646df",
1139
  "version_major": 2,
1140
  "version_minor": 0
1141
  },
 
1149
  {
1150
  "data": {
1151
  "application/vnd.jupyter.widget-view+json": {
1152
+ "model_id": "bef0c258e4764f0ab9406467f01752a9",
1153
  "version_major": 2,
1154
  "version_minor": 0
1155
  },
 
1163
  {
1164
  "data": {
1165
  "application/vnd.jupyter.widget-view+json": {
1166
+ "model_id": "6c4d53689dad43dd8db780522139f599",
1167
  "version_major": 2,
1168
  "version_minor": 0
1169
  },
 
1177
  {
1178
  "data": {
1179
  "application/vnd.jupyter.widget-view+json": {
1180
+ "model_id": "d50075ea0fcd4a34ac88ed121b1e90bf",
1181
  "version_major": 2,
1182
  "version_minor": 0
1183
  },
 
1191
  {
1192
  "data": {
1193
  "application/vnd.jupyter.widget-view+json": {
1194
+ "model_id": "a4e15de7a1c54400a1ebff02d4caa657",
1195
  "version_major": 2,
1196
  "version_minor": 0
1197
  },
 
1205
  {
1206
  "data": {
1207
  "application/vnd.jupyter.widget-view+json": {
1208
+ "model_id": "41fc8cb29962483e81fa8a640f633d46",
1209
  "version_major": 2,
1210
  "version_minor": 0
1211
  },
 
1219
  {
1220
  "data": {
1221
  "application/vnd.jupyter.widget-view+json": {
1222
+ "model_id": "388b29f8655b4fee87db053d591bd72d",
1223
  "version_major": 2,
1224
  "version_minor": 0
1225
  },
 
1233
  {
1234
  "data": {
1235
  "application/vnd.jupyter.widget-view+json": {
1236
+ "model_id": "6c75919875544db98ee2c64215ecff97",
1237
  "version_major": 2,
1238
  "version_minor": 0
1239
  },
 
1247
  {
1248
  "data": {
1249
  "application/vnd.jupyter.widget-view+json": {
1250
+ "model_id": "a4e8b6ecaf734f33a842f60681e23108",
1251
  "version_major": 2,
1252
  "version_minor": 0
1253
  },
 
1261
  {
1262
  "data": {
1263
  "application/vnd.jupyter.widget-view+json": {
1264
+ "model_id": "7e497cfcd6f04a94aa0ac40d6df938b1",
1265
  "version_major": 2,
1266
  "version_minor": 0
1267
  },
 
1275
  {
1276
  "data": {
1277
  "application/vnd.jupyter.widget-view+json": {
1278
+ "model_id": "56cde5f680f643be887d6f9f804676c9",
1279
  "version_major": 2,
1280
  "version_minor": 0
1281
  },
 
1289
  {
1290
  "data": {
1291
  "application/vnd.jupyter.widget-view+json": {
1292
+ "model_id": "fbcf378be86c40cdbf606a2446d1a252",
1293
  "version_major": 2,
1294
  "version_minor": 0
1295
  },
 
1303
  {
1304
  "data": {
1305
  "application/vnd.jupyter.widget-view+json": {
1306
+ "model_id": "963491aad1f945cfb5811d7239880b54",
1307
  "version_major": 2,
1308
  "version_minor": 0
1309
  },
 
1334
  },
1335
  {
1336
  "cell_type": "code",
1337
+ "execution_count": 5,
1338
  "metadata": {
1339
  "colab": {
1340
  "base_uri": "https://localhost:8080/"
 
1362
  " 'cui': ['C0037005']}"
1363
  ]
1364
  },
1365
+ "execution_count": 5,
1366
  "metadata": {},
1367
  "output_type": "execute_result"
1368
  }
 
1373
  },
1374
  {
1375
  "cell_type": "code",
1376
+ "execution_count": 6,
1377
  "metadata": {
1378
  "colab": {
1379
  "base_uri": "https://localhost:8080/",
 
1402
  "<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1684x2294>"
1403
  ]
1404
  },
1405
+ "execution_count": 6,
1406
  "metadata": {},
1407
  "output_type": "execute_result"
1408
  }
 
1422
  },
1423
  {
1424
  "cell_type": "code",
1425
+ "execution_count": 7,
1426
  "metadata": {
1427
  "colab": {
1428
  "base_uri": "https://localhost:8080/",
 
1465
  {
1466
  "data": {
1467
  "application/vnd.jupyter.widget-view+json": {
1468
+ "model_id": "f8115bd39ceb47678208bc6dc80a179a",
1469
  "version_major": 2,
1470
  "version_minor": 0
1471
  },
 
1479
  {
1480
  "data": {
1481
  "application/vnd.jupyter.widget-view+json": {
1482
+ "model_id": "195ef910bea946cbac27ae80d12ab37d",
1483
  "version_major": 2,
1484
  "version_minor": 0
1485
  },
 
1493
  {
1494
  "data": {
1495
  "application/vnd.jupyter.widget-view+json": {
1496
+ "model_id": "ee41668130414ec29bd670a4c00ea9dd",
1497
  "version_major": 2,
1498
  "version_minor": 0
1499
  },
 
1507
  {
1508
  "data": {
1509
  "application/vnd.jupyter.widget-view+json": {
1510
+ "model_id": "cae512d46a20437cb2b9054e6d796129",
1511
  "version_major": 2,
1512
  "version_minor": 0
1513
  },
 
1521
  {
1522
  "data": {
1523
  "application/vnd.jupyter.widget-view+json": {
1524
+ "model_id": "15b4f6ea701b40f984b45da569b8fd50",
1525
  "version_major": 2,
1526
  "version_minor": 0
1527
  },
 
1535
  {
1536
  "data": {
1537
  "application/vnd.jupyter.widget-view+json": {
1538
+ "model_id": "711ead0dc9ba4017a5755a1cc111d8c3",
1539
  "version_major": 2,
1540
  "version_minor": 0
1541
  },
 
1549
  {
1550
  "data": {
1551
  "application/vnd.jupyter.widget-view+json": {
1552
+ "model_id": "009a9836643d4a1285ba017cb6dee9fb",
1553
  "version_major": 2,
1554
  "version_minor": 0
1555
  },
 
1570
  {
1571
  "data": {
1572
  "application/vnd.jupyter.widget-view+json": {
1573
+ "model_id": "1078bb163ca14f4cb242b3afb81b6a70",
1574
  "version_major": 2,
1575
  "version_minor": 0
1576
  },
 
1584
  {
1585
  "data": {
1586
  "application/vnd.jupyter.widget-view+json": {
1587
+ "model_id": "813c7b8d26224b07893f652c1ab25acf",
1588
  "version_major": 2,
1589
  "version_minor": 0
1590
  },
 
1598
  {
1599
  "data": {
1600
  "application/vnd.jupyter.widget-view+json": {
1601
+ "model_id": "8f312307a8c34e9ebac5a4006cb75b15",
1602
  "version_major": 2,
1603
  "version_minor": 0
1604
  },
 
1612
  {
1613
  "data": {
1614
  "application/vnd.jupyter.widget-view+json": {
1615
+ "model_id": "6805d6a27b4b404594f644d8289e3e0c",
1616
  "version_major": 2,
1617
  "version_minor": 0
1618
  },
 
1626
  {
1627
  "data": {
1628
  "application/vnd.jupyter.widget-view+json": {
1629
+ "model_id": "dfa588033e244ba0aef4875d4dca0087",
1630
  "version_major": 2,
1631
  "version_minor": 0
1632
  },
 
1640
  {
1641
  "data": {
1642
  "application/vnd.jupyter.widget-view+json": {
1643
+ "model_id": "de1b8cc27c5744b4b34b48e7a1fb7a00",
1644
  "version_major": 2,
1645
  "version_minor": 0
1646
  },
 
1654
  {
1655
  "data": {
1656
  "application/vnd.jupyter.widget-view+json": {
1657
+ "model_id": "79b23b4c9373457fb28a1cdcc1b23277",
1658
  "version_major": 2,
1659
  "version_minor": 0
1660
  },
 
1668
  {
1669
  "data": {
1670
  "application/vnd.jupyter.widget-view+json": {
1671
+ "model_id": "02c68a9944a44dcc882dcaa7722dfa9b",
1672
  "version_major": 2,
1673
  "version_minor": 0
1674
  },
 
1963
  "source": [
1964
  "trainer.train()"
1965
  ]
1966
+ },
1967
+ {
1968
+ "cell_type": "code",
1969
+ "execution_count": 11,
1970
+ "metadata": {},
1971
+ "outputs": [
1972
+ {
1973
+ "data": {
1974
+ "application/vnd.jupyter.widget-view+json": {
1975
+ "model_id": "1694c769eb91432a90f7e6a69bfc8367",
1976
+ "version_major": 2,
1977
+ "version_minor": 0
1978
+ },
1979
+ "text/plain": [
1980
+ "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
1981
+ ]
1982
+ },
1983
+ "metadata": {},
1984
+ "output_type": "display_data"
1985
+ },
1986
+ {
1987
+ "ename": "OutOfMemoryError",
1988
+ "evalue": "CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 6.00 GiB of which 0 bytes is free. Of the allocated memory 20.39 GiB is allocated by PyTorch, and 155.53 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)",
1989
+ "output_type": "error",
1990
+ "traceback": [
1991
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1992
+ "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)",
1993
+ "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m model \u001b[38;5;241m=\u001b[39m Idefics3ForConditionalGeneration\u001b[38;5;241m.\u001b[39mfrom_pretrained(source_model_id , torch_dtype\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mbfloat16)\u001b[38;5;241m.\u001b[39mto(DEVICE)\n\u001b[1;32m 2\u001b[0m model\u001b[38;5;241m.\u001b[39mload_adapter(destination_model_id, device_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
1994
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/modeling_utils.py:3167\u001b[0m, in \u001b[0;36mPreTrainedModel.to\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 3162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtype_present_in_args:\n\u001b[1;32m 3163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 3164\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou cannot cast a GPTQ model in a new `dtype`. Make sure to load the model using `from_pretrained` using the desired\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3165\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m `dtype` by passing the correct `torch_dtype` argument.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3166\u001b[0m )\n\u001b[0;32m-> 3167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1995
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1340\u001b[0m, in \u001b[0;36mModule.to\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1338\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m-> 1340\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_apply(convert)\n",
1996
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:900\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 899\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 900\u001b[0m module\u001b[38;5;241m.\u001b[39m_apply(fn)\n\u001b[1;32m 902\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 903\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 904\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 905\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 911\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
1997
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:900\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 899\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 900\u001b[0m module\u001b[38;5;241m.\u001b[39m_apply(fn)\n\u001b[1;32m 902\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 903\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 904\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 905\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 911\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
1998
+ " \u001b[0;31m[... skipping similar frames: Module._apply at line 900 (4 times)]\u001b[0m\n",
1999
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:900\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 899\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 900\u001b[0m module\u001b[38;5;241m.\u001b[39m_apply(fn)\n\u001b[1;32m 902\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 903\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 904\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 905\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 911\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
2000
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:927\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 923\u001b[0m \u001b[38;5;66;03m# Tensors stored in modules are graph leaves, and we don't want to\u001b[39;00m\n\u001b[1;32m 924\u001b[0m \u001b[38;5;66;03m# track autograd history of `param_applied`, so we have to use\u001b[39;00m\n\u001b[1;32m 925\u001b[0m \u001b[38;5;66;03m# `with torch.no_grad():`\u001b[39;00m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 927\u001b[0m param_applied \u001b[38;5;241m=\u001b[39m fn(param)\n\u001b[1;32m 928\u001b[0m p_should_use_set_data \u001b[38;5;241m=\u001b[39m compute_should_use_set_data(param, param_applied)\n\u001b[1;32m 930\u001b[0m \u001b[38;5;66;03m# subclasses may have multiple child tensors so we need to use swap_tensors\u001b[39;00m\n",
2001
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1326\u001b[0m, in \u001b[0;36mModule.to.<locals>.convert\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 1319\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m convert_to_format \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m t\u001b[38;5;241m.\u001b[39mdim() \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m):\n\u001b[1;32m 1320\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m t\u001b[38;5;241m.\u001b[39mto(\n\u001b[1;32m 1321\u001b[0m device,\n\u001b[1;32m 1322\u001b[0m dtype \u001b[38;5;28;01mif\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_floating_point() \u001b[38;5;129;01mor\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_complex() \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1323\u001b[0m non_blocking,\n\u001b[1;32m 1324\u001b[0m memory_format\u001b[38;5;241m=\u001b[39mconvert_to_format,\n\u001b[1;32m 1325\u001b[0m )\n\u001b[0;32m-> 1326\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m t\u001b[38;5;241m.\u001b[39mto(\n\u001b[1;32m 1327\u001b[0m device,\n\u001b[1;32m 1328\u001b[0m dtype \u001b[38;5;28;01mif\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_floating_point() \u001b[38;5;129;01mor\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_complex() \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1329\u001b[0m non_blocking,\n\u001b[1;32m 1330\u001b[0m )\n\u001b[1;32m 1331\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1332\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot copy out of meta tensor; no data!\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
2002
+ "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 6.00 GiB of which 0 bytes is free. Of the allocated memory 20.39 GiB is allocated by PyTorch, and 155.53 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
2003
+ ]
2004
+ }
2005
+ ],
2006
+ "source": [
2007
+ "model = Idefics3ForConditionalGeneration.from_pretrained(source_model_id , torch_dtype=torch.bfloat16).to(DEVICE)\n",
2008
+ "model.load_adapter(destination_model_id, device_map=\"auto\")"
2009
+ ]
2010
+ },
2011
+ {
2012
+ "cell_type": "code",
2013
+ "execution_count": null,
2014
+ "metadata": {},
2015
+ "outputs": [],
2016
+ "source": []
2017
  }
2018
  ],
2019
  "metadata": {
 
14699
  }
14700
  },
14701
  "nbformat": 4,
14702
+ "nbformat_minor": 4
14703
  }