Alex Telitsine committed on
Commit
cc6b80f
·
1 Parent(s): 2c156c4

Resnet Test Quantization

Browse files
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. Int8ANE.ipynb +403 -0
.DS_Store ADDED
Binary file (10.2 kB). View file
 
Int8ANE.ipynb ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "69faf98f-4067-4974-a3cf-2b7aa709d65c",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "pip install coremltools==8.0b1 torch==2.3.0 torchvision torchaudio scikit-learn==1.1.2 "
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 38,
16
+ "id": "56b386de-6f8c-4814-9159-79aef921c810",
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "name": "stderr",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "Converting PyTorch Frontend ==> MIL Ops: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 440/441 [00:00<00:00, 6548.48 ops/s]\n",
24
+ "Running MIL frontend_pytorch pipeline: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 139.19 passes/s]\n",
25
+ "Running MIL default pipeline: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 79/79 [00:01<00:00, 57.60 passes/s]\n",
26
+ "Running MIL backend_mlprogram pipeline: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 233.95 passes/s]\n"
27
+ ]
28
+ },
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "OptimizationConfig LUT\n",
34
+ "<class 'coremltools.optimize.coreml._quantization_passes.palettize_weights'>\n"
35
+ ]
36
+ },
37
+ {
38
+ "name": "stderr",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "Running compression pass palettize_weights: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 99.79 ops/s]\n",
42
+ "Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]\n",
43
+ "Running MIL default pipeline: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 176.72 passes/s]\n",
44
+ "Running MIL backend_mlprogram pipeline: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 180.92 passes/s]\n"
45
+ ]
46
+ },
47
+ {
48
+ "name": "stdout",
49
+ "output_type": "stream",
50
+ "text": [
51
+ "OptimizationConfig LINEAR\n",
52
+ "-------- (W4) -------- \n",
53
+ "<class 'coremltools.optimize.coreml._quantization_passes.linear_quantize_weights'>\n"
54
+ ]
55
+ },
56
+ {
57
+ "name": "stderr",
58
+ "output_type": "stream",
59
+ "text": [
60
+ "Running compression pass linear_quantize_weights: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 92.51 ops/s]\n",
61
+ "Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]\n",
62
+ "Running MIL default pipeline: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 167.87 passes/s]\n",
63
+ "Running MIL backend_mlprogram pipeline: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 209.76 passes/s]\n"
64
+ ]
65
+ },
66
+ {
67
+ "name": "stdout",
68
+ "output_type": "stream",
69
+ "text": [
70
+ "-------- W8 selected! ---------- \n",
71
+ "-------- Activation A8 quant! ---------- \n",
72
+ "<class 'coremltools.optimize.coreml.experimental._quantization_passes.insert_prefix_quantize_dequantize_pair'>\n"
73
+ ]
74
+ },
75
+ {
76
+ "name": "stderr",
77
+ "output_type": "stream",
78
+ "text": [
79
+ "Running activation compression pass insert_prefix_quantize_dequantize_pair: 100%|██████████████████████████████████████████████████████████████████████████████████| 522/522 [00:00<00:00, 7993.67 ops/s]\n",
80
+ "Running compression pass linear_quantize_activations: start calibrating 10 samples\n",
81
+ "Running compression pass linear_quantize_activations: calibration may take a while ...\n",
82
+ "Running compression pass linear_quantize_activations: calibrating sample 1/10 succeeds.\n",
83
+ "Running compression pass linear_quantize_activations: calibrating sample 2/10 succeeds.\n",
84
+ "Running compression pass linear_quantize_activations: calibrating sample 3/10 succeeds.\n",
85
+ "Running compression pass linear_quantize_activations: calibrating sample 4/10 succeeds.\n",
86
+ "Running compression pass linear_quantize_activations: calibrating sample 5/10 succeeds.\n",
87
+ "Running compression pass linear_quantize_activations: calibrating sample 6/10 succeeds.\n",
88
+ "Running compression pass linear_quantize_activations: calibrating sample 7/10 succeeds.\n",
89
+ "Running compression pass linear_quantize_activations: calibrating sample 8/10 succeeds.\n",
90
+ "Running compression pass linear_quantize_activations: calibrating sample 9/10 succeeds.\n",
91
+ "Running compression pass linear_quantize_activations: calibrating sample 10/10 succeeds.\n",
92
+ "Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]\n",
93
+ "Running MIL default pipeline: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:01<00:00, 56.74 passes/s]\n",
94
+ "Running MIL backend_mlprogram pipeline: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 76.64 passes/s]\n"
95
+ ]
96
+ },
97
+ {
98
+ "name": "stdout",
99
+ "output_type": "stream",
100
+ "text": [
101
+ "OptimizationConfig LUT(LINEAR)\n",
102
+ "-------- LUT(W8) -------- \n",
103
+ "<class 'coremltools.optimize.coreml._quantization_passes.linear_quantize_weights'>\n"
104
+ ]
105
+ },
106
+ {
107
+ "name": "stderr",
108
+ "output_type": "stream",
109
+ "text": [
110
+ "Running compression pass linear_quantize_weights: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 107.97 ops/s]\n",
111
+ "Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]\n",
112
+ "Running MIL default pipeline: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 176.48 passes/s]\n",
113
+ "Running MIL backend_mlprogram pipeline: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 215.97 passes/s]\n"
114
+ ]
115
+ },
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "<class 'coremltools.optimize.coreml._quantization_passes.palettize_weights'>\n"
121
+ ]
122
+ },
123
+ {
124
+ "name": "stderr",
125
+ "output_type": "stream",
126
+ "text": [
127
+ "Running compression pass palettize_weights: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 121/121 [00:00<00:00, 116588.74 ops/s]\n",
128
+ "Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]\n",
129
+ "Running MIL default pipeline: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 180.58 passes/s]\n",
130
+ "Running MIL backend_mlprogram pipeline: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 198.24 passes/s]\n"
131
+ ]
132
+ },
133
+ {
134
+ "name": "stdout",
135
+ "output_type": "stream",
136
+ "text": [
137
+ "-------- LUT4+W8 selected! ---------- \n",
138
+ "-------- Activation A8 quant! ---------- \n",
139
+ "<class 'coremltools.optimize.coreml.experimental._quantization_passes.insert_prefix_quantize_dequantize_pair'>\n"
140
+ ]
141
+ },
142
+ {
143
+ "name": "stderr",
144
+ "output_type": "stream",
145
+ "text": [
146
+ "Running activation compression pass insert_prefix_quantize_dequantize_pair: 100%|██████████████████████████████████████████████████████████████████████████████████| 522/522 [00:00<00:00, 6895.20 ops/s]\n",
147
+ "Running compression pass linear_quantize_activations: start calibrating 10 samples\n",
148
+ "Running compression pass linear_quantize_activations: calibration may take a while ...\n",
149
+ "Running compression pass linear_quantize_activations: calibrating sample 1/10 succeeds.\n",
150
+ "Running compression pass linear_quantize_activations: calibrating sample 2/10 succeeds.\n",
151
+ "Running compression pass linear_quantize_activations: calibrating sample 3/10 succeeds.\n",
152
+ "Running compression pass linear_quantize_activations: calibrating sample 4/10 succeeds.\n",
153
+ "Running compression pass linear_quantize_activations: calibrating sample 5/10 succeeds.\n",
154
+ "Running compression pass linear_quantize_activations: calibrating sample 6/10 succeeds.\n",
155
+ "Running compression pass linear_quantize_activations: calibrating sample 7/10 succeeds.\n",
156
+ "Running compression pass linear_quantize_activations: calibrating sample 8/10 succeeds.\n",
157
+ "Running compression pass linear_quantize_activations: calibrating sample 9/10 succeeds.\n",
158
+ "Running compression pass linear_quantize_activations: calibrating sample 10/10 succeeds.\n",
159
+ "Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]\n",
160
+ "Running MIL default pipeline: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:01<00:00, 70.87 passes/s]\n",
161
+ "Running MIL backend_mlprogram pipeline: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 116.62 passes/s]\n"
162
+ ]
163
+ },
164
+ {
165
+ "name": "stdout",
166
+ "output_type": "stream",
167
+ "text": [
168
+ "rnfs-A8W8-LUT4-b1.mlpackage\n",
169
+ "Done!\n"
170
+ ]
171
+ }
172
+ ],
173
+ "source": [
174
+ "import torch\n",
175
+ "import torch.nn as nn\n",
176
+ "import torch.nn.functional as F\n",
177
+ "import torchvision.transforms as transforms\n",
178
+ "import coremltools as ct\n",
179
+ "import coremltools.optimize as cto\n",
180
+ "from PIL import Image\n",
181
+ "import numpy as np\n",
182
+ "import requests\n",
183
+ "import os\n",
184
+ "\n",
185
+ "\n",
186
+ "class BasicBlock(nn.Module):\n",
187
+ " expansion = 1\n",
188
+ "\n",
189
+ " def __init__(self, in_planes, planes, stride=1):\n",
190
+ " super(BasicBlock, self).__init__()\n",
191
+ " self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)\n",
192
+ " self.bn1 = nn.BatchNorm2d(planes)\n",
193
+ " self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)\n",
194
+ " self.bn2 = nn.BatchNorm2d(planes)\n",
195
+ "\n",
196
+ " self.shortcut = nn.Sequential()\n",
197
+ " if stride != 1 or in_planes != self.expansion*planes:\n",
198
+ " self.shortcut = nn.Sequential(\n",
199
+ " nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),\n",
200
+ " nn.BatchNorm2d(self.expansion*planes)\n",
201
+ " )\n",
202
+ "\n",
203
+ " def forward(self, x):\n",
204
+ " out = F.relu(self.bn1(self.conv1(x)))\n",
205
+ " out = self.bn2(self.conv2(out))\n",
206
+ " out += self.shortcut(x)\n",
207
+ " out = F.relu(out)\n",
208
+ " return out\n",
209
+ "\n",
210
+ "class Bottleneck(nn.Module):\n",
211
+ " expansion = 4\n",
212
+ "\n",
213
+ " def __init__(self, in_planes, planes, stride=1):\n",
214
+ " super(Bottleneck, self).__init__()\n",
215
+ " self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)\n",
216
+ " self.bn1 = nn.BatchNorm2d(planes)\n",
217
+ " self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)\n",
218
+ " self.bn2 = nn.BatchNorm2d(planes)\n",
219
+ " self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)\n",
220
+ " self.bn3 = nn.BatchNorm2d(self.expansion*planes)\n",
221
+ "\n",
222
+ " self.shortcut = nn.Sequential()\n",
223
+ " if stride != 1 or in_planes != self.expansion*planes:\n",
224
+ " self.shortcut = nn.Sequential(\n",
225
+ " nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),\n",
226
+ " nn.BatchNorm2d(self.expansion*planes)\n",
227
+ " )\n",
228
+ "\n",
229
+ " def forward(self, x):\n",
230
+ " out = F.relu(self.bn1(self.conv1(x)))\n",
231
+ " out = F.relu(self.bn2(self.conv2(out)))\n",
232
+ " out = self.bn3(self.conv3(out))\n",
233
+ " out += self.shortcut(x)\n",
234
+ " out = F.relu(out)\n",
235
+ " return out\n",
236
+ "\n",
237
+ "class ResNet(nn.Module):\n",
238
+ " def __init__(self, block, num_blocks, num_classes=1000):\n",
239
+ " super(ResNet, self).__init__()\n",
240
+ " self.in_planes = 64\n",
241
+ "\n",
242
+ " self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)\n",
243
+ " self.bn1 = nn.BatchNorm2d(64)\n",
244
+ " self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n",
245
+ " self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)\n",
246
+ " self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)\n",
247
+ " self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)\n",
248
+ " self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)\n",
249
+ " self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n",
250
+ " self.fc = nn.Linear(512*block.expansion, num_classes)\n",
251
+ "\n",
252
+ " def _make_layer(self, block, planes, num_blocks, stride):\n",
253
+ " strides = [stride] + [1]*(num_blocks-1)\n",
254
+ " layers = []\n",
255
+ " for stride in strides:\n",
256
+ " layers.append(block(self.in_planes, planes, stride))\n",
257
+ " self.in_planes = planes * block.expansion\n",
258
+ " return nn.Sequential(*layers)\n",
259
+ "\n",
260
+ " def forward(self, x):\n",
261
+ " x = F.relu(self.bn1(self.conv1(x)))\n",
262
+ " x = self.maxpool(x)\n",
263
+ " x = self.layer1(x)\n",
264
+ " x = self.layer2(x)\n",
265
+ " x = self.layer3(x)\n",
266
+ " x = self.layer4(x)\n",
267
+ " x = self.avgpool(x)\n",
268
+ " x = torch.flatten(x, 1)\n",
269
+ " x = self.fc(x)\n",
270
+ " return x\n",
271
+ "\n",
272
+ "def ResNet50():\n",
273
+ " return ResNet(Bottleneck, [3, 4, 6, 3])\n",
274
+ "\n",
275
+ "# Initialize the model\n",
276
+ "model = ResNet50()\n",
277
+ "model.eval() # Switch to inference mode\n",
278
+ "\n",
279
+ "# Custom batch size and image size\n",
280
+ "batch_size = 1\n",
281
+ "image_size = 224 #1024 #224 # You can change this value to any desired input size\n",
282
+ "\n",
283
+ "# Example input tensor with custom batch size and image size\n",
284
+ "input_tensor = torch.randn(batch_size, 3, image_size, image_size)\n",
285
+ "\n",
286
+ "# Perform forward pass and trace the model\n",
287
+ "traced_model = torch.jit.trace(model, input_tensor)\n",
288
+ "#print(output)\n",
289
+ "\n",
290
+ "# Exporting for iOS18\n",
291
+ "coreml_model_iOS18 = ct.convert(\n",
292
+ " traced_model,\n",
293
+ " inputs=[ct.TensorType(name=\"input\", shape=input_tensor.shape, dtype=np.float16)],\n",
294
+ " #classifier_config=ct.ClassifierConfig(class_labels=class_labels),\n",
295
+ " minimum_deployment_target=ct.target.iOS18\n",
296
+ ")\n",
297
+ "a = f\"resnet-from-scratch-b{batch_size}.mlpackage\"\n",
298
+ "coreml_model_iOS18.save(a)\n",
299
+ "\n",
300
+ "# -------------------- quantization LUT only ----------------------------\n",
301
+ "print(\"OptimizationConfig LUT\")\n",
302
+ "\n",
303
+ "config = cto.coreml.OptimizationConfig(\n",
304
+ " global_config=cto.coreml.OpPalettizerConfig(mode=\"uniform\", nbits=4)\n",
305
+ ")\n",
306
+ "compressed_model = cto.coreml.palettize_weights(coreml_model_iOS18, config)\n",
307
+ "a = f\"rnfs-4bit-b{batch_size}.mlpackage\"\n",
308
+ "compressed_model.save(a)\n",
309
+ "\n",
310
+ "\n",
311
+ "# -------------------- OptimizationConfig LINEAR ----------------------------\n",
312
+ "print(\"OptimizationConfig LINEAR\")\n",
313
+ "\n",
314
+ "dt = ct.converters.mil.mil.types.int4 \n",
315
+ "print(\"-------- (W4) -------- \")\n",
316
+ "\n",
317
+ "weight_config = cto.coreml.OptimizationConfig(\n",
318
+ " global_config=cto.coreml.OpLinearQuantizerConfig(\n",
319
+ " mode=\"linear_symmetric\", dtype=dt\n",
320
+ " )\n",
321
+ ")\n",
322
+ "\n",
323
+ "compressed_model2 = cto.coreml.linear_quantize_weights(coreml_model_iOS18, weight_config) \n",
324
+ "print(\"-------- W4 selected! ---------- \")\n",
325
+ "\n",
326
+ "activation_config = cto.coreml.OptimizationConfig(\n",
327
+ " global_config=cto.coreml.experimental.OpActivationLinearQuantizerConfig(\n",
328
+ " mode=\"linear_symmetric\"\n",
329
+ " )\n",
330
+ ")\n",
331
+ "print(\"-------- Activation A8 quant! ---------- \")\n",
332
+ "compressed_model_a8 = cto.coreml.experimental.linear_quantize_activations(\n",
333
+ " compressed_model2, \n",
334
+ " activation_config, [{\"input\": torch.randn_like(input_tensor)+i} for i in range(10)]\n",
335
+ ")\n",
336
+ "a = f\"rnfs-A8W4-b{batch_size}.mlpackage\"\n",
337
+ "compressed_model_a8.save(a)\n",
338
+ "\n",
339
+ "\n",
340
+ "# -------------------- OptimizationConfig LUT(LINEAR)\" ----------------------------\n",
341
+ "print(\"OptimizationConfig LUT(LINEAR)\")\n",
342
+ "\n",
343
+ "dt = ct.converters.mil.mil.types.int8 # lut is 4 bit already\n",
344
+ "print(\"-------- LUT(W8) -------- \")\n",
345
+ "weight_config = cto.coreml.OptimizationConfig(\n",
346
+ " global_config=cto.coreml.OpLinearQuantizerConfig(\n",
347
+ " mode=\"linear_symmetric\", dtype=dt\n",
348
+ " )\n",
349
+ ")\n",
350
+ "\n",
351
+ "compressed_model1 = cto.coreml.linear_quantize_weights(coreml_model_iOS18, weight_config) \n",
352
+ "compressed_model2 = cto.coreml.palettize_weights(compressed_model1, config, joint_compression=True)\n",
353
+ "print(\"-------- LUT4+W8 selected! ---------- \")\n",
354
+ "\n",
355
+ "activation_config = cto.coreml.OptimizationConfig(\n",
356
+ " global_config=cto.coreml.experimental.OpActivationLinearQuantizerConfig(\n",
357
+ " mode=\"linear_symmetric\"\n",
358
+ " )\n",
359
+ ")\n",
360
+ "print(\"-------- Activation A8 quant! ---------- \")\n",
361
+ "compressed_model_a8 = cto.coreml.experimental.linear_quantize_activations(\n",
362
+ " compressed_model2, \n",
363
+ " activation_config, [{\"input\": torch.randn_like(input_tensor)+i} for i in range(10)]\n",
364
+ ")\n",
365
+ "\n",
366
+ "a = f\"rnfs-A8W8-LUT4-b{batch_size}.mlpackage\"\n",
367
+ "compressed_model_a8.save(a)\n",
368
+ "\n",
369
+ "print(a)\n",
370
+ "print(\"Done!\")\n"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "id": "6e7808a0-7228-4964-9fa7-6a703a34d6dc",
377
+ "metadata": {},
378
+ "outputs": [],
379
+ "source": []
380
+ }
381
+ ],
382
+ "metadata": {
383
+ "kernelspec": {
384
+ "display_name": "Python 3 (ipykernel)",
385
+ "language": "python",
386
+ "name": "python3"
387
+ },
388
+ "language_info": {
389
+ "codemirror_mode": {
390
+ "name": "ipython",
391
+ "version": 3
392
+ },
393
+ "file_extension": ".py",
394
+ "mimetype": "text/x-python",
395
+ "name": "python",
396
+ "nbconvert_exporter": "python",
397
+ "pygments_lexer": "ipython3",
398
+ "version": "3.10.14"
399
+ }
400
+ },
401
+ "nbformat": 4,
402
+ "nbformat_minor": 5
403
+ }