Delete Coding Excercise.ipynb
Coding Excercise.ipynb  DELETED  +0 -266
@@ -1,266 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "37aa8c7a",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2023-03-26T07:31:34.213141Z",
-     "start_time": "2023-03-26T07:31:14.082603Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "!pip install huggingface_hub\n",
-    "!pip install datasets\n",
-    "!pip install keras"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "59bed9bc",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2023-03-26T14:03:42.287776Z",
-     "start_time": "2023-03-26T14:03:39.989670Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from huggingface_hub import notebook_login\n",
-    "from datasets import load_dataset\n",
-    "import pandas as pd\n",
-    "from datasets import load_dataset\n",
-    "import tensorflow as tf\n",
-    "from tensorflow.keras.applications.vgg16 import VGG16\n",
-    "from tensorflow.keras.models import Model\n",
-    "from tensorflow.keras.layers import Dense, GlobalAveragePooling2D\n",
-    "from tensorflow.keras.optimizers import Adam\n",
-    "from tensorflow.keras.utils import to_categorical\n",
-    "from PIL import Image\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "57bfa96e",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2023-03-26T14:03:42.317000Z",
-     "start_time": "2023-03-26T14:03:42.289947Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Login successful\n",
-      "Your token has been saved to /root/.huggingface/token\n",
-      "\u001b[1m\u001b[31mAuthenticated through git-credential store but this isn't the helper defined on your machine.\n",
-      "You might have to re-authenticate when pushing to the Hugging Face Hub. Run the following command in your terminal in case you want to set this credential helper as the default\n",
-      "\n",
-      "git config --global credential.helper store\u001b[0m\n"
-     ]
-    }
-   ],
-   "source": [
-    "notebook_login()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "e2407eb7",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2023-03-26T14:03:52.591875Z",
-     "start_time": "2023-03-26T14:03:48.476822Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Found cached dataset parquet (/root/.cache/huggingface/datasets/competitions___parquet/competitions--aiornot-f559c639a48093b2/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n"
-     ]
-    }
-   ],
-   "source": [
-    "# load dataset from hugging face\n",
-    "# prepare data for training, validation and testing\n",
-    "train_ds, val_ds = load_dataset('competitions/aiornot', split=\"train\").train_test_split(test_size=0.15).values()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "e58c6fe1",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2023-03-26T14:04:10.210069Z",
-     "start_time": "2023-03-26T14:03:53.833533Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "data_sz = 1000\n",
-    "X_train = train_ds[:data_sz]['image']\n",
-    "X_val = val_ds[:data_sz]['image']\n",
-    "Y_train = to_categorical(train_ds[:data_sz]['label'])\n",
-    "Y_val = to_categorical(val_ds[:data_sz]['label'])\n",
-    "# Convert the input data to a NumPy array\n",
-    "X_train = np.stack([np.array(image) for image in X_train])\n",
-    "X_val = np.stack([np.array(image) for image in X_val])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ad809dae",
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2023-03-26T14:04:33.658Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 1/10\n",
-      "22/32 [===================>..........] - ETA: 27s - loss: 2.8218 - accuracy: 0.6630"
-     ]
-    }
-   ],
-   "source": [
-    "with tf.device('/device:GPU:3'):\n",
-    "    # Load the VGG16 model pre-trained on ImageNet\n",
-    "    base_model = VGG16(weights='imagenet', include_top=False)\n",
-    "\n",
-    "    # Add a global spatial average pooling layer\n",
-    "    x = base_model.output\n",
-    "    x = GlobalAveragePooling2D()(x)\n",
-    "\n",
-    "    # Add a fully-connected layer\n",
-    "    x = Dense(1024, activation='relu')(x)\n",
-    "\n",
-    "    # Add a logistic layer with the number of classes of target variable\n",
-    "    num_classes = 2\n",
-    "    predictions = Dense(num_classes, activation='softmax')(x)\n",
-    "\n",
-    "    # Create the final model\n",
-    "    model = Model(inputs=base_model.input, outputs=predictions)\n",
-    "\n",
-    "    # Freeze all layers in the base VGG16 model\n",
-    "    for layer in base_model.layers:\n",
-    "        layer.trainable = False\n",
-    "\n",
-    "    # Compile the model\n",
-    "    model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])\n",
-    "\n",
-    "# Train the model on your new dataset\n",
-    "model.fit(X_train, Y_train, epochs=10, validation_data=(X_val, Y_val))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4a0a91c9",
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2023-03-26T14:05:03.786Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# Generate predictions for the data\n",
-    "y_pred = model.predict(X_val)\n",
-    "# Convert predictions and true labels to class indices\n",
-    "y_pred_classes = y_pred.argmax(axis=1)\n",
-    "y_true_classes = Y_val.argmax(axis=1)\n",
-    "# Find the indices of the misclassified samples\n",
-    "misclassified_indices = np.where(y_pred_classes != y_true_classes)[0]\n",
-    "\n",
-    "# Get the misclassified samples\n",
-    "# x_misclassified = X_val[misclassified_indices]\n",
-    "# y_misclassified_true = Y_val[misclassified_indices]\n",
-    "# y_misclassified_pred = y_pred[misclassified_indices]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "254d9907",
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2023-03-26T14:05:06.090Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# a helper function to view missclassfied data with the image and prediction\n",
-    "def checkMiss(idx):\n",
-    "    print(\"\\ncorrect:\", Y_val[idx])\n",
-    "    print(\"miss:\", y_pred[idx])\n",
-    "    img = Image.fromarray(X_val[idx])\n",
-    "    img.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "60bbb1b9",
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2023-03-26T14:05:07.650Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# view 5 miss classified data to see what could be improved\n",
-    "for i in range(10):\n",
-    "    checkMiss(misclassified_indices[i])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  },
-  "toc": {
-   "base_numbering": 1,
-   "nav_menu": {},
-   "number_sections": false,
-   "sideBar": true,
-   "skip_h1_title": false,
-   "title_cell": "Table of Contents",
-   "title_sidebar": "Contents",
-   "toc_cell": false,
-   "toc_position": {},
-   "toc_section_display": true,
-   "toc_window_display": false
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
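
For reference, the core of the deleted notebook was a transfer-learning setup: a VGG16 base pre-trained on ImageNet, frozen, with a pooled dense head for the two-class `competitions/aiornot` labels. Below is a minimal, self-contained sketch of that setup, assuming TensorFlow 2.x; it is not the deleted code itself. The random input arrays are hypothetical stand-ins for the stacked dataset images, `learning_rate=` replaces the deprecated `lr=` argument used in the deleted cell, and `preprocess_input` is added because the pretrained VGG16 weights expect ImageNet-style preprocessing (the deleted cells fed raw stacked arrays directly).

```python
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

num_classes = 2  # binary "AI or not" labels, one-hot encoded

# Frozen convolutional base pre-trained on ImageNet (downloads weights on first use)
base_model = VGG16(weights='imagenet', include_top=False)
for layer in base_model.layers:
    layer.trainable = False

# Classification head: global average pooling -> dense -> softmax
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=outputs)
# Current Keras releases use `learning_rate=`; the deleted cell used `lr=`.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Hypothetical stand-in batch just to show the training call shape; the
# notebook instead stacked PIL images loaded from the Hugging Face dataset.
X_train = preprocess_input(
    np.random.randint(0, 256, (8, 224, 224, 3)).astype('float32'))
Y_train = to_categorical(np.random.randint(0, num_classes, 8), num_classes)
model.fit(X_train, Y_train, epochs=1)
```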