{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to Machine Learning\n",
    "\n",
    "This notebook is an example of a CNN for recognizing handwritten digits (MNIST).\n",
    "\n",
    "Most of this code is from https://keras.io/examples/vision/mnist_convnet/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras import layers\n",
    "\n",
    "# Hide GPU from visible devices\n",
    "tf.config.set_visible_devices([], 'GPU')\n",
    "\n",
    "# Seed the Python, NumPy and TensorFlow random number generators so that\n",
    "# repeated runs are as repeatable as the libraries allow\n",
    "SEED = 42\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "tf.random.set_seed(SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x_train shape: (60000, 28, 28, 1)\n",
      "60000 train samples\n",
      "10000 test samples\n"
     ]
    }
   ],
   "source": [
    "# Model / data parameters\n",
    "num_classes = 10\n",
    "input_shape = (28, 28, 1)\n",
    "\n",
    "# Load the data and split it between train and test sets\n",
    "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n",
    "\n",
    "# Scale images to the [0, 1] range\n",
    "x_train = x_train.astype(\"float32\") / 255\n",
    "x_test = x_test.astype(\"float32\") / 255\n",
    "\n",
    "# Make sure images have shape (28, 28, 1)\n",
    "x_train = np.expand_dims(x_train, -1)\n",
    "x_test = np.expand_dims(x_test, -1)\n",
    "print(\"x_train shape:\", x_train.shape)\n",
    "print(x_train.shape[0], \"train samples\")\n",
    "print(x_test.shape[0], \"test samples\")\n",
    "\n",
    "\n",
    "# convert class vectors to binary class matrices (one-hot encoding)\n",
    "y_train = keras.utils.to_categorical(y_train, num_classes)\n",
    "y_test = keras.utils.to_categorical(y_test, num_classes)\n",
    "# Label k becomes a row with a 1 at index k, e.g. with 4 classes:\n",
    "# [0, 1, 2, 3] -> [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      " Layer (type) Output Shape Param # \n",
      "=================================================================\n",
      " conv2d (Conv2D) (None, 26, 26, 32) 320 \n",
      " \n",
      " max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0 \n",
      " ) \n",
      " \n",
      " conv2d_1 (Conv2D) (None, 11, 11, 64) 18496 \n",
      " \n",
      " max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0 \n",
      " 2D) \n",
      " \n",
      " flatten (Flatten) (None, 1600) 0 \n",
      " \n",
      " dropout (Dropout) (None, 1600) 0 \n",
      " \n",
      " dense (Dense) (None, 10) 16010 \n",
      " \n",
      "=================================================================\n",
      "Total params: 34,826\n",
      "Trainable params: 34,826\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model = keras.Sequential(\n",
    "    [\n",
    "        keras.Input(shape=input_shape),\n",
    "        layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\"),\n",
    "        layers.MaxPooling2D(pool_size=(2, 2)),\n",
    "        layers.Conv2D(64, kernel_size=(3, 3), activation=\"relu\"),\n",
    "        layers.MaxPooling2D(pool_size=(2, 2)),\n",
    "        layers.Flatten(),\n",
    "        layers.Dropout(0.5),\n",
    "        layers.Dense(num_classes, activation=\"softmax\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 128\n",
    "epochs = 15\n",
    "\n",
    "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/15\n",
      "422/422 [==============================] - 9s 20ms/step - loss: 0.3573 - accuracy: 0.8919 - val_loss: 0.0857 - val_accuracy: 0.9777\n",
      "Epoch 2/15\n",
      "422/422 [==============================] - 8s 19ms/step - loss: 0.1184 - accuracy: 0.9636 - val_loss: 0.0608 - val_accuracy: 0.9825\n",
      "Epoch 3/15\n",
      "422/422 [==============================] - 8s 19ms/step - loss: 0.0862 - accuracy: 0.9733 - val_loss: 0.0496 - val_accuracy: 0.9868\n",
      "Epoch 4/15\n",
      "422/422 [==============================] - 8s 20ms/step - loss: 0.0724 - accuracy: 0.9778 - val_loss: 0.0424 - val_accuracy: 0.9883\n",
      "Epoch 5/15\n",
      "422/422 [==============================] - 8s 19ms/step - loss: 0.0656 - accuracy: 0.9793 - val_loss: 0.0398 - val_accuracy: 0.9895\n",
      "Epoch 6/15\n",
      "422/422 [==============================] - 8s 20ms/step - loss: 0.0591 - accuracy: 0.9816 - val_loss: 0.0361 - val_accuracy: 0.9912\n",
      "Epoch 7/15\n",
      "422/422 [==============================] - 8s 20ms/step - loss: 0.0522 - accuracy: 0.9833 - val_loss: 0.0315 - val_accuracy: 0.9922\n",
      "Epoch 8/15\n",
      "422/422 [==============================] - 8s 20ms/step - loss: 0.0485 - accuracy: 0.9846 - val_loss: 0.0319 - val_accuracy: 0.9910\n",
      "Epoch 9/15\n",
      "422/422 [==============================] - 9s 20ms/step - loss: 0.0447 - accuracy: 0.9858 - val_loss: 0.0331 - val_accuracy: 0.9917\n",
      "Epoch 10/15\n",
      "422/422 [==============================] - 9s 21ms/step - loss: 0.0416 - accuracy: 0.9871 - val_loss: 0.0309 - val_accuracy: 0.9922\n",
      "Epoch 11/15\n",
      "422/422 [==============================] - 8s 20ms/step - loss: 0.0397 - accuracy: 0.9877 - val_loss: 0.0281 - val_accuracy: 0.9932\n",
      "Epoch 12/15\n",
      "422/422 [==============================] - 9s 20ms/step - loss: 0.0393 - accuracy: 0.9874 - val_loss: 0.0308 - val_accuracy: 0.9908\n",
      "Epoch 13/15\n",
      "422/422 [==============================] - 8s 20ms/step - loss: 0.0373 - accuracy: 0.9882 - val_loss: 0.0276 - val_accuracy: 0.9928\n",
      "Epoch 14/15\n",
      "422/422 [==============================] - 8s 19ms/step - loss: 0.0357 - accuracy: 0.9879 - val_loss: 0.0265 - val_accuracy: 0.9935\n",
      "Epoch 15/15\n",
      "422/422 [==============================] - 8s 19ms/step - loss: 0.0334 - accuracy: 0.9886 - val_loss: 0.0298 - val_accuracy: 0.9927\n"
     ]
    }
   ],
   "source": [
    "# This cell can be run multiple times (each run continues training the same model),\n",
    "# but keep in mind that the model will probably start overfitting\n",
    "\n",
    "# Keep the returned History object instead of letting its repr become the cell output\n",
    "history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate the Trained Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test loss: 0.02596166729927063\n",
      "Test accuracy: 0.9919000267982483\n"
     ]
    }
   ],
   "source": [
    "score = model.evaluate(x_test, y_test, verbose=0)\n",
    "print(\"Test loss:\", score[0])\n",
    "print(\"Test accuracy:\", score[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save Model (h5 format)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save(\"mnist.h5\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.7 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "ad2bdc8ecc057115af97d19610ffacc2b4e99fae6737bb82f5d7fb13d2f2c186"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}