{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "LnPbntVRnfvV" }, "source": [ "Importing the Dependencies" ] }, { "cell_type": "code", "metadata": { "id": "-71UtHzNVWjB" }, "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import svm\n", "from sklearn.metrics import accuracy_score" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "bmfOfG8joBBy" }, "source": [ "Data Collection and Analysis\n", "\n", "PIMA Diabetes Dataset" ] }, { "cell_type": "code", "metadata": { "id": "Xpw6Mj_pn_TL" }, "source": [ "# loading the diabetes dataset to a pandas DataFrame\n", "diabetes_dataset = pd.read_csv('/content/diabetes.csv')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "-tjO09ncovoh", "outputId": "4dd3939d-9cc2-4f80-cf6d-88dd95d02c2e" }, "source": [ "# printing the first 5 rows of the dataset\n", "diabetes_dataset.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", "\n", " DiabetesPedigreeFunction Age Outcome \n", "0 0.627 50 1 \n", "1 0.351 31 0 \n", "2 0.672 32 1 \n", "3 0.167 21 0 \n", "4 2.288 33 1 " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lynParo6pEMB", "outputId": "8d134bf4-ed17-4ee5-9cbe-48d88cdd4495" }, "source": [ "# number of rows and Columns in this dataset\n", "diabetes_dataset.shape" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(768, 9)" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 364 }, "id": "3NDJOlrEpmoL", "outputId": "7a404a6f-955b-4c04-fbe4-8634869eaf8f" }, "source": [ "# getting the statistical measures of the data\n", "diabetes_dataset.describe()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n", "count 768.000000 768.000000 768.000000 768.000000 768.000000 \n", "mean 3.845052 120.894531 69.105469 20.536458 79.799479 \n", "std 3.369578 31.972618 19.355807 15.952218 115.244002 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 1.000000 99.000000 62.000000 0.000000 0.000000 \n", "50% 3.000000 117.000000 72.000000 23.000000 30.500000 \n", "75% 6.000000 140.250000 80.000000 32.000000 127.250000 \n", "max 17.000000 199.000000 122.000000 99.000000 846.000000 \n", "\n", " BMI DiabetesPedigreeFunction Age Outcome \n", "count 768.000000 768.000000 768.000000 768.000000 \n", "mean 31.992578 0.471876 33.240885 0.348958 \n", "std 7.884160 0.331329 11.760232 0.476951 \n", "min 0.000000 0.078000 21.000000 0.000000 \n", "25% 27.300000 0.243750 24.000000 0.000000 \n", "50% 32.000000 0.372500 29.000000 0.000000 \n", "75% 36.600000 0.626250 41.000000 1.000000 \n", "max 67.100000 2.420000 81.000000 1.000000 " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
count768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000
mean3.845052120.89453169.10546920.53645879.79947931.9925780.47187633.2408850.348958
std3.36957831.97261819.35580715.952218115.2440027.8841600.33132911.7602320.476951
min0.0000000.0000000.0000000.0000000.0000000.0000000.07800021.0000000.000000
25%1.00000099.00000062.0000000.0000000.00000027.3000000.24375024.0000000.000000
50%3.000000117.00000072.00000023.00000030.50000032.0000000.37250029.0000000.000000
75%6.000000140.25000080.00000032.000000127.25000036.6000000.62625041.0000001.000000
max17.000000199.000000122.00000099.000000846.00000067.1000002.42000081.0000001.000000
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LrpHzaGpp5dQ", "outputId": "ccc5292b-de17-4fd0-e7a9-ecd379a08404" }, "source": [ "diabetes_dataset['Outcome'].value_counts()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 500\n", "1 268\n", "Name: Outcome, dtype: int64" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "markdown", "metadata": { "id": "cB1qRaNcqeh5" }, "source": [ "0 --> Non-Diabetic\n", "\n", "1 --> Diabetic" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "id": "I6MWR0k_qSCK", "outputId": "2ac9729d-c4ce-4866-d886-5a2da81ba7dd" }, "source": [ "diabetes_dataset.groupby('Outcome').mean()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n", "Outcome \n", "0 3.298000 109.980000 68.184000 19.664000 68.792000 \n", "1 4.865672 141.257463 70.824627 22.164179 100.335821 \n", "\n", " BMI DiabetesPedigreeFunction Age \n", "Outcome \n", "0 30.304200 0.429734 31.190000 \n", "1 35.142537 0.550500 37.067164 " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAge
Outcome
03.298000109.98000068.18400019.66400068.79200030.3042000.42973431.190000
14.865672141.25746370.82462722.164179100.33582135.1425370.55050037.067164
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "code", "metadata": { "id": "RoDW7l9mqqHZ" }, "source": [ "# separating the data and labels\n", "X = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n", "Y = diabetes_dataset['Outcome']" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3eiRW9M9raMm", "outputId": "a4dbd160-65e3-4f7f-f65e-e089695ad3b9" }, "source": [ "print(X)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", ".. ... ... ... ... ... ... \n", "763 10 101 76 48 180 32.9 \n", "764 2 122 70 27 0 36.8 \n", "765 5 121 72 23 112 26.2 \n", "766 1 126 60 0 0 30.1 \n", "767 1 93 70 31 0 30.4 \n", "\n", " DiabetesPedigreeFunction Age \n", "0 0.627 50 \n", "1 0.351 31 \n", "2 0.672 32 \n", "3 0.167 21 \n", "4 2.288 33 \n", ".. ... ... \n", "763 0.171 63 \n", "764 0.340 27 \n", "765 0.245 30 \n", "766 0.349 47 \n", "767 0.315 23 \n", "\n", "[768 rows x 8 columns]\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AoxgTJAMrcCl", "outputId": "a76a9089-12b4-4319-da60-0bfc7c638ad0" }, "source": [ "print(Y)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0 1\n", "1 0\n", "2 1\n", "3 0\n", "4 1\n", " ..\n", "763 0\n", "764 0\n", "765 0\n", "766 1\n", "767 0\n", "Name: Outcome, Length: 768, dtype: int64\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "gHciEFkxsoQP" }, "source": [ "Train Test Split" ] }, { "cell_type": "code", "metadata": { "id": "AEfKGj_yslvD" }, "source": [ "X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2, stratify=Y, random_state=2)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DR05T-o0t3FQ", "outputId": "24f5b16d-a500-49ca-de75-6503b41528d5" }, "source": [ "print(X.shape, X_train.shape, X_test.shape)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(768, 8) (614, 8) (154, 8)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "ElJ3tkOtuC_n" }, "source": [ "Training the Model" ] }, { "cell_type": "code", "metadata": { "id": "5szLWHlNt9xc" }, "source": [ "classifier = svm.SVC(kernel='linear')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 75 }, "id": "ncJWY_7suPAb", "outputId": "e6e9a274-acb9-4d42-f0e0-f5c37e378f8a" }, "source": [ "#training the support vector Machine Classifier\n", "classifier.fit(X_train, Y_train)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "SVC(kernel='linear')" ], "text/html": [ "
SVC(kernel='linear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 14 } ] }, { "cell_type": "markdown", "metadata": { "id": "UV4-CAfquiyP" }, "source": [ "Model Evaluation" ] }, { "cell_type": "markdown", "metadata": { "id": "yhAjGPJWunXa" }, "source": [ "Accuracy Score" ] }, { "cell_type": "code", "metadata": { "id": "fJLEPQK7ueXp" }, "source": [ "# accuracy score on the training data\n", "X_train_prediction = classifier.predict(X_train)\n", "training_data_accuracy = accuracy_score(X_train_prediction, Y_train)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mmJ22qhVvNwj", "outputId": "7540f8ca-5527-4612-d5cd-8746d711220e" }, "source": [ "print('Accuracy score of the training data : ', training_data_accuracy)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Accuracy score of the training data : 0.7833876221498371\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "G2CICFMEvcCl" }, "source": [ "# accuracy score on the test data\n", "X_test_prediction = classifier.predict(X_test)\n", "test_data_accuracy = accuracy_score(X_test_prediction, Y_test)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "i2GcW_t_vz7C", "outputId": "e2b18fd9-f005-42fa-9444-81e8eb57d947" }, "source": [ "print('Accuracy score of the test data : ', test_data_accuracy)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Accuracy score of the test data : 0.7727272727272727\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "gq8ZX1xpwPF5" }, "source": [ "Making a Predictive System" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "U-ULRe4yv5tH", "outputId": "c218e6cf-ac30-4246-9bc6-cc09ac9d81ae" }, "source": [ "input_data = (5,166,72,19,175,25.8,0.587,51)\n", "\n", "# changing the input_data to numpy array\n", "input_data_as_numpy_array = np.asarray(input_data)\n", "\n", "# reshape the array as we are predicting for one instance\n", "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n", "\n", "prediction = classifier.predict(input_data_reshaped)\n", "print(prediction)\n", "\n", "if (prediction[0] == 0):\n", " print('The person is not diabetic')\n", "else:\n", " print('The person is diabetic')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[1]\n", "The person is diabetic\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SVC was fitted with feature names\n", " warnings.warn(\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "vgL6wblpQUtX" }, "source": [ "Saving the trained model" ] }, { "cell_type": "code", "metadata": { "id": "Nn60MdxByjgz" }, "source": [ "import pickle" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "cWzPQs4mQZN_" }, "source": [ "filename = 'trained_model.sav'\n", "pickle.dump(classifier, open(filename, 'wb'))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Wk1T2sMcQ6_U" }, "source": [ "# loading the saved model\n", "loaded_model = pickle.load(open('trained_model.sav', 'rb'))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Bd5OpxHnRPyy", "outputId": "daa664c6-683c-4ac6-986d-46654598fac6" }, "source": [ "input_data = (5,166,72,19,175,25.8,0.587,51)\n", "\n", "# changing the input_data to numpy array\n", "input_data_as_numpy_array = np.asarray(input_data)\n", "\n", "# reshape the array as we are predicting for one instance\n", "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n", "\n", "prediction = loaded_model.predict(input_data_reshaped)\n", "print(prediction)\n", "\n", "if (prediction[0] == 0):\n", " print('The person is not diabetic')\n", "else:\n", " print('The person is diabetic')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[1]\n", "The person is diabetic\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SVC was fitted with feature names\n", " warnings.warn(\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "iGRhGvgfRkvm" }, "source": [], "execution_count": null, "outputs": [] } ] }