{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ab540ee7",
   "metadata": {},
   "source": [
    "# Decision Tree\n",
    "\n",
    "Trains decision-tree classifiers on the Iris dataset using the Gini and entropy split criteria and evaluates each on a 30% hold-out split."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "92d3ce84",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "dd4c544d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Iris dataset: X is the feature matrix, y the class labels.\n",
    "iris = load_iris()\n",
    "X, y = iris.data, iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "abe99084",
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_using_gini(X_train, y_train, random_state=100, max_depth=3, min_samples_leaf=4):\n",
    "    \"\"\"Fit a decision tree that splits on Gini impurity and return it.\n",
    "\n",
    "    The keyword defaults are the baseline settings; pass other values to\n",
    "    try a different tree instead of redefining this function.\n",
    "    \"\"\"\n",
    "    clf_gini = DecisionTreeClassifier(\n",
    "        criterion=\"gini\",\n",
    "        random_state=random_state,\n",
    "        max_depth=max_depth,\n",
    "        min_samples_leaf=min_samples_leaf,\n",
    "    )\n",
    "    clf_gini.fit(X_train, y_train)\n",
    "    return clf_gini"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3e9ddda5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_using_entropy(X_train, y_train, random_state=100, max_depth=3, min_samples_leaf=4):\n",
    "    \"\"\"Fit a decision tree that splits on entropy and return it.\n",
    "\n",
    "    Takes the same keyword arguments, with the same defaults, as\n",
    "    train_using_gini so the two criteria can be compared like for like.\n",
    "    \"\"\"\n",
    "    # Create the classifier object\n",
    "    clf_entropy = DecisionTreeClassifier(\n",
    "        criterion=\"entropy\",\n",
    "        random_state=random_state,\n",
    "        max_depth=max_depth,\n",
    "        min_samples_leaf=min_samples_leaf,\n",
    "    )\n",
    "    # Train it\n",
    "    clf_entropy.fit(X_train, y_train)\n",
    "    return clf_entropy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "74fd9b39",
   "metadata": {},
   "outputs": [],
   "source": [
    "def prediction(X_test, clf_object):\n",
    "    \"\"\"Predict labels for X_test with the fitted classifier, print them and return them.\"\"\"\n",
    "    y_pred = clf_object.predict(X_test)\n",
    "    print(\"Predicted values:\", y_pred)\n",
    "    return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "0b47818b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def cal_accuracy(y_test, y_pred):\n",
    "    \"\"\"Print the confusion matrix, the accuracy as a percentage and the classification report.\n",
    "\n",
    "    Nothing is returned; the metrics are only written to standard output.\n",
    "    \"\"\"\n",
    "    print(\"Confusion Matrix: \", confusion_matrix(y_test, y_pred))\n",
    "    print(\"Accuracy:\", accuracy_score(y_test, y_pred) * 100)\n",
    "    print(\"Report :\", classification_report(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "0f94ba7d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dimensions for training data (105, 4)\n",
      "Dimensions for testing data (45, 4)\n"
     ]
    }
   ],
   "source": [
    "# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)\n",
    "print(\"Dimensions for training data\", X_train.shape)\n",
    "print(\"Dimensions for testing data\", X_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a7ed365c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results Using Gini Index:\n",
      "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
      " 1 2 2 0 1 2 2 0]\n",
      "Confusion Matrix: [[16 0 0]\n",
      " [ 0 10 1]\n",
      " [ 0 1 17]]\n",
      "Accuracy: 95.55555555555556\n",
      "Report : precision recall f1-score support\n",
      "\n",
      " 0 1.00 1.00 1.00 16\n",
      " 1 0.91 0.91 0.91 11\n",
      " 2 0.94 0.94 0.94 18\n",
      "\n",
      " accuracy 0.96 45\n",
      " macro avg 0.95 0.95 0.95 45\n",
      "weighted avg 0.96 0.96 0.96 45\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Baseline tree using the Gini index\n",
    "clf_gini = train_using_gini(X_train, y_train)\n",
    "print(\"Results Using Gini Index:\")\n",
    "# Prediction using gini\n",
    "y_pred_gini = prediction(X_test, clf_gini)\n",
    "cal_accuracy(y_test, y_pred_gini)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0cd3759c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results Using Entropy:\n",
      "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
      " 1 2 2 0 1 2 2 0]\n",
      "Confusion Matrix: [[16 0 0]\n",
      " [ 0 10 1]\n",
      " [ 0 1 17]]\n",
      "Accuracy: 95.55555555555556\n",
      "Report : precision recall f1-score support\n",
      "\n",
      " 0 1.00 1.00 1.00 16\n",
      " 1 0.91 0.91 0.91 11\n",
      " 2 0.94 0.94 0.94 18\n",
      "\n",
      " accuracy 0.96 45\n",
      " macro avg 0.95 0.95 0.95 45\n",
      "weighted avg 0.96 0.96 0.96 45\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Analysing metrics using entropy\n",
    "clf_entropy = train_using_entropy(X_train, y_train)\n",
    "print(\"Results Using Entropy:\")\n",
    "# Prediction using entropy\n",
    "y_pred_entropy = prediction(X_test, clf_entropy)\n",
    "cal_accuracy(y_test, y_pred_entropy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "bfb36a8a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results Using Gini Index:\n",
      "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
      " 1 2 2 0 1 2 2 0]\n",
      "Confusion Matrix: [[16 0 0]\n",
      " [ 0 10 1]\n",
      " [ 0 1 17]]\n",
      "Accuracy: 95.55555555555556\n",
      "Report : precision recall f1-score support\n",
      "\n",
      " 0 1.00 1.00 1.00 16\n",
      " 1 0.91 0.91 0.91 11\n",
      " 2 0.94 0.94 0.94 18\n",
      "\n",
      " accuracy 0.96 45\n",
      " macro avg 0.95 0.95 0.95 45\n",
      "weighted avg 0.96 0.96 0.96 45\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Let's observe what the result will be if we change the depth to 5, the minimum\n",
    "# samples per leaf to 3 and the random seed to 150. The settings are passed as\n",
    "# arguments, so the baseline train_using_gini defined earlier is left untouched.\n",
    "clf_gini_deep = train_using_gini(X_train, y_train, random_state=150, max_depth=5, min_samples_leaf=3)\n",
    "print(\"Results Using Gini Index:\")\n",
    "# Prediction using gini\n",
    "y_pred_gini_deep = prediction(X_test, clf_gini_deep)\n",
    "cal_accuracy(y_test, y_pred_gini_deep)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}