Spaces:
Configuration error
Configuration error
File size: 154,125 Bytes
97e3689 |
|
{
"cells": [
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import pickle\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix\n",
"\n",
"from yellowbrick.classifier import ROCAUC\n",
"\n",
"from keras.utils.np_utils import to_categorical\n",
"\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.1. Load models & scaler"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"def _load_pickle(path):\n",
"    '''Deserialize and return the object stored in the pickle file at `path`.'''\n",
"    # NOTE: pickle can execute arbitrary code; only load files you trust.\n",
"    with open(path, \"rb\") as handle:\n",
"        return pickle.load(handle)\n",
"\n",
"\n",
"# All sklearn models\n",
"sklearn_models = _load_pickle(\"./model/all_sklearn.pkl\")\n",
"\n",
"# All deep learning models\n",
"dp_models = _load_pickle(\"./model/all_dp.pkl\")\n",
"\n",
"# Input scaler\n",
"sc = _load_pickle(\"./model/input_scaler.pkl\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.2. Important functions"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"def describe_dataset(dataset_path: str) -> pd.DataFrame:\n",
"    '''\n",
"    Load a CSV dataset and print a short summary of it.\n",
"\n",
"    Prints the column headers, the row and column counts, the per-class counts\n",
"    of the `label` column, whether any value is missing, and the number of\n",
"    duplicated rows.\n",
"\n",
"    Args:\n",
"        dataset_path: path of the CSV file; it must contain a `label` column.\n",
"\n",
"    Returns:\n",
"        The loaded DataFrame, unmodified.\n",
"    '''\n",
"\n",
"    data = pd.read_csv(dataset_path)\n",
"    print(f\"Headers: {list(data.columns.values)}\")\n",
"    print(f'Number of rows: {data.shape[0]} \\nNumber of columns: {data.shape[1]}\\n')\n",
"    print(f\"Labels: \\n{data['label'].value_counts()}\\n\")\n",
"    print(f\"Missing values: {data.isnull().values.any()}\\n\")\n",
"\n",
"    # Count duplicated rows straight from the boolean mask. Summing the\n",
"    # duplicated rows across columns is unnecessary and fails on the string\n",
"    # `label` column with recent pandas versions.\n",
"    duplicate_count = int(data.duplicated().sum())\n",
"    print(f\"Duplicate Rows : {duplicate_count}\")\n",
"\n",
"    return data\n",
"\n",
"\n",
"def round_up_metric_results(results) -> list:\n",
"    '''Return the given metric values (precision score, recall score, ...) rounded to 3 decimals.'''\n",
"    return [round(value, 3) for value in results]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.3. Process test set"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']\n",
"Number of rows: 710 \n",
"Number of columns: 69\n",
"\n",
"Labels: \n",
"H 241\n",
"L 235\n",
"C 234\n",
"Name: label, dtype: int64\n",
"\n",
"Missing values: False\n",
"\n",
"Duplicate Rows : 0\n"
]
}
],
"source": [
"# Load the test set and print its summary\n",
"test_df = describe_dataset(\"./test.csv\")\n",
"\n",
"# Encode the string labels as integers: C -> 0, H -> 1, L -> 2\n",
"for label_name, label_id in ((\"C\", 0), (\"H\", 1), (\"L\", 2)):\n",
"    test_df.loc[test_df[\"label\"] == label_name, \"label\"] = label_id\n",
"\n",
"# Integer class labels\n",
"test_y = test_df[\"label\"].astype('int')\n",
"\n",
"# Scale the features with the loaded input scaler\n",
"test_x = pd.DataFrame(sc.transform(test_df.drop(\"label\", axis = 1)))\n",
"\n",
"# One-hot encode the integer labels\n",
"test_y_cat = to_categorical(test_y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Test set evaluation for all models"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.1. Sklearn models evaluation"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model</th>\n",
" <th>Precision Score</th>\n",
" <th>Recall Score</th>\n",
" <th>Accuracy Score</th>\n",
" <th>F1 Score</th>\n",
" <th>Confusion Matrix</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>LR</td>\n",
" <td>0.995828</td>\n",
" <td>0.995775</td>\n",
" <td>0.995775</td>\n",
" <td>0.995781</td>\n",
" <td>[[234, 0, 0], [1, 240, 0], [2, 0, 233]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SVC</td>\n",
" <td>0.987793</td>\n",
" <td>0.987324</td>\n",
" <td>0.987324</td>\n",
" <td>0.987363</td>\n",
" <td>[[234, 0, 0], [2, 239, 0], [7, 0, 228]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>KNN</td>\n",
" <td>0.955544</td>\n",
" <td>0.949296</td>\n",
" <td>0.949296</td>\n",
" <td>0.949254</td>\n",
" <td>[[233, 1, 0], [2, 239, 0], [33, 0, 202]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>DTC</td>\n",
" <td>0.773783</td>\n",
" <td>0.767606</td>\n",
" <td>0.767606</td>\n",
" <td>0.765410</td>\n",
" <td>[[127, 0, 107], [2, 238, 1], [55, 0, 180]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>SGDC</td>\n",
" <td>0.981748</td>\n",
" <td>0.981690</td>\n",
" <td>0.981690</td>\n",
" <td>0.981707</td>\n",
" <td>[[228, 6, 0], [3, 237, 1], [3, 0, 232]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>NB</td>\n",
" <td>0.856763</td>\n",
" <td>0.842254</td>\n",
" <td>0.842254</td>\n",
" <td>0.838005</td>\n",
" <td>[[148, 73, 13], [14, 227, 0], [4, 8, 223]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>RF</td>\n",
" <td>0.922452</td>\n",
" <td>0.898592</td>\n",
" <td>0.898592</td>\n",
" <td>0.896179</td>\n",
" <td>[[234, 0, 0], [0, 241, 0], [72, 0, 163]]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model Precision Score Recall Score Accuracy Score F1 Score \\\n",
"0 LR 0.995828 0.995775 0.995775 0.995781 \n",
"1 SVC 0.987793 0.987324 0.987324 0.987363 \n",
"2 KNN 0.955544 0.949296 0.949296 0.949254 \n",
"3 DTC 0.773783 0.767606 0.767606 0.765410 \n",
"4 SGDC 0.981748 0.981690 0.981690 0.981707 \n",
"5 NB 0.856763 0.842254 0.842254 0.838005 \n",
"6 RF 0.922452 0.898592 0.898592 0.896179 \n",
"\n",
" Confusion Matrix \n",
"0 [[234, 0, 0], [1, 240, 0], [2, 0, 233]] \n",
"1 [[234, 0, 0], [2, 239, 0], [7, 0, 228]] \n",
"2 [[233, 1, 0], [2, 239, 0], [33, 0, 202]] \n",
"3 [[127, 0, 107], [2, 238, 1], [55, 0, 180]] \n",
"4 [[228, 6, 0], [3, 237, 1], [3, 0, 232]] \n",
"5 [[148, 73, 13], [14, 227, 0], [4, 8, 223]] \n",
"6 [[234, 0, 0], [0, 241, 0], [72, 0, 163]] "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testset_final_results = []\n",
"\n",
"# Score every sklearn model on the scaled test set, one record per model\n",
"for name, model in sklearn_models.items():\n",
"    model_results = model.predict(test_x)\n",
"\n",
"    testset_final_results.append({\n",
"        \"Model\": name,\n",
"        \"Precision Score\": precision_score(test_y, model_results, average=\"weighted\"),\n",
"        \"Recall Score\": recall_score(test_y, model_results, average=\"weighted\"),\n",
"        \"Accuracy Score\": accuracy_score(test_y, model_results),\n",
"        \"F1 Score\": f1_score(test_y, model_results, average=\"weighted\"),\n",
"        \"Confusion Matrix\": confusion_matrix(test_y, model_results, labels=[0, 1, 2]),\n",
"    })\n",
"\n",
"sklearn_eval = pd.DataFrame(\n",
"    testset_final_results,\n",
"    columns=[\"Model\", \"Precision Score\", \"Recall Score\", \"Accuracy Score\", \"F1 Score\", \"Confusion Matrix\"],\n",
")\n",
"\n",
"sklearn_eval"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.2. Deep learning models"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-11-28 10:56:43.727144: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n",
"2022-11-28 10:56:43.847788: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n",
"2022-11-28 10:56:43.976589: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n",
"2022-11-28 10:56:44.117560: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model</th>\n",
" <th>Precision Score</th>\n",
" <th>Recall Score</th>\n",
" <th>Accuracy Score</th>\n",
" <th>F1 Score</th>\n",
" <th>Confusion Matrix</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3_layers</td>\n",
" <td>0.869224</td>\n",
" <td>0.847887</td>\n",
" <td>0.847887</td>\n",
" <td>0.843977</td>\n",
" <td>[[146, 88, 0], [1, 240, 0], [19, 0, 216]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5_layers</td>\n",
" <td>0.934660</td>\n",
" <td>0.929577</td>\n",
" <td>0.929577</td>\n",
" <td>0.927795</td>\n",
" <td>[[188, 16, 30], [1, 239, 1], [2, 0, 233]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7_layers_with_dropout</td>\n",
" <td>0.994461</td>\n",
" <td>0.994366</td>\n",
" <td>0.994366</td>\n",
" <td>0.994378</td>\n",
" <td>[[234, 0, 0], [2, 239, 0], [2, 0, 233]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7_layers</td>\n",
" <td>0.935195</td>\n",
" <td>0.923944</td>\n",
" <td>0.923944</td>\n",
" <td>0.923033</td>\n",
" <td>[[183, 51, 0], [1, 240, 0], [2, 0, 233]]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model Precision Score Recall Score Accuracy Score \\\n",
"0 3_layers 0.869224 0.847887 0.847887 \n",
"1 5_layers 0.934660 0.929577 0.929577 \n",
"2 7_layers_with_dropout 0.994461 0.994366 0.994366 \n",
"3 7_layers 0.935195 0.923944 0.923944 \n",
"\n",
" F1 Score Confusion Matrix \n",
"0 0.843977 [[146, 88, 0], [1, 240, 0], [19, 0, 216]] \n",
"1 0.927795 [[188, 16, 30], [1, 239, 1], [2, 0, 233]] \n",
"2 0.994378 [[234, 0, 0], [2, 239, 0], [2, 0, 233]] \n",
"3 0.923033 [[183, 51, 0], [1, 240, 0], [2, 0, 233]] "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_set_results = []\n",
"\n",
"# The true class indices do not depend on the model, so decode the one-hot\n",
"# labels once instead of on every loop iteration.\n",
"y_test_class = np.argmax(test_y_cat, axis=1)\n",
"\n",
"for name, model in dp_models.items():\n",
"    # The predicted class is the index of the highest value in each output row\n",
"    predict_x = model.predict(test_x, verbose=0)\n",
"    y_pred_class = np.argmax(predict_x, axis=1)\n",
"\n",
"    cm = confusion_matrix(y_test_class, y_pred_class, labels=[0, 1, 2])\n",
"    p_score = precision_score(y_test_class, y_pred_class, average=\"weighted\")\n",
"    a_score = accuracy_score(y_test_class, y_pred_class)\n",
"    r_score = recall_score(y_test_class, y_pred_class, average=\"weighted\")\n",
"    f1_score_result = f1_score(y_test_class, y_pred_class, average=\"weighted\")\n",
"\n",
"    test_set_results.append(( name, p_score, r_score, a_score, f1_score_result, cm ))\n",
"\n",
"dp_eval = pd.DataFrame(test_set_results, columns=[\"Model\", \"Precision Score\", \"Recall Score\", \"Accuracy Score\", \"F1 Score\", \"Confusion Matrix\"])\n",
"\n",
"dp_eval"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3. Final Results"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model</th>\n",
" <th>Precision Score</th>\n",
" <th>Recall Score</th>\n",
" <th>Accuracy Score</th>\n",
" <th>F1 Score</th>\n",
" <th>Confusion Matrix</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>LR</td>\n",
" <td>0.995828</td>\n",
" <td>0.995775</td>\n",
" <td>0.995775</td>\n",
" <td>0.995781</td>\n",
" <td>[[234, 0, 0], [1, 240, 0], [2, 0, 233]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7_layers_with_dropout</td>\n",
" <td>0.994461</td>\n",
" <td>0.994366</td>\n",
" <td>0.994366</td>\n",
" <td>0.994378</td>\n",
" <td>[[234, 0, 0], [2, 239, 0], [2, 0, 233]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>SVC</td>\n",
" <td>0.987793</td>\n",
" <td>0.987324</td>\n",
" <td>0.987324</td>\n",
" <td>0.987363</td>\n",
" <td>[[234, 0, 0], [2, 239, 0], [7, 0, 228]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>SGDC</td>\n",
" <td>0.981748</td>\n",
" <td>0.981690</td>\n",
" <td>0.981690</td>\n",
" <td>0.981707</td>\n",
" <td>[[228, 6, 0], [3, 237, 1], [3, 0, 232]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>KNN</td>\n",
" <td>0.955544</td>\n",
" <td>0.949296</td>\n",
" <td>0.949296</td>\n",
" <td>0.949254</td>\n",
" <td>[[233, 1, 0], [2, 239, 0], [33, 0, 202]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5_layers</td>\n",
" <td>0.934660</td>\n",
" <td>0.929577</td>\n",
" <td>0.929577</td>\n",
" <td>0.927795</td>\n",
" <td>[[188, 16, 30], [1, 239, 1], [2, 0, 233]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7_layers</td>\n",
" <td>0.935195</td>\n",
" <td>0.923944</td>\n",
" <td>0.923944</td>\n",
" <td>0.923033</td>\n",
" <td>[[183, 51, 0], [1, 240, 0], [2, 0, 233]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>RF</td>\n",
" <td>0.922452</td>\n",
" <td>0.898592</td>\n",
" <td>0.898592</td>\n",
" <td>0.896179</td>\n",
" <td>[[234, 0, 0], [0, 241, 0], [72, 0, 163]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>3_layers</td>\n",
" <td>0.869224</td>\n",
" <td>0.847887</td>\n",
" <td>0.847887</td>\n",
" <td>0.843977</td>\n",
" <td>[[146, 88, 0], [1, 240, 0], [19, 0, 216]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NB</td>\n",
" <td>0.856763</td>\n",
" <td>0.842254</td>\n",
" <td>0.842254</td>\n",
" <td>0.838005</td>\n",
" <td>[[148, 73, 13], [14, 227, 0], [4, 8, 223]]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>DTC</td>\n",
" <td>0.773783</td>\n",
" <td>0.767606</td>\n",
" <td>0.767606</td>\n",
" <td>0.765410</td>\n",
" <td>[[127, 0, 107], [2, 238, 1], [55, 0, 180]]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model Precision Score Recall Score Accuracy Score \\\n",
"0 LR 0.995828 0.995775 0.995775 \n",
"1 7_layers_with_dropout 0.994461 0.994366 0.994366 \n",
"2 SVC 0.987793 0.987324 0.987324 \n",
"3 SGDC 0.981748 0.981690 0.981690 \n",
"4 KNN 0.955544 0.949296 0.949296 \n",
"5 5_layers 0.934660 0.929577 0.929577 \n",
"6 7_layers 0.935195 0.923944 0.923944 \n",
"7 RF 0.922452 0.898592 0.898592 \n",
"8 3_layers 0.869224 0.847887 0.847887 \n",
"9 NB 0.856763 0.842254 0.842254 \n",
"10 DTC 0.773783 0.767606 0.767606 \n",
"\n",
" F1 Score Confusion Matrix \n",
"0 0.995781 [[234, 0, 0], [1, 240, 0], [2, 0, 233]] \n",
"1 0.994378 [[234, 0, 0], [2, 239, 0], [2, 0, 233]] \n",
"2 0.987363 [[234, 0, 0], [2, 239, 0], [7, 0, 228]] \n",
"3 0.981707 [[228, 6, 0], [3, 237, 1], [3, 0, 232]] \n",
"4 0.949254 [[233, 1, 0], [2, 239, 0], [33, 0, 202]] \n",
"5 0.927795 [[188, 16, 30], [1, 239, 1], [2, 0, 233]] \n",
"6 0.923033 [[183, 51, 0], [1, 240, 0], [2, 0, 233]] \n",
"7 0.896179 [[234, 0, 0], [0, 241, 0], [72, 0, 163]] \n",
"8 0.843977 [[146, 88, 0], [1, 240, 0], [19, 0, 216]] \n",
"9 0.838005 [[148, 73, 13], [14, 227, 0], [4, 8, 223]] \n",
"10 0.765410 [[127, 0, 107], [2, 238, 1], [55, 0, 180]] "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Merge both result tables and rank the models by F1 score, best first\n",
"eval_df = (\n",
"    pd.concat([sklearn_eval, dp_eval])\n",
"    .sort_values(by=['F1 Score'], ascending=False)\n",
"    .reset_index(drop=True)\n",
")\n",
"eval_df.to_csv(\"evaluation.csv\", sep=',', encoding='utf-8', index=False)\n",
"eval_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Best model - ROC - Confusion Matrix\n",
"\n",
"As the evaluation table above shows, the best model by F1 score is LR, with a weighted F1 score of about 0.996 on the test set."
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([0.98734177, 1. , 1. ]),\n",
" array([1. , 0.99585062, 0.99148936]),\n",
" array([0.99363057, 0.997921 , 0.9957265 ]))"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"best_model = sklearn_models[\"LR\"]\n",
"y_predictions = best_model.predict(test_x)\n",
"\n",
"# Per-class scores, ordered as labels 0, 1, 2\n",
"p_score, r_score, f1_score_result = (\n",
"    metric(test_y, y_predictions, labels=[0, 1, 2], average=None)\n",
"    for metric in (precision_score, recall_score, f1_score)\n",
")\n",
"\n",
"p_score, r_score, f1_score_result"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.996"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Unweighted mean of the per-class F1 scores computed in the previous cell,\n",
"# taken from the values themselves rather than hand-copied rounded numbers.\n",
"round(float(np.mean(f1_score_result)), 3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.1. Confusion Matrix"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Confusion matrix of the LR model, taken from the evaluation table\n",
"best_cm = eval_df[ eval_df[\"Model\"] == 'LR' ][\"Confusion Matrix\"].values[0]\n",
"\n",
"# Rows are the true classes and columns the predicted classes\n",
"cm_array_df = pd.DataFrame(best_cm, index=[\"C\", \"H\", \"L\"], columns=[\"C\", \"H\", \"L\"])\n",
"\n",
"fig, ax = plt.subplots(figsize=(8,6))\n",
"sns.heatmap(cm_array_df, linewidths=1, annot=True, ax=ax, fmt='g', cmap=\"crest\")\n",
"ax.set_xlabel(\"Predicted label\")\n",
"ax.set_ylabel(\"True label\")\n",
"ax.set_title(\"Confusion matrix - LR\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.3. F1 Score and Confidence correlation"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"def to_labels(y_pred, y_pred_proba, threshold):\n",
"    '''\n",
"    Apply a confidence threshold to a batch of predictions.\n",
"\n",
"    Each predicted class is kept when the highest probability of its row in\n",
"    `y_pred_proba`, rounded to 2 decimals, is at least `threshold`;\n",
"    otherwise it is replaced by -1.\n",
"    '''\n",
"    def confidence(probabilities):\n",
"        # Highest class probability of one prediction, rounded to 2 decimals\n",
"        return round(probabilities[np.argmax(probabilities)], 2)\n",
"\n",
"    return [\n",
"        predicted_class if confidence(y_pred_proba[position]) >= threshold else -1\n",
"        for position, predicted_class in enumerate(y_pred)\n",
"    ]\n",
"\n",
"\n",
"def calculate_correlation_score_confidence(test_x, test_y, model=None):\n",
"    '''\n",
"    Compute F1 scores for a range of confidence thresholds.\n",
"\n",
"    For every threshold from 0.00 to 1.00 (step 0.01), predictions whose\n",
"    confidence is below the threshold are replaced by -1 (see `to_labels`)\n",
"    and the F1 scores are recomputed.\n",
"\n",
"    Args:\n",
"        test_x: features passed to the model's `predict` and `predict_proba`.\n",
"        test_y: true labels, scored against classes 0, 1 and 2.\n",
"        model: classifier to evaluate; defaults to the notebook-level `best_model`.\n",
"\n",
"    Returns:\n",
"        A tuple `(thresholds, f1_score_results)` where `f1_score_results[i]` is\n",
"        `[f1_class_0, f1_class_1, f1_class_2, weighted_f1]` for `thresholds[i]`.\n",
"    '''\n",
"    if model is None:\n",
"        model = best_model\n",
"\n",
"    y_predictions = model.predict(test_x)\n",
"    y_predict_proba = model.predict_proba(test_x)\n",
"\n",
"    # np.arange with a float step can yield values that differ slightly from\n",
"    # the intended 2-decimal numbers; round them so they compare cleanly with\n",
"    # the 2-decimal confidences produced by `to_labels`.\n",
"    thresholds = list(np.round(np.arange(0, 1.01, 0.01), 2))\n",
"\n",
"    f1_score_results = []\n",
"\n",
"    for threshold in thresholds:\n",
"        true_predictions = to_labels(y_predictions, y_predict_proba, threshold)\n",
"        # Per-class F1 scores followed by the weighted F1 over the 3 classes\n",
"        f1_s = list(f1_score(test_y, true_predictions, labels=[0, 1, 2], average=None))\n",
"        f1_s.append(f1_score(test_y, true_predictions, labels=[0, 1, 2], average=\"weighted\"))\n",
"        f1_score_results.append(f1_s)\n",
"\n",
"    return thresholds, f1_score_results\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"thresholds, f1_scores = calculate_correlation_score_confidence(test_x, test_y)\n",
"\n",
"# Split the per-threshold rows into one series per curve\n",
"first_class, second_class, third_class, all_classes = (\n",
"    [row[position] for row in f1_scores] for position in range(4)\n",
")\n",
"\n",
"fig, ax = plt.subplots(figsize=(8,6))\n",
"ax.plot(thresholds, first_class, label = \"F1 Score - Correct class\")\n",
"ax.plot(thresholds, second_class, label = \"F1 Score - High-back class\")\n",
"ax.plot(thresholds, third_class, label = \"F1 Score - Low-back class\")\n",
"ax.plot(thresholds, all_classes, label = \"F1 Score - All 3 classes\", linewidth=2.0, color=\"blue\")\n",
"ax.legend(loc = 'lower left')\n",
"ax.set_ylim([0.5, 1])\n",
"ax.set_xlim([0.025, 1])\n",
"ax.set_xlabel(\"Thresholds\", fontsize = 12)\n",
"ax.set_ylabel(\"F1 Score\", fontsize = 12)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.2. ROC curve"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x550 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<AxesSubplot:title={'center':'ROC Curves for LogisticRegression'}, xlabel='False Positive Rate', ylabel='True Positive Rate'>"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Plot the ROC curves of the best model for the three classes\n",
"visualizer = ROCAUC(best_model, classes=[\"Correct\", \"High-back\", \"Low-back\"])\n",
"# NOTE(review): target_type_ is set by hand, presumably because fit() is never called on the visualizer - confirm\n",
"visualizer.target_type_ = \"multiclass\"\n",
"visualizer.score(test_x, test_y) \n",
"visualizer.show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.13 (conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "9260f401923fb5c4108c543a7d176de9733d378b3752e49535ad7c43c2271b65"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|