{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "objc[58344]: Class CaptureDelegate is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x10ae08860) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece480). One of the two will be used. Which one is undefined.\n", "objc[58344]: Class CVWindow is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x105baca68) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece4d0). One of the two will be used. Which one is undefined.\n", "objc[58344]: Class CVView is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x105baca90) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece4f8). One of the two will be used. Which one is undefined.\n", "objc[58344]: Class CVSlider is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x105bacab8) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece520). One of the two will be used. Which one is undefined.\n" ] } ], "source": [ "import mediapipe as mp\n", "import cv2\n", "import pandas as pd\n", "import pickle\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression, SGDClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix\n", "from sklearn.calibration import CalibratedClassifierCV\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "# Drawing helpers\n", "mp_drawing = mp.solutions.drawing_utils\n", "mp_pose = mp.solutions.pose" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Set up important functions" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def rescale_frame(frame, percent=50):\n", " '''\n", " Rescale a frame to a certain percentage compare to its original frame\n", " '''\n", " width = int(frame.shape[1] * percent/ 100)\n", " height = int(frame.shape[0] * percent/ 100)\n", " dim = (width, height)\n", " return cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)\n", "\n", "\n", "def describe_dataset(dataset_path: str):\n", " '''\n", " Describe dataset\n", " '''\n", "\n", " data = pd.read_csv(dataset_path)\n", " print(f\"Headers: {list(data.columns.values)}\")\n", " print(f'Number of rows: {data.shape[0]} \\nNumber of columns: {data.shape[1]}\\n')\n", " print(f\"Labels: \\n{data['label'].value_counts()}\\n\")\n", " print(f\"Missing values: {data.isnull().values.any()}\\n\")\n", " \n", " duplicate = data[data.duplicated()]\n", " print(f\"Duplicate Rows : {len(duplicate.sum(axis=1))}\")\n", "\n", " return data\n", "\n", "\n", "def round_up_metric_results(results) -> list:\n", " '''Round up metrics results such as precision score, recall score, ...'''\n", " return list(map(lambda el: round(el, 3), results))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Describe and process data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "TRAIN_SET_PATH = \"./err.train.csv\"\n", "TEST_SET_PATH = \"./err.test.csv\"" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']\n", "Number of rows: 17907 \n", "Number of columns: 53\n", "\n", "Labels: \n", "L 9114\n", "C 8793\n", "Name: label, dtype: int64\n", "\n", "Missing values: False\n", "\n", "Duplicate Rows : 0\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelnose_xnose_ynose_znose_vleft_shoulder_xleft_shoulder_yleft_shoulder_zleft_shoulder_vright_shoulder_x...right_heel_zright_heel_vleft_foot_index_xleft_foot_index_yleft_foot_index_zleft_foot_index_vright_foot_index_xright_foot_index_yright_foot_index_zright_foot_index_v
1790410.6474380.4422680.0041140.9999850.6157980.5171700.1517060.9995790.631354...-0.0342280.9797190.7018260.8805160.1342220.9793190.5048800.881748-0.0279110.986165
1790510.6496520.4190570.0087830.9999830.6175770.5035140.1585450.9995290.631972...-0.0611760.9804310.7046060.8802480.0714760.9799320.5045130.881766-0.0888320.986975
1790610.6535560.4003940.0148520.9999800.6207340.4865220.1698070.9995560.631171...-0.1386780.9790780.7054750.8789810.0036900.9791990.5040670.882642-0.1833040.986824
\n", "

3 rows × 53 columns

\n", "
" ], "text/plain": [ " label nose_x nose_y nose_z nose_v left_shoulder_x \\\n", "17904 1 0.647438 0.442268 0.004114 0.999985 0.615798 \n", "17905 1 0.649652 0.419057 0.008783 0.999983 0.617577 \n", "17906 1 0.653556 0.400394 0.014852 0.999980 0.620734 \n", "\n", " left_shoulder_y left_shoulder_z left_shoulder_v right_shoulder_x \\\n", "17904 0.517170 0.151706 0.999579 0.631354 \n", "17905 0.503514 0.158545 0.999529 0.631972 \n", "17906 0.486522 0.169807 0.999556 0.631171 \n", "\n", " ... right_heel_z right_heel_v left_foot_index_x left_foot_index_y \\\n", "17904 ... -0.034228 0.979719 0.701826 0.880516 \n", "17905 ... -0.061176 0.980431 0.704606 0.880248 \n", "17906 ... -0.138678 0.979078 0.705475 0.878981 \n", "\n", " left_foot_index_z left_foot_index_v right_foot_index_x \\\n", "17904 0.134222 0.979319 0.504880 \n", "17905 0.071476 0.979932 0.504513 \n", "17906 0.003690 0.979199 0.504067 \n", "\n", " right_foot_index_y right_foot_index_z right_foot_index_v \n", "17904 0.881748 -0.027911 0.986165 \n", "17905 0.881766 -0.088832 0.986975 \n", "17906 0.882642 -0.183304 0.986824 \n", "\n", "[3 rows x 53 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = describe_dataset(TRAIN_SET_PATH)\n", "# Categorizing label\n", "df.loc[df[\"label\"] == \"L\", \"label\"] = 0\n", "df.loc[df[\"label\"] == \"C\", \"label\"] = 1\n", "\n", "df.tail(3)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/input_scaler.pkl\", \"rb\") as f:\n", " sc = pickle.load(f)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Extract features and class\n", "X = df.drop(\"label\", axis=1)\n", "y = df[\"label\"].astype(\"int\")\n", "\n", "X = pd.DataFrame(sc.transform(X))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10827 0\n", "11395 0\n", "3742 1\n", "Name: label, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)\n", "y_test.head(3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Train & Evaluate Model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.1. Train and evaluate model with train set" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelPrecision ScoreAccuracy scoreRecall ScoreF1 scoreConfusion Matrix
0SVC[1.0, 0.999]0.999721[0.999, 1.0][1.0, 1.0][[1713, 1], [0, 1868]]
1KNN[1.0, 0.998]0.999162[0.998, 1.0][0.999, 0.999][[1711, 3], [0, 1868]]
2RF[0.999, 0.999]0.999162[0.999, 0.999][0.999, 0.999][[1712, 2], [1, 1867]]
3DTC[0.997, 0.997]0.997208[0.997, 0.997][0.997, 0.997][[1709, 5], [5, 1863]]
4LR[0.992, 0.987]0.989391[0.986, 0.993][0.989, 0.99][[1690, 24], [14, 1854]]
5SGDC[0.992, 0.988]0.989950[0.987, 0.993][0.989, 0.99][[1692, 22], [14, 1854]]
6NB[0.963, 0.952]0.957286[0.947, 0.967][0.955, 0.959][[1623, 91], [62, 1806]]
\n", "
" ], "text/plain": [ " Model Precision Score Accuracy score Recall Score F1 score \\\n", "0 SVC [1.0, 0.999] 0.999721 [0.999, 1.0] [1.0, 1.0] \n", "1 KNN [1.0, 0.998] 0.999162 [0.998, 1.0] [0.999, 0.999] \n", "2 RF [0.999, 0.999] 0.999162 [0.999, 0.999] [0.999, 0.999] \n", "3 DTC [0.997, 0.997] 0.997208 [0.997, 0.997] [0.997, 0.997] \n", "4 LR [0.992, 0.987] 0.989391 [0.986, 0.993] [0.989, 0.99] \n", "5 SGDC [0.992, 0.988] 0.989950 [0.987, 0.993] [0.989, 0.99] \n", "6 NB [0.963, 0.952] 0.957286 [0.947, 0.967] [0.955, 0.959] \n", "\n", " Confusion Matrix \n", "0 [[1713, 1], [0, 1868]] \n", "1 [[1711, 3], [0, 1868]] \n", "2 [[1712, 2], [1, 1867]] \n", "3 [[1709, 5], [5, 1863]] \n", "4 [[1690, 24], [14, 1854]] \n", "5 [[1692, 22], [14, 1854]] \n", "6 [[1623, 91], [62, 1806]] " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "algorithms =[(\"LR\", LogisticRegression()),\n", " (\"SVC\", SVC(probability=True)),\n", " ('KNN',KNeighborsClassifier()),\n", " (\"DTC\", DecisionTreeClassifier()),\n", " (\"SGDC\", CalibratedClassifierCV(SGDClassifier())),\n", " (\"NB\", GaussianNB()),\n", " ('RF', RandomForestClassifier()),]\n", "\n", "models = {}\n", "final_results = []\n", "\n", "for name, model in algorithms:\n", " trained_model = model.fit(X_train, y_train)\n", " models[name] = trained_model\n", "\n", " # Evaluate model\n", " model_results = model.predict(X_test)\n", "\n", " p_score = precision_score(y_test, model_results, average=None, labels=[1, 0])\n", " a_score = accuracy_score(y_test, model_results)\n", " r_score = recall_score(y_test, model_results, average=None, labels=[1, 0])\n", " f1_score_result = f1_score(y_test, model_results, average=None, labels=[1, 0])\n", " cm = confusion_matrix(y_test, model_results, labels=[1, 0])\n", " final_results.append(( name, round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))\n", "\n", "# Sort results by F1 score\n", "final_results.sort(key=lambda k: sum(k[4]), reverse=True)\n", "pd.DataFrame(final_results, columns=[\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.2. Test set evaluation" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']\n", "Number of rows: 1107 \n", "Number of columns: 53\n", "\n", "Labels: \n", "L 561\n", "C 546\n", "Name: label, dtype: int64\n", "\n", "Missing values: False\n", "\n", "Duplicate Rows : 0\n" ] } ], "source": [ "test_df = describe_dataset(TEST_SET_PATH)\n", "test_df = test_df.sample(frac=1).reset_index(drop=True)\n", "\n", "# Categorizing label\n", "test_df.loc[test_df[\"label\"] == \"L\", \"label\"] = 0\n", "test_df.loc[test_df[\"label\"] == \"C\", \"label\"] = 1\n", "\n", "test_x = test_df.drop(\"label\", axis=1)\n", "test_y = test_df[\"label\"].astype(\"int\")\n", "\n", "test_x = pd.DataFrame(sc.transform(test_x))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelPrecision ScoreAccuracy scoreRecall ScoreF1 scoreConfusion Matrix
0LR[0.948, 0.998]0.971996[0.998, 0.947][0.972, 0.972][[545, 1], [30, 531]]
1SGDC[0.922, 0.998]0.957543[0.998, 0.918][0.959, 0.956][[545, 1], [46, 515]]
2DTC[0.95, 0.889]0.916893[0.877, 0.955][0.912, 0.921][[479, 67], [25, 536]]
3RF[0.786, 0.921]0.841915[0.934, 0.752][0.854, 0.828][[510, 36], [139, 422]]
4NB[0.79, 0.751]0.768744[0.723, 0.813][0.755, 0.781][[395, 151], [105, 456]]
5KNN[0.737, 0.799]0.765131[0.815, 0.717][0.774, 0.756][[445, 101], [159, 402]]
6SVC[0.659, 0.842]0.719964[0.894, 0.551][0.759, 0.666][[488, 58], [252, 309]]
\n", "
" ], "text/plain": [ " Model Precision Score Accuracy score Recall Score F1 score \\\n", "0 LR [0.948, 0.998] 0.971996 [0.998, 0.947] [0.972, 0.972] \n", "1 SGDC [0.922, 0.998] 0.957543 [0.998, 0.918] [0.959, 0.956] \n", "2 DTC [0.95, 0.889] 0.916893 [0.877, 0.955] [0.912, 0.921] \n", "3 RF [0.786, 0.921] 0.841915 [0.934, 0.752] [0.854, 0.828] \n", "4 NB [0.79, 0.751] 0.768744 [0.723, 0.813] [0.755, 0.781] \n", "5 KNN [0.737, 0.799] 0.765131 [0.815, 0.717] [0.774, 0.756] \n", "6 SVC [0.659, 0.842] 0.719964 [0.894, 0.551] [0.759, 0.666] \n", "\n", " Confusion Matrix \n", "0 [[545, 1], [30, 531]] \n", "1 [[545, 1], [46, 515]] \n", "2 [[479, 67], [25, 536]] \n", "3 [[510, 36], [139, 422]] \n", "4 [[395, 151], [105, 456]] \n", "5 [[445, 101], [159, 402]] \n", "6 [[488, 58], [252, 309]] " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "testset_final_results = []\n", "\n", "for name, model in models.items():\n", " # Evaluate model\n", " model_results = model.predict(test_x)\n", "\n", " p_score = precision_score(test_y, model_results, average=None, labels=[1, 0])\n", " a_score = accuracy_score(test_y, model_results)\n", " r_score = recall_score(test_y, model_results, average=None, labels=[1, 0])\n", " f1_score_result = f1_score(test_y, model_results, average=None, labels=[1, 0])\n", " cm = confusion_matrix(test_y, model_results, labels=[1, 0])\n", " testset_final_results.append(( name, round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm ))\n", "\n", "\n", "testset_final_results.sort(key=lambda k: sum(k[4]), reverse=True)\n", "pd.DataFrame(testset_final_results, columns=[\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Dump Models \n", "\n", "According to the evaluation above, LR and KNN SGDC would be chosen for more eval." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/sklearn/err_all_sklearn.pkl\", \"wb\") as f:\n", " pickle.dump(models, f)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/sklearn/err_SGDC_model.pkl\", \"wb\") as f:\n", " pickle.dump(models[\"SGDC\"], f)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/sklearn/err_LR_model.pkl\", \"wb\") as f:\n", " pickle.dump(models[\"LR\"], f)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.8.13 (conda)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "9260f401923fb5c4108c543a7d176de9733d378b3752e49535ad7c43c2271b65" } } }, "nbformat": 4, "nbformat_minor": 2 }