Upload Emotion Detection in Text.ipynb
Browse files- Emotion Detection in Text.ipynb +785 -0
Emotion Detection in Text.ipynb
ADDED
@@ -0,0 +1,785 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "56cccab6",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Emotions Detection in Text"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "code",
|
13 |
+
"execution_count": 1,
|
14 |
+
"id": "f0814628-3d83-4fd6-a511-2eccf79f9f1e",
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [],
|
17 |
+
"source": [
|
18 |
+
"# EDA\n",
|
19 |
+
"import pandas as pd\n",
|
20 |
+
"import numpy as np\n",
|
21 |
+
"\n",
|
22 |
+
"# Load Data Viz Pkgs\n",
|
23 |
+
"import seaborn as sns\n",
|
24 |
+
"\n",
|
25 |
+
"# Load Text Cleaning Pkgs\n",
|
26 |
+
"import neattext.functions as nfx\n",
|
27 |
+
"\n",
|
28 |
+
"# Load ML Pkgs\n",
|
29 |
+
"# Estimators\n",
|
30 |
+
"from sklearn.linear_model import LogisticRegression\n",
|
31 |
+
"from sklearn.naive_bayes import MultinomialNB\n",
|
32 |
+
"\n",
|
33 |
+
"# Transformers\n",
|
34 |
+
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
35 |
+
"from sklearn.model_selection import train_test_split\n",
|
36 |
+
"from sklearn.metrics import accuracy_score,classification_report,confusion_matrix"
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 2,
|
42 |
+
"id": "b209e004-ab77-4407-8689-b4318944d47f",
|
43 |
+
"metadata": {},
|
44 |
+
"outputs": [],
|
45 |
+
"source": [
|
46 |
+
"# Load Dataset\n",
|
47 |
+
"df = pd.read_csv(\"../data/emotion_dataset_raw.csv\")"
|
48 |
+
]
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"cell_type": "code",
|
52 |
+
"execution_count": 3,
|
53 |
+
"id": "fea2d4c0-3bdd-405e-ab69-507ceaac36cb",
|
54 |
+
"metadata": {},
|
55 |
+
"outputs": [
|
56 |
+
{
|
57 |
+
"data": {
|
58 |
+
"text/html": [
|
59 |
+
"<div>\n",
|
60 |
+
"<style scoped>\n",
|
61 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
62 |
+
" vertical-align: middle;\n",
|
63 |
+
" }\n",
|
64 |
+
"\n",
|
65 |
+
" .dataframe tbody tr th {\n",
|
66 |
+
" vertical-align: top;\n",
|
67 |
+
" }\n",
|
68 |
+
"\n",
|
69 |
+
" .dataframe thead th {\n",
|
70 |
+
" text-align: right;\n",
|
71 |
+
" }\n",
|
72 |
+
"</style>\n",
|
73 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
74 |
+
" <thead>\n",
|
75 |
+
" <tr style=\"text-align: right;\">\n",
|
76 |
+
" <th></th>\n",
|
77 |
+
" <th>Emotion</th>\n",
|
78 |
+
" <th>Text</th>\n",
|
79 |
+
" </tr>\n",
|
80 |
+
" </thead>\n",
|
81 |
+
" <tbody>\n",
|
82 |
+
" <tr>\n",
|
83 |
+
" <th>0</th>\n",
|
84 |
+
" <td>neutral</td>\n",
|
85 |
+
" <td>Why ?</td>\n",
|
86 |
+
" </tr>\n",
|
87 |
+
" <tr>\n",
|
88 |
+
" <th>1</th>\n",
|
89 |
+
" <td>joy</td>\n",
|
90 |
+
" <td>Sage Act upgrade on my to do list for tommorow.</td>\n",
|
91 |
+
" </tr>\n",
|
92 |
+
" <tr>\n",
|
93 |
+
" <th>2</th>\n",
|
94 |
+
" <td>sadness</td>\n",
|
95 |
+
" <td>ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...</td>\n",
|
96 |
+
" </tr>\n",
|
97 |
+
" <tr>\n",
|
98 |
+
" <th>3</th>\n",
|
99 |
+
" <td>joy</td>\n",
|
100 |
+
" <td>Such an eye ! The true hazel eye-and so brill...</td>\n",
|
101 |
+
" </tr>\n",
|
102 |
+
" <tr>\n",
|
103 |
+
" <th>4</th>\n",
|
104 |
+
" <td>joy</td>\n",
|
105 |
+
" <td>@Iluvmiasantos ugh babe.. hugggzzz for u .! b...</td>\n",
|
106 |
+
" </tr>\n",
|
107 |
+
" </tbody>\n",
|
108 |
+
"</table>\n",
|
109 |
+
"</div>"
|
110 |
+
],
|
111 |
+
"text/plain": [
|
112 |
+
" Emotion Text\n",
|
113 |
+
"0 neutral Why ? \n",
|
114 |
+
"1 joy Sage Act upgrade on my to do list for tommorow.\n",
|
115 |
+
"2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...\n",
|
116 |
+
"3 joy Such an eye ! The true hazel eye-and so brill...\n",
|
117 |
+
"4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b..."
|
118 |
+
]
|
119 |
+
},
|
120 |
+
"execution_count": 3,
|
121 |
+
"metadata": {},
|
122 |
+
"output_type": "execute_result"
|
123 |
+
}
|
124 |
+
],
|
125 |
+
"source": [
|
126 |
+
"df.head()"
|
127 |
+
]
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"cell_type": "code",
|
131 |
+
"execution_count": 4,
|
132 |
+
"id": "430565a3-cf3b-4c6f-afa5-bafd084f5676",
|
133 |
+
"metadata": {},
|
134 |
+
"outputs": [
|
135 |
+
{
|
136 |
+
"data": {
|
137 |
+
"text/plain": [
|
138 |
+
"joy 11045\n",
|
139 |
+
"sadness 6722\n",
|
140 |
+
"fear 5410\n",
|
141 |
+
"anger 4297\n",
|
142 |
+
"surprise 4062\n",
|
143 |
+
"neutral 2254\n",
|
144 |
+
"disgust 856\n",
|
145 |
+
"shame 146\n",
|
146 |
+
"Name: Emotion, dtype: int64"
|
147 |
+
]
|
148 |
+
},
|
149 |
+
"execution_count": 4,
|
150 |
+
"metadata": {},
|
151 |
+
"output_type": "execute_result"
|
152 |
+
}
|
153 |
+
],
|
154 |
+
"source": [
|
155 |
+
"# Value Counts\n",
|
156 |
+
"df['Emotion'].value_counts()"
|
157 |
+
]
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"cell_type": "code",
|
161 |
+
"execution_count": 5,
|
162 |
+
"id": "531d3449-a959-4a19-bff0-3ffed551e619",
|
163 |
+
"metadata": {},
|
164 |
+
"outputs": [
|
165 |
+
{
|
166 |
+
"data": {
|
167 |
+
"text/plain": [
|
168 |
+
"<Axes: xlabel='Emotion', ylabel='count'>"
|
169 |
+
]
|
170 |
+
},
|
171 |
+
"execution_count": 5,
|
172 |
+
"metadata": {},
|
173 |
+
"output_type": "execute_result"
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"data": {
|
177 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAGwCAYAAAC0HlECAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA9IUlEQVR4nO3de1yUdf7//+cIMg6nSVBOK3lIRFRS01K01PLUAc12V9tsSTdTy5JIzXItIzd1szyttqZuq62H7LuVbQdlPZSu5jGUPBGaYeoKYS2CmgHC+/dHH6+fIx6uEATscb/d5lbzvl7XNa/3cA3z9JprLhzGGCMAAABcUo3KbgAAAKA6IDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wru4FrSUlJiY4ePaqAgAA5HI7KbgcAANhgjNGJEycUERGhGjUufjyJ0FSOjh49qsjIyMpuAwAAlMHhw4dVr169iy4nNJWjgIAAST896YGBgZXcDQAAsCM/P1+RkZHW+/jFEJrK0dmP5AIDAwlNAABUM5c7tYYTwQEAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wruwFc2w6Nj63sFq7Y9eN2VXYLAIAqgCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsKFSQ9N//vMf9erVSxEREXI4HHr//fc9lhtjlJycrIiICLlcLnXp0kV79uzxqCkoKNDw4cNVp04d+fn5qXfv3jpy5IhHTW5urhISEuR2u+V2u5WQkKDjx4971Bw6dEi9evWSn5+f6tSpo8TERBUWFlbEtAEAQDVUqaHp1KlTatmypWbNmnXB5ZMnT9bUqVM1a9Ysbdu2TWFhYerevbtOnDhh1SQlJWnZsmVaunSpNmzYoJMnTyo+Pl7FxcVWTf/+/ZWWlqaUlBSlpKQoLS1NCQkJ1vLi4mLdc889OnXqlDZs2KClS5fq3Xff1ciRIytu8gAAoFpxGGNMZTchSQ6HQ8uWLVOfPn0k/XSUKSIiQklJSXrmmWck/XRUKTQ0VC+//LKGDh2qvLw81a1bVwsXLtT9998vSTp69KgiIyO1fPly9ezZU+np6WrWrJk2b96sdu3aSZI2b96suLg4ffnll4qOjtaKFSsUHx+vw4cPKyIiQpK0dOlSDRw4UDk5OQoMDLQ1h/z8fLndbuXl5dle51p3aHxsZbdwxa4ft6uyWwAAVCC7799V9pymzMxMZWdnq0ePHtaY0+lU586dtXHjRklSamqqioqKPGoiIiLUokULq2bTpk1yu91WYJKk9u3by+12e9S0aNHCCkyS1LNnTxUUFCg1NfWiPRYUFCg/P9/jBgAArk1VNjRlZ2dLkkJDQz3GQ0NDrWXZ2dny8fFR7dq1L1kTEhJSavshISEeNec/Tu3ateXj42PVXMikSZOs86TcbrciIyN/5iwBAEB1UWVD01kOh8PjvjGm1Nj5zq+5UH1Zas43ZswY5eXlWbfDhw9fsi8AAFB9VdnQFBYWJkmljvTk5ORYR4XCwsJUWFio3NzcS9Z8++23pbZ/7Ngxj5rzHyc3N1dFRUWljkCdy+l0KjAw0OMGAACuTVU2NDVs2FBhYWFatWqVNVZYWKh169apQ4cOkqQ2bdqoZs2aHjVZWVnavXu3VRMXF6e8vDxt3brVqtmyZYvy8vI8anbv3q2srCyrZuXKlXI6nWrTpk2FzhMAAFQP3pX54CdPntRXX31l3c/MzFRaWpqCgoJ0/fXXKykpSRMnTlRUVJSioqI0ceJE+fr6qn///pIkt9utQYMGaeTIkQoODlZQUJBGjRql2NhYdevWTZIUExOjO++8U4MHD9acOXMkSUOGDFF8fLyio6MlST169FCzZs2UkJCgV155Rf/73/80atQoDR48mKNHAABAUiWHps8//1y33367dX/EiBGSpAEDBmjBggUaPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYK0zbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7d2+PaUF5eXvr44481bNgwdezYUS6XS/3799err75a0U8BAACoJqrMdZquBVynqTSu0wQAqOqq/XWaAAAAqhJCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGyo0qHpzJkzeu6559SwYUO5XC41atRI48ePV0lJiVVjjFFycrIiIiLkcrnUpUsX7dmzx2M7BQUFGj58uOrUqSM/Pz/17t1bR44c8ajJzc1VQkKC3G633G63EhISdPz48asxTQAAUA1U6dD08ssv6/XXX9esWbOUnp6uyZMn65VXXtHMmTOtmsmTJ2vq1KmaNWuWtm3bprCwMHXv3l0nTpywapKSkrRs2TItXbpUGzZs0MmTJxUfH6/i4mKrpn///kpLS1NKSopSUlKUlpamhISEqzpfAABQdTmMMaaym7iY+Ph4hYaG6o033rDGfvOb38jX11cLFy6UMUYRERFKSkrSM888I+mno0qhoaF6+eWXNXToUOXl5alu3bpauHCh7r//fknS0aNHFRkZqeXLl6tnz55KT09Xs2bNtHnzZrVr106StHnzZsXFxenLL79UdHS0rX7z8/PldruVl5enwMDAcn42qqdD42Mru4Urdv24XZXdAgCgAtl9/67SR5puvfVWrVmzRvv27ZMkffHFF9qwYYPuvvtuSVJmZqays7PVo0cPax2n06nOnTtr48aNkqTU1FQVFRV51ERERKhFixZWzaZNm+R2u63AJEnt27eX2+22ai6koKBA+fn5HjcAAHBt8q7sBi7lmWeeUV5enpo2bSovLy8VFxdrwoQJeuCBByRJ2dnZkqTQ0FCP9UJDQ/XNN99YNT4+Pqpdu3apmrPrZ2dnKyQkpNTjh4SEWDUXMmnSJL344otlnyAAAKg2qvSRprfffluLFi3SkiVLtH37dr355pt69dVX9eabb3rUORwOj/vGmFJj5zu/5kL1l9vOmDFjlJeXZ90OHz5sZ1oAAKAaqtJHmp5++mk9++yz+t3vfidJio2N1TfffKNJkyZpwIABCgsLk/TTkaLw8HBrvZycHOvoU1hYmAoLC5Wbm+txtCknJ0cdOnSwar799ttSj3/s2LFSR7HO5XQ65XQ6r3yiAACgyqvSR5p++OEH1ajh2aKXl5d1yYGGDRsqLCxMq1atspYXFhZq3bp1ViBq06aNatas6VGTlZWl3bt3WzVxcXHKy8vT1q1brZotW7YoLy/PqgEAAL9sVfpIU69evTRhwgRdf/31at68uXbs2KGpU6fq4YcflvTTR2pJSUmaOHGioqKiFBUVpYkTJ8rX11f9+/eXJLndbg0aNEgjR45UcHCwgoKCNGrUKMXGxqpbt26SpJiYGN15550aPHiw5syZI0kaMmSI4uPjbX9zDgAAXNuqdGiaOXOmnn/+eQ0bNkw5OTmKiIjQ0KFDNW7cOKtm9OjROn36tIYNG6bc3Fy1a9dOK1euVEBAgFUzbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7dW7Nmzbp6kwUAAFValb5OU3XDdZpK4zpNAICq7pq4ThMAAEBVQWgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2OBd2Q0A16KOMztWdgtX7LPhn1V2CwBQpXCkCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2lCk03XHHHTp+/Hip8fz8fN1xxx1X2hMAAECVU6bQtHbtWhUWFpYa//HHH7V+/forbgoAAKCq8f45xTt37rT+f+/evcrOzrbuFxcXKyUlRb/61a/KrzsAAIAq4meFplatWsnhcMjhcFzwYziXy6WZM2eWW3MAAABVxc8KTZmZmTLGqFGjRtq6davq1q1rLfPx8VFISIi8vLzKvUkAAIDK9rNCU/369SVJJSUlFdIMAABAVfWzQtO59u3bp7Vr1yonJ6dUiBo3btwVNwYAAFCVlOnbc/PmzVOzZs00btw4vfPOO1q2bJl1e//998u1wf/+97/6/e9/r+DgYPn6+qpVq1ZKTU21lhtjlJycrIiICLlcLnXp0kV79uzx2EZBQYGGDx+uOnXqyM/PT71799aRI0c8anJzc5WQkCC32y23262EhIQLXlYBAAD8MpUpNL300kuaMGGCsrOzlZaWph07dli37du3l1tzubm56tixo2rWrKkVK1Zo7969mjJliq677jqrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISGh3OYCAACqtzJ9PJebm6u+ffuWdy+lvPzyy4qMjNT8+fOtsQYNGlj/b4zR9OnTNXbsWP3617+WJL355psKDQ3VkiVLNHToUOXl5emNN97QwoUL1a1bN0nSokWLFBkZqdWrV6tnz55KT09XSkqKNm/erHbt2kn66WhaXFycMjIyFB0dXeFzBQAAVVuZjjT17dtXK1euLO9eSvnggw/Utm1b9e3bVyEhIWrdurXmzZtnLc/MzFR2drZ69OhhjTmdTnXu3FkbN26UJKWmpqqoqMijJiIiQi1atLBqNm3aJLfbbQUmSWrfvr3cbrdVcyEFBQXKz8/3uAEAgGtTmY40NW7cWM8//7w2b96s2NhY1axZ02N5YmJiuTT39ddfa/bs2RoxYoT++Mc/auvWrUpMTJTT6dRDDz1kXVwzNDTUY73Q0FB98803kqTs7Gz5+Piodu3apWrOrp+dna2QkJBSjx8SEuJxAc/zTZo0SS+++OIVzREAAFQPZQpNc+fOlb+/v9atW6d169Z5LHM4HOUWmkpKStS2bVtNnDhRktS6dWvt2bNHs2fP1kMPPeTxmOcyxpQaO9/5NReqv9x2xowZoxEjRlj38/PzFRkZeelJAQCAaqlMoSkzM7O8+7ig8PBwNWvWzGMsJiZG7777riQpLCxM0k9HisLDw62anJwc6+hTWFiYCgsLlZub63G0KScnRx06dLBqvv3221KPf+zYsVJHsc7ldDrldDrLODsAAFCdlOmcpqulY8eOysjI8Bjbt2+fdZHNhg0bKiwsTKtWrbKWFxYWat26dVYgatOmjWrWrOlRk5WVpd27d1s1cXFxysvL09atW62aLVu2KC8vz6oBAAC/bGU60vTwww9fcvnf//73MjVzvqeeekodOnTQxIkT1a9fP23dulVz587V3LlzJf30kVpSUpImTpyoqKgoRUVFaeLEifL19VX//v0lSW63W4MGDdLIkSMVHBysoKAgjRo1SrGxsda36WJiYnTnnXdq8ODBmjNnjiRpyJAhio+P55tzAABA0hVccuBcRUVF2r17t44fP37BP+RbVjfffLOWLVumMWPGaPz48WrYsKGmT5+uBx980KoZPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYNVMmzZN3t7e6tevn06fPq2uXbtqwYIFHn8nb/HixUpMTLS+Zde7d2/NmjWr3OYCAACqN4cxxpTHhkpKSjRs2DA1atRIo0ePLo9NVjv5+flyu93Ky8tTYGBgZbdTJRwaH1vZLVyx68ft+tnrdJzZsQI6ubo+G/5ZZbcAAFeF3ffvcjunqUaNGnrqqac0bdq08tokAABAlVGuJ4IfOHBAZ86cKc9NAgAAVAllOqfp3GsTST9dzygrK0sff/yxBgwYUC6NAQAAVCVlCk07duzwuF+jRg3VrVtXU6ZMuew36wAAAKqjMoWmTz/9tLz7AAAAqNLKFJrOOnbsmDIyMuRwONSkSRPVrVu3vPoCAACoUsp0IvipU6f08MMPKzw8XJ06ddJtt92miIgIDRo0SD/88EN59wgAAFDpyhSaRowYoXXr1unDDz/U8ePHdfz4cf3rX//SunXrNHLkyPLuEQAAoNKV6eO5d999V++88466dOlijd19991yuVzq16+fZs+eXV79AQAAVAllOtL0ww8/KDQ0tNR4SEgIH88BAIBrUplCU1xcnF544QX9+OOP1tjp06f14osvKi4urtyaAwAAqCrK9PHc9OnTddddd6levXpq2bKlHA6H0tLS5HQ6tXLlyvLuEUA1sa5T58pu4Yp1/s+6ym4BQBVVptAUGxur/fv3a9GiRfryyy9ljNHvfvc7Pfjgg3K5XOXdIwAAQKUrU2iaNGmSQkNDNXjwYI/xv//97zp27JieeeaZcmkOAACgqijTOU1z5sxR06ZNS403b95cr7/++hU3BQAAUNWUKTRlZ2crPDy81HjdunWVlZV1xU0BAABUNWUKTZGRkfrss89KjX/22WeKiIi44qYAAACqmjKd0/TII48oKSlJRUVFuuOOOyRJa9as0ejRo7kiOAAAuCaVKTSNHj1a//vf/zRs2DAVFhZKkmrVqqVnnnlGY8aMKdcGAQAAqoIyhSaHw6GXX35Zzz//vNLT0+VyuRQVFSWn01ne/QEAAFQJZQpNZ/n7++vmm28ur14AAACqrDKdCA4AAPBLQ2gCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwwbuyGwAAVE8Tfv/bym7hio1d9E5lt4BqhCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYEO1Ck2TJk2Sw+FQUlKSNWaMUXJysiIiIuRyudSlSxft2bPHY72CggINHz5cderUkZ+fn3r37q0jR4541OTm5iohIUFut1tut1sJCQk6fvz4VZgVAACoDqrNJQe2bdumuXPn6sYbb/QYnzx5sqZOnaoFCxaoSZMmeumll9S9e3dlZGQoICBAkpSUlKQPP/xQS5cuVXBwsEaOHKn4+HilpqbKy8tLktS/f38dOXJEKSkpkqQhQ4YoISFBH3744dWdKIBqZ9bI6v974okpvSq7BaDKqxZHmk6ePKkHH3xQ8+bNU+3ata1xY4ymT5+usWPH6te//rVatGihN998Uz/88IOWLFkiScrLy9Mbb7yhKVOmqFu3bmrdurUWLVqkXbt2afXq1ZKk9PR0paSk6G9/+5vi4uIUFxenefPm6aOPPlJGRsZF+yooKFB+fr7HDQAAXJuqRWh6/PHHdc8996hbt24e45mZmcrOzlaPHj2sMafTqc6dO2vjxo2SpNTUVBUVFXnUREREqEWLFlbNpk2b5Ha71a5dO6umffv2crvdVs2FTJo0yfo4z+12KzIyslzmCwAAqp4qH5qWLl2q1NRUTZo0qdSy7OxsSVJoaKjHeGhoqLUsOztbPj4+HkeoLlQTEhJSavshISFWzYWMGTNGeXl51u3w4cM/b3IAAKDaqNLnNB0+fFhPPvmkVq5cqVq1al20zuFweNw3xpQaO9/5NReqv9x2nE6nnE7nJR8HAABcG6r0kabU1FTl5OSoTZs28vb2lre3t9atW6e//OUv8vb2to4wnX80KCcnx1oWFhamwsJC5ebmXrLm22+/LfX4x44dK3UUCwAA/DJV6dDUtWtX7dq1S2lpadatbdu2evDBB5WWlqZGjRopLCxMq1atstYpLCzUunXr1KFDB0lSmzZtVLNmTY+arKws7d6926qJi4tTXl6etm7datVs2bJFeXl5Vg0AAPhlq9IfzwUEBKhFixYeY35+fgoODrbGk5KSNHHiREVFRSkqKkoTJ06Ur6+v+vfvL0lyu90aNGiQRo4cqeDgYAUFBWnUqFGKjY21TiyPiYnRnXfeqcGDB2vOnDmSfrrkQHx8vKKjo6/ijAEAQFVVpUOTHaNHj9bp06c1bNgw5ebmql27dlq5cqV1jSZJmjZtmry9vdWvXz+dPn1aXbt21YIFC6xrNEnS4sWLlZiYaH3Lrnfv3po1a9ZVnw8AAKiaql1oWrt2rcd9h8Oh5ORkJScnX3SdWrVqaebMmZo5c+ZFa4KCgrRo0aJy6hIAAFxrqvQ5TQAAAFUFoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7Ib+KVo8/Q/KruFK5b6ykOV3QIAAJWGI00AAAA2EJoAAABsIDQBAADYUKVD06RJk3TzzTcrICBAISEh6tOnjzIyMjxqjDFKTk5WRESEXC6XunTpoj179njUFBQUaPjw4apTp478/PzUu3dvHTlyxKMmNzdXCQkJcrvdcrvdSkhI0PHjxyt6igAAoJqo0qFp3bp1evzxx7V582atWrVKZ86cUY8ePXTq1CmrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISHhqs4XAABUXVX623MpKSke9+fPn6+QkBClpqaqU6dOMsZo+vTpGjt2rH79619Lkt58802FhoZqyZIlGjp0qPLy8vTGG29o4cKF6tatmyRp0aJFioyM1OrVq9WzZ0+lp6crJSVFmzdvVrt27SRJ8+bNU1xcnDIyMhQdHX11Jw4AAKqcKn2k6Xx5eXmSpKCgIElSZmamsrOz1aNHD6vG6XSqc+fO2rhxoyQpNTVVRUVFHjURERFq0aKFVbNp0ya53W4rMElS+/bt5Xa7rZoLKSgoUH5+vscNAABcm6pNaDLGaMSIEbr11lvVokULSVJ2drYkKTQ01KM2NDTUWpadnS0fHx/Vrl37kjUhISGlHjMkJMSquZBJkyZZ50C53W5FRkaWfYIAAKBKqzah6YknntDOnTv11ltvlVrmcDg87htjSo2d7/yaC9VfbjtjxoxRXl6edTt8+PDlpgEAAKqpahGahg8frg8++ECffvqp6tWrZ42HhYVJUqmjQTk5OdbRp7CwMBUWFio3N/eSNd9++22pxz127Fipo1jncjqdCgwM9LgBAIBrU5UOTcYYPfHEE3rvvff0ySefqGHDhh7LGzZsqLCwMK1atcoaKyws1Lp169ShQwdJUps2bVSzZk2PmqysLO3evduqiYuLU15enrZu3WrVbNmyRXl5eVYNAAD4ZavS3557/PHHtWTJEv3rX/9SQECAdUTJ7XbL5XLJ4XAoKSlJEydOVFRUlKKiojRx4kT5+vqqf//+Vu2gQYM0cuRIBQcHKygoSKNGjVJsbKz1bbqYmBjdeeedGjx4sObMmSNJGjJkiOLj4/nmHAAAkFTFQ9Ps2bMlSV26dPEYnz9/vgYOHChJGj16tE6fPq1hw4YpNzdX7dq108qVKxUQEGDVT5s2Td7e3urXr59Onz6trl27asGCBfLy8rJqFi9erMTEROtbdr1799asWbMqdoIAAKDaqNKhyRhz2RqHw6Hk5GQlJydftKZWrVqaOXOmZs6cedGaoKAgLVq0qCxtAgCAX4AqfU4TAABAVUFoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA3eld0AAACo+pKTkyu7hSt2pXPgSBMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7IbAACgOkmf8Ellt3DFYsbeUdktVEscaTrPX//6VzVs2FC1atVSmzZttH79+spuCQAAVAGEpnO8/fbbSkpK0tixY7Vjxw7ddtttuuuuu3To0KHKbg0AAFQyQtM5pk6dqkGDBumRRx5RTEyMpk+frsjISM2ePbuyWwMAAJWMc5r+T2FhoVJTU/Xss896jPfo0UMbN2684DoFBQUqKCiw7ufl5UmS8vPzS9UWF5wux24rx4XmdTknfiyugE6urrLM+8zpMxXQydVVlnmfOvPLnPfpgh8qoJOrqyzz/rGoqAI6ubrKMu+TP56qgE6urrLM+9z3u+rqYvM+O26MufQGDIwxxvz3v/81ksxnn33mMT5hwgTTpEmTC67zwgsvGEncuHHjxo0bt2vgdvjw4UtmBY40ncfhcHjcN8aUGjtrzJgxGjFihHW/pKRE//vf/xQcHHzRdSpKfn6+IiMjdfjwYQUGBl7Vx65MzJt5/xIwb+b9S1CZ8zbG6MSJE4qIiLhkHaHp/9SpU0deXl7Kzs72GM/JyVFoaOgF13E6nXI6nR5j1113XUW1aEtgYOAv6kV2FvP+ZWHevyzM+5elsubtdrsvW8OJ4P/Hx8dHbdq00apVqzzGV61apQ4dOlRSVwAAoKrgSNM5RowYoYSEBLVt21ZxcXGaO3euDh06pEcffbSyWwMAAJWM0HSO+++/X99//73Gjx+vrKwstWjRQsuXL1f9+vUru7XLcjqdeuGFF0p9XHitY97M+5eAeTPvX4LqMG+HMZf7fh0AAAA4pwkAAMAGQhMAAIANhCYAAAAbCE24pAYNGmj69OmV9vgDBw5Unz59Ku3xK5vD4dD7779f2W1UGGOMhgwZoqCgIDkcDqWlpVV2S9VWcnKyWrVqVdlt4P/8kn53denSRUlJSZIq/z2jovHtuWtMly5d1KpVq2tmp50xY8bl/xYQqq2UlBQtWLBAa9euVaNGjVSnTp3KbqnaGjVqlIYPH17ZbeAXbtu2bfLz86vsNiRJBw8eVMOGDbVjx45y+wcFoekXyBij4uJieXtX/R+/nSu0ovo6cOCAwsPDK/QCsoWFhfLx8amw7ZeXsvZ59vXs7+8vf3//Cujs2lFUVKSaNWtWdhvXtLp161Z2CxWKj+euoi5duigxMVGjR49WUFCQwsLClJycbC3Py8vTkCFDFBISosDAQN1xxx364osvrOUXOtyblJSkLl26WMvXrVunGTNmyOFwyOFw6ODBg1q7dq0cDof+/e9/q23btnI6nVq/fr0OHDige++9V6GhofL399fNN9+s1atXX4Vnwr5z51xQUKDExESFhISoVq1auvXWW7Vt2zZJP71xNG7cWK+++qrH+rt371aNGjV04MCBq9LvO++8o9jYWLlcLgUHB6tbt246deqUtm3bpu7du6tOnTpyu93q3Lmztm/f7rHu/v371alTJ9WqVUvNmjUrdXX6gwcPyuFw6L333tPtt98uX19ftWzZUps2bfKo27hxozp16iSXy6XIyEglJibq1Kn//6+y//Wvf1VUVJRq1aql0NBQ/fa3v71s/xVh4MCBGj58uA4dOiSHw6EGDRrIGKPJkyerUaNGcrlcatmypd555x1rneLiYg0aNEgNGzaUy+VSdHS0ZsyYUWq7ffr00aRJkxQREaEmTZpUSP/SxZ+vcz+uOKtPnz4aOHCgdb9BgwZ66aWXNHDgQLndbg0ePNj6GS9dulQdOnRQrVq11Lx5c61du9Za72Kv5/M/nlu7dq1uueUW+fn56brrrlPHjh31zTffWMs//PBDtWnTRrVq1VKjRo304osv6syZM+XyvKSkpOjWW2/Vddddp+DgYMXHx1uvQbv78bx58xQZGSlfX1/dd999mjp1aqk/U3W5OTgcDr3++uu699575efnp5deeqlc5neuy71mXn31VYWHhys4OFiPP/64ioqKrGWLFi1S27ZtFRAQoLCwMPXv3185OTnW8nN/1q1bt5bL5dIdd9yhnJwcrVixQjExMQoMDNQDDzygH374wVrvcq+jK3Hq1Ck99NBD8vf3V3h4uKZMmeKx/PyP55KTk3X99dfL6XQqIiJCiYmJ1rKsrCzdc889crlcatiwoZYsWeKx/tl95dyP7Y8fPy6Hw2G9JnJzc/Xggw+qbt26crlcioqK0vz58yVJDRs2lCS1bt1aDofDeq+8Ipf8c74oV507dzaBgYEmOTnZ7Nu3z7z55pvG4XCYlStXmpKSEtOxY0fTq1cvs23bNrNv3z4zcuRIExwcbL7//ntjjDEDBgww9957r8c2n3zySdO5c2djjDHHjx83cXFxZvDgwSYrK8tkZWWZM2fOmE8//dRIMjfeeKNZuXKl+eqrr8x3331n0tLSzOuvv2527txp9u3bZ8aOHWtq1aplvvnmG2v79evXN9OmTbtKz1Bp5845MTHRREREmOXLl5s9e/aYAQMGmNq1a1vPz4QJE0yzZs081n/qqadMp06drkqvR48eNd7e3mbq1KkmMzPT7Ny507z22mvmxIkTZs2aNWbhwoVm7969Zu/evWbQoEEmNDTU5OfnG2OMKS4uNi1atDBdunQxO3bsMOvWrTOtW7c2ksyyZcuMMcZkZmYaSaZp06bmo48+MhkZGea3v/2tqV+/vikqKjLGGLNz507j7+9vpk2bZvbt22c+++wz07p1azNw4EBjjDHbtm0zXl5eZsmSJebgwYNm+/btZsaMGZftvyIcP37cjB8/3tSrV89kZWWZnJwc88c//tE0bdrUpKSkmAMHDpj58+cbp9Np1q5da4wxprCw0IwbN85s3brVfP3112bRokXG19fXvP3229Z2BwwYYPz9/U1CQoLZvXu32bVrV4X0f6nnq3PnzubJJ5/0qL/33nvNgAEDrPv169c3gYGB5pVXXjH79+83+/fvt37G9erVM++8847Zu3eveeSRR0xAQID57rvvjDHmoq/nF154wbRs2dIYY0xRUZFxu91m1KhR5quvvjJ79+41CxYssF7bKSkpJjAw0CxYsMAcOHDArFy50jRo0MAkJyeXy3PzzjvvmHfffdfs27fP7Nixw/Tq1cvExsaa4uJiW/vxhg0bTI0aNcwrr7xiMjIyzGuvvWaCgoKM2+22HsPOHCSZkJAQ88Ybb5gDBw6YgwcPlsv8zrrUPjBgwAATGBhoHn30UZOenm4+/PBD4+vra+bOnWut/8Ybb5jly5ebAwcOmE2bNpn27dubu+66y1p+9mfdvn17s2HDBrN9+3bTuHFj07lzZ9OjRw+zfft285///McEBwebP//5z9Z6l3sdXYnHHnvM1KtXz6xcudLs3LnTxMfHG39/f2t/P/c945///KcJDAw0y5cvN998843ZsmWLx/y7detmWrVqZTZv3mxSU1NN586djcvlstY/u6/s2LHDWic3N9dIMp9++qkxxpjHH3/ctGrVymzbts1kZmaaVatWmQ8++MAYY8zWrVuNJLN69WqTlZVlvVdcCULTVdS5c2dz6623eozdfPPN5plnnjFr1qwxgYGB5scff/RYfsMNN5g5c+YYYy4fms4+xvm/rM++8N5///3L9tisWTMzc+ZM635VCU0nT540NWvWNIsXL7aWFRYWmoiICDN58mRjzE+/wLy8vMyWLVus5XXr1jULFiy4Kr2mpqYaSbZ+MZ85c8YEBASYDz/80BhjzL///W/j5eVlDh8+bNWsWLHigqHpb3/7m1WzZ88eI8mkp6cbY4xJSEgwQ4YM8Xis9evXmxo1apjTp0+bd9991wQGBlphraz9l5dp06aZ+vXrG2OMOXnypKlVq5bZuHGjR82gQYPMAw88cNFtDBs2zPzmN7+x7g8YMMCEhoaagoKCCun5rEs9X3ZDU58+fTxqzv6Mz30DLCoqMvXq1TMvv/yyMebir+dzQ9P3339vJF30TfK2224zEydO9BhbuHChCQ8Pv+ScyyonJ8dIMrt27bK1H99///3mnnvu8djGgw8+6BGa7MxBkklKSqqAGf3kUvvAgAEDTP369c2ZM2essb59+5r777//ots7+yZ/9h8qZ3/Wq1evtmomTZpkJJkDBw5YY0OHDjU9e/Y0xpT9dWTHiRMnjI+Pj1m6dKk19v333xuXy3XB0DRlyhTTpEkTU1hYWGpb6enpRpLZtm2bNbZ//34j6WeFpl69epk//OEPF+z3QutfKT6eu8puvPFGj/vh4eHKyclRamqqTp48qeDgYOvcBH9/f2VmZpbbR0tt27b1uH/q1CmNHj1azZo103XXXSd/f399+eWXOnToULk8Xnk6cOCAioqK1LFjR2usZs2auuWWW5Seni7pp+fynnvu0d///ndJ0kcffaQff/xRffv2vSo9tmzZUl27dlVsbKz69u2refPmKTc3V5KUk5OjRx99VE2aNJHb7Zbb7dbJkyet5zo9PV3XX3+96tWrZ20vLi7ugo9z7j4UHh5ubV+SUlNTtWDBAo99qGfPniopKVFmZqa6d++u+vXrq1GjRkpISNDixYutw/qX6v9q2Lt3r3788Ud1797do/9//OMfHq+B119/XW3btlXdunXl7++vefPmldpnY2NjK/w8pvJ4vs5/TZ517s/e29tbbdu2tfbzy60rSUFBQRo4cKB69uypXr16acaMGcrKyrKWp6amavz48R7P8+DBg5WVleXxMU9ZHThwQP3791ejRo0UGBhofUxy7s/pUvtxRkaGbrnlFo9tnn/f7hwu9TxdqcvtA82bN5eXl5d1/+zv+7N27Nihe++9V/Xr11dAQID18dH5+/O5z1VoaKh8fX3VqFEjj7Gz27X7OiqLAwcOqLCw0GP/DAoKUnR09AXr+/btq9OnT6tRo0YaPHiwli1bZn18mpGRIW9vb910001WfePGjVW7du2f1dNjjz2mpUuXqlWrVho9erQ2btxYhpnZR2i6ys4/CdHhcKikpEQlJSUKDw9XWlqaxy0jI0NPP/20JKlGjRqlvkl27ufjl3P+Nxqefvppvfvuu5owYYLWr1+vtLQ0xcbGqrCwsIyzqzhn5+1wOEqNnzv2yCOPaOnSpTp9+rTmz5+v+++/X76+vlelRy8vL61atUorVqxQs2bNNHPmTEVHRyszM1MDBw5Uamqqpk+fro0bNyotLU3BwcHWc33+z1UqPdezzt2HztaUlJRY/x06dKjHPvTFF19o//79uuGGGxQQEKDt27frrbfeUnh4uMaNG6eWLVvq+PHjl+z/ajg7h48//tij/71791rnY/y///f/9NRTT+nhhx/WypUrlZaWpj/84Q+l9tmr8e2dSz1fdl+rP6fP8/eHy607f/58bdq0SR06dNDbb7+tJk2aaPPmzZJ+eq5ffPFFj+d5165d2r9/v2rVqmW7p4vp1auXvv/+e82bN09btmzRli1bJMnj53Sp/fj81/XZsXPZnUNF7guXe81c7Pe99NM/Wnv06CF/f38tWrRI27Zt07JlyySp1P58/nN1qe3aeR2V1YV+T11KZGSkMjIy9Nprr8nlcmnYsGHq1KmTioqKLrqtc8dr1KhRauz819Fdd92lb775RklJSTp69Ki6du2qUaNG/aw+fw5CUxVx0003KTs7W97e3mrcuLHH7ezXsOvWrevxr0VJpa5r4+Pjo+LiYluPuX79eg0cOFD33XefYmNjFRYWpoMHD5bHdMpd48aN5ePjow0bNlhjRUVF+vzzzxUTE2ON3X333fLz89Ps2bO1YsUKPfzww1e1T4fDoY4dO+rFF1/Ujh075OPjo2XLlmn9+vVKTEzU3XffrebNm8vpdOq7776z1mvWrJkOHTqko0ePWmPnnxhrx0033aQ9e/aU2ofOPn/ST0cuunXrpsmTJ2vnzp06ePCgPvnkk0v2fzU0a9ZMTqdThw4dKtV7ZGSkpJ/22Q4dOmjYsGFq3bq1GjdufNVO8r+Qiz1f579Wi4uLtXv3btvbPRtuJOnMmTNKTU1V06ZNf3Z/rVu31pgxY7Rx40a1aNFCS5YskfTTfpKRkXHB/eTsG1VZff/990pPT9dzzz2nrl27KiYm5mcfgWvatKm2bt3qMfb555973K/IOfwcZX3NfPnll/ruu+/05z//WbfddpuaNm3qcRSqrOy8jsqqcePGqlmzpsf+mZubq3379l10HZfLpd69e+svf/mL1q5dq02bNmnXrl1q2rSpzpw5ox07dli1X331lY4fP27dP/tNvHNfSxe6llvdunU1cOBALVq0SNOnT9fcuXMlyfqdZ/c90Y6q/53zX4hu3bopLi5Offr00csvv6zo6GgdPXpUy5cvV58+fdS2bVvdcccdeuWVV/SPf/xDcXFxWrRokXbv3q3WrVtb22nQoIG2bNmigwcPyt/fX0FBQRd9zMaNG+u9995Tr1695HA49Pzzz1v/Sqlq/Pz89Nhjj+npp59WUFCQrr/+ek2ePFk//PCDBg0aZNV5eXlp4MCBGjNmjBo3bnzRj7gqwpYtW7RmzRr16NFDISEh2rJli44dO6aYmBg1btxYCxcuVNu2bZWfn6+nn35aLpfLWrdbt26Kjo7WQw89pClTpig/P19jx4792T0888wzat++vR5//HENHjxYfn5+Sk9P16pVqzRz5kx99NFH+vrrr9WpUyfVrl1by5cvV0lJiaKjoy/Z/9UQEBCgUaNG6amnnlJJSYluvfVW5efna+PGjfL399eAAQPUuHFj/eMf/9C///1vNWzYUAsXLtS2bdusj3+upks9X35+fhoxYoQ+/vhj3XDDDZo2bZrHm8HlvPbaa4qKilJMTIymTZum3Nzcn/UPgMzMTM2dO1e9e/dWRESEMjIytG/fPj300EOSpHHjxik+Pl6RkZHq27evatSooZ07d2rXrl1X/A2z2rVrKzg4WHPnzlV4eLgOHTqkZ5999mdtY/jw4erUqZOmTp2qXr166ZNPPtGKFSs8jj5V5BzsutQ+sHPnzkuue/3118vHx0czZ87Uo48+qt27d+tPf/rTFfdk53VUVv7+/ho0aJCefvppBQcHKzQ0VGPHjr1oSF2wYIGKi4vVrl07+fr6auHChXK5XKpfv771TcMhQ4Zo9uzZqlmzpkaOHCmXy2X9nF0ul9q3b68///nPatCggb777js999xzHo8xbtw4tWnTRs2bN1dBQYE++ugj63dWSEiIXC6XUlJSVK9ePdWqVevKL2NTbmdH4bIud3Jofn6+GT58uImIiDA1a9Y0kZGR5sEHHzSHDh2y6seNG2dCQ0ON2+02Tz31lHniiSc8TgTPyMgw7du3Ny6Xy0gymZmZ1smEubm5Ho+dmZlpbr/9duNyuUxkZKSZNWtWqR6ryongxhhz+vRpM3z4cFOnTh3jdDpNx44dzdatW0utc+DAASPJOkH8atm7d6/p2bOnqVu3rnE6naZJkybWSfXbt283bdu2NU6n00RFRZl//vOfpZ7bjIwMc+uttxofHx/TpEkTk5KScsETwS91UqQxP51M2r17d+Pv72/8/PzMjTfeaCZMmGCM+emk8M6dO5vatWsbl8tlbrzxRuubZ5fqv6KceyK4McaUlJSYGTNmmOjoaFOzZk1Tt25d07NnT7Nu3TpjjDE//vijGThwoHG73ea6664zjz32mHn22WetE6CNufAXJirCpZ6vwsJC89hjj5mgoCATEhJiJk2adMETwc9/bZ39GS9ZssS0a9fO+Pj4mJiYGLNmzRqr5mKv53NPBM/OzjZ9+vQx4eHhxsfHx9SvX9+MGzfOFBcXW/UpKSmmQ4cOxuVymcDAQHPLLbd4fLPpSqxatcrExMQYp9NpbrzxRrN27VprX7a7H8+dO9f86le/Mi6Xy/Tp08e89NJLJiwszONxLjeHc18/FeFS+4CdL+4sWbLENGjQwDidThMXF2c++OADj+fmQj/r+fPne5wQb4znz96Yy7+OrsSJEyfM73//e+Pr62tCQ0PN5MmTPd43zt2vly1bZtq1a2cCAwONn5+fad++vcdJ7UePHjV33XWXcTqdpn79+mbJkiUmJCTEvP7661bN3r17rfe0Vq1amZUrV3rsK3/6059MTEyMcblcJigoyNx7773m66+/ttafN2+eiYyMNDVq1PB47svKYQyXW0bV9cADD8jLy0uLFi2yvc5nn32mLl266MiRIwoNDa3A7oDyVRFXML5WDB48WF9++aXWr19f2a2gghw5ckSRkZFavXq1unbtWtntXBAfz6FKOnPmjPbt26dNmzZp6NChttYpKCjQ4cOH9fzzz6tfv34EJqAae/XVV9W9e3f5+flpxYoVevPNN/XXv/61sttCOfrkk0908uRJxcbGKisrS6NHj1aDBg3UqVOnym7tojgRHFXS7t271bZtWzVv3lyPPvqorXXeeustRUdHKy8vT5MnT67gDgFUpK1bt6p79+6KjY3V66+/rr/85S965JFHKrstlKOioiL98Y9/VPPmzXXfffepbt26Wrt2bZX+Uzd8PAcAAGADR5oAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAJSD5ORkLkgJXOMITQCqrYEDB8rhcJS63XnnnRX6uA6HQ++//77H2KhRo7RmzZoKfVwAlYsrggOo1u68807Nnz/fY8zpdF71Pvz9/eXv73/VHxfA1cORJgDVmtPpVFhYmMetdu3akn46IjRnzhzFx8fL19dXMTEx2rRpk7766it16dJFfn5+iouL04EDBzy2OXv2bN1www3y8fFRdHS0Fi5caC1r0KCBJOm+++6Tw+Gw7p//8VxJSYnGjx+vevXqyel0qlWrVkpJSbGWHzx4UA6HQ++9955uv/12+fr6qmXLltq0aVPFPFEArhihCcA17U9/+pMeeughpaWlqWnTpurfv7+GDh2qMWPG6PPPP5ckPfHEE1b9smXL9OSTT2rkyJHavXu3hg4dqj/84Q/69NNPJUnbtm2TJM2fP19ZWVnW/fPNmDFDU6ZM0auvvqqdO3eqZ8+e6t27t/bv3+9RN3bsWI0aNUppaWlq0qSJHnjgAZ05c6YingoAV8oAQDU1YMAA4+XlZfz8/Dxu48ePN8YYI8k899xzVv2mTZuMJPPGG29YY2+99ZapVauWdb9Dhw5m8ODBHo/Tt29fc/fdd1v3JZlly5Z51LzwwgumZcuW1v2IiAgzYcIEj5qbb77ZDBs2zBhjTGZmppFk/va3v1nL9+zZYySZ9PT0n/lMALgaOKcJQLV2++23a/bs2R5jQUFB1v/feOON1v+HhoZKkmJjYz3GfvzxR+Xn5yswMFDp6ekaMmSIx/Y6duyoGTNm2O4pPz9fR48eVceOHUtt54svvvAYO7e/8PBwSVJOTo6aNm1q+/EAXB2EJgDVmp+fnxo3bnzR5ef+xXSHw3HRsZKSklJjZxljSo3ZYWc7l+sFQNXBOU0AcI6YmBht2LDBY2zjxo2KiYmx7tesWVPFxcUX3UZgYKAiIiIuux0A1QtHmgBUawUFBcrOzvYY8/b2Vp06dcq0vaefflr9+vXTTTfdpK5du+rDDz/Ue++9p9WrV1s1DRo00Jo1a9SxY0c5nU7r23rnb+eFF17QDTfcoFatWmn+/PlKS0vT4sWLy9QXgMpHaAJQraWkpFjnAp0VHR2tL7/8skzb69Onj2bMmKFXXnlFiYmJatiwoebPn68uXbpYNVOmTNGIESM0b948/epXv9LBgwdLbScxMVH5+fkaOXKkcnJy1KxZM33wwQeKiooqU18AKp/DGGMquwkAAICqjnOaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDh/wNHGcb1uChEfwAAAABJRU5ErkJggg==\n",
|
178 |
+
"text/plain": [
|
179 |
+
"<Figure size 640x480 with 1 Axes>"
|
180 |
+
]
|
181 |
+
},
|
182 |
+
"metadata": {},
|
183 |
+
"output_type": "display_data"
|
184 |
+
}
|
185 |
+
],
|
186 |
+
"source": [
|
187 |
+
"# Plot\n",
|
188 |
+
"sns.countplot(x='Emotion',data=df)"
|
189 |
+
]
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"cell_type": "code",
|
193 |
+
"execution_count": 6,
|
194 |
+
"id": "40f991d0-952f-40c1-bf00-f3476ce0436d",
|
195 |
+
"metadata": {
|
196 |
+
"jupyter": {
|
197 |
+
"outputs_hidden": true
|
198 |
+
},
|
199 |
+
"scrolled": false,
|
200 |
+
"tags": []
|
201 |
+
},
|
202 |
+
"outputs": [
|
203 |
+
{
|
204 |
+
"data": {
|
205 |
+
"text/plain": [
|
206 |
+
"['BTC_ADDRESS_REGEX',\n",
|
207 |
+
" 'CURRENCY_REGEX',\n",
|
208 |
+
" 'CURRENCY_SYMB_REGEX',\n",
|
209 |
+
" 'Counter',\n",
|
210 |
+
" 'DATE_REGEX',\n",
|
211 |
+
" 'EMAIL_REGEX',\n",
|
212 |
+
" 'EMOJI_REGEX',\n",
|
213 |
+
" 'HASTAG_REGEX',\n",
|
214 |
+
" 'MASTERCard_REGEX',\n",
|
215 |
+
" 'MD5_SHA_REGEX',\n",
|
216 |
+
" 'MOST_COMMON_PUNCT_REGEX',\n",
|
217 |
+
" 'NUMBERS_REGEX',\n",
|
218 |
+
" 'PHONE_REGEX',\n",
|
219 |
+
" 'PoBOX_REGEX',\n",
|
220 |
+
" 'SPECIAL_CHARACTERS_REGEX',\n",
|
221 |
+
" 'STOPWORDS',\n",
|
222 |
+
" 'STOPWORDS_de',\n",
|
223 |
+
" 'STOPWORDS_en',\n",
|
224 |
+
" 'STOPWORDS_es',\n",
|
225 |
+
" 'STOPWORDS_fr',\n",
|
226 |
+
" 'STOPWORDS_ru',\n",
|
227 |
+
" 'STOPWORDS_yo',\n",
|
228 |
+
" 'STREET_ADDRESS_REGEX',\n",
|
229 |
+
" 'TextFrame',\n",
|
230 |
+
" 'URL_PATTERN',\n",
|
231 |
+
" 'USER_HANDLES_REGEX',\n",
|
232 |
+
" 'VISACard_REGEX',\n",
|
233 |
+
" '__builtins__',\n",
|
234 |
+
" '__cached__',\n",
|
235 |
+
" '__doc__',\n",
|
236 |
+
" '__file__',\n",
|
237 |
+
" '__generate_text',\n",
|
238 |
+
" '__loader__',\n",
|
239 |
+
" '__name__',\n",
|
240 |
+
" '__numbers_dict',\n",
|
241 |
+
" '__package__',\n",
|
242 |
+
" '__spec__',\n",
|
243 |
+
" '_lex_richness_herdan',\n",
|
244 |
+
" '_lex_richness_maas_ttr',\n",
|
245 |
+
" 'clean_text',\n",
|
246 |
+
" 'defaultdict',\n",
|
247 |
+
" 'digit2words',\n",
|
248 |
+
" 'extract_btc_address',\n",
|
249 |
+
" 'extract_currencies',\n",
|
250 |
+
" 'extract_currency_symbols',\n",
|
251 |
+
" 'extract_dates',\n",
|
252 |
+
" 'extract_emails',\n",
|
253 |
+
" 'extract_emojis',\n",
|
254 |
+
" 'extract_hashtags',\n",
|
255 |
+
" 'extract_html_tags',\n",
|
256 |
+
" 'extract_mastercard_addr',\n",
|
257 |
+
" 'extract_md5sha',\n",
|
258 |
+
" 'extract_numbers',\n",
|
259 |
+
" 'extract_pattern',\n",
|
260 |
+
" 'extract_phone_numbers',\n",
|
261 |
+
" 'extract_postoffice_box',\n",
|
262 |
+
" 'extract_shortwords',\n",
|
263 |
+
" 'extract_special_characters',\n",
|
264 |
+
" 'extract_stopwords',\n",
|
265 |
+
" 'extract_street_address',\n",
|
266 |
+
" 'extract_terms_in_bracket',\n",
|
267 |
+
" 'extract_urls',\n",
|
268 |
+
" 'extract_userhandles',\n",
|
269 |
+
" 'extract_visacard_addr',\n",
|
270 |
+
" 'fix_contractions',\n",
|
271 |
+
" 'generate_sentence',\n",
|
272 |
+
" 'hamming_distance',\n",
|
273 |
+
" 'inverse_df',\n",
|
274 |
+
" 'lexical_richness',\n",
|
275 |
+
" 'markov_chain',\n",
|
276 |
+
" 'math',\n",
|
277 |
+
" 'nlargest',\n",
|
278 |
+
" 'normalize',\n",
|
279 |
+
" 'num2words',\n",
|
280 |
+
" 'random',\n",
|
281 |
+
" 're',\n",
|
282 |
+
" 'read_txt',\n",
|
283 |
+
" 'remove_accents',\n",
|
284 |
+
" 'remove_bad_quotes',\n",
|
285 |
+
" 'remove_btc_address',\n",
|
286 |
+
" 'remove_currencies',\n",
|
287 |
+
" 'remove_currency_symbols',\n",
|
288 |
+
" 'remove_custom_pattern',\n",
|
289 |
+
" 'remove_custom_words',\n",
|
290 |
+
" 'remove_dates',\n",
|
291 |
+
" 'remove_emails',\n",
|
292 |
+
" 'remove_emojis',\n",
|
293 |
+
" 'remove_hashtags',\n",
|
294 |
+
" 'remove_html_tags',\n",
|
295 |
+
" 'remove_mastercard_addr',\n",
|
296 |
+
" 'remove_md5sha',\n",
|
297 |
+
" 'remove_multiple_spaces',\n",
|
298 |
+
" 'remove_non_ascii',\n",
|
299 |
+
" 'remove_numbers',\n",
|
300 |
+
" 'remove_phone_numbers',\n",
|
301 |
+
" 'remove_postoffice_box',\n",
|
302 |
+
" 'remove_puncts',\n",
|
303 |
+
" 'remove_punctuations',\n",
|
304 |
+
" 'remove_shortwords',\n",
|
305 |
+
" 'remove_special_characters',\n",
|
306 |
+
" 'remove_stopwords',\n",
|
307 |
+
" 'remove_street_address',\n",
|
308 |
+
" 'remove_terms_in_bracket',\n",
|
309 |
+
" 'remove_urls',\n",
|
310 |
+
" 'remove_userhandles',\n",
|
311 |
+
" 'remove_visacard_addr',\n",
|
312 |
+
" 'replace_bad_quotes',\n",
|
313 |
+
" 'replace_currencies',\n",
|
314 |
+
" 'replace_currency_symbols',\n",
|
315 |
+
" 'replace_dates',\n",
|
316 |
+
" 'replace_emails',\n",
|
317 |
+
" 'replace_emojis',\n",
|
318 |
+
" 'replace_numbers',\n",
|
319 |
+
" 'replace_phone_numbers',\n",
|
320 |
+
" 'replace_special_characters',\n",
|
321 |
+
" 'replace_term',\n",
|
322 |
+
" 'replace_urls',\n",
|
323 |
+
" 'string',\n",
|
324 |
+
" 'term_freq',\n",
|
325 |
+
" 'to_txt',\n",
|
326 |
+
" 'unicodedata',\n",
|
327 |
+
" 'word_freq',\n",
|
328 |
+
" 'word_length_freq']"
|
329 |
+
]
|
330 |
+
},
|
331 |
+
"execution_count": 6,
|
332 |
+
"metadata": {},
|
333 |
+
"output_type": "execute_result"
|
334 |
+
}
|
335 |
+
],
|
336 |
+
"source": [
|
337 |
+
"# Data Cleaning\n",
|
338 |
+
"dir(nfx)"
|
339 |
+
]
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"cell_type": "code",
|
343 |
+
"execution_count": 7,
|
344 |
+
"id": "b1f87847-a91c-4bd6-a307-d746eb5aa9a0",
|
345 |
+
"metadata": {},
|
346 |
+
"outputs": [],
|
347 |
+
"source": [
|
348 |
+
"# User handles\n",
|
349 |
+
"df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)"
|
350 |
+
]
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"cell_type": "code",
|
354 |
+
"execution_count": 8,
|
355 |
+
"id": "03886bc3-1ac4-4f1b-842b-e5d2d770ff81",
|
356 |
+
"metadata": {},
|
357 |
+
"outputs": [],
|
358 |
+
"source": [
|
359 |
+
"# Stopwords\n",
|
360 |
+
"df['Clean_Text'] = df['Clean_Text'].apply(nfx.remove_stopwords)"
|
361 |
+
]
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"cell_type": "markdown",
|
365 |
+
"id": "0ffcf4c7",
|
366 |
+
"metadata": {},
|
367 |
+
"source": [
|
368 |
+
"## We are not removing Special Characters as some of the rows have just Special Characters and it'll result into empty row."
|
369 |
+
]
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"cell_type": "code",
|
373 |
+
"execution_count": 9,
|
374 |
+
"id": "0a0fcc0c-4adf-4f0b-b226-164659ad70ba",
|
375 |
+
"metadata": {
|
376 |
+
"jupyter": {
|
377 |
+
"outputs_hidden": true
|
378 |
+
},
|
379 |
+
"tags": []
|
380 |
+
},
|
381 |
+
"outputs": [
|
382 |
+
{
|
383 |
+
"data": {
|
384 |
+
"text/html": [
|
385 |
+
"<div>\n",
|
386 |
+
"<style scoped>\n",
|
387 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
388 |
+
" vertical-align: middle;\n",
|
389 |
+
" }\n",
|
390 |
+
"\n",
|
391 |
+
" .dataframe tbody tr th {\n",
|
392 |
+
" vertical-align: top;\n",
|
393 |
+
" }\n",
|
394 |
+
"\n",
|
395 |
+
" .dataframe thead th {\n",
|
396 |
+
" text-align: right;\n",
|
397 |
+
" }\n",
|
398 |
+
"</style>\n",
|
399 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
400 |
+
" <thead>\n",
|
401 |
+
" <tr style=\"text-align: right;\">\n",
|
402 |
+
" <th></th>\n",
|
403 |
+
" <th>Emotion</th>\n",
|
404 |
+
" <th>Text</th>\n",
|
405 |
+
" <th>Clean_Text</th>\n",
|
406 |
+
" </tr>\n",
|
407 |
+
" </thead>\n",
|
408 |
+
" <tbody>\n",
|
409 |
+
" <tr>\n",
|
410 |
+
" <th>0</th>\n",
|
411 |
+
" <td>neutral</td>\n",
|
412 |
+
" <td>Why ?</td>\n",
|
413 |
+
" <td>?</td>\n",
|
414 |
+
" </tr>\n",
|
415 |
+
" <tr>\n",
|
416 |
+
" <th>1</th>\n",
|
417 |
+
" <td>joy</td>\n",
|
418 |
+
" <td>Sage Act upgrade on my to do list for tommorow.</td>\n",
|
419 |
+
" <td>Sage Act upgrade list tommorow.</td>\n",
|
420 |
+
" </tr>\n",
|
421 |
+
" <tr>\n",
|
422 |
+
" <th>2</th>\n",
|
423 |
+
" <td>sadness</td>\n",
|
424 |
+
" <td>ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...</td>\n",
|
425 |
+
" <td>WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS...</td>\n",
|
426 |
+
" </tr>\n",
|
427 |
+
" <tr>\n",
|
428 |
+
" <th>3</th>\n",
|
429 |
+
" <td>joy</td>\n",
|
430 |
+
" <td>Such an eye ! The true hazel eye-and so brill...</td>\n",
|
431 |
+
" <td>eye ! true hazel eye-and brilliant ! Regular f...</td>\n",
|
432 |
+
" </tr>\n",
|
433 |
+
" <tr>\n",
|
434 |
+
" <th>4</th>\n",
|
435 |
+
" <td>joy</td>\n",
|
436 |
+
" <td>@Iluvmiasantos ugh babe.. hugggzzz for u .! b...</td>\n",
|
437 |
+
" <td>ugh babe.. hugggzzz u .! babe naamazed nga ako...</td>\n",
|
438 |
+
" </tr>\n",
|
439 |
+
" <tr>\n",
|
440 |
+
" <th>...</th>\n",
|
441 |
+
" <td>...</td>\n",
|
442 |
+
" <td>...</td>\n",
|
443 |
+
" <td>...</td>\n",
|
444 |
+
" </tr>\n",
|
445 |
+
" <tr>\n",
|
446 |
+
" <th>34787</th>\n",
|
447 |
+
" <td>surprise</td>\n",
|
448 |
+
" <td>@MichelGW have you gift! Hope you like it! It'...</td>\n",
|
449 |
+
" <td>gift! Hope like it! hand wear ! It'll warm! Lol</td>\n",
|
450 |
+
" </tr>\n",
|
451 |
+
" <tr>\n",
|
452 |
+
" <th>34788</th>\n",
|
453 |
+
" <td>joy</td>\n",
|
454 |
+
" <td>The world didnt give it to me..so the world MO...</td>\n",
|
455 |
+
" <td>world didnt me..so world DEFINITELY cnt away!!!</td>\n",
|
456 |
+
" </tr>\n",
|
457 |
+
" <tr>\n",
|
458 |
+
" <th>34789</th>\n",
|
459 |
+
" <td>anger</td>\n",
|
460 |
+
" <td>A man robbed me today .</td>\n",
|
461 |
+
" <td>man robbed today .</td>\n",
|
462 |
+
" </tr>\n",
|
463 |
+
" <tr>\n",
|
464 |
+
" <th>34790</th>\n",
|
465 |
+
" <td>fear</td>\n",
|
466 |
+
" <td>Youu call it JEALOUSY, I call it of #Losing YO...</td>\n",
|
467 |
+
" <td>Youu JEALOUSY, #Losing YOU...</td>\n",
|
468 |
+
" </tr>\n",
|
469 |
+
" <tr>\n",
|
470 |
+
" <th>34791</th>\n",
|
471 |
+
" <td>sadness</td>\n",
|
472 |
+
" <td>I think about you baby, and I dream about you ...</td>\n",
|
473 |
+
" <td>think baby, dream time</td>\n",
|
474 |
+
" </tr>\n",
|
475 |
+
" </tbody>\n",
|
476 |
+
"</table>\n",
|
477 |
+
"<p>34792 rows × 3 columns</p>\n",
|
478 |
+
"</div>"
|
479 |
+
],
|
480 |
+
"text/plain": [
|
481 |
+
" Emotion Text \\\n",
|
482 |
+
"0 neutral Why ? \n",
|
483 |
+
"1 joy Sage Act upgrade on my to do list for tommorow. \n",
|
484 |
+
"2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ... \n",
|
485 |
+
"3 joy Such an eye ! The true hazel eye-and so brill... \n",
|
486 |
+
"4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b... \n",
|
487 |
+
"... ... ... \n",
|
488 |
+
"34787 surprise @MichelGW have you gift! Hope you like it! It'... \n",
|
489 |
+
"34788 joy The world didnt give it to me..so the world MO... \n",
|
490 |
+
"34789 anger A man robbed me today . \n",
|
491 |
+
"34790 fear Youu call it JEALOUSY, I call it of #Losing YO... \n",
|
492 |
+
"34791 sadness I think about you baby, and I dream about you ... \n",
|
493 |
+
"\n",
|
494 |
+
" Clean_Text \n",
|
495 |
+
"0 ? \n",
|
496 |
+
"1 Sage Act upgrade list tommorow. \n",
|
497 |
+
"2 WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS... \n",
|
498 |
+
"3 eye ! true hazel eye-and brilliant ! Regular f... \n",
|
499 |
+
"4 ugh babe.. hugggzzz u .! babe naamazed nga ako... \n",
|
500 |
+
"... ... \n",
|
501 |
+
"34787 gift! Hope like it! hand wear ! It'll warm! Lol \n",
|
502 |
+
"34788 world didnt me..so world DEFINITELY cnt away!!! \n",
|
503 |
+
"34789 man robbed today . \n",
|
504 |
+
"34790 Youu JEALOUSY, #Losing YOU... \n",
|
505 |
+
"34791 think baby, dream time \n",
|
506 |
+
"\n",
|
507 |
+
"[34792 rows x 3 columns]"
|
508 |
+
]
|
509 |
+
},
|
510 |
+
"execution_count": 9,
|
511 |
+
"metadata": {},
|
512 |
+
"output_type": "execute_result"
|
513 |
+
}
|
514 |
+
],
|
515 |
+
"source": [
|
516 |
+
"df"
|
517 |
+
]
|
518 |
+
},
|
519 |
+
{
|
520 |
+
"cell_type": "code",
|
521 |
+
"execution_count": 10,
|
522 |
+
"id": "450c39c0-79dd-4eaf-85fe-57e344eb81bd",
|
523 |
+
"metadata": {},
|
524 |
+
"outputs": [],
|
525 |
+
"source": [
|
526 |
+
"# Features & Labels\n",
|
527 |
+
"Xfeatures = df['Clean_Text']\n",
|
528 |
+
"ylabels = df['Emotion']"
|
529 |
+
]
|
530 |
+
},
|
531 |
+
{
|
532 |
+
"cell_type": "markdown",
|
533 |
+
"id": "edde3d4b",
|
534 |
+
"metadata": {},
|
535 |
+
"source": [
|
536 |
+
"# It is advisable to split before applying pipelines because it prevents data leakage."
|
537 |
+
]
|
538 |
+
},
|
539 |
+
{
|
540 |
+
"cell_type": "code",
|
541 |
+
"execution_count": 11,
|
542 |
+
"id": "27d7f976-c28f-449e-ae1a-53a42bbda4e8",
|
543 |
+
"metadata": {},
|
544 |
+
"outputs": [],
|
545 |
+
"source": [
|
546 |
+
"# Split Data\n",
|
547 |
+
"x_train,x_test,y_train,y_test = train_test_split(Xfeatures,ylabels,test_size=0.3,random_state=42)"
|
548 |
+
]
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"cell_type": "code",
|
552 |
+
"execution_count": 12,
|
553 |
+
"id": "2f086f29-dba9-40d2-a9dd-f06a6cca3a4c",
|
554 |
+
"metadata": {},
|
555 |
+
"outputs": [],
|
556 |
+
"source": [
|
557 |
+
"# Build Pipeline\n",
|
558 |
+
"from sklearn.pipeline import Pipeline"
|
559 |
+
]
|
560 |
+
},
|
561 |
+
{
|
562 |
+
"cell_type": "code",
|
563 |
+
"execution_count": 13,
|
564 |
+
"id": "6b81cc86-2bef-40c2-b9a3-668caaadaff0",
|
565 |
+
"metadata": {},
|
566 |
+
"outputs": [],
|
567 |
+
"source": [
|
568 |
+
"# LogisticRegression Pipeline\n",
|
569 |
+
"pipe_lr = Pipeline(steps=[('cv',CountVectorizer()),('lr',LogisticRegression())])"
|
570 |
+
]
|
571 |
+
},
|
572 |
+
{
|
573 |
+
"cell_type": "code",
|
574 |
+
"execution_count": 14,
|
575 |
+
"id": "dc64b9a7-efe2-4bc4-a0e7-46dff1d52b31",
|
576 |
+
"metadata": {
|
577 |
+
"jupyter": {
|
578 |
+
"outputs_hidden": true
|
579 |
+
},
|
580 |
+
"scrolled": false,
|
581 |
+
"tags": []
|
582 |
+
},
|
583 |
+
"outputs": [
|
584 |
+
{
|
585 |
+
"name": "stderr",
|
586 |
+
"output_type": "stream",
|
587 |
+
"text": [
|
588 |
+
"C:\\Users\\Sanket\\anaconda3\\envs\\nlp\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
|
589 |
+
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
|
590 |
+
"\n",
|
591 |
+
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
|
592 |
+
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
|
593 |
+
"Please also refer to the documentation for alternative solver options:\n",
|
594 |
+
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
|
595 |
+
" n_iter_i = _check_optimize_result(\n"
|
596 |
+
]
|
597 |
+
},
|
598 |
+
{
|
599 |
+
"data": {
|
600 |
+
"text/html": [
|
601 |
+
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CountVectorizer</label><div class=\"sk-toggleable__content\"><pre>CountVectorizer()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div></div></div>"
|
602 |
+
],
|
603 |
+
"text/plain": [
|
604 |
+
"Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])"
|
605 |
+
]
|
606 |
+
},
|
607 |
+
"execution_count": 14,
|
608 |
+
"metadata": {},
|
609 |
+
"output_type": "execute_result"
|
610 |
+
}
|
611 |
+
],
|
612 |
+
"source": [
|
613 |
+
"# Train and Fit Data\n",
|
614 |
+
"pipe_lr.fit(x_train,y_train)"
|
615 |
+
]
|
616 |
+
},
|
617 |
+
{
|
618 |
+
"cell_type": "code",
|
619 |
+
"execution_count": 15,
|
620 |
+
"id": "135ed6f8-56ff-4d53-85e3-541e3a7ae2d7",
|
621 |
+
"metadata": {},
|
622 |
+
"outputs": [
|
623 |
+
{
|
624 |
+
"data": {
|
625 |
+
"text/html": [
|
626 |
+
"<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CountVectorizer</label><div class=\"sk-toggleable__content\"><pre>CountVectorizer()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div></div></div>"
|
627 |
+
],
|
628 |
+
"text/plain": [
|
629 |
+
"Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])"
|
630 |
+
]
|
631 |
+
},
|
632 |
+
"execution_count": 15,
|
633 |
+
"metadata": {},
|
634 |
+
"output_type": "execute_result"
|
635 |
+
}
|
636 |
+
],
|
637 |
+
"source": [
|
638 |
+
"pipe_lr"
|
639 |
+
]
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"cell_type": "code",
|
643 |
+
"execution_count": 16,
|
644 |
+
"id": "28396371-5f5c-4a3b-b974-164e047764f3",
|
645 |
+
"metadata": {},
|
646 |
+
"outputs": [
|
647 |
+
{
|
648 |
+
"data": {
|
649 |
+
"text/plain": [
|
650 |
+
"0.619946349875455"
|
651 |
+
]
|
652 |
+
},
|
653 |
+
"execution_count": 16,
|
654 |
+
"metadata": {},
|
655 |
+
"output_type": "execute_result"
|
656 |
+
}
|
657 |
+
],
|
658 |
+
"source": [
|
659 |
+
"# Check Accuracy\n",
|
660 |
+
"pipe_lr.score(x_test,y_test)"
|
661 |
+
]
|
662 |
+
},
|
663 |
+
{
|
664 |
+
"cell_type": "code",
|
665 |
+
"execution_count": 17,
|
666 |
+
"id": "eb3a26b6-d09e-422f-991b-b08c48f55b75",
|
667 |
+
"metadata": {},
|
668 |
+
"outputs": [],
|
669 |
+
"source": [
|
670 |
+
"# Make A Prediction\n",
|
671 |
+
"ex1 = \"This book was so interesting it made me happy\""
|
672 |
+
]
|
673 |
+
},
|
674 |
+
{
|
675 |
+
"cell_type": "code",
|
676 |
+
"execution_count": 18,
|
677 |
+
"id": "b08597d9-6f59-45cb-a648-95b0da1ce313",
|
678 |
+
"metadata": {},
|
679 |
+
"outputs": [
|
680 |
+
{
|
681 |
+
"data": {
|
682 |
+
"text/plain": [
|
683 |
+
"array(['joy'], dtype=object)"
|
684 |
+
]
|
685 |
+
},
|
686 |
+
"execution_count": 18,
|
687 |
+
"metadata": {},
|
688 |
+
"output_type": "execute_result"
|
689 |
+
}
|
690 |
+
],
|
691 |
+
"source": [
|
692 |
+
"pipe_lr.predict([ex1])"
|
693 |
+
]
|
694 |
+
},
|
695 |
+
{
|
696 |
+
"cell_type": "code",
|
697 |
+
"execution_count": 19,
|
698 |
+
"id": "5b3822ac-17fc-43dd-9bb7-8dad07a4d32c",
|
699 |
+
"metadata": {},
|
700 |
+
"outputs": [
|
701 |
+
{
|
702 |
+
"data": {
|
703 |
+
"text/plain": [
|
704 |
+
"array([[1.60505334e-03, 7.06448086e-03, 6.95652453e-03, 9.43810868e-01,\n",
|
705 |
+
" 1.00440585e-04, 2.63232385e-02, 6.63277122e-05, 1.40730665e-02]])"
|
706 |
+
]
|
707 |
+
},
|
708 |
+
"execution_count": 19,
|
709 |
+
"metadata": {},
|
710 |
+
"output_type": "execute_result"
|
711 |
+
}
|
712 |
+
],
|
713 |
+
"source": [
|
714 |
+
"# Prediction Prob\n",
|
715 |
+
"pipe_lr.predict_proba([ex1])"
|
716 |
+
]
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"cell_type": "code",
|
720 |
+
"execution_count": 20,
|
721 |
+
"id": "5b7c4596-d643-48e5-a777-79a6f55c49da",
|
722 |
+
"metadata": {},
|
723 |
+
"outputs": [
|
724 |
+
{
|
725 |
+
"data": {
|
726 |
+
"text/plain": [
|
727 |
+
"array(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'shame',\n",
|
728 |
+
" 'surprise'], dtype=object)"
|
729 |
+
]
|
730 |
+
},
|
731 |
+
"execution_count": 20,
|
732 |
+
"metadata": {},
|
733 |
+
"output_type": "execute_result"
|
734 |
+
}
|
735 |
+
],
|
736 |
+
"source": [
|
737 |
+
"# To Know the classes\n",
|
738 |
+
"pipe_lr.classes_"
|
739 |
+
]
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"cell_type": "code",
|
743 |
+
"execution_count": 21,
|
744 |
+
"id": "c0d40f62-b1fd-4748-a279-c8f50c748f26",
|
745 |
+
"metadata": {},
|
746 |
+
"outputs": [],
|
747 |
+
"source": [
|
748 |
+
"# Save Model & Pipeline\n",
|
749 |
+
"import joblib\n",
|
750 |
+
"pipeline_file = open(\"../models/emotion_classifier_pipe_lr.pkl\",\"wb\")\n",
|
751 |
+
"joblib.dump(pipe_lr,pipeline_file)\n",
|
752 |
+
"pipeline_file.close()"
|
753 |
+
]
|
754 |
+
},
|
755 |
+
{
|
756 |
+
"cell_type": "code",
|
757 |
+
"execution_count": null,
|
758 |
+
"id": "377c4e98-67f0-45e5-8dd5-0417585754f0",
|
759 |
+
"metadata": {},
|
760 |
+
"outputs": [],
|
761 |
+
"source": []
|
762 |
+
}
|
763 |
+
],
|
764 |
+
"metadata": {
|
765 |
+
"kernelspec": {
|
766 |
+
"display_name": "Python 3 (ipykernel)",
|
767 |
+
"language": "python",
|
768 |
+
"name": "python3"
|
769 |
+
},
|
770 |
+
"language_info": {
|
771 |
+
"codemirror_mode": {
|
772 |
+
"name": "ipython",
|
773 |
+
"version": 3
|
774 |
+
},
|
775 |
+
"file_extension": ".py",
|
776 |
+
"mimetype": "text/x-python",
|
777 |
+
"name": "python",
|
778 |
+
"nbconvert_exporter": "python",
|
779 |
+
"pygments_lexer": "ipython3",
|
780 |
+
"version": "3.10.9"
|
781 |
+
}
|
782 |
+
},
|
783 |
+
"nbformat": 4,
|
784 |
+
"nbformat_minor": 5
|
785 |
+
}
|