ruba2ksa committed on
Commit
10c9523
1 Parent(s): f1e37ca

Upload Emotion Detection in Text.ipynb

Browse files
Files changed (1) hide show
  1. Emotion Detection in Text.ipynb +785 -0
Emotion Detection in Text.ipynb ADDED
@@ -0,0 +1,785 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "56cccab6",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Emotion Detection in Text"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "f0814628-3d83-4fd6-a511-2eccf79f9f1e",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "# EDA\n",
19
+ "import pandas as pd\n",
20
+ "import numpy as np\n",
21
+ "\n",
22
+ "# Load Data Viz Pkgs\n",
23
+ "import seaborn as sns\n",
24
+ "\n",
25
+ "# Load Text Cleaning Pkgs\n",
26
+ "import neattext.functions as nfx\n",
27
+ "\n",
28
+ "# Load ML Pkgs\n",
29
+ "# Estimators\n",
30
+ "from sklearn.linear_model import LogisticRegression\n",
31
+ "from sklearn.naive_bayes import MultinomialNB\n",
32
+ "\n",
33
+ "# Transformers\n",
34
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
35
+ "from sklearn.model_selection import train_test_split\n",
36
+ "from sklearn.metrics import accuracy_score,classification_report,confusion_matrix"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 2,
42
+ "id": "b209e004-ab77-4407-8689-b4318944d47f",
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "# Load Dataset\n",
47
+ "df = pd.read_csv(\"../data/emotion_dataset_raw.csv\")"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 3,
53
+ "id": "fea2d4c0-3bdd-405e-ab69-507ceaac36cb",
54
+ "metadata": {},
55
+ "outputs": [
56
+ {
57
+ "data": {
58
+ "text/html": [
59
+ "<div>\n",
60
+ "<style scoped>\n",
61
+ " .dataframe tbody tr th:only-of-type {\n",
62
+ " vertical-align: middle;\n",
63
+ " }\n",
64
+ "\n",
65
+ " .dataframe tbody tr th {\n",
66
+ " vertical-align: top;\n",
67
+ " }\n",
68
+ "\n",
69
+ " .dataframe thead th {\n",
70
+ " text-align: right;\n",
71
+ " }\n",
72
+ "</style>\n",
73
+ "<table border=\"1\" class=\"dataframe\">\n",
74
+ " <thead>\n",
75
+ " <tr style=\"text-align: right;\">\n",
76
+ " <th></th>\n",
77
+ " <th>Emotion</th>\n",
78
+ " <th>Text</th>\n",
79
+ " </tr>\n",
80
+ " </thead>\n",
81
+ " <tbody>\n",
82
+ " <tr>\n",
83
+ " <th>0</th>\n",
84
+ " <td>neutral</td>\n",
85
+ " <td>Why ?</td>\n",
86
+ " </tr>\n",
87
+ " <tr>\n",
88
+ " <th>1</th>\n",
89
+ " <td>joy</td>\n",
90
+ " <td>Sage Act upgrade on my to do list for tommorow.</td>\n",
91
+ " </tr>\n",
92
+ " <tr>\n",
93
+ " <th>2</th>\n",
94
+ " <td>sadness</td>\n",
95
+ " <td>ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...</td>\n",
96
+ " </tr>\n",
97
+ " <tr>\n",
98
+ " <th>3</th>\n",
99
+ " <td>joy</td>\n",
100
+ " <td>Such an eye ! The true hazel eye-and so brill...</td>\n",
101
+ " </tr>\n",
102
+ " <tr>\n",
103
+ " <th>4</th>\n",
104
+ " <td>joy</td>\n",
105
+ " <td>@Iluvmiasantos ugh babe.. hugggzzz for u .! b...</td>\n",
106
+ " </tr>\n",
107
+ " </tbody>\n",
108
+ "</table>\n",
109
+ "</div>"
110
+ ],
111
+ "text/plain": [
112
+ " Emotion Text\n",
113
+ "0 neutral Why ? \n",
114
+ "1 joy Sage Act upgrade on my to do list for tommorow.\n",
115
+ "2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...\n",
116
+ "3 joy Such an eye ! The true hazel eye-and so brill...\n",
117
+ "4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b..."
118
+ ]
119
+ },
120
+ "execution_count": 3,
121
+ "metadata": {},
122
+ "output_type": "execute_result"
123
+ }
124
+ ],
125
+ "source": [
126
+ "df.head()"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": 4,
132
+ "id": "430565a3-cf3b-4c6f-afa5-bafd084f5676",
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "data": {
137
+ "text/plain": [
138
+ "joy 11045\n",
139
+ "sadness 6722\n",
140
+ "fear 5410\n",
141
+ "anger 4297\n",
142
+ "surprise 4062\n",
143
+ "neutral 2254\n",
144
+ "disgust 856\n",
145
+ "shame 146\n",
146
+ "Name: Emotion, dtype: int64"
147
+ ]
148
+ },
149
+ "execution_count": 4,
150
+ "metadata": {},
151
+ "output_type": "execute_result"
152
+ }
153
+ ],
154
+ "source": [
155
+ "# Value Counts\n",
156
+ "df['Emotion'].value_counts()"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 5,
162
+ "id": "531d3449-a959-4a19-bff0-3ffed551e619",
163
+ "metadata": {},
164
+ "outputs": [
165
+ {
166
+ "data": {
167
+ "text/plain": [
168
+ "<Axes: xlabel='Emotion', ylabel='count'>"
169
+ ]
170
+ },
171
+ "execution_count": 5,
172
+ "metadata": {},
173
+ "output_type": "execute_result"
174
+ },
175
+ {
176
+ "data": {
177
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAGwCAYAAAC0HlECAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA9IUlEQVR4nO3de1yUdf7//+cIMg6nSVBOK3lIRFRS01K01PLUAc12V9tsSTdTy5JIzXItIzd1szyttqZuq62H7LuVbQdlPZSu5jGUPBGaYeoKYS2CmgHC+/dHH6+fIx6uEATscb/d5lbzvl7XNa/3cA3z9JprLhzGGCMAAABcUo3KbgAAAKA6IDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wru4FrSUlJiY4ePaqAgAA5HI7KbgcAANhgjNGJEycUERGhGjUufjyJ0FSOjh49qsjIyMpuAwAAlMHhw4dVr169iy4nNJWjgIAAST896YGBgZXcDQAAsCM/P1+RkZHW+/jFEJrK0dmP5AIDAwlNAABUM5c7tYYTwQEAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wruwFc2w6Nj63sFq7Y9eN2VXYLAIAqgCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsKFSQ9N//vMf9erVSxEREXI4HHr//fc9lhtjlJycrIiICLlcLnXp0kV79uzxqCkoKNDw4cNVp04d+fn5qXfv3jpy5IhHTW5urhISEuR2u+V2u5WQkKDjx4971Bw6dEi9evWSn5+f6tSpo8TERBUWFlbEtAEAQDVUqaHp1KlTatmypWbNmnXB5ZMnT9bUqVM1a9Ysbdu2TWFhYerevbtOnDhh1SQlJWnZsmVaunSpNmzYoJMnTyo+Pl7FxcVWTf/+/ZWWlqaUlBSlpKQoLS1NCQkJ1vLi4mLdc889OnXqlDZs2KClS5fq3Xff1ciRIytu8gAAoFpxGGNMZTchSQ6HQ8uWLVOfPn0k/XSUKSIiQklJSXrmmWck/XRUKTQ0VC+//LKGDh2qvLw81a1bVwsXLtT9998vSTp69KgiIyO1fPly9ezZU+np6WrWrJk2b96sdu3aSZI2b96suLg4ffnll4qOjtaKFSsUHx+vw4cPKyIiQpK0dOlSDRw4UDk5OQoMDLQ1h/z8fLndbuXl5dle51p3aHxsZbdwxa4ft6uyWwAAVCC7799V9pymzMxMZWdnq0ePHtaY0+lU586dtXHjRklSamqqioqKPGoiIiLUokULq2bTpk1yu91WYJKk9u3by+12e9S0aNHCCkyS1LNnTxUUFCg1NfWiPRYUFCg/P9/jBgAArk1VNjRlZ2dLkkJDQz3GQ0NDrWXZ2dny8fFR7dq1L1kTEhJSavshISEeNec/Tu3ateXj42PVXMikSZOs86TcbrciIyN/5iwBAEB1UWVD01kOh8PjvjGm1Nj5zq+5UH1Zas43ZswY5eXlWbfDhw9fsi8AAFB9VdnQFBYWJkmljvTk5ORYR4XCwsJUWFio3NzcS9Z8++23pbZ/7Ngxj5rzHyc3N1dFRUWljkCdy+l0KjAw0OMGAACuTVU2NDVs2FBhYWFatWqVNVZYWKh169apQ4cOkqQ2bdqoZs2aHjVZWVnavXu3VRMXF6e8vDxt3brVqtmyZYvy8vI8anbv3q2srCyrZuXKlXI6nWrTpk2F
zhMAAFQP3pX54CdPntRXX31l3c/MzFRaWpqCgoJ0/fXXKykpSRMnTlRUVJSioqI0ceJE+fr6qn///pIkt9utQYMGaeTIkQoODlZQUJBGjRql2NhYdevWTZIUExOjO++8U4MHD9acOXMkSUOGDFF8fLyio6MlST169FCzZs2UkJCgV155Rf/73/80atQoDR48mKNHAABAUiWHps8//1y33367dX/EiBGSpAEDBmjBggUaPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYK0zbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7d2+PaUF5eXvr44481bNgwdezYUS6XS/3799err75a0U8BAACoJqrMdZquBVynqTSu0wQAqOqq/XWaAAAAqhJCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGyo0qHpzJkzeu6559SwYUO5XC41atRI48ePV0lJiVVjjFFycrIiIiLkcrnUpUsX7dmzx2M7BQUFGj58uOrUqSM/Pz/17t1bR44c8ajJzc1VQkKC3G633G63EhISdPz48asxTQAAUA1U6dD08ssv6/XXX9esWbOUnp6uyZMn65VXXtHMmTOtmsmTJ2vq1KmaNWuWtm3bprCwMHXv3l0nTpywapKSkrRs2TItXbpUGzZs0MmTJxUfH6/i4mKrpn///kpLS1NKSopSUlKUlpamhISEqzpfAABQdTmMMaaym7iY+Ph4hYaG6o033rDGfvOb38jX11cLFy6UMUYRERFKSkrSM888I+mno0qhoaF6+eWXNXToUOXl5alu3bpauHCh7r//fknS0aNHFRkZqeXLl6tnz55KT09Xs2bNtHnzZrVr106StHnzZsXFxenLL79UdHS0rX7z8/PldruVl5enwMDAcn42qqdD42Mru4Urdv24XZXdAgCgAtl9/67SR5puvfVWrVmzRvv27ZMkffHFF9qwYYPuvvtuSVJmZqays7PVo0cPax2n06nOnTtr48aNkqTU1FQVFRV51ERERKhFixZWzaZNm+R2u63AJEnt27eX2+22ai6koKBA+fn5HjcAAHBt8q7sBi7lmWeeUV5enpo2bSovLy8VFxdrwoQJeuCBByRJ2dnZkqTQ0FCP9UJDQ/XNN99YNT4+Pqpdu3apmrPrZ2dnKyQkpNTjh4SEWDUXMmnSJL344otlnyAAAKg2qvSRprfffluLFi3SkiVLtH37dr355pt69dVX9eabb3rUORwOj/vGmFJj5zu/5kL1l9vOmDFjlJeXZ90OHz5sZ1oAAKAaqtJHmp5++mk9++yz+t3vfidJio2N1TfffKNJkyZpwIABCgsLk/TTkaLw8HBrvZycHOvoU1hYmAoLC5Wbm+txtCknJ0cdOnSwar799ttSj3/s2LFSR7HO5XQ65XQ6r3yiAACgyqvSR5p++OEH1ajh2aKXl5d1yYGGDRsqLCxMq1atspYXFhZq3bp1ViBq06aNatas6VGTlZWl3bt3WzVxcXHKy8vT1q1brZotW7YoLy/PqgEAAL9sVfpIU69evTRhwgRdf/31at68uXbs2KGpU6fq4YcflvTTR2pJSUmaOHGioqKiFBUVpYkT
J8rX11f9+/eXJLndbg0aNEgjR45UcHCwgoKCNGrUKMXGxqpbt26SpJiYGN15550aPHiw5syZI0kaMmSI4uPjbX9zDgAAXNuqdGiaOXOmnn/+eQ0bNkw5OTmKiIjQ0KFDNW7cOKtm9OjROn36tIYNG6bc3Fy1a9dOK1euVEBAgFUzbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7dW7Nmzbp6kwUAAFValb5OU3XDdZpK4zpNAICq7pq4ThMAAEBVQWgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2OBd2Q0A16KOMztWdgtX7LPhn1V2CwBQpXCkCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2lCk03XHHHTp+/Hip8fz8fN1xxx1X2hMAAECVU6bQtHbtWhUWFpYa//HHH7V+/forbgoAAKCq8f45xTt37rT+f+/evcrOzrbuFxcXKyUlRb/61a/KrzsAAIAq4meFplatWsnhcMjhcFzwYziXy6WZM2eWW3MAAABVxc8KTZmZmTLGqFGjRtq6davq1q1rLfPx8VFISIi8vLzKvUkAAIDK9rNCU/369SVJJSUlFdIMAABAVfWzQtO59u3bp7Vr1yonJ6dUiBo3btwVNwYAAFCVlOnbc/PmzVOzZs00btw4vfPOO1q2bJl1e//998u1wf/+97/6/e9/r+DgYPn6+qpVq1ZKTU21lhtjlJycrIiICLlcLnXp0kV79uzx2EZBQYGGDx+uOnXqyM/PT71799aRI0c8anJzc5WQkCC32y23262EhIQLXlYBAAD8MpUpNL300kuaMGGCsrOzlZaWph07dli37du3l1tzubm56tixo2rWrKkVK1Zo7969mjJliq677jqrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISGh3OYCAACqtzJ9PJebm6u+ffuWdy+lvPzyy4qMjNT8+fOtsQYNGlj/b4zR9OnTNXbsWP3617+WJL355psKDQ3VkiVLNHToUOXl5emNN97QwoUL1a1bN0nSokWLFBkZqdWrV6tnz55KT09XSkqKNm/erHbt2kn66WhaXFycMjIyFB0dXeFzBQAAVVuZjjT17dtXK1euLO9eSvnggw/Utm1b9e3bVyEhIWrdurXmzZtnLc/MzFR2drZ69OhhjTmdTnXu3FkbN26UJKWmpqqoqMijJiIiQi1atLBqNm3aJLfbbQUmSWrfvr3cbrdVcyEFBQXKz8/3uAEAgGtTmY40NW7cWM8//7w2b96s2NhY1axZ02N5YmJiuTT39ddfa/bs2RoxYoT++Mc/auvWrUpMTJTT6dRDDz1kXVwzNDTUY73Q0FB98803kqTs7Gz5+Piodu3apWrOrp+dna2QkJBSjx8SEuJxAc/zTZo0SS+++OIVzREAAFQPZQpNc+fOlb+/v9atW6d169Z5LHM4HOUWmkpKStS2bVtNnDhRktS6dWvt2bNHs2fP1kMPPeTxmOcyxpQaO9/5NReqv9x2xowZoxEjRlj38/PzFRkZeelJAQCAaqlMoSkzM7O8+7ig8PBwNWvWzGMsJiZG7777riQpLCxM0k9HisLDw62anJwc6+hTWFiYCgsLlZub63G0KScnRx06dLBqvv3221KPf+zY
sVJHsc7ldDrldDrLODsAAFCdlOmcpqulY8eOysjI8Bjbt2+fdZHNhg0bKiwsTKtWrbKWFxYWat26dVYgatOmjWrWrOlRk5WVpd27d1s1cXFxysvL09atW62aLVu2KC8vz6oBAAC/bGU60vTwww9fcvnf//73MjVzvqeeekodOnTQxIkT1a9fP23dulVz587V3LlzJf30kVpSUpImTpyoqKgoRUVFaeLEifL19VX//v0lSW63W4MGDdLIkSMVHBysoKAgjRo1SrGxsda36WJiYnTnnXdq8ODBmjNnjiRpyJAhio+P55tzAABA0hVccuBcRUVF2r17t44fP37BP+RbVjfffLOWLVumMWPGaPz48WrYsKGmT5+uBx980KoZPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYNVMmzZN3t7e6tevn06fPq2uXbtqwYIFHn8nb/HixUpMTLS+Zde7d2/NmjWr3OYCAACqN4cxxpTHhkpKSjRs2DA1atRIo0ePLo9NVjv5+flyu93Ky8tTYGBgZbdTJRwaH1vZLVyx68ft+tnrdJzZsQI6ubo+G/5ZZbcAAFeF3ffvcjunqUaNGnrqqac0bdq08tokAABAlVGuJ4IfOHBAZ86cKc9NAgAAVAllOqfp3GsTST9dzygrK0sff/yxBgwYUC6NAQAAVCVlCk07duzwuF+jRg3VrVtXU6ZMuew36wAAAKqjMoWmTz/9tLz7AAAAqNLKFJrOOnbsmDIyMuRwONSkSRPVrVu3vPoCAACoUsp0IvipU6f08MMPKzw8XJ06ddJtt92miIgIDRo0SD/88EN59wgAAFDpyhSaRowYoXXr1unDDz/U8ePHdfz4cf3rX//SunXrNHLkyPLuEQAAoNKV6eO5d999V++88466dOlijd19991yuVzq16+fZs+eXV79AQAAVAllOtL0ww8/KDQ0tNR4SEgIH88BAIBrUplCU1xcnF544QX9+OOP1tjp06f14osvKi4urtyaAwAAqCrK9PHc9OnTddddd6levXpq2bKlHA6H0tLS5HQ6tXLlyvLuEUA1sa5T58pu4Yp1/s+6ym4BQBVVptAUGxur/fv3a9GiRfryyy9ljNHvfvc7Pfjgg3K5XOXdIwAAQKUrU2iaNGmSQkNDNXjwYI/xv//97zp27JieeeaZcmkOAACgqijTOU1z5sxR06ZNS403b95cr7/++hU3BQAAUNWUKTRlZ2crPDy81HjdunWVlZV1xU0BAABUNWUKTZGRkfrss89KjX/22WeKiIi44qYAAACqmjKd0/TII48oKSlJRUVFuuOOOyRJa9as0ejRo7kiOAAAuCaVKTSNHj1a//vf/zRs2DAVFhZKkmrVqqVnnnlGY8aMKdcGAQAAqoIyhSaHw6GXX35Zzz//vNLT0+VyuRQVFSWn01ne/QEAAFQJZQpNZ/n7++vmm28ur14AAACqrDKdCA4AAPBLQ2gCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwwbuyGwAAVE8Tfv/bym7hio1d9E5lt4BqhCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYEO1Ck2TJk2Sw+FQUlKSNWaMUXJysiIiIuRyudSlSxft2bPHY72CggINHz5cderUkZ+fn3r37q0jR4541OTm5iohIUFut1tut1sJCQk6fvz4VZgVAACoDqrNJQe2bdumuXPn6sYbb/QYnzx5sqZOnaoFCxaoSZMmeumll9S9e3dlZGQoICBAkpSUlKQPP/xQS5cuVXBwsEaOHKn4+HilpqbKy8tLktS/f38dOXJEKSkpkqQhQ4YoISFBH3744dWd
KIBqZ9bI6v974okpvSq7BaDKqxZHmk6ePKkHH3xQ8+bNU+3ata1xY4ymT5+usWPH6te//rVatGihN998Uz/88IOWLFkiScrLy9Mbb7yhKVOmqFu3bmrdurUWLVqkXbt2afXq1ZKk9PR0paSk6G9/+5vi4uIUFxenefPm6aOPPlJGRsZF+yooKFB+fr7HDQAAXJuqRWh6/PHHdc8996hbt24e45mZmcrOzlaPHj2sMafTqc6dO2vjxo2SpNTUVBUVFXnUREREqEWLFlbNpk2b5Ha71a5dO6umffv2crvdVs2FTJo0yfo4z+12KzIyslzmCwAAqp4qH5qWLl2q1NRUTZo0qdSy7OxsSVJoaKjHeGhoqLUsOztbPj4+HkeoLlQTEhJSavshISFWzYWMGTNGeXl51u3w4cM/b3IAAKDaqNLnNB0+fFhPPvmkVq5cqVq1al20zuFweNw3xpQaO9/5NReqv9x2nE6nnE7nJR8HAABcG6r0kabU1FTl5OSoTZs28vb2lre3t9atW6e//OUv8vb2to4wnX80KCcnx1oWFhamwsJC5ebmXrLm22+/LfX4x44dK3UUCwAA/DJV6dDUtWtX7dq1S2lpadatbdu2evDBB5WWlqZGjRopLCxMq1atstYpLCzUunXr1KFDB0lSmzZtVLNmTY+arKws7d6926qJi4tTXl6etm7datVs2bJFeXl5Vg0AAPhlq9IfzwUEBKhFixYeY35+fgoODrbGk5KSNHHiREVFRSkqKkoTJ06Ur6+v+vfvL0lyu90aNGiQRo4cqeDgYAUFBWnUqFGKjY21TiyPiYnRnXfeqcGDB2vOnDmSfrrkQHx8vKKjo6/ijAEAQFVVpUOTHaNHj9bp06c1bNgw5ebmql27dlq5cqV1jSZJmjZtmry9vdWvXz+dPn1aXbt21YIFC6xrNEnS4sWLlZiYaH3Lrnfv3po1a9ZVnw8AAKiaql1oWrt2rcd9h8Oh5ORkJScnX3SdWrVqaebMmZo5c+ZFa4KCgrRo0aJy6hIAAFxrqvQ5TQAAAFUFoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7Ib+KVo8/Q/KruFK5b6ykOV3QIAAJWGI00AAAA2EJoAAABsIDQBAADYUKVD06RJk3TzzTcrICBAISEh6tOnjzIyMjxqjDFKTk5WRESEXC6XunTpoj179njUFBQUaPjw4apTp478/PzUu3dvHTlyxKMmNzdXCQkJcrvdcrvdSkhI0PHjxyt6igAAoJqo0qFp3bp1evzxx7V582atWrVKZ86cUY8ePXTq1CmrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISHhqs4XAABUXVX623MpKSke9+fPn6+QkBClpqaqU6dOMsZo+vTpGjt2rH79619Lkt58802FhoZqyZIlGjp0qPLy8vTGG29o4cKF6tatmyRp0aJFioyM1OrVq9WzZ0+lp6crJSVFmzdvVrt27SRJ8+bNU1xcnDIyMhQdHX11Jw4AAKqcKn2k6Xx5eXmSpKCgIElSZmamsrOz1aNHD6vG6XSqc+fO2rhxoyQpNTVVRUVFHjURERFq0aKF
VbNp0ya53W4rMElS+/bt5Xa7rZoLKSgoUH5+vscNAABcm6pNaDLGaMSIEbr11lvVokULSVJ2drYkKTQ01KM2NDTUWpadnS0fHx/Vrl37kjUhISGlHjMkJMSquZBJkyZZ50C53W5FRkaWfYIAAKBKqzah6YknntDOnTv11ltvlVrmcDg87htjSo2d7/yaC9VfbjtjxoxRXl6edTt8+PDlpgEAAKqpahGahg8frg8++ECffvqp6tWrZ42HhYVJUqmjQTk5OdbRp7CwMBUWFio3N/eSNd9++22pxz127Fipo1jncjqdCgwM9LgBAIBrU5UOTcYYPfHEE3rvvff0ySefqGHDhh7LGzZsqLCwMK1atcoaKyws1Lp169ShQwdJUps2bVSzZk2PmqysLO3evduqiYuLU15enrZu3WrVbNmyRXl5eVYNAAD4ZavS3557/PHHtWTJEv3rX/9SQECAdUTJ7XbL5XLJ4XAoKSlJEydOVFRUlKKiojRx4kT5+vqqf//+Vu2gQYM0cuRIBQcHKygoSKNGjVJsbKz1bbqYmBjdeeedGjx4sObMmSNJGjJkiOLj4/nmHAAAkFTFQ9Ps2bMlSV26dPEYnz9/vgYOHChJGj16tE6fPq1hw4YpNzdX7dq108qVKxUQEGDVT5s2Td7e3urXr59Onz6trl27asGCBfLy8rJqFi9erMTEROtbdr1799asWbMqdoIAAKDaqNKhyRhz2RqHw6Hk5GQlJydftKZWrVqaOXOmZs6cedGaoKAgLVq0qCxtAgCAX4AqfU4TAABAVUFoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA3eld0AAACo+pKTkyu7hSt2pXPgSBMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7IbAACgOkmf8Ellt3DFYsbeUdktVEscaTrPX//6VzVs2FC1atVSmzZttH79+spuCQAAVAGEpnO8/fbbSkpK0tixY7Vjxw7ddtttuuuuu3To0KHKbg0AAFQyQtM5pk6dqkGDBumRRx5RTEyMpk+frsjISM2ePbuyWwMAAJWMc5r+T2FhoVJTU/Xss896jPfo0UMbN2684DoFBQUqKCiw7ufl5UmS8vPzS9UWF5wux24rx4XmdTknfiyugE6urrLM+8zpMxXQydVVlnmfOvPLnPfpgh8qoJOrqyzz/rGoqAI6ubrKMu+TP56qgE6urrLM+9z3u+rqYvM+O26MufQGDIwxxvz3v/81ksxnn33mMT5hwgTTpEmTC67zwgsvGEncuHHjxo0bt2vgdvjw4UtmBY40ncfhcHjcN8aUGjtrzJgxGjFihHW/pKRE//vf/xQcHHzRdSpKfn6+IiMjdfjwYQUGBl7Vx65MzJt5/xIwb+b9S1CZ8zbG6MSJE4qIiLhkHaHp/9SpU0deXl7Kzs72GM/JyVFoaOgF13E6nXI6nR5j1113XUW1aEtgYOAv6kV2FvP+ZWHevyzM+5elsubtdrsvW8OJ4P/Hx8dHbdq00apVqzzGV61apQ4dOlRSVwAAoKrgSNM5RowYoYSEBLVt21ZxcXGaO3euDh06pEcffbSyWwMAAJWM0HSO+++/X99//73Gjx+vrKwstWjRQsuXL1f9+vUru7XLcjqdeuGFF0p9XHitY97M+5eAeTPvX4LqMG+H
MZf7fh0AAAA4pwkAAMAGQhMAAIANhCYAAAAbCE24pAYNGmj69OmV9vgDBw5Unz59Ku3xK5vD4dD7779f2W1UGGOMhgwZoqCgIDkcDqWlpVV2S9VWcnKyWrVqVdlt4P/8kn53denSRUlJSZIq/z2jovHtuWtMly5d1KpVq2tmp50xY8bl/xYQqq2UlBQtWLBAa9euVaNGjVSnTp3KbqnaGjVqlIYPH17ZbeAXbtu2bfLz86vsNiRJBw8eVMOGDbVjx45y+wcFoekXyBij4uJieXtX/R+/nSu0ovo6cOCAwsPDK/QCsoWFhfLx8amw7ZeXsvZ59vXs7+8vf3//Cujs2lFUVKSaNWtWdhvXtLp161Z2CxWKj+euoi5duigxMVGjR49WUFCQwsLClJycbC3Py8vTkCFDFBISosDAQN1xxx364osvrOUXOtyblJSkLl26WMvXrVunGTNmyOFwyOFw6ODBg1q7dq0cDof+/e9/q23btnI6nVq/fr0OHDige++9V6GhofL399fNN9+s1atXX4Vnwr5z51xQUKDExESFhISoVq1auvXWW7Vt2zZJP71xNG7cWK+++qrH+rt371aNGjV04MCBq9LvO++8o9jYWLlcLgUHB6tbt246deqUtm3bpu7du6tOnTpyu93q3Lmztm/f7rHu/v371alTJ9WqVUvNmjUrdXX6gwcPyuFw6L333tPtt98uX19ftWzZUps2bfKo27hxozp16iSXy6XIyEglJibq1Kn//6+y//Wvf1VUVJRq1aql0NBQ/fa3v71s/xVh4MCBGj58uA4dOiSHw6EGDRrIGKPJkyerUaNGcrlcatmypd555x1rneLiYg0aNEgNGzaUy+VSdHS0ZsyYUWq7ffr00aRJkxQREaEmTZpUSP/SxZ+vcz+uOKtPnz4aOHCgdb9BgwZ66aWXNHDgQLndbg0ePNj6GS9dulQdOnRQrVq11Lx5c61du9Za72Kv5/M/nlu7dq1uueUW+fn56brrrlPHjh31zTffWMs//PBDtWnTRrVq1VKjRo304osv6syZM+XyvKSkpOjWW2/Vddddp+DgYMXHx1uvQbv78bx58xQZGSlfX1/dd999mjp1aqk/U3W5OTgcDr3++uu699575efnp5deeqlc5neuy71mXn31VYWHhys4OFiPP/64ioqKrGWLFi1S27ZtFRAQoLCwMPXv3185OTnW8nN/1q1bt5bL5dIdd9yhnJwcrVixQjExMQoMDNQDDzygH374wVrvcq+jK3Hq1Ck99NBD8vf3V3h4uKZMmeKx/PyP55KTk3X99dfL6XQqIiJCiYmJ1rKsrCzdc889crlcatiwoZYsWeKx/tl95dyP7Y8fPy6Hw2G9JnJzc/Xggw+qbt26crlcioqK0vz58yVJDRs2lCS1bt1aDofDeq+8Ipf8c74oV507dzaBgYEmOTnZ7Nu3z7z55pvG4XCYlStXmpKSEtOxY0fTq1cvs23bNrNv3z4zcuRIExwcbL7//ntjjDEDBgww9957r8c2n3zySdO5c2djjDHHjx83cXFxZvDgwSYrK8tkZWWZM2fOmE8//dRIMjfeeKNZuXKl+eqrr8x3331n0tLSzOuvv2527txp9u3bZ8aOHWtq1aplvvnmG2v79evXN9OmTbtKz1Bp5845MTHRREREmOXLl5s9e/aYAQMGmNq1a1vPz4QJE0yzZs081n/qqadMp06drkqvR48eNd7e3mbq1KkmMzPT7Ny507z22mvmxIkTZs2aNWbhwoVm7969Zu/evWbQoEEmNDTU5OfnG2OMKS4uNi1atDBdunQxO3bsMOvWrTOtW7c2ksyyZcuMMcZkZmYaSaZp06bmo48+MhkZGea3v/2tqV+/vikqKjLGGLNz507j7+9vpk2bZvbt22c+++wz07p1azNw4EBjjDHbtm0zXl5eZsmSJebgwYNm+/btZsaMGZftvyIcP37cjB8/3tSrV89kZWWZnJwc88c//tE0bdrUpKSk
mAMHDpj58+cbp9Np1q5da4wxprCw0IwbN85s3brVfP3112bRokXG19fXvP3229Z2BwwYYPz9/U1CQoLZvXu32bVrV4X0f6nnq3PnzubJJ5/0qL/33nvNgAEDrPv169c3gYGB5pVXXjH79+83+/fvt37G9erVM++8847Zu3eveeSRR0xAQID57rvvjDHmoq/nF154wbRs2dIYY0xRUZFxu91m1KhR5quvvjJ79+41CxYssF7bKSkpJjAw0CxYsMAcOHDArFy50jRo0MAkJyeXy3PzzjvvmHfffdfs27fP7Nixw/Tq1cvExsaa4uJiW/vxhg0bTI0aNcwrr7xiMjIyzGuvvWaCgoKM2+22HsPOHCSZkJAQ88Ybb5gDBw6YgwcPlsv8zrrUPjBgwAATGBhoHn30UZOenm4+/PBD4+vra+bOnWut/8Ybb5jly5ebAwcOmE2bNpn27dubu+66y1p+9mfdvn17s2HDBrN9+3bTuHFj07lzZ9OjRw+zfft285///McEBwebP//5z9Z6l3sdXYnHHnvM1KtXz6xcudLs3LnTxMfHG39/f2t/P/c945///KcJDAw0y5cvN998843ZsmWLx/y7detmWrVqZTZv3mxSU1NN586djcvlstY/u6/s2LHDWic3N9dIMp9++qkxxpjHH3/ctGrVymzbts1kZmaaVatWmQ8++MAYY8zWrVuNJLN69WqTlZVlvVdcCULTVdS5c2dz6623eozdfPPN5plnnjFr1qwxgYGB5scff/RYfsMNN5g5c+YYYy4fms4+xvm/rM++8N5///3L9tisWTMzc+ZM635VCU0nT540NWvWNIsXL7aWFRYWmoiICDN58mRjzE+/wLy8vMyWLVus5XXr1jULFiy4Kr2mpqYaSbZ+MZ85c8YEBASYDz/80BhjzL///W/j5eVlDh8+bNWsWLHigqHpb3/7m1WzZ88eI8mkp6cbY4xJSEgwQ4YM8Xis9evXmxo1apjTp0+bd9991wQGBlphraz9l5dp06aZ+vXrG2OMOXnypKlVq5bZuHGjR82gQYPMAw88cNFtDBs2zPzmN7+x7g8YMMCEhoaagoKCCun5rEs9X3ZDU58+fTxqzv6Mz30DLCoqMvXq1TMvv/yyMebir+dzQ9P3339vJF30TfK2224zEydO9BhbuHChCQ8Pv+ScyyonJ8dIMrt27bK1H99///3mnnvu8djGgw8+6BGa7MxBkklKSqqAGf3kUvvAgAEDTP369c2ZM2essb59+5r777//ots7+yZ/9h8qZ3/Wq1evtmomTZpkJJkDBw5YY0OHDjU9e/Y0xpT9dWTHiRMnjI+Pj1m6dKk19v333xuXy3XB0DRlyhTTpEkTU1hYWGpb6enpRpLZtm2bNbZ//34j6WeFpl69epk//OEPF+z3QutfKT6eu8puvPFGj/vh4eHKyclRamqqTp48qeDgYOvcBH9/f2VmZpbbR0tt27b1uH/q1CmNHj1azZo103XXXSd/f399+eWXOnToULk8Xnk6cOCAioqK1LFjR2usZs2auuWWW5Seni7pp+fynnvu0d///ndJ0kcffaQff/xRffv2vSo9tmzZUl27dlVsbKz69u2refPmKTc3V5KUk5OjRx99VE2aNJHb7Zbb7dbJkyet5zo9PV3XX3+96tWrZ20vLi7ugo9z7j4UHh5ubV+SUlNTtWDBAo99qGfPniopKVFmZqa6d++u+vXrq1GjRkpISNDixYutw/qX6v9q2Lt3r3788Ud1797do/9//OMfHq+B119/XW3btlXdunXl7++vefPmldpnY2NjK/w8pvJ4vs5/TZ517s/e29tbbdu2tfbzy60rSUFBQRo4cKB69uypXr16acaMGcrKyrKWp6amavz48R7P8+DBg5WVleXxMU9ZHThwQP3791ejRo0UGBhofUxy7s/pUvtxRkaGbrnlFo9tnn/f7hwu9TxdqcvtA82bN5eXl5d1/+zv+7N27Nihe++9V/Xr11dA
QID18dH5+/O5z1VoaKh8fX3VqFEjj7Gz27X7OiqLAwcOqLCw0GP/DAoKUnR09AXr+/btq9OnT6tRo0YaPHiwli1bZn18mpGRIW9vb910001WfePGjVW7du2f1dNjjz2mpUuXqlWrVho9erQ2btxYhpnZR2i6ys4/CdHhcKikpEQlJSUKDw9XWlqaxy0jI0NPP/20JKlGjRqlvkl27ufjl3P+Nxqefvppvfvuu5owYYLWr1+vtLQ0xcbGqrCwsIyzqzhn5+1wOEqNnzv2yCOPaOnSpTp9+rTmz5+v+++/X76+vlelRy8vL61atUorVqxQs2bNNHPmTEVHRyszM1MDBw5Uamqqpk+fro0bNyotLU3BwcHWc33+z1UqPdezzt2HztaUlJRY/x06dKjHPvTFF19o//79uuGGGxQQEKDt27frrbfeUnh4uMaNG6eWLVvq+PHjl+z/ajg7h48//tij/71791rnY/y///f/9NRTT+nhhx/WypUrlZaWpj/84Q+l9tmr8e2dSz1fdl+rP6fP8/eHy607f/58bdq0SR06dNDbb7+tJk2aaPPmzZJ+eq5ffPFFj+d5165d2r9/v2rVqmW7p4vp1auXvv/+e82bN09btmzRli1bJMnj53Sp/fj81/XZsXPZnUNF7guXe81c7Pe99NM/Wnv06CF/f38tWrRI27Zt07JlyySp1P58/nN1qe3aeR2V1YV+T11KZGSkMjIy9Nprr8nlcmnYsGHq1KmTioqKLrqtc8dr1KhRauz819Fdd92lb775RklJSTp69Ki6du2qUaNG/aw+fw5CUxVx0003KTs7W97e3mrcuLHH7ezXsOvWrevxr0VJpa5r4+Pjo+LiYluPuX79eg0cOFD33XefYmNjFRYWpoMHD5bHdMpd48aN5ePjow0bNlhjRUVF+vzzzxUTE2ON3X333fLz89Ps2bO1YsUKPfzww1e1T4fDoY4dO+rFF1/Ujh075OPjo2XLlmn9+vVKTEzU3XffrebNm8vpdOq7776z1mvWrJkOHTqko0ePWmPnnxhrx0033aQ9e/aU2ofOPn/ST0cuunXrpsmTJ2vnzp06ePCgPvnkk0v2fzU0a9ZMTqdThw4dKtV7ZGSkpJ/22Q4dOmjYsGFq3bq1GjdufNVO8r+Qiz1f579Wi4uLtXv3btvbPRtuJOnMmTNKTU1V06ZNf3Z/rVu31pgxY7Rx40a1aNFCS5YskfTTfpKRkXHB/eTsG1VZff/990pPT9dzzz2nrl27KiYm5mcfgWvatKm2bt3qMfb555973K/IOfwcZX3NfPnll/ruu+/05z//WbfddpuaNm3qcRSqrOy8jsqqcePGqlmzpsf+mZubq3379l10HZfLpd69e+svf/mL1q5dq02bNmnXrl1q2rSpzpw5ox07dli1X331lY4fP27dP/tNvHNfSxe6llvdunU1cOBALVq0SNOnT9fcuXMlyfqdZ/c90Y6q/53zX4hu3bopLi5Offr00csvv6zo6GgdPXpUy5cvV58+fdS2bVvdcccdeuWVV/SPf/xDcXFxWrRokXbv3q3WrVtb22nQoIG2bNmigwcPyt/fX0FBQRd9zMaNG+u9995Tr1695HA49Pzzz1v/Sqlq/Pz89Nhjj+npp59WUFCQrr/+ek2ePFk//PCDBg0aZNV5eXlp4MCBGjNmjBo3bnzRj7gqwpYtW7RmzRr16NFDISEh2rJli44dO6aYmBg1btxYCxcuVNu2bZWfn6+nn35aLpfLWrdbt26Kjo7WQw89pClTpig/P19jx4792T0888wzat++vR5//HENHjxYfn5+Sk9P16pVqzRz5kx99NFH+vrrr9WpUyfVrl1by5cvV0lJiaKjoy/Z/9UQEBCgUaNG6amnnlJJSYluvfVW5efna+PGjfL399eAAQPUuHFj/eMf/9C///1vNWzYUAsXLtS2bdusj3+upks9X35+fhoxYoQ+/vhj3XDDDZo2bZrHm8HlvPbaa4qKilJM
TIymTZum3Nzcn/UPgMzMTM2dO1e9e/dWRESEMjIytG/fPj300EOSpHHjxik+Pl6RkZHq27evatSooZ07d2rXrl1X/A2z2rVrKzg4WHPnzlV4eLgOHTqkZ5999mdtY/jw4erUqZOmTp2qXr166ZNPPtGKFSs8jj5V5BzsutQ+sHPnzkuue/3118vHx0czZ87Uo48+qt27d+tPf/rTFfdk53VUVv7+/ho0aJCefvppBQcHKzQ0VGPHjr1oSF2wYIGKi4vVrl07+fr6auHChXK5XKpfv771TcMhQ4Zo9uzZqlmzpkaOHCmXy2X9nF0ul9q3b68///nPatCggb777js999xzHo8xbtw4tWnTRs2bN1dBQYE++ugj63dWSEiIXC6XUlJSVK9ePdWqVevKL2NTbmdH4bIud3Jofn6+GT58uImIiDA1a9Y0kZGR5sEHHzSHDh2y6seNG2dCQ0ON2+02Tz31lHniiSc8TgTPyMgw7du3Ny6Xy0gymZmZ1smEubm5Ho+dmZlpbr/9duNyuUxkZKSZNWtWqR6ryongxhhz+vRpM3z4cFOnTh3jdDpNx44dzdatW0utc+DAASPJOkH8atm7d6/p2bOnqVu3rnE6naZJkybWSfXbt283bdu2NU6n00RFRZl//vOfpZ7bjIwMc+uttxofHx/TpEkTk5KScsETwS91UqQxP51M2r17d+Pv72/8/PzMjTfeaCZMmGCM+emk8M6dO5vatWsbl8tlbrzxRuubZ5fqv6KceyK4McaUlJSYGTNmmOjoaFOzZk1Tt25d07NnT7Nu3TpjjDE//vijGThwoHG73ea6664zjz32mHn22WetE6CNufAXJirCpZ6vwsJC89hjj5mgoCATEhJiJk2adMETwc9/bZ39GS9ZssS0a9fO+Pj4mJiYGLNmzRqr5mKv53NPBM/OzjZ9+vQx4eHhxsfHx9SvX9+MGzfOFBcXW/UpKSmmQ4cOxuVymcDAQHPLLbd4fLPpSqxatcrExMQYp9NpbrzxRrN27VprX7a7H8+dO9f86le/Mi6Xy/Tp08e89NJLJiwszONxLjeHc18/FeFS+4CdL+4sWbLENGjQwDidThMXF2c++OADj+fmQj/r+fPne5wQb4znz96Yy7+OrsSJEyfM73//e+Pr62tCQ0PN5MmTPd43zt2vly1bZtq1a2cCAwONn5+fad++vcdJ7UePHjV33XWXcTqdpn79+mbJkiUmJCTEvP7661bN3r17rfe0Vq1amZUrV3rsK3/6059MTEyMcblcJigoyNx7773m66+/ttafN2+eiYyMNDVq1PB47svKYQyXW0bV9cADD8jLy0uLFi2yvc5nn32mLl266MiRIwoNDa3A7oDyVRFXML5WDB48WF9++aXWr19f2a2gghw5ckSRkZFavXq1unbtWtntXBAfz6FKOnPmjPbt26dNmzZp6NChttYpKCjQ4cOH9fzzz6tfv34EJqAae/XVV9W9e3f5+flpxYoVevPNN/XXv/61sttCOfrkk0908uRJxcbGKisrS6NHj1aDBg3UqVOnym7tojgRHFXS7t271bZtWzVv3lyPPvqorXXeeustRUdHKy8vT5MnT67gDgFUpK1bt6p79+6KjY3V66+/rr/85S965JFHKrstlKOioiL98Y9/VPPmzXXfffepbt26Wrt2bZX+Uzd8PAcAAGADR5oAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAJSD5ORkLkgJXOMITQCqrYEDB8rhcJS63XnnnRX6uA6HQ++//77H2KhRo7RmzZoKfVwAlYsrggOo1u68807Nnz/fY8zpdF71Pvz9/eXv73/VHxfA1cORJgDVmtPpVFhYmMetdu3akn46IjRnzhzFx8fL19dXMTEx2rRpk7766it16dJFfn5+iouL04EDBzy2OXv2bN1www3y8fFRdHS0Fi5caC1r0KCBJOm+++6Tw+Gw7p//8VxJSYnGjx+vevXqyel0qlWr
VkpJSbGWHzx4UA6HQ++9955uv/12+fr6qmXLltq0aVPFPFEArhihCcA17U9/+pMeeughpaWlqWnTpurfv7+GDh2qMWPG6PPPP5ckPfHEE1b9smXL9OSTT2rkyJHavXu3hg4dqj/84Q/69NNPJUnbtm2TJM2fP19ZWVnW/fPNmDFDU6ZM0auvvqqdO3eqZ8+e6t27t/bv3+9RN3bsWI0aNUppaWlq0qSJHnjgAZ05c6YingoAV8oAQDU1YMAA4+XlZfz8/Dxu48ePN8YYI8k899xzVv2mTZuMJPPGG29YY2+99ZapVauWdb9Dhw5m8ODBHo/Tt29fc/fdd1v3JZlly5Z51LzwwgumZcuW1v2IiAgzYcIEj5qbb77ZDBs2zBhjTGZmppFk/va3v1nL9+zZYySZ9PT0n/lMALgaOKcJQLV2++23a/bs2R5jQUFB1v/feOON1v+HhoZKkmJjYz3GfvzxR+Xn5yswMFDp6ekaMmSIx/Y6duyoGTNm2O4pPz9fR48eVceOHUtt54svvvAYO7e/8PBwSVJOTo6aNm1q+/EAXB2EJgDVmp+fnxo3bnzR5ef+xXSHw3HRsZKSklJjZxljSo3ZYWc7l+sFQNXBOU0AcI6YmBht2LDBY2zjxo2KiYmx7tesWVPFxcUX3UZgYKAiIiIuux0A1QtHmgBUawUFBcrOzvYY8/b2Vp06dcq0vaefflr9+vXTTTfdpK5du+rDDz/Ue++9p9WrV1s1DRo00Jo1a9SxY0c5nU7r23rnb+eFF17QDTfcoFatWmn+/PlKS0vT4sWLy9QXgMpHaAJQraWkpFjnAp0VHR2tL7/8skzb69Onj2bMmKFXXnlFiYmJatiwoebPn68uXbpYNVOmTNGIESM0b948/epXv9LBgwdLbScxMVH5+fkaOXKkcnJy1KxZM33wwQeKiooqU18AKp/DGGMquwkAAICqjnOaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDh/wNHGcb1uChEfwAAAABJRU5ErkJggg==\n",
178
+ "text/plain": [
179
+ "<Figure size 640x480 with 1 Axes>"
180
+ ]
181
+ },
182
+ "metadata": {},
183
+ "output_type": "display_data"
184
+ }
185
+ ],
186
+ "source": [
187
+ "# Plot\n",
188
+ "sns.countplot(x='Emotion',data=df)"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": 6,
194
+ "id": "40f991d0-952f-40c1-bf00-f3476ce0436d",
195
+ "metadata": {
196
+ "jupyter": {
197
+ "outputs_hidden": true
198
+ },
199
+ "scrolled": false,
200
+ "tags": []
201
+ },
202
+ "outputs": [
203
+ {
204
+ "data": {
205
+ "text/plain": [
206
+ "['BTC_ADDRESS_REGEX',\n",
207
+ " 'CURRENCY_REGEX',\n",
208
+ " 'CURRENCY_SYMB_REGEX',\n",
209
+ " 'Counter',\n",
210
+ " 'DATE_REGEX',\n",
211
+ " 'EMAIL_REGEX',\n",
212
+ " 'EMOJI_REGEX',\n",
213
+ " 'HASTAG_REGEX',\n",
214
+ " 'MASTERCard_REGEX',\n",
215
+ " 'MD5_SHA_REGEX',\n",
216
+ " 'MOST_COMMON_PUNCT_REGEX',\n",
217
+ " 'NUMBERS_REGEX',\n",
218
+ " 'PHONE_REGEX',\n",
219
+ " 'PoBOX_REGEX',\n",
220
+ " 'SPECIAL_CHARACTERS_REGEX',\n",
221
+ " 'STOPWORDS',\n",
222
+ " 'STOPWORDS_de',\n",
223
+ " 'STOPWORDS_en',\n",
224
+ " 'STOPWORDS_es',\n",
225
+ " 'STOPWORDS_fr',\n",
226
+ " 'STOPWORDS_ru',\n",
227
+ " 'STOPWORDS_yo',\n",
228
+ " 'STREET_ADDRESS_REGEX',\n",
229
+ " 'TextFrame',\n",
230
+ " 'URL_PATTERN',\n",
231
+ " 'USER_HANDLES_REGEX',\n",
232
+ " 'VISACard_REGEX',\n",
233
+ " '__builtins__',\n",
234
+ " '__cached__',\n",
235
+ " '__doc__',\n",
236
+ " '__file__',\n",
237
+ " '__generate_text',\n",
238
+ " '__loader__',\n",
239
+ " '__name__',\n",
240
+ " '__numbers_dict',\n",
241
+ " '__package__',\n",
242
+ " '__spec__',\n",
243
+ " '_lex_richness_herdan',\n",
244
+ " '_lex_richness_maas_ttr',\n",
245
+ " 'clean_text',\n",
246
+ " 'defaultdict',\n",
247
+ " 'digit2words',\n",
248
+ " 'extract_btc_address',\n",
249
+ " 'extract_currencies',\n",
250
+ " 'extract_currency_symbols',\n",
251
+ " 'extract_dates',\n",
252
+ " 'extract_emails',\n",
253
+ " 'extract_emojis',\n",
254
+ " 'extract_hashtags',\n",
255
+ " 'extract_html_tags',\n",
256
+ " 'extract_mastercard_addr',\n",
257
+ " 'extract_md5sha',\n",
258
+ " 'extract_numbers',\n",
259
+ " 'extract_pattern',\n",
260
+ " 'extract_phone_numbers',\n",
261
+ " 'extract_postoffice_box',\n",
262
+ " 'extract_shortwords',\n",
263
+ " 'extract_special_characters',\n",
264
+ " 'extract_stopwords',\n",
265
+ " 'extract_street_address',\n",
266
+ " 'extract_terms_in_bracket',\n",
267
+ " 'extract_urls',\n",
268
+ " 'extract_userhandles',\n",
269
+ " 'extract_visacard_addr',\n",
270
+ " 'fix_contractions',\n",
271
+ " 'generate_sentence',\n",
272
+ " 'hamming_distance',\n",
273
+ " 'inverse_df',\n",
274
+ " 'lexical_richness',\n",
275
+ " 'markov_chain',\n",
276
+ " 'math',\n",
277
+ " 'nlargest',\n",
278
+ " 'normalize',\n",
279
+ " 'num2words',\n",
280
+ " 'random',\n",
281
+ " 're',\n",
282
+ " 'read_txt',\n",
283
+ " 'remove_accents',\n",
284
+ " 'remove_bad_quotes',\n",
285
+ " 'remove_btc_address',\n",
286
+ " 'remove_currencies',\n",
287
+ " 'remove_currency_symbols',\n",
288
+ " 'remove_custom_pattern',\n",
289
+ " 'remove_custom_words',\n",
290
+ " 'remove_dates',\n",
291
+ " 'remove_emails',\n",
292
+ " 'remove_emojis',\n",
293
+ " 'remove_hashtags',\n",
294
+ " 'remove_html_tags',\n",
295
+ " 'remove_mastercard_addr',\n",
296
+ " 'remove_md5sha',\n",
297
+ " 'remove_multiple_spaces',\n",
298
+ " 'remove_non_ascii',\n",
299
+ " 'remove_numbers',\n",
300
+ " 'remove_phone_numbers',\n",
301
+ " 'remove_postoffice_box',\n",
302
+ " 'remove_puncts',\n",
303
+ " 'remove_punctuations',\n",
304
+ " 'remove_shortwords',\n",
305
+ " 'remove_special_characters',\n",
306
+ " 'remove_stopwords',\n",
307
+ " 'remove_street_address',\n",
308
+ " 'remove_terms_in_bracket',\n",
309
+ " 'remove_urls',\n",
310
+ " 'remove_userhandles',\n",
311
+ " 'remove_visacard_addr',\n",
312
+ " 'replace_bad_quotes',\n",
313
+ " 'replace_currencies',\n",
314
+ " 'replace_currency_symbols',\n",
315
+ " 'replace_dates',\n",
316
+ " 'replace_emails',\n",
317
+ " 'replace_emojis',\n",
318
+ " 'replace_numbers',\n",
319
+ " 'replace_phone_numbers',\n",
320
+ " 'replace_special_characters',\n",
321
+ " 'replace_term',\n",
322
+ " 'replace_urls',\n",
323
+ " 'string',\n",
324
+ " 'term_freq',\n",
325
+ " 'to_txt',\n",
326
+ " 'unicodedata',\n",
327
+ " 'word_freq',\n",
328
+ " 'word_length_freq']"
329
+ ]
330
+ },
331
+ "execution_count": 6,
332
+ "metadata": {},
333
+ "output_type": "execute_result"
334
+ }
335
+ ],
336
+ "source": [
337
+ "# Data Cleaning\n",
338
+ "dir(nfx)"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": 7,
344
+ "id": "b1f87847-a91c-4bd6-a307-d746eb5aa9a0",
345
+ "metadata": {},
346
+ "outputs": [],
347
+ "source": [
348
+ "# User handles\n",
349
+ "df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "code",
354
+ "execution_count": 8,
355
+ "id": "03886bc3-1ac4-4f1b-842b-e5d2d770ff81",
356
+ "metadata": {},
357
+ "outputs": [],
358
+ "source": [
359
+ "# Stopwords\n",
360
+ "df['Clean_Text'] = df['Clean_Text'].apply(nfx.remove_stopwords)"
361
+ ]
362
+ },
363
+ {
364
+ "cell_type": "markdown",
365
+ "id": "0ffcf4c7",
366
+ "metadata": {},
367
+ "source": [
368
+ "## We are not removing special characters because some rows consist only of special characters, and removing them would leave those rows empty."
369
+ ]
370
+ },
371
+ {
372
+ "cell_type": "code",
373
+ "execution_count": 9,
374
+ "id": "0a0fcc0c-4adf-4f0b-b226-164659ad70ba",
375
+ "metadata": {
376
+ "jupyter": {
377
+ "outputs_hidden": true
378
+ },
379
+ "tags": []
380
+ },
381
+ "outputs": [
382
+ {
383
+ "data": {
384
+ "text/html": [
385
+ "<div>\n",
386
+ "<style scoped>\n",
387
+ " .dataframe tbody tr th:only-of-type {\n",
388
+ " vertical-align: middle;\n",
389
+ " }\n",
390
+ "\n",
391
+ " .dataframe tbody tr th {\n",
392
+ " vertical-align: top;\n",
393
+ " }\n",
394
+ "\n",
395
+ " .dataframe thead th {\n",
396
+ " text-align: right;\n",
397
+ " }\n",
398
+ "</style>\n",
399
+ "<table border=\"1\" class=\"dataframe\">\n",
400
+ " <thead>\n",
401
+ " <tr style=\"text-align: right;\">\n",
402
+ " <th></th>\n",
403
+ " <th>Emotion</th>\n",
404
+ " <th>Text</th>\n",
405
+ " <th>Clean_Text</th>\n",
406
+ " </tr>\n",
407
+ " </thead>\n",
408
+ " <tbody>\n",
409
+ " <tr>\n",
410
+ " <th>0</th>\n",
411
+ " <td>neutral</td>\n",
412
+ " <td>Why ?</td>\n",
413
+ " <td>?</td>\n",
414
+ " </tr>\n",
415
+ " <tr>\n",
416
+ " <th>1</th>\n",
417
+ " <td>joy</td>\n",
418
+ " <td>Sage Act upgrade on my to do list for tommorow.</td>\n",
419
+ " <td>Sage Act upgrade list tommorow.</td>\n",
420
+ " </tr>\n",
421
+ " <tr>\n",
422
+ " <th>2</th>\n",
423
+ " <td>sadness</td>\n",
424
+ " <td>ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...</td>\n",
425
+ " <td>WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS...</td>\n",
426
+ " </tr>\n",
427
+ " <tr>\n",
428
+ " <th>3</th>\n",
429
+ " <td>joy</td>\n",
430
+ " <td>Such an eye ! The true hazel eye-and so brill...</td>\n",
431
+ " <td>eye ! true hazel eye-and brilliant ! Regular f...</td>\n",
432
+ " </tr>\n",
433
+ " <tr>\n",
434
+ " <th>4</th>\n",
435
+ " <td>joy</td>\n",
436
+ " <td>@Iluvmiasantos ugh babe.. hugggzzz for u .! b...</td>\n",
437
+ " <td>ugh babe.. hugggzzz u .! babe naamazed nga ako...</td>\n",
438
+ " </tr>\n",
439
+ " <tr>\n",
440
+ " <th>...</th>\n",
441
+ " <td>...</td>\n",
442
+ " <td>...</td>\n",
443
+ " <td>...</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <th>34787</th>\n",
447
+ " <td>surprise</td>\n",
448
+ " <td>@MichelGW have you gift! Hope you like it! It'...</td>\n",
449
+ " <td>gift! Hope like it! hand wear ! It'll warm! Lol</td>\n",
450
+ " </tr>\n",
451
+ " <tr>\n",
452
+ " <th>34788</th>\n",
453
+ " <td>joy</td>\n",
454
+ " <td>The world didnt give it to me..so the world MO...</td>\n",
455
+ " <td>world didnt me..so world DEFINITELY cnt away!!!</td>\n",
456
+ " </tr>\n",
457
+ " <tr>\n",
458
+ " <th>34789</th>\n",
459
+ " <td>anger</td>\n",
460
+ " <td>A man robbed me today .</td>\n",
461
+ " <td>man robbed today .</td>\n",
462
+ " </tr>\n",
463
+ " <tr>\n",
464
+ " <th>34790</th>\n",
465
+ " <td>fear</td>\n",
466
+ " <td>Youu call it JEALOUSY, I call it of #Losing YO...</td>\n",
467
+ " <td>Youu JEALOUSY, #Losing YOU...</td>\n",
468
+ " </tr>\n",
469
+ " <tr>\n",
470
+ " <th>34791</th>\n",
471
+ " <td>sadness</td>\n",
472
+ " <td>I think about you baby, and I dream about you ...</td>\n",
473
+ " <td>think baby, dream time</td>\n",
474
+ " </tr>\n",
475
+ " </tbody>\n",
476
+ "</table>\n",
477
+ "<p>34792 rows × 3 columns</p>\n",
478
+ "</div>"
479
+ ],
480
+ "text/plain": [
481
+ " Emotion Text \\\n",
482
+ "0 neutral Why ? \n",
483
+ "1 joy Sage Act upgrade on my to do list for tommorow. \n",
484
+ "2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ... \n",
485
+ "3 joy Such an eye ! The true hazel eye-and so brill... \n",
486
+ "4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b... \n",
487
+ "... ... ... \n",
488
+ "34787 surprise @MichelGW have you gift! Hope you like it! It'... \n",
489
+ "34788 joy The world didnt give it to me..so the world MO... \n",
490
+ "34789 anger A man robbed me today . \n",
491
+ "34790 fear Youu call it JEALOUSY, I call it of #Losing YO... \n",
492
+ "34791 sadness I think about you baby, and I dream about you ... \n",
493
+ "\n",
494
+ " Clean_Text \n",
495
+ "0 ? \n",
496
+ "1 Sage Act upgrade list tommorow. \n",
497
+ "2 WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS... \n",
498
+ "3 eye ! true hazel eye-and brilliant ! Regular f... \n",
499
+ "4 ugh babe.. hugggzzz u .! babe naamazed nga ako... \n",
500
+ "... ... \n",
501
+ "34787 gift! Hope like it! hand wear ! It'll warm! Lol \n",
502
+ "34788 world didnt me..so world DEFINITELY cnt away!!! \n",
503
+ "34789 man robbed today . \n",
504
+ "34790 Youu JEALOUSY, #Losing YOU... \n",
505
+ "34791 think baby, dream time \n",
506
+ "\n",
507
+ "[34792 rows x 3 columns]"
508
+ ]
509
+ },
510
+ "execution_count": 9,
511
+ "metadata": {},
512
+ "output_type": "execute_result"
513
+ }
514
+ ],
515
+ "source": [
516
+ "df"
517
+ ]
518
+ },
519
+ {
520
+ "cell_type": "code",
521
+ "execution_count": 10,
522
+ "id": "450c39c0-79dd-4eaf-85fe-57e344eb81bd",
523
+ "metadata": {},
524
+ "outputs": [],
525
+ "source": [
526
+ "# Features & Labels\n",
527
+ "Xfeatures = df['Clean_Text']\n",
528
+ "ylabels = df['Emotion']"
529
+ ]
530
+ },
531
+ {
532
+ "cell_type": "markdown",
533
+ "id": "edde3d4b",
534
+ "metadata": {},
535
+ "source": [
536
+ "# Split the data into train and test sets before fitting the pipeline, so that the vectorizer and model are fitted on the training set only; this prevents data leakage."
537
+ ]
538
+ },
539
+ {
540
+ "cell_type": "code",
541
+ "execution_count": 11,
542
+ "id": "27d7f976-c28f-449e-ae1a-53a42bbda4e8",
543
+ "metadata": {},
544
+ "outputs": [],
545
+ "source": [
546
+ "# Split Data\n",
547
+ "x_train,x_test,y_train,y_test = train_test_split(Xfeatures,ylabels,test_size=0.3,random_state=42)"
548
+ ]
549
+ },
550
+ {
551
+ "cell_type": "code",
552
+ "execution_count": 12,
553
+ "id": "2f086f29-dba9-40d2-a9dd-f06a6cca3a4c",
554
+ "metadata": {},
555
+ "outputs": [],
556
+ "source": [
557
+ "# Build Pipeline\n",
558
+ "from sklearn.pipeline import Pipeline"
559
+ ]
560
+ },
561
+ {
562
+ "cell_type": "code",
563
+ "execution_count": 13,
564
+ "id": "6b81cc86-2bef-40c2-b9a3-668caaadaff0",
565
+ "metadata": {},
566
+ "outputs": [],
567
+ "source": [
568
+ "# LogisticRegression Pipeline\n",
569
+ "pipe_lr = Pipeline(steps=[('cv',CountVectorizer()),('lr',LogisticRegression())])"
570
+ ]
571
+ },
572
+ {
573
+ "cell_type": "code",
574
+ "execution_count": 14,
575
+ "id": "dc64b9a7-efe2-4bc4-a0e7-46dff1d52b31",
576
+ "metadata": {
577
+ "jupyter": {
578
+ "outputs_hidden": true
579
+ },
580
+ "scrolled": false,
581
+ "tags": []
582
+ },
583
+ "outputs": [
584
+ {
585
+ "name": "stderr",
586
+ "output_type": "stream",
587
+ "text": [
588
+ "C:\\Users\\Sanket\\anaconda3\\envs\\nlp\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
589
+ "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
590
+ "\n",
591
+ "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
592
+ " https://scikit-learn.org/stable/modules/preprocessing.html\n",
593
+ "Please also refer to the documentation for alternative solver options:\n",
594
+ " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
595
+ " n_iter_i = _check_optimize_result(\n"
596
+ ]
597
+ },
598
+ {
599
+ "data": {
600
+ "text/html": [
601
+ "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;cv&#x27;, CountVectorizer()), (&#x27;lr&#x27;, LogisticRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;cv&#x27;, CountVectorizer()), (&#x27;lr&#x27;, LogisticRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CountVectorizer</label><div class=\"sk-toggleable__content\"><pre>CountVectorizer()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div></div></div>"
602
+ ],
603
+ "text/plain": [
604
+ "Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])"
605
+ ]
606
+ },
607
+ "execution_count": 14,
608
+ "metadata": {},
609
+ "output_type": "execute_result"
610
+ }
611
+ ],
612
+ "source": [
613
+ "# Train and Fit Data\n",
614
+ "pipe_lr.fit(x_train,y_train)"
615
+ ]
616
+ },
617
+ {
618
+ "cell_type": "code",
619
+ "execution_count": 15,
620
+ "id": "135ed6f8-56ff-4d53-85e3-541e3a7ae2d7",
621
+ "metadata": {},
622
+ "outputs": [
623
+ {
624
+ "data": {
625
+ "text/html": [
626
+ "<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;cv&#x27;, CountVectorizer()), (&#x27;lr&#x27;, LogisticRegression())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;cv&#x27;, CountVectorizer()), (&#x27;lr&#x27;, LogisticRegression())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CountVectorizer</label><div class=\"sk-toggleable__content\"><pre>CountVectorizer()</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div></div></div>"
627
+ ],
628
+ "text/plain": [
629
+ "Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])"
630
+ ]
631
+ },
632
+ "execution_count": 15,
633
+ "metadata": {},
634
+ "output_type": "execute_result"
635
+ }
636
+ ],
637
+ "source": [
638
+ "pipe_lr"
639
+ ]
640
+ },
641
+ {
642
+ "cell_type": "code",
643
+ "execution_count": 16,
644
+ "id": "28396371-5f5c-4a3b-b974-164e047764f3",
645
+ "metadata": {},
646
+ "outputs": [
647
+ {
648
+ "data": {
649
+ "text/plain": [
650
+ "0.619946349875455"
651
+ ]
652
+ },
653
+ "execution_count": 16,
654
+ "metadata": {},
655
+ "output_type": "execute_result"
656
+ }
657
+ ],
658
+ "source": [
659
+ "# Check Accuracy\n",
660
+ "pipe_lr.score(x_test,y_test)"
661
+ ]
662
+ },
663
+ {
664
+ "cell_type": "code",
665
+ "execution_count": 17,
666
+ "id": "eb3a26b6-d09e-422f-991b-b08c48f55b75",
667
+ "metadata": {},
668
+ "outputs": [],
669
+ "source": [
670
+ "# Make A Prediction\n",
671
+ "ex1 = \"This book was so interesting it made me happy\""
672
+ ]
673
+ },
674
+ {
675
+ "cell_type": "code",
676
+ "execution_count": 18,
677
+ "id": "b08597d9-6f59-45cb-a648-95b0da1ce313",
678
+ "metadata": {},
679
+ "outputs": [
680
+ {
681
+ "data": {
682
+ "text/plain": [
683
+ "array(['joy'], dtype=object)"
684
+ ]
685
+ },
686
+ "execution_count": 18,
687
+ "metadata": {},
688
+ "output_type": "execute_result"
689
+ }
690
+ ],
691
+ "source": [
692
+ "pipe_lr.predict([ex1])"
693
+ ]
694
+ },
695
+ {
696
+ "cell_type": "code",
697
+ "execution_count": 19,
698
+ "id": "5b3822ac-17fc-43dd-9bb7-8dad07a4d32c",
699
+ "metadata": {},
700
+ "outputs": [
701
+ {
702
+ "data": {
703
+ "text/plain": [
704
+ "array([[1.60505334e-03, 7.06448086e-03, 6.95652453e-03, 9.43810868e-01,\n",
705
+ " 1.00440585e-04, 2.63232385e-02, 6.63277122e-05, 1.40730665e-02]])"
706
+ ]
707
+ },
708
+ "execution_count": 19,
709
+ "metadata": {},
710
+ "output_type": "execute_result"
711
+ }
712
+ ],
713
+ "source": [
714
+ "# Prediction Prob\n",
715
+ "pipe_lr.predict_proba([ex1])"
716
+ ]
717
+ },
718
+ {
719
+ "cell_type": "code",
720
+ "execution_count": 20,
721
+ "id": "5b7c4596-d643-48e5-a777-79a6f55c49da",
722
+ "metadata": {},
723
+ "outputs": [
724
+ {
725
+ "data": {
726
+ "text/plain": [
727
+ "array(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'shame',\n",
728
+ " 'surprise'], dtype=object)"
729
+ ]
730
+ },
731
+ "execution_count": 20,
732
+ "metadata": {},
733
+ "output_type": "execute_result"
734
+ }
735
+ ],
736
+ "source": [
737
+ "# To Know the classes\n",
738
+ "pipe_lr.classes_"
739
+ ]
740
+ },
741
+ {
742
+ "cell_type": "code",
743
+ "execution_count": 21,
744
+ "id": "c0d40f62-b1fd-4748-a279-c8f50c748f26",
745
+ "metadata": {},
746
+ "outputs": [],
747
+ "source": [
748
+ "# Save Model & Pipeline\n",
749
+ "import joblib\n",
750
+ "pipeline_file = open(\"../models/emotion_classifier_pipe_lr.pkl\",\"wb\")\n",
751
+ "joblib.dump(pipe_lr,pipeline_file)\n",
752
+ "pipeline_file.close()"
753
+ ]
754
+ },
755
+ {
756
+ "cell_type": "code",
757
+ "execution_count": null,
758
+ "id": "377c4e98-67f0-45e5-8dd5-0417585754f0",
759
+ "metadata": {},
760
+ "outputs": [],
761
+ "source": []
762
+ }
763
+ ],
764
+ "metadata": {
765
+ "kernelspec": {
766
+ "display_name": "Python 3 (ipykernel)",
767
+ "language": "python",
768
+ "name": "python3"
769
+ },
770
+ "language_info": {
771
+ "codemirror_mode": {
772
+ "name": "ipython",
773
+ "version": 3
774
+ },
775
+ "file_extension": ".py",
776
+ "mimetype": "text/x-python",
777
+ "name": "python",
778
+ "nbconvert_exporter": "python",
779
+ "pygments_lexer": "ipython3",
780
+ "version": "3.10.9"
781
+ }
782
+ },
783
+ "nbformat": 4,
784
+ "nbformat_minor": 5
785
+ }