Azarthehulk
commited on
Commit
•
113b600
1
Parent(s):
1ad77a3
Upload notebooks_Untitled (1).ipynb
Browse files- notebooks_Untitled (1).ipynb +638 -0
notebooks_Untitled (1).ipynb
ADDED
@@ -0,0 +1,638 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"language_info": {
|
4 |
+
"codemirror_mode": {
|
5 |
+
"name": "python",
|
6 |
+
"version": 3
|
7 |
+
},
|
8 |
+
"file_extension": ".py",
|
9 |
+
"mimetype": "text/x-python",
|
10 |
+
"name": "python",
|
11 |
+
"nbconvert_exporter": "python",
|
12 |
+
"pygments_lexer": "ipython3",
|
13 |
+
"version": "3.8"
|
14 |
+
},
|
15 |
+
"kernelspec": {
|
16 |
+
"name": "python",
|
17 |
+
"display_name": "Python (Pyodide)",
|
18 |
+
"language": "python"
|
19 |
+
}
|
20 |
+
},
|
21 |
+
"nbformat_minor": 4,
|
22 |
+
"nbformat": 4,
|
23 |
+
"cells": [
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"source": "import pandas as pd",
|
27 |
+
"metadata": {
|
28 |
+
"trusted": true
|
29 |
+
},
|
30 |
+
"execution_count": 1,
|
31 |
+
"outputs": []
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"cell_type": "code",
|
35 |
+
"source": "df=[1,2,3,4]\nprint(pd.DataFrame(df))",
|
36 |
+
"metadata": {
|
37 |
+
"trusted": true
|
38 |
+
},
|
39 |
+
"execution_count": 2,
|
40 |
+
"outputs": [
|
41 |
+
{
|
42 |
+
"name": "stdout",
|
43 |
+
"text": " 0\n0 1\n1 2\n2 3\n3 4\n",
|
44 |
+
"output_type": "stream"
|
45 |
+
}
|
46 |
+
]
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"cell_type": "code",
|
50 |
+
"source": "print(pd.Series(df))",
|
51 |
+
"metadata": {
|
52 |
+
"trusted": true
|
53 |
+
},
|
54 |
+
"execution_count": 3,
|
55 |
+
"outputs": [
|
56 |
+
{
|
57 |
+
"name": "stdout",
|
58 |
+
"text": "0 1\n1 2\n2 3\n3 4\ndtype: int64\n",
|
59 |
+
"output_type": "stream"
|
60 |
+
}
|
61 |
+
]
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"cell_type": "code",
|
65 |
+
"source": "Employ=pd.read_csv(\"employees.csv\")",
|
66 |
+
"metadata": {
|
67 |
+
"trusted": true
|
68 |
+
},
|
69 |
+
"execution_count": 115,
|
70 |
+
"outputs": []
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"cell_type": "code",
|
74 |
+
"source": "Employ_dub=Employ.head(20)",
|
75 |
+
"metadata": {
|
76 |
+
"trusted": true
|
77 |
+
},
|
78 |
+
"execution_count": 116,
|
79 |
+
"outputs": []
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"cell_type": "code",
|
83 |
+
"source": "Employ_dub",
|
84 |
+
"metadata": {
|
85 |
+
"trusted": true
|
86 |
+
},
|
87 |
+
"execution_count": 117,
|
88 |
+
"outputs": [
|
89 |
+
{
|
90 |
+
"execution_count": 117,
|
91 |
+
"output_type": "execute_result",
|
92 |
+
"data": {
|
93 |
+
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True NaN \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 NaN Finance \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
|
94 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>NaN</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 
AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
95 |
+
},
|
96 |
+
"metadata": {}
|
97 |
+
}
|
98 |
+
]
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"cell_type": "code",
|
102 |
+
"source": "",
|
103 |
+
"metadata": {
|
104 |
+
"trusted": true
|
105 |
+
},
|
106 |
+
"execution_count": null,
|
107 |
+
"outputs": []
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"cell_type": "code",
|
111 |
+
"source": "#total info about the employee\nEmploy_dub.info()",
|
112 |
+
"metadata": {
|
113 |
+
"trusted": true
|
114 |
+
},
|
115 |
+
"execution_count": 26,
|
116 |
+
"outputs": [
|
117 |
+
{
|
118 |
+
"name": "stdout",
|
119 |
+
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 20 entries, 0 to 19\nData columns (total 8 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 First Name 19 non-null object \n 1 Gender 20 non-null object \n 2 Start Date 20 non-null object \n 3 Last Login Time 20 non-null object \n 4 Salary 20 non-null int64 \n 5 Bonus % 20 non-null float64\n 6 Senior Management 19 non-null object \n 7 Team 18 non-null object \ndtypes: float64(1), int64(1), object(6)\nmemory usage: 868.0+ bytes\n",
|
120 |
+
"output_type": "stream"
|
121 |
+
}
|
122 |
+
]
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"cell_type": "code",
|
126 |
+
"source": "Employ_dub.isnull()",
|
127 |
+
"metadata": {
|
128 |
+
"trusted": true
|
129 |
+
},
|
130 |
+
"execution_count": 27,
|
131 |
+
"outputs": [
|
132 |
+
{
|
133 |
+
"execution_count": 27,
|
134 |
+
"output_type": "execute_result",
|
135 |
+
"data": {
|
136 |
+
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 False False False False False False \n1 False False False False False False \n2 False False False False False False \n3 False False False False False False \n4 False False False False False False \n5 False False False False False False \n6 False False False False False False \n7 True False False False False False \n8 False False False False False False \n9 False False False False False False \n10 False False False False False False \n11 False False False False False False \n12 False False False False False False \n13 False False False False False False \n14 False False False False False False \n15 False False False False False False \n16 False False False False False False \n17 False False False False False False \n18 False False False False False False \n19 False False False False False False \n\n Senior Management Team \n0 False False \n1 False True \n2 False False \n3 False False \n4 False False \n5 False False \n6 False False \n7 True False \n8 False False \n9 False False \n10 False True \n11 False False \n12 False False \n13 False False \n14 False False \n15 False False \n16 False False \n17 False False \n18 False False \n19 False False ",
|
137 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>1</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>True</td>\n </tr>\n <tr>\n <th>2</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>3</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>4</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>5</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>6</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>7</th>\n <td>True</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>True</td>\n <td>False</td>\n </tr>\n <tr>\n <th>8</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n 
<tr>\n <th>9</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>10</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>True</td>\n </tr>\n <tr>\n <th>11</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>12</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>13</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>14</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>15</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>16</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>17</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>18</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>19</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
138 |
+
},
|
139 |
+
"metadata": {}
|
140 |
+
}
|
141 |
+
]
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"cell_type": "code",
|
145 |
+
"source": "#checking for the null values in the dataset of employee\nEmploy_dub.isnull().sum()",
|
146 |
+
"metadata": {
|
147 |
+
"trusted": true
|
148 |
+
},
|
149 |
+
"execution_count": 28,
|
150 |
+
"outputs": [
|
151 |
+
{
|
152 |
+
"execution_count": 28,
|
153 |
+
"output_type": "execute_result",
|
154 |
+
"data": {
|
155 |
+
"text/plain": "First Name 1\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 1\nTeam 2\ndtype: int64"
|
156 |
+
},
|
157 |
+
"metadata": {}
|
158 |
+
}
|
159 |
+
]
|
160 |
+
},
|
161 |
+
{
|
162 |
+
"cell_type": "code",
|
163 |
+
"source": "#changing the name of the dataset\ned=Employ_dub",
|
164 |
+
"metadata": {
|
165 |
+
"trusted": true
|
166 |
+
},
|
167 |
+
"execution_count": 29,
|
168 |
+
"outputs": []
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"cell_type": "code",
|
172 |
+
"source": "#dimension of the dataset\ned.shape",
|
173 |
+
"metadata": {
|
174 |
+
"trusted": true
|
175 |
+
},
|
176 |
+
"execution_count": 30,
|
177 |
+
"outputs": [
|
178 |
+
{
|
179 |
+
"execution_count": 30,
|
180 |
+
"output_type": "execute_result",
|
181 |
+
"data": {
|
182 |
+
"text/plain": "(20, 8)"
|
183 |
+
},
|
184 |
+
"metadata": {}
|
185 |
+
}
|
186 |
+
]
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"cell_type": "code",
|
190 |
+
"source": "ed.columns",
|
191 |
+
"metadata": {
|
192 |
+
"trusted": true
|
193 |
+
},
|
194 |
+
"execution_count": 31,
|
195 |
+
"outputs": [
|
196 |
+
{
|
197 |
+
"execution_count": 31,
|
198 |
+
"output_type": "execute_result",
|
199 |
+
"data": {
|
200 |
+
"text/plain": "Index(['First Name', 'Gender', 'Start Date', 'Last Login Time', 'Salary',\n 'Bonus %', 'Senior Management', 'Team'],\n dtype='object')"
|
201 |
+
},
|
202 |
+
"metadata": {}
|
203 |
+
}
|
204 |
+
]
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"cell_type": "code",
|
208 |
+
"source": "#working on the dictionary for a while:",
|
209 |
+
"metadata": {
|
210 |
+
"trusted": true
|
211 |
+
},
|
212 |
+
"execution_count": 32,
|
213 |
+
"outputs": []
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"cell_type": "code",
|
217 |
+
"source": "#creating test objects:\nimport numpy as np\nff=pd.DataFrame(np.random.rand(20,5))",
|
218 |
+
"metadata": {
|
219 |
+
"trusted": true
|
220 |
+
},
|
221 |
+
"execution_count": 39,
|
222 |
+
"outputs": []
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"cell_type": "code",
|
226 |
+
"source": "ff",
|
227 |
+
"metadata": {
|
228 |
+
"trusted": true
|
229 |
+
},
|
230 |
+
"execution_count": 40,
|
231 |
+
"outputs": [
|
232 |
+
{
|
233 |
+
"execution_count": 40,
|
234 |
+
"output_type": "execute_result",
|
235 |
+
"data": {
|
236 |
+
"text/plain": " 0 1 2 3 4\n0 0.020780 0.365190 0.673825 0.800112 0.188644\n1 0.660845 0.265913 0.445028 0.889438 0.601047\n2 0.646987 0.926823 0.722838 0.475271 0.827945\n3 0.871724 0.290353 0.099578 0.109949 0.229182\n4 0.704794 0.884062 0.751327 0.595746 0.612269\n5 0.371269 0.560512 0.510264 0.247923 0.618853\n6 0.150398 0.116999 0.934865 0.315723 0.221538\n7 0.556336 0.875514 0.471526 0.539511 0.271221\n8 0.428221 0.546766 0.921274 0.500520 0.400341\n9 0.150170 0.802378 0.608124 0.342871 0.076631\n10 0.099049 0.280748 0.865939 0.214541 0.083318\n11 0.042867 0.701639 0.051457 0.691385 0.051529\n12 0.530845 0.248395 0.433733 0.049458 0.314959\n13 0.142230 0.746634 0.536247 0.096499 0.123294\n14 0.139630 0.056464 0.595644 0.764071 0.193826\n15 0.709624 0.590262 0.816268 0.187931 0.366224\n16 0.982939 0.260358 0.918897 0.531278 0.304655\n17 0.381823 0.003594 0.052597 0.921529 0.022103\n18 0.227944 0.706832 0.137266 0.129158 0.882734\n19 0.226257 0.818213 0.326071 0.230419 0.668891",
|
237 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0.020780</td>\n <td>0.365190</td>\n <td>0.673825</td>\n <td>0.800112</td>\n <td>0.188644</td>\n </tr>\n <tr>\n <th>1</th>\n <td>0.660845</td>\n <td>0.265913</td>\n <td>0.445028</td>\n <td>0.889438</td>\n <td>0.601047</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0.646987</td>\n <td>0.926823</td>\n <td>0.722838</td>\n <td>0.475271</td>\n <td>0.827945</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0.871724</td>\n <td>0.290353</td>\n <td>0.099578</td>\n <td>0.109949</td>\n <td>0.229182</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0.704794</td>\n <td>0.884062</td>\n <td>0.751327</td>\n <td>0.595746</td>\n <td>0.612269</td>\n </tr>\n <tr>\n <th>5</th>\n <td>0.371269</td>\n <td>0.560512</td>\n <td>0.510264</td>\n <td>0.247923</td>\n <td>0.618853</td>\n </tr>\n <tr>\n <th>6</th>\n <td>0.150398</td>\n <td>0.116999</td>\n <td>0.934865</td>\n <td>0.315723</td>\n <td>0.221538</td>\n </tr>\n <tr>\n <th>7</th>\n <td>0.556336</td>\n <td>0.875514</td>\n <td>0.471526</td>\n <td>0.539511</td>\n <td>0.271221</td>\n </tr>\n <tr>\n <th>8</th>\n <td>0.428221</td>\n <td>0.546766</td>\n <td>0.921274</td>\n <td>0.500520</td>\n <td>0.400341</td>\n </tr>\n <tr>\n <th>9</th>\n <td>0.150170</td>\n <td>0.802378</td>\n <td>0.608124</td>\n <td>0.342871</td>\n <td>0.076631</td>\n </tr>\n <tr>\n <th>10</th>\n <td>0.099049</td>\n <td>0.280748</td>\n <td>0.865939</td>\n <td>0.214541</td>\n <td>0.083318</td>\n </tr>\n <tr>\n <th>11</th>\n <td>0.042867</td>\n <td>0.701639</td>\n <td>0.051457</td>\n <td>0.691385</td>\n <td>0.051529</td>\n </tr>\n <tr>\n <th>12</th>\n <td>0.530845</td>\n 
<td>0.248395</td>\n <td>0.433733</td>\n <td>0.049458</td>\n <td>0.314959</td>\n </tr>\n <tr>\n <th>13</th>\n <td>0.142230</td>\n <td>0.746634</td>\n <td>0.536247</td>\n <td>0.096499</td>\n <td>0.123294</td>\n </tr>\n <tr>\n <th>14</th>\n <td>0.139630</td>\n <td>0.056464</td>\n <td>0.595644</td>\n <td>0.764071</td>\n <td>0.193826</td>\n </tr>\n <tr>\n <th>15</th>\n <td>0.709624</td>\n <td>0.590262</td>\n <td>0.816268</td>\n <td>0.187931</td>\n <td>0.366224</td>\n </tr>\n <tr>\n <th>16</th>\n <td>0.982939</td>\n <td>0.260358</td>\n <td>0.918897</td>\n <td>0.531278</td>\n <td>0.304655</td>\n </tr>\n <tr>\n <th>17</th>\n <td>0.381823</td>\n <td>0.003594</td>\n <td>0.052597</td>\n <td>0.921529</td>\n <td>0.022103</td>\n </tr>\n <tr>\n <th>18</th>\n <td>0.227944</td>\n <td>0.706832</td>\n <td>0.137266</td>\n <td>0.129158</td>\n <td>0.882734</td>\n </tr>\n <tr>\n <th>19</th>\n <td>0.226257</td>\n <td>0.818213</td>\n <td>0.326071</td>\n <td>0.230419</td>\n <td>0.668891</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
238 |
+
},
|
239 |
+
"metadata": {}
|
240 |
+
}
|
241 |
+
]
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"cell_type": "code",
|
245 |
+
"source": "ff.info()",
|
246 |
+
"metadata": {
|
247 |
+
"trusted": true
|
248 |
+
},
|
249 |
+
"execution_count": 41,
|
250 |
+
"outputs": [
|
251 |
+
{
|
252 |
+
"name": "stdout",
|
253 |
+
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 20 entries, 0 to 19\nData columns (total 5 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 0 20 non-null float64\n 1 1 20 non-null float64\n 2 2 20 non-null float64\n 3 3 20 non-null float64\n 4 4 20 non-null float64\ndtypes: float64(5)\nmemory usage: 868.0 bytes\n",
|
254 |
+
"output_type": "stream"
|
255 |
+
}
|
256 |
+
]
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"cell_type": "code",
|
260 |
+
"source": "#data functon:\ndate=pd.DataFrame(\n{\n\"date\":['10/9/2020','11/09/2020','12/09/2020'],\n\"students\":[10,20,30]})\n",
|
261 |
+
"metadata": {
|
262 |
+
"trusted": true
|
263 |
+
},
|
264 |
+
"execution_count": 43,
|
265 |
+
"outputs": [
|
266 |
+
{
|
267 |
+
"execution_count": 43,
|
268 |
+
"output_type": "execute_result",
|
269 |
+
"data": {
|
270 |
+
"text/plain": " date students\n0 10/9/2020 10\n1 11/09/2020 20\n2 12/09/2020 30",
|
271 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
272 |
+
},
|
273 |
+
"metadata": {}
|
274 |
+
}
|
275 |
+
]
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"cell_type": "code",
|
279 |
+
"source": "ed.value_counts()",
|
280 |
+
"metadata": {
|
281 |
+
"trusted": true
|
282 |
+
},
|
283 |
+
"execution_count": 52,
|
284 |
+
"outputs": [
|
285 |
+
{
|
286 |
+
"execution_count": 52,
|
287 |
+
"output_type": "execute_result",
|
288 |
+
"data": {
|
289 |
+
"text/plain": "First Name Gender Start Date Last Login Time Salary Bonus % Senior Management Team \nAngela Female 11/22/2005 6:29 AM 95570 18.523 True Engineering 1\nJerry Male 3/4/2005 1:00 PM 138705 9.340 True Finance 1\nRuby Female 8/17/1987 4:20 PM 65476 10.012 True Product 1\nMaria Female 4/23/1993 11:17 AM 130590 11.858 False Finance 1\nLillian Female 6/5/2016 6:09 AM 59414 1.256 False Product 1\nLarry Male 1/24/1998 4:47 PM 101004 1.389 True Client Services 1\nKimberly Female 1/14/1999 7:13 AM 41426 14.543 True Finance 1\nJulie Female 10/26/1997 3:19 PM 102508 12.637 True Legal 1\nJeremy Male 9/21/2010 5:56 AM 90370 7.369 False Human Resources 1\nBrandon Male 12/1/1980 1:08 AM 112807 17.492 True Human Resources 1\nGary Male 1/27/2008 11:40 PM 109831 5.831 False Sales 1\nFrances Female 8/8/2002 6:51 AM 139852 7.524 True Business Development 1\nDouglas Male 8/6/1993 12:42 PM 97308 6.945 True Marketing 1\nDonna Female 7/22/2010 3:48 AM 81014 1.894 False Product 1\nDiana Female 10/23/1981 10:27 AM 132940 19.082 False Client Services 1\nDennis Male 4/18/1987 1:35 AM 115163 10.125 False Legal 1\nShawn Male 12/7/1986 7:45 PM 111737 6.414 False Product 1\ndtype: int64"
|
290 |
+
},
|
291 |
+
"metadata": {}
|
292 |
+
}
|
293 |
+
]
|
294 |
+
},
|
295 |
+
{
|
296 |
+
"cell_type": "code",
|
297 |
+
"source": "ed[['Gender','Salary']]",
|
298 |
+
"metadata": {
|
299 |
+
"trusted": true
|
300 |
+
},
|
301 |
+
"execution_count": 62,
|
302 |
+
"outputs": [
|
303 |
+
{
|
304 |
+
"execution_count": 62,
|
305 |
+
"output_type": "execute_result",
|
306 |
+
"data": {
|
307 |
+
"text/plain": " Gender Salary\n0 Male 97308\n1 Male 61933\n2 Female 130590\n3 Male 138705\n4 Male 101004\n5 Male 115163\n6 Female 65476\n7 Female 45906\n8 Female 95570\n9 Female 139852\n10 Female 63241\n11 Female 102508\n12 Male 112807\n13 Male 109831\n14 Female 41426\n15 Female 59414\n16 Male 90370\n17 Male 111737\n18 Female 132940\n19 Female 81014",
|
308 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Gender</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Male</td>\n <td>97308</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Male</td>\n <td>61933</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Female</td>\n <td>130590</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Male</td>\n <td>138705</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Male</td>\n <td>101004</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Male</td>\n <td>115163</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Female</td>\n <td>65476</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Female</td>\n <td>45906</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Female</td>\n <td>95570</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Female</td>\n <td>139852</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Female</td>\n <td>63241</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Female</td>\n <td>102508</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Male</td>\n <td>112807</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Male</td>\n <td>109831</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Female</td>\n <td>41426</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Female</td>\n <td>59414</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Male</td>\n <td>90370</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Male</td>\n <td>111737</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Female</td>\n <td>132940</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Female</td>\n <td>81014</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
309 |
+
},
|
310 |
+
"metadata": {}
|
311 |
+
}
|
312 |
+
]
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"cell_type": "code",
|
316 |
+
"source": "#selection by position:rows data:\ned.iloc[8:12]",
|
317 |
+
"metadata": {
|
318 |
+
"trusted": true
|
319 |
+
},
|
320 |
+
"execution_count": 69,
|
321 |
+
"outputs": [
|
322 |
+
{
|
323 |
+
"execution_count": 69,
|
324 |
+
"output_type": "execute_result",
|
325 |
+
"data": {
|
326 |
+
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n\n Senior Management Team \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal ",
|
327 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
328 |
+
},
|
329 |
+
"metadata": {}
|
330 |
+
}
|
331 |
+
]
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"cell_type": "code",
|
335 |
+
"source": "ed.loc[8]",
|
336 |
+
"metadata": {
|
337 |
+
"trusted": true
|
338 |
+
},
|
339 |
+
"execution_count": 64,
|
340 |
+
"outputs": [
|
341 |
+
{
|
342 |
+
"execution_count": 64,
|
343 |
+
"output_type": "execute_result",
|
344 |
+
"data": {
|
345 |
+
"text/plain": "First Name Angela\nGender Female\nStart Date 11/22/2005\nLast Login Time 6:29 AM\nSalary 95570\nBonus % 18.523\nSenior Management True\nTeam Engineering\nName: 8, dtype: object"
|
346 |
+
},
|
347 |
+
"metadata": {}
|
348 |
+
}
|
349 |
+
]
|
350 |
+
},
|
351 |
+
{
|
352 |
+
"cell_type": "code",
|
353 |
+
"source": "#data cleaning:\ned.isnull().sum()",
|
354 |
+
"metadata": {
|
355 |
+
"trusted": true
|
356 |
+
},
|
357 |
+
"execution_count": 73,
|
358 |
+
"outputs": [
|
359 |
+
{
|
360 |
+
"execution_count": 73,
|
361 |
+
"output_type": "execute_result",
|
362 |
+
"data": {
|
363 |
+
"text/plain": "First Name 1\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 1\nTeam 2\ndtype: int64"
|
364 |
+
},
|
365 |
+
"metadata": {}
|
366 |
+
}
|
367 |
+
]
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"cell_type": "code",
|
371 |
+
"source": "#total null values:\ned.isnull().sum().sum()",
|
372 |
+
"metadata": {
|
373 |
+
"trusted": true
|
374 |
+
},
|
375 |
+
"execution_count": 74,
|
376 |
+
"outputs": [
|
377 |
+
{
|
378 |
+
"execution_count": 74,
|
379 |
+
"output_type": "execute_result",
|
380 |
+
"data": {
|
381 |
+
"text/plain": "4"
|
382 |
+
},
|
383 |
+
"metadata": {}
|
384 |
+
}
|
385 |
+
]
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"cell_type": "code",
|
389 |
+
"source": "ed.notnull().sum().sum()",
|
390 |
+
"metadata": {
|
391 |
+
"trusted": true
|
392 |
+
},
|
393 |
+
"execution_count": 77,
|
394 |
+
"outputs": [
|
395 |
+
{
|
396 |
+
"execution_count": 77,
|
397 |
+
"output_type": "execute_result",
|
398 |
+
"data": {
|
399 |
+
"text/plain": "156"
|
400 |
+
},
|
401 |
+
"metadata": {}
|
402 |
+
}
|
403 |
+
]
|
404 |
+
},
|
405 |
+
{
|
406 |
+
"cell_type": "code",
|
407 |
+
"source": "#for practice with drop, keep a second reference to the data (note: plain assignment aliases ed; use ed.copy() for an independent copy):\ned2=ed",
|
408 |
+
"metadata": {
|
409 |
+
"trusted": true
|
410 |
+
},
|
411 |
+
"execution_count": 78,
|
412 |
+
"outputs": []
|
413 |
+
},
|
414 |
+
{
|
415 |
+
"cell_type": "code",
|
416 |
+
"source": "ed2",
|
417 |
+
"metadata": {
|
418 |
+
"trusted": true
|
419 |
+
},
|
420 |
+
"execution_count": 79,
|
421 |
+
"outputs": [
|
422 |
+
{
|
423 |
+
"execution_count": 79,
|
424 |
+
"output_type": "execute_result",
|
425 |
+
"data": {
|
426 |
+
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True NaN \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 NaN Finance \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
|
427 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>NaN</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 
AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
428 |
+
},
|
429 |
+
"metadata": {}
|
430 |
+
}
|
431 |
+
]
|
432 |
+
},
|
433 |
+
{
|
434 |
+
"cell_type": "code",
|
435 |
+
"source": "#removing entire columns that contain null values:\ned3=ed2.dropna(axis=1)\nprint(\"prasent null values:\",ed3.isnull().sum().sum())",
|
436 |
+
"metadata": {
|
437 |
+
"trusted": true
|
438 |
+
},
|
439 |
+
"execution_count": 90,
|
440 |
+
"outputs": [
|
441 |
+
{
|
442 |
+
"name": "stdout",
|
443 |
+
"text": "prasent null values: 0\n",
|
444 |
+
"output_type": "stream"
|
445 |
+
}
|
446 |
+
]
|
447 |
+
},
|
448 |
+
{
|
449 |
+
"cell_type": "code",
|
450 |
+
"source": "ed2.fillna(10,inplace=True)",
|
451 |
+
"metadata": {
|
452 |
+
"trusted": true
|
453 |
+
},
|
454 |
+
"execution_count": 96,
|
455 |
+
"outputs": [
|
456 |
+
{
|
457 |
+
"name": "stderr",
|
458 |
+
"text": "<ipython-input-96-9a2616dc4607>:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n ed2.fillna(10,inplace=True)\n",
|
459 |
+
"output_type": "stream"
|
460 |
+
}
|
461 |
+
]
|
462 |
+
},
|
463 |
+
{
|
464 |
+
"cell_type": "code",
|
465 |
+
"source": "ed2.isnull().sum()",
|
466 |
+
"metadata": {
|
467 |
+
"trusted": true
|
468 |
+
},
|
469 |
+
"execution_count": 97,
|
470 |
+
"outputs": [
|
471 |
+
{
|
472 |
+
"execution_count": 97,
|
473 |
+
"output_type": "execute_result",
|
474 |
+
"data": {
|
475 |
+
"text/plain": "First Name 0\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 0\nTeam 0\ndtype: int64"
|
476 |
+
},
|
477 |
+
"metadata": {}
|
478 |
+
}
|
479 |
+
]
|
480 |
+
},
|
481 |
+
{
|
482 |
+
"cell_type": "code",
|
483 |
+
"source": "ed2",
|
484 |
+
"metadata": {
|
485 |
+
"trusted": true
|
486 |
+
},
|
487 |
+
"execution_count": 98,
|
488 |
+
"outputs": [
|
489 |
+
{
|
490 |
+
"execution_count": 98,
|
491 |
+
"output_type": "execute_result",
|
492 |
+
"data": {
|
493 |
+
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 10 Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True 10 \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 10 Finance \n8 True Engineering \n9 True Business Development \n10 True 10 \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
|
494 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>10</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>10</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>10</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 AM</td>\n 
<td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>10</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
495 |
+
},
|
496 |
+
"metadata": {}
|
497 |
+
}
|
498 |
+
]
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"cell_type": "code",
|
502 |
+
"source": "ed5=Employ.head(20)\ned5",
|
503 |
+
"metadata": {
|
504 |
+
"trusted": true
|
505 |
+
},
|
506 |
+
"execution_count": 118,
|
507 |
+
"outputs": [
|
508 |
+
{
|
509 |
+
"execution_count": 118,
|
510 |
+
"output_type": "execute_result",
|
511 |
+
"data": {
|
512 |
+
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True NaN \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 NaN Finance \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
|
513 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>NaN</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 
AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
514 |
+
},
|
515 |
+
"metadata": {}
|
516 |
+
}
|
517 |
+
]
|
518 |
+
},
|
519 |
+
{
|
520 |
+
"cell_type": "code",
|
521 |
+
"source": "ed2.isnull().sum()",
|
522 |
+
"metadata": {
|
523 |
+
"trusted": true
|
524 |
+
},
|
525 |
+
"execution_count": 111,
|
526 |
+
"outputs": [
|
527 |
+
{
|
528 |
+
"execution_count": 111,
|
529 |
+
"output_type": "execute_result",
|
530 |
+
"data": {
|
531 |
+
"text/plain": "First Name 0\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 0\nTeam 0\ndtype: int64"
|
532 |
+
},
|
533 |
+
"metadata": {}
|
534 |
+
}
|
535 |
+
]
|
536 |
+
},
|
537 |
+
{
|
538 |
+
"cell_type": "code",
|
539 |
+
"source": "date",
|
540 |
+
"metadata": {
|
541 |
+
"trusted": true
|
542 |
+
},
|
543 |
+
"execution_count": 120,
|
544 |
+
"outputs": [
|
545 |
+
{
|
546 |
+
"execution_count": 120,
|
547 |
+
"output_type": "execute_result",
|
548 |
+
"data": {
|
549 |
+
"text/plain": " date students\n0 10/9/2020 10\n1 11/09/2020 20\n2 12/09/2020 30",
|
550 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
551 |
+
},
|
552 |
+
"metadata": {}
|
553 |
+
}
|
554 |
+
]
|
555 |
+
},
|
556 |
+
{
|
557 |
+
"cell_type": "code",
|
558 |
+
"source": "date.dtypes",
|
559 |
+
"metadata": {
|
560 |
+
"trusted": true
|
561 |
+
},
|
562 |
+
"execution_count": 124,
|
563 |
+
"outputs": [
|
564 |
+
{
|
565 |
+
"execution_count": 124,
|
566 |
+
"output_type": "execute_result",
|
567 |
+
"data": {
|
568 |
+
"text/plain": "date object\nstudents int64\ndtype: object"
|
569 |
+
},
|
570 |
+
"metadata": {}
|
571 |
+
}
|
572 |
+
]
|
573 |
+
},
|
574 |
+
{
|
575 |
+
"cell_type": "code",
|
576 |
+
"source": "#rename the 'date' column to 'check' (returns a new DataFrame; date itself is not modified):\ndate.rename(columns={'date':'check'})",
|
577 |
+
"metadata": {
|
578 |
+
"trusted": true
|
579 |
+
},
|
580 |
+
"execution_count": 131,
|
581 |
+
"outputs": [
|
582 |
+
{
|
583 |
+
"execution_count": 131,
|
584 |
+
"output_type": "execute_result",
|
585 |
+
"data": {
|
586 |
+
"text/plain": " check students\n0 10/9/2020 10\n1 11/09/2020 20\n2 12/09/2020 30",
|
587 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>check</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
588 |
+
},
|
589 |
+
"metadata": {}
|
590 |
+
}
|
591 |
+
]
|
592 |
+
},
|
593 |
+
{
|
594 |
+
"cell_type": "code",
|
595 |
+
"source": "date.sort_values('students',ascending=False)",
|
596 |
+
"metadata": {
|
597 |
+
"trusted": true
|
598 |
+
},
|
599 |
+
"execution_count": 134,
|
600 |
+
"outputs": [
|
601 |
+
{
|
602 |
+
"execution_count": 134,
|
603 |
+
"output_type": "execute_result",
|
604 |
+
"data": {
|
605 |
+
"text/plain": " date students\n2 12/09/2020 30\n1 11/09/2020 20\n0 10/9/2020 10",
|
606 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
607 |
+
},
|
608 |
+
"metadata": {}
|
609 |
+
}
|
610 |
+
]
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"cell_type": "code",
|
614 |
+
"source": "",
|
615 |
+
"metadata": {
|
616 |
+
"trusted": true
|
617 |
+
},
|
618 |
+
"execution_count": null,
|
619 |
+
"outputs": []
|
620 |
+
},
|
621 |
+
{
|
622 |
+
"cell_type": "code",
|
623 |
+
"source": "",
|
624 |
+
"metadata": {
|
625 |
+
"trusted": true
|
626 |
+
},
|
627 |
+
"execution_count": null,
|
628 |
+
"outputs": []
|
629 |
+
},
|
630 |
+
{
|
631 |
+
"cell_type": "code",
|
632 |
+
"source": "",
|
633 |
+
"metadata": {},
|
634 |
+
"execution_count": null,
|
635 |
+
"outputs": []
|
636 |
+
}
|
637 |
+
]
|
638 |
+
}
|