File size: 68,989 Bytes
113b600 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 |
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "python",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8"
},
"kernelspec": {
"name": "python",
"display_name": "Python (Pyodide)",
"language": "python"
}
},
"nbformat_minor": 4,
"nbformat": 4,
"cells": [
{
"cell_type": "code",
"source": "import pandas as pd",
"metadata": {
"trusted": true
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"source": "df=[1,2,3,4]\nprint(pd.DataFrame(df))",
"metadata": {
"trusted": true
},
"execution_count": 2,
"outputs": [
{
"name": "stdout",
"text": " 0\n0 1\n1 2\n2 3\n3 4\n",
"output_type": "stream"
}
]
},
{
"cell_type": "code",
"source": "print(pd.Series(df))",
"metadata": {
"trusted": true
},
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"text": "0 1\n1 2\n2 3\n3 4\ndtype: int64\n",
"output_type": "stream"
}
]
},
{
"cell_type": "code",
"source": "Employ=pd.read_csv(\"employees.csv\")",
"metadata": {
"trusted": true
},
"execution_count": 115,
"outputs": []
},
{
"cell_type": "code",
"source": "Employ_dub=Employ.head(20)",
"metadata": {
"trusted": true
},
"execution_count": 116,
"outputs": []
},
{
"cell_type": "code",
"source": "Employ_dub",
"metadata": {
"trusted": true
},
"execution_count": 117,
"outputs": [
{
"execution_count": 117,
"output_type": "execute_result",
"data": {
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True NaN \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 NaN Finance \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>NaN</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 
AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "#total info about the employee\nEmploy_dub.info()",
"metadata": {
"trusted": true
},
"execution_count": 26,
"outputs": [
{
"name": "stdout",
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 20 entries, 0 to 19\nData columns (total 8 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 First Name 19 non-null object \n 1 Gender 20 non-null object \n 2 Start Date 20 non-null object \n 3 Last Login Time 20 non-null object \n 4 Salary 20 non-null int64 \n 5 Bonus % 20 non-null float64\n 6 Senior Management 19 non-null object \n 7 Team 18 non-null object \ndtypes: float64(1), int64(1), object(6)\nmemory usage: 868.0+ bytes\n",
"output_type": "stream"
}
]
},
{
"cell_type": "code",
"source": "Employ_dub.isnull()",
"metadata": {
"trusted": true
},
"execution_count": 27,
"outputs": [
{
"execution_count": 27,
"output_type": "execute_result",
"data": {
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 False False False False False False \n1 False False False False False False \n2 False False False False False False \n3 False False False False False False \n4 False False False False False False \n5 False False False False False False \n6 False False False False False False \n7 True False False False False False \n8 False False False False False False \n9 False False False False False False \n10 False False False False False False \n11 False False False False False False \n12 False False False False False False \n13 False False False False False False \n14 False False False False False False \n15 False False False False False False \n16 False False False False False False \n17 False False False False False False \n18 False False False False False False \n19 False False False False False False \n\n Senior Management Team \n0 False False \n1 False True \n2 False False \n3 False False \n4 False False \n5 False False \n6 False False \n7 True False \n8 False False \n9 False False \n10 False True \n11 False False \n12 False False \n13 False False \n14 False False \n15 False False \n16 False False \n17 False False \n18 False False \n19 False False ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>1</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>True</td>\n </tr>\n <tr>\n <th>2</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>3</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>4</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>5</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>6</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>7</th>\n <td>True</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>True</td>\n <td>False</td>\n </tr>\n <tr>\n <th>8</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n 
<tr>\n <th>9</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>10</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>True</td>\n </tr>\n <tr>\n <th>11</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>12</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>13</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>14</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>15</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>16</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>17</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>18</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n <tr>\n <th>19</th>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n <td>False</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "#checking for the null values in the dataset of employee\nEmploy_dub.isnull().sum()",
"metadata": {
"trusted": true
},
"execution_count": 28,
"outputs": [
{
"execution_count": 28,
"output_type": "execute_result",
"data": {
"text/plain": "First Name 1\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 1\nTeam 2\ndtype: int64"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "#shorter name for the dataset (ed refers to the same object as Employ_dub, not a copy)\ned=Employ_dub",
"metadata": {
"trusted": true
},
"execution_count": 29,
"outputs": []
},
{
"cell_type": "code",
"source": "#dimension of the dataset\ned.shape",
"metadata": {
"trusted": true
},
"execution_count": 30,
"outputs": [
{
"execution_count": 30,
"output_type": "execute_result",
"data": {
"text/plain": "(20, 8)"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "ed.columns",
"metadata": {
"trusted": true
},
"execution_count": 31,
"outputs": [
{
"execution_count": 31,
"output_type": "execute_result",
"data": {
"text/plain": "Index(['First Name', 'Gender', 'Start Date', 'Last Login Time', 'Salary',\n 'Bonus %', 'Senior Management', 'Team'],\n dtype='object')"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "#working on the dictionary for a while:",
"metadata": {
"trusted": true
},
"execution_count": 32,
"outputs": []
},
{
"cell_type": "code",
"source": "#creating test objects:\nimport numpy as np\nff=pd.DataFrame(np.random.rand(20,5))",
"metadata": {
"trusted": true
},
"execution_count": 39,
"outputs": []
},
{
"cell_type": "code",
"source": "ff",
"metadata": {
"trusted": true
},
"execution_count": 40,
"outputs": [
{
"execution_count": 40,
"output_type": "execute_result",
"data": {
"text/plain": " 0 1 2 3 4\n0 0.020780 0.365190 0.673825 0.800112 0.188644\n1 0.660845 0.265913 0.445028 0.889438 0.601047\n2 0.646987 0.926823 0.722838 0.475271 0.827945\n3 0.871724 0.290353 0.099578 0.109949 0.229182\n4 0.704794 0.884062 0.751327 0.595746 0.612269\n5 0.371269 0.560512 0.510264 0.247923 0.618853\n6 0.150398 0.116999 0.934865 0.315723 0.221538\n7 0.556336 0.875514 0.471526 0.539511 0.271221\n8 0.428221 0.546766 0.921274 0.500520 0.400341\n9 0.150170 0.802378 0.608124 0.342871 0.076631\n10 0.099049 0.280748 0.865939 0.214541 0.083318\n11 0.042867 0.701639 0.051457 0.691385 0.051529\n12 0.530845 0.248395 0.433733 0.049458 0.314959\n13 0.142230 0.746634 0.536247 0.096499 0.123294\n14 0.139630 0.056464 0.595644 0.764071 0.193826\n15 0.709624 0.590262 0.816268 0.187931 0.366224\n16 0.982939 0.260358 0.918897 0.531278 0.304655\n17 0.381823 0.003594 0.052597 0.921529 0.022103\n18 0.227944 0.706832 0.137266 0.129158 0.882734\n19 0.226257 0.818213 0.326071 0.230419 0.668891",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n <th>2</th>\n <th>3</th>\n <th>4</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0.020780</td>\n <td>0.365190</td>\n <td>0.673825</td>\n <td>0.800112</td>\n <td>0.188644</td>\n </tr>\n <tr>\n <th>1</th>\n <td>0.660845</td>\n <td>0.265913</td>\n <td>0.445028</td>\n <td>0.889438</td>\n <td>0.601047</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0.646987</td>\n <td>0.926823</td>\n <td>0.722838</td>\n <td>0.475271</td>\n <td>0.827945</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0.871724</td>\n <td>0.290353</td>\n <td>0.099578</td>\n <td>0.109949</td>\n <td>0.229182</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0.704794</td>\n <td>0.884062</td>\n <td>0.751327</td>\n <td>0.595746</td>\n <td>0.612269</td>\n </tr>\n <tr>\n <th>5</th>\n <td>0.371269</td>\n <td>0.560512</td>\n <td>0.510264</td>\n <td>0.247923</td>\n <td>0.618853</td>\n </tr>\n <tr>\n <th>6</th>\n <td>0.150398</td>\n <td>0.116999</td>\n <td>0.934865</td>\n <td>0.315723</td>\n <td>0.221538</td>\n </tr>\n <tr>\n <th>7</th>\n <td>0.556336</td>\n <td>0.875514</td>\n <td>0.471526</td>\n <td>0.539511</td>\n <td>0.271221</td>\n </tr>\n <tr>\n <th>8</th>\n <td>0.428221</td>\n <td>0.546766</td>\n <td>0.921274</td>\n <td>0.500520</td>\n <td>0.400341</td>\n </tr>\n <tr>\n <th>9</th>\n <td>0.150170</td>\n <td>0.802378</td>\n <td>0.608124</td>\n <td>0.342871</td>\n <td>0.076631</td>\n </tr>\n <tr>\n <th>10</th>\n <td>0.099049</td>\n <td>0.280748</td>\n <td>0.865939</td>\n <td>0.214541</td>\n <td>0.083318</td>\n </tr>\n <tr>\n <th>11</th>\n <td>0.042867</td>\n <td>0.701639</td>\n <td>0.051457</td>\n <td>0.691385</td>\n <td>0.051529</td>\n </tr>\n <tr>\n <th>12</th>\n <td>0.530845</td>\n 
<td>0.248395</td>\n <td>0.433733</td>\n <td>0.049458</td>\n <td>0.314959</td>\n </tr>\n <tr>\n <th>13</th>\n <td>0.142230</td>\n <td>0.746634</td>\n <td>0.536247</td>\n <td>0.096499</td>\n <td>0.123294</td>\n </tr>\n <tr>\n <th>14</th>\n <td>0.139630</td>\n <td>0.056464</td>\n <td>0.595644</td>\n <td>0.764071</td>\n <td>0.193826</td>\n </tr>\n <tr>\n <th>15</th>\n <td>0.709624</td>\n <td>0.590262</td>\n <td>0.816268</td>\n <td>0.187931</td>\n <td>0.366224</td>\n </tr>\n <tr>\n <th>16</th>\n <td>0.982939</td>\n <td>0.260358</td>\n <td>0.918897</td>\n <td>0.531278</td>\n <td>0.304655</td>\n </tr>\n <tr>\n <th>17</th>\n <td>0.381823</td>\n <td>0.003594</td>\n <td>0.052597</td>\n <td>0.921529</td>\n <td>0.022103</td>\n </tr>\n <tr>\n <th>18</th>\n <td>0.227944</td>\n <td>0.706832</td>\n <td>0.137266</td>\n <td>0.129158</td>\n <td>0.882734</td>\n </tr>\n <tr>\n <th>19</th>\n <td>0.226257</td>\n <td>0.818213</td>\n <td>0.326071</td>\n <td>0.230419</td>\n <td>0.668891</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "ff.info()",
"metadata": {
"trusted": true
},
"execution_count": 41,
"outputs": [
{
"name": "stdout",
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 20 entries, 0 to 19\nData columns (total 5 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 0 20 non-null float64\n 1 1 20 non-null float64\n 2 2 20 non-null float64\n 3 3 20 non-null float64\n 4 4 20 non-null float64\ndtypes: float64(5)\nmemory usage: 868.0 bytes\n",
"output_type": "stream"
}
]
},
{
"cell_type": "code",
"source": "#data function:\ndate=pd.DataFrame(\n{\n\"date\":['10/9/2020','11/09/2020','12/09/2020'],\n\"students\":[10,20,30]})\n",
"metadata": {
"trusted": true
},
"execution_count": 43,
"outputs": [
{
"execution_count": 43,
"output_type": "execute_result",
"data": {
"text/plain": " date students\n0 10/9/2020 10\n1 11/09/2020 20\n2 12/09/2020 30",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "ed.value_counts()",
"metadata": {
"trusted": true
},
"execution_count": 52,
"outputs": [
{
"execution_count": 52,
"output_type": "execute_result",
"data": {
"text/plain": "First Name Gender Start Date Last Login Time Salary Bonus % Senior Management Team \nAngela Female 11/22/2005 6:29 AM 95570 18.523 True Engineering 1\nJerry Male 3/4/2005 1:00 PM 138705 9.340 True Finance 1\nRuby Female 8/17/1987 4:20 PM 65476 10.012 True Product 1\nMaria Female 4/23/1993 11:17 AM 130590 11.858 False Finance 1\nLillian Female 6/5/2016 6:09 AM 59414 1.256 False Product 1\nLarry Male 1/24/1998 4:47 PM 101004 1.389 True Client Services 1\nKimberly Female 1/14/1999 7:13 AM 41426 14.543 True Finance 1\nJulie Female 10/26/1997 3:19 PM 102508 12.637 True Legal 1\nJeremy Male 9/21/2010 5:56 AM 90370 7.369 False Human Resources 1\nBrandon Male 12/1/1980 1:08 AM 112807 17.492 True Human Resources 1\nGary Male 1/27/2008 11:40 PM 109831 5.831 False Sales 1\nFrances Female 8/8/2002 6:51 AM 139852 7.524 True Business Development 1\nDouglas Male 8/6/1993 12:42 PM 97308 6.945 True Marketing 1\nDonna Female 7/22/2010 3:48 AM 81014 1.894 False Product 1\nDiana Female 10/23/1981 10:27 AM 132940 19.082 False Client Services 1\nDennis Male 4/18/1987 1:35 AM 115163 10.125 False Legal 1\nShawn Male 12/7/1986 7:45 PM 111737 6.414 False Product 1\ndtype: int64"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "ed[['Gender','Salary']]",
"metadata": {
"trusted": true
},
"execution_count": 62,
"outputs": [
{
"execution_count": 62,
"output_type": "execute_result",
"data": {
"text/plain": " Gender Salary\n0 Male 97308\n1 Male 61933\n2 Female 130590\n3 Male 138705\n4 Male 101004\n5 Male 115163\n6 Female 65476\n7 Female 45906\n8 Female 95570\n9 Female 139852\n10 Female 63241\n11 Female 102508\n12 Male 112807\n13 Male 109831\n14 Female 41426\n15 Female 59414\n16 Male 90370\n17 Male 111737\n18 Female 132940\n19 Female 81014",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Gender</th>\n <th>Salary</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Male</td>\n <td>97308</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Male</td>\n <td>61933</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Female</td>\n <td>130590</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Male</td>\n <td>138705</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Male</td>\n <td>101004</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Male</td>\n <td>115163</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Female</td>\n <td>65476</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Female</td>\n <td>45906</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Female</td>\n <td>95570</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Female</td>\n <td>139852</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Female</td>\n <td>63241</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Female</td>\n <td>102508</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Male</td>\n <td>112807</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Male</td>\n <td>109831</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Female</td>\n <td>41426</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Female</td>\n <td>59414</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Male</td>\n <td>90370</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Male</td>\n <td>111737</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Female</td>\n <td>132940</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Female</td>\n <td>81014</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "#selection by position: rows at positions 8 to 11 (the slice end is exclusive):\ned.iloc[8:12]",
"metadata": {
"trusted": true
},
"execution_count": 69,
"outputs": [
{
"execution_count": 69,
"output_type": "execute_result",
"data": {
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n\n Senior Management Team \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "ed.loc[8]",
"metadata": {
"trusted": true
},
"execution_count": 64,
"outputs": [
{
"execution_count": 64,
"output_type": "execute_result",
"data": {
"text/plain": "First Name Angela\nGender Female\nStart Date 11/22/2005\nLast Login Time 6:29 AM\nSalary 95570\nBonus % 18.523\nSenior Management True\nTeam Engineering\nName: 8, dtype: object"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "#data cleaning:\ned.isnull().sum()",
"metadata": {
"trusted": true
},
"execution_count": 73,
"outputs": [
{
"execution_count": 73,
"output_type": "execute_result",
"data": {
"text/plain": "First Name 1\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 1\nTeam 2\ndtype: int64"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "#total null values:\ned.isnull().sum().sum()",
"metadata": {
"trusted": true
},
"execution_count": 74,
"outputs": [
{
"execution_count": 74,
"output_type": "execute_result",
"data": {
"text/plain": "4"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "ed.notnull().sum().sum()",
"metadata": {
"trusted": true
},
"execution_count": 77,
"outputs": [
{
"execution_count": 77,
"output_type": "execute_result",
"data": {
"text/plain": "156"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
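"source": "#for practice on drop we will take a copy of the original data:\n#NOTE(review): plain assignment only makes ed2 another name for ed; ed.copy() would give an independent copy\ned2=ed",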
"metadata": {
"trusted": true
},
"execution_count": 78,
"outputs": []
},
{
"cell_type": "code",
"source": "ed2",
"metadata": {
"trusted": true
},
"execution_count": 79,
"outputs": [
{
"execution_count": 79,
"output_type": "execute_result",
"data": {
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True NaN \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 NaN Finance \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>NaN</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 
AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Drop every column that holds at least one missing value, then confirm\n# that the resulting frame has no nulls left.\ned3 = ed2.dropna(axis=\"columns\")\nprint(\"prasent null values:\", ed3.isna().sum().sum())",
"metadata": {
"trusted": true
},
"execution_count": 90,
"outputs": [
{
"name": "stdout",
"text": "prasent null values: 0\n",
"output_type": "stream"
}
]
},
{
"cell_type": "code",
"source": "# Replace every missing value in ed2 with the placeholder 10.\n# The filled frame is rebound to the same name instead of using inplace=True:\n# the in-place form mutated a copy of a slice of another DataFrame and raised\n# SettingWithCopyWarning. Re-running this cell is harmless (nothing left to fill).\n# NOTE(review): one numeric placeholder also lands in the text/boolean columns\n# (First Name, Senior Management, Team) - consider per-column fill values.\ned2 = ed2.fillna(10)",
"metadata": {
"trusted": true
},
"execution_count": 96,
"outputs": []
},
{
"cell_type": "code",
"source": "# Per-column count of missing values in ed2 (isna is the same check as isnull).\ned2.isna().sum()",
"metadata": {
"trusted": true
},
"execution_count": 97,
"outputs": [
{
"execution_count": 97,
"output_type": "execute_result",
"data": {
"text/plain": "First Name 0\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 0\nTeam 0\ndtype: int64"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Display ed2 to inspect the values that replaced the missing entries.\ned2",
"metadata": {
"trusted": true
},
"execution_count": 98,
"outputs": [
{
"execution_count": 98,
"output_type": "execute_result",
"data": {
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 10 Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True 10 \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 10 Finance \n8 True Engineering \n9 True Business Development \n10 True 10 \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>10</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>10</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>10</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 AM</td>\n 
<td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>10</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Take the first 20 rows of Employ under a separate name and display them.\ned5 = Employ.head(n=20)\ned5",
"metadata": {
"trusted": true
},
"execution_count": 118,
"outputs": [
{
"execution_count": 118,
"output_type": "execute_result",
"data": {
"text/plain": " First Name Gender Start Date Last Login Time Salary Bonus % \\\n0 Douglas Male 8/6/1993 12:42 PM 97308 6.945 \n1 Thomas Male 3/31/1996 6:53 AM 61933 4.170 \n2 Maria Female 4/23/1993 11:17 AM 130590 11.858 \n3 Jerry Male 3/4/2005 1:00 PM 138705 9.340 \n4 Larry Male 1/24/1998 4:47 PM 101004 1.389 \n5 Dennis Male 4/18/1987 1:35 AM 115163 10.125 \n6 Ruby Female 8/17/1987 4:20 PM 65476 10.012 \n7 NaN Female 7/20/2015 10:43 AM 45906 11.598 \n8 Angela Female 11/22/2005 6:29 AM 95570 18.523 \n9 Frances Female 8/8/2002 6:51 AM 139852 7.524 \n10 Louise Female 8/12/1980 9:01 AM 63241 15.132 \n11 Julie Female 10/26/1997 3:19 PM 102508 12.637 \n12 Brandon Male 12/1/1980 1:08 AM 112807 17.492 \n13 Gary Male 1/27/2008 11:40 PM 109831 5.831 \n14 Kimberly Female 1/14/1999 7:13 AM 41426 14.543 \n15 Lillian Female 6/5/2016 6:09 AM 59414 1.256 \n16 Jeremy Male 9/21/2010 5:56 AM 90370 7.369 \n17 Shawn Male 12/7/1986 7:45 PM 111737 6.414 \n18 Diana Female 10/23/1981 10:27 AM 132940 19.082 \n19 Donna Female 7/22/2010 3:48 AM 81014 1.894 \n\n Senior Management Team \n0 True Marketing \n1 True NaN \n2 False Finance \n3 True Finance \n4 True Client Services \n5 False Legal \n6 True Product \n7 NaN Finance \n8 True Engineering \n9 True Business Development \n10 True NaN \n11 True Legal \n12 True Human Resources \n13 False Sales \n14 True Finance \n15 False Product \n16 False Human Resources \n17 False Product \n18 False Client Services \n19 False Product ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>First Name</th>\n <th>Gender</th>\n <th>Start Date</th>\n <th>Last Login Time</th>\n <th>Salary</th>\n <th>Bonus %</th>\n <th>Senior Management</th>\n <th>Team</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Douglas</td>\n <td>Male</td>\n <td>8/6/1993</td>\n <td>12:42 PM</td>\n <td>97308</td>\n <td>6.945</td>\n <td>True</td>\n <td>Marketing</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Thomas</td>\n <td>Male</td>\n <td>3/31/1996</td>\n <td>6:53 AM</td>\n <td>61933</td>\n <td>4.170</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Maria</td>\n <td>Female</td>\n <td>4/23/1993</td>\n <td>11:17 AM</td>\n <td>130590</td>\n <td>11.858</td>\n <td>False</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Jerry</td>\n <td>Male</td>\n <td>3/4/2005</td>\n <td>1:00 PM</td>\n <td>138705</td>\n <td>9.340</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Larry</td>\n <td>Male</td>\n <td>1/24/1998</td>\n <td>4:47 PM</td>\n <td>101004</td>\n <td>1.389</td>\n <td>True</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Dennis</td>\n <td>Male</td>\n <td>4/18/1987</td>\n <td>1:35 AM</td>\n <td>115163</td>\n <td>10.125</td>\n <td>False</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Ruby</td>\n <td>Female</td>\n <td>8/17/1987</td>\n <td>4:20 PM</td>\n <td>65476</td>\n <td>10.012</td>\n <td>True</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>Female</td>\n <td>7/20/2015</td>\n <td>10:43 AM</td>\n <td>45906</td>\n <td>11.598</td>\n <td>NaN</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Angela</td>\n <td>Female</td>\n <td>11/22/2005</td>\n <td>6:29 
AM</td>\n <td>95570</td>\n <td>18.523</td>\n <td>True</td>\n <td>Engineering</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Frances</td>\n <td>Female</td>\n <td>8/8/2002</td>\n <td>6:51 AM</td>\n <td>139852</td>\n <td>7.524</td>\n <td>True</td>\n <td>Business Development</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Louise</td>\n <td>Female</td>\n <td>8/12/1980</td>\n <td>9:01 AM</td>\n <td>63241</td>\n <td>15.132</td>\n <td>True</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Julie</td>\n <td>Female</td>\n <td>10/26/1997</td>\n <td>3:19 PM</td>\n <td>102508</td>\n <td>12.637</td>\n <td>True</td>\n <td>Legal</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Brandon</td>\n <td>Male</td>\n <td>12/1/1980</td>\n <td>1:08 AM</td>\n <td>112807</td>\n <td>17.492</td>\n <td>True</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Gary</td>\n <td>Male</td>\n <td>1/27/2008</td>\n <td>11:40 PM</td>\n <td>109831</td>\n <td>5.831</td>\n <td>False</td>\n <td>Sales</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Kimberly</td>\n <td>Female</td>\n <td>1/14/1999</td>\n <td>7:13 AM</td>\n <td>41426</td>\n <td>14.543</td>\n <td>True</td>\n <td>Finance</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Lillian</td>\n <td>Female</td>\n <td>6/5/2016</td>\n <td>6:09 AM</td>\n <td>59414</td>\n <td>1.256</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Jeremy</td>\n <td>Male</td>\n <td>9/21/2010</td>\n <td>5:56 AM</td>\n <td>90370</td>\n <td>7.369</td>\n <td>False</td>\n <td>Human Resources</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Shawn</td>\n <td>Male</td>\n <td>12/7/1986</td>\n <td>7:45 PM</td>\n <td>111737</td>\n <td>6.414</td>\n <td>False</td>\n <td>Product</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Diana</td>\n <td>Female</td>\n <td>10/23/1981</td>\n <td>10:27 AM</td>\n <td>132940</td>\n <td>19.082</td>\n <td>False</td>\n <td>Client Services</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Donna</td>\n <td>Female</td>\n <td>7/22/2010</td>\n <td>3:48 AM</td>\n <td>81014</td>\n <td>1.894</td>\n 
<td>False</td>\n <td>Product</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Re-check: number of missing values in each column of ed2.\ned2.isna().sum()",
"metadata": {
"trusted": true
},
"execution_count": 111,
"outputs": [
{
"execution_count": 111,
"output_type": "execute_result",
"data": {
"text/plain": "First Name 0\nGender 0\nStart Date 0\nLast Login Time 0\nSalary 0\nBonus % 0\nSenior Management 0\nTeam 0\ndtype: int64"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Display the small `date` frame (columns: date, students).\ndate",
"metadata": {
"trusted": true
},
"execution_count": 120,
"outputs": [
{
"execution_count": 120,
"output_type": "execute_result",
"data": {
"text/plain": " date students\n0 10/9/2020 10\n1 11/09/2020 20\n2 12/09/2020 30",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Inspect the column dtypes: `date` is stored as object (not datetime64),\n# `students` as int64.\ndate.dtypes",
"metadata": {
"trusted": true
},
"execution_count": 124,
"outputs": [
{
"execution_count": 124,
"output_type": "execute_result",
"data": {
"text/plain": "date object\nstudents int64\ndtype: object"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Preview the frame with the 'date' column relabelled as 'check'.\n# The result is not assigned, so `date` itself keeps its original column name.\ndate.rename({\"date\": \"check\"}, axis=\"columns\")",
"metadata": {
"trusted": true
},
"execution_count": 131,
"outputs": [
{
"execution_count": 131,
"output_type": "execute_result",
"data": {
"text/plain": " check students\n0 10/9/2020 10\n1 11/09/2020 20\n2 12/09/2020 30",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>check</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "# Order the rows from the largest to the smallest student count.\ndate.sort_values(by=\"students\", ascending=False)",
"metadata": {
"trusted": true
},
"execution_count": 134,
"outputs": [
{
"execution_count": 134,
"output_type": "execute_result",
"data": {
"text/plain": " date students\n2 12/09/2020 30\n1 11/09/2020 20\n0 10/9/2020 10",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>students</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>12/09/2020</td>\n <td>30</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11/09/2020</td>\n <td>20</td>\n </tr>\n <tr>\n <th>0</th>\n <td>10/9/2020</td>\n <td>10</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": "",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "",
"metadata": {},
"execution_count": null,
"outputs": []
}
]
}