Azarthehulk committed on
Commit
efbf80d
1 Parent(s): 085fc1c

Upload pca_assignment.ipynb

Browse files

Given some data points x1 and x2 with decimal values,
here we standardise the data using the scikit-learn library's StandardScaler class,
compute the covariance matrix, and then calculate the eigenvalues and eigenvectors using the linear algebra module in SciPy (scipy.linalg.eigh),
then we apply PCA to reduce the dimensionality according to the covariance and eigenvectors.

Files changed (1) hide show
  1. pca_assignment.ipynb +563 -0
pca_assignment.ipynb ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "4b07db50",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 16,
16
+ "id": "0e46369a",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import numpy as np"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "id": "604cc2e1",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "data={\n",
31
+ " \"x1\":[2.5,0.5,2.2,1.9,3.1,2.3,2.0,1.0,1.5,1.1],\n",
32
+ " \"x2\":[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9]\n",
33
+ "}"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 3,
39
+ "id": "580be2ef",
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "df=pd.DataFrame(data)"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "id": "af756531",
50
+ "metadata": {},
51
+ "outputs": [
52
+ {
53
+ "data": {
54
+ "text/html": [
55
+ "<div>\n",
56
+ "<style scoped>\n",
57
+ " .dataframe tbody tr th:only-of-type {\n",
58
+ " vertical-align: middle;\n",
59
+ " }\n",
60
+ "\n",
61
+ " .dataframe tbody tr th {\n",
62
+ " vertical-align: top;\n",
63
+ " }\n",
64
+ "\n",
65
+ " .dataframe thead th {\n",
66
+ " text-align: right;\n",
67
+ " }\n",
68
+ "</style>\n",
69
+ "<table border=\"1\" class=\"dataframe\">\n",
70
+ " <thead>\n",
71
+ " <tr style=\"text-align: right;\">\n",
72
+ " <th></th>\n",
73
+ " <th>x1</th>\n",
74
+ " <th>x2</th>\n",
75
+ " </tr>\n",
76
+ " </thead>\n",
77
+ " <tbody>\n",
78
+ " <tr>\n",
79
+ " <th>0</th>\n",
80
+ " <td>2.5</td>\n",
81
+ " <td>2.4</td>\n",
82
+ " </tr>\n",
83
+ " <tr>\n",
84
+ " <th>1</th>\n",
85
+ " <td>0.5</td>\n",
86
+ " <td>0.7</td>\n",
87
+ " </tr>\n",
88
+ " <tr>\n",
89
+ " <th>2</th>\n",
90
+ " <td>2.2</td>\n",
91
+ " <td>2.9</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>3</th>\n",
95
+ " <td>1.9</td>\n",
96
+ " <td>2.2</td>\n",
97
+ " </tr>\n",
98
+ " <tr>\n",
99
+ " <th>4</th>\n",
100
+ " <td>3.1</td>\n",
101
+ " <td>3.0</td>\n",
102
+ " </tr>\n",
103
+ " <tr>\n",
104
+ " <th>5</th>\n",
105
+ " <td>2.3</td>\n",
106
+ " <td>2.7</td>\n",
107
+ " </tr>\n",
108
+ " <tr>\n",
109
+ " <th>6</th>\n",
110
+ " <td>2.0</td>\n",
111
+ " <td>1.6</td>\n",
112
+ " </tr>\n",
113
+ " <tr>\n",
114
+ " <th>7</th>\n",
115
+ " <td>1.0</td>\n",
116
+ " <td>1.1</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>8</th>\n",
120
+ " <td>1.5</td>\n",
121
+ " <td>1.6</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>9</th>\n",
125
+ " <td>1.1</td>\n",
126
+ " <td>0.9</td>\n",
127
+ " </tr>\n",
128
+ " </tbody>\n",
129
+ "</table>\n",
130
+ "</div>"
131
+ ],
132
+ "text/plain": [
133
+ " x1 x2\n",
134
+ "0 2.5 2.4\n",
135
+ "1 0.5 0.7\n",
136
+ "2 2.2 2.9\n",
137
+ "3 1.9 2.2\n",
138
+ "4 3.1 3.0\n",
139
+ "5 2.3 2.7\n",
140
+ "6 2.0 1.6\n",
141
+ "7 1.0 1.1\n",
142
+ "8 1.5 1.6\n",
143
+ "9 1.1 0.9"
144
+ ]
145
+ },
146
+ "execution_count": 4,
147
+ "metadata": {},
148
+ "output_type": "execute_result"
149
+ }
150
+ ],
151
+ "source": [
152
+ "df"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": 9,
158
+ "id": "4936d72b",
159
+ "metadata": {},
160
+ "outputs": [
161
+ {
162
+ "data": {
163
+ "text/plain": [
164
+ "(10, 2)"
165
+ ]
166
+ },
167
+ "execution_count": 9,
168
+ "metadata": {},
169
+ "output_type": "execute_result"
170
+ }
171
+ ],
172
+ "source": [
173
+ "df.shape"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": 14,
179
+ "id": "c3d493bb",
180
+ "metadata": {},
181
+ "outputs": [
182
+ {
183
+ "name": "stdout",
184
+ "output_type": "stream",
185
+ "text": [
186
+ "<class 'pandas.core.frame.DataFrame'>\n"
187
+ ]
188
+ }
189
+ ],
190
+ "source": [
191
+ "print(type(df))"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 5,
197
+ "id": "e071a809",
198
+ "metadata": {},
199
+ "outputs": [],
200
+ "source": [
201
+ "#for standardizing the data with the StandardScaler class from sklearn.preprocessing\n",
202
+ "from sklearn.preprocessing import StandardScaler"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 7,
208
+ "id": "18b72826",
209
+ "metadata": {},
210
+ "outputs": [],
211
+ "source": [
212
+ "std_data=StandardScaler().fit_transform(df)"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 10,
218
+ "id": "abad0bf1",
219
+ "metadata": {},
220
+ "outputs": [
221
+ {
222
+ "data": {
223
+ "text/plain": [
224
+ "(10, 2)"
225
+ ]
226
+ },
227
+ "execution_count": 10,
228
+ "metadata": {},
229
+ "output_type": "execute_result"
230
+ }
231
+ ],
232
+ "source": [
233
+ "std_data.shape"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": 8,
239
+ "id": "0bc25f21",
240
+ "metadata": {},
241
+ "outputs": [
242
+ {
243
+ "data": {
244
+ "text/plain": [
245
+ "array([[ 0.92627881, 0.61016865],\n",
246
+ " [-1.7585873 , -1.506743 ],\n",
247
+ " [ 0.52354889, 1.23278973],\n",
248
+ " [ 0.12081898, 0.36112022],\n",
249
+ " [ 1.73173864, 1.35731394],\n",
250
+ " [ 0.6577922 , 0.9837413 ],\n",
251
+ " [ 0.25506228, -0.38602507],\n",
252
+ " [-1.08737078, -1.00864614],\n",
253
+ " [-0.41615425, -0.38602507],\n",
254
+ " [-0.95312747, -1.25769457]])"
255
+ ]
256
+ },
257
+ "execution_count": 8,
258
+ "metadata": {},
259
+ "output_type": "execute_result"
260
+ }
261
+ ],
262
+ "source": [
263
+ "std_data"
264
+ ]
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": 15,
269
+ "id": "df9cca8c",
270
+ "metadata": {},
271
+ "outputs": [
272
+ {
273
+ "name": "stdout",
274
+ "output_type": "stream",
275
+ "text": [
276
+ "<class 'numpy.ndarray'>\n"
277
+ ]
278
+ }
279
+ ],
280
+ "source": [
281
+ "print(type(std_data))"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 17,
287
+ "id": "555a8b94",
288
+ "metadata": {},
289
+ "outputs": [],
290
+ "source": [
291
+ "sample_sets=std_data\n",
292
+ "\n",
293
+ "co_data=np.matmul(sample_sets.T,sample_sets)"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": 18,
299
+ "id": "f16eb66b",
300
+ "metadata": {},
301
+ "outputs": [
302
+ {
303
+ "data": {
304
+ "text/plain": [
305
+ "array([[10. , 9.25929273],\n",
306
+ " [ 9.25929273, 10. ]])"
307
+ ]
308
+ },
309
+ "execution_count": 18,
310
+ "metadata": {},
311
+ "output_type": "execute_result"
312
+ }
313
+ ],
314
+ "source": [
315
+ "co_data"
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": 19,
321
+ "id": "6f256d58",
322
+ "metadata": {},
323
+ "outputs": [
324
+ {
325
+ "data": {
326
+ "text/plain": [
327
+ "(2, 2)"
328
+ ]
329
+ },
330
+ "execution_count": 19,
331
+ "metadata": {},
332
+ "output_type": "execute_result"
333
+ }
334
+ ],
335
+ "source": [
336
+ "co_data.shape"
337
+ ]
338
+ },
339
+ {
340
+ "cell_type": "code",
341
+ "execution_count": 25,
342
+ "id": "81764c46",
343
+ "metadata": {},
344
+ "outputs": [],
345
+ "source": [
346
+ "from scipy.linalg import eigh \n",
347
+ "values, vectors = eigh(co_data, eigvals=(0,1))"
348
+ ]
349
+ },
350
+ {
351
+ "cell_type": "code",
352
+ "execution_count": 26,
353
+ "id": "42bb4049",
354
+ "metadata": {},
355
+ "outputs": [
356
+ {
357
+ "data": {
358
+ "text/plain": [
359
+ "array([ 0.74070727, 19.25929273])"
360
+ ]
361
+ },
362
+ "execution_count": 26,
363
+ "metadata": {},
364
+ "output_type": "execute_result"
365
+ }
366
+ ],
367
+ "source": [
368
+ "values"
369
+ ]
370
+ },
371
+ {
372
+ "cell_type": "code",
373
+ "execution_count": 27,
374
+ "id": "5aeea5cb",
375
+ "metadata": {},
376
+ "outputs": [
377
+ {
378
+ "data": {
379
+ "text/plain": [
380
+ "array([[-0.70710678, 0.70710678],\n",
381
+ " [ 0.70710678, 0.70710678]])"
382
+ ]
383
+ },
384
+ "execution_count": 27,
385
+ "metadata": {},
386
+ "output_type": "execute_result"
387
+ }
388
+ ],
389
+ "source": [
390
+ "vectors"
391
+ ]
392
+ },
393
+ {
394
+ "cell_type": "code",
395
+ "execution_count": 33,
396
+ "id": "8aa63112",
397
+ "metadata": {},
398
+ "outputs": [],
399
+ "source": [
400
+ "from sklearn import decomposition\n",
401
+ "pca = decomposition.PCA()\n",
402
+ "pca.n_components = 1\n",
403
+ "pca_data = pca.fit_transform(sample_sets)"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": 35,
409
+ "id": "2c690fcd",
410
+ "metadata": {},
411
+ "outputs": [
412
+ {
413
+ "data": {
414
+ "text/plain": [
415
+ "array([[-1.08643242],\n",
416
+ " [ 2.3089372 ],\n",
417
+ " [-1.24191895],\n",
418
+ " [-0.34078247],\n",
419
+ " [-2.18429003],\n",
420
+ " [-1.16073946],\n",
421
+ " [ 0.09260467],\n",
422
+ " [ 1.48210777],\n",
423
+ " [ 0.56722643],\n",
424
+ " [ 1.56328726]])"
425
+ ]
426
+ },
427
+ "execution_count": 35,
428
+ "metadata": {},
429
+ "output_type": "execute_result"
430
+ }
431
+ ],
432
+ "source": [
433
+ "pca_data"
434
+ ]
435
+ },
436
+ {
437
+ "cell_type": "code",
438
+ "execution_count": 36,
439
+ "id": "74d44bb1",
440
+ "metadata": {},
441
+ "outputs": [
442
+ {
443
+ "data": {
444
+ "text/plain": [
445
+ "(10, 1)"
446
+ ]
447
+ },
448
+ "execution_count": 36,
449
+ "metadata": {},
450
+ "output_type": "execute_result"
451
+ }
452
+ ],
453
+ "source": [
454
+ "pca_data.shape"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": 37,
460
+ "id": "106cb61b",
461
+ "metadata": {},
462
+ "outputs": [],
463
+ "source": [
464
+ "import matplotlib.pyplot as plt"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 51,
470
+ "id": "d6d6c8f8",
471
+ "metadata": {},
472
+ "outputs": [
473
+ {
474
+ "name": "stderr",
475
+ "output_type": "stream",
476
+ "text": [
477
+ "No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
478
+ ]
479
+ },
480
+ {
481
+ "data": {
482
+ "image/png": "\n",
483
+ "text/plain": [
484
+ "<Figure size 432x288 with 1 Axes>"
485
+ ]
486
+ },
487
+ "metadata": {
488
+ "needs_background": "light"
489
+ },
490
+ "output_type": "display_data"
491
+ }
492
+ ],
493
+ "source": [
494
+ "plt.plot(df)\n",
495
+ "plt.plot(\"original data\")\n",
496
+ "plt.legend()\n",
497
+ "plt.show()"
498
+ ]
499
+ },
500
+ {
501
+ "cell_type": "code",
502
+ "execution_count": 50,
503
+ "id": "1940011f",
504
+ "metadata": {},
505
+ "outputs": [
506
+ {
507
+ "data": {
508
+ "image/png": "\n",
509
+ "text/plain": [
510
+ "<Figure size 432x288 with 1 Axes>"
511
+ ]
512
+ },
513
+ "metadata": {
514
+ "needs_background": "light"
515
+ },
516
+ "output_type": "display_data"
517
+ }
518
+ ],
519
+ "source": [
520
+ "plt.plot(pca_data)\n",
521
+ "plt.title(\"after applying pca on data\")\n",
522
+ "plt.show()"
523
+ ]
524
+ },
525
+ {
526
+ "cell_type": "code",
527
+ "execution_count": null,
528
+ "id": "2efbfe65",
529
+ "metadata": {},
530
+ "outputs": [],
531
+ "source": []
532
+ },
533
+ {
534
+ "cell_type": "code",
535
+ "execution_count": null,
536
+ "id": "df88b872",
537
+ "metadata": {},
538
+ "outputs": [],
539
+ "source": []
540
+ }
541
+ ],
542
+ "metadata": {
543
+ "kernelspec": {
544
+ "display_name": "Python 3 (ipykernel)",
545
+ "language": "python",
546
+ "name": "python3"
547
+ },
548
+ "language_info": {
549
+ "codemirror_mode": {
550
+ "name": "ipython",
551
+ "version": 3
552
+ },
553
+ "file_extension": ".py",
554
+ "mimetype": "text/x-python",
555
+ "name": "python",
556
+ "nbconvert_exporter": "python",
557
+ "pygments_lexer": "ipython3",
558
+ "version": "3.9.12"
559
+ }
560
+ },
561
+ "nbformat": 4,
562
+ "nbformat_minor": 5
563
+ }