{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import re\n", "import tqdm\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "from sklearn.datasets import fetch_20newsgroups\n", "from sklearn.manifold import TSNE\n", "from pycaret.anomaly import *\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Location of the encoded tweet table. Set the SINKAF_ENCODED_CSV environment\n", "# variable to point at another copy; the default is the path this notebook was\n", "# originally run with.\n", "ENCODED_CSV = os.environ.get(\n", "    'SINKAF_ENCODED_CSV',\n", "    '/mnt/c/Users/selin_uzturk/Desktop/sinkaf/encoded.csv',\n", ")\n", "\n", "# 'Unnamed: 0' is discarded right after loading; presumably it is the row index\n", "# written by an earlier DataFrame.to_csv call -- TODO confirm.\n", "embeding_df = pd.read_csv(ENCODED_CSV).drop(columns=['Unnamed: 0'])\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...5657585960616263labelstweet
0101101101757865318925285159764084027631623...000000000en güzel uyuyan insan ödülü jeon jungkook'a g...
11011158910706107131079494698306682488311723763...000000000Mekanı cennet olsun, saygılar sayın avukatımı...
21011483047110774137851377933642143994827176686...000000000Kızlar aranızda kas yığını beylere düşenler ol...
3101193191672410118101077832312407389592293410147...000000000Biraz ders çalışayım. Tembellik ve uyku düşman...
41013093258706580544490710224106583102881252413878...000000000Trezeguet yerine El Sharawy daha iyi olmaz mı
..................................................................
43344101200651016111511510378410774213881024592067...000000001Hil**adamlar kesinlikle kelimeleri anlamıyorla...
4334510113980839241091340618985162851016311062276...000000001Böyle piçlerin çok erken ölmemelerini ve çok f...
43346101105549102635101402694311499110516218991186110561...000000001Turgay denilen bu holigonda bir sorun yok, gur...
433471018142426398920171096201094176010101151983026083...000000001Umarım ülkenin düşük zekadan kurtulması ilgile...
433481013977411127459892459611933170171451071039125...000000001CHP sandıkları bırakmaz, üzerine oturur, bir c...
\n", "

43349 rows × 66 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 \\\n", "0 101 10110 175 78653 189 25285 15976 40840 276 \n", "1 101 11589 10706 10713 10794 94698 30668 24883 117 \n", "2 101 148 30471 10774 13785 13779 33642 14399 48271 \n", "3 101 19319 16724 10118 10107 78323 12407 38959 22934 \n", "4 101 30932 58706 58054 44907 10224 106583 10288 12524 \n", "... ... ... ... ... ... ... ... ... ... \n", "43344 101 20065 10161 115 115 103784 10774 21388 10245 \n", "43345 101 139 80839 24109 13406 18985 16285 10163 11062 \n", "43346 101 105549 102635 10140 26943 11499 110516 21899 11861 \n", "43347 101 81424 26398 92017 109620 10941 76010 10115 19830 \n", "43348 101 39774 11127 45989 24596 11933 170 17145 10710 \n", "\n", " 9 ... 56 57 58 59 60 61 62 63 labels \\\n", "0 31623 ... 0 0 0 0 0 0 0 0 0 \n", "1 23763 ... 0 0 0 0 0 0 0 0 0 \n", "2 76686 ... 0 0 0 0 0 0 0 0 0 \n", "3 10147 ... 0 0 0 0 0 0 0 0 0 \n", "4 13878 ... 0 0 0 0 0 0 0 0 0 \n", "... ... ... .. .. .. .. .. .. .. .. ... \n", "43344 92067 ... 0 0 0 0 0 0 0 0 1 \n", "43345 276 ... 0 0 0 0 0 0 0 0 1 \n", "43346 10561 ... 0 0 0 0 0 0 0 0 1 \n", "43347 26083 ... 0 0 0 0 0 0 0 0 1 \n", "43348 39125 ... 0 0 0 0 0 0 0 0 1 \n", "\n", " tweet \n", "0 en güzel uyuyan insan ödülü jeon jungkook'a g... \n", "1 Mekanı cennet olsun, saygılar sayın avukatımı... \n", "2 Kızlar aranızda kas yığını beylere düşenler ol... \n", "3 Biraz ders çalışayım. Tembellik ve uyku düşman... \n", "4 Trezeguet yerine El Sharawy daha iyi olmaz mı \n", "... ... \n", "43344 Hil**adamlar kesinlikle kelimeleri anlamıyorla... \n", "43345 Böyle piçlerin çok erken ölmemelerini ve çok f... \n", "43346 Turgay denilen bu holigonda bir sorun yok, gur... \n", "43347 Umarım ülkenin düşük zekadan kurtulması ilgile... \n", "43348 CHP sandıkları bırakmaz, üzerine oturur, bir c... 
\n", "\n", "[43349 rows x 66 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeding_df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 DescriptionValue
0Session id5272
1Original data shape(43349, 66)
2Transformed data shape(43349, 65)
3Ignore features1
4Numeric features65
5PreprocessTrue
6Imputation typesimple
7Numeric imputationmean
8Categorical imputationmode
9CPU Jobs-1
10Use GPUFalse
11Log ExperimentFalse
12Experiment Nameanomaly-default-name
13USIca74
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Pin the experiment seed so results are reproducible across kernel restarts;\n", "# 5272 is the session id recorded in this cell's saved output.\n", "# NOTE(review): only 'tweet' is ignored, so 'labels' is passed to the detectors\n", "# as a numeric feature (the summary reports 65 numeric features) -- confirm\n", "# that this is intended.\n", "ano1 = setup(embeding_df, ignore_features=['tweet'], session_id=5272)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameReference
ID
abodAngle-base Outlier Detectionpyod.models.abod.ABOD
clusterClustering-Based Local Outlierpycaret.internal.patches.pyod.CBLOFForceToDouble
cofConnectivity-Based Local Outlierpyod.models.cof.COF
iforestIsolation Forestpyod.models.iforest.IForest
histogramHistogram-based Outlier Detectionpyod.models.hbos.HBOS
knnK-Nearest Neighbors Detectorpyod.models.knn.KNN
lofLocal Outlier Factorpyod.models.lof.LOF
svmOne-class SVM detectorpyod.models.ocsvm.OCSVM
pcaPrincipal Component Analysispyod.models.pca.PCA
mcdMinimum Covariance Determinantpyod.models.mcd.MCD
sodSubspace Outlier Detectionpyod.models.sod.SOD
sosStochastic Outlier Selectionpyod.models.sos.SOS
\n", "
" ], "text/plain": [ " Name \\\n", "ID \n", "abod Angle-base Outlier Detection \n", "cluster Clustering-Based Local Outlier \n", "cof Connectivity-Based Local Outlier \n", "iforest Isolation Forest \n", "histogram Histogram-based Outlier Detection \n", "knn K-Nearest Neighbors Detector \n", "lof Local Outlier Factor \n", "svm One-class SVM detector \n", "pca Principal Component Analysis \n", "mcd Minimum Covariance Determinant \n", "sod Subspace Outlier Detection \n", "sos Stochastic Outlier Selection \n", "\n", " Reference \n", "ID \n", "abod pyod.models.abod.ABOD \n", "cluster pycaret.internal.patches.pyod.CBLOFForceToDouble \n", "cof pyod.models.cof.COF \n", "iforest pyod.models.iforest.IForest \n", "histogram pyod.models.hbos.HBOS \n", "knn pyod.models.knn.KNN \n", "lof pyod.models.lof.LOF \n", "svm pyod.models.ocsvm.OCSVM \n", "pca pyod.models.pca.PCA \n", "mcd pyod.models.mcd.MCD \n", "sod pyod.models.sod.SOD \n", "sos pyod.models.sos.SOS " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "models()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# iforest" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# iforest = create_model('iforest')\n", "# iforest_anomalies = assign_model(iforest)\n", "# # iso_df=embeding_df.drop(['tweet'], axis=1)\n", "# iforest_pred = predict_model(iforest, data=iso_df)\n", "# iforest_pred" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# iforest_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# knn\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": 
[], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...57585960616263labelsAnomalyAnomaly_Score
0101.010110.0175.078653.0189.025285.015976.040840.0276.031623.0...0.00.00.00.00.00.00.00.0070971.171936
1101.011589.010706.010713.010794.094698.030668.024883.0117.023763.0...0.00.00.00.00.00.00.00.0077147.550363
2101.0148.030471.010774.013785.013779.033642.014399.048271.076686.0...0.00.00.00.00.00.00.00.00118676.465801
3101.019319.016724.010118.010107.078323.012407.038959.022934.010147.0...0.00.00.00.00.00.00.00.0094310.765409
4101.030932.058706.058054.044907.010224.0106583.010288.012524.013878.0...0.00.00.00.00.00.00.00.0063569.489655
..................................................................
43344101.020065.010161.0115.0115.0103784.010774.021388.010245.092067.0...0.00.00.00.00.00.00.01.00183310.474995
43345101.0139.080839.024109.013406.018985.016285.010163.011062.0276.0...0.00.00.00.00.00.00.01.00140717.435036
43346101.0105549.0102635.010140.026943.011499.0110516.021899.011861.010561.0...0.00.00.00.00.00.00.01.0098954.428628
43347101.081424.026398.092017.0109620.010941.076010.010115.019830.026083.0...0.00.00.00.00.00.00.01.0065424.117159
43348101.039774.011127.045989.024596.011933.0170.017145.010710.039125.0...0.00.00.00.00.00.00.01.00182332.274049
\n", "

43349 rows × 67 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n", "1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n", "2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n", "3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n", "4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n", "... ... ... ... ... ... ... ... \n", "43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n", "43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n", "43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n", "43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n", "43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n", "\n", " 7 8 9 ... 57 58 59 60 61 62 63 \\\n", "0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... ... ... ... ... \n", "43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " labels Anomaly Anomaly_Score \n", "0 0.0 0 70971.171936 \n", "1 0.0 0 77147.550363 \n", "2 0.0 0 118676.465801 \n", "3 0.0 0 94310.765409 \n", "4 0.0 0 63569.489655 \n", "... ... ... ... 
\n", "43344 1.0 0 183310.474995 \n", "43345 1.0 0 140717.435036 \n", "43346 1.0 0 98954.428628 \n", "43347 1.0 0 65424.117159 \n", "43348 1.0 0 182332.274049 \n", "\n", "[43349 rows x 67 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build the 'knn' detector from the experiment registered by setup().\n", "knn = create_model('knn')\n", "knn_anomalies = assign_model(knn)\n", "\n", "# Score a copy of the table that leaves out the raw text column.\n", "# NOTE(review): this scores the same rows again via predict_model; the flags\n", "# may not match the ones in knn_anomalies -- confirm which set is intended.\n", "knn_df = embeding_df.drop(columns=['tweet'])\n", "knn_pred = predict_model(knn, data=knn_df)\n", "knn_pred" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 41376\n", "1 1973\n", "Name: Anomaly, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn_pred['Anomaly'].value_counts()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...57585960616263labelsAnomalyAnomaly_Score
0101.010110.0175.078653.0189.025285.015976.040840.0276.031623.0...0.00.00.00.00.00.00.00.0070971.171936
1101.011589.010706.010713.010794.094698.030668.024883.0117.023763.0...0.00.00.00.00.00.00.00.0077147.550363
2101.0148.030471.010774.013785.013779.033642.014399.048271.076686.0...0.00.00.00.00.00.00.00.00118676.465801
3101.019319.016724.010118.010107.078323.012407.038959.022934.010147.0...0.00.00.00.00.00.00.00.0094310.765409
4101.030932.058706.058054.044907.010224.0106583.010288.012524.013878.0...0.00.00.00.00.00.00.00.0063569.489655
..................................................................
43344101.020065.010161.0115.0115.0103784.010774.021388.010245.092067.0...0.00.00.00.00.00.00.01.00183310.474995
43345101.0139.080839.024109.013406.018985.016285.010163.011062.0276.0...0.00.00.00.00.00.00.01.00140717.435036
43346101.0105549.0102635.010140.026943.011499.0110516.021899.011861.010561.0...0.00.00.00.00.00.00.01.0098954.428628
43347101.081424.026398.092017.0109620.010941.076010.010115.019830.026083.0...0.00.00.00.00.00.00.01.0065424.117159
43348101.039774.011127.045989.024596.011933.0170.017145.010710.039125.0...0.00.00.00.00.00.00.01.00182332.274049
\n", "

43349 rows × 67 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n", "1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n", "2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n", "3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n", "4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n", "... ... ... ... ... ... ... ... \n", "43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n", "43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n", "43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n", "43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n", "43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n", "\n", " 7 8 9 ... 57 58 59 60 61 62 63 \\\n", "0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... ... ... ... ... \n", "43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " labels Anomaly Anomaly_Score \n", "0 0.0 0 70971.171936 \n", "1 0.0 0 77147.550363 \n", "2 0.0 0 118676.465801 \n", "3 0.0 0 94310.765409 \n", "4 0.0 0 63569.489655 \n", "... ... ... ... 
\n", "43344 1.0 0 183310.474995 \n", "43345 1.0 0 140717.435036 \n", "43346 1.0 0 98954.428628 \n", "43347 1.0 0 65424.117159 \n", "43348 1.0 0 182332.274049 \n", "\n", "[43349 rows x 67 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn_pred" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 41376\n", "1 1973\n", "Name: Anomaly, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn_pred['Anomaly'].value_counts()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...5657585960616263labelstweet
0101101101757865318925285159764084027631623...000000000en güzel uyuyan insan ödülü jeon jungkook'a g...
11011158910706107131079494698306682488311723763...000000000Mekanı cennet olsun, saygılar sayın avukatımı...
21011483047110774137851377933642143994827176686...000000000Kızlar aranızda kas yığını beylere düşenler ol...
3101193191672410118101077832312407389592293410147...000000000Biraz ders çalışayım. Tembellik ve uyku düşman...
41013093258706580544490710224106583102881252413878...000000000Trezeguet yerine El Sharawy daha iyi olmaz mı
..................................................................
43344101200651016111511510378410774213881024592067...000000001Hil**adamlar kesinlikle kelimeleri anlamıyorla...
4334510113980839241091340618985162851016311062276...000000001Böyle piçlerin çok erken ölmemelerini ve çok f...
43346101105549102635101402694311499110516218991186110561...000000001Turgay denilen bu holigonda bir sorun yok, gur...
433471018142426398920171096201094176010101151983026083...000000001Umarım ülkenin düşük zekadan kurtulması ilgile...
433481013977411127459892459611933170171451071039125...000000001CHP sandıkları bırakmaz, üzerine oturur, bir c...
\n", "

43349 rows × 66 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 \\\n", "0 101 10110 175 78653 189 25285 15976 40840 276 \n", "1 101 11589 10706 10713 10794 94698 30668 24883 117 \n", "2 101 148 30471 10774 13785 13779 33642 14399 48271 \n", "3 101 19319 16724 10118 10107 78323 12407 38959 22934 \n", "4 101 30932 58706 58054 44907 10224 106583 10288 12524 \n", "... ... ... ... ... ... ... ... ... ... \n", "43344 101 20065 10161 115 115 103784 10774 21388 10245 \n", "43345 101 139 80839 24109 13406 18985 16285 10163 11062 \n", "43346 101 105549 102635 10140 26943 11499 110516 21899 11861 \n", "43347 101 81424 26398 92017 109620 10941 76010 10115 19830 \n", "43348 101 39774 11127 45989 24596 11933 170 17145 10710 \n", "\n", " 9 ... 56 57 58 59 60 61 62 63 labels \\\n", "0 31623 ... 0 0 0 0 0 0 0 0 0 \n", "1 23763 ... 0 0 0 0 0 0 0 0 0 \n", "2 76686 ... 0 0 0 0 0 0 0 0 0 \n", "3 10147 ... 0 0 0 0 0 0 0 0 0 \n", "4 13878 ... 0 0 0 0 0 0 0 0 0 \n", "... ... ... .. .. .. .. .. .. .. .. ... \n", "43344 92067 ... 0 0 0 0 0 0 0 0 1 \n", "43345 276 ... 0 0 0 0 0 0 0 0 1 \n", "43346 10561 ... 0 0 0 0 0 0 0 0 1 \n", "43347 26083 ... 0 0 0 0 0 0 0 0 1 \n", "43348 39125 ... 0 0 0 0 0 0 0 0 1 \n", "\n", " tweet \n", "0 en güzel uyuyan insan ödülü jeon jungkook'a g... \n", "1 Mekanı cennet olsun, saygılar sayın avukatımı... \n", "2 Kızlar aranızda kas yığını beylere düşenler ol... \n", "3 Biraz ders çalışayım. Tembellik ve uyku düşman... \n", "4 Trezeguet yerine El Sharawy daha iyi olmaz mı \n", "... ... \n", "43344 Hil**adamlar kesinlikle kelimeleri anlamıyorla... \n", "43345 Böyle piçlerin çok erken ölmemelerini ve çok f... \n", "43346 Turgay denilen bu holigonda bir sorun yok, gur... \n", "43347 Umarım ülkenin düşük zekadan kurtulması ilgile... \n", "43348 CHP sandıkları bırakmaz, üzerine oturur, bir c... 
\n", "\n", "[43349 rows x 66 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeding_df" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Remove the rows the KNN detector flagged (Anomaly == 1).\n", "# The name is rebound instead of dropping in place, and errors='ignore' is\n", "# passed, so the cell can be re-run safely: dropping the same index labels a\n", "# second time with the default errors='raise' fails with KeyError.\n", "knn_outlier_index = knn_pred.loc[knn_pred['Anomaly'] == 1].index\n", "embeding_df = embeding_df.drop(index=knn_outlier_index, errors='ignore')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...5657585960616263labelstweet
0101101101757865318925285159764084027631623...000000000en güzel uyuyan insan ödülü jeon jungkook'a g...
11011158910706107131079494698306682488311723763...000000000Mekanı cennet olsun, saygılar sayın avukatımı...
21011483047110774137851377933642143994827176686...000000000Kızlar aranızda kas yığını beylere düşenler ol...
3101193191672410118101077832312407389592293410147...000000000Biraz ders çalışayım. Tembellik ve uyku düşman...
41013093258706580544490710224106583102881252413878...000000000Trezeguet yerine El Sharawy daha iyi olmaz mı
..................................................................
43344101200651016111511510378410774213881024592067...000000001Hil**adamlar kesinlikle kelimeleri anlamıyorla...
4334510113980839241091340618985162851016311062276...000000001Böyle piçlerin çok erken ölmemelerini ve çok f...
43346101105549102635101402694311499110516218991186110561...000000001Turgay denilen bu holigonda bir sorun yok, gur...
433471018142426398920171096201094176010101151983026083...000000001Umarım ülkenin düşük zekadan kurtulması ilgile...
433481013977411127459892459611933170171451071039125...000000001CHP sandıkları bırakmaz, üzerine oturur, bir c...
\n", "

41376 rows × 66 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 \\\n", "0 101 10110 175 78653 189 25285 15976 40840 276 \n", "1 101 11589 10706 10713 10794 94698 30668 24883 117 \n", "2 101 148 30471 10774 13785 13779 33642 14399 48271 \n", "3 101 19319 16724 10118 10107 78323 12407 38959 22934 \n", "4 101 30932 58706 58054 44907 10224 106583 10288 12524 \n", "... ... ... ... ... ... ... ... ... ... \n", "43344 101 20065 10161 115 115 103784 10774 21388 10245 \n", "43345 101 139 80839 24109 13406 18985 16285 10163 11062 \n", "43346 101 105549 102635 10140 26943 11499 110516 21899 11861 \n", "43347 101 81424 26398 92017 109620 10941 76010 10115 19830 \n", "43348 101 39774 11127 45989 24596 11933 170 17145 10710 \n", "\n", " 9 ... 56 57 58 59 60 61 62 63 labels \\\n", "0 31623 ... 0 0 0 0 0 0 0 0 0 \n", "1 23763 ... 0 0 0 0 0 0 0 0 0 \n", "2 76686 ... 0 0 0 0 0 0 0 0 0 \n", "3 10147 ... 0 0 0 0 0 0 0 0 0 \n", "4 13878 ... 0 0 0 0 0 0 0 0 0 \n", "... ... ... .. .. .. .. .. .. .. .. ... \n", "43344 92067 ... 0 0 0 0 0 0 0 0 1 \n", "43345 276 ... 0 0 0 0 0 0 0 0 1 \n", "43346 10561 ... 0 0 0 0 0 0 0 0 1 \n", "43347 26083 ... 0 0 0 0 0 0 0 0 1 \n", "43348 39125 ... 0 0 0 0 0 0 0 0 1 \n", "\n", " tweet \n", "0 en güzel uyuyan insan ödülü jeon jungkook'a g... \n", "1 Mekanı cennet olsun, saygılar sayın avukatımı... \n", "2 Kızlar aranızda kas yığını beylere düşenler ol... \n", "3 Biraz ders çalışayım. Tembellik ve uyku düşman... \n", "4 Trezeguet yerine El Sharawy daha iyi olmaz mı \n", "... ... \n", "43344 Hil**adamlar kesinlikle kelimeleri anlamıyorla... \n", "43345 Böyle piçlerin çok erken ölmemelerini ve çok f... \n", "43346 Turgay denilen bu holigonda bir sorun yok, gur... \n", "43347 Umarım ülkenin düşük zekadan kurtulması ilgile... \n", "43348 CHP sandıkları bırakmaz, üzerine oturur, bir c... 
\n", "\n", "[41376 rows x 66 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeding_df" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "df=pd.DataFrame()\n", "df['tweet']=embeding_df['tweet']\n", "df['subtas_a']=embeding_df['labels']" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "df.to_csv('knn_outliers.csv') " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# pca" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...57585960616263labelsAnomalyAnomaly_Score
0101.010110.0175.078653.0189.025285.015976.040840.0276.031623.0...0.00.00.00.00.00.00.00.001.354399e+32
1101.011589.010706.010713.010794.094698.030668.024883.0117.023763.0...0.00.00.00.00.00.00.00.001.311723e+32
2101.0148.030471.010774.013785.013779.033642.014399.048271.076686.0...0.00.00.00.00.00.00.00.001.597792e+32
3101.019319.016724.010118.010107.078323.012407.038959.022934.010147.0...0.00.00.00.00.00.00.00.001.551488e+32
4101.030932.058706.058054.044907.010224.0106583.010288.012524.013878.0...0.00.00.00.00.00.00.00.001.348867e+32
..................................................................
43344101.020065.010161.0115.0115.0103784.010774.021388.010245.092067.0...0.00.00.00.00.00.00.01.002.346619e+32
43345101.0139.080839.024109.013406.018985.016285.010163.011062.0276.0...0.00.00.00.00.00.00.01.001.778253e+32
43346101.0105549.0102635.010140.026943.011499.0110516.021899.011861.010561.0...0.00.00.00.00.00.00.01.001.762300e+32
43347101.081424.026398.092017.0109620.010941.076010.010115.019830.026083.0...0.00.00.00.00.00.00.01.001.564075e+32
43348101.039774.011127.045989.024596.011933.0170.017145.010710.039125.0...0.00.00.00.00.00.00.01.002.685411e+32
\n", "

43349 rows × 67 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n", "1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n", "2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n", "3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n", "4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n", "... ... ... ... ... ... ... ... \n", "43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n", "43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n", "43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n", "43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n", "43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n", "\n", " 7 8 9 ... 57 58 59 60 61 62 63 \\\n", "0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... ... ... ... ... \n", "43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " labels Anomaly Anomaly_Score \n", "0 0.0 0 1.354399e+32 \n", "1 0.0 0 1.311723e+32 \n", "2 0.0 0 1.597792e+32 \n", "3 0.0 0 1.551488e+32 \n", "4 0.0 0 1.348867e+32 \n", "... ... ... ... 
\n", "43344 1.0 0 2.346619e+32 \n", "43345 1.0 0 1.778253e+32 \n", "43346 1.0 0 1.762300e+32 \n", "43347 1.0 0 1.564075e+32 \n", "43348 1.0 0 2.685411e+32 \n", "\n", "[43349 rows x 67 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pca = create_model('pca')\n", "pca_anomalies = assign_model(pca)\n", "pca_df=embeding_df.drop(['tweet'], axis=1)\n", "pca_pred = predict_model(pca, data=pca_df)\n", "pca_pred" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 41181\n", "1 2168\n", "Name: Anomaly, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pca_pred['Anomaly'].value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "embeding_df.drop(pca_pred.loc[pca_pred['Anomaly']==1 ].index, inplace=True)\n", "df=pd.DataFrame()\n", "df['tweet']=embeding_df['tweet']\n", "df['subtas_a']=embeding_df['labels']\n", "df.to_csv('pca_outliers.csv') " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# abod" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...57585960616263labelsAnomalyAnomaly_Score
0101.010110.0175.078653.0189.025285.015976.040840.0276.031623.0...0.00.00.00.00.00.00.00.00-7.719921e-22
1101.011589.010706.010713.010794.094698.030668.024883.0117.023763.0...0.00.00.00.00.00.00.00.00-4.030618e-21
2101.0148.030471.010774.013785.013779.033642.014399.048271.076686.0...0.00.00.00.00.00.00.00.00-3.558939e-22
3101.019319.016724.010118.010107.078323.012407.038959.022934.010147.0...0.00.00.00.00.00.00.00.00-2.895136e-22
4101.030932.058706.058054.044907.010224.0106583.010288.012524.013878.0...0.00.00.00.00.00.00.00.00-4.832515e-21
..................................................................
43344101.020065.010161.0115.0115.0103784.010774.021388.010245.092067.0...0.00.00.00.00.00.00.01.00-7.984637e-25
43345101.0139.080839.024109.013406.018985.016285.010163.011062.0276.0...0.00.00.00.00.00.00.01.00-1.059387e-22
43346101.0105549.0102635.010140.026943.011499.0110516.021899.011861.010561.0...0.00.00.00.00.00.00.01.00-3.592603e-22
43347101.081424.026398.092017.0109620.010941.076010.010115.019830.026083.0...0.00.00.00.00.00.00.01.00-2.226002e-21
43348101.039774.011127.045989.024596.011933.0170.017145.010710.039125.0...0.00.00.00.00.00.00.01.00-2.864757e-23
\n", "

43349 rows × 67 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n", "1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n", "2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n", "3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n", "4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n", "... ... ... ... ... ... ... ... \n", "43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n", "43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n", "43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n", "43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n", "43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n", "\n", " 7 8 9 ... 57 58 59 60 61 62 63 \\\n", "0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... ... ... ... ... \n", "43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " labels Anomaly Anomaly_Score \n", "0 0.0 0 -7.719921e-22 \n", "1 0.0 0 -4.030618e-21 \n", "2 0.0 0 -3.558939e-22 \n", "3 0.0 0 -2.895136e-22 \n", "4 0.0 0 -4.832515e-21 \n", "... ... ... ... 
\n", "43344 1.0 0 -7.984637e-25 \n", "43345 1.0 0 -1.059387e-22 \n", "43346 1.0 0 -3.592603e-22 \n", "43347 1.0 0 -2.226002e-21 \n", "43348 1.0 0 -2.864757e-23 \n", "\n", "[43349 rows x 67 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "abod = create_model('abod')\n", "abod_anomalies = assign_model(abod)\n", "abod_df=embeding_df.drop(['tweet'], axis=1)\n", "abod_pred = predict_model(abod, data=abod_df)\n", "abod_pred" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 43349\n", "Name: Anomaly, dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "abod_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# cluster" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Initiated. . . . . . . . . . . . . . . . . .16:19:48
Status. . . . . . . . . . . . . . . . . .Fitting 0.05 Fraction
Estimator. . . . . . . . . . . . . . . . . .Clustering-Based Local Outlier
\n", "
" ], "text/plain": [ " \n", " \n", "Initiated . . . . . . . . . . . . . . . . . . 16:19:48\n", "Status . . . . . . . . . . . . . . . . . . Fitting 0.05 Fraction\n", "Estimator . . . . . . . . . . . . . . . . . . Clustering-Based Local Outlier" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...57585960616263labelsAnomalyAnomaly_Score
0101.010110.0175.078653.0189.025285.015976.040840.0276.031623.0...0.00.00.00.00.00.00.00.00123828.159076
1101.011589.010706.010713.010794.094698.030668.024883.0117.023763.0...0.00.00.00.00.00.00.00.00112972.396566
2101.0148.030471.010774.013785.013779.033642.014399.048271.076686.0...0.00.00.00.00.00.00.00.00145701.165368
3101.019319.016724.010118.010107.078323.012407.038959.022934.010147.0...0.00.00.00.00.00.00.00.00141686.216880
4101.030932.058706.058054.044907.010224.0106583.010288.012524.013878.0...0.00.00.00.00.00.00.00.00101399.757887
..................................................................
43344101.020065.010161.0115.0115.0103784.010774.021388.010245.092067.0...0.00.00.00.00.00.00.01.00193403.127721
43345101.0139.080839.024109.013406.018985.016285.010163.011062.0276.0...0.00.00.00.00.00.00.01.00154821.530684
43346101.0105549.0102635.010140.026943.011499.0110516.021899.011861.010561.0...0.00.00.00.00.00.00.01.00166024.182457
43347101.081424.026398.092017.0109620.010941.076010.010115.019830.026083.0...0.00.00.00.00.00.00.01.00130852.856537
43348101.039774.011127.045989.024596.011933.0170.017145.010710.039125.0...0.00.00.00.00.00.00.01.00206109.572124
\n", "

43349 rows × 67 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 101.0 10110.0 175.0 78653.0 189.0 25285.0 15976.0 \n", "1 101.0 11589.0 10706.0 10713.0 10794.0 94698.0 30668.0 \n", "2 101.0 148.0 30471.0 10774.0 13785.0 13779.0 33642.0 \n", "3 101.0 19319.0 16724.0 10118.0 10107.0 78323.0 12407.0 \n", "4 101.0 30932.0 58706.0 58054.0 44907.0 10224.0 106583.0 \n", "... ... ... ... ... ... ... ... \n", "43344 101.0 20065.0 10161.0 115.0 115.0 103784.0 10774.0 \n", "43345 101.0 139.0 80839.0 24109.0 13406.0 18985.0 16285.0 \n", "43346 101.0 105549.0 102635.0 10140.0 26943.0 11499.0 110516.0 \n", "43347 101.0 81424.0 26398.0 92017.0 109620.0 10941.0 76010.0 \n", "43348 101.0 39774.0 11127.0 45989.0 24596.0 11933.0 170.0 \n", "\n", " 7 8 9 ... 57 58 59 60 61 62 63 \\\n", "0 40840.0 276.0 31623.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 24883.0 117.0 23763.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 14399.0 48271.0 76686.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 38959.0 22934.0 10147.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 10288.0 12524.0 13878.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... ... ... ... ... \n", "43344 21388.0 10245.0 92067.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43345 10163.0 11062.0 276.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43346 21899.0 11861.0 10561.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43347 10115.0 19830.0 26083.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "43348 17145.0 10710.0 39125.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " labels Anomaly Anomaly_Score \n", "0 0.0 0 123828.159076 \n", "1 0.0 0 112972.396566 \n", "2 0.0 0 145701.165368 \n", "3 0.0 0 141686.216880 \n", "4 0.0 0 101399.757887 \n", "... ... ... ... 
\n", "43344 1.0 0 193403.127721 \n", "43345 1.0 0 154821.530684 \n", "43346 1.0 0 166024.182457 \n", "43347 1.0 0 130852.856537 \n", "43348 1.0 0 206109.572124 \n", "\n", "[43349 rows x 67 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cluster = create_model('cluster')\n", "cluster_anomalies = assign_model(cluster)\n", "cluster_df=embeding_df.drop(['tweet'], axis=1)\n", "cluster_pred = predict_model(cluster, data=cluster_df)\n", "cluster_pred" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 41182\n", "1 2167\n", "Name: Anomaly, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cluster_pred['Anomaly'].value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "embeding_df.drop(cluster_pred.loc[cluster_pred['Anomaly']==1 ].index, inplace=True)\n", "df=pd.DataFrame()\n", "df['tweet']=embeding_df['tweet']\n", "df['subtas_a']=embeding_df['labels']\n", "df.to_csv('cluster_outliers.csv') " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# cof" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cof = create_model('cof')\n", "cof_anomalies = assign_model(cof)\n", "cof_df=embeding_df.drop(['tweet'], axis=1)\n", "cof_pred = predict_model(cof, data=cof_df)\n", "cof_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cof_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# histogram" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "histogram = create_model('histogram')\n", "histogram_anomalies = assign_model(histogram)\n", "histogram_df=embeding_df.drop(['tweet'], axis=1)\n", "histogram_pred = predict_model(histogram, 
data=histogram_df)\n", "histogram_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "histogram_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# lof" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lof = create_model('lof')\n", "lof_anomalies = assign_model(lof)\n", "lof_df=embeding_df.drop(['tweet'], axis=1)\n", "lof_pred = predict_model(lof, data=lof_df)\n", "lof_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lof_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# svm" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "svm = create_model('svm')\n", "svm_anomalies = assign_model(svm)\n", "svm_df=embeding_df.drop(['tweet'], axis=1)\n", "svm_pred = predict_model(svm, data=svm_df)\n", "svm_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "svm_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# mcd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mcd = create_model('mcd')\n", "mcd_anomalies = assign_model(mcd)\n", "mcd_df=embeding_df.drop(['tweet'], axis=1)\n", "mcd_pred = predict_model(mcd, data=mcd_df)\n", "mcd_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mcd_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# sod" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sod = create_model('sod')\n", "sod_anomalies = assign_model(sod)\n", "sod_df=embeding_df.drop(['tweet'], axis=1)\n", "sod_pred = predict_model(sod, 
data=sod_df)\n", "sod_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sod_pred['Anomaly'].value_counts()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# sos" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sos = create_model('sos')\n", "sos_anomalies = assign_model(sos)\n", "sos_df=embeding_df.drop(['tweet'], axis=1)\n", "sos_pred = predict_model(sos, data=sos_df)\n", "sos_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sos_pred['Anomaly'].value_counts()" ] } ], "metadata": { "kernelspec": { "display_name": "dl_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }