Upload HELPER.ipynb
Browse files- HELPER.ipynb +287 -0
HELPER.ipynb
ADDED
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"#### Checking the PROGRESS pickle file for validation AUPRC"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 6,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [
|
15 |
+
{
|
16 |
+
"name": "stdout",
|
17 |
+
"output_type": "stream",
|
18 |
+
"text": [
|
19 |
+
"<class 'list'> 50\n",
|
20 |
+
"<class 'dict'>\n",
|
21 |
+
"----------------------\n",
|
22 |
+
"epoch: \n",
|
23 |
+
"model: \n",
|
24 |
+
"train_auprc: \n",
|
25 |
+
"valid_auprc: \n",
|
26 |
+
"valid_targets: \n",
|
27 |
+
"valid_outputs: \n",
|
28 |
+
"-----------------------\n",
|
29 |
+
"-----------------------\n",
|
30 |
+
"[0.20795198881124255, 0.2924131615408049, 0.31194815399388126, 0.357671229080611, 0.3907590012977773, 0.39197022751675975, 0.39688932315376796, 0.41098642756821824, 0.4280303875603716, 0.4251116328825386, 0.41492397254078656, 0.44119503399957305, 0.42866565608661766, 0.42155615910506705, 0.4352771610735857, 0.4355309812927433, 0.4575302940022513, 0.4621060999031488, 0.4615244295921646, 0.4347042141353311, 0.4843673460502776, 0.49216570578173724, 0.49284316077316226, 0.4976730562122618, 0.4981241668777771, 0.4985906269863735, 0.5023674118168958, 0.5039947051779108, 0.5025596400291938, 0.501332454384853, 0.5017141509761979, 0.5033696471830942, 0.5035807094153067, 0.5044712423289812, 0.49912591150498187, 0.5036493639939076, 0.5073756144905568, 0.5066738446153692, 0.5041024684427422, 0.5061074251973712, 0.5079663458037375, 0.5080434717076571, 0.5071731389137064, 0.5066158069067092, 0.5059333249321385, 0.5078252460128987, 0.5081895157894929, 0.5079278975582764, 0.5073543066159428, 0.5078677916025073]\n",
|
31 |
+
"0.5081895157894929 46\n"
|
32 |
+
]
|
33 |
+
}
|
34 |
+
],
|
35 |
+
"source": [
|
36 |
+
"import pickle\n",
|
37 |
+
"import torch\n",
|
38 |
+
"\n",
|
39 |
+
"address = \"./model_output/model_group5/PROGRESS.pickle\"\n",
|
40 |
+
"\n",
|
41 |
+
"with open(address, 'rb') as file:\n",
|
42 |
+
" data = pickle.load(file)\n",
|
43 |
+
"\n",
|
44 |
+
"print(type(data), len(data))\n",
|
45 |
+
"# print(data[0])\n",
|
46 |
+
"print(type(data[1]))\n",
|
47 |
+
"print(\"----------------------\")\n",
|
48 |
+
"for key, _ in data[1].items():\n",
|
49 |
+
" print(f\"{key}: \")\n",
|
50 |
+
"\n",
|
51 |
+
"print(\"-----------------------\")\n",
|
52 |
+
"AUPRC_list = []\n",
|
53 |
+
"for i in range(len(data)):\n",
|
54 |
+
" AUPRC_list.append(data[i][\"valid_auprc\"])\n",
|
55 |
+
"\n",
|
56 |
+
"print(\"-----------------------\") \n",
|
57 |
+
"print(AUPRC_list)\n",
|
58 |
+
"print(\"-----------------------\")\n",
|
59 |
+
"largest_number = max(AUPRC_list)\n",
|
60 |
+
"index = AUPRC_list.index(largest_number)\n",
|
61 |
+
"print(largest_number, index)"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cell_type": "markdown",
|
66 |
+
"metadata": {},
|
67 |
+
"source": [
|
68 |
+
"group #1\n",
|
69 |
+
"\n",
|
70 |
+
"[0.24092945522182005, 0.3139675367502194, 0.3062163369752217, 0.32297163568130305, 0.3672050308180419, 0.3801609216698969, 0.3915211363523951, 0.4034875773118736, 0.41721359538446234, 0.41755420607909477, 0.4101699028342543, 0.42683222688245664, 0.4338339272938271, 0.4432706404963518, 0.4451886249025738, 0.4436839678451211, 0.46470292201596697, 0.4619959382638624, 0.4389299870874322, 0.4537386141609928, 0.4880276013143086, 0.48964141469390005, 0.49214694908533474, 0.49336784163926267, 0.4978899412041259, 0.4960868620495151, 0.4949812567178974, 0.49875221067947606, 0.4959535547710648, 0.49723019893878023, 0.49849758106937503, 0.5005045769636993, 0.4968324354226746, 0.4985954057932132, 0.4985684464062525, 0.4948398218890804, 0.5003443438290083, 0.49804674478254773, 0.5015115944170082, 0.5043099513157541, 0.5022930844045073, 0.502102123403741, 0.5025587387783707, 0.5026322695878688, 0.5028108420912678, 0.501853319716798, 0.5044486284061104, 0.5043333679462079, 0.503047975296802, 0.5021477867974229]\n",
|
71 |
+
"\n",
|
72 |
+
"0.5044486284061104, index: 46\n",
|
73 |
+
"\n",
|
74 |
+
"group #2\n",
|
75 |
+
"\n",
|
76 |
+
"[0.24668762844932296, 0.31123092790061574, 0.35728718371921886, 0.37858993755415526, 0.38325613445804607, 0.38183540019756823, 0.40688905625255206, 0.4050292403852287, 0.4103841963804383, 0.4288343036036706, 0.4293594683280219, 0.44373329349811874, 0.44694196761428867, 0.44516332505161516, 0.4570591656299683, 0.44925142278910385, 0.45783436251651694, 0.4512008966459152, 0.4628860929136446, 0.46190128250293605, 0.4891415053038087, 0.4933325648723347, 0.49795793473520533, 0.4989478549566136, 0.507199717375493, 0.5031777644234027, 0.5048360591023886, 0.5026344145441939, 0.5070084702134143, 0.50851780828997, 0.5013767142024679, 0.5077028354409389, 0.5073222030725629, 0.5103865617070087, 0.5070321372047399, 0.5069057373554984, 0.5054984338086199, 0.5052088211513525, 0.5085875776438461, 0.5015018579996042, 0.507983738986951, 0.506001318616706, 0.5078548999343991, 0.5084694227173217, 0.5081644743764611, 0.5070537320211395, 0.5072728550164887, 0.5084469401746737, 0.5081580384861908, 0.5092361778552277]\n",
|
77 |
+
"\n",
|
78 |
+
"0.5103865617070087, index: 33\n",
|
79 |
+
"\n",
|
80 |
+
"group #3\n",
|
81 |
+
"[0.20546938178065813, 0.31056285598824596, 0.3521164077944065, 0.36566363279169545, 0.3649970330628938, 0.3816742095036071, 0.408841252427171, 0.4192963362391232, 0.419725128897165, 0.4009845215509139, 0.4221866024862177, 0.4383579336817017, 0.41634488480301257, 0.4394011015343916, 0.42674958918677536, 0.4484833626141604, 0.43733868299572076, 0.42813204282903494, 0.44362467579095183, 0.4525213211300688, 0.47993303563958817, 0.48221178536835363, 0.4832912567732829, 0.485964752652683, 0.4894140885779246, 0.49081305081555826, 0.4835906970652839, 0.4881328848995447, 0.49108874994886303, 0.49205732309554323, 0.4918174541861535, 0.49104602501641953, 0.49033495002806987, 0.49255438103140303, 0.4982302563540638, 0.4919847023325378, 0.49138268849817107, 0.49216471663752714, 0.49367968532436873, 0.49558690171904884, 0.4952242601993453, 0.49709259551176815, 0.4969043181087201, 0.49722348299821856, 0.49599951407363857, 0.49572421827303714, 0.49551046935516674, 0.4969339282495756, 0.49522481850002315, 0.4956301125397299]\n",
|
82 |
+
"\n",
|
83 |
+
"0.4982302563540638, index: 34\n",
|
84 |
+
"\n",
|
85 |
+
"group #4\n",
|
86 |
+
"\n",
|
87 |
+
"[0.16705442847351432, 0.2811237847091236, 0.3227277423619332, 0.3459164670019608, 0.3433205542817934, 0.38953865811323535, 0.40093825754134493, 0.4042482476980622, 0.4179255247142833, 0.42026119275049384, 0.415263850960453, 0.4326573070148512, 0.4284856196846552, 0.455811861263988, 0.44742754829379755, 0.4428520431746461, 0.4288860834282809, 0.43801462440444205, 0.441347802107846, 0.4560878428908129, 0.47952984096244766, 0.4859939647185739, 0.48291741623601653, 0.4863560035613435, 0.4879069301596515, 0.49283878286572264, 0.4925634321692941, 0.49296767067476266, 0.4925321693215088, 0.4930295366233496, 0.4927986378984127, 0.49612537918838245, 0.4992350455119594, 0.4951830005033058, 0.49014993853897326, 0.4924448141210762, 0.4945801109607605, 0.4971188401719394, 0.49753234729288465, 0.49315691206981155, 0.4963229926370793, 0.49660539254449804, 0.49752930191373473, 0.4983978705842285, 0.498218560630721, 0.49778016282127696, 0.4980937334749714, 0.4982398417549309, 0.49825272820647715, 0.4978916971990578]\n",
|
88 |
+
"\n",
|
89 |
+
"0.4992350455119594, index: 32\n",
|
90 |
+
"\n",
|
91 |
+
"group #5\n",
|
92 |
+
"[0.20795198881124255, 0.2924131615408049, 0.31194815399388126, 0.357671229080611, 0.3907590012977773, 0.39197022751675975, 0.39688932315376796, 0.41098642756821824, 0.4280303875603716, 0.4251116328825386, 0.41492397254078656, 0.44119503399957305, 0.42866565608661766, 0.42155615910506705, 0.4352771610735857, 0.4355309812927433, 0.4575302940022513, 0.4621060999031488, 0.4615244295921646, 0.4347042141353311, 0.4843673460502776, 0.49216570578173724, 0.49284316077316226, 0.4976730562122618, 0.4981241668777771, 0.4985906269863735, 0.5023674118168958, 0.5039947051779108, 0.5025596400291938, 0.501332454384853, 0.5017141509761979, 0.5033696471830942, 0.5035807094153067, 0.5044712423289812, 0.49912591150498187, 0.5036493639939076, 0.5073756144905568, 0.5066738446153692, 0.5041024684427422, 0.5061074251973712, 0.5079663458037375, 0.5080434717076571, 0.5071731389137064, 0.5066158069067092, 0.5059333249321385, 0.5078252460128987, 0.5081895157894929, 0.5079278975582764, 0.5073543066159428, 0.5078677916025073]\n",
|
93 |
+
"\n",
|
94 |
+
"0.5081895157894929, index: 46\n",
|
95 |
+
"\n"
|
96 |
+
]
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"cell_type": "code",
|
100 |
+
"execution_count": 7,
|
101 |
+
"metadata": {},
|
102 |
+
"outputs": [
|
103 |
+
{
|
104 |
+
"name": "stdout",
|
105 |
+
"output_type": "stream",
|
106 |
+
"text": [
|
107 |
+
"A0006.hea\n"
|
108 |
+
]
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"name": "stderr",
|
112 |
+
"output_type": "stream",
|
113 |
+
"text": [
|
114 |
+
"100%|██████████| 17651/17651 [00:02<00:00, 8256.67it/s]\n"
|
115 |
+
]
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"name": "stdout",
|
119 |
+
"output_type": "stream",
|
120 |
+
"text": [
|
121 |
+
"164889003 1051.0\n",
|
122 |
+
"164890007 1675.0\n",
|
123 |
+
"6374002 104.0\n",
|
124 |
+
"426627000 59.0\n",
|
125 |
+
"733534002 299.0\n",
|
126 |
+
"713427006 963.0\n",
|
127 |
+
"270492004 707.0\n",
|
128 |
+
"713426002 371.0\n",
|
129 |
+
"39732003 1526.0\n",
|
130 |
+
"445118002 437.0\n",
|
131 |
+
"164947007 77.0\n",
|
132 |
+
"251146004 319.0\n",
|
133 |
+
"111975006 381.0\n",
|
134 |
+
"698252002 354.0\n",
|
135 |
+
"426783006 5794.0\n",
|
136 |
+
"284470004 652.0\n",
|
137 |
+
"10370003 296.0\n",
|
138 |
+
"365413008 123.0\n",
|
139 |
+
"427172004 387.0\n",
|
140 |
+
"164917005 415.0\n",
|
141 |
+
"47665007 256.0\n",
|
142 |
+
"427393009 758.0\n",
|
143 |
+
"426177001 3784.0\n",
|
144 |
+
"427084000 1932.0\n",
|
145 |
+
"164934002 2344.0\n",
|
146 |
+
"59931005 797.0\n",
|
147 |
+
"dtype: float64\n"
|
148 |
+
]
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"data": {
|
152 |
+
"text/plain": [
|
153 |
+
"<All keys matched successfully>"
|
154 |
+
]
|
155 |
+
},
|
156 |
+
"execution_count": 7,
|
157 |
+
"metadata": {},
|
158 |
+
"output_type": "execute_result"
|
159 |
+
}
|
160 |
+
],
|
161 |
+
"source": [
|
162 |
+
"import torch\n",
|
163 |
+
"import numpy as np\n",
|
164 |
+
"from tqdm import tqdm\n",
|
165 |
+
"from sklearn.metrics import average_precision_score, roc_auc_score, f1_score\n",
|
166 |
+
"import pandas as pd\n",
|
167 |
+
"from dataset import dataset\n",
|
168 |
+
"from torch.utils.data import DataLoader\n",
|
169 |
+
"from model import NN\n",
|
170 |
+
"\n",
|
171 |
+
"\n",
|
172 |
+
"DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
|
173 |
+
"\n",
|
174 |
+
"address = \"./model_output/model_group1/PROGRESS.pickle\"\n",
|
175 |
+
"\n",
|
176 |
+
"with open(address, 'rb') as file:\n",
|
177 |
+
" data = pickle.load(file)\n",
|
178 |
+
"\n",
|
179 |
+
"new_state_dict = data[46]['model']\n",
|
180 |
+
"\n",
|
181 |
+
"def collate(batch):\n",
|
182 |
+
"\n",
|
183 |
+
" ch = batch[0][0].shape[0]\n",
|
184 |
+
" maxL = 8192\n",
|
185 |
+
" X = np.zeros((len(batch), ch, maxL))\n",
|
186 |
+
" \n",
|
187 |
+
" for i in range(len(batch)):\n",
|
188 |
+
" X[i, :, -batch[i][0].shape[-1]:] = batch[i][0]\n",
|
189 |
+
" \n",
|
190 |
+
" t = np.array([b[1] for b in batch])\n",
|
191 |
+
" l = np.concatenate([b[2].reshape(1,12) for b in batch], axis=0)\n",
|
192 |
+
"\n",
|
193 |
+
" X = torch.from_numpy(X)\n",
|
194 |
+
" t = torch.from_numpy(t)\n",
|
195 |
+
" l = torch.from_numpy(l)\n",
|
196 |
+
" return X, t, l\n",
|
197 |
+
"\n",
|
198 |
+
"def valid_part(model, dataset):\n",
|
199 |
+
" targets = []\n",
|
200 |
+
" outputs = []\n",
|
201 |
+
" model.eval()\n",
|
202 |
+
" with torch.no_grad():\n",
|
203 |
+
" for i, (x, t, l) in enumerate(tqdm(dataset)):\n",
|
204 |
+
" x = x.unsqueeze(2).float().to(DEVICE)\n",
|
205 |
+
" t = t.to(DEVICE)\n",
|
206 |
+
" l = l.float().to(DEVICE)\n",
|
207 |
+
"\n",
|
208 |
+
" y,p = model(x, l)\n",
|
209 |
+
" #p = torch.sigmoid(y)\n",
|
210 |
+
"\n",
|
211 |
+
" targets.append(t.data.cpu().numpy())\n",
|
212 |
+
" outputs.append(p.data.cpu().numpy())\n",
|
213 |
+
" \n",
|
214 |
+
" targets = np.concatenate(targets, axis=0)\n",
|
215 |
+
" outputs = np.concatenate(outputs, axis=0)\n",
|
216 |
+
" auprc = average_precision_score(y_true=targets, y_score=outputs)\n",
|
217 |
+
" auroc = roc_auc_score(targets, outputs)\n",
|
218 |
+
"\n",
|
219 |
+
" outputs_f1 = np.array([[(1 if prob > 0 else 0) for prob in probs] for probs in np.array(outputs)])\n",
|
220 |
+
" f1 = f1_score(targets, outputs_f1, average='weighted')\n",
|
221 |
+
" print(\"This is the auroc of testing:\", auroc)\n",
|
222 |
+
" print(\"This is the f1 of testing:\", f1)\n",
|
223 |
+
"\n",
|
224 |
+
" return auprc, targets, outputs, auroc, f1\n",
|
225 |
+
"\n",
|
226 |
+
"file_address = \"../../physionet.org/files/challenge-2021/1.0.3/training/python-classifier-2021-main/training_data/collection_of_all_datasets/\"\n",
|
227 |
+
"data_directory = \"./csv-file/training_validation_testing/group1\"\n",
|
228 |
+
"\n",
|
229 |
+
"############ testing area #########################\n",
|
230 |
+
"the_testing_address = data_directory + \"/testing_group\"+data_directory[-1]+\".csv\"\n",
|
231 |
+
"df = pd.read_csv(the_testing_address)\n",
|
232 |
+
"print(df['Name'][0])\n",
|
233 |
+
"\n",
|
234 |
+
"testing_header_files=[]\n",
|
235 |
+
"\n",
|
236 |
+
"for i in range(len(df['Name'])):\n",
|
237 |
+
" each_header_file = file_address + df['Name'][i]\n",
|
238 |
+
" testing_header_files.append(each_header_file)\n",
|
239 |
+
" \n",
|
240 |
+
"test_dataset = dataset(testing_header_files)\n",
|
241 |
+
"print(test_dataset.summary('pandas'))\n",
|
242 |
+
" \n",
|
243 |
+
"\n",
|
244 |
+
"test_dataset.num_leads = 12\n",
|
245 |
+
"test_dataset.sample = True\n",
|
246 |
+
"###################################################\n",
|
247 |
+
"valid = DataLoader(dataset=test_dataset,\n",
|
248 |
+
" batch_size=128,\n",
|
249 |
+
" shuffle=False,\n",
|
250 |
+
" num_workers=8,\n",
|
251 |
+
" collate_fn=collate,\n",
|
252 |
+
" pin_memory=True,\n",
|
253 |
+
" drop_last=False)\n",
|
254 |
+
"\n",
|
255 |
+
"model = NN(nOUT=26).to(DEVICE)\n",
|
256 |
+
"model.load_state_dict(new_state_dict)\n",
|
257 |
+
"\n",
|
258 |
+
"auprc, targets, outputs, auroc, f1 = valid_part(model, valid)\n",
|
259 |
+
"print(\"============================================\")\n",
|
260 |
+
"print(\"This is the auprc:\", auprc)\n",
|
261 |
+
"print(\"This is the auroc:\", auroc)\n",
|
262 |
+
"print(\"This is the f1: \", f1)\n"
|
263 |
+
]
|
264 |
+
}
|
265 |
+
],
|
266 |
+
"metadata": {
|
267 |
+
"kernelspec": {
|
268 |
+
"display_name": "testing",
|
269 |
+
"language": "python",
|
270 |
+
"name": "python3"
|
271 |
+
},
|
272 |
+
"language_info": {
|
273 |
+
"codemirror_mode": {
|
274 |
+
"name": "ipython",
|
275 |
+
"version": 3
|
276 |
+
},
|
277 |
+
"file_extension": ".py",
|
278 |
+
"mimetype": "text/x-python",
|
279 |
+
"name": "python",
|
280 |
+
"nbconvert_exporter": "python",
|
281 |
+
"pygments_lexer": "ipython3",
|
282 |
+
"version": "3.8.18"
|
283 |
+
}
|
284 |
+
},
|
285 |
+
"nbformat": 4,
|
286 |
+
"nbformat_minor": 2
|
287 |
+
}
|