Christina Theodoris committed on
Commit
875ef33
·
1 Parent(s): 0a8c47b

Move example input files to dataset repository to include example datasets for fine-tuning

Browse files
benchmarking/castle_cell_type_annotation.r DELETED
@@ -1,80 +0,0 @@
# Usage: Rscript castle_cell_type_annotation.r organ [seed]
#
# Trains an XGBoost classifier on binned expression features selected from
# the training split of one organ, predicts cell types for the test split,
# and writes accuracy and macro F1 to <rootdir><organ>_castle_results_test.csv.
#
# Arguments:
#   organ  prefix of the input files (<organ>_filtered_data_train.csv, ...)
#   seed   optional integer; when given, seeds the RNG so the random 80%
#          subsample used for training is reproducible

# parse ordered arguments
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1 || is.na(args[1]) || !nzchar(args[1])) {
  stop("Usage: Rscript castle_cell_type_annotation.r organ [seed]",
       call. = FALSE)
}
organ <- args[1]
if (length(args) >= 2) {
  seed <- suppressWarnings(as.integer(args[2]))
  if (is.na(seed)) {
    stop("seed must be an integer, got: ", args[2], call. = FALSE)
  }
  set.seed(seed)
}

suppressPackageStartupMessages(library(scater))
suppressPackageStartupMessages(library(xgboost))
suppressPackageStartupMessages(library(igraph))

# expression bins: (-1,0], (0,1], (1,6], (6,Inf]
BREAKS <- c(-1, 0, 1, 6, Inf)
# number of top features taken from each ranking (mean expression, NMI)
nFeatures <- 100

print(paste0("Training ", organ))

# import training and test data
# NOTE(review): the data-split notebook writes these files under
# <rootdir>/<organ>/ -- point rootdir at the directory that holds them.
rootdir <- "/path/to/data/"

# Count files are read with the first column as row names and transposed,
# so rows of the resulting matrix correspond to columns of the CSV.
read_counts <- function(split) {
  path <- paste0(rootdir, organ, "_filtered_data_", split, ".csv")
  t(as.matrix(read.csv(file = path, row.names = 1)))
}
read_celltype <- function(split) {
  path <- paste0(rootdir, organ, "_filtered_celltype_", split, ".csv")
  as.matrix(read.csv(file = path))
}

train_counts <- read_counts("train")
test_counts <- read_counts("test")
train_celltype <- read_celltype("train")
test_celltype <- read_celltype("test")

# rbind() joins matrices by position, not by name: make sure both splits
# carry the same features in the same order before stacking them.
if (!identical(colnames(train_counts), colnames(test_counts))) {
  if (!setequal(colnames(train_counts), colnames(test_counts))) {
    stop("Train and test count matrices have different features",
         call. = FALSE)
  }
  test_counts <- test_counts[, colnames(train_counts), drop = FALSE]
}
if (nrow(train_counts) != nrow(train_celltype) ||
    nrow(test_counts) != nrow(test_celltype)) {
  stop("Count matrices and cell type annotations differ in length",
       call. = FALSE)
}

# select features
sourceCellTypes <- as.factor(train_celltype[, "Cell_type"])
ds <- rbind(train_counts, test_counts)
ds[is.na(ds)] <- 0
isSource <- c(rep(TRUE, nrow(train_counts)), rep(FALSE, nrow(test_counts)))
sourceCounts <- ds[isSource, , drop = FALSE]

# rank features by mean expression and by normalized mutual information
# between the binned expression and the training labels
topFeaturesAvg <- colnames(sourceCounts)[
  order(apply(sourceCounts, 2, mean), decreasing = TRUE)
]
topFeaturesMi <- names(sort(
  apply(sourceCounts, 2, function(x) {
    compare(cut(x, breaks = BREAKS), sourceCellTypes, method = "nmi")
  }),
  decreasing = TRUE
))
selectedFeatures <- union(
  head(topFeaturesAvg, nFeatures),
  head(topFeaturesMi, nFeatures)
)

# Drop redundant features: a feature is removed when its Pearson correlation
# with a later selected feature is >= 0.9. Only the strict lower triangle is
# kept, so each correlated pair is inspected once. The test must be all():
# every column contains zeros after masking, so any(x < 0.9) is always TRUE
# and would never remove anything. NA correlations (e.g. constant features)
# are treated as "not correlated".
featureCor <- cor(sourceCounts[, selectedFeatures], method = "pearson")
featureCor[!lower.tri(featureCor)] <- 0
notRedundant <- apply(featureCor, 2, function(x) all(x < 0.9, na.rm = TRUE))
selectedFeatures <- selectedFeatures[notRedundant]
remove(featureCor, notRedundant, sourceCounts)

# bin expression values and expand features by bins
dsBins <- apply(ds[, selectedFeatures, drop = FALSE], 2, cut, breaks = BREAKS)
nUniq <- apply(dsBins, 2, function(x) length(unique(x)))
# features with a single bin carry no information; drop = FALSE keeps the
# matrix shape (and column names) even if only one feature remains
ds <- model.matrix(~ ., as.data.frame(dsBins[, nUniq > 1, drop = FALSE]))
remove(dsBins, nUniq)

# model.matrix() omits rows containing NA (values outside BREAKS); that would
# silently misalign the isSource mask, so fail loudly instead.
if (nrow(ds) != length(isSource)) {
  stop("Rows were dropped while building the design matrix", call. = FALSE)
}

# train model on a random ~80% subsample of the training cells
train <- runif(sum(isSource)) < 0.8
trainFeatures <- ds[isSource, , drop = FALSE][train, , drop = FALSE]
trainLabels <- as.numeric(sourceCellTypes[train]) - 1
nClasses <- nlevels(sourceCellTypes)
isMulticlass <- nClasses > 2

# slightly different setup for multiclass and binary classification
if (isMulticlass) {
  xg <- xgboost(data = trainFeatures,
                label = trainLabels,
                objective = "multi:softmax", num_class = nClasses,
                eta = 0.7, nthread = 5, nrounds = 20, verbose = 0,
                gamma = 0.001, max_depth = 5, min_child_weight = 10)
} else {
  # Without an explicit objective xgboost fits a regression and predict()
  # returns continuous scores; request class probabilities instead.
  xg <- xgboost(data = trainFeatures,
                label = trainLabels,
                objective = "binary:logistic",
                eta = 0.7, nthread = 5, nrounds = 20, verbose = 0,
                gamma = 0.001, max_depth = 5, min_child_weight = 10)
}

# validate model
rawPredictions <- predict(xg, ds[!isSource, , drop = FALSE])
if (isMulticlass) {
  # multi:softmax already returns 0-based class indices
  predictedClasses <- as.integer(round(rawPredictions))
} else {
  # binary:logistic returns P(class 1); threshold to a class index
  predictedClasses <- as.integer(rawPredictions > 0.5)
}

# Encode test labels with the training levels so that index k means the same
# cell type in both splits. Types seen only in the test split are appended
# after the training levels and can therefore never be predicted correctly.
testLabels <- as.character(test_celltype[, "Cell_type"])
classLevels <- union(levels(sourceCellTypes), testLabels)
if (length(classLevels) > nClasses) {
  warning("Test data contain cell types absent from the training data",
          call. = FALSE)
}
trueClasses <- match(testLabels, classLevels) - 1L

# Fixed levels on both axes keep the confusion matrix square and aligned
# even when some class is never predicted or never observed.
classIds <- seq_along(classLevels) - 1L
cm <- as.matrix(table(
  Actual = factor(trueClasses, levels = classIds),
  Predicted = factor(predictedClasses, levels = classIds)
))
n <- sum(cm)
correct <- diag(cm)    # number of correctly classified instances per class
rowsums <- rowSums(cm) # number of instances per class
colsums <- colSums(cm) # number of predictions per class
accuracy <- sum(correct) / n
precision <- correct / colsums
recall <- correct / rowsums
f1 <- 2 * precision * recall / (precision + recall)
# Undefined F1 (class never predicted, or no correct predictions) counts as 0
# and the macro average runs over classes present in the labels or the
# predictions, matching sklearn's f1_score(average = "macro") used by the
# scDeepsort benchmark.
f1[!is.finite(f1)] <- 0
macroF1 <- mean(f1[(rowsums + colsums) > 0])

print(paste0(organ, " accuracy: ", accuracy))
print(paste0(organ, " macroF1: ", macroF1))

results_df <- data.frame(Accuracy = accuracy, macroF1 = macroF1)
write.csv(results_df,
          paste0(rootdir, organ, "_castle_results_test.csv"),
          row.names = FALSE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
benchmarking/prepare_datasplits_for_cell_type_annotation.ipynb DELETED
@@ -1,288 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "25107132",
6
- "metadata": {},
7
- "source": [
8
- "### Preparing train and test data splits for cell type annotation application"
9
- ]
10
- },
11
- {
12
- "cell_type": "code",
13
- "execution_count": 3,
14
- "id": "83d8d249-affe-45dd-915e-992b4b35b31a",
15
- "metadata": {},
16
- "outputs": [],
17
- "source": [
18
- "import os\n",
19
- "import pandas as pd\n",
20
- "from sklearn.model_selection import train_test_split\n",
21
- "from tqdm.notebook import tqdm\n",
22
- "from collections import Counter\n",
23
- "import pickle"
24
- ]
25
- },
26
- {
27
- "cell_type": "code",
28
- "execution_count": 4,
29
- "id": "e3e6a2bf-44c8-4164-9ecd-1686230ea8be",
30
- "metadata": {},
31
- "outputs": [
32
- {
33
- "data": {
34
- "text/plain": [
35
- "['pancreas',\n",
36
- " 'liver',\n",
37
- " 'blood',\n",
38
- " 'lung',\n",
39
- " 'spleen',\n",
40
- " 'placenta',\n",
41
- " 'colorectum',\n",
42
- " 'kidney',\n",
43
- " 'brain']"
44
- ]
45
- },
46
- "execution_count": 4,
47
- "metadata": {},
48
- "output_type": "execute_result"
49
- }
50
- ],
51
- "source": [
52
- "rootdir = \"/path/to/data/\"\n",
53
- "\n",
54
- "# collect panel of tissues to test\n",
55
- "dir_list = []\n",
56
- "for dir_i in os.listdir(rootdir):\n",
57
- " if (\"results\" not in dir_i) & (os.path.isdir(os.path.join(rootdir, dir_i))):\n",
58
- " dir_list += [dir_i]\n",
59
- "dir_list"
60
- ]
61
- },
62
- {
63
- "cell_type": "code",
64
- "execution_count": 5,
65
- "id": "0b205eec-a518-472a-ab90-dd63ef9803cd",
66
- "metadata": {},
67
- "outputs": [
68
- {
69
- "data": {
70
- "text/html": [
71
- "<div>\n",
72
- "<style scoped>\n",
73
- " .dataframe tbody tr th:only-of-type {\n",
74
- " vertical-align: middle;\n",
75
- " }\n",
76
- "\n",
77
- " .dataframe tbody tr th {\n",
78
- " vertical-align: top;\n",
79
- " }\n",
80
- "\n",
81
- " .dataframe thead th {\n",
82
- " text-align: right;\n",
83
- " }\n",
84
- "</style>\n",
85
- "<table border=\"1\" class=\"dataframe\">\n",
86
- " <thead>\n",
87
- " <tr style=\"text-align: right;\">\n",
88
- " <th></th>\n",
89
- " <th>filter_pass</th>\n",
90
- " <th>original_cell_id</th>\n",
91
- " </tr>\n",
92
- " </thead>\n",
93
- " <tbody>\n",
94
- " <tr>\n",
95
- " <th>0</th>\n",
96
- " <td>0</td>\n",
97
- " <td>C_1</td>\n",
98
- " </tr>\n",
99
- " <tr>\n",
100
- " <th>1</th>\n",
101
- " <td>1</td>\n",
102
- " <td>C_2</td>\n",
103
- " </tr>\n",
104
- " <tr>\n",
105
- " <th>2</th>\n",
106
- " <td>0</td>\n",
107
- " <td>C_3</td>\n",
108
- " </tr>\n",
109
- " <tr>\n",
110
- " <th>3</th>\n",
111
- " <td>1</td>\n",
112
- " <td>C_4</td>\n",
113
- " </tr>\n",
114
- " <tr>\n",
115
- " <th>4</th>\n",
116
- " <td>0</td>\n",
117
- " <td>C_5</td>\n",
118
- " </tr>\n",
119
- " <tr>\n",
120
- " <th>...</th>\n",
121
- " <td>...</td>\n",
122
- " <td>...</td>\n",
123
- " </tr>\n",
124
- " <tr>\n",
125
- " <th>9590</th>\n",
126
- " <td>1</td>\n",
127
- " <td>C_9591</td>\n",
128
- " </tr>\n",
129
- " <tr>\n",
130
- " <th>9591</th>\n",
131
- " <td>1</td>\n",
132
- " <td>C_9592</td>\n",
133
- " </tr>\n",
134
- " <tr>\n",
135
- " <th>9592</th>\n",
136
- " <td>1</td>\n",
137
- " <td>C_9593</td>\n",
138
- " </tr>\n",
139
- " <tr>\n",
140
- " <th>9593</th>\n",
141
- " <td>1</td>\n",
142
- " <td>C_9594</td>\n",
143
- " </tr>\n",
144
- " <tr>\n",
145
- " <th>9594</th>\n",
146
- " <td>1</td>\n",
147
- " <td>C_9595</td>\n",
148
- " </tr>\n",
149
- " </tbody>\n",
150
- "</table>\n",
151
- "<p>9595 rows × 2 columns</p>\n",
152
- "</div>"
153
- ],
154
- "text/plain": [
155
- " filter_pass original_cell_id\n",
156
- "0 0 C_1\n",
157
- "1 1 C_2\n",
158
- "2 0 C_3\n",
159
- "3 1 C_4\n",
160
- "4 0 C_5\n",
161
- "... ... ...\n",
162
- "9590 1 C_9591\n",
163
- "9591 1 C_9592\n",
164
- "9592 1 C_9593\n",
165
- "9593 1 C_9594\n",
166
- "9594 1 C_9595\n",
167
- "\n",
168
- "[9595 rows x 2 columns]"
169
- ]
170
- },
171
- "execution_count": 5,
172
- "metadata": {},
173
- "output_type": "execute_result"
174
- }
175
- ],
176
- "source": [
177
- "# dictionary of cell barcodes that passed QC filtering applied by Geneformer \n",
178
- "# to ensure same cells were used for comparison\n",
179
- "with open(f\"{rootdir}deepsort_filter_dict.pickle\", \"rb\") as fp:\n",
180
- " filter_dict = pickle.load(fp)\n",
181
- "\n",
182
- "# for example:\n",
183
- "filter_dict[\"human_Placenta9595_data\"]"
184
- ]
185
- },
186
- {
187
- "cell_type": "code",
188
- "execution_count": null,
189
- "id": "207e3571-0236-4493-83b3-a89b67b16cb2",
190
- "metadata": {
191
- "tags": []
192
- },
193
- "outputs": [],
194
- "source": [
195
- "for dir_name in tqdm(dir_list):\n",
196
- "\n",
197
- " df = pd.DataFrame()\n",
198
- " ct_df = pd.DataFrame(columns=[\"Cell\",\"Cell_type\"])\n",
199
- " \n",
200
- " subrootdir = f\"{rootdir}{dir_name}/\"\n",
201
- " for subdir, dirs, files in os.walk(subrootdir):\n",
202
- " for i in range(len(files)):\n",
203
- " file = files[i]\n",
204
- " if file.endswith(\"_data.csv\"):\n",
205
- " file_prefix = file.replace(\"_data.csv\",\"\")\n",
206
- " sample_prefix = file.replace(\".csv\",\"\")\n",
207
- " filter_df = filter_dict[sample_prefix]\n",
208
- " sample_to_analyze = list(filter_df[filter_df[\"filter_pass\"]==1][\"original_cell_id\"])\n",
209
- " \n",
210
- " # collect data for each tissue\n",
211
- " df_i = pd.read_csv(f\"{subrootdir}{file}\", index_col=0)\n",
212
- " df_i = df_i[sample_to_analyze]\n",
213
- " df_i.columns = [f\"{i}_{cell_id}\" for cell_id in df_i.columns]\n",
214
- " df = pd.concat([df,df_i],axis=1)\n",
215
- " \n",
216
- " # collect cell type metadata\n",
217
- " ct_df_i = pd.read_csv(f\"{subrootdir}{file_prefix}_celltype.csv\", index_col=0)\n",
218
- " ct_df_i.columns = [\"Cell\",\"Cell_type\"]\n",
219
- " ct_df_i[\"Cell\"] = [f\"{i}_{cell_id}\" for cell_id in ct_df_i[\"Cell\"]]\n",
220
- " ct_df = pd.concat([ct_df,ct_df_i],axis=0)\n",
221
- " \n",
222
- " # per published scDeepsort method, filter data for cell types >0.5% of data\n",
223
- " ct_counts = Counter(ct_df[\"Cell_type\"])\n",
224
- " total_count = sum(ct_counts.values())\n",
225
- " nonrare_cell_types = [cell_type for cell_type,count in ct_counts.items() if count>(total_count*0.005)]\n",
226
- " nonrare_cells = list(ct_df[ct_df[\"Cell_type\"].isin(nonrare_cell_types)][\"Cell\"])\n",
227
- " df = df[df.columns.intersection(nonrare_cells)]\n",
228
- "\n",
229
- " # split into 80/20 train/test data\n",
230
- " train, test = train_test_split(df.T, test_size=0.2)\n",
231
- " train = train.T\n",
232
- " test = test.T \n",
233
- " \n",
234
- " # save filtered train/test data\n",
235
- " train.to_csv(f\"{subrootdir}{dir_name}_filtered_data_train.csv\")\n",
236
- " test.to_csv(f\"{subrootdir}{dir_name}_filtered_data_test.csv\")\n",
237
- "\n",
238
- " # split metadata into train/test data\n",
239
- " ct_df_train = ct_df[ct_df[\"Cell\"].isin(list(train.columns))]\n",
240
- " ct_df_test = ct_df[ct_df[\"Cell\"].isin(list(test.columns))]\n",
241
- " train_order_dict = dict(zip(train.columns,[i for i in range(len(train.columns))]))\n",
242
- " test_order_dict = dict(zip(test.columns,[i for i in range(len(test.columns))]))\n",
243
- " ct_df_train[\"order\"] = [train_order_dict[cell_id] for cell_id in ct_df_train[\"Cell\"]]\n",
244
- " ct_df_test[\"order\"] = [test_order_dict[cell_id] for cell_id in ct_df_test[\"Cell\"]]\n",
245
- " ct_df_train = ct_df_train.sort_values(\"order\")\n",
246
- " ct_df_test = ct_df_test.sort_values(\"order\")\n",
247
- " ct_df_train = ct_df_train.drop(\"order\",axis=1)\n",
248
- " ct_df_test = ct_df_test.drop(\"order\",axis=1)\n",
249
- " assert list(ct_df_train[\"Cell\"]) == list(train.columns)\n",
250
- " assert list(ct_df_test[\"Cell\"]) == list(test.columns)\n",
251
- " train_labels = list(Counter(ct_df_train[\"Cell_type\"]).keys())\n",
252
- " test_labels = list(Counter(ct_df_test[\"Cell_type\"]).keys())\n",
253
- " assert set(train_labels) == set(test_labels)\n",
254
- " \n",
255
- " # save train/test cell type annotations\n",
256
- " ct_df_train.to_csv(f\"{subrootdir}{dir_name}_filtered_celltype_train.csv\")\n",
257
- " ct_df_test.to_csv(f\"{subrootdir}{dir_name}_filtered_celltype_test.csv\")\n",
258
- " "
259
- ]
260
- }
261
- ],
262
- "metadata": {
263
- "kernelspec": {
264
- "display_name": "Python 3.8.6 64-bit ('3.8.6')",
265
- "language": "python",
266
- "name": "python3"
267
- },
268
- "language_info": {
269
- "codemirror_mode": {
270
- "name": "ipython",
271
- "version": 3
272
- },
273
- "file_extension": ".py",
274
- "mimetype": "text/x-python",
275
- "name": "python",
276
- "nbconvert_exporter": "python",
277
- "pygments_lexer": "ipython3",
278
- "version": "3.8.6"
279
- },
280
- "vscode": {
281
- "interpreter": {
282
- "hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
283
- }
284
- }
285
- },
286
- "nbformat": 4,
287
- "nbformat_minor": 5
288
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
benchmarking/randomForest_token_classifier_dosageTF_10k.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
benchmarking/scDeepsort_train_predict.ipynb DELETED
@@ -1,166 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 3,
6
- "id": "83d8d249-affe-45dd-915e-992b4b35b31a",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import os\n",
11
- "import numpy as np\n",
12
- "import pandas as pd\n",
13
- "import deepsort\n",
14
- "from sklearn.metrics import accuracy_score, f1_score\n",
15
- "from tqdm.notebook import tqdm\n",
16
- "import pickle"
17
- ]
18
- },
19
- {
20
- "cell_type": "code",
21
- "execution_count": 4,
22
- "id": "25de46ec-8a41-484d-8e14-d2b19768fc2c",
23
- "metadata": {},
24
- "outputs": [],
25
- "source": [
26
- "def compute_metrics(labels, preds):\n",
27
- "\n",
28
- " # calculate accuracy and macro f1 using sklearn's function\n",
29
- " acc = accuracy_score(labels, preds)\n",
30
- " macro_f1 = f1_score(labels, preds, average='macro')\n",
31
- " return {\n",
32
- " 'accuracy': acc,\n",
33
- " 'macro_f1': macro_f1\n",
34
- " }"
35
- ]
36
- },
37
- {
38
- "cell_type": "code",
39
- "execution_count": 5,
40
- "id": "a4029b2b-afca-4300-82a2-082fec59f191",
41
- "metadata": {},
42
- "outputs": [
43
- {
44
- "data": {
45
- "text/plain": [
46
- "['pancreas',\n",
47
- " 'liver',\n",
48
- " 'blood',\n",
49
- " 'lung',\n",
50
- " 'spleen',\n",
51
- " 'placenta',\n",
52
- " 'colorectum',\n",
53
- " 'kidney',\n",
54
- " 'brain']"
55
- ]
56
- },
57
- "execution_count": 5,
58
- "metadata": {},
59
- "output_type": "execute_result"
60
- }
61
- ],
62
- "source": [
63
- "rootdir = \"/path/to/data/\"\n",
64
- "\n",
65
- "dir_list = []\n",
66
- "for dir_i in os.listdir(rootdir):\n",
67
- " if (\"results\" not in dir_i) & (os.path.isdir(os.path.join(rootdir, dir_i))):\n",
68
- " dir_list += [dir_i]\n",
69
- "dir_list"
70
- ]
71
- },
72
- {
73
- "cell_type": "code",
74
- "execution_count": null,
75
- "id": "ddcdc5cd-871e-4fd2-8457-18d3049fa76c",
76
- "metadata": {
77
- "tags": []
78
- },
79
- "outputs": [],
80
- "source": [
81
- "output_dir = \"results_EDefault_filtered\"\n",
82
- "n_epochs = \"Default\" # scDeepsort default epochs = 300\n",
83
- "\n",
84
- "results_dict = dict()\n",
85
- "for dir_name in tqdm(dir_list):\n",
86
- " print(f\"TRAINING: {dir_name}\")\n",
87
- " subrootdir = f\"{rootdir}{dir_name}/\"\n",
88
- " train_files = [(f\"{subrootdir}{dir_name}_filtered_data_train.csv\",f\"{subrootdir}{dir_name}_filtered_celltype_train.csv\")]\n",
89
- " test_file = f\"{subrootdir}{dir_name}_filtered_data_test.csv\"\n",
90
- " label_file = f\"{subrootdir}{dir_name}_filtered_celltype_test.csv\"\n",
91
- " \n",
92
- " # define the model\n",
93
- " model = deepsort.DeepSortClassifier(species='human',\n",
94
- " tissue=dir_name,\n",
95
- " gpu_id=0,\n",
96
- " random_seed=1,\n",
97
- " validation_fraction=0) # use all training data (already held out 20% in test data file)\n",
98
- "\n",
99
- " # fit the model\n",
100
- " model.fit(train_files, save_path=f\"{subrootdir}{output_dir}\")\n",
101
- " \n",
102
- " # use the saved model to predict cell types in test data\n",
103
- " model.predict(input_file=test_file,\n",
104
- " model_path=f\"{subrootdir}{output_dir}\",\n",
105
- " save_path=f\"{subrootdir}{output_dir}\",\n",
106
- " unsure_rate=0,\n",
107
- " file_type='csv')\n",
108
- " labels_df = pd.read_csv(label_file)\n",
109
- " preds_df = pd.read_csv(f\"{subrootdir}{output_dir}/human_{dir_name}_{dir_name}_filtered_data_test.csv\")\n",
110
- " label_cell_ids = labels_df[\"Cell\"]\n",
111
- " pred_cell_ids = preds_df[\"index\"]\n",
112
- " assert list(label_cell_ids) == list(pred_cell_ids)\n",
113
- " labels = list(labels_df[\"Cell_type\"])\n",
114
- " if isinstance(preds_df[\"cell_subtype\"][0],float):\n",
115
- " if np.isnan(preds_df[\"cell_subtype\"][0]):\n",
116
- " preds = list(preds_df[\"cell_type\"])\n",
117
- " results = compute_metrics(labels, preds)\n",
118
- " else:\n",
119
- " preds1 = list(preds_df[\"cell_type\"])\n",
120
- " preds2 = list(preds_df[\"cell_subtype\"])\n",
121
- " results1 = compute_metrics(labels, preds1)\n",
122
- " results2 = compute_metrics(labels, preds2)\n",
123
- " if results2[\"accuracy\"] > results1[\"accuracy\"]:\n",
124
- " results = results2\n",
125
- " else:\n",
126
- " results = results1\n",
127
- " \n",
128
- " print(f\"{dir_name}: {results}\")\n",
129
- " results_dict[dir_name] = results\n",
130
- " with open(f\"{subrootdir}deepsort_E{n_epochs}_filtered_pred_{dir_name}.pickle\", \"wb\") as output_file:\n",
131
- " pickle.dump(results, output_file)\n",
132
- "\n",
133
- "# save results\n",
134
- "with open(f\"{rootdir}deepsort_E{n_epochs}_filtered_pred_dict.pickle\", \"wb\") as output_file:\n",
135
- " pickle.dump(results_dict, output_file)\n",
136
- " "
137
- ]
138
- }
139
- ],
140
- "metadata": {
141
- "kernelspec": {
142
- "display_name": "Python 3.8.6 64-bit ('3.8.6')",
143
- "language": "python",
144
- "name": "python3"
145
- },
146
- "language_info": {
147
- "codemirror_mode": {
148
- "name": "ipython",
149
- "version": 3
150
- },
151
- "file_extension": ".py",
152
- "mimetype": "text/x-python",
153
- "name": "python",
154
- "nbconvert_exporter": "python",
155
- "pygments_lexer": "ipython3",
156
- "version": "3.8.6"
157
- },
158
- "vscode": {
159
- "interpreter": {
160
- "hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
161
- }
162
- }
163
- },
164
- "nbformat": 4,
165
- "nbformat_minor": 5
166
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/example_input_files/bivalent_promoters/bivalent_gene_labels.txt DELETED
@@ -1,107 +0,0 @@
1
- ENSG00000005073
2
- ENSG00000007372
3
- ENSG00000007372
4
- ENSG00000043355
5
- ENSG00000068120
6
- ENSG00000075891
7
- ENSG00000078399
8
- ENSG00000105991
9
- ENSG00000105996
10
- ENSG00000105997
11
- ENSG00000106004
12
- ENSG00000106006
13
- ENSG00000106031
14
- ENSG00000106038
15
- ENSG00000107807
16
- ENSG00000107821
17
- ENSG00000107831
18
- ENSG00000107859
19
- ENSG00000107862
20
- ENSG00000108511
21
- ENSG00000108786
22
- ENSG00000108797
23
- ENSG00000110693
24
- ENSG00000110693
25
- ENSG00000113430
26
- ENSG00000115844
27
- ENSG00000117707
28
- ENSG00000117707
29
- ENSG00000119915
30
- ENSG00000120068
31
- ENSG00000120075
32
- ENSG00000120093
33
- ENSG00000120093
34
- ENSG00000120094
35
- ENSG00000122592
36
- ENSG00000125285
37
- ENSG00000125798
38
- ENSG00000125813
39
- ENSG00000125813
40
- ENSG00000125816
41
- ENSG00000125820
42
- ENSG00000128573
43
- ENSG00000128645
44
- ENSG00000128652
45
- ENSG00000128709
46
- ENSG00000128710
47
- ENSG00000128713
48
- ENSG00000128714
49
- ENSG00000129514
50
- ENSG00000131196
51
- ENSG00000131196
52
- ENSG00000136327
53
- ENSG00000136944
54
- ENSG00000138083
55
- ENSG00000139800
56
- ENSG00000143013
57
- ENSG00000143632
58
- ENSG00000144355
59
- ENSG00000148680
60
- ENSG00000148826
61
- ENSG00000151615
62
- ENSG00000152192
63
- ENSG00000152977
64
- ENSG00000159184
65
- ENSG00000159387
66
- ENSG00000163412
67
- ENSG00000163421
68
- ENSG00000163623
69
- ENSG00000164330
70
- ENSG00000164438
71
- ENSG00000164690
72
- ENSG00000164778
73
- ENSG00000165588
74
- ENSG00000165588
75
- ENSG00000165588
76
- ENSG00000166407
77
- ENSG00000166407
78
- ENSG00000168505
79
- ENSG00000168875
80
- ENSG00000169946
81
- ENSG00000170166
82
- ENSG00000170178
83
- ENSG00000170549
84
- ENSG00000170561
85
- ENSG00000170577
86
- ENSG00000170689
87
- ENSG00000173917
88
- ENSG00000174279
89
- ENSG00000174963
90
- ENSG00000174963
91
- ENSG00000175879
92
- ENSG00000176842
93
- ENSG00000177508
94
- ENSG00000178573
95
- ENSG00000182568
96
- ENSG00000182742
97
- ENSG00000185551
98
- ENSG00000185551
99
- ENSG00000187140
100
- ENSG00000196092
101
- ENSG00000197576
102
- ENSG00000198807
103
- ENSG00000253293
104
- ENSG00000256463
105
- ENSG00000260027
106
- ENSG00000276644
107
- ENSG00000285708
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/example_input_files/bivalent_promoters/lys4_only_gene_labels.txt DELETED
@@ -1,80 +0,0 @@
1
- ENSG00000012048
2
- ENSG00000033627
3
- ENSG00000037042
4
- ENSG00000055950
5
- ENSG00000067596
6
- ENSG00000069248
7
- ENSG00000072682
8
- ENSG00000085274
9
- ENSG00000088035
10
- ENSG00000088930
11
- ENSG00000095539
12
- ENSG00000102471
13
- ENSG00000102967
14
- ENSG00000104313
15
- ENSG00000105146
16
- ENSG00000105379
17
- ENSG00000105982
18
- ENSG00000105983
19
- ENSG00000107816
20
- ENSG00000107819
21
- ENSG00000107829
22
- ENSG00000107833
23
- ENSG00000108784
24
- ENSG00000108799
25
- ENSG00000108828
26
- ENSG00000108830
27
- ENSG00000109911
28
- ENSG00000113522
29
- ENSG00000119487
30
- ENSG00000120049
31
- ENSG00000125347
32
- ENSG00000126581
33
- ENSG00000131374
34
- ENSG00000131437
35
- ENSG00000131462
36
- ENSG00000131467
37
- ENSG00000131469
38
- ENSG00000131470
39
- ENSG00000131475
40
- ENSG00000131477
41
- ENSG00000135272
42
- ENSG00000135776
43
- ENSG00000135801
44
- ENSG00000136158
45
- ENSG00000140262
46
- ENSG00000140450
47
- ENSG00000140563
48
- ENSG00000140829
49
- ENSG00000140830
50
- ENSG00000145494
51
- ENSG00000146909
52
- ENSG00000147905
53
- ENSG00000148688
54
- ENSG00000148840
55
- ENSG00000148950
56
- ENSG00000151332
57
- ENSG00000151338
58
- ENSG00000165637
59
- ENSG00000165644
60
- ENSG00000166135
61
- ENSG00000166136
62
- ENSG00000166167
63
- ENSG00000166169
64
- ENSG00000166189
65
- ENSG00000166197
66
- ENSG00000166377
67
- ENSG00000167081
68
- ENSG00000168118
69
- ENSG00000171421
70
- ENSG00000175832
71
- ENSG00000186480
72
- ENSG00000187098
73
- ENSG00000188554
74
- ENSG00000196628
75
- ENSG00000196628
76
- ENSG00000198728
77
- ENSG00000198728
78
- ENSG00000198863
79
- ENSG00000285283
80
- ENSG00000285708
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/example_input_files/bivalent_promoters/no_methylation_gene_labels.txt DELETED
@@ -1,42 +0,0 @@
1
- ENSG00000068079
2
- ENSG00000068383
3
- ENSG00000075290
4
- ENSG00000104313
5
- ENSG00000105370
6
- ENSG00000105374
7
- ENSG00000105383
8
- ENSG00000106536
9
- ENSG00000113520
10
- ENSG00000113525
11
- ENSG00000118557
12
- ENSG00000125257
13
- ENSG00000128573
14
- ENSG00000131471
15
- ENSG00000131480
16
- ENSG00000131482
17
- ENSG00000134532
18
- ENSG00000136319
19
- ENSG00000138792
20
- ENSG00000140262
21
- ENSG00000140718
22
- ENSG00000147488
23
- ENSG00000147488
24
- ENSG00000148677
25
- ENSG00000151322
26
- ENSG00000151322
27
- ENSG00000156113
28
- ENSG00000164399
29
- ENSG00000164400
30
- ENSG00000167749
31
- ENSG00000167754
32
- ENSG00000167755
33
- ENSG00000169035
34
- ENSG00000170927
35
- ENSG00000182177
36
- ENSG00000186153
37
- ENSG00000187098
38
- ENSG00000204764
39
- ENSG00000213022
40
- ENSG00000213822
41
- ENSG00000261701
42
- ENSG00000285708
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/example_input_files/dosage_sensitive_tfs/dosage_sens_tf_labels.csv DELETED
@@ -1,369 +0,0 @@
1
- dosage_sensitive,dosage_insensitive
2
- ENSG00000008197,ENSG00000010539
3
- ENSG00000008441,ENSG00000011590
4
- ENSG00000010818,ENSG00000063438
5
- ENSG00000011332,ENSG00000063587
6
- ENSG00000030419,ENSG00000064218
7
- ENSG00000062194,ENSG00000064489
8
- ENSG00000065970,ENSG00000067646
9
- ENSG00000067082,ENSG00000075407
10
- ENSG00000069667,ENSG00000079263
11
- ENSG00000072736,ENSG00000081386
12
- ENSG00000073282,ENSG00000083812
13
- ENSG00000073861,ENSG00000083814
14
- ENSG00000077092,ENSG00000083828
15
- ENSG00000083307,ENSG00000083838
16
- ENSG00000084676,ENSG00000083844
17
- ENSG00000085276,ENSG00000085644
18
- ENSG00000087510,ENSG00000089335
19
- ENSG00000087903,ENSG00000089775
20
- ENSG00000089225,ENSG00000102901
21
- ENSG00000091656,ENSG00000103199
22
- ENSG00000091831,ENSG00000105136
23
- ENSG00000095951,ENSG00000105610
24
- ENSG00000100644,ENSG00000105672
25
- ENSG00000100968,ENSG00000106410
26
- ENSG00000101076,ENSG00000106948
27
- ENSG00000101412,ENSG00000109705
28
- ENSG00000102870,ENSG00000115568
29
- ENSG00000102935,ENSG00000117010
30
- ENSG00000103449,ENSG00000118620
31
- ENSG00000105698,ENSG00000119574
32
- ENSG00000105866,ENSG00000120669
33
- ENSG00000106689,ENSG00000121406
34
- ENSG00000106852,ENSG00000121864
35
- ENSG00000111249,ENSG00000122085
36
- ENSG00000111783,ENSG00000124203
37
- ENSG00000112033,ENSG00000124232
38
- ENSG00000112246,ENSG00000124444
39
- ENSG00000112561,ENSG00000124613
40
- ENSG00000112837,ENSG00000125520
41
- ENSG00000115112,ENSG00000127081
42
- ENSG00000116809,ENSG00000127903
43
- ENSG00000116833,ENSG00000127989
44
- ENSG00000117000,ENSG00000129028
45
- ENSG00000118263,ENSG00000129071
46
- ENSG00000118922,ENSG00000129194
47
- ENSG00000119547,ENSG00000130544
48
- ENSG00000120798,ENSG00000130818
49
- ENSG00000121068,ENSG00000131848
50
- ENSG00000123358,ENSG00000132010
51
- ENSG00000123411,ENSG00000132846
52
- ENSG00000124496,ENSG00000133250
53
- ENSG00000124813,ENSG00000134874
54
- ENSG00000125398,ENSG00000135899
55
- ENSG00000125618,ENSG00000136866
56
- ENSG00000126368,ENSG00000137185
57
- ENSG00000127152,ENSG00000137504
58
- ENSG00000128573,ENSG00000138380
59
- ENSG00000129173,ENSG00000140993
60
- ENSG00000131759,ENSG00000141946
61
- ENSG00000132005,ENSG00000142556
62
- ENSG00000133794,ENSG00000143067
63
- ENSG00000134046,ENSG00000144026
64
- ENSG00000134317,ENSG00000144161
65
- ENSG00000134323,ENSG00000145908
66
- ENSG00000134852,ENSG00000146587
67
- ENSG00000135111,ENSG00000147183
68
- ENSG00000137203,ENSG00000147789
69
- ENSG00000137270,ENSG00000148300
70
- ENSG00000138795,ENSG00000149054
71
- ENSG00000139083,ENSG00000149922
72
- ENSG00000139793,ENSG00000151500
73
- ENSG00000140548,ENSG00000151650
74
- ENSG00000140968,ENSG00000151657
75
- ENSG00000142611,ENSG00000152439
76
- ENSG00000143033,ENSG00000152467
77
- ENSG00000143171,ENSG00000152475
78
- ENSG00000143190,ENSG00000153975
79
- ENSG00000143355,ENSG00000155592
80
- ENSG00000143365,ENSG00000156469
81
- ENSG00000143373,ENSG00000157429
82
- ENSG00000143437,ENSG00000159882
83
- ENSG00000144355,ENSG00000159885
84
- ENSG00000147862,ENSG00000159915
85
- ENSG00000148516,ENSG00000160224
86
- ENSG00000150907,ENSG00000160229
87
- ENSG00000151090,ENSG00000160352
88
- ENSG00000153234,ENSG00000160908
89
- ENSG00000158055,ENSG00000160961
90
- ENSG00000160007,ENSG00000161277
91
- ENSG00000160094,ENSG00000162086
92
- ENSG00000161405,ENSG00000163516
93
- ENSG00000162761,ENSG00000164011
94
- ENSG00000162924,ENSG00000164048
95
- ENSG00000164683,ENSG00000164296
96
- ENSG00000164684,ENSG00000164299
97
- ENSG00000167182,ENSG00000165066
98
- ENSG00000168610,ENSG00000165512
99
- ENSG00000168916,ENSG00000165643
100
- ENSG00000169554,ENSG00000165684
101
- ENSG00000169946,ENSG00000166529
102
- ENSG00000170370,ENSG00000166823
103
- ENSG00000172733,ENSG00000166860
104
- ENSG00000172819,ENSG00000167034
105
- ENSG00000177463,ENSG00000167384
106
- ENSG00000178177,ENSG00000167554
107
- ENSG00000179348,ENSG00000167625
108
- ENSG00000179361,ENSG00000167785
109
- ENSG00000179456,ENSG00000167800
110
- ENSG00000180357,ENSG00000167840
111
- ENSG00000185551,ENSG00000167962
112
- ENSG00000185591,ENSG00000167981
113
- ENSG00000187098,ENSG00000168152
114
- ENSG00000187605,ENSG00000168286
115
- ENSG00000189308,ENSG00000168769
116
- ENSG00000196092,ENSG00000169131
117
- ENSG00000196482,ENSG00000169136
118
- ENSG00000196628,ENSG00000169548
119
- ENSG00000197757,ENSG00000169951
120
- ENSG00000198815,ENSG00000169955
121
- ENSG00000198945,ENSG00000169989
122
- ENSG00000198963,ENSG00000170260
123
- ENSG00000204231,ENSG00000170608
124
- ,ENSG00000170954
125
- ,ENSG00000171291
126
- ,ENSG00000171295
127
- ,ENSG00000171425
128
- ,ENSG00000171443
129
- ,ENSG00000171466
130
- ,ENSG00000171469
131
- ,ENSG00000171574
132
- ,ENSG00000171606
133
- ,ENSG00000171827
134
- ,ENSG00000171872
135
- ,ENSG00000171970
136
- ,ENSG00000172000
137
- ,ENSG00000172888
138
- ,ENSG00000173041
139
- ,ENSG00000173258
140
- ,ENSG00000173480
141
- ,ENSG00000173673
142
- ,ENSG00000173825
143
- ,ENSG00000174255
144
- ,ENSG00000174652
145
- ,ENSG00000174796
146
- ,ENSG00000175279
147
- ,ENSG00000175325
148
- ,ENSG00000175395
149
- ,ENSG00000175691
150
- ,ENSG00000176009
151
- ,ENSG00000176024
152
- ,ENSG00000176083
153
- ,ENSG00000176222
154
- ,ENSG00000176302
155
- ,ENSG00000176472
156
- ,ENSG00000176678
157
- ,ENSG00000176679
158
- ,ENSG00000177030
159
- ,ENSG00000177494
160
- ,ENSG00000177599
161
- ,ENSG00000177683
162
- ,ENSG00000177842
163
- ,ENSG00000177873
164
- ,ENSG00000177932
165
- ,ENSG00000177946
166
- ,ENSG00000178150
167
- ,ENSG00000178229
168
- ,ENSG00000178338
169
- ,ENSG00000178386
170
- ,ENSG00000178665
171
- ,ENSG00000178917
172
- ,ENSG00000178928
173
- ,ENSG00000178935
174
- ,ENSG00000179195
175
- ,ENSG00000179772
176
- ,ENSG00000179774
177
- ,ENSG00000179886
178
- ,ENSG00000179909
179
- ,ENSG00000179922
180
- ,ENSG00000179930
181
- ,ENSG00000179943
182
- ,ENSG00000179965
183
- ,ENSG00000180257
184
- ,ENSG00000180346
185
- ,ENSG00000180532
186
- ,ENSG00000180535
187
- ,ENSG00000180938
188
- ,ENSG00000181135
189
- ,ENSG00000181444
190
- ,ENSG00000181450
191
- ,ENSG00000181638
192
- ,ENSG00000181894
193
- ,ENSG00000181896
194
- ,ENSG00000182318
195
- ,ENSG00000182983
196
- ,ENSG00000182986
197
- ,ENSG00000183340
198
- ,ENSG00000183647
199
- ,ENSG00000183734
200
- ,ENSG00000183850
201
- ,ENSG00000184221
202
- ,ENSG00000184517
203
- ,ENSG00000184635
204
- ,ENSG00000184677
205
- ,ENSG00000184895
206
- ,ENSG00000185155
207
- ,ENSG00000185252
208
- ,ENSG00000185404
209
- ,ENSG00000185730
210
- ,ENSG00000186020
211
- ,ENSG00000186026
212
- ,ENSG00000186051
213
- ,ENSG00000186103
214
- ,ENSG00000186230
215
- ,ENSG00000186300
216
- ,ENSG00000186376
217
- ,ENSG00000186446
218
- ,ENSG00000186496
219
- ,ENSG00000186777
220
- ,ENSG00000186812
221
- ,ENSG00000186814
222
- ,ENSG00000187626
223
- ,ENSG00000187801
224
- ,ENSG00000187821
225
- ,ENSG00000187855
226
- ,ENSG00000187987
227
- ,ENSG00000188033
228
- ,ENSG00000188095
229
- ,ENSG00000188171
230
- ,ENSG00000188295
231
- ,ENSG00000188321
232
- ,ENSG00000188629
233
- ,ENSG00000188785
234
- ,ENSG00000188868
235
- ,ENSG00000189164
236
- ,ENSG00000189190
237
- ,ENSG00000189298
238
- ,ENSG00000189299
239
- ,ENSG00000196152
240
- ,ENSG00000196172
241
- ,ENSG00000196214
242
- ,ENSG00000196345
243
- ,ENSG00000196357
244
- ,ENSG00000196378
245
- ,ENSG00000196381
246
- ,ENSG00000196387
247
- ,ENSG00000196391
248
- ,ENSG00000196417
249
- ,ENSG00000196418
250
- ,ENSG00000196456
251
- ,ENSG00000196460
252
- ,ENSG00000196466
253
- ,ENSG00000196605
254
- ,ENSG00000196646
255
- ,ENSG00000196652
256
- ,ENSG00000196670
257
- ,ENSG00000196693
258
- ,ENSG00000196705
259
- ,ENSG00000196812
260
- ,ENSG00000196946
261
- ,ENSG00000197008
262
- ,ENSG00000197020
263
- ,ENSG00000197037
264
- ,ENSG00000197044
265
- ,ENSG00000197054
266
- ,ENSG00000197124
267
- ,ENSG00000197134
268
- ,ENSG00000197162
269
- ,ENSG00000197213
270
- ,ENSG00000197279
271
- ,ENSG00000197343
272
- ,ENSG00000197360
273
- ,ENSG00000197363
274
- ,ENSG00000197472
275
- ,ENSG00000197779
276
- ,ENSG00000197841
277
- ,ENSG00000197857
278
- ,ENSG00000197863
279
- ,ENSG00000197928
280
- ,ENSG00000197933
281
- ,ENSG00000197951
282
- ,ENSG00000198028
283
- ,ENSG00000198039
284
- ,ENSG00000198046
285
- ,ENSG00000198185
286
- ,ENSG00000198205
287
- ,ENSG00000198300
288
- ,ENSG00000198315
289
- ,ENSG00000198342
290
- ,ENSG00000198346
291
- ,ENSG00000198429
292
- ,ENSG00000198440
293
- ,ENSG00000198464
294
- ,ENSG00000198466
295
- ,ENSG00000198482
296
- ,ENSG00000198538
297
- ,ENSG00000198546
298
- ,ENSG00000198551
299
- ,ENSG00000198556
300
- ,ENSG00000198633
301
- ,ENSG00000198939
302
- ,ENSG00000203326
303
- ,ENSG00000204514
304
- ,ENSG00000204519
305
- ,ENSG00000204532
306
- ,ENSG00000204595
307
- ,ENSG00000204604
308
- ,ENSG00000204644
309
- ,ENSG00000204946
310
- ,ENSG00000213020
311
- ,ENSG00000213799
312
- ,ENSG00000213973
313
- ,ENSG00000213988
314
- ,ENSG00000214189
315
- ,ENSG00000215271
316
- ,ENSG00000215372
317
- ,ENSG00000215612
318
- ,ENSG00000220201
319
- ,ENSG00000221923
320
- ,ENSG00000223547
321
- ,ENSG00000227124
322
- ,ENSG00000229676
323
- ,ENSG00000229809
324
- ,ENSG00000230797
325
- ,ENSG00000232040
326
- ,ENSG00000234284
327
- ,ENSG00000234444
328
- ,ENSG00000235109
329
- ,ENSG00000235608
330
- ,ENSG00000236104
331
- ,ENSG00000236609
332
- ,ENSG00000237440
333
- ,ENSG00000242852
334
- ,ENSG00000243660
335
- ,ENSG00000245680
336
- ,ENSG00000248483
337
- ,ENSG00000249459
338
- ,ENSG00000249471
339
- ,ENSG00000249709
340
- ,ENSG00000250571
341
- ,ENSG00000250709
342
- ,ENSG00000251192
343
- ,ENSG00000251247
344
- ,ENSG00000251369
345
- ,ENSG00000253831
346
- ,ENSG00000254004
347
- ,ENSG00000256087
348
- ,ENSG00000256223
349
- ,ENSG00000256229
350
- ,ENSG00000256294
351
- ,ENSG00000256463
352
- ,ENSG00000256683
353
- ,ENSG00000256771
354
- ,ENSG00000257446
355
- ,ENSG00000257591
356
- ,ENSG00000258405
357
- ,ENSG00000258873
358
- ,ENSG00000263002
359
- ,ENSG00000264668
360
- ,ENSG00000265763
361
- ,ENSG00000267041
362
- ,ENSG00000267179
363
- ,ENSG00000267281
364
- ,ENSG00000267508
365
- ,ENSG00000267680
366
- ,ENSG00000269067
367
- ,ENSG00000269343
368
- ,ENSG00000269699
369
- ,ENSG00000272602
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/example_input_files/gene_info_table.csv DELETED
The diff for this file is too large to render. See raw diff