Christina Theodoris
committed on
Commit
·
875ef33
1
Parent(s):
0a8c47b
Move example input files to dataset repository to include example datasets for fine-tuning
Browse files- benchmarking/castle_cell_type_annotation.r +0 -80
- benchmarking/prepare_datasplits_for_cell_type_annotation.ipynb +0 -288
- benchmarking/randomForest_token_classifier_dosageTF_10k.ipynb +0 -0
- benchmarking/scDeepsort_train_predict.ipynb +0 -166
- examples/example_input_files/bivalent_promoters/bivalent_gene_labels.txt +0 -107
- examples/example_input_files/bivalent_promoters/lys4_only_gene_labels.txt +0 -80
- examples/example_input_files/bivalent_promoters/no_methylation_gene_labels.txt +0 -42
- examples/example_input_files/dosage_sensitive_tfs/dosage_sens_tf_labels.csv +0 -369
- examples/example_input_files/gene_info_table.csv +0 -0
benchmarking/castle_cell_type_annotation.r
DELETED
@@ -1,80 +0,0 @@
|
|
1 |
-
# Usage: Rscript castle_cell_type_annotation.r organ
#
# Trains an XGBoost classifier on binned expression features for one organ and
# reports accuracy and macro F1 on the held-out test split.
#
# Inputs, read from `rootdir` (file names are prefixed with `organ`):
#   <organ>_filtered_data_train.csv, <organ>_filtered_data_test.csv
#       expression tables; the first CSV column is used as row names and the
#       table is transposed after reading
#       (presumably genes x cells on disk -> cells x genes here; TODO confirm)
#   <organ>_filtered_celltype_train.csv, <organ>_filtered_celltype_test.csv
#       tables that must contain a "Cell_type" column
# Output:
#   <organ>_castle_results_test.csv with columns Accuracy and macroF1

# parse ordered arguments
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1 || is.na(args[1]) || !nzchar(args[1])) {
  stop("Usage: Rscript castle_cell_type_annotation.r organ", call. = FALSE)
}
organ <- args[1]

suppressPackageStartupMessages(library(scater))
suppressPackageStartupMessages(library(xgboost))
suppressPackageStartupMessages(library(igraph))

# bin edges used to discretise expression values: (-1,0], (0,1], (1,6], (6,Inf]
BREAKS <- c(-1, 0, 1, 6, Inf)
# number of top features taken from each ranking (mean expression, NMI)
nFeatures <- 100

print(paste0("Training ", organ))

# import training and test data
# NOTE(review): confirm that rootdir points at the directory that actually
# holds the <organ>_filtered_* files (they may live in a per-organ subfolder).
rootdir <- "/path/to/data/"

# Read one expression split and transpose it so that the CSV's columns
# become the rows of the returned matrix.
read_counts <- function(split) {
  path <- paste0(rootdir, organ, "_filtered_data_", split, ".csv")
  t(as.matrix(read.csv(file = path, row.names = 1)))
}

# Read the cell type annotation table of one split as a character matrix.
read_celltypes <- function(split) {
  path <- paste0(rootdir, organ, "_filtered_celltype_", split, ".csv")
  as.matrix(read.csv(file = path))
}

train_counts <- read_counts("train")
test_counts <- read_counts("test")
train_celltype <- read_celltypes("train")
test_celltype <- read_celltypes("test")

# rbind() on matrices is positional, and labels are matched to rows by
# position, so fail early if the inputs do not line up.
stopifnot(
  identical(colnames(train_counts), colnames(test_counts)),
  nrow(train_counts) == nrow(train_celltype),
  nrow(test_counts) == nrow(test_celltype)
)

# select features
sourceCellTypes <- as.factor(train_celltype[, "Cell_type"])
ds <- rbind(train_counts, test_counts)
ds[is.na(ds)] <- 0
isSource <- c(rep(TRUE, nrow(train_counts)), rep(FALSE, nrow(test_counts)))
source_ds <- ds[isSource, , drop = FALSE]

# ranking 1: features ordered by mean expression in the training rows
topFeaturesAvg <- colnames(source_ds)[
  order(colMeans(source_ds), decreasing = TRUE)
]
# ranking 2: features ordered by normalised mutual information between the
# binned expression and the training cell type labels
feature_nmi <- apply(source_ds, 2, function(x) {
  igraph::compare(cut(x, breaks = BREAKS), sourceCellTypes, method = "nmi")
})
topFeaturesMi <- names(sort(feature_nmi, decreasing = TRUE))
selectedFeatures <- union(
  head(topFeaturesAvg, nFeatures),
  head(topFeaturesMi, nFeatures)
)

# NOTE(review): the upper triangle and diagonal are zeroed, so every column
# contains a value < 0.9 and `any(...)` keeps every feature. If the intent is
# to drop highly correlated features this should probably be `all(...)`; left
# unchanged so the selected feature set (and reported metrics) stay the same.
tmp <- cor(source_ds[, selectedFeatures], method = "pearson")
tmp[!lower.tri(tmp)] <- 0
selectedFeatures <- selectedFeatures[apply(tmp, 2, function(x) any(x < 0.9))]
remove(tmp, source_ds)

# bin expression values and expand features by bins
dsBins <- apply(ds[, selectedFeatures], 2, cut, breaks = BREAKS)
nUniq <- apply(dsBins, 2, function(x) length(unique(x)))
# features with a single bin carry no information and are dropped before the
# dummy-variable expansion
ds <- model.matrix(~ ., as.data.frame(dsBins[, nUniq > 1]))
remove(dsBins, nUniq)

# train model
# Fixed seed so that the random 80% subsample of the training rows (and hence
# the reported metrics) is reproducible between runs.
set.seed(1)
train <- runif(sum(isSource)) < 0.8
train_features <- ds[isSource, , drop = FALSE][train, , drop = FALSE]
# xgboost expects zero-based integer class labels
train_labels <- as.numeric(sourceCellTypes[train]) - 1
n_classes <- length(unique(sourceCellTypes))
is_multiclass <- n_classes > 2

# slightly different setup for multiclass and binary classification
if (is_multiclass) {
  xg <- xgboost(
    data = train_features,
    label = train_labels,
    objective = "multi:softmax", num_class = n_classes,
    eta = 0.7, nthread = 5, nrounds = 20, verbose = 0,
    gamma = 0.001, max_depth = 5, min_child_weight = 10
  )
} else {
  # An explicit classification objective is required here: without it the
  # booster is fitted as a regression and predict() returns continuous scores
  # that cannot be tabulated against the class labels.
  xg <- xgboost(
    data = train_features,
    label = train_labels,
    objective = "binary:logistic",
    eta = 0.7, nthread = 5, nrounds = 20, verbose = 0,
    gamma = 0.001, max_depth = 5, min_child_weight = 10
  )
}

# validate model
predictedClasses <- predict(xg, ds[!isSource, , drop = FALSE])
if (!is_multiclass) {
  # binary:logistic yields probabilities; convert them to 0/1 class codes
  predictedClasses <- as.numeric(predictedClasses > 0.5)
}

# Encode the test labels with the *training* factor levels so that the class
# codes mean the same thing as the codes the model was trained on.
class_levels <- levels(sourceCellTypes)
test_labels <- as.character(test_celltype[, "Cell_type"])
unseen_labels <- setdiff(unique(test_labels[!is.na(test_labels)]), class_levels)
if (length(unseen_labels) > 0) {
  warning(
    "Test cell types absent from the training data are counted as errors: ",
    paste(unseen_labels, collapse = ", "),
    call. = FALSE
  )
}
all_levels <- c(class_levels, sort(unseen_labels))
class_codes <- seq_along(all_levels) - 1
trueClasses <- match(test_labels, all_levels) - 1

# Shared factor levels keep the confusion matrix square and aligned even when
# some class is never predicted or never present in the test split.
cm <- as.matrix(table(
  Actual = factor(trueClasses, levels = class_codes),
  Predicted = factor(predictedClasses, levels = class_codes)
))
n <- sum(cm)
correct <- diag(cm)       # number of correctly classified instances per class
rowsums <- rowSums(cm)    # number of instances per class
colsums <- colSums(cm)    # number of predictions per class
accuracy <- sum(correct) / n
precision <- correct / colsums
recall <- correct / rowsums
f1 <- 2 * precision * recall / (precision + recall)
# 0/0 cases (class never predicted, or never correct) score an F1 of 0, and
# classes that appear in neither the labels nor the predictions are left out
# of the average, matching the usual macro-F1 convention.
f1[is.na(f1)] <- 0
macroF1 <- mean(f1[(rowsums + colsums) > 0])

print(paste0(organ, " accuracy: ", accuracy))
print(paste0(organ, " macroF1: ", macroF1))

results_df <- data.frame(Accuracy = c(accuracy), macroF1 = c(macroF1))
write.csv(
  results_df,
  paste0(rootdir, organ, "_castle_results_test.csv"),
  row.names = FALSE
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
benchmarking/prepare_datasplits_for_cell_type_annotation.ipynb
DELETED
@@ -1,288 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"id": "25107132",
|
6 |
-
"metadata": {},
|
7 |
-
"source": [
|
8 |
-
"### Preparing train and test data splits for cell type annotation application"
|
9 |
-
]
|
10 |
-
},
|
11 |
-
{
|
12 |
-
"cell_type": "code",
|
13 |
-
"execution_count": 3,
|
14 |
-
"id": "83d8d249-affe-45dd-915e-992b4b35b31a",
|
15 |
-
"metadata": {},
|
16 |
-
"outputs": [],
|
17 |
-
"source": [
|
18 |
-
"import os\n",
|
19 |
-
"import pandas as pd\n",
|
20 |
-
"from sklearn.model_selection import train_test_split\n",
|
21 |
-
"from tqdm.notebook import tqdm\n",
|
22 |
-
"from collections import Counter\n",
|
23 |
-
"import pickle"
|
24 |
-
]
|
25 |
-
},
|
26 |
-
{
|
27 |
-
"cell_type": "code",
|
28 |
-
"execution_count": 4,
|
29 |
-
"id": "e3e6a2bf-44c8-4164-9ecd-1686230ea8be",
|
30 |
-
"metadata": {},
|
31 |
-
"outputs": [
|
32 |
-
{
|
33 |
-
"data": {
|
34 |
-
"text/plain": [
|
35 |
-
"['pancreas',\n",
|
36 |
-
" 'liver',\n",
|
37 |
-
" 'blood',\n",
|
38 |
-
" 'lung',\n",
|
39 |
-
" 'spleen',\n",
|
40 |
-
" 'placenta',\n",
|
41 |
-
" 'colorectum',\n",
|
42 |
-
" 'kidney',\n",
|
43 |
-
" 'brain']"
|
44 |
-
]
|
45 |
-
},
|
46 |
-
"execution_count": 4,
|
47 |
-
"metadata": {},
|
48 |
-
"output_type": "execute_result"
|
49 |
-
}
|
50 |
-
],
|
51 |
-
"source": [
|
52 |
-
"rootdir = \"/path/to/data/\"\n",
|
53 |
-
"\n",
|
54 |
-
"# collect panel of tissues to test\n",
|
55 |
-
"dir_list = []\n",
|
56 |
-
"for dir_i in os.listdir(rootdir):\n",
|
57 |
-
" if (\"results\" not in dir_i) & (os.path.isdir(os.path.join(rootdir, dir_i))):\n",
|
58 |
-
" dir_list += [dir_i]\n",
|
59 |
-
"dir_list"
|
60 |
-
]
|
61 |
-
},
|
62 |
-
{
|
63 |
-
"cell_type": "code",
|
64 |
-
"execution_count": 5,
|
65 |
-
"id": "0b205eec-a518-472a-ab90-dd63ef9803cd",
|
66 |
-
"metadata": {},
|
67 |
-
"outputs": [
|
68 |
-
{
|
69 |
-
"data": {
|
70 |
-
"text/html": [
|
71 |
-
"<div>\n",
|
72 |
-
"<style scoped>\n",
|
73 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
74 |
-
" vertical-align: middle;\n",
|
75 |
-
" }\n",
|
76 |
-
"\n",
|
77 |
-
" .dataframe tbody tr th {\n",
|
78 |
-
" vertical-align: top;\n",
|
79 |
-
" }\n",
|
80 |
-
"\n",
|
81 |
-
" .dataframe thead th {\n",
|
82 |
-
" text-align: right;\n",
|
83 |
-
" }\n",
|
84 |
-
"</style>\n",
|
85 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
86 |
-
" <thead>\n",
|
87 |
-
" <tr style=\"text-align: right;\">\n",
|
88 |
-
" <th></th>\n",
|
89 |
-
" <th>filter_pass</th>\n",
|
90 |
-
" <th>original_cell_id</th>\n",
|
91 |
-
" </tr>\n",
|
92 |
-
" </thead>\n",
|
93 |
-
" <tbody>\n",
|
94 |
-
" <tr>\n",
|
95 |
-
" <th>0</th>\n",
|
96 |
-
" <td>0</td>\n",
|
97 |
-
" <td>C_1</td>\n",
|
98 |
-
" </tr>\n",
|
99 |
-
" <tr>\n",
|
100 |
-
" <th>1</th>\n",
|
101 |
-
" <td>1</td>\n",
|
102 |
-
" <td>C_2</td>\n",
|
103 |
-
" </tr>\n",
|
104 |
-
" <tr>\n",
|
105 |
-
" <th>2</th>\n",
|
106 |
-
" <td>0</td>\n",
|
107 |
-
" <td>C_3</td>\n",
|
108 |
-
" </tr>\n",
|
109 |
-
" <tr>\n",
|
110 |
-
" <th>3</th>\n",
|
111 |
-
" <td>1</td>\n",
|
112 |
-
" <td>C_4</td>\n",
|
113 |
-
" </tr>\n",
|
114 |
-
" <tr>\n",
|
115 |
-
" <th>4</th>\n",
|
116 |
-
" <td>0</td>\n",
|
117 |
-
" <td>C_5</td>\n",
|
118 |
-
" </tr>\n",
|
119 |
-
" <tr>\n",
|
120 |
-
" <th>...</th>\n",
|
121 |
-
" <td>...</td>\n",
|
122 |
-
" <td>...</td>\n",
|
123 |
-
" </tr>\n",
|
124 |
-
" <tr>\n",
|
125 |
-
" <th>9590</th>\n",
|
126 |
-
" <td>1</td>\n",
|
127 |
-
" <td>C_9591</td>\n",
|
128 |
-
" </tr>\n",
|
129 |
-
" <tr>\n",
|
130 |
-
" <th>9591</th>\n",
|
131 |
-
" <td>1</td>\n",
|
132 |
-
" <td>C_9592</td>\n",
|
133 |
-
" </tr>\n",
|
134 |
-
" <tr>\n",
|
135 |
-
" <th>9592</th>\n",
|
136 |
-
" <td>1</td>\n",
|
137 |
-
" <td>C_9593</td>\n",
|
138 |
-
" </tr>\n",
|
139 |
-
" <tr>\n",
|
140 |
-
" <th>9593</th>\n",
|
141 |
-
" <td>1</td>\n",
|
142 |
-
" <td>C_9594</td>\n",
|
143 |
-
" </tr>\n",
|
144 |
-
" <tr>\n",
|
145 |
-
" <th>9594</th>\n",
|
146 |
-
" <td>1</td>\n",
|
147 |
-
" <td>C_9595</td>\n",
|
148 |
-
" </tr>\n",
|
149 |
-
" </tbody>\n",
|
150 |
-
"</table>\n",
|
151 |
-
"<p>9595 rows × 2 columns</p>\n",
|
152 |
-
"</div>"
|
153 |
-
],
|
154 |
-
"text/plain": [
|
155 |
-
" filter_pass original_cell_id\n",
|
156 |
-
"0 0 C_1\n",
|
157 |
-
"1 1 C_2\n",
|
158 |
-
"2 0 C_3\n",
|
159 |
-
"3 1 C_4\n",
|
160 |
-
"4 0 C_5\n",
|
161 |
-
"... ... ...\n",
|
162 |
-
"9590 1 C_9591\n",
|
163 |
-
"9591 1 C_9592\n",
|
164 |
-
"9592 1 C_9593\n",
|
165 |
-
"9593 1 C_9594\n",
|
166 |
-
"9594 1 C_9595\n",
|
167 |
-
"\n",
|
168 |
-
"[9595 rows x 2 columns]"
|
169 |
-
]
|
170 |
-
},
|
171 |
-
"execution_count": 5,
|
172 |
-
"metadata": {},
|
173 |
-
"output_type": "execute_result"
|
174 |
-
}
|
175 |
-
],
|
176 |
-
"source": [
|
177 |
-
"# dictionary of cell barcodes that passed QC filtering applied by Geneformer \n",
|
178 |
-
"# to ensure same cells were used for comparison\n",
|
179 |
-
"with open(f\"{rootdir}deepsort_filter_dict.pickle\", \"rb\") as fp:\n",
|
180 |
-
" filter_dict = pickle.load(fp)\n",
|
181 |
-
"\n",
|
182 |
-
"# for example:\n",
|
183 |
-
"filter_dict[\"human_Placenta9595_data\"]"
|
184 |
-
]
|
185 |
-
},
|
186 |
-
{
|
187 |
-
"cell_type": "code",
|
188 |
-
"execution_count": null,
|
189 |
-
"id": "207e3571-0236-4493-83b3-a89b67b16cb2",
|
190 |
-
"metadata": {
|
191 |
-
"tags": []
|
192 |
-
},
|
193 |
-
"outputs": [],
|
194 |
-
"source": [
|
195 |
-
"for dir_name in tqdm(dir_list):\n",
|
196 |
-
"\n",
|
197 |
-
" df = pd.DataFrame()\n",
|
198 |
-
" ct_df = pd.DataFrame(columns=[\"Cell\",\"Cell_type\"])\n",
|
199 |
-
" \n",
|
200 |
-
" subrootdir = f\"{rootdir}{dir_name}/\"\n",
|
201 |
-
" for subdir, dirs, files in os.walk(subrootdir):\n",
|
202 |
-
" for i in range(len(files)):\n",
|
203 |
-
" file = files[i]\n",
|
204 |
-
" if file.endswith(\"_data.csv\"):\n",
|
205 |
-
" file_prefix = file.replace(\"_data.csv\",\"\")\n",
|
206 |
-
" sample_prefix = file.replace(\".csv\",\"\")\n",
|
207 |
-
" filter_df = filter_dict[sample_prefix]\n",
|
208 |
-
" sample_to_analyze = list(filter_df[filter_df[\"filter_pass\"]==1][\"original_cell_id\"])\n",
|
209 |
-
" \n",
|
210 |
-
" # collect data for each tissue\n",
|
211 |
-
" df_i = pd.read_csv(f\"{subrootdir}{file}\", index_col=0)\n",
|
212 |
-
" df_i = df_i[sample_to_analyze]\n",
|
213 |
-
" df_i.columns = [f\"{i}_{cell_id}\" for cell_id in df_i.columns]\n",
|
214 |
-
" df = pd.concat([df,df_i],axis=1)\n",
|
215 |
-
" \n",
|
216 |
-
" # collect cell type metadata\n",
|
217 |
-
" ct_df_i = pd.read_csv(f\"{subrootdir}{file_prefix}_celltype.csv\", index_col=0)\n",
|
218 |
-
" ct_df_i.columns = [\"Cell\",\"Cell_type\"]\n",
|
219 |
-
" ct_df_i[\"Cell\"] = [f\"{i}_{cell_id}\" for cell_id in ct_df_i[\"Cell\"]]\n",
|
220 |
-
" ct_df = pd.concat([ct_df,ct_df_i],axis=0)\n",
|
221 |
-
" \n",
|
222 |
-
" # per published scDeepsort method, filter data for cell types >0.5% of data\n",
|
223 |
-
" ct_counts = Counter(ct_df[\"Cell_type\"])\n",
|
224 |
-
" total_count = sum(ct_counts.values())\n",
|
225 |
-
" nonrare_cell_types = [cell_type for cell_type,count in ct_counts.items() if count>(total_count*0.005)]\n",
|
226 |
-
" nonrare_cells = list(ct_df[ct_df[\"Cell_type\"].isin(nonrare_cell_types)][\"Cell\"])\n",
|
227 |
-
" df = df[df.columns.intersection(nonrare_cells)]\n",
|
228 |
-
"\n",
|
229 |
-
" # split into 80/20 train/test data\n",
|
230 |
-
" train, test = train_test_split(df.T, test_size=0.2)\n",
|
231 |
-
" train = train.T\n",
|
232 |
-
" test = test.T \n",
|
233 |
-
" \n",
|
234 |
-
" # save filtered train/test data\n",
|
235 |
-
" train.to_csv(f\"{subrootdir}{dir_name}_filtered_data_train.csv\")\n",
|
236 |
-
" test.to_csv(f\"{subrootdir}{dir_name}_filtered_data_test.csv\")\n",
|
237 |
-
"\n",
|
238 |
-
" # split metadata into train/test data\n",
|
239 |
-
" ct_df_train = ct_df[ct_df[\"Cell\"].isin(list(train.columns))]\n",
|
240 |
-
" ct_df_test = ct_df[ct_df[\"Cell\"].isin(list(test.columns))]\n",
|
241 |
-
" train_order_dict = dict(zip(train.columns,[i for i in range(len(train.columns))]))\n",
|
242 |
-
" test_order_dict = dict(zip(test.columns,[i for i in range(len(test.columns))]))\n",
|
243 |
-
" ct_df_train[\"order\"] = [train_order_dict[cell_id] for cell_id in ct_df_train[\"Cell\"]]\n",
|
244 |
-
" ct_df_test[\"order\"] = [test_order_dict[cell_id] for cell_id in ct_df_test[\"Cell\"]]\n",
|
245 |
-
" ct_df_train = ct_df_train.sort_values(\"order\")\n",
|
246 |
-
" ct_df_test = ct_df_test.sort_values(\"order\")\n",
|
247 |
-
" ct_df_train = ct_df_train.drop(\"order\",axis=1)\n",
|
248 |
-
" ct_df_test = ct_df_test.drop(\"order\",axis=1)\n",
|
249 |
-
" assert list(ct_df_train[\"Cell\"]) == list(train.columns)\n",
|
250 |
-
" assert list(ct_df_test[\"Cell\"]) == list(test.columns)\n",
|
251 |
-
" train_labels = list(Counter(ct_df_train[\"Cell_type\"]).keys())\n",
|
252 |
-
" test_labels = list(Counter(ct_df_test[\"Cell_type\"]).keys())\n",
|
253 |
-
" assert set(train_labels) == set(test_labels)\n",
|
254 |
-
" \n",
|
255 |
-
" # save train/test cell type annotations\n",
|
256 |
-
" ct_df_train.to_csv(f\"{subrootdir}{dir_name}_filtered_celltype_train.csv\")\n",
|
257 |
-
" ct_df_test.to_csv(f\"{subrootdir}{dir_name}_filtered_celltype_test.csv\")\n",
|
258 |
-
" "
|
259 |
-
]
|
260 |
-
}
|
261 |
-
],
|
262 |
-
"metadata": {
|
263 |
-
"kernelspec": {
|
264 |
-
"display_name": "Python 3.8.6 64-bit ('3.8.6')",
|
265 |
-
"language": "python",
|
266 |
-
"name": "python3"
|
267 |
-
},
|
268 |
-
"language_info": {
|
269 |
-
"codemirror_mode": {
|
270 |
-
"name": "ipython",
|
271 |
-
"version": 3
|
272 |
-
},
|
273 |
-
"file_extension": ".py",
|
274 |
-
"mimetype": "text/x-python",
|
275 |
-
"name": "python",
|
276 |
-
"nbconvert_exporter": "python",
|
277 |
-
"pygments_lexer": "ipython3",
|
278 |
-
"version": "3.8.6"
|
279 |
-
},
|
280 |
-
"vscode": {
|
281 |
-
"interpreter": {
|
282 |
-
"hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
|
283 |
-
}
|
284 |
-
}
|
285 |
-
},
|
286 |
-
"nbformat": 4,
|
287 |
-
"nbformat_minor": 5
|
288 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
benchmarking/randomForest_token_classifier_dosageTF_10k.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
benchmarking/scDeepsort_train_predict.ipynb
DELETED
@@ -1,166 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": 3,
|
6 |
-
"id": "83d8d249-affe-45dd-915e-992b4b35b31a",
|
7 |
-
"metadata": {},
|
8 |
-
"outputs": [],
|
9 |
-
"source": [
|
10 |
-
"import os\n",
|
11 |
-
"import numpy as np\n",
|
12 |
-
"import pandas as pd\n",
|
13 |
-
"import deepsort\n",
|
14 |
-
"from sklearn.metrics import accuracy_score, f1_score\n",
|
15 |
-
"from tqdm.notebook import tqdm\n",
|
16 |
-
"import pickle"
|
17 |
-
]
|
18 |
-
},
|
19 |
-
{
|
20 |
-
"cell_type": "code",
|
21 |
-
"execution_count": 4,
|
22 |
-
"id": "25de46ec-8a41-484d-8e14-d2b19768fc2c",
|
23 |
-
"metadata": {},
|
24 |
-
"outputs": [],
|
25 |
-
"source": [
|
26 |
-
"def compute_metrics(labels, preds):\n",
|
27 |
-
"\n",
|
28 |
-
" # calculate accuracy and macro f1 using sklearn's function\n",
|
29 |
-
" acc = accuracy_score(labels, preds)\n",
|
30 |
-
" macro_f1 = f1_score(labels, preds, average='macro')\n",
|
31 |
-
" return {\n",
|
32 |
-
" 'accuracy': acc,\n",
|
33 |
-
" 'macro_f1': macro_f1\n",
|
34 |
-
" }"
|
35 |
-
]
|
36 |
-
},
|
37 |
-
{
|
38 |
-
"cell_type": "code",
|
39 |
-
"execution_count": 5,
|
40 |
-
"id": "a4029b2b-afca-4300-82a2-082fec59f191",
|
41 |
-
"metadata": {},
|
42 |
-
"outputs": [
|
43 |
-
{
|
44 |
-
"data": {
|
45 |
-
"text/plain": [
|
46 |
-
"['pancreas',\n",
|
47 |
-
" 'liver',\n",
|
48 |
-
" 'blood',\n",
|
49 |
-
" 'lung',\n",
|
50 |
-
" 'spleen',\n",
|
51 |
-
" 'placenta',\n",
|
52 |
-
" 'colorectum',\n",
|
53 |
-
" 'kidney',\n",
|
54 |
-
" 'brain']"
|
55 |
-
]
|
56 |
-
},
|
57 |
-
"execution_count": 5,
|
58 |
-
"metadata": {},
|
59 |
-
"output_type": "execute_result"
|
60 |
-
}
|
61 |
-
],
|
62 |
-
"source": [
|
63 |
-
"rootdir = \"/path/to/data/\"\n",
|
64 |
-
"\n",
|
65 |
-
"dir_list = []\n",
|
66 |
-
"for dir_i in os.listdir(rootdir):\n",
|
67 |
-
" if (\"results\" not in dir_i) & (os.path.isdir(os.path.join(rootdir, dir_i))):\n",
|
68 |
-
" dir_list += [dir_i]\n",
|
69 |
-
"dir_list"
|
70 |
-
]
|
71 |
-
},
|
72 |
-
{
|
73 |
-
"cell_type": "code",
|
74 |
-
"execution_count": null,
|
75 |
-
"id": "ddcdc5cd-871e-4fd2-8457-18d3049fa76c",
|
76 |
-
"metadata": {
|
77 |
-
"tags": []
|
78 |
-
},
|
79 |
-
"outputs": [],
|
80 |
-
"source": [
|
81 |
-
"output_dir = \"results_EDefault_filtered\"\n",
|
82 |
-
"n_epochs = \"Default\" # scDeepsort default epochs = 300\n",
|
83 |
-
"\n",
|
84 |
-
"results_dict = dict()\n",
|
85 |
-
"for dir_name in tqdm(dir_list):\n",
|
86 |
-
" print(f\"TRAINING: {dir_name}\")\n",
|
87 |
-
" subrootdir = f\"{rootdir}{dir_name}/\"\n",
|
88 |
-
" train_files = [(f\"{subrootdir}{dir_name}_filtered_data_train.csv\",f\"{subrootdir}{dir_name}_filtered_celltype_train.csv\")]\n",
|
89 |
-
" test_file = f\"{subrootdir}{dir_name}_filtered_data_test.csv\"\n",
|
90 |
-
" label_file = f\"{subrootdir}{dir_name}_filtered_celltype_test.csv\"\n",
|
91 |
-
" \n",
|
92 |
-
" # define the model\n",
|
93 |
-
" model = deepsort.DeepSortClassifier(species='human',\n",
|
94 |
-
" tissue=dir_name,\n",
|
95 |
-
" gpu_id=0,\n",
|
96 |
-
" random_seed=1,\n",
|
97 |
-
" validation_fraction=0) # use all training data (already held out 20% in test data file)\n",
|
98 |
-
"\n",
|
99 |
-
" # fit the model\n",
|
100 |
-
" model.fit(train_files, save_path=f\"{subrootdir}{output_dir}\")\n",
|
101 |
-
" \n",
|
102 |
-
" # use the saved model to predict cell types in test data\n",
|
103 |
-
" model.predict(input_file=test_file,\n",
|
104 |
-
" model_path=f\"{subrootdir}{output_dir}\",\n",
|
105 |
-
" save_path=f\"{subrootdir}{output_dir}\",\n",
|
106 |
-
" unsure_rate=0,\n",
|
107 |
-
" file_type='csv')\n",
|
108 |
-
" labels_df = pd.read_csv(label_file)\n",
|
109 |
-
" preds_df = pd.read_csv(f\"{subrootdir}{output_dir}/human_{dir_name}_{dir_name}_filtered_data_test.csv\")\n",
|
110 |
-
" label_cell_ids = labels_df[\"Cell\"]\n",
|
111 |
-
" pred_cell_ids = preds_df[\"index\"]\n",
|
112 |
-
" assert list(label_cell_ids) == list(pred_cell_ids)\n",
|
113 |
-
" labels = list(labels_df[\"Cell_type\"])\n",
|
114 |
-
" if isinstance(preds_df[\"cell_subtype\"][0],float):\n",
|
115 |
-
" if np.isnan(preds_df[\"cell_subtype\"][0]):\n",
|
116 |
-
" preds = list(preds_df[\"cell_type\"])\n",
|
117 |
-
" results = compute_metrics(labels, preds)\n",
|
118 |
-
" else:\n",
|
119 |
-
" preds1 = list(preds_df[\"cell_type\"])\n",
|
120 |
-
" preds2 = list(preds_df[\"cell_subtype\"])\n",
|
121 |
-
" results1 = compute_metrics(labels, preds1)\n",
|
122 |
-
" results2 = compute_metrics(labels, preds2)\n",
|
123 |
-
" if results2[\"accuracy\"] > results1[\"accuracy\"]:\n",
|
124 |
-
" results = results2\n",
|
125 |
-
" else:\n",
|
126 |
-
" results = results1\n",
|
127 |
-
" \n",
|
128 |
-
" print(f\"{dir_name}: {results}\")\n",
|
129 |
-
" results_dict[dir_name] = results\n",
|
130 |
-
" with open(f\"{subrootdir}deepsort_E{n_epochs}_filtered_pred_{dir_name}.pickle\", \"wb\") as output_file:\n",
|
131 |
-
" pickle.dump(results, output_file)\n",
|
132 |
-
"\n",
|
133 |
-
"# save results\n",
|
134 |
-
"with open(f\"{rootdir}deepsort_E{n_epochs}_filtered_pred_dict.pickle\", \"wb\") as output_file:\n",
|
135 |
-
" pickle.dump(results_dict, output_file)\n",
|
136 |
-
" "
|
137 |
-
]
|
138 |
-
}
|
139 |
-
],
|
140 |
-
"metadata": {
|
141 |
-
"kernelspec": {
|
142 |
-
"display_name": "Python 3.8.6 64-bit ('3.8.6')",
|
143 |
-
"language": "python",
|
144 |
-
"name": "python3"
|
145 |
-
},
|
146 |
-
"language_info": {
|
147 |
-
"codemirror_mode": {
|
148 |
-
"name": "ipython",
|
149 |
-
"version": 3
|
150 |
-
},
|
151 |
-
"file_extension": ".py",
|
152 |
-
"mimetype": "text/x-python",
|
153 |
-
"name": "python",
|
154 |
-
"nbconvert_exporter": "python",
|
155 |
-
"pygments_lexer": "ipython3",
|
156 |
-
"version": "3.8.6"
|
157 |
-
},
|
158 |
-
"vscode": {
|
159 |
-
"interpreter": {
|
160 |
-
"hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
|
161 |
-
}
|
162 |
-
}
|
163 |
-
},
|
164 |
-
"nbformat": 4,
|
165 |
-
"nbformat_minor": 5
|
166 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/example_input_files/bivalent_promoters/bivalent_gene_labels.txt
DELETED
@@ -1,107 +0,0 @@
|
|
1 |
-
ENSG00000005073
|
2 |
-
ENSG00000007372
|
3 |
-
ENSG00000007372
|
4 |
-
ENSG00000043355
|
5 |
-
ENSG00000068120
|
6 |
-
ENSG00000075891
|
7 |
-
ENSG00000078399
|
8 |
-
ENSG00000105991
|
9 |
-
ENSG00000105996
|
10 |
-
ENSG00000105997
|
11 |
-
ENSG00000106004
|
12 |
-
ENSG00000106006
|
13 |
-
ENSG00000106031
|
14 |
-
ENSG00000106038
|
15 |
-
ENSG00000107807
|
16 |
-
ENSG00000107821
|
17 |
-
ENSG00000107831
|
18 |
-
ENSG00000107859
|
19 |
-
ENSG00000107862
|
20 |
-
ENSG00000108511
|
21 |
-
ENSG00000108786
|
22 |
-
ENSG00000108797
|
23 |
-
ENSG00000110693
|
24 |
-
ENSG00000110693
|
25 |
-
ENSG00000113430
|
26 |
-
ENSG00000115844
|
27 |
-
ENSG00000117707
|
28 |
-
ENSG00000117707
|
29 |
-
ENSG00000119915
|
30 |
-
ENSG00000120068
|
31 |
-
ENSG00000120075
|
32 |
-
ENSG00000120093
|
33 |
-
ENSG00000120093
|
34 |
-
ENSG00000120094
|
35 |
-
ENSG00000122592
|
36 |
-
ENSG00000125285
|
37 |
-
ENSG00000125798
|
38 |
-
ENSG00000125813
|
39 |
-
ENSG00000125813
|
40 |
-
ENSG00000125816
|
41 |
-
ENSG00000125820
|
42 |
-
ENSG00000128573
|
43 |
-
ENSG00000128645
|
44 |
-
ENSG00000128652
|
45 |
-
ENSG00000128709
|
46 |
-
ENSG00000128710
|
47 |
-
ENSG00000128713
|
48 |
-
ENSG00000128714
|
49 |
-
ENSG00000129514
|
50 |
-
ENSG00000131196
|
51 |
-
ENSG00000131196
|
52 |
-
ENSG00000136327
|
53 |
-
ENSG00000136944
|
54 |
-
ENSG00000138083
|
55 |
-
ENSG00000139800
|
56 |
-
ENSG00000143013
|
57 |
-
ENSG00000143632
|
58 |
-
ENSG00000144355
|
59 |
-
ENSG00000148680
|
60 |
-
ENSG00000148826
|
61 |
-
ENSG00000151615
|
62 |
-
ENSG00000152192
|
63 |
-
ENSG00000152977
|
64 |
-
ENSG00000159184
|
65 |
-
ENSG00000159387
|
66 |
-
ENSG00000163412
|
67 |
-
ENSG00000163421
|
68 |
-
ENSG00000163623
|
69 |
-
ENSG00000164330
|
70 |
-
ENSG00000164438
|
71 |
-
ENSG00000164690
|
72 |
-
ENSG00000164778
|
73 |
-
ENSG00000165588
|
74 |
-
ENSG00000165588
|
75 |
-
ENSG00000165588
|
76 |
-
ENSG00000166407
|
77 |
-
ENSG00000166407
|
78 |
-
ENSG00000168505
|
79 |
-
ENSG00000168875
|
80 |
-
ENSG00000169946
|
81 |
-
ENSG00000170166
|
82 |
-
ENSG00000170178
|
83 |
-
ENSG00000170549
|
84 |
-
ENSG00000170561
|
85 |
-
ENSG00000170577
|
86 |
-
ENSG00000170689
|
87 |
-
ENSG00000173917
|
88 |
-
ENSG00000174279
|
89 |
-
ENSG00000174963
|
90 |
-
ENSG00000174963
|
91 |
-
ENSG00000175879
|
92 |
-
ENSG00000176842
|
93 |
-
ENSG00000177508
|
94 |
-
ENSG00000178573
|
95 |
-
ENSG00000182568
|
96 |
-
ENSG00000182742
|
97 |
-
ENSG00000185551
|
98 |
-
ENSG00000185551
|
99 |
-
ENSG00000187140
|
100 |
-
ENSG00000196092
|
101 |
-
ENSG00000197576
|
102 |
-
ENSG00000198807
|
103 |
-
ENSG00000253293
|
104 |
-
ENSG00000256463
|
105 |
-
ENSG00000260027
|
106 |
-
ENSG00000276644
|
107 |
-
ENSG00000285708
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/example_input_files/bivalent_promoters/lys4_only_gene_labels.txt
DELETED
@@ -1,80 +0,0 @@
|
|
1 |
-
ENSG00000012048
|
2 |
-
ENSG00000033627
|
3 |
-
ENSG00000037042
|
4 |
-
ENSG00000055950
|
5 |
-
ENSG00000067596
|
6 |
-
ENSG00000069248
|
7 |
-
ENSG00000072682
|
8 |
-
ENSG00000085274
|
9 |
-
ENSG00000088035
|
10 |
-
ENSG00000088930
|
11 |
-
ENSG00000095539
|
12 |
-
ENSG00000102471
|
13 |
-
ENSG00000102967
|
14 |
-
ENSG00000104313
|
15 |
-
ENSG00000105146
|
16 |
-
ENSG00000105379
|
17 |
-
ENSG00000105982
|
18 |
-
ENSG00000105983
|
19 |
-
ENSG00000107816
|
20 |
-
ENSG00000107819
|
21 |
-
ENSG00000107829
|
22 |
-
ENSG00000107833
|
23 |
-
ENSG00000108784
|
24 |
-
ENSG00000108799
|
25 |
-
ENSG00000108828
|
26 |
-
ENSG00000108830
|
27 |
-
ENSG00000109911
|
28 |
-
ENSG00000113522
|
29 |
-
ENSG00000119487
|
30 |
-
ENSG00000120049
|
31 |
-
ENSG00000125347
|
32 |
-
ENSG00000126581
|
33 |
-
ENSG00000131374
|
34 |
-
ENSG00000131437
|
35 |
-
ENSG00000131462
|
36 |
-
ENSG00000131467
|
37 |
-
ENSG00000131469
|
38 |
-
ENSG00000131470
|
39 |
-
ENSG00000131475
|
40 |
-
ENSG00000131477
|
41 |
-
ENSG00000135272
|
42 |
-
ENSG00000135776
|
43 |
-
ENSG00000135801
|
44 |
-
ENSG00000136158
|
45 |
-
ENSG00000140262
|
46 |
-
ENSG00000140450
|
47 |
-
ENSG00000140563
|
48 |
-
ENSG00000140829
|
49 |
-
ENSG00000140830
|
50 |
-
ENSG00000145494
|
51 |
-
ENSG00000146909
|
52 |
-
ENSG00000147905
|
53 |
-
ENSG00000148688
|
54 |
-
ENSG00000148840
|
55 |
-
ENSG00000148950
|
56 |
-
ENSG00000151332
|
57 |
-
ENSG00000151338
|
58 |
-
ENSG00000165637
|
59 |
-
ENSG00000165644
|
60 |
-
ENSG00000166135
|
61 |
-
ENSG00000166136
|
62 |
-
ENSG00000166167
|
63 |
-
ENSG00000166169
|
64 |
-
ENSG00000166189
|
65 |
-
ENSG00000166197
|
66 |
-
ENSG00000166377
|
67 |
-
ENSG00000167081
|
68 |
-
ENSG00000168118
|
69 |
-
ENSG00000171421
|
70 |
-
ENSG00000175832
|
71 |
-
ENSG00000186480
|
72 |
-
ENSG00000187098
|
73 |
-
ENSG00000188554
|
74 |
-
ENSG00000196628
|
75 |
-
ENSG00000196628
|
76 |
-
ENSG00000198728
|
77 |
-
ENSG00000198728
|
78 |
-
ENSG00000198863
|
79 |
-
ENSG00000285283
|
80 |
-
ENSG00000285708
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/example_input_files/bivalent_promoters/no_methylation_gene_labels.txt
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
ENSG00000068079
|
2 |
-
ENSG00000068383
|
3 |
-
ENSG00000075290
|
4 |
-
ENSG00000104313
|
5 |
-
ENSG00000105370
|
6 |
-
ENSG00000105374
|
7 |
-
ENSG00000105383
|
8 |
-
ENSG00000106536
|
9 |
-
ENSG00000113520
|
10 |
-
ENSG00000113525
|
11 |
-
ENSG00000118557
|
12 |
-
ENSG00000125257
|
13 |
-
ENSG00000128573
|
14 |
-
ENSG00000131471
|
15 |
-
ENSG00000131480
|
16 |
-
ENSG00000131482
|
17 |
-
ENSG00000134532
|
18 |
-
ENSG00000136319
|
19 |
-
ENSG00000138792
|
20 |
-
ENSG00000140262
|
21 |
-
ENSG00000140718
|
22 |
-
ENSG00000147488
|
23 |
-
ENSG00000147488
|
24 |
-
ENSG00000148677
|
25 |
-
ENSG00000151322
|
26 |
-
ENSG00000151322
|
27 |
-
ENSG00000156113
|
28 |
-
ENSG00000164399
|
29 |
-
ENSG00000164400
|
30 |
-
ENSG00000167749
|
31 |
-
ENSG00000167754
|
32 |
-
ENSG00000167755
|
33 |
-
ENSG00000169035
|
34 |
-
ENSG00000170927
|
35 |
-
ENSG00000182177
|
36 |
-
ENSG00000186153
|
37 |
-
ENSG00000187098
|
38 |
-
ENSG00000204764
|
39 |
-
ENSG00000213022
|
40 |
-
ENSG00000213822
|
41 |
-
ENSG00000261701
|
42 |
-
ENSG00000285708
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/example_input_files/dosage_sensitive_tfs/dosage_sens_tf_labels.csv
DELETED
@@ -1,369 +0,0 @@
|
|
1 |
-
dosage_sensitive,dosage_insensitive
|
2 |
-
ENSG00000008197,ENSG00000010539
|
3 |
-
ENSG00000008441,ENSG00000011590
|
4 |
-
ENSG00000010818,ENSG00000063438
|
5 |
-
ENSG00000011332,ENSG00000063587
|
6 |
-
ENSG00000030419,ENSG00000064218
|
7 |
-
ENSG00000062194,ENSG00000064489
|
8 |
-
ENSG00000065970,ENSG00000067646
|
9 |
-
ENSG00000067082,ENSG00000075407
|
10 |
-
ENSG00000069667,ENSG00000079263
|
11 |
-
ENSG00000072736,ENSG00000081386
|
12 |
-
ENSG00000073282,ENSG00000083812
|
13 |
-
ENSG00000073861,ENSG00000083814
|
14 |
-
ENSG00000077092,ENSG00000083828
|
15 |
-
ENSG00000083307,ENSG00000083838
|
16 |
-
ENSG00000084676,ENSG00000083844
|
17 |
-
ENSG00000085276,ENSG00000085644
|
18 |
-
ENSG00000087510,ENSG00000089335
|
19 |
-
ENSG00000087903,ENSG00000089775
|
20 |
-
ENSG00000089225,ENSG00000102901
|
21 |
-
ENSG00000091656,ENSG00000103199
|
22 |
-
ENSG00000091831,ENSG00000105136
|
23 |
-
ENSG00000095951,ENSG00000105610
|
24 |
-
ENSG00000100644,ENSG00000105672
|
25 |
-
ENSG00000100968,ENSG00000106410
|
26 |
-
ENSG00000101076,ENSG00000106948
|
27 |
-
ENSG00000101412,ENSG00000109705
|
28 |
-
ENSG00000102870,ENSG00000115568
|
29 |
-
ENSG00000102935,ENSG00000117010
|
30 |
-
ENSG00000103449,ENSG00000118620
|
31 |
-
ENSG00000105698,ENSG00000119574
|
32 |
-
ENSG00000105866,ENSG00000120669
|
33 |
-
ENSG00000106689,ENSG00000121406
|
34 |
-
ENSG00000106852,ENSG00000121864
|
35 |
-
ENSG00000111249,ENSG00000122085
|
36 |
-
ENSG00000111783,ENSG00000124203
|
37 |
-
ENSG00000112033,ENSG00000124232
|
38 |
-
ENSG00000112246,ENSG00000124444
|
39 |
-
ENSG00000112561,ENSG00000124613
|
40 |
-
ENSG00000112837,ENSG00000125520
|
41 |
-
ENSG00000115112,ENSG00000127081
|
42 |
-
ENSG00000116809,ENSG00000127903
|
43 |
-
ENSG00000116833,ENSG00000127989
|
44 |
-
ENSG00000117000,ENSG00000129028
|
45 |
-
ENSG00000118263,ENSG00000129071
|
46 |
-
ENSG00000118922,ENSG00000129194
|
47 |
-
ENSG00000119547,ENSG00000130544
|
48 |
-
ENSG00000120798,ENSG00000130818
|
49 |
-
ENSG00000121068,ENSG00000131848
|
50 |
-
ENSG00000123358,ENSG00000132010
|
51 |
-
ENSG00000123411,ENSG00000132846
|
52 |
-
ENSG00000124496,ENSG00000133250
|
53 |
-
ENSG00000124813,ENSG00000134874
|
54 |
-
ENSG00000125398,ENSG00000135899
|
55 |
-
ENSG00000125618,ENSG00000136866
|
56 |
-
ENSG00000126368,ENSG00000137185
|
57 |
-
ENSG00000127152,ENSG00000137504
|
58 |
-
ENSG00000128573,ENSG00000138380
|
59 |
-
ENSG00000129173,ENSG00000140993
|
60 |
-
ENSG00000131759,ENSG00000141946
|
61 |
-
ENSG00000132005,ENSG00000142556
|
62 |
-
ENSG00000133794,ENSG00000143067
|
63 |
-
ENSG00000134046,ENSG00000144026
|
64 |
-
ENSG00000134317,ENSG00000144161
|
65 |
-
ENSG00000134323,ENSG00000145908
|
66 |
-
ENSG00000134852,ENSG00000146587
|
67 |
-
ENSG00000135111,ENSG00000147183
|
68 |
-
ENSG00000137203,ENSG00000147789
|
69 |
-
ENSG00000137270,ENSG00000148300
|
70 |
-
ENSG00000138795,ENSG00000149054
|
71 |
-
ENSG00000139083,ENSG00000149922
|
72 |
-
ENSG00000139793,ENSG00000151500
|
73 |
-
ENSG00000140548,ENSG00000151650
|
74 |
-
ENSG00000140968,ENSG00000151657
|
75 |
-
ENSG00000142611,ENSG00000152439
|
76 |
-
ENSG00000143033,ENSG00000152467
|
77 |
-
ENSG00000143171,ENSG00000152475
|
78 |
-
ENSG00000143190,ENSG00000153975
|
79 |
-
ENSG00000143355,ENSG00000155592
|
80 |
-
ENSG00000143365,ENSG00000156469
|
81 |
-
ENSG00000143373,ENSG00000157429
|
82 |
-
ENSG00000143437,ENSG00000159882
|
83 |
-
ENSG00000144355,ENSG00000159885
|
84 |
-
ENSG00000147862,ENSG00000159915
|
85 |
-
ENSG00000148516,ENSG00000160224
|
86 |
-
ENSG00000150907,ENSG00000160229
|
87 |
-
ENSG00000151090,ENSG00000160352
|
88 |
-
ENSG00000153234,ENSG00000160908
|
89 |
-
ENSG00000158055,ENSG00000160961
|
90 |
-
ENSG00000160007,ENSG00000161277
|
91 |
-
ENSG00000160094,ENSG00000162086
|
92 |
-
ENSG00000161405,ENSG00000163516
|
93 |
-
ENSG00000162761,ENSG00000164011
|
94 |
-
ENSG00000162924,ENSG00000164048
|
95 |
-
ENSG00000164683,ENSG00000164296
|
96 |
-
ENSG00000164684,ENSG00000164299
|
97 |
-
ENSG00000167182,ENSG00000165066
|
98 |
-
ENSG00000168610,ENSG00000165512
|
99 |
-
ENSG00000168916,ENSG00000165643
|
100 |
-
ENSG00000169554,ENSG00000165684
|
101 |
-
ENSG00000169946,ENSG00000166529
|
102 |
-
ENSG00000170370,ENSG00000166823
|
103 |
-
ENSG00000172733,ENSG00000166860
|
104 |
-
ENSG00000172819,ENSG00000167034
|
105 |
-
ENSG00000177463,ENSG00000167384
|
106 |
-
ENSG00000178177,ENSG00000167554
|
107 |
-
ENSG00000179348,ENSG00000167625
|
108 |
-
ENSG00000179361,ENSG00000167785
|
109 |
-
ENSG00000179456,ENSG00000167800
|
110 |
-
ENSG00000180357,ENSG00000167840
|
111 |
-
ENSG00000185551,ENSG00000167962
|
112 |
-
ENSG00000185591,ENSG00000167981
|
113 |
-
ENSG00000187098,ENSG00000168152
|
114 |
-
ENSG00000187605,ENSG00000168286
|
115 |
-
ENSG00000189308,ENSG00000168769
|
116 |
-
ENSG00000196092,ENSG00000169131
|
117 |
-
ENSG00000196482,ENSG00000169136
|
118 |
-
ENSG00000196628,ENSG00000169548
|
119 |
-
ENSG00000197757,ENSG00000169951
|
120 |
-
ENSG00000198815,ENSG00000169955
|
121 |
-
ENSG00000198945,ENSG00000169989
|
122 |
-
ENSG00000198963,ENSG00000170260
|
123 |
-
ENSG00000204231,ENSG00000170608
|
124 |
-
,ENSG00000170954
|
125 |
-
,ENSG00000171291
|
126 |
-
,ENSG00000171295
|
127 |
-
,ENSG00000171425
|
128 |
-
,ENSG00000171443
|
129 |
-
,ENSG00000171466
|
130 |
-
,ENSG00000171469
|
131 |
-
,ENSG00000171574
|
132 |
-
,ENSG00000171606
|
133 |
-
,ENSG00000171827
|
134 |
-
,ENSG00000171872
|
135 |
-
,ENSG00000171970
|
136 |
-
,ENSG00000172000
|
137 |
-
,ENSG00000172888
|
138 |
-
,ENSG00000173041
|
139 |
-
,ENSG00000173258
|
140 |
-
,ENSG00000173480
|
141 |
-
,ENSG00000173673
|
142 |
-
,ENSG00000173825
|
143 |
-
,ENSG00000174255
|
144 |
-
,ENSG00000174652
|
145 |
-
,ENSG00000174796
|
146 |
-
,ENSG00000175279
|
147 |
-
,ENSG00000175325
|
148 |
-
,ENSG00000175395
|
149 |
-
,ENSG00000175691
|
150 |
-
,ENSG00000176009
|
151 |
-
,ENSG00000176024
|
152 |
-
,ENSG00000176083
|
153 |
-
,ENSG00000176222
|
154 |
-
,ENSG00000176302
|
155 |
-
,ENSG00000176472
|
156 |
-
,ENSG00000176678
|
157 |
-
,ENSG00000176679
|
158 |
-
,ENSG00000177030
|
159 |
-
,ENSG00000177494
|
160 |
-
,ENSG00000177599
|
161 |
-
,ENSG00000177683
|
162 |
-
,ENSG00000177842
|
163 |
-
,ENSG00000177873
|
164 |
-
,ENSG00000177932
|
165 |
-
,ENSG00000177946
|
166 |
-
,ENSG00000178150
|
167 |
-
,ENSG00000178229
|
168 |
-
,ENSG00000178338
|
169 |
-
,ENSG00000178386
|
170 |
-
,ENSG00000178665
|
171 |
-
,ENSG00000178917
|
172 |
-
,ENSG00000178928
|
173 |
-
,ENSG00000178935
|
174 |
-
,ENSG00000179195
|
175 |
-
,ENSG00000179772
|
176 |
-
,ENSG00000179774
|
177 |
-
,ENSG00000179886
|
178 |
-
,ENSG00000179909
|
179 |
-
,ENSG00000179922
|
180 |
-
,ENSG00000179930
|
181 |
-
,ENSG00000179943
|
182 |
-
,ENSG00000179965
|
183 |
-
,ENSG00000180257
|
184 |
-
,ENSG00000180346
|
185 |
-
,ENSG00000180532
|
186 |
-
,ENSG00000180535
|
187 |
-
,ENSG00000180938
|
188 |
-
,ENSG00000181135
|
189 |
-
,ENSG00000181444
|
190 |
-
,ENSG00000181450
|
191 |
-
,ENSG00000181638
|
192 |
-
,ENSG00000181894
|
193 |
-
,ENSG00000181896
|
194 |
-
,ENSG00000182318
|
195 |
-
,ENSG00000182983
|
196 |
-
,ENSG00000182986
|
197 |
-
,ENSG00000183340
|
198 |
-
,ENSG00000183647
|
199 |
-
,ENSG00000183734
|
200 |
-
,ENSG00000183850
|
201 |
-
,ENSG00000184221
|
202 |
-
,ENSG00000184517
|
203 |
-
,ENSG00000184635
|
204 |
-
,ENSG00000184677
|
205 |
-
,ENSG00000184895
|
206 |
-
,ENSG00000185155
|
207 |
-
,ENSG00000185252
|
208 |
-
,ENSG00000185404
|
209 |
-
,ENSG00000185730
|
210 |
-
,ENSG00000186020
|
211 |
-
,ENSG00000186026
|
212 |
-
,ENSG00000186051
|
213 |
-
,ENSG00000186103
|
214 |
-
,ENSG00000186230
|
215 |
-
,ENSG00000186300
|
216 |
-
,ENSG00000186376
|
217 |
-
,ENSG00000186446
|
218 |
-
,ENSG00000186496
|
219 |
-
,ENSG00000186777
|
220 |
-
,ENSG00000186812
|
221 |
-
,ENSG00000186814
|
222 |
-
,ENSG00000187626
|
223 |
-
,ENSG00000187801
|
224 |
-
,ENSG00000187821
|
225 |
-
,ENSG00000187855
|
226 |
-
,ENSG00000187987
|
227 |
-
,ENSG00000188033
|
228 |
-
,ENSG00000188095
|
229 |
-
,ENSG00000188171
|
230 |
-
,ENSG00000188295
|
231 |
-
,ENSG00000188321
|
232 |
-
,ENSG00000188629
|
233 |
-
,ENSG00000188785
|
234 |
-
,ENSG00000188868
|
235 |
-
,ENSG00000189164
|
236 |
-
,ENSG00000189190
|
237 |
-
,ENSG00000189298
|
238 |
-
,ENSG00000189299
|
239 |
-
,ENSG00000196152
|
240 |
-
,ENSG00000196172
|
241 |
-
,ENSG00000196214
|
242 |
-
,ENSG00000196345
|
243 |
-
,ENSG00000196357
|
244 |
-
,ENSG00000196378
|
245 |
-
,ENSG00000196381
|
246 |
-
,ENSG00000196387
|
247 |
-
,ENSG00000196391
|
248 |
-
,ENSG00000196417
|
249 |
-
,ENSG00000196418
|
250 |
-
,ENSG00000196456
|
251 |
-
,ENSG00000196460
|
252 |
-
,ENSG00000196466
|
253 |
-
,ENSG00000196605
|
254 |
-
,ENSG00000196646
|
255 |
-
,ENSG00000196652
|
256 |
-
,ENSG00000196670
|
257 |
-
,ENSG00000196693
|
258 |
-
,ENSG00000196705
|
259 |
-
,ENSG00000196812
|
260 |
-
,ENSG00000196946
|
261 |
-
,ENSG00000197008
|
262 |
-
,ENSG00000197020
|
263 |
-
,ENSG00000197037
|
264 |
-
,ENSG00000197044
|
265 |
-
,ENSG00000197054
|
266 |
-
,ENSG00000197124
|
267 |
-
,ENSG00000197134
|
268 |
-
,ENSG00000197162
|
269 |
-
,ENSG00000197213
|
270 |
-
,ENSG00000197279
|
271 |
-
,ENSG00000197343
|
272 |
-
,ENSG00000197360
|
273 |
-
,ENSG00000197363
|
274 |
-
,ENSG00000197472
|
275 |
-
,ENSG00000197779
|
276 |
-
,ENSG00000197841
|
277 |
-
,ENSG00000197857
|
278 |
-
,ENSG00000197863
|
279 |
-
,ENSG00000197928
|
280 |
-
,ENSG00000197933
|
281 |
-
,ENSG00000197951
|
282 |
-
,ENSG00000198028
|
283 |
-
,ENSG00000198039
|
284 |
-
,ENSG00000198046
|
285 |
-
,ENSG00000198185
|
286 |
-
,ENSG00000198205
|
287 |
-
,ENSG00000198300
|
288 |
-
,ENSG00000198315
|
289 |
-
,ENSG00000198342
|
290 |
-
,ENSG00000198346
|
291 |
-
,ENSG00000198429
|
292 |
-
,ENSG00000198440
|
293 |
-
,ENSG00000198464
|
294 |
-
,ENSG00000198466
|
295 |
-
,ENSG00000198482
|
296 |
-
,ENSG00000198538
|
297 |
-
,ENSG00000198546
|
298 |
-
,ENSG00000198551
|
299 |
-
,ENSG00000198556
|
300 |
-
,ENSG00000198633
|
301 |
-
,ENSG00000198939
|
302 |
-
,ENSG00000203326
|
303 |
-
,ENSG00000204514
|
304 |
-
,ENSG00000204519
|
305 |
-
,ENSG00000204532
|
306 |
-
,ENSG00000204595
|
307 |
-
,ENSG00000204604
|
308 |
-
,ENSG00000204644
|
309 |
-
,ENSG00000204946
|
310 |
-
,ENSG00000213020
|
311 |
-
,ENSG00000213799
|
312 |
-
,ENSG00000213973
|
313 |
-
,ENSG00000213988
|
314 |
-
,ENSG00000214189
|
315 |
-
,ENSG00000215271
|
316 |
-
,ENSG00000215372
|
317 |
-
,ENSG00000215612
|
318 |
-
,ENSG00000220201
|
319 |
-
,ENSG00000221923
|
320 |
-
,ENSG00000223547
|
321 |
-
,ENSG00000227124
|
322 |
-
,ENSG00000229676
|
323 |
-
,ENSG00000229809
|
324 |
-
,ENSG00000230797
|
325 |
-
,ENSG00000232040
|
326 |
-
,ENSG00000234284
|
327 |
-
,ENSG00000234444
|
328 |
-
,ENSG00000235109
|
329 |
-
,ENSG00000235608
|
330 |
-
,ENSG00000236104
|
331 |
-
,ENSG00000236609
|
332 |
-
,ENSG00000237440
|
333 |
-
,ENSG00000242852
|
334 |
-
,ENSG00000243660
|
335 |
-
,ENSG00000245680
|
336 |
-
,ENSG00000248483
|
337 |
-
,ENSG00000249459
|
338 |
-
,ENSG00000249471
|
339 |
-
,ENSG00000249709
|
340 |
-
,ENSG00000250571
|
341 |
-
,ENSG00000250709
|
342 |
-
,ENSG00000251192
|
343 |
-
,ENSG00000251247
|
344 |
-
,ENSG00000251369
|
345 |
-
,ENSG00000253831
|
346 |
-
,ENSG00000254004
|
347 |
-
,ENSG00000256087
|
348 |
-
,ENSG00000256223
|
349 |
-
,ENSG00000256229
|
350 |
-
,ENSG00000256294
|
351 |
-
,ENSG00000256463
|
352 |
-
,ENSG00000256683
|
353 |
-
,ENSG00000256771
|
354 |
-
,ENSG00000257446
|
355 |
-
,ENSG00000257591
|
356 |
-
,ENSG00000258405
|
357 |
-
,ENSG00000258873
|
358 |
-
,ENSG00000263002
|
359 |
-
,ENSG00000264668
|
360 |
-
,ENSG00000265763
|
361 |
-
,ENSG00000267041
|
362 |
-
,ENSG00000267179
|
363 |
-
,ENSG00000267281
|
364 |
-
,ENSG00000267508
|
365 |
-
,ENSG00000267680
|
366 |
-
,ENSG00000269067
|
367 |
-
,ENSG00000269343
|
368 |
-
,ENSG00000269699
|
369 |
-
,ENSG00000272602
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/example_input_files/gene_info_table.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|