Spaces:
Running
Running
Erva Ulusoy
committed on
Commit
·
e0fbc94
1
Parent(s):
9fb2870
updated node titles to contain node name instead of id
Browse files- visualize_kg.py +68 -7
visualize_kg.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
from pyvis.network import Network
|
2 |
import os
|
|
|
|
|
3 |
|
4 |
NODE_TYPE_COLORS = {
|
5 |
'Disease': '#079dbb',
|
@@ -48,6 +50,40 @@ GO_CATEGORY_MAPPING = {
|
|
48 |
'Cellular Component': 'GO_term_C'
|
49 |
}
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def _gather_protein_edges(data, protein_id):
|
52 |
|
53 |
protein_idx = data['Protein']['id_mapping'][protein_id]
|
@@ -134,6 +170,10 @@ def _filter_edges(protein_id, protein_edges, prediction_df, limit=10):
|
|
134 |
|
135 |
|
136 |
def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
|
|
|
|
|
|
|
137 |
protein_edges = _gather_protein_edges(data, protein_id)
|
138 |
visualized_edges = _filter_edges(protein_id, protein_edges, prediction_df, limit)
|
139 |
print(f'Edges to be visualized: {visualized_edges}')
|
@@ -148,7 +188,6 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
148 |
}
|
149 |
|
150 |
# Convert groups_config to a JSON-compatible string
|
151 |
-
import json
|
152 |
groups_json = json.dumps(groups_config)
|
153 |
|
154 |
# Configure physics options with settings for better clustering
|
@@ -196,8 +235,15 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
196 |
"groups": """ + groups_json + "}")
|
197 |
|
198 |
# Add the main protein node
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
net.add_node(protein_id,
|
200 |
-
label=
|
|
|
201 |
color={'background': 'white', 'border': '#c1121f'},
|
202 |
borderWidth=4,
|
203 |
shape="dot",
|
@@ -226,11 +272,19 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
226 |
|
227 |
# Add source node if not present
|
228 |
if source_str not in added_nodes:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
net.add_node(source_str,
|
230 |
-
label=
|
231 |
shape="dot",
|
232 |
font={'color': '#000000', 'size': 12},
|
233 |
-
title=
|
234 |
group=source_type,
|
235 |
size=15,
|
236 |
mass=1.5)
|
@@ -238,16 +292,23 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
238 |
|
239 |
# Add target node if not present
|
240 |
if target_str not in added_nodes:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
net.add_node(target_str,
|
242 |
-
label=
|
243 |
shape="dot",
|
244 |
font={'color': '#000000', 'size': 12},
|
245 |
-
title=
|
246 |
group=target_type,
|
247 |
size=15,
|
248 |
mass=1.5)
|
249 |
added_nodes.add(target_str)
|
250 |
-
|
251 |
# Add edge with relationship type and probability as label
|
252 |
edge_label = f"{relation_type}"
|
253 |
if probability is not None:
|
|
|
1 |
from pyvis.network import Network
|
2 |
import os
|
3 |
+
import json
|
4 |
+
import gzip
|
5 |
|
6 |
NODE_TYPE_COLORS = {
|
7 |
'Disease': '#079dbb',
|
|
|
50 |
'Cellular Component': 'GO_term_C'
|
51 |
}
|
52 |
|
53 |
+
# URL templates for node types whose link depends only on the node ID.
# '{}' is replaced with the node ID verbatim.
_NODE_URL_TEMPLATES = {
    'Protein': "https://www.uniprot.org/uniprotkb/{}/entry",
    'HPO': "https://hpo.jax.org/browse/term/{}",
    'Drug': "https://go.drugbank.com/drugs/{}",
    'Compound': "https://www.ebi.ac.uk/chembl/explore/compound/{}",
    'Domain': "https://www.ebi.ac.uk/interpro/entry/InterPro/{}",
    'Pathway': "https://reactome.org/content/detail/{}",
    'kegg_Pathway': "https://www.genome.jp/pathway/{}",
    'EC_number': "https://enzyme.expasy.org/EC/{}",
}

# URL templates for disease IDs of the form '<ontology>:<local id>', keyed by
# the ontology prefix. '{}' is replaced with the part after the first colon.
_DISEASE_ONTOLOGY_URL_TEMPLATES = {
    'EFO': "http://www.ebi.ac.uk/efo/EFO_{}",
    'MONDO': "http://purl.obolibrary.org/obo/MONDO_{}",
    'Orphanet': "http://www.orpha.net/ORDO/Orphanet_{}",
}


def get_node_url(node_type, node_id):
    """Return the external reference URL for a node, or None if there is none.

    Args:
        node_type: Node type label. Any label starting with ``'GO_term'`` is
            linked to QuickGO; ``'Disease'`` is resolved by ontology prefix;
            the remaining supported labels are the keys of
            ``_NODE_URL_TEMPLATES``.
        node_id: Node identifier. For diseases, an ID containing ``':'`` is
            split into an ontology prefix and a local ID.

    Returns:
        The URL as a string, or ``None`` when ``node_type`` is not supported
        or when a disease ID carries an ontology prefix that is not one of
        ``EFO``, ``MONDO`` or ``Orphanet``.
    """
    # Prefix match: every type whose label begins with 'GO_term' shares one
    # URL pattern.
    if node_type.startswith('GO_term'):
        return f"https://www.ebi.ac.uk/QuickGO/term/{node_id}"

    if node_type == 'Disease':
        # NOTE(review): IDs without ':' are assumed to be the ones that go to
        # the genome.jp entry page (presumably KEGG disease IDs) -- confirm
        # against the data source.
        if ':' not in node_id:
            return f"https://www.genome.jp/entry/{node_id}"

        # Split once; the guard above guarantees at least two parts.
        ontology, local_id = node_id.split(':')[:2]
        template = _DISEASE_ONTOLOGY_URL_TEMPLATES.get(ontology)
        # Unknown ontology prefixes have no link target.
        return template.format(local_id) if template is not None else None

    template = _NODE_URL_TEMPLATES.get(node_type)
    return template.format(node_id) if template is not None else None
|
86 |
+
|
87 |
def _gather_protein_edges(data, protein_id):
|
88 |
|
89 |
protein_idx = data['Protein']['id_mapping'][protein_id]
|
|
|
170 |
|
171 |
|
172 |
def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
173 |
+
|
174 |
+
with gzip.open('data/name_info.json.gz', 'rt', encoding='utf-8') as file:
|
175 |
+
name_info = json.load(file)
|
176 |
+
|
177 |
protein_edges = _gather_protein_edges(data, protein_id)
|
178 |
visualized_edges = _filter_edges(protein_id, protein_edges, prediction_df, limit)
|
179 |
print(f'Edges to be visualized: {visualized_edges}')
|
|
|
188 |
}
|
189 |
|
190 |
# Convert groups_config to a JSON-compatible string
|
|
|
191 |
groups_json = json.dumps(groups_config)
|
192 |
|
193 |
# Configure physics options with settings for better clustering
|
|
|
235 |
"groups": """ + groups_json + "}")
|
236 |
|
237 |
# Add the main protein node
|
238 |
+
query_node_url = get_node_url('Protein', protein_id)
|
239 |
+
node_name = name_info['Protein'][protein_id]
|
240 |
+
query_node_title = f"{node_name} (Query Protein)"
|
241 |
+
if query_node_url:
|
242 |
+
query_node_title = f'<a href="{query_node_url}" target="_blank">{query_node_title}</a>'
|
243 |
+
|
244 |
net.add_node(protein_id,
|
245 |
+
label=protein_id,
|
246 |
+
title=query_node_title,
|
247 |
color={'background': 'white', 'border': '#c1121f'},
|
248 |
borderWidth=4,
|
249 |
shape="dot",
|
|
|
272 |
|
273 |
# Add source node if not present
|
274 |
if source_str not in added_nodes:
|
275 |
+
if not source_type.startswith('GO_term'):
|
276 |
+
node_name = name_info[source_type][source_str]
|
277 |
+
else:
|
278 |
+
node_name = name_info['GO_term'][source_str]
|
279 |
+
url = get_node_url(source_type, source_str)
|
280 |
+
title = f"{node_name} ({NODE_LABEL_TRANSLATION[source_type] if source_type in NODE_LABEL_TRANSLATION else source_type})"
|
281 |
+
if url:
|
282 |
+
title = f'<a href="{url}" target="_blank">{title}</a>'
|
283 |
net.add_node(source_str,
|
284 |
+
label=source_str,
|
285 |
shape="dot",
|
286 |
font={'color': '#000000', 'size': 12},
|
287 |
+
title=title,
|
288 |
group=source_type,
|
289 |
size=15,
|
290 |
mass=1.5)
|
|
|
292 |
|
293 |
# Add target node if not present
|
294 |
if target_str not in added_nodes:
|
295 |
+
if not target_type.startswith('GO_term'):
|
296 |
+
node_name = name_info[target_type][target_str]
|
297 |
+
else:
|
298 |
+
node_name = name_info['GO_term'][target_str]
|
299 |
+
url = get_node_url(target_type, target_str)
|
300 |
+
title = f"{node_name} ({NODE_LABEL_TRANSLATION[target_type] if target_type in NODE_LABEL_TRANSLATION else target_type})"
|
301 |
+
if url:
|
302 |
+
title = f'<a href="{url}" target="_blank">{title}</a>'
|
303 |
net.add_node(target_str,
|
304 |
+
label=target_str,
|
305 |
shape="dot",
|
306 |
font={'color': '#000000', 'size': 12},
|
307 |
+
title=title,
|
308 |
group=target_type,
|
309 |
size=15,
|
310 |
mass=1.5)
|
311 |
added_nodes.add(target_str)
|
|
|
312 |
# Add edge with relationship type and probability as label
|
313 |
edge_label = f"{relation_type}"
|
314 |
if probability is not None:
|