Erva Ulusoy committed on
Commit
e0fbc94
·
1 Parent(s): 9fb2870

updated node titles to contain node name instead of id

Browse files
Files changed (1) hide show
  1. visualize_kg.py +68 -7
visualize_kg.py CHANGED
@@ -1,5 +1,7 @@
1
  from pyvis.network import Network
2
  import os
 
 
3
 
4
  NODE_TYPE_COLORS = {
5
  'Disease': '#079dbb',
@@ -48,6 +50,40 @@ GO_CATEGORY_MAPPING = {
48
  'Cellular Component': 'GO_term_C'
49
  }
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def _gather_protein_edges(data, protein_id):
52
 
53
  protein_idx = data['Protein']['id_mapping'][protein_id]
@@ -134,6 +170,10 @@ def _filter_edges(protein_id, protein_edges, prediction_df, limit=10):
134
 
135
 
136
  def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
 
 
 
 
137
  protein_edges = _gather_protein_edges(data, protein_id)
138
  visualized_edges = _filter_edges(protein_id, protein_edges, prediction_df, limit)
139
  print(f'Edges to be visualized: {visualized_edges}')
@@ -148,7 +188,6 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
148
  }
149
 
150
  # Convert groups_config to a JSON-compatible string
151
- import json
152
  groups_json = json.dumps(groups_config)
153
 
154
  # Configure physics options with settings for better clustering
@@ -196,8 +235,15 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
196
  "groups": """ + groups_json + "}")
197
 
198
  # Add the main protein node
 
 
 
 
 
 
199
  net.add_node(protein_id,
200
- label=f"{protein_id} (Protein)",
 
201
  color={'background': 'white', 'border': '#c1121f'},
202
  borderWidth=4,
203
  shape="dot",
@@ -226,11 +272,19 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
226
 
227
  # Add source node if not present
228
  if source_str not in added_nodes:
 
 
 
 
 
 
 
 
229
  net.add_node(source_str,
230
- label= source_str,
231
  shape="dot",
232
  font={'color': '#000000', 'size': 12},
233
- title=f"{source_str} ({NODE_LABEL_TRANSLATION[source_type] if source_type in NODE_LABEL_TRANSLATION else source_type})",
234
  group=source_type,
235
  size=15,
236
  mass=1.5)
@@ -238,16 +292,23 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
238
 
239
  # Add target node if not present
240
  if target_str not in added_nodes:
 
 
 
 
 
 
 
 
241
  net.add_node(target_str,
242
- label= target_str,
243
  shape="dot",
244
  font={'color': '#000000', 'size': 12},
245
- title=f"{target_str} ({NODE_LABEL_TRANSLATION[target_type] if target_type in NODE_LABEL_TRANSLATION else target_type})",
246
  group=target_type,
247
  size=15,
248
  mass=1.5)
249
  added_nodes.add(target_str)
250
-
251
  # Add edge with relationship type and probability as label
252
  edge_label = f"{relation_type}"
253
  if probability is not None:
 
1
  from pyvis.network import Network
2
  import os
3
+ import json
4
+ import gzip
5
 
6
  NODE_TYPE_COLORS = {
7
  'Disease': '#079dbb',
 
50
  'Cellular Component': 'GO_term_C'
51
  }
52
 
53
def get_node_url(node_type, node_id):
    """Return the external reference URL for a node, or None if unsupported.

    Args:
        node_type: Node type label. Any value starting with 'GO_term' is
            routed to QuickGO; other recognised values are 'Protein',
            'Disease', 'HPO', 'Drug', 'Compound', 'Domain', 'Pathway',
            'kegg_Pathway' and 'EC_number'.
        node_id: Node identifier as a string. For 'Disease' nodes an
            'ONTOLOGY:LOCAL_ID' form selects the EFO / MONDO / Orphanet
            link; an ID without ':' is sent to the genome.jp entry page.

    Returns:
        The URL string, or None when the node type (or, for diseases, the
        ontology prefix) has no known link target.
    """
    # Checked first: this is a prefix match, unlike the exact-match types below.
    if node_type.startswith('GO_term'):
        return f"https://www.ebi.ac.uk/QuickGO/term/{node_id}"

    if node_type == 'Disease':
        if ':' not in node_id:
            return f"https://www.genome.jp/entry/{node_id}"
        # Split once; only the first two fields are used (prefix and local id).
        ontology, local_id = node_id.split(':')[:2]
        disease_prefixes = {
            'EFO': "http://www.ebi.ac.uk/efo/EFO_",
            'MONDO': "http://purl.obolibrary.org/obo/MONDO_",
            'Orphanet': "http://www.orpha.net/ORDO/Orphanet_",
        }
        prefix = disease_prefixes.get(ontology)
        # Unknown ontology prefix: no link available, say so explicitly.
        if prefix is None:
            return None
        return f"{prefix}{local_id}"

    # Exact-match node types that map to a single URL template.
    url_templates = {
        'Protein': "https://www.uniprot.org/uniprotkb/{node_id}/entry",
        'HPO': "https://hpo.jax.org/browse/term/{node_id}",
        'Drug': "https://go.drugbank.com/drugs/{node_id}",
        'Compound': "https://www.ebi.ac.uk/chembl/explore/compound/{node_id}",
        'Domain': "https://www.ebi.ac.uk/interpro/entry/InterPro/{node_id}",
        'Pathway': "https://reactome.org/content/detail/{node_id}",
        'kegg_Pathway': "https://www.genome.jp/pathway/{node_id}",
        'EC_number': "https://enzyme.expasy.org/EC/{node_id}",
    }
    template = url_templates.get(node_type)
    if template is None:
        return None
    return template.format(node_id=node_id)
86
+
87
  def _gather_protein_edges(data, protein_id):
88
 
89
  protein_idx = data['Protein']['id_mapping'][protein_id]
 
170
 
171
 
172
  def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
173
+
174
+ with gzip.open('data/name_info.json.gz', 'rt', encoding='utf-8') as file:
175
+ name_info = json.load(file)
176
+
177
  protein_edges = _gather_protein_edges(data, protein_id)
178
  visualized_edges = _filter_edges(protein_id, protein_edges, prediction_df, limit)
179
  print(f'Edges to be visualized: {visualized_edges}')
 
188
  }
189
 
190
  # Convert groups_config to a JSON-compatible string
 
191
  groups_json = json.dumps(groups_config)
192
 
193
  # Configure physics options with settings for better clustering
 
235
  "groups": """ + groups_json + "}")
236
 
237
  # Add the main protein node
238
+ query_node_url = get_node_url('Protein', protein_id)
239
+ node_name = name_info['Protein'][protein_id]
240
+ query_node_title = f"{node_name} (Query Protein)"
241
+ if query_node_url:
242
+ query_node_title = f'<a href="{query_node_url}" target="_blank">{query_node_title}</a>'
243
+
244
  net.add_node(protein_id,
245
+ label=protein_id,
246
+ title=query_node_title,
247
  color={'background': 'white', 'border': '#c1121f'},
248
  borderWidth=4,
249
  shape="dot",
 
272
 
273
  # Add source node if not present
274
  if source_str not in added_nodes:
275
+ if not source_type.startswith('GO_term'):
276
+ node_name = name_info[source_type][source_str]
277
+ else:
278
+ node_name = name_info['GO_term'][source_str]
279
+ url = get_node_url(source_type, source_str)
280
+ title = f"{node_name} ({NODE_LABEL_TRANSLATION[source_type] if source_type in NODE_LABEL_TRANSLATION else source_type})"
281
+ if url:
282
+ title = f'<a href="{url}" target="_blank">{title}</a>'
283
  net.add_node(source_str,
284
+ label=source_str,
285
  shape="dot",
286
  font={'color': '#000000', 'size': 12},
287
+ title=title,
288
  group=source_type,
289
  size=15,
290
  mass=1.5)
 
292
 
293
  # Add target node if not present
294
  if target_str not in added_nodes:
295
+ if not target_type.startswith('GO_term'):
296
+ node_name = name_info[target_type][target_str]
297
+ else:
298
+ node_name = name_info['GO_term'][target_str]
299
+ url = get_node_url(target_type, target_str)
300
+ title = f"{node_name} ({NODE_LABEL_TRANSLATION[target_type] if target_type in NODE_LABEL_TRANSLATION else target_type})"
301
+ if url:
302
+ title = f'<a href="{url}" target="_blank">{title}</a>'
303
  net.add_node(target_str,
304
+ label=target_str,
305
  shape="dot",
306
  font={'color': '#000000', 'size': 12},
307
+ title=title,
308
  group=target_type,
309
  size=15,
310
  mass=1.5)
311
  added_nodes.add(target_str)
 
312
  # Add edge with relationship type and probability as label
313
  edge_label = f"{relation_type}"
314
  if probability is not None: