Simon Duerr committed on
Commit
46124fc
·
1 Parent(s): a412119

fix voxelization issue with structures containing H

Browse files
Files changed (3) hide show
  1. app.py +67 -35
  2. utils/helpers.py +6 -4
  3. utils/voxelization.py +3 -1
app.py CHANGED
@@ -52,26 +52,35 @@ def update(inp, file, mode, custom_resids, clustering_threshold):
52
  return "pdb code must be 4 letters or Uniprot code does not match", ""
53
  identifier = os.path.basename(filepath)
54
  if mode == "All residues":
55
- print('using all residues')
56
  ids = get_all_protein_resids(filepath)
57
- elif len(custom_resids)!=0:
58
- print('using listed residues', custom_resids)
59
- ids=get_all_resids_from_list(filepath,custom_resids.replace(","," "))
60
  else:
61
- print('using metalbinding')
62
  ids = get_all_metalbinding_resids(filepath)
63
  print(filepath)
64
  print(ids)
65
  try:
66
  voxels, prot_centers, prot_N, prots = processStructures(filepath, ids)
67
  except Exception as e:
68
- return "Error", f"""<div class="text-center mt-4"> Something went wrong with the voxelization, reset custom residues and other input fiels and check error message <br> <br> <code>{e}</code></div>"""
 
 
 
 
69
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
70
  voxels.to(device)
71
-
72
  model = Model()
73
  model.to(device)
74
- model.load_state_dict(torch.load("weights/metal_0.5A_v3_d0.2_16Abox.pth", map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")))
 
 
 
 
 
75
  model.eval()
76
  with warnings.catch_warnings():
77
  warnings.filterwarnings("ignore")
@@ -107,7 +116,6 @@ def update(inp, file, mode, custom_resids, clustering_threshold):
107
  )
108
 
109
 
110
-
111
  def read_mol(molpath):
112
  with open(molpath, "r") as fp:
113
  lines = fp.readlines()
@@ -171,7 +179,7 @@ def molecule(pdb, probes, cube):
171
  </div>
172
  <div class="px-4">
173
  <label for="pdbmetal" class="relative inline-flex items-center mb-4 cursor-pointer ">
174
- <input id="pdbmetal" type="checkbox" class="sr-only peer" checked>
175
  <div class="w-11 h-6 bg-gray-200 rounded-full peer peer-focus:ring-4 peer-focus:ring-blue-300 dark:peer-focus:ring-blue-800 dark:bg-gray-700 peer-checked:after:translate-x-full peer-checked:after:border-white after:absolute after:top-0.5 after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all dark:border-gray-600 peer-checked:bg-blue-600"></div>
176
  <span class="ml-3 text-sm font-medium text-gray-900 dark:text-gray-300">Show PDB metals</span>
177
  </label>
@@ -311,13 +319,11 @@ def molecule(pdb, probes, cube):
311
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
312
  allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
313
 
 
314
  def set_examples(example):
315
- n,code, resids = example
316
- return [
317
- n,
318
- code,
319
- resids
320
- ]
321
 
322
  metal3d = gr.Blocks()
323
 
@@ -325,40 +331,66 @@ with metal3d:
325
  gr.Markdown("# Metal3D")
326
  with gr.Tabs():
327
  with gr.TabItem("Input"):
328
- inp = gr.Textbox( placeholder="PDB Code or Uniprot identifier or upload file below", label="Input molecule"
 
 
329
  )
330
  file = gr.File(file_count="single", type="file")
331
-
332
  with gr.TabItem("Settings"):
333
  with gr.Row():
334
  mode = gr.Radio(
335
  ["All metalbinding residues (ASP, CYS, GLU, HIS)", "All residues"],
336
  label="Residues to use for prediction",
337
  )
338
- custom_resids = gr.Textbox(placeholder="Comma separated list of residues", label="Custom residues")
 
 
 
339
  with gr.Row():
340
- clustering_threshold = gr.Slider(minimum=0.15,maximum=1, value=0.15,step=0.05, label="Clustering threshold")
341
- distance_cutoff = gr.Slider(minimum=1,maximum=10, value=7,step=1, label="Clustering distance cutoff")
 
 
 
 
 
 
 
 
 
 
 
 
342
  btn = gr.Button("Run")
343
- n = gr.Textbox(label="Label",visible=False)
344
- examples = gr.Dataset(components=[n,inp, custom_resids],
345
- samples=[["HCA2", "2CBA", ""],
346
- ["Nickel in GB1 dimer", "6F5N", ""],
347
- ["Zebrafish palmitoyltransferase ZDHHC15B PDB", "6BMS", ""],
348
- ["Human palmitoyltransferase ZDHHC23 AlphaFold", "Q8IYP9", "280,273,263,260,274,277,274,287"]],
349
- )
 
 
 
 
 
 
 
350
  examples.click(fn=set_examples, inputs=examples, outputs=examples.components)
351
- #gr.Markdown(
352
  # """ <small>Inference using CPU-only, can be quite slow for more than 20 residues. Use Colab notebook for GPU acceleration</small>
353
- #"""
354
- #)
355
-
356
 
357
  gr.Markdown("# Output")
358
-
359
  out = gr.Textbox(label="status")
360
  mol = gr.HTML()
361
- btn.click(fn=update, inputs=[inp, file, mode, custom_resids, clustering_threshold], outputs=[out, mol])
 
 
 
 
362
 
363
  metal3d.launch(share=True)
364
-
 
52
  return "pdb code must be 4 letters or Uniprot code does not match", ""
53
  identifier = os.path.basename(filepath)
54
  if mode == "All residues":
55
+ print("using all residues")
56
  ids = get_all_protein_resids(filepath)
57
+ elif len(custom_resids) != 0:
58
+ print("using listed residues", custom_resids)
59
+ ids = get_all_resids_from_list(filepath, custom_resids.replace(",", " "))
60
  else:
61
+ print("using metalbinding")
62
  ids = get_all_metalbinding_resids(filepath)
63
  print(filepath)
64
  print(ids)
65
  try:
66
  voxels, prot_centers, prot_N, prots = processStructures(filepath, ids)
67
  except Exception as e:
68
+ print(e)
69
+ return (
70
+ "Error",
71
+ f"""<div class="text-center mt-4"> Something went wrong with the voxelization, reset custom residues and other input fiels and check error message <br> <br> <code>{e}</code></div>""",
72
+ )
73
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
74
  voxels.to(device)
75
+
76
  model = Model()
77
  model.to(device)
78
+ model.load_state_dict(
79
+ torch.load(
80
+ "weights/metal_0.5A_v3_d0.2_16Abox.pth",
81
+ map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
82
+ )
83
+ )
84
  model.eval()
85
  with warnings.catch_warnings():
86
  warnings.filterwarnings("ignore")
 
116
  )
117
 
118
 
 
119
  def read_mol(molpath):
120
  with open(molpath, "r") as fp:
121
  lines = fp.readlines()
 
179
  </div>
180
  <div class="px-4">
181
  <label for="pdbmetal" class="relative inline-flex items-center mb-4 cursor-pointer ">
182
+ <input id="pdbmetal" type="checkbox" class="sr-only peer">
183
  <div class="w-11 h-6 bg-gray-200 rounded-full peer peer-focus:ring-4 peer-focus:ring-blue-300 dark:peer-focus:ring-blue-800 dark:bg-gray-700 peer-checked:after:translate-x-full peer-checked:after:border-white after:absolute after:top-0.5 after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all dark:border-gray-600 peer-checked:bg-blue-600"></div>
184
  <span class="ml-3 text-sm font-medium text-gray-900 dark:text-gray-300">Show PDB metals</span>
185
  </label>
 
319
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
320
  allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
321
 
322
+
323
  def set_examples(example):
324
+ n, code, resids = example
325
+ return [n, code, resids]
326
+
 
 
 
327
 
328
  metal3d = gr.Blocks()
329
 
 
331
  gr.Markdown("# Metal3D")
332
  with gr.Tabs():
333
  with gr.TabItem("Input"):
334
+ inp = gr.Textbox(
335
+ placeholder="PDB Code or Uniprot identifier or upload file below",
336
+ label="Input molecule",
337
  )
338
  file = gr.File(file_count="single", type="file")
339
+
340
  with gr.TabItem("Settings"):
341
  with gr.Row():
342
  mode = gr.Radio(
343
  ["All metalbinding residues (ASP, CYS, GLU, HIS)", "All residues"],
344
  label="Residues to use for prediction",
345
  )
346
+ custom_resids = gr.Textbox(
347
+ placeholder="Comma separated list of residues",
348
+ label="Custom residues",
349
+ )
350
  with gr.Row():
351
+ clustering_threshold = gr.Slider(
352
+ minimum=0.15,
353
+ maximum=1,
354
+ value=0.15,
355
+ step=0.05,
356
+ label="Clustering threshold",
357
+ )
358
+ distance_cutoff = gr.Slider(
359
+ minimum=1,
360
+ maximum=10,
361
+ value=7,
362
+ step=1,
363
+ label="Clustering distance cutoff",
364
+ )
365
  btn = gr.Button("Run")
366
+ n = gr.Textbox(label="Label", visible=False)
367
+ examples = gr.Dataset(
368
+ components=[n, inp, custom_resids],
369
+ samples=[
370
+ ["HCA2", "2CBA", ""],
371
+ ["Nickel in GB1 dimer", "6F5N", ""],
372
+ ["Zebrafish palmitoyltransferase ZDHHC15B PDB", "6BMS", ""],
373
+ [
374
+ "Human palmitoyltransferase ZDHHC23 AlphaFold",
375
+ "Q8IYP9",
376
+ "280,273,263,260,274,277,274,287",
377
+ ],
378
+ ],
379
+ )
380
  examples.click(fn=set_examples, inputs=examples, outputs=examples.components)
381
+ # gr.Markdown(
382
  # """ <small>Inference using CPU-only, can be quite slow for more than 20 residues. Use Colab notebook for GPU acceleration</small>
383
+ # """
384
+ # )
 
385
 
386
  gr.Markdown("# Output")
387
+
388
  out = gr.Textbox(label="status")
389
  mol = gr.HTML()
390
+ btn.click(
391
+ fn=update,
392
+ inputs=[inp, file, mode, custom_resids, clustering_threshold],
393
+ outputs=[out, mol],
394
+ )
395
 
396
  metal3d.launch(share=True)
 
utils/helpers.py CHANGED
@@ -89,7 +89,7 @@ def get_all_protein_resids(pdb_file):
89
  prot = Molecule(pdb_file)
90
  except:
91
  exit("could not read file")
92
- prot.filter("protein")
93
  return prot.get("index", sel="name CA")
94
 
95
 
@@ -112,12 +112,13 @@ def get_all_metalbinding_resids(pdb_file):
112
  prot = Molecule(pdb_file)
113
  except:
114
  exit("could not read file")
115
- prot.filter("protein")
116
  return prot.get(
117
  "index",
118
  sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
119
  )
120
 
 
121
  def get_all_resids_from_list(pdb_file, resids):
122
  """Return all metal binding residues from a pdb file
123
 
@@ -131,7 +132,7 @@ def get_all_resids_from_list(pdb_file, resids):
131
  Returns
132
  -------
133
  resids : numpy.ndarray
134
- indexes of name CA resids
135
 
136
  """
137
 
@@ -139,12 +140,13 @@ def get_all_resids_from_list(pdb_file, resids):
139
  prot = Molecule(pdb_file)
140
  except:
141
  exit("could not read file")
142
- prot.filter("protein")
143
  return prot.get(
144
  "index",
145
  sel=f"name CA and resid {resids}",
146
  )
147
 
 
148
  def compute_average_p_fast(point, cutoff=1):
149
  """Using KDTree find the closest gridpoints
150
 
 
89
  prot = Molecule(pdb_file)
90
  except:
91
  exit("could not read file")
92
+ prot.filter("protein and not hydrogen")
93
  return prot.get("index", sel="name CA")
94
 
95
 
 
112
  prot = Molecule(pdb_file)
113
  except:
114
  exit("could not read file")
115
+ prot.filter("protein and not hydrogen")
116
  return prot.get(
117
  "index",
118
  sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
119
  )
120
 
121
+
122
  def get_all_resids_from_list(pdb_file, resids):
123
  """Return all metal binding residues from a pdb file
124
 
 
132
  Returns
133
  -------
134
  resids : numpy.ndarray
135
+ indexes of name CA resids
136
 
137
  """
138
 
 
140
  prot = Molecule(pdb_file)
141
  except:
142
  exit("could not read file")
143
+ prot.filter("protein and not hydrogen")
144
  return prot.get(
145
  "index",
146
  sel=f"name CA and resid {resids}",
147
  )
148
 
149
+
150
  def compute_average_p_fast(point, cutoff=1):
151
  """Using KDTree find the closest gridpoints
152
 
utils/voxelization.py CHANGED
@@ -136,7 +136,9 @@ def voxelize_single_notcentered(env):
136
  voxelsize=0.5,
137
  validitychecks=False,
138
  )
139
- except:
 
 
140
  raise VoxelizationError(f"voxelization of {id} failed")
141
  nchannels = prot_vox.shape[1]
142
  prot_vox_t = (
 
136
  voxelsize=0.5,
137
  validitychecks=False,
138
  )
139
+ except Exception as e:
140
+ print(e)
141
+ print(id)
142
  raise VoxelizationError(f"voxelization of {id} failed")
143
  nchannels = prot_vox.shape[1]
144
  prot_vox_t = (