Spaces:
Runtime error
Runtime error
Simon Duerr
committed on
Commit
·
46124fc
1
Parent(s):
a412119
fix voxelization issue with structures containing H
Browse files
- app.py +67 -35
- utils/helpers.py +6 -4
- utils/voxelization.py +3 -1
app.py
CHANGED
@@ -52,26 +52,35 @@ def update(inp, file, mode, custom_resids, clustering_threshold):
|
|
52 |
return "pdb code must be 4 letters or Uniprot code does not match", ""
|
53 |
identifier = os.path.basename(filepath)
|
54 |
if mode == "All residues":
|
55 |
-
print(
|
56 |
ids = get_all_protein_resids(filepath)
|
57 |
-
elif len(custom_resids)!=0:
|
58 |
-
print(
|
59 |
-
ids=get_all_resids_from_list(filepath,custom_resids.replace(","," "))
|
60 |
else:
|
61 |
-
print(
|
62 |
ids = get_all_metalbinding_resids(filepath)
|
63 |
print(filepath)
|
64 |
print(ids)
|
65 |
try:
|
66 |
voxels, prot_centers, prot_N, prots = processStructures(filepath, ids)
|
67 |
except Exception as e:
|
68 |
-
|
|
|
|
|
|
|
|
|
69 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
70 |
voxels.to(device)
|
71 |
-
|
72 |
model = Model()
|
73 |
model.to(device)
|
74 |
-
model.load_state_dict(
|
|
|
|
|
|
|
|
|
|
|
75 |
model.eval()
|
76 |
with warnings.catch_warnings():
|
77 |
warnings.filterwarnings("ignore")
|
@@ -107,7 +116,6 @@ def update(inp, file, mode, custom_resids, clustering_threshold):
|
|
107 |
)
|
108 |
|
109 |
|
110 |
-
|
111 |
def read_mol(molpath):
|
112 |
with open(molpath, "r") as fp:
|
113 |
lines = fp.readlines()
|
@@ -171,7 +179,7 @@ def molecule(pdb, probes, cube):
|
|
171 |
</div>
|
172 |
<div class="px-4">
|
173 |
<label for="pdbmetal" class="relative inline-flex items-center mb-4 cursor-pointer ">
|
174 |
-
<input id="pdbmetal" type="checkbox" class="sr-only peer"
|
175 |
<div class="w-11 h-6 bg-gray-200 rounded-full peer peer-focus:ring-4 peer-focus:ring-blue-300 dark:peer-focus:ring-blue-800 dark:bg-gray-700 peer-checked:after:translate-x-full peer-checked:after:border-white after:absolute after:top-0.5 after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all dark:border-gray-600 peer-checked:bg-blue-600"></div>
|
176 |
<span class="ml-3 text-sm font-medium text-gray-900 dark:text-gray-300">Show PDB metals</span>
|
177 |
</label>
|
@@ -311,13 +319,11 @@ def molecule(pdb, probes, cube):
|
|
311 |
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
|
312 |
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
|
313 |
|
|
|
314 |
def set_examples(example):
|
315 |
-
n,code, resids = example
|
316 |
-
return [
|
317 |
-
|
318 |
-
code,
|
319 |
-
resids
|
320 |
-
]
|
321 |
|
322 |
metal3d = gr.Blocks()
|
323 |
|
@@ -325,40 +331,66 @@ with metal3d:
|
|
325 |
gr.Markdown("# Metal3D")
|
326 |
with gr.Tabs():
|
327 |
with gr.TabItem("Input"):
|
328 |
-
inp = gr.Textbox(
|
|
|
|
|
329 |
)
|
330 |
file = gr.File(file_count="single", type="file")
|
331 |
-
|
332 |
with gr.TabItem("Settings"):
|
333 |
with gr.Row():
|
334 |
mode = gr.Radio(
|
335 |
["All metalbinding residues (ASP, CYS, GLU, HIS)", "All residues"],
|
336 |
label="Residues to use for prediction",
|
337 |
)
|
338 |
-
custom_resids = gr.Textbox(
|
|
|
|
|
|
|
339 |
with gr.Row():
|
340 |
-
clustering_threshold = gr.Slider(
|
341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
btn = gr.Button("Run")
|
343 |
-
n = gr.Textbox(label="Label",visible=False)
|
344 |
-
examples = gr.Dataset(
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
examples.click(fn=set_examples, inputs=examples, outputs=examples.components)
|
351 |
-
#gr.Markdown(
|
352 |
# """ <small>Inference using CPU-only, can be quite slow for more than 20 residues. Use Colab notebook for GPU acceleration</small>
|
353 |
-
#"""
|
354 |
-
#)
|
355 |
-
|
356 |
|
357 |
gr.Markdown("# Output")
|
358 |
-
|
359 |
out = gr.Textbox(label="status")
|
360 |
mol = gr.HTML()
|
361 |
-
btn.click(
|
|
|
|
|
|
|
|
|
362 |
|
363 |
metal3d.launch(share=True)
|
364 |
-
|
|
|
52 |
return "pdb code must be 4 letters or Uniprot code does not match", ""
|
53 |
identifier = os.path.basename(filepath)
|
54 |
if mode == "All residues":
|
55 |
+
print("using all residues")
|
56 |
ids = get_all_protein_resids(filepath)
|
57 |
+
elif len(custom_resids) != 0:
|
58 |
+
print("using listed residues", custom_resids)
|
59 |
+
ids = get_all_resids_from_list(filepath, custom_resids.replace(",", " "))
|
60 |
else:
|
61 |
+
print("using metalbinding")
|
62 |
ids = get_all_metalbinding_resids(filepath)
|
63 |
print(filepath)
|
64 |
print(ids)
|
65 |
try:
|
66 |
voxels, prot_centers, prot_N, prots = processStructures(filepath, ids)
|
67 |
except Exception as e:
|
68 |
+
print(e)
|
69 |
+
return (
|
70 |
+
"Error",
|
71 |
+
f"""<div class="text-center mt-4"> Something went wrong with the voxelization, reset custom residues and other input fiels and check error message <br> <br> <code>{e}</code></div>""",
|
72 |
+
)
|
73 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
74 |
voxels.to(device)
|
75 |
+
|
76 |
model = Model()
|
77 |
model.to(device)
|
78 |
+
model.load_state_dict(
|
79 |
+
torch.load(
|
80 |
+
"weights/metal_0.5A_v3_d0.2_16Abox.pth",
|
81 |
+
map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
|
82 |
+
)
|
83 |
+
)
|
84 |
model.eval()
|
85 |
with warnings.catch_warnings():
|
86 |
warnings.filterwarnings("ignore")
|
|
|
116 |
)
|
117 |
|
118 |
|
|
|
119 |
def read_mol(molpath):
|
120 |
with open(molpath, "r") as fp:
|
121 |
lines = fp.readlines()
|
|
|
179 |
</div>
|
180 |
<div class="px-4">
|
181 |
<label for="pdbmetal" class="relative inline-flex items-center mb-4 cursor-pointer ">
|
182 |
+
<input id="pdbmetal" type="checkbox" class="sr-only peer">
|
183 |
<div class="w-11 h-6 bg-gray-200 rounded-full peer peer-focus:ring-4 peer-focus:ring-blue-300 dark:peer-focus:ring-blue-800 dark:bg-gray-700 peer-checked:after:translate-x-full peer-checked:after:border-white after:absolute after:top-0.5 after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all dark:border-gray-600 peer-checked:bg-blue-600"></div>
|
184 |
<span class="ml-3 text-sm font-medium text-gray-900 dark:text-gray-300">Show PDB metals</span>
|
185 |
</label>
|
|
|
319 |
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
|
320 |
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
|
321 |
|
322 |
+
|
323 |
def set_examples(example):
|
324 |
+
n, code, resids = example
|
325 |
+
return [n, code, resids]
|
326 |
+
|
|
|
|
|
|
|
327 |
|
328 |
metal3d = gr.Blocks()
|
329 |
|
|
|
331 |
gr.Markdown("# Metal3D")
|
332 |
with gr.Tabs():
|
333 |
with gr.TabItem("Input"):
|
334 |
+
inp = gr.Textbox(
|
335 |
+
placeholder="PDB Code or Uniprot identifier or upload file below",
|
336 |
+
label="Input molecule",
|
337 |
)
|
338 |
file = gr.File(file_count="single", type="file")
|
339 |
+
|
340 |
with gr.TabItem("Settings"):
|
341 |
with gr.Row():
|
342 |
mode = gr.Radio(
|
343 |
["All metalbinding residues (ASP, CYS, GLU, HIS)", "All residues"],
|
344 |
label="Residues to use for prediction",
|
345 |
)
|
346 |
+
custom_resids = gr.Textbox(
|
347 |
+
placeholder="Comma separated list of residues",
|
348 |
+
label="Custom residues",
|
349 |
+
)
|
350 |
with gr.Row():
|
351 |
+
clustering_threshold = gr.Slider(
|
352 |
+
minimum=0.15,
|
353 |
+
maximum=1,
|
354 |
+
value=0.15,
|
355 |
+
step=0.05,
|
356 |
+
label="Clustering threshold",
|
357 |
+
)
|
358 |
+
distance_cutoff = gr.Slider(
|
359 |
+
minimum=1,
|
360 |
+
maximum=10,
|
361 |
+
value=7,
|
362 |
+
step=1,
|
363 |
+
label="Clustering distance cutoff",
|
364 |
+
)
|
365 |
btn = gr.Button("Run")
|
366 |
+
n = gr.Textbox(label="Label", visible=False)
|
367 |
+
examples = gr.Dataset(
|
368 |
+
components=[n, inp, custom_resids],
|
369 |
+
samples=[
|
370 |
+
["HCA2", "2CBA", ""],
|
371 |
+
["Nickel in GB1 dimer", "6F5N", ""],
|
372 |
+
["Zebrafish palmitoyltransferase ZDHHC15B PDB", "6BMS", ""],
|
373 |
+
[
|
374 |
+
"Human palmitoyltransferase ZDHHC23 AlphaFold",
|
375 |
+
"Q8IYP9",
|
376 |
+
"280,273,263,260,274,277,274,287",
|
377 |
+
],
|
378 |
+
],
|
379 |
+
)
|
380 |
examples.click(fn=set_examples, inputs=examples, outputs=examples.components)
|
381 |
+
# gr.Markdown(
|
382 |
# """ <small>Inference using CPU-only, can be quite slow for more than 20 residues. Use Colab notebook for GPU acceleration</small>
|
383 |
+
# """
|
384 |
+
# )
|
|
|
385 |
|
386 |
gr.Markdown("# Output")
|
387 |
+
|
388 |
out = gr.Textbox(label="status")
|
389 |
mol = gr.HTML()
|
390 |
+
btn.click(
|
391 |
+
fn=update,
|
392 |
+
inputs=[inp, file, mode, custom_resids, clustering_threshold],
|
393 |
+
outputs=[out, mol],
|
394 |
+
)
|
395 |
|
396 |
metal3d.launch(share=True)
|
|
utils/helpers.py
CHANGED
@@ -89,7 +89,7 @@ def get_all_protein_resids(pdb_file):
|
|
89 |
prot = Molecule(pdb_file)
|
90 |
except:
|
91 |
exit("could not read file")
|
92 |
-
prot.filter("protein")
|
93 |
return prot.get("index", sel="name CA")
|
94 |
|
95 |
|
@@ -112,12 +112,13 @@ def get_all_metalbinding_resids(pdb_file):
|
|
112 |
prot = Molecule(pdb_file)
|
113 |
except:
|
114 |
exit("could not read file")
|
115 |
-
prot.filter("protein")
|
116 |
return prot.get(
|
117 |
"index",
|
118 |
sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
|
119 |
)
|
120 |
|
|
|
121 |
def get_all_resids_from_list(pdb_file, resids):
|
122 |
"""Return all metal binding residues from a pdb file
|
123 |
|
@@ -131,7 +132,7 @@ def get_all_resids_from_list(pdb_file, resids):
|
|
131 |
Returns
|
132 |
-------
|
133 |
resids : numpy.ndarray
|
134 |
-
indexes of name CA resids
|
135 |
|
136 |
"""
|
137 |
|
@@ -139,12 +140,13 @@ def get_all_resids_from_list(pdb_file, resids):
|
|
139 |
prot = Molecule(pdb_file)
|
140 |
except:
|
141 |
exit("could not read file")
|
142 |
-
prot.filter("protein")
|
143 |
return prot.get(
|
144 |
"index",
|
145 |
sel=f"name CA and resid {resids}",
|
146 |
)
|
147 |
|
|
|
148 |
def compute_average_p_fast(point, cutoff=1):
|
149 |
"""Using KDTree find the closest gridpoints
|
150 |
|
|
|
89 |
prot = Molecule(pdb_file)
|
90 |
except:
|
91 |
exit("could not read file")
|
92 |
+
prot.filter("protein and not hydrogen")
|
93 |
return prot.get("index", sel="name CA")
|
94 |
|
95 |
|
|
|
112 |
prot = Molecule(pdb_file)
|
113 |
except:
|
114 |
exit("could not read file")
|
115 |
+
prot.filter("protein and not hydrogen")
|
116 |
return prot.get(
|
117 |
"index",
|
118 |
sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
|
119 |
)
|
120 |
|
121 |
+
|
122 |
def get_all_resids_from_list(pdb_file, resids):
|
123 |
"""Return all metal binding residues from a pdb file
|
124 |
|
|
|
132 |
Returns
|
133 |
-------
|
134 |
resids : numpy.ndarray
|
135 |
+
indexes of name CA resids
|
136 |
|
137 |
"""
|
138 |
|
|
|
140 |
prot = Molecule(pdb_file)
|
141 |
except:
|
142 |
exit("could not read file")
|
143 |
+
prot.filter("protein and not hydrogen")
|
144 |
return prot.get(
|
145 |
"index",
|
146 |
sel=f"name CA and resid {resids}",
|
147 |
)
|
148 |
|
149 |
+
|
150 |
def compute_average_p_fast(point, cutoff=1):
|
151 |
"""Using KDTree find the closest gridpoints
|
152 |
|
utils/voxelization.py
CHANGED
@@ -136,7 +136,9 @@ def voxelize_single_notcentered(env):
|
|
136 |
voxelsize=0.5,
|
137 |
validitychecks=False,
|
138 |
)
|
139 |
-
except:
|
|
|
|
|
140 |
raise VoxelizationError(f"voxelization of {id} failed")
|
141 |
nchannels = prot_vox.shape[1]
|
142 |
prot_vox_t = (
|
|
|
136 |
voxelsize=0.5,
|
137 |
validitychecks=False,
|
138 |
)
|
139 |
+
except Exception as e:
|
140 |
+
print(e)
|
141 |
+
print(id)
|
142 |
raise VoxelizationError(f"voxelization of {id} failed")
|
143 |
nchannels = prot_vox.shape[1]
|
144 |
prot_vox_t = (
|