Merge branch 'main' of https://huggingface.co/spaces/MISATO-dataset/esm3-conformity-sampling
Browse files
app.py
CHANGED
@@ -16,6 +16,8 @@ from dotenv import load_dotenv
|
|
16 |
import torch
|
17 |
import json
|
18 |
import time
|
|
|
|
|
19 |
|
20 |
load_dotenv()
|
21 |
|
@@ -39,6 +41,49 @@ amino3to1 = {
|
|
39 |
}
|
40 |
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
def read_pdb_io(pdb_file):
|
43 |
if isinstance(pdb_file, io.StringIO):
|
44 |
pdb_content = pdb_file.getvalue()
|
@@ -242,6 +287,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
|
|
242 |
|
243 |
progress(0, desc="Starting prediction")
|
244 |
view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
|
|
|
245 |
if view_data is None:
|
246 |
return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
|
247 |
|
@@ -260,7 +306,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
|
|
260 |
</div>
|
261 |
"""
|
262 |
progress(1.0, desc="Completed")
|
263 |
-
return html_content, crmsd_text
|
264 |
except Exception as e:
|
265 |
error_message = str(e)
|
266 |
stack_trace = traceback.format_exc()
|
@@ -304,6 +350,7 @@ def create_demo():
|
|
304 |
5. Click the "Run Prediction" button to start the process.
|
305 |
6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
|
306 |
7. The alignment result will display the best cRMSD (lower is better).
|
|
|
307 |
|
308 |
## About
|
309 |
This demo uses the ESM3 model to predict protein structures from PDB files.
|
|
|
16 |
import torch
|
17 |
import json
|
18 |
import time
|
19 |
+
from Bio.PDB import PDBParser
|
20 |
+
import itertools
|
21 |
|
22 |
load_dotenv()
|
23 |
|
|
|
41 |
}
|
42 |
|
43 |
|
44 |
+
# Covalent radii in Ångström, keyed by UPPER-CASE element symbol.
# NOTE(review): the values look like the Cordero et al. (2008) covalent radii,
# with 2.0 used as a placeholder for elements that have no tabulated value --
# confirm the source before changing individual entries.
COVALENT_RADIUS = {
    # Period 1
    "H": 0.31, "HE": 0.28,
    # Deuterium is chemically hydrogen. Without this entry, deuterated
    # structures would fall through to the large lookup default and every
    # D-X bond would be reported as a steric clash.
    "D": 0.31,
    # Period 2
    "LI": 1.28, "BE": 0.96, "B": 0.84, "C": 0.76, "N": 0.71, "O": 0.66, "F": 0.57, "NE": 0.58,
    # Period 3
    "NA": 1.66, "MG": 1.41, "AL": 1.21, "SI": 1.11, "P": 1.07, "S": 1.05, "CL": 1.02, "AR": 1.06,
    # Period 4
    "K": 2.03, "CA": 1.76, "SC": 1.7, "TI": 1.6, "V": 1.53, "CR": 1.39, "MN": 1.5, "FE": 1.42,
    "CO": 1.38, "NI": 1.24, "CU": 1.32, "ZN": 1.22, "GA": 1.22, "GE": 1.2, "AS": 1.19, "SE": 1.2,
    "BR": 1.2, "KR": 1.16,
    # Period 5
    "RB": 2.2, "SR": 1.95, "Y": 1.9, "ZR": 1.75, "NB": 1.64, "MO": 1.54, "TC": 1.47, "RU": 1.46,
    "RH": 1.42, "PD": 1.39, "AG": 1.45, "CD": 1.44, "IN": 1.42, "SN": 1.39, "SB": 1.39, "TE": 1.38,
    "I": 1.39, "XE": 1.4,
    # Period 6 (including lanthanides)
    "CS": 2.44, "BA": 2.15, "LA": 2.07, "CE": 2.04, "PR": 2.03, "ND": 2.01, "PM": 1.99, "SM": 1.98,
    "EU": 1.98, "GD": 1.96, "TB": 1.94, "DY": 1.92, "HO": 1.92, "ER": 1.89, "TM": 1.9, "YB": 1.87,
    "LU": 1.87, "HF": 1.75, "TA": 1.7, "W": 1.62, "RE": 1.51, "OS": 1.44, "IR": 1.41, "PT": 1.36,
    "AU": 1.36, "HG": 1.32, "TL": 1.45, "PB": 1.46, "BI": 1.48, "PO": 1.4, "AT": 1.5, "RN": 1.5,
    # Period 7 (including actinides); 2.0 entries are placeholders
    "FR": 2.6, "RA": 2.21, "AC": 2.15, "TH": 2.06, "PA": 2.0, "U": 1.96, "NP": 1.9, "PU": 1.87,
    "AM": 1.8, "CM": 1.69, "BK": 2.0, "CF": 2.0, "ES": 2.0, "FM": 2.0, "MD": 2.0, "NO": 2.0,
    "LR": 2.0, "RF": 2.0, "DB": 2.0, "SG": 2.0, "BH": 2.0, "HS": 2.0, "MT": 2.0, "DS": 2.0,
    "RG": 2.0, "CN": 2.0, "UUT": 2.0, "UUQ": 2.0, "UUP": 2.0, "UUH": 2.0, "UUS": 2.0, "UUO": 2.0,
}
|
60 |
+
|
61 |
+
# Function to get the covalent radius of an atom
|
62 |
+
def get_covalent_radius(atom, default=2.0):
    """Return the covalent radius (in Å) for ``atom``.

    The radius is looked up in ``COVALENT_RADIUS`` by the atom's ``element``
    attribute; the lookup ignores case and surrounding whitespace.

    Args:
        atom: Object exposing an ``element`` attribute holding an element
            symbol.
        default: Radius returned when the element is missing, empty, or not
            present in ``COVALENT_RADIUS``. Defaults to 2.0 Å.

    Returns:
        The tabulated radius, or ``default`` if no entry applies.
    """
    symbol = getattr(atom, "element", None)
    if not symbol:
        # No usable element symbol: fall back instead of failing on
        # ``None.upper()``.
        return default
    return COVALENT_RADIUS.get(str(symbol).strip().upper(), default)
|
65 |
+
|
66 |
+
def calculate_clashes_for_pdb(pdb_file, tolerance=0.5):
    """Count steric clashes between atoms of a PDB structure.

    Two atoms are counted as clashing when their distance plus ``tolerance``
    is smaller than the sum of their covalent radii (as returned by
    ``get_covalent_radius``). Each unordered atom pair is counted once.

    Args:
        pdb_file: Input handed to ``PDBParser.get_structure``; it is also
            interpolated verbatim into the returned messages.
        tolerance: Slack (same units as the radii) added to the measured
            distance before comparing against the radius sum. Defaults to 0.5.

    Returns:
        A tuple ``(total_message, normalized_message)`` of two strings: the
        total clash count, and the clash count divided by the number of atoms
        (reported as 0.0 when the structure contains no atoms).
    """
    # Local import keeps this function self-contained; Bio.PDB is already a
    # dependency of this module (PDBParser).
    from Bio.PDB import NeighborSearch

    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)
    atoms = list(structure.get_atoms())
    num_atoms = len(atoms)

    steric_clash_count = 0
    if num_atoms > 1:
        # No pair can clash beyond (2 * largest radius - tolerance), so a
        # spatial index restricted to that distance replaces the all-pairs scan.
        largest_radius = max(get_covalent_radius(atom) for atom in atoms)
        search_radius = 2 * largest_radius - tolerance
        if search_radius > 0:
            # Small margin so float rounding inside the KD-tree cannot drop a
            # borderline pair; the exact test below decides the outcome.
            candidate_pairs = NeighborSearch(atoms).search_all(
                search_radius + 1e-3, level="A"
            )
            steric_clash_count = sum(
                1
                for atom1, atom2 in candidate_pairs
                # ``atom1 - atom2`` is the inter-atomic distance.
                if (atom1 - atom2) + tolerance
                < get_covalent_radius(atom1) + get_covalent_radius(atom2)
            )

    # Normalize per atom; guard against structures that yielded no atoms.
    norm_ster_clash_count = steric_clash_count / num_atoms if num_atoms else 0.0
    return (
        f"Total steric clashes in {pdb_file}: {steric_clash_count}",
        f"Normalized steric clashes per atom in {pdb_file}: {norm_ster_clash_count}",
    )
|
86 |
+
|
87 |
def read_pdb_io(pdb_file):
|
88 |
if isinstance(pdb_file, io.StringIO):
|
89 |
pdb_content = pdb_file.getvalue()
|
|
|
287 |
|
288 |
progress(0, desc="Starting prediction")
|
289 |
view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
|
290 |
+
steric_clash_text, norm_steric_clas_text = calculate_clashes_for_pdb(pdb_file)
|
291 |
if view_data is None:
|
292 |
return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
|
293 |
|
|
|
306 |
</div>
|
307 |
"""
|
308 |
progress(1.0, desc="Completed")
|
309 |
+
return html_content, crmsd_text, steric_clash_text, norm_steric_clas_text
|
310 |
except Exception as e:
|
311 |
error_message = str(e)
|
312 |
stack_trace = traceback.format_exc()
|
|
|
350 |
5. Click the "Run Prediction" button to start the process.
|
351 |
6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
|
352 |
7. The alignment result will display the best cRMSD (lower is better).
|
353 |
+
8. Total and Normalized (per atom) steric clashes (lower is better)
|
354 |
|
355 |
## About
|
356 |
This demo uses the ESM3 model to predict protein structures from PDB files.
|