fix api calls
Browse files
app.py
CHANGED
@@ -4,27 +4,29 @@ import io
|
|
4 |
import numpy as np
|
5 |
import os
|
6 |
import traceback
|
7 |
-
# import spaces
|
8 |
from esm.sdk import client
|
9 |
-
from esm.sdk.api import ESM3InferenceClient, ESMProtein, GenerationConfig
|
10 |
from esm.utils.structure.protein_chain import ProteinChain
|
11 |
from Bio.Data import PDBData
|
12 |
import biotite.structure as bs
|
13 |
from biotite.structure.io import pdb
|
14 |
from esm.utils import residue_constants as RC
|
15 |
-
|
16 |
from dotenv import load_dotenv
|
|
|
17 |
|
18 |
load_dotenv()
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
22 |
raise ValueError("ESM_API_TOKEN environment variable is not set")
|
23 |
|
24 |
model = client(
|
25 |
-
model=
|
26 |
-
url=
|
27 |
-
token=
|
28 |
)
|
29 |
|
30 |
amino3to1 = {
|
@@ -34,6 +36,7 @@ amino3to1 = {
|
|
34 |
'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y'
|
35 |
}
|
36 |
|
|
|
37 |
def read_pdb_io(pdb_file):
|
38 |
if isinstance(pdb_file, io.StringIO):
|
39 |
pdb_content = pdb_file.getvalue()
|
@@ -129,15 +132,19 @@ def add_noise_to_coordinates(protein: ESMProtein, noise_level: float) -> ESMProt
|
|
129 |
def run_structure_prediction(protein: ESMProtein) -> ESMProtein:
|
130 |
structure_prediction_config = GenerationConfig(
|
131 |
track="structure",
|
132 |
-
num_steps=
|
133 |
temperature=0.7,
|
134 |
)
|
135 |
try:
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
139 |
else:
|
140 |
-
raise ValueError(f"Unexpected
|
141 |
except Exception as e:
|
142 |
print(f"Error during structure prediction: {str(e)}")
|
143 |
return None
|
@@ -149,15 +156,55 @@ def align_after_prediction(protein: ESMProtein, structure_prediction: ESMProtein
|
|
149 |
try:
|
150 |
structure_prediction_chain = structure_prediction.to_protein_chain()
|
151 |
protein_chain = protein.to_protein_chain()
|
152 |
-
|
153 |
-
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
return ESMProtein.from_protein_chain(aligned_chain), crmsd
|
156 |
except AttributeError as e:
|
157 |
print(f"Error during alignment: {str(e)}")
|
158 |
print(f"Structure prediction type: {type(structure_prediction)}")
|
159 |
print(f"Structure prediction attributes: {dir(structure_prediction)}")
|
160 |
return None, float('inf')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
def prediction_visualization(pdb_file, num_runs: int, noise_level: float, num_frames: int):
|
163 |
protein = get_protein(pdb_file)
|
@@ -168,30 +215,18 @@ def prediction_visualization(pdb_file, num_runs: int, noise_level: float, num_fr
|
|
168 |
|
169 |
for i in range(num_runs):
|
170 |
structure_prediction = run_structure_prediction(noisy_protein)
|
171 |
-
|
172 |
-
|
173 |
-
|
|
|
174 |
|
175 |
if not runs:
|
176 |
return None, "No successful predictions"
|
177 |
|
178 |
-
best_aligned = sorted(runs)[0]
|
179 |
view = visualize_after_pred(protein, best_aligned[1])
|
180 |
return view, f"Best cRMSD: {best_aligned[0]:.4f}"
|
181 |
|
182 |
-
def visualize_after_pred(protein: ESMProtein, aligned: ESMProtein):
|
183 |
-
if aligned is None:
|
184 |
-
return py3Dmol.view(width=800, height=600)
|
185 |
-
|
186 |
-
view = py3Dmol.view(width=800, height=600)
|
187 |
-
view.addModel(protein.to_pdb_string(), "pdb")
|
188 |
-
view.setStyle({"cartoon": {"color": "lightgrey"}})
|
189 |
-
view.addModel(aligned.to_pdb_string(), "pdb")
|
190 |
-
view.setStyle({"model": 1}, {"cartoon": {"color": "lightgreen"}})
|
191 |
-
view.zoomTo()
|
192 |
-
return view
|
193 |
-
|
194 |
-
# @spaces.GPU()
|
195 |
def run_prediction(pdb_file, num_runs, noise_level, num_frames):
|
196 |
try:
|
197 |
if pdb_file is None:
|
@@ -199,7 +234,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames):
|
|
199 |
|
200 |
view, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames)
|
201 |
if view is None:
|
202 |
-
return "No successful predictions were made. Try adjusting the parameters.", crmsd_text
|
203 |
|
204 |
html = view._make_html()
|
205 |
return f"""
|
|
|
4 |
import numpy as np
|
5 |
import os
|
6 |
import traceback
|
|
|
7 |
from esm.sdk import client
|
8 |
+
from esm.sdk.api import ESM3InferenceClient, ESMProtein, GenerationConfig, ESMProteinError
|
9 |
from esm.utils.structure.protein_chain import ProteinChain
|
10 |
from Bio.Data import PDBData
|
11 |
import biotite.structure as bs
|
12 |
from biotite.structure.io import pdb
|
13 |
from esm.utils import residue_constants as RC
|
14 |
+
import requests
|
15 |
from dotenv import load_dotenv
|
16 |
+
import torch
|
17 |
|
18 |
load_dotenv()
|
19 |
+
|
20 |
+
# Remote inference endpoint and the model identifier requested from it.
API_URL = "https://forge.evolutionaryscale.ai/api/v1"
MODEL = "esm3-open-2024-03"

# The access token must come from the environment (load_dotenv() has already
# run at this point), never from a literal committed to the repository.
API_TOKEN = os.environ.get("ESM_API_TOKEN")
if not API_TOKEN:
    raise ValueError("ESM_API_TOKEN environment variable is not set")

# Module-level client shared by every prediction call in this file.
# Uses the validated environment token instead of a hard-coded secret.
model = client(
    model=MODEL,
    url=API_URL,
    token=API_TOKEN,
)
|
31 |
|
32 |
amino3to1 = {
|
|
|
36 |
'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y'
|
37 |
}
|
38 |
|
39 |
+
|
40 |
def read_pdb_io(pdb_file):
|
41 |
if isinstance(pdb_file, io.StringIO):
|
42 |
pdb_content = pdb_file.getvalue()
|
|
|
132 |
def run_structure_prediction(protein: ESMProtein) -> ESMProtein:
    """Request a structure-track generation for ``protein`` from ``model``.

    Args:
        protein: The protein passed to ``model.generate``.

    Returns:
        The generated ``ESMProtein`` on success. ``None`` when the model
        answers with an ``ESMProteinError``, answers with any other type,
        or when the call raises; in each of those cases a message is
        printed instead of propagating an exception.
    """
    config = GenerationConfig(
        track="structure",
        num_steps=10,
        temperature=0.7,
    )
    try:
        result = model.generate(protein, config)

        if isinstance(result, ESMProtein):
            return result

        if isinstance(result, ESMProteinError):
            print(f"ESMProteinError during structure prediction: {result.error_msg}")
            return None

        # Anything else is unexpected; the handler below reports it.
        raise ValueError(f"Unexpected response type: {type(result)}")
    except Exception as e:
        print(f"Error during structure prediction: {str(e)}")
        return None
|
|
|
156 |
try:
|
157 |
structure_prediction_chain = structure_prediction.to_protein_chain()
|
158 |
protein_chain = protein.to_protein_chain()
|
159 |
+
|
160 |
+
# Ensure both chains have the same length
|
161 |
+
min_length = min(len(structure_prediction_chain.sequence), len(protein_chain.sequence))
|
162 |
+
structure_indices = np.arange(0, min_length)
|
163 |
+
|
164 |
+
# Perform alignment
|
165 |
+
aligned_chain = structure_prediction_chain.align(
|
166 |
+
protein_chain,
|
167 |
+
mobile_inds=structure_indices,
|
168 |
+
target_inds=structure_indices
|
169 |
+
)
|
170 |
+
|
171 |
+
# Calculate RMSD
|
172 |
+
crmsd = structure_prediction_chain.rmsd(
|
173 |
+
protein_chain,
|
174 |
+
mobile_inds=structure_indices,
|
175 |
+
target_inds=structure_indices
|
176 |
+
)
|
177 |
+
|
178 |
return ESMProtein.from_protein_chain(aligned_chain), crmsd
|
179 |
except AttributeError as e:
|
180 |
print(f"Error during alignment: {str(e)}")
|
181 |
print(f"Structure prediction type: {type(structure_prediction)}")
|
182 |
print(f"Structure prediction attributes: {dir(structure_prediction)}")
|
183 |
return None, float('inf')
|
184 |
+
except Exception as e:
|
185 |
+
print(f"Unexpected error during alignment: {str(e)}")
|
186 |
+
return None, float('inf')
|
187 |
+
|
188 |
+
def visualize_after_pred(protein: ESMProtein, aligned: ESMProtein):
    """Build a py3Dmol view overlaying ``aligned`` on ``protein``.

    Args:
        protein: Reference protein, added first and drawn as a light grey
            cartoon.
        aligned: Predicted protein, added second and drawn as a light green
            cartoon. When ``None``, an empty 800x600 view is returned.

    Returns:
        The py3Dmol view object.
    """
    viewer = py3Dmol.view(width=800, height=600)
    if aligned is None:
        # Nothing to overlay: hand back the empty canvas.
        return viewer

    # Reference model; this style call has no selector argument.
    viewer.addModel(protein_to_pdb(protein), "pdb")
    viewer.setStyle({"cartoon": {"color": "lightgrey"}})

    # Prediction model; restyle only model index 1.
    viewer.addModel(protein_to_pdb(aligned), "pdb")
    viewer.setStyle({"model": 1}, {"cartoon": {"color": "lightgreen"}})

    viewer.zoomTo()
    return viewer
|
199 |
+
|
200 |
+
def protein_to_pdb(protein: ESMProtein):
    """Serialize ``protein`` into PDB ``ATOM`` records on chain ``A``.

    One record is written for every (residue, atom slot) pair whose first
    coordinate component is not NaN; slots with a NaN x value are skipped.
    Atom slots follow the order of ``RC.atom_types``.

    Args:
        protein: Object exposing ``sequence`` and ``coordinates``, iterated
            in lockstep per residue.
            # assumes coordinates[i][j] is the xyz tensor for atom slot j
            # of residue i -- TODO confirm against ESMProtein.

    Returns:
        A string of newline-terminated ATOM lines (empty if nothing is
        resolvable).
    """
    records = []
    for i, (aa, coords) in enumerate(zip(protein.sequence, protein.coordinates)):
        for j, atom in enumerate(RC.atom_types):
            if torch.isnan(coords[j][0]):
                continue
            x, y, z = coords[j].tolist()
            # Fixed-column PDB layout: serial in cols 7-11, atom name in
            # 13-16, residue name in 18-20, chain in 22, residue number in
            # 23-26, coordinates starting at col 31.
            # NOTE(review): ``aa`` is written as-is into the 3-wide residue
            # name field; if it is a one-letter code, confirm the consumer
            # tolerates that.
            records.append(
                f"ATOM  {i*37+j+1:5d}  {atom:<3s} {aa:3s} A{i+1:4d}    "
                f"{x:8.3f}{y:8.3f}{z:8.3f}\n"
            )
    # Join once instead of growing a string inside the nested loop.
    return "".join(records)
|
208 |
|
209 |
def prediction_visualization(pdb_file, num_runs: int, noise_level: float, num_frames: int):
|
210 |
protein = get_protein(pdb_file)
|
|
|
215 |
|
216 |
for i in range(num_runs):
|
217 |
structure_prediction = run_structure_prediction(noisy_protein)
|
218 |
+
if structure_prediction is not None:
|
219 |
+
aligned, crmsd = align_after_prediction(protein, structure_prediction)
|
220 |
+
if aligned is not None:
|
221 |
+
runs.append((crmsd, aligned))
|
222 |
|
223 |
if not runs:
|
224 |
return None, "No successful predictions"
|
225 |
|
226 |
+
best_aligned = sorted(runs, key=lambda x: x[0])[0]
|
227 |
view = visualize_after_pred(protein, best_aligned[1])
|
228 |
return view, f"Best cRMSD: {best_aligned[0]:.4f}"
|
229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
def run_prediction(pdb_file, num_runs, noise_level, num_frames):
|
231 |
try:
|
232 |
if pdb_file is None:
|
|
|
234 |
|
235 |
view, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames)
|
236 |
if view is None:
|
237 |
+
return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
|
238 |
|
239 |
html = view._make_html()
|
240 |
return f"""
|