Spaces:
Running
on
L4
Running
on
L4
simonduerr
committed on
Commit
·
9d2e7c6
1
Parent(s):
3c09cd6
fix wrong chains and error for seq alignment
Browse files
- __pycache__/msa.cpython-310.pyc +0 -0
- app.py +1 -1
- msa.py +43 -44
__pycache__/msa.cpython-310.pyc
ADDED
Binary file (6.83 kB). View file
|
|
app.py
CHANGED
@@ -128,7 +128,7 @@ with gr.Blocks() as blocks:
|
|
128 |
|
129 |
gr.Examples([
|
130 |
["TOP7",{"chains": [{"class": "protein","sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH","chain": "A"}], "covMods":[]}],
|
131 |
-
["ApixacabanBinder", {"chains": [{"class": "protein","sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL","chain": "A"}, {"class":"ligand", "smiles":"COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "chain": "
|
132 |
],
|
133 |
inputs = [jobname, inp]
|
134 |
)
|
|
|
128 |
|
129 |
gr.Examples([
|
130 |
["TOP7",{"chains": [{"class": "protein","sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH","chain": "A"}], "covMods":[]}],
|
131 |
+
["ApixacabanBinder", {"chains": [{"class": "protein","sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL","chain": "A"}, {"class":"ligand", "smiles":"COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "sdf":"","name":"","chain": "B"}], "covMods":[]}]
|
132 |
],
|
133 |
inputs = [jobname, inp]
|
134 |
)
|
msa.py
CHANGED
@@ -148,55 +148,54 @@ def run_mmseqs2(x, prefix, use_env=True, use_filter=True,
|
|
148 |
[seqs_unique.append(x) for x in seqs if x not in seqs_unique]
|
149 |
Ms = [N + seqs_unique.index(seq) for seq in seqs]
|
150 |
# lets do it!
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
158 |
out = submit(seqs_unique, mode, N)
|
159 |
-
while out["status"] in ["UNKNOWN", "RATELIMIT"]:
|
160 |
-
sleep_time = 5 + random.randint(0, 5)
|
161 |
-
logger.error(f"Sleeping for {sleep_time}s. Reason: {out['status']}")
|
162 |
-
# resubmit
|
163 |
-
time.sleep(sleep_time)
|
164 |
-
out = submit(seqs_unique, mode, N)
|
165 |
|
166 |
-
|
167 |
-
|
168 |
|
169 |
-
|
170 |
-
|
171 |
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
pbar.set_description(out["status"])
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
if out["status"] == "ERROR":
|
195 |
-
REDO = False
|
196 |
-
raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
|
197 |
-
|
198 |
-
# Download results
|
199 |
-
download(ID, tar_gz_file)
|
200 |
|
201 |
|
202 |
a3m_files = [f"{path}/uniref.a3m"]
|
|
|
148 |
[seqs_unique.append(x) for x in seqs if x not in seqs_unique]
|
149 |
Ms = [N + seqs_unique.index(seq) for seq in seqs]
|
150 |
# lets do it!
|
151 |
+
TIME_ESTIMATE = 150 * len(seqs_unique)
|
152 |
+
with tqdm(total=TIME_ESTIMATE, bar_format=TQDM_BAR_FORMAT) as pbar:
|
153 |
+
while REDO:
|
154 |
+
pbar.set_description("SUBMIT")
|
155 |
+
|
156 |
+
# Resubmit job until it goes through
|
157 |
+
out = submit(seqs_unique, mode, N)
|
158 |
+
while out["status"] in ["UNKNOWN", "RATELIMIT"]:
|
159 |
+
sleep_time = 5 + random.randint(0, 5)
|
160 |
+
logger.error(f"Sleeping for {sleep_time}s. Reason: {out['status']}")
|
161 |
+
# resubmit
|
162 |
+
time.sleep(sleep_time)
|
163 |
out = submit(seqs_unique, mode, N)
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
+
if out["status"] == "ERROR":
|
166 |
+
raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
|
167 |
|
168 |
+
if out["status"] == "MAINTENANCE":
|
169 |
+
raise Exception(f'MMseqs2 API is undergoing maintenance. Please try again in a few minutes.')
|
170 |
|
171 |
+
# wait for job to finish
|
172 |
+
ID,TIME = out["id"],0
|
173 |
+
pbar.set_description(out["status"])
|
174 |
+
while out["status"] in ["UNKNOWN","RUNNING","PENDING"]:
|
175 |
+
t = 5 + random.randint(0,5)
|
176 |
+
logger.error(f"Sleeping for {t}s. Reason: {out['status']}")
|
177 |
+
time.sleep(t)
|
178 |
+
out = status(ID)
|
179 |
pbar.set_description(out["status"])
|
180 |
+
if out["status"] == "RUNNING":
|
181 |
+
TIME += t
|
182 |
+
pbar.update(n=t)
|
183 |
+
#if TIME > 900 and out["status"] != "COMPLETE":
|
184 |
+
# # something failed on the server side, need to resubmit
|
185 |
+
# N += 1
|
186 |
+
# break
|
187 |
+
|
188 |
+
if out["status"] == "COMPLETE":
|
189 |
+
if TIME < TIME_ESTIMATE:
|
190 |
+
pbar.update(n=(TIME_ESTIMATE-TIME))
|
191 |
+
REDO = False
|
192 |
+
|
193 |
+
if out["status"] == "ERROR":
|
194 |
+
REDO = False
|
195 |
+
raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
|
196 |
+
|
197 |
+
# Download results
|
198 |
+
download(ID, tar_gz_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
|
201 |
a3m_files = [f"{path}/uniref.a3m"]
|