simonduerr committed on
Commit
9d2e7c6
·
1 Parent(s): 3c09cd6

fix wrong chains and error for seq alignment

Browse files
Files changed (3) hide show
  1. __pycache__/msa.cpython-310.pyc +0 -0
  2. app.py +1 -1
  3. msa.py +43 -44
__pycache__/msa.cpython-310.pyc ADDED
Binary file (6.83 kB). View file
 
app.py CHANGED
@@ -128,7 +128,7 @@ with gr.Blocks() as blocks:
128
 
129
  gr.Examples([
130
  ["TOP7",{"chains": [{"class": "protein","sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH","chain": "A"}], "covMods":[]}],
131
- ["ApixacabanBinder", {"chains": [{"class": "protein","sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL","chain": "A"}, {"class":"ligand", "smiles":"COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "chain": "A"}], "covMods":[]}]
132
  ],
133
  inputs = [jobname, inp]
134
  )
 
128
 
129
  gr.Examples([
130
  ["TOP7",{"chains": [{"class": "protein","sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH","chain": "A"}], "covMods":[]}],
131
+ ["ApixacabanBinder", {"chains": [{"class": "protein","sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL","chain": "A"}, {"class":"ligand", "smiles":"COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "sdf":"","name":"","chain": "B"}], "covMods":[]}]
132
  ],
133
  inputs = [jobname, inp]
134
  )
msa.py CHANGED
@@ -148,55 +148,54 @@ def run_mmseqs2(x, prefix, use_env=True, use_filter=True,
148
  [seqs_unique.append(x) for x in seqs if x not in seqs_unique]
149
  Ms = [N + seqs_unique.index(seq) for seq in seqs]
150
  # lets do it!
151
- if not os.path.isfile(tar_gz_file):
152
- TIME_ESTIMATE = 150 * len(seqs_unique)
153
- with tqdm(total=TIME_ESTIMATE, bar_format=TQDM_BAR_FORMAT) as pbar:
154
- while REDO:
155
- pbar.set_description("SUBMIT")
156
-
157
- # Resubmit job until it goes through
 
 
 
 
 
158
  out = submit(seqs_unique, mode, N)
159
- while out["status"] in ["UNKNOWN", "RATELIMIT"]:
160
- sleep_time = 5 + random.randint(0, 5)
161
- logger.error(f"Sleeping for {sleep_time}s. Reason: {out['status']}")
162
- # resubmit
163
- time.sleep(sleep_time)
164
- out = submit(seqs_unique, mode, N)
165
 
166
- if out["status"] == "ERROR":
167
- raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
168
 
169
- if out["status"] == "MAINTENANCE":
170
- raise Exception(f'MMseqs2 API is undergoing maintenance. Please try again in a few minutes.')
171
 
172
- # wait for job to finish
173
- ID,TIME = out["id"],0
 
 
 
 
 
 
174
  pbar.set_description(out["status"])
175
- while out["status"] in ["UNKNOWN","RUNNING","PENDING"]:
176
- t = 5 + random.randint(0,5)
177
- logger.error(f"Sleeping for {t}s. Reason: {out['status']}")
178
- time.sleep(t)
179
- out = status(ID)
180
- pbar.set_description(out["status"])
181
- if out["status"] == "RUNNING":
182
- TIME += t
183
- pbar.update(n=t)
184
- #if TIME > 900 and out["status"] != "COMPLETE":
185
- # # something failed on the server side, need to resubmit
186
- # N += 1
187
- # break
188
-
189
- if out["status"] == "COMPLETE":
190
- if TIME < TIME_ESTIMATE:
191
- pbar.update(n=(TIME_ESTIMATE-TIME))
192
- REDO = False
193
-
194
- if out["status"] == "ERROR":
195
- REDO = False
196
- raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
197
-
198
- # Download results
199
- download(ID, tar_gz_file)
200
 
201
 
202
  a3m_files = [f"{path}/uniref.a3m"]
 
148
  [seqs_unique.append(x) for x in seqs if x not in seqs_unique]
149
  Ms = [N + seqs_unique.index(seq) for seq in seqs]
150
  # lets do it!
151
+ TIME_ESTIMATE = 150 * len(seqs_unique)
152
+ with tqdm(total=TIME_ESTIMATE, bar_format=TQDM_BAR_FORMAT) as pbar:
153
+ while REDO:
154
+ pbar.set_description("SUBMIT")
155
+
156
+ # Resubmit job until it goes through
157
+ out = submit(seqs_unique, mode, N)
158
+ while out["status"] in ["UNKNOWN", "RATELIMIT"]:
159
+ sleep_time = 5 + random.randint(0, 5)
160
+ logger.error(f"Sleeping for {sleep_time}s. Reason: {out['status']}")
161
+ # resubmit
162
+ time.sleep(sleep_time)
163
  out = submit(seqs_unique, mode, N)
 
 
 
 
 
 
164
 
165
+ if out["status"] == "ERROR":
166
+ raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
167
 
168
+ if out["status"] == "MAINTENANCE":
169
+ raise Exception(f'MMseqs2 API is undergoing maintenance. Please try again in a few minutes.')
170
 
171
+ # wait for job to finish
172
+ ID,TIME = out["id"],0
173
+ pbar.set_description(out["status"])
174
+ while out["status"] in ["UNKNOWN","RUNNING","PENDING"]:
175
+ t = 5 + random.randint(0,5)
176
+ logger.error(f"Sleeping for {t}s. Reason: {out['status']}")
177
+ time.sleep(t)
178
+ out = status(ID)
179
  pbar.set_description(out["status"])
180
+ if out["status"] == "RUNNING":
181
+ TIME += t
182
+ pbar.update(n=t)
183
+ #if TIME > 900 and out["status"] != "COMPLETE":
184
+ # # something failed on the server side, need to resubmit
185
+ # N += 1
186
+ # break
187
+
188
+ if out["status"] == "COMPLETE":
189
+ if TIME < TIME_ESTIMATE:
190
+ pbar.update(n=(TIME_ESTIMATE-TIME))
191
+ REDO = False
192
+
193
+ if out["status"] == "ERROR":
194
+ REDO = False
195
+ raise Exception(f'MMseqs2 API is giving errors. Please confirm your input is a valid protein sequence. If error persists, please try again an hour later.')
196
+
197
+ # Download results
198
+ download(ID, tar_gz_file)
 
 
 
 
 
 
199
 
200
 
201
  a3m_files = [f"{path}/uniref.a3m"]