Spaces:
Runtime error
Runtime error
simonduerr
committed on
Commit
•
e6f46d3
1
Parent(s):
780cc53
Update app.py
Browse files
app.py
CHANGED
@@ -144,8 +144,9 @@ def update_protGPT2(inp, length,repetitionPenalty, top_k_poolsize, max_seqs):
|
|
144 |
sequencestxt = ""
|
145 |
for i, seq in enumerate(gen_seqs):
|
146 |
s = seq.replace("\n","")
|
|
|
147 |
s = "\n".join([s[i:i+70] for i in range(0, len(s), 70)])
|
148 |
-
sequencestxt +=f">seq{i}\n{s}\n"
|
149 |
return sequencestxt
|
150 |
|
151 |
|
@@ -349,7 +350,7 @@ with proteindream:
|
|
349 |
gr.Markdown("# GradioFold")
|
350 |
gr.Markdown(
|
351 |
"""GradioFold is a web-based tool that combines a large language model trained on natural protein sequence (protGPT2) with structure prediction using AlphaFold.
|
352 |
-
Type a start sequence that protGPT2 can complete or let protGPT2 generate a complete sequence."""
|
353 |
)
|
354 |
gr.Markdown("## protGPT2")
|
355 |
gr.Markdown(
|
@@ -364,7 +365,7 @@ with proteindream:
|
|
364 |
with gr.Row():
|
365 |
repetitionPenalty = gr.Slider(minimum=1, maximum=5,value=1.2, label="Repetition penalty")
|
366 |
top_k_poolsize = gr.Slider(minimum=700, maximum=52056,value=950, label="Top-K sampling pool size")
|
367 |
-
max_seqs = gr.Slider(minimum=2, maximum=20,value=5, label="Number of sequences to generate")
|
368 |
btn = gr.Button("Predict sequences using protGPT2")
|
369 |
|
370 |
results = gr.Textbox(label="Results", lines=15)
|
@@ -372,7 +373,7 @@ with proteindream:
|
|
372 |
|
373 |
gr.Markdown("## AlphaFold")
|
374 |
gr.Markdown(
|
375 |
-
"Select a generated sequence above and copy it in the field below for structure prediction using AlphaFold2."
|
376 |
)
|
377 |
with gr.Group():
|
378 |
chosenSeq = gr.Textbox(label="Chosen sequence")
|
@@ -384,7 +385,7 @@ with proteindream:
|
|
384 |
plot = gr.Plot(label="pLDDT")
|
385 |
gr.Markdown(
|
386 |
"""## Acknowledgements
|
387 |
-
|
388 |
|
389 |
All code is available on [Huggingface](https://huggingface.co/spaces/simonduerr/protGPT2_gradioFold/blob/main) and licensed under MIT license.
|
390 |
|
@@ -394,6 +395,7 @@ with proteindream:
|
|
394 |
- 3Dmol.js: Rego & Koes 📄[Paper](https://academic.oup.com/bioinformatics/article/31/8/1322/213186) 💻 [Code](https://github.com/3dmol/3Dmol.js)
|
395 |
|
396 |
Created by [@simonduerr](https://twitter.com/simonduerr)
|
|
|
397 |
"""
|
398 |
)
|
399 |
#seqChoice.change(fn=update_seqs, inputs=seqChoice, outputs=chosenSeq)
|
|
|
144 |
sequencestxt = ""
|
145 |
for i, seq in enumerate(gen_seqs):
|
146 |
s = seq.replace("\n","")
|
147 |
+
seqlen = len(s)
|
148 |
s = "\n".join([s[i:i+70] for i in range(0, len(s), 70)])
|
149 |
+
sequencestxt +=f">seq{i}, {seqlen} residues \n{s}\n\n"
|
150 |
return sequencestxt
|
151 |
|
152 |
|
|
|
350 |
gr.Markdown("# GradioFold")
|
351 |
gr.Markdown(
|
352 |
"""GradioFold is a web-based tool that combines a large language model trained on natural protein sequence (protGPT2) with structure prediction using AlphaFold.
|
353 |
+
Type a start sequence that protGPT2 can complete or let protGPT2 generate a complete sequence without a start token."""
|
354 |
)
|
355 |
gr.Markdown("## protGPT2")
|
356 |
gr.Markdown(
|
|
|
365 |
with gr.Row():
|
366 |
repetitionPenalty = gr.Slider(minimum=1, maximum=5,value=1.2, label="Repetition penalty")
|
367 |
top_k_poolsize = gr.Slider(minimum=700, maximum=52056,value=950, label="Top-K sampling pool size")
|
368 |
+
max_seqs = gr.Slider(minimum=2, maximum=20,value=5, step=1, label="Number of sequences to generate")
|
369 |
btn = gr.Button("Predict sequences using protGPT2")
|
370 |
|
371 |
results = gr.Textbox(label="Results", lines=15)
|
|
|
373 |
|
374 |
gr.Markdown("## AlphaFold")
|
375 |
gr.Markdown(
|
376 |
+
"Select a generated sequence above and copy it in the field below for structure prediction using AlphaFold2. You can also edit the sequence. Predictions will take around 2-5 minutes to be processed. Proteins larger than about 1000 residues will not fit into memory."
|
377 |
)
|
378 |
with gr.Group():
|
379 |
chosenSeq = gr.Textbox(label="Chosen sequence")
|
|
|
385 |
plot = gr.Plot(label="pLDDT")
|
386 |
gr.Markdown(
|
387 |
"""## Acknowledgements
|
388 |
+
More information about the used algorithms can be found below.
|
389 |
|
390 |
All code is available on [Huggingface](https://huggingface.co/spaces/simonduerr/protGPT2_gradioFold/blob/main) and licensed under MIT license.
|
391 |
|
|
|
395 |
- 3Dmol.js: Rego & Koes 📄[Paper](https://academic.oup.com/bioinformatics/article/31/8/1322/213186) 💻 [Code](https://github.com/3dmol/3Dmol.js)
|
396 |
|
397 |
Created by [@simonduerr](https://twitter.com/simonduerr)
|
398 |
+
Thanks to Hugginface team for sponsoring a free GPU for this demo.
|
399 |
"""
|
400 |
)
|
401 |
#seqChoice.change(fn=update_seqs, inputs=seqChoice, outputs=chosenSeq)
|