Spaces:
Sleeping
Sleeping
import streamlit as st | |
import streamlit.components.v1 as components | |
import requests | |
from io import StringIO | |
from Bio import SeqIO | |
import os | |
import time | |
import pandas as pd | |
from run_domain2go_app import * | |
def convert_df(df):
    """Serialize *df* to CSV (index column omitted) and return it as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode(encoding='utf-8')
# Becomes True further down only when 'Predict functions' is clicked while a
# plausible email and a parsed sequence are available.
submitted = False

# FASTA record that pre-fills the text area after 'Use example sequence' is clicked.
EXAMPLE_FASTA = (
    '>sp|O18783|PLMN_NOTEU\n'
    'MEYGKVIFLFLLFLKSGQGESLENYIKTEGASLSNSQKKQFVASSTEECEALCEKETEFVCRSFEHYNKEQKCVIMSENSKTSSVERKRDVVLFEKRIYLSDCKSGNGRNYRGTLSKTKSGITCQKWSDLSPHVPNYAPSKYPDAGLEKNYCRNPDDDVKGPWCYTTNPDIRYEYCDVPECEDECMHCSGENYRGTISKTESGIECQPWDSQEPHSHEYIPSKFPSKDLKENYCRNPDGEPRPWCFTSNPEKRWEFCNIPRCSSPPPPPGPMLQCLKGRGENYRGKIAVTKSGHTCQRWNKQTPHKHNRTPENFPCRGLDENYCRNPDGELEPWCYTTNPDVRQEYCAIPSCGTSSPHTDRVEQSPVIQECYEGKGENYRGTTSTTISGKKCQAWSSMTPHQHKKTPDNFPNADLIRNYCRNPDGDKSPWCYTMDPTVRWEFCNLEKCSGTGSTVLNAQTTRVPSVDTTSHPESDCMYGSGKDYRGKRSTTVTGTLCQAWTAQEPHRHTIFTPDTYPRAGLEENYCRNPDGDPNGPWCYTTNPKKLFDYCDIPQCVSPSSFDCGKPRVEPQKCPGRIVGGCYAQPHSWPWQISLRTRFGEHFCGGTLIAPQWVLTAAHCLERSQWPGAYKVILGLHREVNPESYSQEIGVSRLFKGPLAADIALLKLNRPAAINDKVIPACLPSQDFMVPDRTLCHVTGWGDTQGTSPRGLLKQASLPVIDNRVCNRHEYLNGRVKSTELCAGHLVGRGDSCQGDSGGPLICFEDDKYVLQGVTSWGLGCARPNKPGVYVRVSRYISWIEDVMKNN'
)


def click_button():
    """Toggle the session flag that switches the example sequence on and off."""
    st.session_state.example_seq_button = not st.session_state.example_seq_button


def _read_fasta_records(handle):
    """Return every FASTA record parsed from the text *handle* as a list."""
    try:
        return list(SeqIO.parse(handle, 'fasta'))
    except ValueError:
        # NOTE(review): some Biopython releases may raise ValueError for text
        # that does not start with a '>' header - confirm against the pinned
        # version. Such input holds no usable record, so report none.
        return []


def _decode_upload(raw_bytes):
    """Decode uploaded bytes as UTF-8, then UTF-16; return None if both fail."""
    for encoding in ('utf-8', 'utf-16'):
        try:
            return raw_bytes.decode(encoding)
        except UnicodeError:
            continue
    return None


if 'example_seq_button' not in st.session_state:
    st.session_state.example_seq_button = False

# Bound up front so the submit check later in the script never reads an
# undefined name: the upload branch assigns it only once a file was chosen.
fasta_sequences = None

with st.sidebar:
    st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
    st.write("[![publication](https://img.shields.io/badge/DOI-10.1002/pro.4988-b31b1b.svg)](https://doi.org/10.1002/pro.4988) [![github-repository](https://img.shields.io/badge/GitHub-black?logo=github)](https://github.com/HUBioDataLab/Domain2GO)")

    input_type = st.radio('Select input type', ['Enter a sequence', 'Upload a FASTA file'])

    if input_type == 'Enter a sequence':
        st.button('Use example sequence', on_click=click_button)
        # One widget call for both states; an empty `value` is the text-area default.
        sequence_text_input = st.text_area(
            'Enter a protein sequence in FASTA format*',
            value=EXAMPLE_FASTA if st.session_state.example_seq_button else '',
        )

        fasta_sequences = _read_fasta_records(StringIO(sequence_text_input))
        if len(fasta_sequences) > 1:
            st.error('Please enter only one sequence.')
            fasta_sequences = None
        elif len(fasta_sequences) == 1:
            st.session_state['sequence'] = str(fasta_sequences[0].seq)
            st.session_state['name'] = fasta_sequences[0].id
        elif sequence_text_input:
            # Text was typed but no FASTA record could be parsed from it.
            st.error('Please enter a sequence.')
            fasta_sequences = None
    else:
        protein_input = st.file_uploader('Choose a file', type=['.fasta', '.fas', '.fa', '.fna', '.ffn', '.faa', '.mpfa', '.frn', '.txt'])
        # Replace the uploader's default hint with the single-sequence notice.
        css = '''
<style>
[data-testid="stFileUploadDropzone"] div div::after {color:gray; font-size: .8em; content:"FASTA file should contain only one sequence*"}
[data-testid="stFileUploadDropzone"] div div small{display:none;}
</style>
'''
        st.markdown(css, unsafe_allow_html=True)

        if protein_input:
            fasta_text = _decode_upload(protein_input.read())
            if fasta_text is None:
                # Neither UTF-8 nor UTF-16: report it instead of crashing the app.
                st.error('Could not read the uploaded file. Please upload a plain-text FASTA file.')
            else:
                fasta_sequences = _read_fasta_records(StringIO(fasta_text))
                if len(fasta_sequences) == 0:
                    st.error('Please upload a file containing a sequence.')
                    fasta_sequences = None
                elif len(fasta_sequences) > 1:
                    st.error('Please upload a file containing only one sequence.')
                    fasta_sequences = None
                else:
                    st.session_state['name'] = fasta_sequences[0].id
                    st.session_state['sequence'] = str(fasta_sequences[0].seq)

    st.session_state['email'] = st.text_input('Enter your email for InterProScan query**')
# Sidebar submit button. A click counts as a submission only when session state
# holds an email containing '@' and a sequence, and `fasta_sequences` is truthy;
# otherwise a warning is shown in the sidebar.
with st.sidebar:
    predict_clicked = st.button('Predict functions')
    if predict_clicked:
        # Checks run in this order and stop at the first failure.
        input_incomplete = (
            'email' not in st.session_state
            or 'sequence' not in st.session_state
            or '@' not in st.session_state.email
            or not fasta_sequences
        )
        if input_incomplete:
            with st.sidebar:
                st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid, your sequence is in FASTA format, and that you have entered only one sequence.')
        else:
            submitted = True
            st.session_state['disabled'] = True
# Sidebar footer: a horizontal rule followed by the two footnotes
# (single-protein disclaimer and the note on how the email is used).
footnotes_html = """
<div style="padding:5px">
<p style="color:#000000;font-size:12px;">*Disclaimer: This program is designed to generate predictions for a single protein due to the extended runtime of InterProScan. If you need predictions for multiple UniProtKB/Swiss-Prot proteins, we recommend utilizing our comprehensive protein function prediction dataset available in our <a href="https://github.com/HUBioDataLab/Domain2GO">Github repository</a>.</p>
<p style="color:#000000;font-size:12px;">**InterProScan requests your email to notify you when your job is done. Your email will not be used for any other purpose.</p>
</div>
"""
c = st.sidebar.container()
c.markdown("---")
c.markdown(footnotes_html, unsafe_allow_html=True)
# Main page: while no submission has been accepted in this run, show a prompt
# asking the user to submit a sequence.
if not submitted:
    start_prompt_html = """
<div style="padding:30px">
<p style="color:#2a7b36;font-size:20px;">Submit your protein sequence to start.</p>
</div>
"""
    st.markdown(start_prompt_html, unsafe_allow_html=True)
# Outcome flags for the InterProScan step of the current script run.
no_domains = False
error_in_interproscan = False

if submitted:
    # Session state outlives a single submission. Drop whatever an earlier
    # submission stored so that a failed or empty search for this protein
    # cannot fall through to the previous protein's domains or predictions.
    for stale_key in ('domain_df', 'pred_df'):
        if stale_key in st.session_state:
            del st.session_state[stale_key]

    with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
        result = find_domains(st.session_state.email, st.session_state.sequence, st.session_state.name)
        # result[0] is a status message; the meaning of the remaining items
        # depends on that status (see the branches below).
        result_text = result[0]

        if result_text == 'Domains found.':
            # st.success(result_text + ' You can now see function predictions for the sequence in the "Function predictions" tab.')
            st.session_state['domain_df'] = result[1]
        elif result_text == 'No domains found.':
            st.warning(result_text)
            no_domains = True
        else:
            # Any other status is reported as an error together with the job
            # id and response returned alongside it.
            st.error(result_text)
            st.write(f'InterProScan job id: {result[1]}')
            st.write(f'InterProScan job response: {result[2]}')
            error_in_interproscan = True
# Display of the domain table is currently disabled:
# if 'domain_df' in st.session_state:
#     with st.expander('Show domains in sequence'):
#         st.write(st.session_state.domain_df)
#         domains_csv = convert_df(st.session_state.domain_df)
#         st.download_button(
#             label="Download domains in sequence as CSV",
#             data=domains_csv,
#             file_name=f"{st.session_state.name}_domains.csv",
#             mime="text/csv",
#         )

# Generate and show function predictions whenever a domain table is stored in
# session state; without one, only surface an InterProScan error if one occurred.
if 'domain_df' not in st.session_state:
    if error_in_interproscan:
        st.error('Error in InterProScan. Please check InterProScan job id and response.')
else:
    # Predictions are stored again only on success below, so forget the old
    # table first; otherwise a run that yields no predictions would display
    # the table left behind by an earlier run.
    if 'pred_df' in st.session_state:
        del st.session_state['pred_df']

    with st.spinner('Generating function predictions...'):
        # Mapping files are looked up relative to the working directory.
        mapping_path = './data'
        pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)
        pred_result_text = pred_results[0]

        if pred_result_text == 'Function predictions found.':
            st.success('Function predictions generated.')
            st.session_state['pred_df'] = pred_results[1]
        elif pred_result_text == 'No predictions made for domains found in sequence.':
            st.warning(pred_result_text)

    if 'pred_df' in st.session_state:
        with st.expander('Show function predictions'):
            st.write(st.session_state.pred_df)
            pred_csv = convert_df(st.session_state.pred_df)
            st.download_button(
                label="Download function predictions as CSV",
                data=pred_csv,
                file_name=f"{st.session_state.name}_function_predictions.csv",
                mime="text/csv",
            )