Spaces:
Sleeping
Sleeping
File size: 6,956 Bytes
c712316 8787ae1 c712316 10fe79e c712316 b83c412 c712316 b83c412 c712316 b83c412 c712316 53aa54f c712316 10fe79e 77b1fab 10fe79e 77b1fab c712316 10fe79e 007151e 4cc17e2 10fe79e c712316 10fe79e c712316 10fe79e c712316 c98651d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import streamlit as st
import streamlit.components.v1 as components
import requests
from io import StringIO
from Bio import SeqIO
import os
import time
import pandas as pd
from run_domain2go_app import *
def convert_df(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to UTF-8 encoded CSV bytes.

    The index is left out, so the output holds the header row followed by
    one line per row of ``df``.

    Args:
        df: Table to export.

    Returns:
        The CSV text of ``df`` encoded as UTF-8.
    """
    return df.to_csv(index=False).encode('utf-8')
# Sidebar: title, links, and the widgets that collect the protein sequence
# (typed or uploaded) and the e-mail address. Values are kept in
# st.session_state under 'sequence', 'name' and 'email'.
with st.sidebar:
    st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
    st.write("[![biorxiv](https://img.shields.io/badge/bioRxiv-2022.11.03.514980-b31b1b.svg)](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) [![github-repository](https://img.shields.io/badge/GitHub-black?logo=github)](https://github.com/HUBioDataLab/Domain2GO)")

    # Flag toggled by the 'Use example sequence' button; stored in session
    # state so it is still there on the rerun triggered by the click.
    if 'example_seq_button' not in st.session_state:
        st.session_state.example_seq_button = False

    def click_button():
        """Flip the example-sequence flag (on_click callback of the button)."""
        st.session_state.example_seq_button = not st.session_state.example_seq_button

    input_type = st.radio('Select input type', ['Enter sequence', 'Upload FASTA file'])

    if input_type == 'Enter sequence':
        example_fasta = (
            '>sp|O18783|PLMN_NOTEU\n'
            'MEYGKVIFLFLLFLKSGQGESLENYIKTEGASLSNSQKKQFVASSTEECEALCEKETEFVCRSFEHYNKEQKCVIMSENSKTSSVERKRDVVLFEKRIYLSDCKSGNGRNYRGTLSKTKSGITCQKWSDLSPHVPNYAPSKYPDAGLEKNYCRNPDDDVKGPWCYTTNPDIRYEYCDVPECEDECMHCSGENYRGTISKTESGIECQPWDSQEPHSHEYIPSKFPSKDLKENYCRNPDGEPRPWCFTSNPEKRWEFCNIPRCSSPPPPPGPMLQCLKGRGENYRGKIAVTKSGHTCQRWNKQTPHKHNRTPENFPCRGLDENYCRNPDGELEPWCYTTNPDVRQEYCAIPSCGTSSPHTDRVEQSPVIQECYEGKGENYRGTTSTTISGKKCQAWSSMTPHQHKKTPDNFPNADLIRNYCRNPDGDKSPWCYTMDPTVRWEFCNLEKCSGTGSTVLNAQTTRVPSVDTTSHPESDCMYGSGKDYRGKRSTTVTGTLCQAWTAQEPHRHTIFTPDTYPRAGLEENYCRNPDGDPNGPWCYTTNPKKLFDYCDIPQCVSPSSFDCGKPRVEPQKCPGRIVGGCYAQPHSWPWQISLRTRFGEHFCGGTLIAPQWVLTAAHCLERSQWPGAYKVILGLHREVNPESYSQEIGVSRLFKGPLAADIALLKLNRPAAINDKVIPACLPSQDFMVPDRTLCHVTGWGDTQGTSPRGLLKQASLPVIDNRVCNRHEYLNGRVKSTELCAGHLVGRGDSCQGDSGGPLICFEDDKYVLQGVTSWGLGCARPNKPGVYVRVSRYISWIEDVMKNN'
        )
        # A FASTA record spans at least two lines (header + residues), so a
        # multi-line text area is used in both cases; a single-line input
        # cannot keep the line break that the name extraction below relies on.
        st.session_state['sequence'] = st.text_area(
            'Enter protein sequence in FASTA format.',
            value=example_fasta if st.session_state.example_seq_button else '',
        )

        # The record name is the first line without the '>' marker.
        st.session_state['name'] = st.session_state['sequence'].split('\n')[0].strip('>')
        st.button('Use example sequence', on_click=click_button)

    else:
        protein_input = st.file_uploader('Choose file')
        if protein_input:
            protein_input_stringio = StringIO(protein_input.getvalue().decode("utf-8"))
            fasta_sequences = SeqIO.parse(protein_input_stringio, 'fasta')
            # Each record overwrites the previous one, so for a multi-record
            # file the last record is the one that gets submitted.
            for fasta in fasta_sequences:
                st.session_state['name'], st.session_state['sequence'] = fasta.id, str(fasta.seq)

    st.session_state['email'] = st.text_input('Enter your email for InterProScan query*: ')
    st.markdown("""
<p style="color:#000000;font-size:12px;">*InterProScan requests your email to notify you when your job is done. Your email will not be used for any other purpose.</p>
""", unsafe_allow_html=True)
# A click on 'Predict functions' only counts as a submission when an e-mail
# address containing '@' and a non-empty sequence are present.
submitted = False
with st.sidebar:
    if st.button('Predict functions'):
        # The text widgets store '' when left blank, so the keys existing in
        # session state says nothing; the values themselves are checked.
        entered_email = st.session_state.get('email') or ''
        entered_sequence = st.session_state.get('sequence') or ''
        if '@' in entered_email and entered_sequence.strip():
            submitted = True
            st.session_state.disabled = True
        else:
            # Already inside the sidebar context, so the warning lands there.
            st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid.')
# Sidebar footer: a horizontal rule followed by the single-protein disclaimer.
disclaimer_html = """
<div style="padding:5px">
<p style="color:#000000;font-size:12px;">Disclaimer: This program is designed to generate predictions for a single protein due to the extended runtime of InterProScan. If you need predictions for multiple UniProtKB/Swiss-Prot proteins, we recommend utilizing our comprehensive protein function prediction dataset available in our <a href="https://github.com/HUBioDataLab/Domain2GO">Github repository</a>.</p>
</div>
"""
with st.sidebar:
    footer = st.container()
    footer.markdown("---")
    footer.markdown(disclaimer_html, unsafe_allow_html=True)
# Main page: while no valid submission has been made in this run, show a
# short prompt asking the user to submit a sequence.
start_prompt_html = """
<div style="padding:30px">
<p style="color:#2a7b36;font-size:20px;">Submit your protein sequence to start.</p>
</div>
"""
if not submitted:
    st.markdown(start_prompt_html, unsafe_allow_html=True)
# Outcome flags of the InterProScan step. They are initialised on every run
# so the code further down can read them even when nothing was submitted.
no_domains = False
error_in_interproscan = False

if submitted:
    # Drop the results of any earlier submission first; otherwise a run that
    # finds no domains (or fails) would fall through to the previous
    # protein's domains and predictions.
    st.session_state.pop('domain_df', None)
    st.session_state.pop('pred_df', None)

    with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
        result = find_domains(st.session_state.email, st.session_state.sequence, st.session_state.name)

    # result[0] is a status message; the remaining items depend on the outcome.
    result_text = result[0]

    if result_text == 'Domains found.':
        # st.success(result_text + ' You can now see function predictions for the sequence in the "Function predictions" tab.')
        st.session_state['domain_df'] = result[1]

    elif result_text == 'No domains found.':
        st.warning(result_text)
        no_domains = True

    else:
        # Any other status is reported as an error, together with the job id
        # (result[1]) and the job response (result[2]).
        st.error(result_text)
        st.write(f'InterProScan job id: {result[1]}')
        st.write(f'InterProScan job response: {result[2]}')
        error_in_interproscan = True

# NOTE: parked code for showing/downloading the detected domains.
# if 'domain_df' in st.session_state:
# with st.expander('Show domains in sequence'):
# st.write(st.session_state.domain_df)
# domains_csv = convert_df(st.session_state.domain_df)
# st.download_button(
# label="Download domains in sequence as CSV",
# data=domains_csv,
# file_name=f"{st.session_state.name}_domains.csv",
# mime="text/csv",
# )
# Prediction step: turn the domains found for the current submission into
# function predictions, then show whatever predictions are in session state.
# NOTE(review): nesting was reconstructed from a flattened copy of the file;
# this block is assumed to be top-level -- confirm against the repository.
if submitted:
    # Predictions belong to one submission; discard older ones so that a run
    # without predictions does not keep showing the previous table.
    st.session_state.pop('pred_df', None)

    if error_in_interproscan:
        st.error('Error in InterProScan. Please check InterProScan job id and response.')

    elif not no_domains and 'domain_df' in st.session_state:
        # Only computed on the submitting run; later reruns (for example the
        # one triggered by the download button) reuse the stored table.
        with st.spinner('Generating function predictions...'):
            # mapping_path = "{}/Domain2GO/data".format(cwd.split("Domain2GO")[0])
            mapping_path = './data'
            pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)

        # pred_results[0] is a status message; on success pred_results[1]
        # is the predictions table.
        pred_result_text = pred_results[0]

        if pred_result_text == 'Function predictions found.':
            st.success('Function predictions generated.')
            st.session_state['pred_df'] = pred_results[1]

        elif pred_result_text == 'No predictions made for domains found in sequence.':
            st.warning(pred_result_text)

if 'pred_df' in st.session_state:
    with st.expander('Show function predictions'):
        st.write(st.session_state.pred_df)
        pred_csv = convert_df(st.session_state.pred_df)
        st.download_button(
            label="Download function predictions as CSV",
            data=pred_csv,
            file_name=f"{st.session_state.name}_function_predictions.csv",
            mime="text/csv",
        )
|