import streamlit as st import streamlit.components.v1 as components import requests from io import StringIO from Bio import SeqIO import os import time import pandas as pd from run_domain2go_app import * def convert_df(df): return df.to_csv(index=False).encode('utf-8') with st.sidebar: st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions") st.write("[![biorxiv](https://img.shields.io/badge/bioRxiv-2022.11.03.514980-b31b1b.svg)](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) [![github-repository](https://img.shields.io/badge/GitHub-black?logo=github)](https://github.com/HUBioDataLab/Domain2GO)") if 'example_seq_button' not in st.session_state: st.session_state.example_seq_button = False def click_button(): st.session_state.example_seq_button = not st.session_state.example_seq_button input_type = st.radio('Select input type', ['Enter sequence', 'Upload FASTA file']) if input_type == 'Enter sequence': if st.session_state.example_seq_button: st.session_state['sequence'] = st.text_area('Enter protein sequence in FASTA format.', value='>sp|O18783|PLMN_NOTEU\n' 'MEYGKVIFLFLLFLKSGQGESLENYIKTEGASLSNSQKKQFVASSTEECEALCEKETEFVCRSFEHYNKEQKCVIMSENSKTSSVERKRDVVLFEKRIYLSDCKSGNGRNYRGTLSKTKSGITCQKWSDLSPHVPNYAPSKYPDAGLEKNYCRNPDDDVKGPWCYTTNPDIRYEYCDVPECEDECMHCSGENYRGTISKTESGIECQPWDSQEPHSHEYIPSKFPSKDLKENYCRNPDGEPRPWCFTSNPEKRWEFCNIPRCSSPPPPPGPMLQCLKGRGENYRGKIAVTKSGHTCQRWNKQTPHKHNRTPENFPCRGLDENYCRNPDGELEPWCYTTNPDVRQEYCAIPSCGTSSPHTDRVEQSPVIQECYEGKGENYRGTTSTTISGKKCQAWSSMTPHQHKKTPDNFPNADLIRNYCRNPDGDKSPWCYTMDPTVRWEFCNLEKCSGTGSTVLNAQTTRVPSVDTTSHPESDCMYGSGKDYRGKRSTTVTGTLCQAWTAQEPHRHTIFTPDTYPRAGLEENYCRNPDGDPNGPWCYTTNPKKLFDYCDIPQCVSPSSFDCGKPRVEPQKCPGRIVGGCYAQPHSWPWQISLRTRFGEHFCGGTLIAPQWVLTAAHCLERSQWPGAYKVILGLHREVNPESYSQEIGVSRLFKGPLAADIALLKLNRPAAINDKVIPACLPSQDFMVPDRTLCHVTGWGDTQGTSPRGLLKQASLPVIDNRVCNRHEYLNGRVKSTELCAGHLVGRGDSCQGDSGGPLICFEDDKYVLQGVTSWGLGCARPNKPGVYVRVSRYISWIEDVMKNN') else: st.session_state['sequence'] = st.text_input('Enter protein sequence in FASTA format.') st.session_state['name'] = st.session_state['sequence'].split('\n')[0].strip('>') st.button('Use example sequence', on_click=click_button) else: protein_input = st.file_uploader('Choose file') if protein_input: protein_input_stringio = StringIO(protein_input.getvalue().decode("utf-8")) fasta_sequences = SeqIO.parse(protein_input_stringio, 'fasta') for fasta in fasta_sequences: st.session_state['name'], st.session_state['sequence'] = fasta.id, str(fasta.seq) st.session_state['email'] = st.text_input('Enter your email for InterProScan query*: ') st.markdown("""
*InterProScan requests your email to notify you when your job is done. Your email will not be used for any other purpose.
""", unsafe_allow_html=True) # prevent user from clicking submit button if email or sequence is empty submitted = False with st.sidebar: if st.button('Predict functions'): if 'email' in st.session_state and 'sequence' in st.session_state and '@' in st.session_state.email: submitted = True st.session_state.disabled = True else: with st.sidebar: st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid.') with st.sidebar: c = st.container() c.markdown("---") c.markdown( """Disclaimer: This program is designed to generate predictions for a single protein due to the extended runtime of InterProScan. If you need predictions for multiple UniProtKB/Swiss-Prot proteins, we recommend utilizing our comprehensive protein function prediction dataset available in our Github repository.
Submit your protein sequence to start.