"""Streamlit front end for Domain2GO.

The page collects a single protein query (pasted FASTA text or an uploaded
FASTA file) plus an email address in the sidebar, runs ``find_domains`` on it
and shows the result in the "Domains" tab, then feeds the resulting table to
``generate_function_predictions`` and shows that in the "Function predictions"
tab.  Both helpers come from ``run_domain2go_app``.
"""
import streamlit as st
import requests
from io import StringIO
from Bio import SeqIO
import os
import time
import pandas as pd
from run_domain2go_app import *

# FASTA record pre-filled into the sequence box by the "Use example sequence"
# button.
EXAMPLE_FASTA = (
    '>sp|O18783|PLMN_NOTEU\n'
    'MEYGKVIFLFLLFLKSGQGESLENYIKTEGASLSNSQKKQFVASSTEECEALCEKETEFVCRSFEHYNKEQKCVIMSENSKTSSVERKRDVVLFEKRIYLSDCKSGNGRNYRGTLSKTKSGITCQKWSDLSPHVPNYAPSKYPDAGLEKNYCRNPDDDVKGPWCYTTNPDIRYEYCDVPECEDECMHCSGENYRGTISKTESGIECQPWDSQEPHSHEYIPSKFPSKDLKENYCRNPDGEPRPWCFTSNPEKRWEFCNIPRCSSPPPPPGPMLQCLKGRGENYRGKIAVTKSGHTCQRWNKQTPHKHNRTPENFPCRGLDENYCRNPDGELEPWCYTTNPDVRQEYCAIPSCGTSSPHTDRVEQSPVIQECYEGKGENYRGTTSTTISGKKCQAWSSMTPHQHKKTPDNFPNADLIRNYCRNPDGDKSPWCYTMDPTVRWEFCNLEKCSGTGSTVLNAQTTRVPSVDTTSHPESDCMYGSGKDYRGKRSTTVTGTLCQAWTAQEPHRHTIFTPDTYPRAGLEENYCRNPDGDPNGPWCYTTNPKKLFDYCDIPQCVSPSSFDCGKPRVEPQKCPGRIVGGCYAQPHSWPWQISLRTRFGEHFCGGTLIAPQWVLTAAHCLERSQWPGAYKVILGLHREVNPESYSQEIGVSRLFKGPLAADIALLKLNRPAAINDKVIPACLPSQDFMVPDRTLCHVTGWGDTQGTSPRGLLKQASLPVIDNRVCNRHEYLNGRVKSTELCAGHLVGRGDSCQGDSGGPLICFEDDKYVLQGVTSWGLGCARPNKPGVYVRVSRYISWIEDVMKNN'
)

# Session-state keys holding results of the previous query.
RESULT_KEYS = ('domain_df', 'pred_df')


def convert_df(df):
    """Return ``df`` serialized as UTF-8 encoded CSV bytes, without the index."""
    return df.to_csv(index=False).encode('utf-8')


def click_button():
    """Flip the flag that switches the sequence box to the example record."""
    st.session_state.example_seq_button = not st.session_state.example_seq_button


st.markdown("""

Disclaimer

This program is designed to generate predictions for a single protein due to the extended runtime of InterProScan. If you need predictions for multiple UniProtKB/Swiss-Prot proteins, we recommend utilizing our comprehensive protein function prediction dataset available in our Github repository.

""", unsafe_allow_html=True)

domain_tab, pred_tab = st.tabs(['Domains', 'Function predictions'])

with domain_tab:
    st.header('Domains in sequence')

# --- Sidebar: query input -------------------------------------------------
with st.sidebar:
    st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
    st.write("[![arXiv](https://img.shields.io/badge/bioRxiv-2022.11.03.514980-b31b1b.svg)](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) [![github-repository](https://img.shields.io/badge/GitHub-black?logo=github)](https://github.com/HUBioDataLab/Domain2GO)")

    if 'example_seq_button' not in st.session_state:
        st.session_state.example_seq_button = False

    input_type = st.radio('Select input type', ['Enter sequence', 'Upload FASTA file'])

    if input_type == 'Enter sequence':
        if st.session_state.example_seq_button:
            st.session_state['sequence'] = st.text_area(
                'Enter protein sequence in FASTA format.',
                value=EXAMPLE_FASTA,
            )
        else:
            st.session_state['sequence'] = st.text_input('Enter protein sequence in FASTA format.')

        # The first line of the input, minus any '>' characters at its ends,
        # is used as the query name.
        st.session_state['name'] = st.session_state['sequence'].split('\n')[0].strip('>')

        st.button('Use example sequence', on_click=click_button)
    else:
        protein_input = st.file_uploader('Choose file')
        if protein_input:
            protein_input_stringio = StringIO(protein_input.getvalue().decode("utf-8"))
            # Each record overwrites the previous one, so a multi-record file
            # leaves the last record as the query.
            for fasta in SeqIO.parse(protein_input_stringio, 'fasta'):
                st.session_state['name'], st.session_state['sequence'] = fasta.id, str(fasta.seq)

    st.session_state['email'] = st.text_input('Enter your email for InterProScan query: ')

    # Only submit when both email and sequence are non-empty.  The keys are
    # (re)assigned on every run above, so checking mere key presence would let
    # empty values through; test the values themselves instead.
    domains_submitted = False
    if st.button('Find domains'):
        email_value = (st.session_state.get('email') or '').strip()
        sequence_value = (st.session_state.get('sequence') or '').strip()
        if email_value and sequence_value:
            domains_submitted = True
        else:
            st.warning('Please enter your email and protein sequence first.')
    else:
        with domain_tab:
            st.warning('Please enter your query and click "Find domains" to see domains in sequence.')

# --- Domains tab ----------------------------------------------------------
with domain_tab:
    no_domains = False
    error_in_interproscan = False

    if domains_submitted:
        # Drop results of any earlier query so that a failed or empty search
        # does not leave the previous protein's tables on screen (and on offer
        # for download under the new query's name).
        for stale_key in RESULT_KEYS:
            if stale_key in st.session_state:
                del st.session_state[stale_key]

        with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
            result = find_domains(
                st.session_state.email,
                st.session_state.sequence,
                st.session_state.name,
            )

        # result[0] is a status message; the remaining items depend on it.
        result_text = result[0]
        if result_text == 'Domains found.':
            st.success(result_text + ' You can now see function predictions for the sequence in the "Function predictions" tab.')
            st.session_state['domain_df'] = result[1]
        elif result_text == 'No domains found.':
            st.warning(result_text)
            no_domains = True
        else:
            st.error(result_text)
            st.write(f'InterProScan job id: {result[1]}')
            st.write(f'InterProScan job response: {result[2]}')
            error_in_interproscan = True

    if 'domain_df' in st.session_state:
        # NOTE(review): the collapsed source does not show whether the
        # download button sat inside or below the expander - confirm layout.
        with st.expander('Show domains in sequence'):
            st.write(st.session_state.domain_df)
            domains_csv = convert_df(st.session_state.domain_df)
            st.download_button(
                label="Download domains in sequence as CSV",
                data=domains_csv,
                file_name=f"{st.session_state.name}_domains.csv",
                mime="text/csv",
            )

# --- Function predictions tab ---------------------------------------------
with pred_tab:
    st.header('Function predictions')

    if 'domain_df' not in st.session_state:
        if no_domains:
            st.warning('No domains found. Please find domains in sequence first.')
        elif error_in_interproscan:
            st.error('Error in InterProScan. Please check InterProScan job id and response.')
        else:
            st.warning('Please find domains in sequence first.')
    else:
        with st.spinner('Generating function predictions...'):
            # Mapping files are read relative to the working directory.
            mapping_path = './data'
            pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)

        # pred_results[0] is a status message; pred_results[1] is the table
        # when predictions were found.
        pred_result_text = pred_results[0]
        if pred_result_text == 'Function predictions found.':
            st.success(pred_result_text)
            st.session_state['pred_df'] = pred_results[1]
        elif pred_result_text == 'No function predictions found.':
            st.warning(pred_result_text)

        if 'pred_df' in st.session_state:
            # NOTE(review): same layout uncertainty as the domains expander.
            with st.expander('Show function predictions'):
                st.write(st.session_state.pred_df)
                pred_csv = convert_df(st.session_state.pred_df)
                st.download_button(
                    label="Download function predictions as CSV",
                    data=pred_csv,
                    file_name=f"{st.session_state.name}_function_predictions.csv",
                    mime="text/csv",
                )