Spaces:
Running
Running
Erva Ulusoy
committed on
Commit
·
10fe79e
1
Parent(s):
77b1fab
results on one page
Browse files
- app.py +71 -74
- run_domain2go_app.py +2 -2
app.py
CHANGED
@@ -22,15 +22,10 @@ st.markdown("""
|
|
22 |
|
23 |
|
24 |
|
25 |
-
domain_tab, pred_tab = st.tabs(['Domains', 'Function predictions'])
|
26 |
-
|
27 |
-
with domain_tab:
|
28 |
-
st.header('Domains in sequence')
|
29 |
-
|
30 |
with st.sidebar:
|
31 |
|
32 |
st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
|
33 |
-
st.write("[![
|
34 |
|
35 |
if 'example_seq_button' not in st.session_state:
|
36 |
st.session_state.example_seq_button = False
|
@@ -58,79 +53,81 @@ with st.sidebar:
|
|
58 |
|
59 |
st.session_state['email'] = st.text_input('Enter your email for InterProScan query: ')
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
64 |
if 'email' in st.session_state and 'sequence' in st.session_state and '@' in st.session_state.email:
|
65 |
-
|
66 |
st.session_state.disabled = True
|
67 |
else:
|
68 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
else:
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
st.download_button(
|
98 |
-
label="Download
|
99 |
-
data=
|
100 |
-
file_name=f"{st.session_state.name}
|
101 |
mime="text/csv",
|
102 |
)
|
103 |
|
104 |
-
with pred_tab:
|
105 |
-
st.header('Function predictions')
|
106 |
-
if 'domain_df' not in st.session_state:
|
107 |
-
if no_domains:
|
108 |
-
st.warning('No domains found. Please find domains in sequence first.')
|
109 |
-
elif error_in_interproscan:
|
110 |
-
st.error('Error in InterProScan. Please check InterProScan job id and response.')
|
111 |
-
else:
|
112 |
-
st.warning('Please find domains in sequence first.')
|
113 |
-
else:
|
114 |
-
with st.spinner('Generating function predictions...'):
|
115 |
-
cwd = os.getcwd()
|
116 |
-
# mapping_path = "{}/Domain2GO/data".format(cwd.split("Domain2GO")[0])
|
117 |
-
mapping_path = './data'
|
118 |
-
pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)
|
119 |
-
pred_result_text = pred_results[0]
|
120 |
-
if pred_result_text == 'Function predictions found.':
|
121 |
-
st.success(pred_result_text)
|
122 |
-
st.session_state['pred_df'] = pred_results[1]
|
123 |
-
elif pred_result_text == 'No function predictions found.':
|
124 |
-
st.warning(pred_result_text)
|
125 |
-
|
126 |
-
if 'pred_df' in st.session_state:
|
127 |
-
with st.expander('Show function predictions'):
|
128 |
-
st.write(st.session_state.pred_df)
|
129 |
-
pred_csv = convert_df(st.session_state.pred_df)
|
130 |
-
st.download_button(
|
131 |
-
label="Download function predictions as CSV",
|
132 |
-
data=pred_csv,
|
133 |
-
file_name=f"{st.session_state.name}_function_predictions.csv",
|
134 |
-
mime="text/csv",
|
135 |
-
)
|
136 |
-
|
|
|
22 |
|
23 |
|
24 |
|
|
|
|
|
|
|
|
|
|
|
25 |
with st.sidebar:
|
26 |
|
27 |
st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
|
28 |
+
st.write("[![biorxiv](https://img.shields.io/badge/bioRxiv-2022.11.03.514980-b31b1b.svg)](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) [![github-repository](https://img.shields.io/badge/GitHub-black?logo=github)](https://github.com/HUBioDataLab/Domain2GO)")
|
29 |
|
30 |
if 'example_seq_button' not in st.session_state:
|
31 |
st.session_state.example_seq_button = False
|
|
|
53 |
|
54 |
st.session_state['email'] = st.text_input('Enter your email for InterProScan query: ')
|
55 |
|
56 |
+
# prevent user from clicking submit button if email or sequence is empty
|
57 |
+
submitted = False
|
58 |
+
|
59 |
+
|
60 |
+
with st.sidebar:
|
61 |
+
if st.button('Predict functions'):
|
62 |
if 'email' in st.session_state and 'sequence' in st.session_state and '@' in st.session_state.email:
|
63 |
+
submitted = True
|
64 |
st.session_state.disabled = True
|
65 |
else:
|
66 |
+
with st.sidebar:
|
67 |
+
st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid.')
|
68 |
+
|
69 |
+
if not submitted:
|
70 |
+
# on main page, write warning message if user has not submitted email and sequence
|
71 |
+
st.markdown("""
|
72 |
+
<div style="padding:30px">
|
73 |
+
<p style="color:#2a7b36;font-size:20px;">Submit your protein sequence to start.</p>
|
74 |
+
</div>
|
75 |
+
""", unsafe_allow_html=True)
|
76 |
+
|
77 |
+
no_domains = False
|
78 |
+
error_in_interproscan = False
|
79 |
+
if submitted:
|
80 |
+
with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
|
81 |
+
result = find_domains(st.session_state.email, st.session_state.sequence, st.session_state.name)
|
82 |
+
result_text = result[0]
|
83 |
+
if result_text == 'Domains found.':
|
84 |
+
# st.success(result_text + ' You can now see function predictions for the sequence in the "Function predictions" tab.')
|
85 |
+
st.session_state['domain_df'] = result[1]
|
86 |
+
elif result_text == 'No domains found.':
|
87 |
+
st.warning(result_text)
|
88 |
+
no_domains = True
|
89 |
else:
|
90 |
+
st.error(result_text)
|
91 |
+
st.write(f'InterProScan job id: {result[1]}')
|
92 |
+
st.write(f'InterProScan job response: {result[2]}')
|
93 |
+
error_in_interproscan = True
|
94 |
+
|
95 |
+
|
96 |
+
# if 'domain_df' in st.session_state:
|
97 |
+
# with st.expander('Show domains in sequence'):
|
98 |
+
# st.write(st.session_state.domain_df)
|
99 |
+
# domains_csv = convert_df(st.session_state.domain_df)
|
100 |
+
# st.download_button(
|
101 |
+
# label="Download domains in sequence as CSV",
|
102 |
+
# data=domains_csv,
|
103 |
+
# file_name=f"{st.session_state.name}_domains.csv",
|
104 |
+
# mime="text/csv",
|
105 |
+
# )
|
106 |
+
|
107 |
+
if 'domain_df' not in st.session_state:
|
108 |
+
if error_in_interproscan:
|
109 |
+
st.error('Error in InterProScan. Please check InterProScan job id and response.')
|
110 |
+
else:
|
111 |
+
with st.spinner('Generating function predictions...'):
|
112 |
+
cwd = os.getcwd()
|
113 |
+
# mapping_path = "{}/Domain2GO/data".format(cwd.split("Domain2GO")[0])
|
114 |
+
mapping_path = './data'
|
115 |
+
pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)
|
116 |
+
pred_result_text = pred_results[0]
|
117 |
+
if pred_result_text == 'Function predictions found.':
|
118 |
+
st.success('Function predictions generated.')
|
119 |
+
st.session_state['pred_df'] = pred_results[1]
|
120 |
+
elif pred_result_text == 'No predictions made for domains found in sequence.':
|
121 |
+
st.warning(pred_result_text)
|
122 |
+
|
123 |
+
if 'pred_df' in st.session_state:
|
124 |
+
with st.expander('Show function predictions'):
|
125 |
+
st.write(st.session_state.pred_df)
|
126 |
+
pred_csv = convert_df(st.session_state.pred_df)
|
127 |
st.download_button(
|
128 |
+
label="Download function predictions as CSV",
|
129 |
+
data=pred_csv,
|
130 |
+
file_name=f"{st.session_state.name}_function_predictions.csv",
|
131 |
mime="text/csv",
|
132 |
)
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
run_domain2go_app.py
CHANGED
@@ -98,7 +98,7 @@ def generate_function_predictions(domains_df, mapping_path):
|
|
98 |
domain2go_df = pd.read_csv(os.path.join(mapping_path, 'finalized_domain2go_mappings.txt'))
|
99 |
print('Domain2GO mappings loaded')
|
100 |
# merge domain2go mappings with domains found in protein sequence
|
101 |
-
merged_df = pd.merge(domains_df, domain2go_df, left_on='
|
102 |
|
103 |
print('Function predictions generated.')
|
104 |
|
@@ -109,7 +109,7 @@ def generate_function_predictions(domains_df, mapping_path):
|
|
109 |
|
110 |
else:
|
111 |
merged_df['protein_name'] = domains_df['protein_name'].iloc[0]
|
112 |
-
merged_df = merged_df[['protein_name', 'GO', '
|
113 |
merged_df.columns = ['protein_name', 'GO_ID', 'domain_locations', 'probability', 'domain_accession', 'domain_name',]
|
114 |
|
115 |
# save protein function predictions
|
|
|
98 |
domain2go_df = pd.read_csv(os.path.join(mapping_path, 'finalized_domain2go_mappings.txt'))
|
99 |
print('Domain2GO mappings loaded')
|
100 |
# merge domain2go mappings with domains found in protein sequence
|
101 |
+
merged_df = pd.merge(domains_df, domain2go_df, left_on='domain_accession', right_on='Interpro')
|
102 |
|
103 |
print('Function predictions generated.')
|
104 |
|
|
|
109 |
|
110 |
else:
|
111 |
merged_df['protein_name'] = domains_df['protein_name'].iloc[0]
|
112 |
+
merged_df = merged_df[['protein_name', 'GO', 'domain_locations', 's', 'domain_accession', 'domain_name',]]
|
113 |
merged_df.columns = ['protein_name', 'GO_ID', 'domain_locations', 'probability', 'domain_accession', 'domain_name',]
|
114 |
|
115 |
# save protein function predictions
|