Erva Ulusoy committed on
Commit
10fe79e
·
1 Parent(s): 77b1fab

results on one page

Browse files
Files changed (2) hide show
  1. app.py +71 -74
  2. run_domain2go_app.py +2 -2
app.py CHANGED
@@ -22,15 +22,10 @@ st.markdown("""
22
 
23
 
24
 
25
- domain_tab, pred_tab = st.tabs(['Domains', 'Function predictions'])
26
-
27
- with domain_tab:
28
- st.header('Domains in sequence')
29
-
30
  with st.sidebar:
31
 
32
  st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
33
- st.write("[![arXiv](https://img.shields.io/badge/bioRxiv-2022.11.03.514980-b31b1b.svg)](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) [![github-repository](https://img.shields.io/badge/GitHub-black?logo=github)](https://github.com/HUBioDataLab/Domain2GO)")
34
 
35
  if 'example_seq_button' not in st.session_state:
36
  st.session_state.example_seq_button = False
@@ -58,79 +53,81 @@ with st.sidebar:
58
 
59
  st.session_state['email'] = st.text_input('Enter your email for InterProScan query: ')
60
 
61
- # prevent user from clicking 'Find domains' button if email or sequence is empty
62
- domains_submitted = False
63
- if st.button('Find domains'):
 
 
 
64
  if 'email' in st.session_state and 'sequence' in st.session_state and '@' in st.session_state.email:
65
- domains_submitted = True
66
  st.session_state.disabled = True
67
  else:
68
- st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  else:
70
- with domain_tab:
71
- st.warning('Please enter your query and click "Find domains" to see domains in sequence.')
72
-
73
- with domain_tab:
74
- no_domains = False
75
- error_in_interproscan = False
76
- if domains_submitted:
77
- with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
78
- result = find_domains(st.session_state.email, st.session_state.sequence, st.session_state.name)
79
- result_text = result[0]
80
- if result_text == 'Domains found.':
81
- st.success(result_text + ' You can now see function predictions for the sequence in the "Function predictions" tab.')
82
- st.session_state['domain_df'] = result[1]
83
- elif result_text == 'No domains found.':
84
- st.warning(result_text)
85
- no_domains = True
86
- else:
87
- st.error(result_text)
88
- st.write(f'InterProScan job id: {result[1]}')
89
- st.write(f'InterProScan job response: {result[2]}')
90
- error_in_interproscan = True
91
-
92
-
93
- if 'domain_df' in st.session_state:
94
- with st.expander('Show domains in sequence'):
95
- st.write(st.session_state.domain_df)
96
- domains_csv = convert_df(st.session_state.domain_df)
 
 
 
 
 
 
 
 
 
 
97
  st.download_button(
98
- label="Download domains in sequence as CSV",
99
- data=domains_csv,
100
- file_name=f"{st.session_state.name}_domains.csv",
101
  mime="text/csv",
102
  )
103
 
104
- with pred_tab:
105
- st.header('Function predictions')
106
- if 'domain_df' not in st.session_state:
107
- if no_domains:
108
- st.warning('No domains found. Please find domains in sequence first.')
109
- elif error_in_interproscan:
110
- st.error('Error in InterProScan. Please check InterProScan job id and response.')
111
- else:
112
- st.warning('Please find domains in sequence first.')
113
- else:
114
- with st.spinner('Generating function predictions...'):
115
- cwd = os.getcwd()
116
- # mapping_path = "{}/Domain2GO/data".format(cwd.split("Domain2GO")[0])
117
- mapping_path = './data'
118
- pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)
119
- pred_result_text = pred_results[0]
120
- if pred_result_text == 'Function predictions found.':
121
- st.success(pred_result_text)
122
- st.session_state['pred_df'] = pred_results[1]
123
- elif pred_result_text == 'No function predictions found.':
124
- st.warning(pred_result_text)
125
-
126
- if 'pred_df' in st.session_state:
127
- with st.expander('Show function predictions'):
128
- st.write(st.session_state.pred_df)
129
- pred_csv = convert_df(st.session_state.pred_df)
130
- st.download_button(
131
- label="Download function predictions as CSV",
132
- data=pred_csv,
133
- file_name=f"{st.session_state.name}_function_predictions.csv",
134
- mime="text/csv",
135
- )
136
-
 
22
 
23
 
24
 
 
 
 
 
 
25
  with st.sidebar:
26
 
27
  st.title("Domain2GO: Mutual Annotation-Based Prediction of Protein Domain Functions")
28
+ st.write("[![biorxiv](https://img.shields.io/badge/bioRxiv-2022.11.03.514980-b31b1b.svg)](https://www.biorxiv.org/content/10.1101/2022.11.03.514980v1) [![github-repository](https://img.shields.io/badge/GitHub-black?logo=github)](https://github.com/HUBioDataLab/Domain2GO)")
29
 
30
  if 'example_seq_button' not in st.session_state:
31
  st.session_state.example_seq_button = False
 
53
 
54
  st.session_state['email'] = st.text_input('Enter your email for InterProScan query: ')
55
 
56
+ # prevent user from clicking submit button if email or sequence is empty
57
+ submitted = False
58
+
59
+
60
+ with st.sidebar:
61
+ if st.button('Predict functions'):
62
  if 'email' in st.session_state and 'sequence' in st.session_state and '@' in st.session_state.email:
63
+ submitted = True
64
  st.session_state.disabled = True
65
  else:
66
+ with st.sidebar:
67
+ st.warning('Please enter your email and protein sequence first. If you have already entered your email and protein sequence, please check that your email is valid.')
68
+
69
+ if not submitted:
70
+ # on main page, write warning message if user has not submitted email and sequence
71
+ st.markdown("""
72
+ <div style="padding:30px">
73
+ <p style="color:#2a7b36;font-size:20px;">Submit your protein sequence to start.</p>
74
+ </div>
75
+ """, unsafe_allow_html=True)
76
+
77
+ no_domains = False
78
+ error_in_interproscan = False
79
+ if submitted:
80
+ with st.spinner('Finding domains in sequence using InterProScan. This may take a while...'):
81
+ result = find_domains(st.session_state.email, st.session_state.sequence, st.session_state.name)
82
+ result_text = result[0]
83
+ if result_text == 'Domains found.':
84
+ # st.success(result_text + ' You can now see function predictions for the sequence in the "Function predictions" tab.')
85
+ st.session_state['domain_df'] = result[1]
86
+ elif result_text == 'No domains found.':
87
+ st.warning(result_text)
88
+ no_domains = True
89
  else:
90
+ st.error(result_text)
91
+ st.write(f'InterProScan job id: {result[1]}')
92
+ st.write(f'InterProScan job response: {result[2]}')
93
+ error_in_interproscan = True
94
+
95
+
96
+ # if 'domain_df' in st.session_state:
97
+ # with st.expander('Show domains in sequence'):
98
+ # st.write(st.session_state.domain_df)
99
+ # domains_csv = convert_df(st.session_state.domain_df)
100
+ # st.download_button(
101
+ # label="Download domains in sequence as CSV",
102
+ # data=domains_csv,
103
+ # file_name=f"{st.session_state.name}_domains.csv",
104
+ # mime="text/csv",
105
+ # )
106
+
107
+ if 'domain_df' not in st.session_state:
108
+ if error_in_interproscan:
109
+ st.error('Error in InterProScan. Please check InterProScan job id and response.')
110
+ else:
111
+ with st.spinner('Generating function predictions...'):
112
+ cwd = os.getcwd()
113
+ # mapping_path = "{}/Domain2GO/data".format(cwd.split("Domain2GO")[0])
114
+ mapping_path = './data'
115
+ pred_results = generate_function_predictions(st.session_state.domain_df, mapping_path)
116
+ pred_result_text = pred_results[0]
117
+ if pred_result_text == 'Function predictions found.':
118
+ st.success('Function predictions generated.')
119
+ st.session_state['pred_df'] = pred_results[1]
120
+ elif pred_result_text == 'No predictions made for domains found in sequence.':
121
+ st.warning(pred_result_text)
122
+
123
+ if 'pred_df' in st.session_state:
124
+ with st.expander('Show function predictions'):
125
+ st.write(st.session_state.pred_df)
126
+ pred_csv = convert_df(st.session_state.pred_df)
127
  st.download_button(
128
+ label="Download function predictions as CSV",
129
+ data=pred_csv,
130
+ file_name=f"{st.session_state.name}_function_predictions.csv",
131
  mime="text/csv",
132
  )
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run_domain2go_app.py CHANGED
@@ -98,7 +98,7 @@ def generate_function_predictions(domains_df, mapping_path):
98
  domain2go_df = pd.read_csv(os.path.join(mapping_path, 'finalized_domain2go_mappings.txt'))
99
  print('Domain2GO mappings loaded')
100
  # merge domain2go mappings with domains found in protein sequence
101
- merged_df = pd.merge(domains_df, domain2go_df, left_on='accession', right_on='Interpro')
102
 
103
  print('Function predictions generated.')
104
 
@@ -109,7 +109,7 @@ def generate_function_predictions(domains_df, mapping_path):
109
 
110
  else:
111
  merged_df['protein_name'] = domains_df['protein_name'].iloc[0]
112
- merged_df = merged_df[['protein_name', 'GO', 'locations', 's', 'accession', 'name',]]
113
  merged_df.columns = ['protein_name', 'GO_ID', 'domain_locations', 'probability', 'domain_accession', 'domain_name',]
114
 
115
  # save protein function predictions
 
98
  domain2go_df = pd.read_csv(os.path.join(mapping_path, 'finalized_domain2go_mappings.txt'))
99
  print('Domain2GO mappings loaded')
100
  # merge domain2go mappings with domains found in protein sequence
101
+ merged_df = pd.merge(domains_df, domain2go_df, left_on='domain_accession', right_on='Interpro')
102
 
103
  print('Function predictions generated.')
104
 
 
109
 
110
  else:
111
  merged_df['protein_name'] = domains_df['protein_name'].iloc[0]
112
+ merged_df = merged_df[['protein_name', 'GO', 'domain_locations', 's', 'domain_accession', 'domain_name',]]
113
  merged_df.columns = ['protein_name', 'GO_ID', 'domain_locations', 'probability', 'domain_accession', 'domain_name',]
114
 
115
  # save protein function predictions