syedislamuddin commited on
Commit
2cf664c
1 Parent(s): b4b3c55

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1100 -0
app.py ADDED
@@ -0,0 +1,1100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from turtle import shape
2
+ import streamlit as st
3
+ #from st_keyup import st_keyup
4
+ import pandas as pd
5
+ import numpy as np
6
+ from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode
7
+
8
+ import os
9
+
10
# Page-wide layout plus a CSS override that enlarges expander headers.
_EXPANDER_HEADER_CSS = """
<style>
.streamlit-expanderHeader {
font-size: x-large;
}
</style>
"""

st.set_page_config(layout="wide")
st.markdown(_EXPANDER_HEADER_CSS, unsafe_allow_html=True)
21
# HTML snippets rendered with st.markdown(..., unsafe_allow_html=True).
# Red paragraphs are warnings, the green one is a usage hint for the grid.

# Shown when only one guide of a pair was located.
caution = '<p style="font-family:sans-serif; color:Red; font-size: 18px;">Please note that Only one Guide (from pair) is found. Please see guides not found section for other guide</p>'
# Explains the column layout of the mutated-guides table.
caution1 = '<p style="font-family:sans-serif; color:Red; font-size: 18px;">Please note that Each mutated guide is reported as a separate line. sgID_1/2, sgRNA_1/2, chr_sgRNA_1/2 and position_sgRNA_1/2 represent values for reference/mutated guide</p>'
# Reminds the user to select guides before ticking a list check box.
caution2 = '<p style="font-family:sans-serif; color:Red; font-size: 18px;">Please Select a single/multiple guides and then select Check Box A, B or C, otherwise the code will throw an error</p>'
# Usage hint for the interactive table.
table_edit = '<p style="font-family:sans-serif; color:Green; font-size: 16px;">About Table: Please note that table can be <b>sorted by clicking on any column</b> and <b>Multiple rows can be selected</b> (by clicking check box in first column) to save only those rows.</p>'
# Shown above the selection grid used to build the order-ready table.
caution_genes = '<p style="font-family:sans-serif; color:Red; font-size: 16px;">Please make sure that desired genes from all three lists should be selected to generate Order Ready Table.</p>'
26
+
27
def transform(df, str):
    """Let the user pick a subset of columns and return ``df`` restricted to them.

    Args:
        df: DataFrame whose columns are offered in a multiselect widget.
        str: Label shown on the widget (the name shadows the builtin but is
            part of the existing signature).

    Returns:
        ``df`` limited to the chosen columns; every column is pre-selected.
    """
    offered = df.columns.tolist()
    preselected = df.columns.tolist()
    chosen = st.multiselect(str, offered, preselected)
    return df[chosen]
36
+
37
def convert_df(df):
    """Serialize ``df`` to CSV text (index column included) as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
39
def convert_df1(df):
    """Serialize ``df`` to CSV text without the index column, as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
41
+
42
+
43
# CSS that hides the row-index column of tables rendered by Streamlit.
hide_table_row_index = "\n".join([
    "",
    "<style>",
    "thead tr th:first-child {display:none}",
    "tbody th {display:none}",
    "</style>",
    "",
])

# Streamlit has no direct CSS hook, so the rule is injected through markdown.
st.markdown(hide_table_row_index, unsafe_allow_html=True)
53
+
54
+
55
+ #########TABLE DISPLAY
56
def tbl_disp(dat, var, ref, key, flg=1):
    """Show ``dat`` in an interactive AgGrid and offer CSV downloads.

    Args:
        dat: DataFrame to display. Its index is reset in place.
        var: First part of the download file name.
        ref: Second part of the download file name (``<var>_<ref>.csv``).
        key: Currently unused (the widget ``key`` arguments are disabled).
        flg: ``1`` shows the "full table" download button; any truthy value
            shows the "selected rows" download button once rows are ticked.

    Returns:
        A DataFrame built from the rows ticked in the grid, or ``None`` when
        nothing is selected.
    """
    dat.reset_index(drop=True, inplace=True)

    full_csv = convert_df(dat)
    if flg == 1:
        st.download_button(
            label="Download Full Table as CSV file",
            data=full_csv,
            file_name=var + '_' + ref + '.csv',
            mime='text/csv',
        )

    # Grid configuration: no pagination, multi-row checkbox selection with a
    # "select all" checkbox on the gene column, and the side bar enabled.
    builder = GridOptionsBuilder.from_dataframe(dat)
    builder.configure_pagination(enabled=False)
    builder.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    builder.configure_selection(selection_mode="multiple", use_checkbox=True)
    builder.configure_column("gene", headerCheckboxSelection=True)
    builder.configure_side_bar()
    grid_options = builder.build()

    response = AgGrid(
        dat,
        height=200,
        gridOptions=grid_options,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    picked_rows = response['selected_rows']
    if not picked_rows:
        return None

    dfs = pd.DataFrame(picked_rows)
    # The first column is left out of the export (presumably a row-id column
    # added by the grid component -- TODO confirm).
    selected_csv = convert_df1(dfs[dfs.columns[1:dfs.shape[1]]])
    if flg:
        st.download_button(
            label="Download Selected data as CSV",
            data=selected_csv,
            file_name=var + '_' + ref + '.csv',
            mime='text/csv',
        )
    return dfs
116
+
117
+
118
+
119
def assemble_tbl(t):
    """Merge consecutive rows of ``t`` into one output row per guide pair.

    Rows ``0`` and ``1`` form the first pair, rows ``2`` and ``3`` the second,
    and so on. Both guide sequences are cut to their first 20 characters.

    Args:
        t: DataFrame with exactly seven columns in the order
            gene/sgID, guide sequence, chr, position, mutated_guide, strand,
            num_mismatch, and an even number of rows.

    Returns:
        DataFrame with columns ``sgID_1, sgRNA_1, chr_sgRNA_1,
        position_sgRNA_1, sgID_2, sgRNA_2, chr_sgRNA_2, position_sgRNA_2,
        sgID_1_2``. Row labels are not renumbered: every row keeps label
        ``0``, as before.

    Raises:
        IndexError: if ``t`` has an odd number of rows.
    """
    first_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                  'mutated_guide', 'strand', 'num_mismatch']
    second_cols = ['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2',
                   'mutated_guide2', 'strand2', 'num_mismatch2']
    kept_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']
    out_cols = kept_cols + ['sgID_1_2']

    pair_rows = []
    for i in range(0, t.shape[0], 2):
        first = t.iloc[[i]].reset_index(drop=True)
        first.columns = first_cols
        second = t.iloc[[i + 1]].reset_index(drop=True)
        second.columns = second_cols

        # .copy() so the column assignments below act on an independent frame.
        pair = pd.concat([first, second], axis=1)[kept_cols].copy()
        pair['sgRNA_1'] = pair['sgRNA_1'].str.slice(0, 20)
        pair['sgRNA_2'] = pair['sgRNA_2'].str.slice(0, 20)
        # Combined identifier uses BOTH guide ids (previously sgID_1 twice).
        pair['sgID_1_2'] = pair['sgID_1'] + "|" + pair['sgID_2']
        pair_rows.append(pair)

    if not pair_rows:
        return pd.DataFrame(columns=out_cols)
    # One concat at the end replaces DataFrame.append, removed in pandas 2.0.
    return pd.concat(pair_rows)
135
+
136
def _pair_up_rows(matches):
    """Arrange exact-match rows so that consecutive rows form guide pairs.

    An even number of rows is returned unchanged. For an odd number, two
    neighbouring rows are kept as a pair only when their gene, chr and
    position are all equal; every other row is duplicated so that it pairs
    with itself. A single row therefore becomes two identical rows.
    """
    total = matches.shape[0]
    if total % 2 == 0:
        return matches

    picked = []
    pos = 0
    while pos < total:
        has_next = pos < total - 1
        if has_next and all(
            matches.iloc[pos][col] == matches.iloc[pos + 1][col]
            for col in ('gene', 'chr', 'position')
        ):
            picked.extend((pos, pos + 1))
            pos += 2
        else:
            picked.extend((pos, pos))
            pos += 1
    return matches.iloc[picked].reset_index(drop=True)


def get_lists(ref_list, list_found_ref, list_notfound_ref):
    """Collect matched, mutated and not-found results for reference guide pairs.

    Args:
        ref_list: DataFrame whose ``gene`` column holds pair identifiers of
            the form ``"<guide1>|<guide2>"``.
        list_found_ref: Search hits with columns gene, ref_guide, chr,
            position, mutated_guide, strand, num_mismatch.
        list_notfound_ref: Guides without any hit; needs a ``gene`` column.

    Returns:
        Tuple ``(dft, dft_mut, not_found, matched, mutated, guideflg1)``:

        * ``dft`` -- exact matches with both guides of a pair in one row
          (built by ``assemble_tbl``); an empty 7-column frame if none.
        * ``dft_mut`` -- one row per mismatching hit (``get_mutated_res``).
        * ``not_found`` -- not-found rows of the FIRST pair's two guides.
        * ``matched`` -- hits with ``num_mismatch == 0`` on ``chr*`` contigs.
        * ``mutated`` -- hits with ``num_mismatch > 0`` on ``chr*`` contigs,
          sorted by position.
        * ``guideflg1`` -- 0 if neither guide of the first pair is in the
          not-found list, 2 if the first guide is, 1 if the second guide is
          (1 also when both are).

    Raises:
        ValueError: if ``ref_list`` is empty (nothing to concatenate).
    """
    guide_ids = []
    for pair_id in ref_list.gene.values:
        parts = pair_id.split('|')
        guide_ids.append(parts[0])
        guide_ids.append(parts[1])

    found = pd.concat(
        [list_found_ref[list_found_ref['gene'] == guide] for guide in guide_ids]
    )

    matched = found[found.num_mismatch == 0]
    # Hits on alternate loci (contig names without "chr") are discarded.
    matched = matched[matched['chr'].str.contains('chr')]

    dft = pd.DataFrame(columns=['gene', 'ref_guide', 'chr', 'position',
                                'mutated_guide', 'strand', 'num_mismatch'])
    if matched.shape[0] > 0:
        dft = assemble_tbl(_pair_up_rows(matched.reset_index(drop=True)))

    mutated = found[found.num_mismatch > 0].sort_values('position')
    # Same alternate-loci filter as for the exact matches.
    mutated = mutated[mutated['chr'].str.contains('chr')]
    dft_mut = pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                                    'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2',
                                    'sgID_1_2'])
    if mutated.shape[0] > 0:
        dft_mut = get_mutated_res(mutated)

    # Only the first pair's guides are looked up in the not-found list.
    missing_first = list_notfound_ref[list_notfound_ref['gene'] == guide_ids[0]]
    missing_second = list_notfound_ref[list_notfound_ref['gene'] == guide_ids[1]]

    guideflg1 = 0
    if missing_first.shape[0] > 0:
        guideflg1 = 2
    if missing_second.shape[0] > 0:
        guideflg1 = 1
    not_found = pd.concat([missing_first, missing_second])

    return dft, dft_mut, not_found, matched, mutated, guideflg1
213
+ ###########
214
+
215
def get_mutated_res(list_concatenated_mutated_ref):
    """Build the mutated-guides table: one row per mismatching hit.

    For every hit, the ``*_1`` columns describe the reference guide and the
    ``*_2`` columns the mutated guide found at the same id, chr and position.
    Both sequences are normalised to 20 characters starting with ``G`` (the
    first base is replaced by ``G``, bases 2-20 are kept).

    Args:
        list_concatenated_mutated_ref: DataFrame with exactly seven columns
            in the order id, reference guide, chr, position, mutated guide,
            strand, num_mismatch.

    Returns:
        DataFrame with columns ``sgID_1, sgRNA_1, chr_sgRNA_1,
        position_sgRNA_1, sgID_2, sgRNA_2, chr_sgRNA_2, position_sgRNA_2,
        sgID_1_2``; empty (same columns) when the input has no rows.
    """
    out_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']

    hits = list_concatenated_mutated_ref.reset_index(drop=True)
    if hits.empty:
        return pd.DataFrame(columns=out_cols)

    # Columns are addressed by position, exactly as the per-row code did.
    hits.columns = ['sgID', 'ref_guide', 'chr', 'position',
                    'mutated_guide', 'strand', 'num_mismatch']

    result = pd.DataFrame({
        'sgID_1': hits['sgID'],
        'sgRNA_1': 'G' + hits['ref_guide'].str.slice(1, 20),
        'chr_sgRNA_1': hits['chr'],
        'position_sgRNA_1': hits['position'],
        'sgID_2': hits['sgID'],
        'sgRNA_2': 'G' + hits['mutated_guide'].str.slice(1, 20),
        'chr_sgRNA_2': hits['chr'],
        'position_sgRNA_2': hits['position'],
        'sgID_1_2': hits['sgID'] + "|" + hits['sgID'],
    })[out_cols]
    # The former row-by-row append left every row labelled 0; keep that so
    # label-based lookups in callers behave as before.
    result.index = [0] * len(result)
    return result
246
+
247
+ #########
248
+
249
+ #######THIS SECTION ADDED FOR ORDER READY LIST AND REMOVE REPITION FOR NOT_FOUND ENTRUES
250
def get_lists_ol(ref_list, list_found_ref, list_notfound_ref):
    """Variant of ``get_lists`` that omits the guide flag from the result.

    The matching, mutation and not-found logic is identical to
    ``get_lists``, so this function delegates to it instead of repeating it.

    Args:
        ref_list: DataFrame whose ``gene`` column holds ``"<guide1>|<guide2>"``
            pair identifiers.
        list_found_ref: Search hits (gene, ref_guide, chr, position,
            mutated_guide, strand, num_mismatch).
        list_notfound_ref: Guides without any hit; needs a ``gene`` column.

    Returns:
        Tuple ``(dft, dft_mut, not_found, matched, mutated)`` -- the first
        five values returned by ``get_lists``.
    """
    dft, dft_mut, not_found, matched, mutated, _guide_flag = get_lists(
        ref_list, list_found_ref, list_notfound_ref
    )
    return dft, dft_mut, not_found, matched, mutated
316
+ ###########
317
+
318
+
319
+ #THIS WILL GENERATE ORDER READY TABLE FOR GRCh38
320
+ #THIS WILL GENERATE ORDER READY TABLE FOR CHM13
321
+
322
+ #CHECK IF GUIDE ARE IN NOT FOUND LIST
323
def not_found_check(set12, set34, set56, listA_notfound_lr, listB_notfound_lr, listC_notfound_lr):
    """Flag which guides of three guide pairs appear in the not-found lists.

    Args:
        set12, set34, set56: Pair identifiers ``"<guide1>|<guide2>"`` for
            list A, B and C respectively.
        listA_notfound_lr, listB_notfound_lr, listC_notfound_lr: DataFrames
            of not-found guides, each with a ``gene`` column.

    Returns:
        Six ints ``(A1, A2, B1, B2, C1, C2)``; a value is 1 when that guide
        is listed in the matching not-found frame and 0 otherwise.
    """
    flags = []
    lookups = (
        (set12, listA_notfound_lr),
        (set34, listB_notfound_lr),
        (set56, listC_notfound_lr),
    )
    for pair_id, missing in lookups:
        for slot in (0, 1):
            guide = pair_id.split('|')[slot]
            is_missing = (missing['gene'] == guide).any()
            flags.append(1 if is_missing else 0)
    return tuple(flags)
345
+
346
def _reference_rows(library, pair_id):
    """Return the library rows for ``pair_id`` with ``sgID_AB`` exposed as ``gene``."""
    rows = library[library['sgID_AB'] == pair_id][
        ['sgID_AB', 'guide_type', 'protospacer_A', 'protospacer_B']
    ]
    return rows.rename(columns={'sgID_AB': 'gene'})


def _borrow_second_guide(primary, donor):
    """Replace the second guide of ``primary`` (in place) with ``donor``'s first guide."""
    primary[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = donor[
        ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']
    ]
    primary['sgID_1_2'] = primary['sgID_1'] + "|" + primary['sgID_2']


def _append_order_row(order_table, row, gene_name, guide_type):
    """Label ``row`` with gene and guide type and return ``order_table`` plus that row."""
    row['gene'] = gene_name
    row['guide_type'] = guide_type
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    return pd.concat([order_table, row])


def order_ready_tbl_CHM13(set12, set34, set56, listA_found_lr, listA_notfound_lr, listB_found_lr, listB_notfound_lr, listC_found_lr, listC_notfound_lr):
    """Build and display the order-ready CHM13 guide table.

    For each selected gene one guide pair is chosen, preferring list A
    (guides 1-2), then combinations with list B (guides 3-4) and list C
    (guides 5-6). The resulting table is shown with ``tbl_disp``; nothing is
    returned.

    Reads the module-level guide libraries ``listA``, ``listB`` and ``listC``.

    Args:
        set12, set34, set56: Series of selected ``sgID_AB`` pair identifiers
            from list A, B and C; they must have the same length.
        listA_found_lr ... listC_notfound_lr: Found / not-found search
            results for each list, passed through to ``get_lists`` and
            ``not_found_check``.

    NOTE(review): the nesting of the fallback branches below was
    reconstructed from source without indentation -- confirm against the
    deployed app before relying on edge-case behavior.
    """
    dft_order_table = pd.DataFrame(columns=['gene', 'guide_type', 'sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])

    # Positional labels 0..n-1 so the three series can be indexed in lockstep.
    set12 = set12.reset_index(drop=True)
    set34 = set34.reset_index(drop=True)
    set56 = set56.reset_index(drop=True)

    for i in range(set12.shape[0]):
        gene_n = set12[i].split('_')[0]
        # f = (A1, A2, B1, B2, C1, C2); 1 means that guide is in a not-found list.
        f = not_found_check(set12[i], set34[i], set56[i], listA_notfound_lr, listB_notfound_lr, listC_notfound_lr)

        ref_listA = _reference_rows(listA, set12.iloc[i])
        resa, _, _, _, _, gflga1 = get_lists(ref_listA, listA_found_lr, listA_notfound_lr)

        ref_listB = _reference_rows(listB, set34.iloc[i])
        resb, _, _, _, _, gflgb1 = get_lists(ref_listB, listB_found_lr, listB_notfound_lr)

        ref_listC = _reference_rows(listC, set56.iloc[i])
        resc, _, _, _, _, _ = get_lists(ref_listC, listC_found_lr, listC_notfound_lr)

        b_available = f[2] == 0 or f[3] == 0
        c_available = f[4] == 0 or f[5] == 0

        if gflga1 == 0:
            # Neither A guide is in the not-found list.
            # NOTE(review): raises KeyError if ``resa`` has no exact matches.
            if resa['sgID_1'][0] != resa['sgID_2'][0]:
                dft_order_table = _append_order_row(dft_order_table, resa, gene_n, '1-2')
            else:
                # Mutation case: both rows are the same guide, so take the
                # second guide from list B when one is available.
                if b_available:
                    if not resb.empty:
                        _borrow_second_guide(resa, resb)
                        if f[2] == 0:
                            dft_order_table = _append_order_row(dft_order_table, resa, gene_n, str(gflga1) + "-3")
                        else:
                            dft_order_table = _append_order_row(dft_order_table, resa, gene_n, str(gflga1) + "-4")

        elif resa.shape[0] > 0:
            # Exactly one usable guide comes from list A.
            if b_available:
                if not resb.empty:
                    _borrow_second_guide(resa, resb)
                    if f[2] == 0:
                        dft_order_table = _append_order_row(dft_order_table, resa, gene_n, str(gflga1) + "-3")
                    else:
                        dft_order_table = _append_order_row(dft_order_table, resa, gene_n, str(gflga1) + "-4")
            elif c_available:
                if not resc.empty:
                    _borrow_second_guide(resa, resc)
                    if f[4] == 0:
                        dft_order_table = _append_order_row(dft_order_table, resa, gene_n, str(gflga1) + "-5")
                    else:
                        dft_order_table = _append_order_row(dft_order_table, resa, gene_n, str(gflga1) + "-6")

        elif resb.shape[0] > 0:
            # Nothing usable in list A; start from list B.
            if f[4] == 0 and f[5] == 0:
                dft_order_table = _append_order_row(dft_order_table, resb, gene_n, '3-4')
            elif c_available:
                _borrow_second_guide(resb, resc)
                if f[4] == 0:
                    dft_order_table = _append_order_row(dft_order_table, resb, gene_n, str(gflgb1 + 1) + "-5")
                else:
                    dft_order_table = _append_order_row(dft_order_table, resb, gene_n, str(gflgb1 + 2) + "-6")

        elif resc.shape[0] > 0:
            # Only list C has matches; accept it when its two guides differ.
            if resc['sgID_1'][0] != resc['sgID_2'][0]:
                dft_order_table = _append_order_row(dft_order_table, resc, gene_n, '5-6')

    if dft_order_table.shape[0] > 0:
        st.write('Order Ready **CHM13** guides List')
        tbl_disp(dft_order_table, 'select_genes', 'SetA_CHM13', 5)
    else:
        st.write('**No guides found in ListA, ListB and ListC**')
482
+ #st.table(dft_order_table)
483
+
484
+ #def get_notfound():
485
+
486
+
487
# Data directory; the trailing slash is kept because file names are appended
# to it by plain string concatenation.
cwd=os.getcwd()+'/'+'data/'


def _load_table(file_name, trim_chr=False, fix_strand=False):
    """Read one CSV from the data directory.

    ``trim_chr`` keeps only the text before the first space in the ``chr``
    column; ``fix_strand`` renames the misspelled ``strnad`` column.
    """
    table = pd.read_csv(cwd+file_name,index_col=False)
    if trim_chr:
        table['chr'] = [x.split(' ')[0] for x in table['chr']]
    if fix_strand:
        table.rename(columns = {'strnad':'strand'}, inplace = True)
    return table


# Guide libraries (lists A, B and C) and their sizes.
listA = _load_table("guides_a_new.csv")
listB = _load_table("guides_b_new.csv")
listC = _load_table("guides_c_new.csv")

lista_sz=listA.shape[0]
listb_sz=listB.shape[0]
listc_sz=listC.shape[0]

# Sorted union of the gene names present in any of the three libraries.
variantsa1=listA['gene'].unique()
variantsb1=listB['gene'].unique()
variantsc1=listC['gene'].unique()
con = np.concatenate((variantsa1, variantsb1,variantsc1))
variants_s=sorted(np.unique(con))

# Set A: search results against GRCh38 ("ref") and the long-read fasta ("lr").
listA_found_ref = _load_table("seta_found_ref1.csv", trim_chr=True, fix_strand=True)
lsita_ref_found_sz=listA_found_ref.shape[0]
listA_notfound_ref = _load_table("seta_notfound_ref1.csv")
lsita_ref_notfound_sz=listA_notfound_ref.shape[0]

listA_found_lr = _load_table("seta_found_LR1.csv", fix_strand=True)
lsita_lr_found_sz=listA_found_lr.shape[0]
listA_notfound_lr = _load_table("seta_notfound_LR1.csv")
lsita_lr_notfound_sz=listA_notfound_lr.shape[0]

# Set B: same four tables.
listB_found_ref = _load_table("setb_found_ref1.csv", trim_chr=True, fix_strand=True)
lsitb_ref_found_sz=listB_found_ref.shape[0]
listB_notfound_ref = _load_table("setb_notfound_ref1.csv")
lsitb_ref_notfound_sz=listB_notfound_ref.shape[0]

listB_found_lr = _load_table("setb_found_LR1.csv", fix_strand=True)
lsitb_lr_found_sz=listB_found_lr.shape[0]
listB_notfound_lr = _load_table("setb_notfound_LR1.csv")
lsitb_lr_notfound_sz=listB_notfound_lr.shape[0]

# Set C: same four tables.
listC_found_ref = _load_table("setc_found_ref1.csv", trim_chr=True, fix_strand=True)
lsitc_ref_found_sz=listC_found_ref.shape[0]
listC_notfound_ref = _load_table("setc_notfound_ref1.csv")
lsitc_ref_notfound_sz=listC_notfound_ref.shape[0]

listC_found_lr = _load_table("setc_found_LR1.csv", fix_strand=True)
lsitc_lr_found_sz=listC_found_lr.shape[0]
listC_notfound_lr = _load_table("setc_notfound_LR1.csv")
lsitc_lr_notfound_sz=listC_notfound_lr.shape[0]
566
+ #also load all mismatched except non-targe guides
567
+ #listA_notfound_lr = pd.read_csv(cwd+"setc_notfound_LR1.csv",index_col=False) seta_all_notmatched_table.csv
568
+
569
st.title('Long Read Guides Search')

# Sidebar switch that selects which page of the app is rendered below.
_PAGE_CHOICES = ('ReadME', 'Single/Multiple Guides','All','Not_Found')
Calc = st.sidebar.radio("", _PAGE_CHOICES)
578
+
579
+
580
+ if Calc == 'ReadME':
581
+ expander = st.expander("How to use this app")
582
+ #st.header('How to use this app')
583
+ expander.markdown('Please select **Single Gene** OR **Multiple Genes** Menue checkbox from the sidebar')
584
+ expander.markdown('Select a Gene (from genes dropdown list) OR Multiple genes (from table)')
585
+ expander.markdown('A table showing all reference gudies from three LISTS will appear in the main panel. **Please not some of the genes (for example A1BG and GJB7) have multiple guide pairs and all of these are selected.**')
586
+ expander.markdown('To see results for each of the selected reference guide from ListA, ListB and ListC, Please select respective checkbox')
587
+ expander.markdown('Results are shown as two tables, **Matched** and **Mutated** guides tables and **NOT FOUND** table if guides are not found in GRCh38 and LR reference fasta files')
588
+ expander.markdown('**Mutated** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**')
589
+ expander.markdown('**Mutated** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**')
590
+
591
+ expander1 = st.expander('Introduction')
592
+
593
+ expander1.markdown(
594
+ """ This app helps navigate all probable genomic **miss-matched/Mutations (upto 2 bp)** for a given sgRNA (from 3 lists of CRISPRi dual sgRNA libraries) in GRCh38 reference fasta and a Reference fasta generated from BAM generated against KOLF2.1J longread data.
595
+ """
596
+ )
597
+ expander1.markdown('Merged bam file was converted to fasta file using following steps:')
598
+ expander1.markdown('- samtools mpileup to generate bcf file')
599
+ expander1.markdown('- bcftools to generate vcf file')
600
+ expander1.markdown('- bcftools consensus to generate fasta file')
601
+ expander1.markdown('A GPU based [Cas-OFFinder](http://www.rgenome.net/cas-offinder/) tool was used to find off-target sequences (upto 2 miss-matched) for each geiven reference guide against GRCh38 and LR fasta references.')
602
+
603
+ elif Calc=='Single/Multiple Guides':
604
+ flg_a_fount=0
605
+ flg_b_fount=0
606
+ flg_c_fount=0
607
+ #st.write('**General Stats:**')
608
+ #st.write('**GRCh38 Stats: Guides Found: **'+str(lsita_ref_found_sz)+"/"+str(lista_sz))
609
+ with st.form(key='columns_in_form'):
610
+ c2, c3 = st.columns(2)
611
+ with c2:
612
+ multi_genes = st.multiselect(
613
+ 'Please select genes list to start processing',
614
+ variants_s)
615
+ Updated=st.form_submit_button(label = 'Update')
616
+ listA_concatenated_orig = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
617
+ reflistA_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
618
+ reflistB_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
619
+ reflistC_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
620
+ for variant in multi_genes:
621
+ ref_listA=listA[listA['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
622
+ ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
623
+ ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
624
+ reflistA_concatenated=pd.concat([reflistA_concatenated,ref_listA])
625
+
626
+ ref_listB=listB[listB['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
627
+ ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
628
+ ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
629
+ reflistB_concatenated=pd.concat([reflistB_concatenated,ref_listB])
630
+
631
+ ref_listC=listC[listC['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
632
+ ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
633
+ ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
634
+ reflistC_concatenated=pd.concat([reflistC_concatenated,ref_listC])
635
+ listA_concatenated_orig = pd.concat([listA_concatenated_orig,ref_listA,ref_listB,ref_listC])
636
+
637
+ if listA_concatenated_orig.shape[0] > 0:
638
+
639
+ #st.markdown(table_edit,unsafe_allow_html=True)
640
+ st.write('**Input** Guides (all 6 from 3 sets).')
641
+ st.write('**Please Select Guides common to ALL 3 Lists to procede further Processing**')
642
+ st.markdown(caution_genes,unsafe_allow_html=True)
643
+
644
+ with st.form(key='columns_in_form_a'):
645
+ c2, c3 = st.columns(2)
646
+ with c2:
647
+ get_table_order=tbl_disp(listA_concatenated_orig,'variant','ref_guides',111,0)
648
+ #multi_genes = st.multiselect(
649
+ #'Please select genes list to start processing',
650
+ #variants_s)
651
+ Updated1=st.form_submit_button(label = 'Generate Order Ready Table')
652
+
653
+ #get_table_order=tbl_disp(listA_concatenated_orig,'variant','ref_guides',1,0)
654
+
655
+
656
+
657
+
658
+ if not isinstance(get_table_order, type(None)): # and Updated1:# and get_table_order.shape[0]>0:
659
+ #if not isinstance(get_table_order, type(None)):
660
+ variant_set12=get_table_order[get_table_order['guide_type']=='1-2']['gene']
661
+ variant_set34=get_table_order[get_table_order['guide_type']=='3-4']['gene']
662
+ variant_set56=get_table_order[get_table_order['guide_type']=='5-6']['gene']
663
+ #st.table(variant_set12)
664
+ #st.write(type(variant_set12))
665
+ #if not variant_set12.equals(variant_set34):
666
+ # st.write('**Please select Identical Genes From List A and B**')
667
+ if variant_set12.shape[0]==variant_set34.shape[0]==variant_set56.shape[0]:
668
+ #########Here we call order ready table
669
+ #order_ready_tbl_GRCh38(variant_set12,variant_set34,variant_set56)
670
+ order_ready_tbl_CHM13(variant_set12,variant_set34,variant_set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr)
671
+ ########END ORDER READY TABLE
672
+
673
+
674
+ elif variant_set12.shape[0]!=variant_set34.shape[0]:
675
+ st.markdown("""**<span style='color:red'>SetA and SetB</span> guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True)
676
+ elif variant_set12.shape[0]!=variant_set56.shape[0]:
677
+ st.markdown("""**<span style='color:red'>SetA and SetC</span> guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True)
678
+ elif variant_set34.shape[0]!=variant_set56.shape[0]:
679
+ st.markdown("""**<span style='color:red'>SetB and SetC</span> guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True)
680
+
681
+ else:
682
+ st.markdown("""**<span style='color:red'>Probably Mixed guides are selected from three lists, Please correct the problem and re-run</span>**""",unsafe_allow_html=True)
683
+
684
+ #Now BUILD Order Ready List
685
+ #if dft_lr_resa.shape[0] >0 and dft_lr_resb.shape[0] >0 and dft_lr_resc.shape[0] >0:
686
+ # for sgrna in dft_lr_resa
687
+ else:
688
+ st.write('**Please select guides and Press Update Button to Begin Processing**')
689
+
690
+
691
+
692
+ ListARes = st.checkbox('Results For SetA',key=300)
693
+ if ListARes:# and not isinstance(get_table, type(None)):#get_table!=None:
694
+ #if ListARes and get_table.shape[0]>0:
695
+ st.write('**Please select Guides From Table Below to processes from ListA**')
696
+ get_table=tbl_disp(reflistA_concatenated,variant,'ref_guides',2,0)
697
+ if not isinstance(get_table, type(None)):
698
+ #variant_set=get_table[get_table['guide_type']=='1-2']['gene']
699
+ variant_set=get_table['gene']
700
+ dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
701
+ dft_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
702
+ dft_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
703
+ dft_notfounda=pd.DataFrame(columns=['gene','ref_guide'])
704
+ df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
705
+ df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
706
+ #CHECK FOR GRCh38
707
+ for i in range(variant_set.shape[0]):
708
+ #ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
709
+ ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
710
+ ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
711
+
712
+ ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
713
+ res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,listA_found_ref,listA_notfound_ref)
714
+ dft_a=dft_a.append(ref_listA)
715
+ if res.shape[0]>0:
716
+ dft_resa=dft_resa.append(res)
717
+ if res_mut.shape[0]>0:
718
+ dft_res_muta=dft_res_muta.append(res_mut)
719
+ if res_notfound.shape[0]>0:
720
+ dft_notfounda= dft_notfounda.append(res_notfound)
721
+ if list_match.shape[0]>0:
722
+ df_matched_guides_ref= df_matched_guides_ref.append(list_match)
723
+ if list_mutated.shape[0]>0:
724
+ df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
725
+
726
+ #st.write('Selected Reference Guides for **Set A**')
727
+ #tbl_disp(dft_a,'All','ReferenceGuides',0)
728
+ if dft_resa.shape[0]>0:
729
+ st.write('Matched to **GRCh38** Reference Guides for **Set A**')
730
+ tbl_disp(dft_resa,'select_genes','SetA_GRCh38',3)
731
+ elif dft_res_muta.shape[0]>0:
732
+ st.write('Mutated to **GRCh38** Reference Guides for **Set A**')
733
+ st.markdown(caution1,unsafe_allow_html=True)
734
+ tbl_disp(dft_res_muta,'select_genes','SetA_Mutated_GRCh38',4)
735
+ if dft_notfounda.shape[0]>0:
736
+ st.write('**SetA Guides Not Found in GRCh38**')
737
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
738
+ st.table(dft_notfounda)
739
+ #Now CHECK FOR CHM13
740
+ dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
741
+ dft_lr_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
742
+ dft_lr_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
743
+ dft_lr_notfounda=pd.DataFrame(columns=['gene','ref_guide'])
744
+ df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
745
+ df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
746
+
747
+ for i in range(variant_set.shape[0]):
748
+ #ref_listA=listA[listA['gene']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
749
+ ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
750
+ ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
751
+
752
+ ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
753
+ res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
754
+ dft_a=dft_a.append(ref_listA)
755
+ if res.shape[0]>0:
756
+ dft_lr_resa=dft_lr_resa.append(res)
757
+ if res_mut.shape[0]>0:
758
+ dft_lr_res_muta=dft_lr_res_muta.append(res_mut)
759
+ if res_notfound.shape[0]>0:
760
+ dft_lr_notfounda= dft_lr_notfounda.append(res_notfound)
761
+ if list_match.shape[0]>0:
762
+ df_matched_guides_lr= df_matched_guides_lr.append(list_match)
763
+ if list_mutated.shape[0]>0:
764
+ df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
765
+
766
+ if dft_lr_resa.shape[0]>0:
767
+ st.write('Matched to **CHM13** Reference Guides for **Set A**')
768
+ tbl_disp(dft_lr_resa,'select_genes','SetA_CHM13',5)
769
+ elif dft_lr_res_muta.shape[0]>0:
770
+ st.write('Mutated to **CHM13** Reference Guides for **Set A**')
771
+ st.markdown(caution1,unsafe_allow_html=True)
772
+ tbl_disp(dft_lr_res_muta,'select_genes','SetA_Mutated_CHM13',6)
773
+ if dft_lr_notfounda.shape[0]>0:
774
+ st.write('**SetA Guides Not Found in CHM13**')
775
+ st.table(dft_lr_notfounda)
776
+ #NOW MERGE FROM GRCh38 and LR
777
+ merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
778
+ merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
779
+ merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
780
+ merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
781
+ if merged_match_set.shape[0]>0:
782
+ #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
783
+ st.write('**Matched** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
784
+ tbl_disp(merged_match_set,'select_genes','SetA_Matched_GRCh38_CHM13',7,0)
785
+
786
+ #st.table(merged_match_seta)
787
+ elif merged_mutated_set.shape[0]>0:
788
+ #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
789
+ st.write('**Mutated** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
790
+
791
+ tbl_disp(merged_mutated_set,'select_genes','SetA_Mutated_GRCh38_CHM13',8,0)
792
+ elif ListARes:
793
+ st.write("**Please select genes from the above table to begin**")
794
+
795
+ ListBRes = st.checkbox('Results For SetB',key=40)
796
+ if ListBRes: # and not isinstance(get_table, type(None)):#get_table!=None:
797
+ st.write('**Please select Guides From Table Below to processes from ListB**')
798
+ get_table=tbl_disp(reflistB_concatenated,variant,'ref_guides',9,0)
799
+ if not isinstance(get_table, type(None)):
800
+ #variant_set=get_table[['gene']]
801
+ variant_set=get_table['gene']
802
+ dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
803
+ dft_resb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
804
+ dft_res_mutb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
805
+ dft_notfoundb=pd.DataFrame(columns=['gene','ref_guide'])
806
+ df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
807
+ df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
808
+ #CHECK FOR GRCh38
809
+ for i in range(variant_set.shape[0]):
810
+ #ref_listB=listB[listB['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
811
+ ref_listB=listB[listB['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
812
+ ref_listB =ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
813
+
814
+ ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
815
+ res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,listB_found_ref,listB_notfound_ref)
816
+ dft_b=dft_b.append(ref_listB)
817
+ if res.shape[0]>0:
818
+ dft_resb=dft_resb.append(res)
819
+ if res_mut.shape[0]>0:
820
+ dft_res_mutb=dft_res_mutb.append(res_mut)
821
+ if res_notfound.shape[0]>0:
822
+ dft_notfoundb= dft_notfoundb.append(res_notfound)
823
+ if list_match.shape[0]>0:
824
+ df_matched_guides_ref= df_matched_guides_ref.append(list_match)
825
+ if list_mutated.shape[0]>0:
826
+ df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
827
+
828
+ #st.write('Selected Reference Guides for **Set B**')
829
+ #tbl_disp(dft_b,'All','ReferenceGuides',0)
830
+ if dft_resb.shape[0]>0:
831
+ st.write('Matched to **GRCh38** Reference Guides for **Set B**')
832
+ tbl_disp(dft_resb,'select_genes','SetB_GRCh38',10)
833
+ elif dft_res_mutb.shape[0]>0:
834
+ st.write('Mutated to **GRCh38** Reference Guides for **Set B**')
835
+ st.markdown(caution1,unsafe_allow_html=True)
836
+ tbl_disp(dft_res_mutb,'select_genes','SetB_Mutated_GRCh38',11)
837
+ if dft_notfoundb.shape[0]>0:
838
+ st.write('**SetB Guides Not Found in GRCh38**')
839
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
840
+ st.table(dft_notfoundb)
841
+
842
+ #Now CHECK FOR CHM13
843
+ dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
844
+ dft_lr_resb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
845
+ dft_lr_res_mutb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
846
+ dft_lr_notfoundb=pd.DataFrame(columns=['gene','ref_guide'])
847
+ df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
848
+ df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
849
+
850
+ for i in range(variant_set.shape[0]):
851
+ #ref_listB=listB[listB['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
852
+ ref_listB=listB[listB['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
853
+ ref_listB=ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
854
+
855
+ ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
856
+ res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
857
+ dft_b=dft_b.append(ref_listB)
858
+ if res.shape[0]>0:
859
+ dft_lr_resb=dft_lr_resb.append(res)
860
+ if res_mut.shape[0]>0:
861
+ dft_lr_res_mutb=dft_lr_res_mutb.append(res_mut)
862
+ if res_notfound.shape[0]>0:
863
+ dft_lr_notfoundb= dft_lr_notfoundb.append(res_notfound)
864
+ if list_match.shape[0]>0:
865
+ df_matched_guides_lr= df_matched_guides_lr.append(list_match)
866
+ if list_mutated.shape[0]>0:
867
+ df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
868
+
869
+ if dft_lr_resb.shape[0]>0:
870
+ st.write('Matched to **CHM13** Reference Guides for **Set B**')
871
+ tbl_disp(dft_lr_resb,'select_genes','SetB_CHM13',12)
872
+ elif dft_lr_res_mutb.shape[0]>0:
873
+ st.write('Mutated to **CHM13** Reference Guides for **Set B**')
874
+ st.markdown(caution1,unsafe_allow_html=True)
875
+ tbl_disp(dft_lr_res_mutb,'select_genes','SetB_Mutated_CHM13',13)
876
+ if dft_lr_notfoundb.shape[0]>0:
877
+ st.write('**SetB Guides Not Found in CHM13**')
878
+ st.table(dft_lr_notfoundb)
879
+ #NOW MERGE FROM GRCh38 and LR
880
+ merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
881
+ merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
882
+ merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
883
+ merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
884
+ if merged_match_set.shape[0]>0:
885
+ #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
886
+ st.write('**Matched** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
887
+ tbl_disp(merged_match_set,'select_genes','SetB_Matched_GRCh38_CHM13',14,0)
888
+
889
+ #st.table(merged_match_seta)
890
+ elif merged_mutated_set.shape[0]>0:
891
+ #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
892
+ st.write('**Mutated** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
893
+ #st.markdown(caution1,unsafe_allow_html=True)
894
+ tbl_disp(merged_mutated_set,'select_genes','SetB_Mutated_GRCh38_CHM13',15,0)
895
+
896
+ elif ListBRes:
897
+ st.write("**Please select genes from the above table to begin**")
898
+
899
+ ListCRes = st.checkbox('Results For SetC',key=50)
900
+ if ListCRes: # and not isinstance(get_table, type(None)):#get_table!=None:
901
+ #variant_set=get_table[['gene']]
902
+ st.write('**Please select Guides From Table Below to processes from ListC**')
903
+ get_table=tbl_disp(reflistC_concatenated,variant,'ref_guides',16,0)
904
+ if not isinstance(get_table, type(None)):
905
+ variant_set=get_table['gene']
906
+ dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
907
+ dft_resc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
908
+ dft_res_mutc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
909
+ dft_notfoundc=pd.DataFrame(columns=['gene','ref_guide'])
910
+ df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
911
+ df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
912
+ #CHECK FOR GRCh38
913
+ for i in range(variant_set.shape[0]):
914
+ #ref_listC=listC[listC['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
915
+ ref_listC=listC[listC['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
916
+ ref_listC =ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
917
+
918
+ ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
919
+ res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,listC_found_ref,listC_notfound_ref)
920
+ dft_c=dft_c.append(ref_listC)
921
+ if res.shape[0]>0:
922
+ dft_resc=dft_resc.append(res)
923
+ if res_mut.shape[0]>0:
924
+ dft_res_mutc=dft_res_mutc.append(res_mut)
925
+ if res_notfound.shape[0]>0:
926
+ dft_notfoundc= dft_notfoundc.append(res_notfound)
927
+ if list_match.shape[0]>0:
928
+ df_matched_guides_ref= df_matched_guides_ref.append(list_match)
929
+ if list_mutated.shape[0]>0:
930
+ df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
931
+
932
+ #st.write('Selected Reference Guides for **Set C**')
933
+ #tbl_disp(dft_c,'All','ReferenceGuides',0)
934
+ if dft_resc.shape[0]>0:
935
+ st.write('Matched to **GRCh38** Reference Guides for **Set C**')
936
+ tbl_disp(dft_resc,'select_genes','SetC_GRCh38',17)
937
+ elif dft_res_mutc.shape[0]>0:
938
+ st.write('Mutated to **GRCh38** Reference Guides for **Set C**')
939
+ st.markdown(caution1,unsafe_allow_html=True)
940
+ tbl_disp(dft_res_mutc,'select_genes','SetC_Mutated_GRCh38',18)
941
+ if dft_notfoundc.shape[0]>0:
942
+ st.write('**SetC Guides Not Found in GRCh38**')
943
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
944
+ st.table(dft_notfoundc)
945
+
946
+ #Now CHECK FOR CHM13
947
+ dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
948
+ dft_lr_resc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
949
+ dft_lr_res_mutc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
950
+ dft_lr_notfoundc=pd.DataFrame(columns=['gene','ref_guide'])
951
+ df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
952
+ df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
953
+
954
+ for i in range(variant_set.shape[0]):
955
+ #ref_listC=listC[listC['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
956
+ ref_listC=listC[listC['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
957
+ ref_listC=ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
958
+
959
+ ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
960
+ res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
961
+ dft_c=dft_c.append(ref_listC)
962
+ if res.shape[0]>0:
963
+ dft_lr_resc=dft_lr_resc.append(res)
964
+ if res_mut.shape[0]>0:
965
+ dft_lr_res_mutc=dft_lr_res_mutc.append(res_mut)
966
+ if res_notfound.shape[0]>0:
967
+ dft_lr_notfoundc= dft_lr_notfoundc.append(res_notfound)
968
+ if list_match.shape[0]>0:
969
+ df_matched_guides_lr= df_matched_guides_lr.append(list_match)
970
+ if list_mutated.shape[0]>0:
971
+ df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
972
+
973
+ if dft_lr_resc.shape[0]>0:
974
+ st.write('Matched to **CHM13** Reference Guides for **Set C**')
975
+ tbl_disp(dft_lr_resc,'select_genes','SetC_CHM13',19)
976
+ elif dft_lr_res_mutc.shape[0]>0:
977
+ st.write('Mutated to **CHM13** Reference Guides for **Set C**')
978
+ st.markdown(caution1,unsafe_allow_html=True)
979
+ tbl_disp(dft_lr_res_mutc,'select_genes','SetC_Mutated_CHM13',20)
980
+ if dft_lr_notfoundc.shape[0]>0:
981
+ st.write('**SetC Guides Not Found in CHM13**')
982
+ st.table(dft_lr_notfoundc)
983
+ #NOW MERGE FROM GRCh38 and LR
984
+ merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
985
+ merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
986
+ merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
987
+ merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
988
+ if merged_match_set.shape[0]>0:
989
+ #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
990
+ st.write('**Matched** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
991
+ tbl_disp(merged_match_set,'select_genes','SetC_Matched_GRCh38_CHM13',21,0)
992
+
993
+ #st.table(merged_match_seta)
994
+ elif merged_mutated_set.shape[0]>0:
995
+ #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
996
+ st.write('**Mutated** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
997
+ #st.markdown(caution1,unsafe_allow_html=True)
998
+ tbl_disp(merged_mutated_set,'select_genes','SetC_Mutated_GRCh38_CHM13',22,0)
999
+
1000
+ # if ListARes and ListBRes and ListCRes:
1001
+ # Order_List = st.checkbox('Generate Order Ready List',key=100)
1002
+ # if Order_List:
1003
+ # if dft_lr_resa.shape[0]>0:
1004
+ # st.table(dft_lr_resa)
1005
+
1006
+
1007
+ elif ListCRes:
1008
+ st.write("**Please select genes from the above table to begin**")
1009
+ elif Calc=='Not_Found':
1010
+ ListAResNotFound = st.checkbox('Results For SetA',key=30)
1011
+ if ListAResNotFound and listA_notfound_lr.shape[0]>0:
1012
+ listA_notfound_LR_sorted=listA_notfound_lr.sort_values('gene')
1013
+ sz1a=listA_notfound_LR_sorted.shape[0]
1014
+ vaild_guides_a = listA_notfound_LR_sorted[~listA_notfound_LR_sorted['gene'].str.contains("non")]
1015
+
1016
+
1017
+ sz2a=vaild_guides_a.shape[0]
1018
+ st.write(str(sz2a)+"/"+str(sz1a)+' Guides Not Found')
1019
+ tbl_disp(vaild_guides_a,'all_not_found','SetA_KOLF2.1',23,0)
1020
+
1021
+ #now get gene names only
1022
+ genesa=vaild_guides_a['gene'].str.split('_').str[0]
1023
+ genesa1=genesa[genesa.duplicated(keep=False)]
1024
+ genesa2=genesa1.unique()
1025
+ pair_lista=[]
1026
+ for g in genesa2:
1027
+ g1=vaild_guides_a[vaild_guides_a['gene'].str.contains(g)]
1028
+ g2=g1.reset_index(drop=True)
1029
+ pair_lista.append([g2.gene[0],g2.ref_guide[0],g2.gene[1],g2.ref_guide[1]])
1030
+ pair_missmatch_a = pd.DataFrame(pair_lista, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2'])
1031
+ sz22a=pair_missmatch_a.shape[0]
1032
+ st.write(str(sz22a)+"/"+str(sz2a)+' Paired Guides Not Found')
1033
+ tbl_disp(pair_missmatch_a,'all_not_found','SetA_KOLF2.1',23,0)
1034
+
1035
+
1036
+
1037
+ non_targeting_guides_a = listA_notfound_LR_sorted[listA_notfound_LR_sorted['gene'].str.contains("non")]
1038
+ sz3a=non_targeting_guides_a.shape[0]
1039
+ st.write(str(sz3a)+"/"+str(sz1a)+' no-targeting Guides Not Found')
1040
+ tbl_disp(non_targeting_guides_a,'all_not_found','SetA_KOLF2.1',23,0)
1041
+
1042
+ ListBResNotFound = st.checkbox('Results For SetB',key=40)
1043
+ if ListBResNotFound:
1044
+ listB_notfound_LR_sorted=listB_notfound_lr.sort_values('gene')
1045
+ sz1b=listB_notfound_LR_sorted.shape[0]
1046
+ vaild_guides_b = listB_notfound_LR_sorted[~listB_notfound_LR_sorted['gene'].str.contains("non")]
1047
+ sz2b=vaild_guides_b.shape[0]
1048
+ st.write(str(sz2b)+"/"+str(sz1b)+' Guides Not Found')
1049
+ tbl_disp(vaild_guides_b,'all_not_found','SetA_KOLF2.1',23,0)
1050
+
1051
+ #now get gene names only
1052
+ genesb=vaild_guides_b['gene'].str.split('_').str[0]
1053
+ genesb1=genesb[genesb.duplicated(keep=False)]
1054
+ genesb2=genesb1.unique()
1055
+ pair_listb=[]
1056
+ for g in genesb2:
1057
+ g1=vaild_guides_b[vaild_guides_b['gene'].str.contains(g)]
1058
+ g2=g1.reset_index(drop=True)
1059
+ pair_listb.append([g2.gene[0],g2.ref_guide[0],g2.gene[1],g2.ref_guide[1]])
1060
+ pair_missmatch_b = pd.DataFrame(pair_listb, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2'])
1061
+ sz22b=pair_missmatch_b.shape[0]
1062
+ st.write(str(sz22b)+"/"+str(sz2b)+' Paired Guides Not Found')
1063
+ tbl_disp(pair_missmatch_b,'all_not_found','SetA_KOLF2.1',23,0)
1064
+
1065
+
1066
+ non_targeting_guides_b = listB_notfound_LR_sorted[listB_notfound_LR_sorted['gene'].str.contains("non")]
1067
+ sz3b=non_targeting_guides_b.shape[0]
1068
+ st.write(str(sz3b)+"/"+str(sz1b)+' no-targeting Guides Not Found')
1069
+ tbl_disp(non_targeting_guides_b,'all_not_found','SetA_KOLF2.1',23,0)
1070
+ ListCResNotFound = st.checkbox('Results For SetC',key=50)
1071
+ if ListCResNotFound:
1072
+ listC_notfound_LR_sorted=listC_notfound_lr.sort_values('gene')
1073
+ sz1c=listC_notfound_LR_sorted.shape[0]
1074
+ vaild_guides_c = listC_notfound_LR_sorted[~listC_notfound_LR_sorted['gene'].str.contains("non")]
1075
+ sz2c=vaild_guides_c.shape[0]
1076
+ st.write(str(sz2c)+"/"+str(sz1c)+' Guides Not Found')
1077
+ tbl_disp(vaild_guides_c,'all_not_found','SetA_KOLF2.1',23,0)
1078
+
1079
+ #now get gene names only
1080
+ genesc=vaild_guides_c['gene'].str.split('_').str[0]
1081
+ genesc1=genesc[genesc.duplicated(keep=False)]
1082
+ genesc2=genesc1.unique()
1083
+ pair_listc=[]
1084
+ for g in genesc2:
1085
+ g1=vaild_guides_c[vaild_guides_c['gene'].str.contains(g)]
1086
+ g2=g1.reset_index(drop=True)
1087
+ pair_listc.append([g2.gene[0],g2.ref_guide[0],g2.gene[1],g2.ref_guide[1]])
1088
+ pair_missmatch_c = pd.DataFrame(pair_listc, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2'])
1089
+ sz22c=pair_missmatch_c.shape[0]
1090
+ st.write(str(sz22c)+"/"+str(sz2c)+' Paired Guides Not Found')
1091
+ tbl_disp(pair_missmatch_c,'all_not_found','SetA_KOLF2.1',23,0)
1092
+
1093
+
1094
+ non_targeting_guides_c = listC_notfound_LR_sorted[listC_notfound_LR_sorted['gene'].str.contains("non")]
1095
+ sz3c=non_targeting_guides_c.shape[0]
1096
+ st.write(str(sz3c)+"/"+str(sz1c)+' no-targeting Guides Not Found')
1097
+ tbl_disp(non_targeting_guides_c,'all_not_found','SetA_KOLF2.1',23,0)
1098
+
1099
+ else:
1100
+ st.write("**Place Holder for All**")