syedislamuddin commited on
Commit
b4b3c55
·
1 Parent(s): 4a556a5

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -883
app.py DELETED
@@ -1,883 +0,0 @@
1
- #from turtle import shape
2
- import streamlit as st
3
- #from st_keyup import st_keyup
4
- import pandas as pd
5
- import numpy as np
6
- from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode
7
-
8
- import os
9
-
10
# Use the full browser width for the app.
st.set_page_config(layout="wide")

# Enlarge the header text of every st.expander.
_EXPANDER_HEADER_CSS = """
<style>
.streamlit-expanderHeader {
font-size: x-large;
}
</style>
"""
st.markdown(_EXPANDER_HEADER_CSS, unsafe_allow_html=True)


def _styled_paragraph(color, size_px, text):
    """Return an HTML <p> snippet in sans-serif with the given color and pixel size."""
    return (
        '<p style="font-family:sans-serif; color:' + color
        + '; font-size: ' + str(size_px) + 'px;">' + text + '</p>'
    )


# User-facing notices, rendered later with st.markdown(..., unsafe_allow_html=True).
caution = _styled_paragraph(
    'Red', 18,
    'Please note that Only one Guide (from pair) is found. Please see guides not found section for other guide',
)
caution1 = _styled_paragraph(
    'Red', 18,
    'Please note that Each mutated guide is reported as a sepearte line. sgID_1/2, sgRNA_1/2, chr_sgRNA_1/2 and position_sgRNA_1/2 represent values for reference/mutated guide',
)
caution2 = _styled_paragraph(
    'Red', 18,
    'Please Select a single/multiple guides and then select Check Box A, B or C Otherwise code will through error',
)
table_edit = _styled_paragraph(
    'Green', 16,
    'About Table: Please note that table can be <b>sorted by clicking on any column</b> and <b>Multiple rows can be selected</b> (by clicking check box in first column) to save only those rows.',
)
25
-
26
def transform(df,str):
    """Let the user pick which columns of ``df`` to keep.

    Shows a Streamlit multiselect labelled ``str`` that lists every column of
    ``df`` (all pre-selected) and returns ``df`` restricted to the columns the
    widget reports as selected.
    """
    available = df.columns.tolist()
    chosen = st.multiselect(str, available, list(available))
    return df[chosen]
35
-
36
def convert_df(df):
    """Return ``df`` as UTF-8 encoded CSV bytes, index column included."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
38
def convert_df1(df):
    """Return ``df`` as UTF-8 encoded CSV bytes without the index column."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
40
-
41
-
42
- # CSS to inject contained in a string
43
# Stylesheet that hides the first header cell and the row-header cells of
# HTML tables.
hide_table_row_index = """
<style>
thead tr th:first-child {display:none}
tbody th {display:none}
</style>
"""

# Push the stylesheet into the page through a Markdown element.
st.markdown(body=hide_table_row_index, unsafe_allow_html=True)
52
-
53
-
54
- #########TABLE DISPLAY
55
def tbl_disp(dat,var,ref,flg=1):
    """Render ``dat`` as an interactive AgGrid table with CSV download buttons.

    ``dat`` has its index reset in place before display. When ``flg`` equals 1
    a button offering the whole table as ``<var>_<ref>.csv`` is shown above the
    grid. If the user ticks rows in the grid, those rows are shown in a second
    table (first column dropped) with their own download button.

    Returns the DataFrame built from the selected rows, or ``None`` when no
    row is selected.
    """
    dat.reset_index(drop=True, inplace=True)
    full_csv = convert_df(dat)
    if flg == 1:
        st.download_button(
            label="Download Full Table as CSV file",
            data=full_csv,
            file_name=var + '_' + ref + '.csv',
            mime='text/csv',
        )

    # Grid configuration: no pagination, pivot/value/group enabled on every
    # column, multi-row checkbox selection and the side bar.
    builder = GridOptionsBuilder.from_dataframe(dat)
    builder.configure_pagination(enabled=False)
    builder.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    builder.configure_selection(selection_mode="multiple", use_checkbox=True)
    builder.configure_side_bar()
    grid_options = builder.build()

    response = AgGrid(
        dat,
        height=200,
        gridOptions=grid_options,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    picked = response['selected_rows']
    if not picked:
        return None

    st.write('Selected rows')
    dfs = pd.DataFrame(picked)
    # Everything except the first column of the selection is displayed/exported.
    shown = dfs[dfs.columns[1:]]
    st.dataframe(shown)
    st.download_button(
        label="Download data as CSV",
        data=convert_df1(shown),
        file_name=var + '_' + ref + '.csv',
        mime='text/csv',
    )
    return dfs
110
-
111
-
112
-
113
def assemble_tbl(t):
    """Combine consecutive rows of ``t`` pairwise into one output row each.

    ``t`` must have exactly seven columns (renamed here by position) and an
    even number of rows: rows ``2k`` and ``2k+1`` become guide 1 and guide 2 of
    output row ``k``. Both guide sequences are cut to their first 20
    characters, and ``sgID_1_2`` is ``"<sgID_1>|<sgID_2>"``.

    Returns a DataFrame with columns sgID_1, sgRNA_1, chr_sgRNA_1,
    position_sgRNA_1, sgID_2, sgRNA_2, chr_sgRNA_2, position_sgRNA_2, sgID_1_2
    (empty, with those columns, when ``t`` has no rows). Every output row keeps
    index label 0.
    """
    out_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']
    first_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
    second_cols = ['sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2','mutated_guide2', 'strand2', 'num_mismatch2']

    # Collect the per-pair frames and concatenate once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and re-copied the
    # accumulated frame on every iteration.
    pairs = []
    for i in range(0, t.shape[0], 2):
        first = t.iloc[[i]].reset_index(drop=True)
        first.columns = first_cols
        second = t.iloc[[i + 1]].reset_index(drop=True)
        second.columns = second_cols

        pair = pd.concat([first, second], axis=1)[out_cols[:-1]].copy()
        pair['sgRNA_1'] = pair['sgRNA_1'].str.slice(0, 20)
        pair['sgRNA_2'] = pair['sgRNA_2'].str.slice(0, 20)
        # Pair identifier joins BOTH guide IDs (previously sgID_1 was joined
        # with itself, so the second guide's ID never appeared).
        pair['sgID_1_2'] = pair['sgID_1'] + "|" + pair['sgID_2']
        pairs.append(pair)

    if not pairs:
        return pd.DataFrame(columns=out_cols)
    return pd.concat(pairs)
129
-
130
def get_lists(ref_list,list_found_ref,list_notfound_ref):
    """Collect matched, mutated and not-found hits for the guides in ``ref_list``.

    Every value of ``ref_list.gene`` is split on ``'|'`` and its first two
    parts are used as guide IDs. These IDs are looked up in the ``gene`` column
    of ``list_found_ref`` and ``list_notfound_ref``. Found rows are kept only
    if their ``chr`` value contains the text ``'chr'``.

    Returns a 5-tuple:
        dft      -- rows with ``num_mismatch == 0`` combined two-per-line by
                    ``assemble_tbl``; an empty frame with the raw hit columns
                    when there are none.
        dft_mut  -- rows with ``num_mismatch > 0`` formatted by
                    ``get_mutated_res``; an empty frame when there are none.
        list_concatenated_notfound_ref -- rows of ``list_notfound_ref`` for
                    every guide ID of ``ref_list``.
        list_concatenated_match_ref    -- the raw ``num_mismatch == 0`` rows.
        list_concatenated_mutated_ref  -- the raw ``num_mismatch > 0`` rows,
                    sorted by ``position``.

    Raises ValueError (from ``pd.concat``) when ``ref_list`` has no rows.
    """
    raw_cols = ['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
    pair_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']

    # Flatten "<idA>|<idB>" pair identifiers into one list of guide IDs.
    a_ref = []
    for pair_id in ref_list.gene.values:
        parts = pair_id.split('|')
        a_ref.append(parts[0])
        a_ref.append(parts[1])

    list_concatenated_found_ref = pd.concat(
        [list_found_ref[list_found_ref['gene'] == guide_id] for guide_id in a_ref]
    )

    # Perfect matches, with rows whose chr lacks 'chr' removed.
    list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0]
    list_concatenated_match_ref = list_concatenated_match_ref[list_concatenated_match_ref['chr'].str.contains('chr')]

    dft = pd.DataFrame(columns=raw_cols)
    if list_concatenated_match_ref.shape[0] > 0:
        t = list_concatenated_match_ref.reset_index(drop=True)
        n_rows = t.shape[0]
        if n_rows % 2 == 0:
            dft = assemble_tbl(t)
        else:
            # Odd row count (including a single row): walk the rows and build
            # an even-length list of row positions. Two neighbours that agree
            # on gene, chr and position are taken together; any other row is
            # repeated so assemble_tbl always receives complete pairs.
            picks = []
            i = 0
            while i < n_rows:
                paired = i < n_rows - 1 and all(
                    t[col].iloc[i] == t[col].iloc[i + 1]
                    for col in ('gene', 'chr', 'position')
                )
                if paired:
                    picks.extend([i, i + 1])
                    i += 2
                else:
                    picks.extend([i, i])
                    i += 1
            # Positional selection replaces the row-by-row DataFrame.append
            # calls, which no longer exist in pandas >= 2.0.
            dft = assemble_tbl(t.iloc[picks].reset_index(drop=True))

    # Mismatched hits, ordered by position, same chr filter as above.
    list_concatenated_mutated_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch > 0]
    list_concatenated_mutated_ref = list_concatenated_mutated_ref.sort_values('position')
    list_concatenated_mutated_ref = list_concatenated_mutated_ref[list_concatenated_mutated_ref['chr'].str.contains('chr')]

    dft_mut = pd.DataFrame(columns=pair_cols)
    if list_concatenated_mutated_ref.shape[0] > 0:
        dft_mut = get_mutated_res(list_concatenated_mutated_ref)

    # Not-found lookup covers every guide ID, mirroring the found lookup
    # (previously only the first two IDs were checked, so additional guide
    # pairs of the same gene were silently skipped).
    list_concatenated_notfound_ref = pd.concat(
        [list_notfound_ref[list_notfound_ref['gene'] == guide_id] for guide_id in a_ref]
    )
    return dft, dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref
208
- ###########
209
-
210
def get_mutated_res(list_concatenated_mutated_ref):
    """Format mismatched hits as reference/mutated guide pairs, one per row.

    The input must have exactly seven columns; they are renamed by position,
    with the second column used as the reference guide and the fifth as the
    mutated guide. For each input row the output holds the row's ID, chr and
    position twice (``*_1`` and ``*_2``), ``sgRNA_1`` built from the reference
    guide and ``sgRNA_2`` built from the mutated guide. Both sequences are
    rewritten as ``'G'`` followed by characters 1..19 of the source string.
    ``sgID_1_2`` is ``"<sgID_1>|<sgID_1>"``.

    Returns a DataFrame with columns sgID_1, sgRNA_1, chr_sgRNA_1,
    position_sgRNA_1, sgID_2, sgRNA_2, chr_sgRNA_2, position_sgRNA_2, sgID_1_2
    (empty, with those columns, for empty input). Every output row keeps index
    label 0.
    """
    out_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']
    ref_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
    mut_cols = ['sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2','mutated_guide2', 'strand2', 'num_mismatch2']
    picked = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','mutated_guide2','chr_sgRNA_2','position_sgRNA_2']

    t = list_concatenated_mutated_ref.reset_index(drop=True)

    # Collect per-row frames and concatenate once: DataFrame.append no longer
    # exists in pandas >= 2.0 and re-copied the accumulated frame each time.
    rows = []
    for i in range(t.shape[0]):
        ref_side = t.iloc[[i]].reset_index(drop=True)
        ref_side.columns = ref_cols
        # The "second guide" is the same hit; only its mutated sequence is used.
        mut_side = ref_side.copy()
        mut_side.columns = mut_cols

        pair = pd.concat([ref_side, mut_side], axis=1)[picked].copy()
        # mutated_guide2 takes the sgRNA_2 slot.
        pair.columns = out_cols[:-1]
        # Force a leading G on both sequences and keep 20 characters in total.
        pair['sgRNA_1'] = 'G' + pair['sgRNA_1'].str.slice(1, 20)
        pair['sgRNA_2'] = 'G' + pair['sgRNA_2'].str.slice(1, 20)
        pair['sgID_1_2'] = pair['sgID_1'] + "|" + pair['sgID_1']
        rows.append(pair)

    if not rows:
        return pd.DataFrame(columns=out_cols)
    return pd.concat(rows)
241
-
242
- #########
243
-
244
- #def get_notfound():
245
-
246
-
247
# All input tables are read from the "data/" folder under the current
# working directory.
cwd = os.getcwd() + '/' + 'data/'

# Guide libraries for sets A, B and C.
listA, listB, listC = (
    pd.read_csv(cwd + file_name, index_col=False)
    for file_name in ("guides_a_new.csv", "guides_b_new.csv", "guides_c_new.csv")
)

# Distinct gene names per library.
variantsa1, variantsb1, variantsc1 = (
    library['gene'].unique() for library in (listA, listB, listC)
)

# Sorted list of the distinct gene names across all three libraries.
con = np.concatenate((variantsa1, variantsb1, variantsc1))
variants_s = sorted(np.unique(con))
268
- #st.write(len(variants_s))
269
- #also get names for non-targetting guides
270
-
271
-
272
def _load_found(file_name, trim_chr=False):
    """Read a "found" hits CSV from the data folder.

    With ``trim_chr`` set, each ``chr`` value is cut at its first space. The
    misspelled column ``strnad`` is renamed to ``strand`` when present.
    """
    hits = pd.read_csv(cwd + file_name, index_col=False)
    if trim_chr:
        hits['chr'] = [value.split(' ')[0] for value in hits['chr']]
    hits.rename(columns={'strnad': 'strand'}, inplace=True)
    return hits


def _load_notfound(file_name):
    """Read a "not found" CSV from the data folder."""
    return pd.read_csv(cwd + file_name, index_col=False)


# GRCh38 ("ref") and long-read ("LR") results for set A.
listA_found_ref = _load_found("seta_found_ref1.csv", trim_chr=True)
listA_notfound_ref = _load_notfound("seta_notfound_ref1.csv")
listA_found_lr = _load_found("seta_found_LR1.csv")
listA_notfound_lr = _load_notfound("seta_notfound_LR1.csv")

# GRCh38 ("ref") and long-read ("LR") results for set B.
listB_found_ref = _load_found("setb_found_ref1.csv", trim_chr=True)
listB_notfound_ref = _load_notfound("setb_notfound_ref1.csv")
listB_found_lr = _load_found("setb_found_LR1.csv")
listB_notfound_lr = _load_notfound("setb_notfound_LR1.csv")

# GRCh38 ("ref") and long-read ("LR") results for set C.
listC_found_ref = _load_found("setc_found_ref1.csv", trim_chr=True)
listC_notfound_ref = _load_notfound("setc_notfound_ref1.csv")
listC_found_lr = _load_found("setc_found_LR1.csv")
listC_notfound_lr = _load_notfound("setc_notfound_LR1.csv")
304
-
305
-
306
-
307
# Page heading.
st.title('Long Read Guides Search')

# Sidebar switch between the help page and the two search modes; the radio
# group itself carries no label.
_MODE_CHOICES = ('ReadME', 'Single Gene','Multiple Genes')
Calc = st.sidebar.radio("", _MODE_CHOICES)
316
-
317
-
318
- if Calc == 'ReadME':
319
- expander = st.expander("How to use this app")
320
- #st.header('How to use this app')
321
- expander.markdown('Please select **Single Gene** OR **Multiple Genes** Menue checkbox from the sidebar')
322
- expander.markdown('Select a Gene (from genes dropdown list) OR Multiple genes (from table)')
323
- expander.markdown('A table showing all reference gudies from three LISTS will appear in the main panel. **Please not some of the genes (for example A1BG and GJB7) have multiple guide pairs and all of these are selected.**')
324
- expander.markdown('To see results for each of the selected reference guide from ListA, ListB and ListC, Please select respective checkbox')
325
- expander.markdown('Results are shown as two tables, **Matched** and **Mutated** guides tables and **NOT FOUND** table if guides are not found in GRCh38 and LR reference fasta files')
326
- expander.markdown('**Mutated** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**')
327
- expander.markdown('**Mutated** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**')
328
-
329
- expander1 = st.expander('Introduction')
330
-
331
- expander1.markdown(
332
- """ This app helps navigate all probable genomic **miss-matched/Mutations (upto 2 bp)** for a given sgRNA (from 3 lists of CRISPRi dual sgRNA libraries) in GRCh38 reference fasta and a Reference fasta generated from BAM generated against KOLF2.1J longread data.
333
- """
334
- )
335
- expander1.markdown('Merged bam file was converted to fasta file using following steps:')
336
- expander1.markdown('- samtools mpileup to generate bcf file')
337
- expander1.markdown('- bcftools to generate vcf file')
338
- expander1.markdown('- bcftools consensus to generate fasta file')
339
- expander1.markdown('A GPU based [Cas-OFFinder](http://www.rgenome.net/cas-offinder/) tool was used to find off-target sequences (upto 2 miss-matched) for each geiven reference guide against GRCh38 and LR fasta references.')
340
-
341
- elif Calc=='Single Gene':
342
- #if Calc == 'Selection Menu':
343
- #ReadMe = st.sidebar.checkbox('ReadME',value=False)
344
- select_variant = st.sidebar.selectbox(
345
- "Please select Gene",
346
- variants_s
347
- )
348
- #ref_sgrna=listA[listA['sgID_A']==select_variant][['protospacer_A','protospacer_B']]
349
- #get all references
350
-
351
- ref_listA=listA[listA['gene']==select_variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
352
- ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
353
- ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
354
-
355
- ref_listB=listB[listB['gene']==select_variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
356
- ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
357
- ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
358
-
359
- ref_listC=listC[listC['gene']==select_variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
360
- ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
361
- ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
362
- listA_concatenated_orig = pd.concat([ref_listA,ref_listB,ref_listC])
363
-
364
- st.write('**Input** Guides (all 6 from 3 sets)')
365
- st.markdown(table_edit,unsafe_allow_html=True)
366
- tbl_disp(listA_concatenated_orig,select_variant,'ref_guides',0)
367
- #st.table(listA_concatenated_orig)
368
-
369
- #now search from results for list a
370
- #st.write(ref_listA)
371
- ListARes = st.checkbox('Results For SetA',key=1)
372
- if ListARes:
373
- if len(ref_listA)>0:
374
- #st.table(ref_listA)
375
-
376
- ##########
377
- res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listA,listA_found_ref,listA_notfound_ref)
378
- st.write('Selected Reference Guides for **Set A**')
379
- st.table(ref_listA)
380
- #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
381
- if res.shape[0]>0:
382
- st.write('Matched to **GRCh38** Reference Guides for **Set A**')
383
- tbl_disp(res,select_variant,'SetA_GRCh38')
384
- elif res_mut.shape[0]>0:
385
- st.write('Mutated to **GRCh38** Reference Guides for **Set A**')
386
- st.markdown(caution1,unsafe_allow_html=True)
387
- tbl_disp(res_mut,select_variant,'SetA_Mutated_GRCh38')
388
- if res_notfound.shape[0]>0:
389
- st.write('**SetA Guides Not Found in GRCh38**')
390
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
391
- st.table(res_notfound)
392
- ##########
393
-
394
-
395
- #For LR
396
- ##########
397
- res_lr,res_mut_lr,res_notfound_lr,list_match_lr,list_mutated_lr=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
398
- #st.write('Selected Reference Guides for **Set A**')
399
- #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
400
- if res_lr.shape[0]>0:
401
- st.write('Matched to **CHM13** Reference Guides for **Set A**')
402
- tbl_disp(res_lr,select_variant,'SetA_CHM13')
403
- elif res_mut_lr.shape[0]>0:
404
- st.write('Mutated to **CHM13** Reference Guides for **Set A**')
405
- st.markdown(caution1,unsafe_allow_html=True)
406
- tbl_disp(res_mut_lr,select_variant,'SetA_Mutated_CHM13')
407
- if res_notfound_lr.shape[0]>0:
408
- st.write('**SetA Guides Not Found in CHM13**')
409
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
410
- st.table(res_notfound_lr)
411
- ##########
412
-
413
-
414
- #######
415
- #NOW MERGE FROM GRCh38 and LR
416
- merged_mutated_set=pd.merge(list_mutated,list_mutated_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
417
- merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
418
- merged_match_set=pd.merge(list_match,list_match_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
419
- merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
420
- if merged_match_set.shape[0]>0:
421
- #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
422
- st.write('**Matched** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
423
- tbl_disp(merged_match_set,select_variant,'SetA_Matched_GRCh38_CHM13',0)
424
-
425
- #st.table(merged_match_seta)
426
- elif merged_mutated_set.shape[0]>0:
427
- #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
428
- st.write('**Mutated** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
429
-
430
- tbl_disp(merged_mutated_set,select_variant,'SetA_Mutated_GRCh38_CHM13',0)
431
-
432
- ########
433
-
434
- else:
435
- st.write('**Gene: **'+select_variant+' Not found in listA')
436
-
437
-
438
- #list B
439
- ListBRes = st.checkbox('Results For SetB',key=2)
440
- if ListBRes:
441
- if len(ref_listB)>0:
442
- ##########
443
- res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listB,listB_found_ref,listB_notfound_ref)
444
- st.write('Selected Reference Guides for **Set B**')
445
- st.table(ref_listB)
446
- #tbl_disp(ref_listB,select_variant,'ReferenceGuides',0)
447
- if res.shape[0]>0:
448
- st.write('Matched to **GRCh38** Reference Guides for **Set B**')
449
- tbl_disp(res,select_variant,'SetB_GRCh38')
450
- elif res_mut.shape[0]>0:
451
- st.write('Mutated to **GRCh38** Reference Guides for **Set B**')
452
- st.markdown(caution1,unsafe_allow_html=True)
453
- tbl_disp(res_mut,select_variant,'SetA_Mutated_GRCh38')
454
- if res_notfound.shape[0]>0:
455
- st.write('**SetB Guides Not Found in GRCh38**')
456
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
457
- st.table(res_notfound)
458
- ##########
459
-
460
-
461
- #For LR
462
- ##########
463
- res_lr,res_mut_lr,res_notfound_lr,list_match_lr,list_mutated_lr=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
464
- #st.write('Selected Reference Guides for **Set A**')
465
- #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
466
- if res_lr.shape[0]>0:
467
- st.write('Matched to **CHM13** Reference Guides for **Set B**')
468
- tbl_disp(res_lr,select_variant,'SetB_CHM13')
469
- elif res_mut_lr.shape[0]>0:
470
- st.write('Mutated to **CHM13** Reference Guides for **Set B**')
471
- st.markdown(caution1,unsafe_allow_html=True)
472
- tbl_disp(res_mut_lr,select_variant,'SetB_Mutated_CHM13')
473
- if res_notfound_lr.shape[0]>0:
474
- st.write('**SetB Guides Not Found in CHM13**')
475
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
476
- st.table(res_notfound_lr)
477
- ##########
478
-
479
-
480
- #######
481
- #NOW MERGE FROM GRCh38 and LR
482
- merged_mutated_set=pd.merge(list_mutated,list_mutated_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
483
- merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
484
- merged_match_set=pd.merge(list_match,list_match_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
485
- merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
486
- if merged_match_set.shape[0]>0:
487
- #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
488
- st.write('**Matched** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
489
- tbl_disp(merged_match_set,select_variant,'SetB_Matched_GRCh38_CHM13',0)
490
-
491
- #st.table(merged_match_seta)
492
- elif merged_mutated_set.shape[0]>0:
493
- #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
494
- st.write('**Mutated** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
495
-
496
- tbl_disp(merged_mutated_set,select_variant,'SetB_Mutated_GRCh38_CHM13',0)
497
-
498
- ########
499
-
500
- else:
501
- st.write('**Gene: **'+select_variant+' Not found in listB')
502
-
503
- ### list B
504
-
505
- #list C
506
- ListCRes = st.checkbox('Results For SetC',key=3)
507
- if ListCRes:
508
- if len(ref_listC)>0:
509
- ##########
510
- res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listC,listC_found_ref,listC_notfound_ref)
511
- st.write('Selected Reference Guides for **Set C**')
512
- st.table(ref_listC)
513
- #tbl_disp(ref_listC,select_variant,'ReferenceGuides',0)
514
- if res.shape[0]>0:
515
- st.write('Matched to **GRCh38** Reference Guides for **Set C**')
516
- tbl_disp(res,select_variant,'SetC_GRCh38')
517
- elif res_mut.shape[0]>0:
518
- st.write('Mutated to **GRCh38** Reference Guides for **Set C**')
519
- st.markdown(caution1,unsafe_allow_html=True)
520
- tbl_disp(res_mut,select_variant,'SetC_Mutated_GRCh38')
521
- if res_notfound.shape[0]>0:
522
- st.write('**SetC Guides Not Found in GRCh38**')
523
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
524
- st.table(res_notfound)
525
- ##########
526
-
527
-
528
- #For LR
529
- ##########
530
- res_lr,res_mut_lr,res_notfound_lr,list_match_lr,list_mutated_lr=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
531
- #st.write('Selected Reference Guides for **Set A**')
532
- #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
533
- if res_lr.shape[0]>0:
534
- st.write('Matched to **CHM13** Reference Guides for **Set C**')
535
- tbl_disp(res_lr,select_variant,'SetC_CHM13')
536
- elif res_mut_lr.shape[0]>0:
537
- st.write('Mutated to **CHM13** Reference Guides for **Set C**')
538
- st.markdown(caution1,unsafe_allow_html=True)
539
- tbl_disp(res_mut_lr,select_variant,'SetC_Mutated_CHM13')
540
- if res_notfound_lr.shape[0]>0:
541
- st.write('**SetC Guides Not Found in CHM13**')
542
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
543
- st.table(res_notfound_lr)
544
- ##########
545
-
546
-
547
- #######
548
- #NOW MERGE FROM GRCh38 and LR
549
- merged_mutated_set=pd.merge(list_mutated,list_mutated_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
550
- merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
551
- merged_match_set=pd.merge(list_match,list_match_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
552
- merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
553
- if merged_match_set.shape[0]>0:
554
- #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
555
- st.write('**Matched** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
556
- tbl_disp(merged_match_set,select_variant,'SetC_Matched_GRCh38_CHM13',0)
557
-
558
- #st.table(merged_match_seta)
559
- elif merged_mutated_set.shape[0]>0:
560
- #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
561
- st.write('**Mutated** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
562
-
563
- tbl_disp(merged_mutated_set,select_variant,'SetC_Mutated_GRCh38_CHM13',0)
564
-
565
- ########
566
-
567
- else:
568
- st.write('**Gene: **'+select_variant+' Not found in listC')
569
-
570
-
571
- ### list C
572
- else:
573
- select_mode = st.radio(
574
- "Please select an option",
575
- ('Select Single/Multiple Genes', 'Select All'))
576
- #st.write('Please Select A **Single/Multiple/SelectAll** Reference Guides')
577
- #get_table = pd.DataFrame(columns=['gene','sgID_A','protospacer_A','sgID_B','protospacer_B','sgID_AB'])
578
- if select_mode=='Select Single/Multiple Genes':
579
- st.markdown(table_edit,unsafe_allow_html=True)
580
- get_table=tbl_disp(listA[['gene','sgID_A','protospacer_A','sgID_B','protospacer_B','sgID_AB']],'SetA','ReferenceGuides',0)
581
-
582
- st.markdown(caution2,unsafe_allow_html=True)
583
- else:
584
- st.markdown(table_edit,unsafe_allow_html=True)
585
- get_table=listA[['gene','sgID_A','protospacer_A','sgID_B','protospacer_B','sgID_AB']]
586
-
587
- st.markdown(caution2,unsafe_allow_html=True)
588
-
589
-
590
- #st.write(get_table)
591
-
592
- ListARes = st.checkbox('Results For SetA',key=30)
593
- if ListARes and not isinstance(get_table, type(None)):#get_table!=None:
594
- #if ListARes and get_table.shape[0]>0:
595
- variant_set=get_table[['gene']]
596
- dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
597
- dft_res=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
598
- dft_res_mut=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
599
- dft_notfound=pd.DataFrame(columns=['gene','ref_guide'])
600
- df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
601
- df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
602
- #CHECK FOR GRCh38
603
- for i in range(variant_set.shape[0]):
604
- ref_listA=listA[listA['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
605
- ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
606
-
607
- ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
608
- res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listA,listA_found_ref,listA_notfound_ref)
609
- dft_a=dft_a.append(ref_listA)
610
- if res.shape[0]>0:
611
- dft_res=dft_res.append(res)
612
- if res_mut.shape[0]>0:
613
- dft_res_mut=dft_res_mut.append(res_mut)
614
- if res_notfound.shape[0]>0:
615
- dft_notfound= dft_notfound.append(res_notfound)
616
- if list_match.shape[0]>0:
617
- df_matched_guides_ref= df_matched_guides_ref.append(list_match)
618
- if list_mutated.shape[0]>0:
619
- df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
620
-
621
- st.write('Selected Reference Guides for **Set A**')
622
- tbl_disp(dft_a,'All','ReferenceGuides',0)
623
- if dft_res.shape[0]>0:
624
- st.write('Matched to **GRCh38** Reference Guides for **Set A**')
625
- tbl_disp(dft_res,'select_genes','SetA_GRCh38')
626
- elif dft_res_mut.shape[0]>0:
627
- st.write('Mutated to **GRCh38** Reference Guides for **Set A**')
628
- st.markdown(caution1,unsafe_allow_html=True)
629
- tbl_disp(dft_res_mut,'select_genes','SetA_Mutated_GRCh38')
630
- if dft_notfound.shape[0]>0:
631
- st.write('**SetA Guides Not Found in GRCh38**')
632
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
633
- st.table(dft_notfound)
634
- #Now CHECK FOR CHM13
635
- dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
636
- dft_res=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
637
- dft_res_mut=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
638
- dft_notfound=pd.DataFrame(columns=['gene','ref_guide'])
639
- df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
640
- df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
641
-
642
- for i in range(variant_set.shape[0]):
643
- ref_listA=listA[listA['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
644
- ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
645
-
646
- ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
647
- res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
648
- dft_a=dft_a.append(ref_listA)
649
- if res.shape[0]>0:
650
- dft_res=dft_res.append(res)
651
- if res_mut.shape[0]>0:
652
- dft_res_mut=dft_res_mut.append(res_mut)
653
- if res_notfound.shape[0]>0:
654
- dft_notfound= dft_notfound.append(res_notfound)
655
- if list_match.shape[0]>0:
656
- df_matched_guides_lr= df_matched_guides_lr.append(list_match)
657
- if list_mutated.shape[0]>0:
658
- df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
659
-
660
- if dft_res.shape[0]>0:
661
- st.write('Matched to **CHM13** Reference Guides for **Set A**')
662
- tbl_disp(dft_res,'select_genes','SetA_CHM13')
663
- elif dft_res_mut.shape[0]>0:
664
- st.write('Mutated to **CHM13** Reference Guides for **Set A**')
665
- st.markdown(caution1,unsafe_allow_html=True)
666
- tbl_disp(dft_res_mut,'select_genes','SetA_Mutated_CHM13')
667
- if dft_notfound.shape[0]>0:
668
- st.write('**SetA Guides Not Found in CHM13**')
669
- st.table(dft_notfound)
670
- #NOW MERGE FROM GRCh38 and LR
671
- merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
672
- merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
673
- merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
674
- merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
675
- if merged_match_set.shape[0]>0:
676
- #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
677
- st.write('**Matched** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
678
- tbl_disp(merged_match_set,'select_genes','SetA_Matched_GRCh38_CHM13',0)
679
-
680
- #st.table(merged_match_seta)
681
- elif merged_mutated_set.shape[0]>0:
682
- #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
683
- st.write('**Mutated** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
684
-
685
- tbl_disp(merged_mutated_set,'select_genes','SetA_Mutated_GRCh38_CHM13',0)
686
- elif ListARes:
687
- st.write("**Please select genes from the above table to begin**")
688
-
689
# ---------------------------------------------------------------------------
# Set B results
#
# For every gene in `get_table`, the Set B guides of that gene are passed to
# `get_lists` once with the GRCh38 lookup tables and once with the CHM13
# lookup tables.  Per-reference tables are shown first, followed by a table
# that puts the GRCh38 and CHM13 hits side by side.
# ---------------------------------------------------------------------------
ListBRes = st.checkbox('Results For SetB', key=40)
# Identity test on purpose: the original author avoided `get_table != None`
# (presumably because get_table is a DataFrame when set -- TODO confirm).
if ListBRes and get_table is not None:
    variant_set = get_table[['gene']]

    # Column layouts of the accumulated tables.
    source_cols = ['sgID_AB', 'guide_type', 'protospacer_A', 'protospacer_B']
    ref_cols = ['gene', 'guide_type', 'protospacer_A', 'protospacer_B']
    pair_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2',
                 'sgID_1_2']
    notfound_cols = ['gene', 'ref_guide']
    guide_cols = ['gene', 'ref_guide', 'chr', 'position', 'mutated_guide',
                  'strand', 'num_mismatch']
    merged_cols = ['gene', 'ref_guide', 'chr',
                   'position_GRCh38', 'position_LR',
                   'strand_GRCh38', 'strand_LR',
                   'mutated_guide_GRCh38', 'mutated_guide_LR',
                   'num_mismatch_GRCh38', 'num_mismatch_LR']

    # ---- CHECK FOR GRCh38 ----
    # DataFrame.append no longer exists in pandas >= 2.0, so the per-gene
    # pieces are gathered in lists and concatenated once.  Each list starts
    # with an empty, correctly-labelled frame so the result keeps the expected
    # columns (and pd.concat never receives an empty list) when nothing hits.
    ref_parts = [pd.DataFrame(columns=ref_cols)]
    res_parts = [pd.DataFrame(columns=pair_cols)]
    mut_parts = [pd.DataFrame(columns=pair_cols)]
    notfound_parts = [pd.DataFrame(columns=notfound_cols)]
    match_parts = [pd.DataFrame(columns=guide_cols)]
    mutated_parts = [pd.DataFrame(columns=guide_cols)]
    for gene in variant_set['gene']:
        # Guides of this gene; the sgID_AB column is relabelled as 'gene'.
        ref_listB = listB.loc[listB['gene'] == gene, source_cols]
        ref_listB.columns = ref_cols
        res, res_mut, res_notfound, list_match, list_mutated = get_lists(
            ref_listB, listB_found_ref, listB_notfound_ref)
        ref_parts.append(ref_listB)
        if res.shape[0] > 0:
            res_parts.append(res)
        if res_mut.shape[0] > 0:
            mut_parts.append(res_mut)
        if res_notfound.shape[0] > 0:
            notfound_parts.append(res_notfound)
        if list_match.shape[0] > 0:
            match_parts.append(list_match)
        if list_mutated.shape[0] > 0:
            mutated_parts.append(list_mutated)
    dft_b = pd.concat(ref_parts)
    dft_res = pd.concat(res_parts)
    dft_res_mut = pd.concat(mut_parts)
    dft_notfound = pd.concat(notfound_parts)
    df_matched_guides_ref = pd.concat(match_parts)
    df_mutated_guides_ref = pd.concat(mutated_parts)

    st.write('Selected Reference Guides for **Set B**')
    # NOTE(review): the 'ReferenceGuides' key is shared with the other sets;
    # confirm tbl_disp tolerates that when several result boxes are ticked.
    tbl_disp(dft_b, 'All', 'ReferenceGuides', 0)
    # NOTE(review): if/elif means the mutated table is hidden whenever any
    # matched guide exists -- kept as in the original, confirm it is intended.
    if dft_res.shape[0] > 0:
        st.write('Matched to **GRCh38** Reference Guides for **Set B**')
        tbl_disp(dft_res, 'select_genes', 'SetB_GRCh38')
    elif dft_res_mut.shape[0] > 0:
        st.write('Mutated to **GRCh38** Reference Guides for **Set B**')
        st.markdown(caution1, unsafe_allow_html=True)
        tbl_disp(dft_res_mut, 'select_genes', 'SetB_Mutated_GRCh38')
    if dft_notfound.shape[0] > 0:
        st.write('**SetB Guides Not Found in GRCh38**')
        st.table(dft_notfound)

    # ---- Now CHECK FOR CHM13 ----
    # The reference-guide table is not rebuilt here: it would hold exactly the
    # rows already collected in dft_b above and was never displayed again.
    res_parts = [pd.DataFrame(columns=pair_cols)]
    mut_parts = [pd.DataFrame(columns=pair_cols)]
    notfound_parts = [pd.DataFrame(columns=notfound_cols)]
    match_parts = [pd.DataFrame(columns=guide_cols)]
    mutated_parts = [pd.DataFrame(columns=guide_cols)]
    for gene in variant_set['gene']:
        # Re-sliced (not reused) in case get_lists modifies its input.
        ref_listB = listB.loc[listB['gene'] == gene, source_cols]
        ref_listB.columns = ref_cols
        res, res_mut, res_notfound, list_match, list_mutated = get_lists(
            ref_listB, listB_found_lr, listB_notfound_lr)
        if res.shape[0] > 0:
            res_parts.append(res)
        if res_mut.shape[0] > 0:
            mut_parts.append(res_mut)
        if res_notfound.shape[0] > 0:
            notfound_parts.append(res_notfound)
        if list_match.shape[0] > 0:
            match_parts.append(list_match)
        if list_mutated.shape[0] > 0:
            mutated_parts.append(list_mutated)
    dft_res = pd.concat(res_parts)
    dft_res_mut = pd.concat(mut_parts)
    dft_notfound = pd.concat(notfound_parts)
    df_matched_guides_lr = pd.concat(match_parts)
    df_mutated_guides_lr = pd.concat(mutated_parts)

    if dft_res.shape[0] > 0:
        st.write('Matched to **CHM13** Reference Guides for **Set B**')
        tbl_disp(dft_res, 'select_genes', 'SetB_CHM13')
    elif dft_res_mut.shape[0] > 0:
        st.write('Mutated to **CHM13** Reference Guides for **Set B**')
        st.markdown(caution1, unsafe_allow_html=True)
        tbl_disp(dft_res_mut, 'select_genes', 'SetB_Mutated_CHM13')
    if dft_notfound.shape[0] > 0:
        st.write('**SetB Guides Not Found in CHM13**')
        st.table(dft_notfound)

    # ---- NOW MERGE FROM GRCh38 and LR ----
    # Outer join keeps guides that hit only one of the two references; the
    # per-reference columns get the _GRCh38 / _LR suffixes.
    merged_mutated_set = pd.merge(
        df_mutated_guides_ref, df_mutated_guides_lr, how="outer",
        on=["gene", "ref_guide", "chr"], suffixes=("_GRCh38", "_LR"))
    merged_mutated_set = merged_mutated_set[merged_cols]
    merged_match_set = pd.merge(
        df_matched_guides_ref, df_matched_guides_lr, how="outer",
        on=["gene", "ref_guide", "chr"], suffixes=("_GRCh38", "_LR"))
    merged_match_set = merged_match_set[merged_cols]
    if merged_match_set.shape[0] > 0:
        st.write('**Matched** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_match_set, 'select_genes', 'SetB_Matched_GRCh38_CHM13', 0)
    elif merged_mutated_set.shape[0] > 0:
        st.write('**Mutated** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_mutated_set, 'select_genes', 'SetB_Mutated_GRCh38_CHM13', 0)
elif ListBRes:
    # Box ticked but no gene selection available yet.
    st.write("**Please select genes from the above table to begin**")
786
-
787
# ---------------------------------------------------------------------------
# Set C results
#
# For every gene in `get_table`, the Set C guides of that gene are passed to
# `get_lists` once with the GRCh38 lookup tables and once with the CHM13
# lookup tables.  Per-reference tables are shown first, followed by a table
# that puts the GRCh38 and CHM13 hits side by side.
# ---------------------------------------------------------------------------
ListCRes = st.checkbox('Results For SetC', key=50)
# Identity test on purpose: the original author avoided `get_table != None`
# (presumably because get_table is a DataFrame when set -- TODO confirm).
if ListCRes and get_table is not None:
    variant_set = get_table[['gene']]

    # Column layouts of the accumulated tables.
    source_cols = ['sgID_AB', 'guide_type', 'protospacer_A', 'protospacer_B']
    ref_cols = ['gene', 'guide_type', 'protospacer_A', 'protospacer_B']
    pair_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2',
                 'sgID_1_2']
    notfound_cols = ['gene', 'ref_guide']
    guide_cols = ['gene', 'ref_guide', 'chr', 'position', 'mutated_guide',
                  'strand', 'num_mismatch']
    merged_cols = ['gene', 'ref_guide', 'chr',
                   'position_GRCh38', 'position_LR',
                   'strand_GRCh38', 'strand_LR',
                   'mutated_guide_GRCh38', 'mutated_guide_LR',
                   'num_mismatch_GRCh38', 'num_mismatch_LR']

    # ---- CHECK FOR GRCh38 ----
    # DataFrame.append no longer exists in pandas >= 2.0, so the per-gene
    # pieces are gathered in lists and concatenated once.  Each list starts
    # with an empty, correctly-labelled frame so the result keeps the expected
    # columns (and pd.concat never receives an empty list) when nothing hits.
    ref_parts = [pd.DataFrame(columns=ref_cols)]
    res_parts = [pd.DataFrame(columns=pair_cols)]
    mut_parts = [pd.DataFrame(columns=pair_cols)]
    notfound_parts = [pd.DataFrame(columns=notfound_cols)]
    match_parts = [pd.DataFrame(columns=guide_cols)]
    mutated_parts = [pd.DataFrame(columns=guide_cols)]
    for gene in variant_set['gene']:
        # Guides of this gene; the sgID_AB column is relabelled as 'gene'.
        ref_listC = listC.loc[listC['gene'] == gene, source_cols]
        ref_listC.columns = ref_cols
        res, res_mut, res_notfound, list_match, list_mutated = get_lists(
            ref_listC, listC_found_ref, listC_notfound_ref)
        ref_parts.append(ref_listC)
        if res.shape[0] > 0:
            res_parts.append(res)
        if res_mut.shape[0] > 0:
            mut_parts.append(res_mut)
        if res_notfound.shape[0] > 0:
            notfound_parts.append(res_notfound)
        if list_match.shape[0] > 0:
            match_parts.append(list_match)
        if list_mutated.shape[0] > 0:
            mutated_parts.append(list_mutated)
    dft_c = pd.concat(ref_parts)
    dft_res = pd.concat(res_parts)
    dft_res_mut = pd.concat(mut_parts)
    dft_notfound = pd.concat(notfound_parts)
    df_matched_guides_ref = pd.concat(match_parts)
    df_mutated_guides_ref = pd.concat(mutated_parts)

    # Heading fixed: this table lists Set C guides (it used to say "Set B").
    st.write('Selected Reference Guides for **Set C**')
    # NOTE(review): the 'ReferenceGuides' key is shared with the other sets;
    # confirm tbl_disp tolerates that when several result boxes are ticked.
    tbl_disp(dft_c, 'All', 'ReferenceGuides', 0)
    # NOTE(review): if/elif means the mutated table is hidden whenever any
    # matched guide exists -- kept as in the original, confirm it is intended.
    if dft_res.shape[0] > 0:
        st.write('Matched to **GRCh38** Reference Guides for **Set C**')
        tbl_disp(dft_res, 'select_genes', 'SetC_GRCh38')
    elif dft_res_mut.shape[0] > 0:
        st.write('Mutated to **GRCh38** Reference Guides for **Set C**')
        st.markdown(caution1, unsafe_allow_html=True)
        tbl_disp(dft_res_mut, 'select_genes', 'SetC_Mutated_GRCh38')
    if dft_notfound.shape[0] > 0:
        st.write('**SetC Guides Not Found in GRCh38**')
        st.table(dft_notfound)

    # ---- Now CHECK FOR CHM13 ----
    # The reference-guide table is not rebuilt here: it would hold exactly the
    # rows already collected in dft_c above and was never displayed again.
    res_parts = [pd.DataFrame(columns=pair_cols)]
    mut_parts = [pd.DataFrame(columns=pair_cols)]
    notfound_parts = [pd.DataFrame(columns=notfound_cols)]
    match_parts = [pd.DataFrame(columns=guide_cols)]
    mutated_parts = [pd.DataFrame(columns=guide_cols)]
    for gene in variant_set['gene']:
        # Re-sliced (not reused) in case get_lists modifies its input.
        ref_listC = listC.loc[listC['gene'] == gene, source_cols]
        ref_listC.columns = ref_cols
        res, res_mut, res_notfound, list_match, list_mutated = get_lists(
            ref_listC, listC_found_lr, listC_notfound_lr)
        if res.shape[0] > 0:
            res_parts.append(res)
        if res_mut.shape[0] > 0:
            mut_parts.append(res_mut)
        if res_notfound.shape[0] > 0:
            notfound_parts.append(res_notfound)
        if list_match.shape[0] > 0:
            match_parts.append(list_match)
        if list_mutated.shape[0] > 0:
            mutated_parts.append(list_mutated)
    dft_res = pd.concat(res_parts)
    dft_res_mut = pd.concat(mut_parts)
    dft_notfound = pd.concat(notfound_parts)
    df_matched_guides_lr = pd.concat(match_parts)
    df_mutated_guides_lr = pd.concat(mutated_parts)

    if dft_res.shape[0] > 0:
        st.write('Matched to **CHM13** Reference Guides for **Set C**')
        tbl_disp(dft_res, 'select_genes', 'SetC_CHM13')
    elif dft_res_mut.shape[0] > 0:
        st.write('Mutated to **CHM13** Reference Guides for **Set C**')
        st.markdown(caution1, unsafe_allow_html=True)
        tbl_disp(dft_res_mut, 'select_genes', 'SetC_Mutated_CHM13')
    if dft_notfound.shape[0] > 0:
        st.write('**SetC Guides Not Found in CHM13**')
        st.table(dft_notfound)

    # ---- NOW MERGE FROM GRCh38 and LR ----
    # Outer join keeps guides that hit only one of the two references; the
    # per-reference columns get the _GRCh38 / _LR suffixes.
    merged_mutated_set = pd.merge(
        df_mutated_guides_ref, df_mutated_guides_lr, how="outer",
        on=["gene", "ref_guide", "chr"], suffixes=("_GRCh38", "_LR"))
    merged_mutated_set = merged_mutated_set[merged_cols]
    merged_match_set = pd.merge(
        df_matched_guides_ref, df_matched_guides_lr, how="outer",
        on=["gene", "ref_guide", "chr"], suffixes=("_GRCh38", "_LR"))
    merged_match_set = merged_match_set[merged_cols]
    if merged_match_set.shape[0] > 0:
        st.write('**Matched** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_match_set, 'select_genes', 'SetC_Matched_GRCh38_CHM13', 0)
    elif merged_mutated_set.shape[0] > 0:
        st.write('**Mutated** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_mutated_set, 'select_genes', 'SetC_Mutated_GRCh38_CHM13', 0)
elif ListCRes:
    # Box ticked but no gene selection available yet.
    st.write("**Please select genes from the above table to begin**")
883
-