#from turtle import shape
import streamlit as st
#from st_keyup import st_keyup
import pandas as pd
import numpy as np
from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode
import os
# Use the full browser width for the page layout.
st.set_page_config(layout="wide")
# NOTE(review): the markdown payload below contains only a newline in this copy;
# since it is rendered with unsafe_allow_html=True it presumably held HTML/CSS
# that was lost - confirm against the original file.
st.markdown(
"""
""",
unsafe_allow_html=True,
)
# User-facing notices. caution1 and caution_genes are rendered further down with
# st.markdown(..., unsafe_allow_html=True).
# NOTE(review): each literal below opens with a single quote and closes on a
# later line, which is not a valid single-quoted Python string; the surrounding
# markup appears to be missing in this copy - confirm against the original file.
caution = '
Please note that Only one Guide (from pair) is found. Please see guides not found section for other guide
'
caution1 = 'Please note that Each mutated guide is reported as a sepearte line. sgID_1/2, sgRNA_1/2, chr_sgRNA_1/2 and position_sgRNA_1/2 represent values for reference/mutated guide
'
caution2 = 'Please Select a single/multiple guides and then select Check Box A, B or C Otherwise code will through error
'
table_edit = 'About Table: Please note that table can be sorted by clicking on any column and Multiple rows can be selected (by clicking check box in first column) to save only those rows.
'
caution_genes = 'Please make sure that desired genes from all three lists should be selected to generate Order Ready Table.
'
def transform(df,str):
    """Let the user pick a subset of columns and return that projection.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are offered in the multiselect widget.
    str : str
        Label shown above the multiselect (the name shadows the builtin but
        is kept because it is part of the signature).

    Returns
    -------
    pandas.DataFrame
        ``df`` restricted to the chosen columns; all columns are
        pre-selected by default.
    """
    available = df.columns.tolist()
    preselected = df.columns.tolist()
    chosen = st.multiselect(str, available, preselected)
    return df[chosen]
def convert_df(df):
    """Serialise *df* to UTF-8 encoded CSV bytes, index column included."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
def convert_df1(df):
    """Serialise *df* to UTF-8 encoded CSV bytes without the index column."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
# CSS to inject contained in a string
# NOTE(review): the string below holds only a newline in this copy; the CSS
# rules it is named after appear to be missing - confirm against the original file.
hide_table_row_index = """
"""
# Inject CSS with Markdown (unsafe_allow_html lets the raw markup through)
st.markdown(hide_table_row_index, unsafe_allow_html=True)
#########TABLE DISPLAY
def tbl_disp(dat,var,ref,key,flg=1):
    """Render *dat* in an AgGrid table with optional CSV download buttons.

    Parameters
    ----------
    dat : pandas.DataFrame
        Table to display. It is re-indexed on a copy; the caller's frame is
        no longer modified in place.
    var, ref : str
        Joined as ``var + '_' + ref + '.csv'`` to form the download file name.
    key
        Accepted for call compatibility; currently not forwarded to any widget.
    flg : int, default 1
        ``1`` shows the "full table" download button; any truthy value shows
        the "selected data" button once rows are selected. Callers pass ``0``
        to suppress both.

    Returns
    -------
    pandas.DataFrame or None
        The selected rows as returned by the grid, or ``None`` when nothing
        is selected.
    """
    # Re-index on a copy instead of mutating the caller's DataFrame.
    dat = dat.reset_index(drop=True)

    if flg == 1:
        st.download_button(
            label="Download Full Table as CSV file",
            data=convert_df(dat),
            file_name=var+'_'+ref+'.csv',
            mime='text/csv',
        )

    gb = GridOptionsBuilder.from_dataframe(dat)
    gb.configure_pagination(enabled=False)
    gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    gb.configure_selection(selection_mode="multiple", use_checkbox=True)
    gb.configure_column("gene", headerCheckboxSelection = True)
    gb.configure_side_bar()
    gridOptions = gb.build()

    grid_response = AgGrid(
        dat,
        height=200,
        gridOptions=gridOptions,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    selected = grid_response['selected_rows']
    # `selected` may be a list of dicts, a DataFrame or None depending on the
    # st_aggrid version; a plain truth test raises on a DataFrame, so test
    # for emptiness explicitly.
    if selected is None or len(selected) == 0:
        return None

    dfs = pd.DataFrame(selected)
    # The original skipped the first column by position, presumably to drop
    # the grid's `_selectedRowNodeInfo` bookkeeping column (TODO confirm for
    # the installed st_aggrid version). Dropping it by name avoids losing a
    # real data column when that bookkeeping column is absent.
    export = dfs.drop(columns=['_selectedRowNodeInfo'], errors='ignore')
    if flg:
        st.download_button(
            label="Download Selected data as CSV",
            data=convert_df1(export),
            file_name=var+'_'+ref+'.csv',
            mime='text/csv',
        )
    return dfs
def assemble_tbl(t):
    """Pair consecutive guide rows of *t* into one row per guide pair.

    Rows ``0,1``, ``2,3``, ... of *t* are placed side by side. *t* must have
    exactly seven columns (they are renamed positionally), of which the first
    four become the ``sgID``/``sgRNA``/``chr_sgRNA``/``position_sgRNA``
    fields of guide 1 and guide 2 respectively.

    Parameters
    ----------
    t : pandas.DataFrame
        Seven-column table with an even number of rows.

    Returns
    -------
    pandas.DataFrame
        One row per pair with columns ``sgID_1, sgRNA_1, chr_sgRNA_1,
        position_sgRNA_1, sgID_2, sgRNA_2, chr_sgRNA_2, position_sgRNA_2,
        sgID_1_2``. Both sgRNA sequences are cut to their first 20
        characters and ``sgID_1_2`` is ``sgID_1 + '|' + sgID_2``. The
        per-pair index (0) is kept, as before.

    Raises
    ------
    IndexError
        If *t* has an odd number of rows (the last row has no partner).
    """
    first_names = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
    second_names = ['sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2','mutated_guide2', 'strand2', 'num_mismatch2']
    kept = first_names[:4] + second_names[:4]
    out_cols = kept + ['sgID_1_2']

    pairs = []
    for i in range(0, t.shape[0], 2):
        l1 = t.iloc[[i]].reset_index(drop=True)
        l1.columns = first_names
        l2 = t.iloc[[i + 1]].reset_index(drop=True)
        l2.columns = second_names
        pair = pd.concat([l1, l2], axis=1)[kept].copy()
        pair['sgRNA_1'] = pair['sgRNA_1'].str.slice(0, 20)
        pair['sgRNA_2'] = pair['sgRNA_2'].str.slice(0, 20)
        # Pair id joins BOTH guide ids (the original repeated sgID_1 twice).
        pair['sgID_1_2'] = pair['sgID_1'] + "|" + pair['sgID_2']
        pairs.append(pair)

    if not pairs:
        return pd.DataFrame(columns=out_cols)
    # A single concat replaces repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(pairs)[out_cols]
def get_lists(ref_list,list_found_ref,list_notfound_ref):
# Collect, for the guide pair(s) named in ref_list, the perfectly matched rows,
# the mutated rows and the not-found rows from the supplied lookup tables.
#   ref_list          : frame whose 'gene' values are 'idA|idB' pair ids (split on '|').
#   list_found_ref    : frame with at least 'gene', 'num_mismatch', 'chr' columns.
#   list_notfound_ref : frame with a 'gene' column.
# Returns (dft, dft_mut, not-found rows, matched rows, mutated rows, guideflg1).
# NOTE(review): indentation is not preserved in this copy, so the nesting of the
# statements below cannot be confirmed from here.
a_ref=[]
# Flatten every pair id into its two guide ids.
for i in range(len(ref_list)):
a_ref.append(ref_list.gene.values[i].split('|')[0])
a_ref.append(ref_list.gene.values[i].split('|')[1])
set_found0_ref=[]
# Rows of the "found" table belonging to each guide id, in a_ref order.
for i in range(len(a_ref)):
set_found0_ref.append(list_found_ref[list_found_ref['gene']==a_ref[i]])
list_concatenated_found_ref = pd.concat(set_found0_ref)
# Exact matches are the rows with zero mismatches.
list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0]
#Also remove Alternate loci's data
# (keeps only rows whose 'chr' value contains the substring 'chr')
list_concatenated_match_ref = list_concatenated_match_ref[list_concatenated_match_ref['chr'].str.contains('chr')]
#also create new list with both sgRNAs in one row
dft=pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
if list_concatenated_match_ref.shape[0]>0:
t=list_concatenated_match_ref.reset_index(drop=True)
#st.table(t)
##########
#check even/odd entries
if t.shape[0]==1:
# A single matched row is duplicated so assemble_tbl can still form a pair.
t1=t.loc[t.index.repeat(2)].reset_index(drop=True)
#st.write(t1)
dft=assemble_tbl(t1)
elif t.shape[0]%2==0: #even
dft=assemble_tbl(t)
else: #odd
t1 = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
i=0
# NOTE(review): the next line is truncated in this copy (text between `while i`
# and `0]` is missing). The lost part presumably held the odd-row handling and
# the definition of list_concatenated_mutated_ref - restore from the original file.
while i 0]
list_concatenated_mutated_ref=list_concatenated_mutated_ref.sort_values('position')
#Also remove Alternate loci's data
list_concatenated_mutated_ref = list_concatenated_mutated_ref[list_concatenated_mutated_ref['chr'].str.contains('chr')]
dft_mut = pd.DataFrame(columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2'])
if list_concatenated_mutated_ref.shape[0]>0:
dft_mut = get_mutated_res(list_concatenated_mutated_ref)
#check not found
# Only the first two guide ids (the first pair in ref_list) are looked up here.
seta_notfound0_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[0]]
seta_notfound1_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[1]]
#st.write(seta_notfound0_ref)
#st.write(seta_notfound1_ref)
#add guideflg1 to return which guide is found
# guideflg1 starts at 0, is set to 2 when the first guide has not-found rows and
# to 1 when the second guide has not-found rows.
guideflg1=0
if seta_notfound0_ref.shape[0]>0:
guideflg1=2
if seta_notfound1_ref.shape[0]>0:
guideflg1=1
list_concatenated_notfound_ref = pd.concat([seta_notfound0_ref,seta_notfound1_ref])
#st.table(dft)
#st.table(dft_mut)
return dft, dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref,guideflg1
###########
def get_mutated_res(list_concatenated_mutated_ref):
    """Build the reference/mutated guide table, one output row per input row.

    The input must have exactly seven columns (they are renamed
    positionally); the first four are read as id, reference sequence,
    chromosome and position, the fifth as the mutated sequence.

    Parameters
    ----------
    list_concatenated_mutated_ref : pandas.DataFrame
        Seven-column table of mutated guide hits.

    Returns
    -------
    pandas.DataFrame
        Columns ``sgID_1, sgRNA_1, chr_sgRNA_1, position_sgRNA_1, sgID_2,
        sgRNA_2, chr_sgRNA_2, position_sgRNA_2, sgID_1_2``. ``*_1`` holds
        the reference guide and ``*_2`` the mutated guide of the same row;
        both sequences are rewritten as ``'G'`` followed by characters
        1..19 of the source sequence. The per-row index (0) is kept, as
        before.
    """
    first_names = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
    second_names = ['sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2','mutated_guide2', 'strand2', 'num_mismatch2']
    final_names = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2']
    out_cols = final_names + ['sgID_1_2']

    t = list_concatenated_mutated_ref.reset_index(drop=True)
    rows = []
    for i in range(t.shape[0]):
        l1 = t.iloc[[i]].reset_index(drop=True)
        l1.columns = first_names
        # The same source row supplies the "second" guide: its mutated sequence.
        l2 = l1.copy()
        l2.columns = second_names
        row = pd.concat([l1, l2], axis=1)
        row = row[['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','mutated_guide2','chr_sgRNA_2','position_sgRNA_2']].copy()
        # mutated_guide2 becomes sgRNA_2 in the output.
        row.columns = final_names
        # Force a leading G on both sequences and cut them to 20 characters.
        row['sgRNA_1'] = 'G' + row['sgRNA_1'].str.slice(1, 20)
        row['sgRNA_2'] = 'G' + row['sgRNA_2'].str.slice(1, 20)
        # sgID_2 equals sgID_1 here, so this yields the same value as before.
        row['sgID_1_2'] = row['sgID_1'] + "|" + row['sgID_2']
        rows.append(row)

    if not rows:
        return pd.DataFrame(columns=out_cols)
    # A single concat replaces repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(rows)[out_cols]
#########
####### THIS SECTION WAS ADDED FOR THE ORDER READY LIST AND TO REMOVE REPETITION FOR NOT_FOUND ENTRIES
def get_lists_ol(ref_list,list_found_ref,list_notfound_ref):
# Same collection steps as get_lists, but returns five values: no guideflg1 flag
# is computed here.
#   ref_list          : frame whose 'gene' values are 'idA|idB' pair ids (split on '|').
#   list_found_ref    : frame with at least 'gene', 'num_mismatch', 'chr' columns.
#   list_notfound_ref : frame with a 'gene' column.
# Returns (dft, dft_mut, not-found rows, matched rows, mutated rows).
# NOTE(review): indentation is not preserved in this copy, so the nesting of the
# statements below cannot be confirmed from here.
a_ref=[]
# Flatten every pair id into its two guide ids.
for i in range(len(ref_list)):
a_ref.append(ref_list.gene.values[i].split('|')[0])
a_ref.append(ref_list.gene.values[i].split('|')[1])
set_found0_ref=[]
# Rows of the "found" table belonging to each guide id, in a_ref order.
for i in range(len(a_ref)):
set_found0_ref.append(list_found_ref[list_found_ref['gene']==a_ref[i]])
list_concatenated_found_ref = pd.concat(set_found0_ref)
# Exact matches are the rows with zero mismatches.
list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0]
#Also remove Alternate loci's data
# (keeps only rows whose 'chr' value contains the substring 'chr')
list_concatenated_match_ref = list_concatenated_match_ref[list_concatenated_match_ref['chr'].str.contains('chr')]
#also create new list with both sgRNAs in one row
dft=pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
if list_concatenated_match_ref.shape[0]>0:
t=list_concatenated_match_ref.reset_index(drop=True)
#st.table(t)
##########
#check even/odd entries
if t.shape[0]==1:
# A single matched row is duplicated so assemble_tbl can still form a pair.
t1=t.loc[t.index.repeat(2)].reset_index(drop=True)
#st.write(t1)
dft=assemble_tbl(t1)
elif t.shape[0]%2==0: #even
dft=assemble_tbl(t)
else: #odd
t1 = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
i=0
# NOTE(review): the next line is truncated in this copy (text between `while i`
# and `0]` is missing). The lost part presumably held the odd-row handling and
# the definition of list_concatenated_mutated_ref - restore from the original file.
while i 0]
list_concatenated_mutated_ref=list_concatenated_mutated_ref.sort_values('position')
#Also remove Alternate loci's data
list_concatenated_mutated_ref = list_concatenated_mutated_ref[list_concatenated_mutated_ref['chr'].str.contains('chr')]
dft_mut = pd.DataFrame(columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2'])
if list_concatenated_mutated_ref.shape[0]>0:
dft_mut = get_mutated_res(list_concatenated_mutated_ref)
#check not found
# Only the first two guide ids (the first pair in ref_list) are looked up here.
seta_notfound0_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[0]]
seta_notfound1_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[1]]
list_concatenated_notfound_ref = pd.concat([seta_notfound0_ref,seta_notfound1_ref])
return dft, dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref
###########
#THIS WILL GENERATE ORDER READY TABLE FOR GRCh38
#THIS WILL GENERATE ORDER READY TABLE FOR CHM13
#CHECK IF GUIDE ARE IN NOT FOUND LIST
def not_found_check(set12,set34,set56,listA_notfound_lr,listB_notfound_lr,listC_notfound_lr):
    """Flag which guides of three guide pairs appear in the not-found tables.

    Parameters
    ----------
    set12, set34, set56 : str
        Pair ids of the form ``'idA|idB'``.
    listA_notfound_lr, listB_notfound_lr, listC_notfound_lr : pandas.DataFrame
        Not-found tables with a ``gene`` column; ``set12`` is checked against
        the first, ``set34`` against the second, ``set56`` against the third.

    Returns
    -------
    tuple of int
        Six 0/1 flags in the order (set12 first, set12 second, set34 first,
        set34 second, set56 first, set56 second); ``1`` means the guide id
        has at least one row in its not-found table.
    """
    def _listed(table, guide_id):
        # 1 when at least one row of `table` carries this guide id, else 0.
        return 1 if table[table['gene'] == guide_id].shape[0] > 0 else 0

    lookups = (
        (set12, listA_notfound_lr),
        (set34, listB_notfound_lr),
        (set56, listC_notfound_lr),
    )
    flags = []
    for pair_id, table in lookups:
        flags.append(_listed(table, pair_id.split('|')[0]))
        flags.append(_listed(table, pair_id.split('|')[1]))
    return tuple(flags)
def order_ready_tbl_CHM13(set12,set34,set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr):
# Build and display the "order ready" guide table for CHM13.
#   set12/set34/set56 : Series of 'idA|idB' pair ids chosen from the three lists;
#                       they are matched against the 'sgID_AB' column of the
#                       module-level listA/listB/listC frames.
#   list*_found_lr / list*_notfound_lr : lookup tables forwarded to get_lists and
#                       not_found_check.
# Nothing is returned; the result is rendered through tbl_disp / st.write.
# NOTE(review): indentation is not preserved in this copy, so the nesting of the
# if/elif/else ladder below cannot be confirmed from here.
# NOTE(review): DataFrame.append is used throughout; it was removed in pandas 2.0.
dft_order_table=pd.DataFrame(columns=['gene','guide_type','sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
# dft_a/dft_b/dft_c accumulate the reference rows but are not read again in this function.
dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
# Reset the indexes so the three Series can be addressed with the same 0..n-1 labels.
set12=set12.reset_index(drop = True)
set34=set34.reset_index(drop = True)
set56=set56.reset_index(drop = True)
for i in range(set12.shape[0]):
# Gene name is the part of the pair id before the first underscore.
gene_n=set12[i].split('_')[0]
# f = six 0/1 flags (A first, A second, B first, B second, C first, C second);
# 1 means the guide is present in the corresponding not-found table.
f=not_found_check(set12[i],set34[i],set56[i],listA_notfound_lr,listB_notfound_lr,listC_notfound_lr)
#st.write(f)
#st.write(set12[i],set34[i],set56[i])
#ref_listA=listA[listA['gene']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listA=listA[listA['sgID_AB']==set12.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
# The pair id column is exposed under the name 'gene', which get_lists reads.
ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
resa,res_muta,res_notfounda,list_matcha,list_mutateda,gflga1=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
dft_a=dft_a.append(ref_listA)
#listb
ref_listB=listB[listB['sgID_AB']==set34.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
resb,res_mutb,res_notfoundb,list_matchb,list_mutatedb,gflgb1=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
dft_b=dft_b.append(ref_listB)
#st.table(not resb.empty)
#st.table(res_mutb)
#st.table(resb)
#listc
ref_listC=listC[listC['sgID_AB']==set56.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
resc,res_mutc,res_notfoundc,list_matchc,list_mutatedc,gflgc1=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
dft_c=dft_c.append(ref_listC)
# st.write(set12[i])
# st.write(set34[i])
# st.write(set56[i])
# st.write(f)
# st.write(gflga1,gflgb1,gflgc1)
# gflga1 == 0: get_lists found neither SetA guide in the not-found table.
if gflga1==0:
#Also verigy that both guides are different
if resa['sgID_1'][0] != resa['sgID_2'][0]:
resa['gene']=gene_n
resa['guide_type']='1-2'
dft_order_table=dft_order_table.append(resa)
else: #it is nutation case, so check next
if f[2]==0 or f[3] == 0:
#st.write('came in 1')
if not resb.empty: # and resb['sgID_1'][0] != resb['sgID_2'][0]: #second guide in from setb
# Take the second guide of the pair from SetB's first matched guide.
resa[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resb[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
if f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-3"
dft_order_table=dft_order_table.append(resa)
else: # f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-4"
dft_order_table=dft_order_table.append(resa)
elif resa.shape[0] >0: #at least one guide is from seta
#if resa['sgID_1'][0] != resa['sgID_2'][0]:
if f[2]==0 or f[3] == 0:
# NOTE(review): this st.write looks like leftover debug output (the same call is
# commented out in the branch above) - confirm before removing.
st.write('came in 1')
if not resb.empty: # and resb['sgID_1'][0] != resb['sgID_2'][0]: #second guide in from setb
resa[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resb[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
if f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-3"
dft_order_table=dft_order_table.append(resa)
else: # f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-4"
dft_order_table=dft_order_table.append(resa)
elif f[4]==0 or f[5] == 0:
#st.write('came in 2')
#if resa['sgID_1'][0] != resa['sgID_2'][0]:
if not resc.empty: # and resc['sgID_1'][0] != resc['sgID_2'][0]: # resc.shape[0]>0: #second guide is from setc
# Take the second guide of the pair from SetC's first matched guide.
resa[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resc[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
#dft_order_table=dft_order_table.append(resa)
if f[4]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-5"
dft_order_table=dft_order_table.append(resa)
else: # f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-6"
dft_order_table=dft_order_table.append(resa)
elif resb.shape[0]>0: #at least one guide
#if resb['sgID_1'][0] != resb['sgID_2'][0]:
if f[4]==0 and f[5] == 0:
resb['gene']=gene_n
resb['guide_type']='3-4'
dft_order_table=dft_order_table.append(resb)
elif f[4]==0 or f[5] == 0:
#if not resc.empty and resc['sgID_1'][0] != resc['sgID_2'][0]:
resb[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resc[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
resb['sgID_1_2'] = resb['sgID_1']+"|"+resb['sgID_2']
#dft_order_table=dft_order_table.append(resb)
if f[4]==0:
resb['gene']=gene_n
resb['guide_type']=str(gflgb1+1)+"-5"
dft_order_table=dft_order_table.append(resb)
else: # f[2]==0:
resb['gene']=gene_n
resb['guide_type']=str(gflgb1+2)+"-6"
dft_order_table=dft_order_table.append(resb)
elif resc.shape[0]>0: #at least one guide
#if f[4]==0 and f[5] == 0:
if resc['sgID_1'][0] != resc['sgID_2'][0]:
resc['gene']=gene_n
resc['guide_type']='5-6'
dft_order_table=dft_order_table.append(resc)
# Show the assembled table, or a notice when no pair could be built.
if dft_order_table.shape[0]>0:
st.write('Order Ready **CHM13** guides List')
# NOTE(review): the download file label passed here is 'SetA_CHM13' although the
# table mixes guides from all three sets - confirm this is intended.
tbl_disp(dft_order_table,'select_genes','SetA_CHM13',5)
else:
st.write('**No guides found in ListA, ListB and ListC**')
#st.table(dft_order_table)
#def get_notfound():
# All CSV inputs live in a "data" folder under the current working directory.
cwd = f"{os.getcwd()}/data/"


def _read_table(file_name):
    """Read one CSV file from the data folder (index_col=False)."""
    return pd.read_csv(cwd + file_name, index_col=False)


def _load_found(file_name, trim_chr):
    """Read a "found" table and rename the misspelt 'strnad' column.

    When *trim_chr* is true the 'chr' values are cut at the first space.
    """
    frame = _read_table(file_name)
    if trim_chr:
        frame['chr'] = [x.split(' ')[0] for x in frame['chr']]
    return frame.rename(columns={'strnad': 'strand'})


# Reference guide libraries.
listA = _read_table("guides_a_new.csv")
listB = _read_table("guides_b_new.csv")
listC = _read_table("guides_c_new.csv")
lista_sz = len(listA)
listb_sz = len(listB)
listc_sz = len(listC)

# Sorted union of the gene names of the three libraries.
variantsa1 = listA['gene'].unique()
variantsb1 = listB['gene'].unique()
variantsc1 = listC['gene'].unique()
con = np.concatenate((variantsa1, variantsb1, variantsc1))
variants_s = sorted(np.unique(con))

# Set A: GRCh38 ("ref") and long-read ("LR") search results.
listA_found_ref = _load_found("seta_found_ref1.csv", trim_chr=True)
lsita_ref_found_sz = listA_found_ref.shape[0]
listA_notfound_ref = _read_table("seta_notfound_ref1.csv")
lsita_ref_notfound_sz = listA_notfound_ref.shape[0]
listA_found_lr = _load_found("seta_found_LR1.csv", trim_chr=False)
lsita_lr_found_sz = listA_found_lr.shape[0]
listA_notfound_lr = _read_table("seta_notfound_LR1.csv")
lsita_lr_notfound_sz = listA_notfound_lr.shape[0]

# Set B: GRCh38 ("ref") and long-read ("LR") search results.
listB_found_ref = _load_found("setb_found_ref1.csv", trim_chr=True)
lsitb_ref_found_sz = listB_found_ref.shape[0]
listB_notfound_ref = _read_table("setb_notfound_ref1.csv")
lsitb_ref_notfound_sz = listB_notfound_ref.shape[0]
listB_found_lr = _load_found("setb_found_LR1.csv", trim_chr=False)
lsitb_lr_found_sz = listB_found_lr.shape[0]
listB_notfound_lr = _read_table("setb_notfound_LR1.csv")
lsitb_lr_notfound_sz = listB_notfound_lr.shape[0]

# Set C: GRCh38 ("ref") and long-read ("LR") search results.
listC_found_ref = _load_found("setc_found_ref1.csv", trim_chr=True)
lsitc_ref_found_sz = listC_found_ref.shape[0]
listC_notfound_ref = _read_table("setc_notfound_ref1.csv")
lsitc_ref_notfound_sz = listC_notfound_ref.shape[0]
listC_found_lr = _load_found("setc_found_LR1.csv", trim_chr=False)
lsitc_lr_found_sz = listC_found_lr.shape[0]
listC_notfound_lr = _read_table("setc_notfound_LR1.csv")
lsitc_lr_notfound_sz = listC_notfound_lr.shape[0]
#also load all mismatched except non-targe guides
#listA_notfound_lr = pd.read_csv(cwd+"setc_notfound_LR1.csv",index_col=False) seta_all_notmatched_table.csv
st.title('Long Read Guides Search')
# Sidebar page selector; the option strings are compared verbatim by the
# if/elif chain that starts below, so they must not be changed.
Calc = st.sidebar.radio(
    "",
    ('ReadME', 'Single/Multiple Guides','All','Not_Found'))
if Calc == 'ReadME':
    # Usage instructions. The duplicated "Mutated guides table" paragraph was
    # removed, spelling was corrected, and the first step now names the
    # sidebar option that actually exists.
    expander = st.expander("How to use this app")
    expander.markdown('Please select the **Single/Multiple Guides** option from the sidebar')
    expander.markdown('Select a Gene (from genes dropdown list) OR Multiple genes (from table)')
    expander.markdown('A table showing all reference guides from three LISTS will appear in the main panel. **Please note some of the genes (for example A1BG and GJB7) have multiple guide pairs and all of these are selected.**')
    expander.markdown('To see results for each of the selected reference guide from ListA, ListB and ListC, Please select respective checkbox')
    expander.markdown('Results are shown as two tables, **Matched** and **Mutated** guides tables and **NOT FOUND** table if guides are not found in GRCh38 and LR reference fasta files')
    expander.markdown('**Mutated** guides table shows the genomic position in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**')
    # Background on how the reference data was produced.
    expander1 = st.expander('Introduction')
    expander1.markdown(
        """ This app helps navigate all probable genomic **mismatches/mutations (up to 2 bp)** for a given sgRNA (from 3 lists of CRISPRi dual sgRNA libraries) in GRCh38 reference fasta and a Reference fasta generated from BAM generated against KOLF2.1J long-read data.
"""
    )
    expander1.markdown('Merged bam file was converted to fasta file using following steps:')
    expander1.markdown('- samtools mpileup to generate bcf file')
    expander1.markdown('- bcftools to generate vcf file')
    expander1.markdown('- bcftools consensus to generate fasta file')
    expander1.markdown('A GPU based [Cas-OFFinder](http://www.rgenome.net/cas-offinder/) tool was used to find off-target sequences (up to 2 mismatches) for each given reference guide against GRCh38 and LR fasta references.')
elif Calc=='Single/Multiple Guides':
flg_a_fount=0
flg_b_fount=0
flg_c_fount=0
#st.write('**General Stats:**')
#st.write('**GRCh38 Stats: Guides Found: **'+str(lsita_ref_found_sz)+"/"+str(lista_sz))
with st.form(key='columns_in_form'):
c2, c3 = st.columns(2)
with c2:
multi_genes = st.multiselect(
'Please select genes list to start processing',
variants_s)
Updated=st.form_submit_button(label = 'Update')
listA_concatenated_orig = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
reflistA_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
reflistB_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
reflistC_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
for variant in multi_genes:
ref_listA=listA[listA['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
reflistA_concatenated=pd.concat([reflistA_concatenated,ref_listA])
ref_listB=listB[listB['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
reflistB_concatenated=pd.concat([reflistB_concatenated,ref_listB])
ref_listC=listC[listC['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
reflistC_concatenated=pd.concat([reflistC_concatenated,ref_listC])
listA_concatenated_orig = pd.concat([listA_concatenated_orig,ref_listA,ref_listB,ref_listC])
if listA_concatenated_orig.shape[0] > 0:
#st.markdown(table_edit,unsafe_allow_html=True)
st.write('**Input** Guides (all 6 from 3 sets).')
st.write('**Please Select Guides common to ALL 3 Lists to procede further Processing**')
st.markdown(caution_genes,unsafe_allow_html=True)
with st.form(key='columns_in_form_a'):
c2, c3 = st.columns(2)
with c2:
get_table_order=tbl_disp(listA_concatenated_orig,'variant','ref_guides',111,0)
#multi_genes = st.multiselect(
#'Please select genes list to start processing',
#variants_s)
Updated1=st.form_submit_button(label = 'Generate Order Ready Table')
#get_table_order=tbl_disp(listA_concatenated_orig,'variant','ref_guides',1,0)
if not isinstance(get_table_order, type(None)): # and Updated1:# and get_table_order.shape[0]>0:
#if not isinstance(get_table_order, type(None)):
variant_set12=get_table_order[get_table_order['guide_type']=='1-2']['gene']
variant_set34=get_table_order[get_table_order['guide_type']=='3-4']['gene']
variant_set56=get_table_order[get_table_order['guide_type']=='5-6']['gene']
#st.table(variant_set12)
#st.write(type(variant_set12))
#if not variant_set12.equals(variant_set34):
# st.write('**Please select Identical Genes From List A and B**')
if variant_set12.shape[0]==variant_set34.shape[0]==variant_set56.shape[0]:
#########Here we call order ready table
#order_ready_tbl_GRCh38(variant_set12,variant_set34,variant_set56)
order_ready_tbl_CHM13(variant_set12,variant_set34,variant_set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr)
########END ORDER READY TABLE
elif variant_set12.shape[0]!=variant_set34.shape[0]:
st.markdown("""**SetA and SetB guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True)
elif variant_set12.shape[0]!=variant_set56.shape[0]:
st.markdown("""**SetA and SetC guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True)
elif variant_set34.shape[0]!=variant_set56.shape[0]:
st.markdown("""**SetB and SetC guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True)
else:
st.markdown("""**Probably Mixed guides are selected from three lists, Please correct the problem and re-run**""",unsafe_allow_html=True)
#Now BUILD Order Ready List
#if dft_lr_resa.shape[0] >0 and dft_lr_resb.shape[0] >0 and dft_lr_resc.shape[0] >0:
# for sgrna in dft_lr_resa
else:
st.write('**Please select guides and Press Update Button to Begin Processing**')
ListARes = st.checkbox('Results For SetA',key=300)
if ListARes:# and not isinstance(get_table, type(None)):#get_table!=None:
#if ListARes and get_table.shape[0]>0:
st.write('**Please select Guides From Table Below to processes from ListA**')
get_table=tbl_disp(reflistA_concatenated,variant,'ref_guides',2,0)
if not isinstance(get_table, type(None)):
#variant_set=get_table[get_table['guide_type']=='1-2']['gene']
variant_set=get_table['gene']
dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
dft_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
dft_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
dft_notfounda=pd.DataFrame(columns=['gene','ref_guide'])
df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
#CHECK FOR GRCh38
for i in range(variant_set.shape[0]):
#ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,listA_found_ref,listA_notfound_ref)
dft_a=dft_a.append(ref_listA)
if res.shape[0]>0:
dft_resa=dft_resa.append(res)
if res_mut.shape[0]>0:
dft_res_muta=dft_res_muta.append(res_mut)
if res_notfound.shape[0]>0:
dft_notfounda= dft_notfounda.append(res_notfound)
if list_match.shape[0]>0:
df_matched_guides_ref= df_matched_guides_ref.append(list_match)
if list_mutated.shape[0]>0:
df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
#st.write('Selected Reference Guides for **Set A**')
#tbl_disp(dft_a,'All','ReferenceGuides',0)
if dft_resa.shape[0]>0:
st.write('Matched to **GRCh38** Reference Guides for **Set A**')
tbl_disp(dft_resa,'select_genes','SetA_GRCh38',3)
elif dft_res_muta.shape[0]>0:
st.write('Mutated to **GRCh38** Reference Guides for **Set A**')
st.markdown(caution1,unsafe_allow_html=True)
tbl_disp(dft_res_muta,'select_genes','SetA_Mutated_GRCh38',4)
if dft_notfounda.shape[0]>0:
st.write('**SetA Guides Not Found in GRCh38**')
#tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
st.table(dft_notfounda)
#Now CHECK FOR CHM13
dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
# Set A, CHM13 pass (tail of the Set A results section; dft_a, listA and
# variant_set are set up before this point).
# Empty accumulators: matched pairs, mutated pairs, not-found guides and the
# per-guide matched/mutated detail tables for the CHM13 lookup.
dft_lr_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
dft_lr_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
dft_lr_notfounda=pd.DataFrame(columns=['gene','ref_guide'])
df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
# One get_lists() call per selected sgID_AB; only non-empty results are
# appended to the accumulators above.
# NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0 -- collect the pieces in lists and pd.concat them when upgrading.
for i in range(variant_set.shape[0]):
#ref_listA=listA[listA['gene']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
# Put sgID_AB first and expose it under the column name 'gene'.
ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
# The sixth return value (gflga1) is not used anywhere in this part of the script.
res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
dft_a=dft_a.append(ref_listA)
if res.shape[0]>0:
dft_lr_resa=dft_lr_resa.append(res)
if res_mut.shape[0]>0:
dft_lr_res_muta=dft_lr_res_muta.append(res_mut)
if res_notfound.shape[0]>0:
dft_lr_notfounda= dft_lr_notfounda.append(res_notfound)
if list_match.shape[0]>0:
df_matched_guides_lr= df_matched_guides_lr.append(list_match)
if list_mutated.shape[0]>0:
df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
# Show the matched table; the mutated table is only shown when nothing matched
# (if / elif).  NOTE(review): confirm hiding mutated rows in that case is intended.
if dft_lr_resa.shape[0]>0:
st.write('Matched to **CHM13** Reference Guides for **Set A**')
tbl_disp(dft_lr_resa,'select_genes','SetA_CHM13',5)
elif dft_lr_res_muta.shape[0]>0:
st.write('Mutated to **CHM13** Reference Guides for **Set A**')
st.markdown(caution1,unsafe_allow_html=True)
tbl_disp(dft_lr_res_muta,'select_genes','SetA_Mutated_CHM13',6)
if dft_lr_notfounda.shape[0]>0:
st.write('**SetA Guides Not Found in CHM13**')
st.table(dft_lr_notfounda)
#NOW MERGE FROM GRCh38 and LR
# Outer-join the GRCh38 (*_ref) and CHM13 (*_lr) detail tables on
# gene/ref_guide/chr; the remaining columns get _GRCh38 / _LR suffixes and are
# then put into a fixed display order.
merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
# Same matched-else-mutated display rule as above, for the merged tables
# (flg=0 is passed to tbl_disp here).
if merged_match_set.shape[0]>0:
#st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
st.write('**Matched** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
tbl_disp(merged_match_set,'select_genes','SetA_Matched_GRCh38_CHM13',7,0)
#st.table(merged_match_seta)
elif merged_mutated_set.shape[0]>0:
#st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
st.write('**Mutated** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
tbl_disp(merged_mutated_set,'select_genes','SetA_Mutated_GRCh38_CHM13',8,0)
# Fallback branch: prompt the user to pick rows in the guide table first.
elif ListARes:
st.write("**Please select genes from the above table to begin**")
# ---- Set B: look up the selected guide pairs in the GRCh38 and CHM13 tables ----
# Each get_lists() result is collected in plain lists and concatenated once per
# table: DataFrame.append was removed in pandas 2.0 and re-copied the whole
# frame on every iteration.
ListBRes = st.checkbox('Results For SetB',key=40)
if ListBRes:
    st.write('**Please select Guides From Table Below to processes from ListB**')
    get_table=tbl_disp(reflistB_concatenated,variant,'ref_guides',9,0)
    if get_table is not None:
        variant_set=get_table['gene']
        setb_ref_cols=['gene','guide_type','protospacer_A','protospacer_B']
        setb_pair_cols=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']
        setb_guide_cols=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
        setb_merged_cols=['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']

        def _stack_setb(columns, parts):
            """Concatenate parts under an empty frame that fixes the leading column order."""
            return pd.concat([pd.DataFrame(columns=columns), *parts])

        # CHECK FOR GRCh38
        matched_parts, mutated_parts, notfound_parts = [], [], []
        match_detail_parts, mutated_detail_parts = [], []
        for sg_id in variant_set:
            # Reference rows for this pair, with sgID_AB exposed as 'gene'.
            ref_listB = listB.loc[
                listB['sgID_AB'] == sg_id,
                ['sgID_AB','guide_type','protospacer_A','protospacer_B'],
            ].rename(columns={'sgID_AB': 'gene'})
            res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,listB_found_ref,listB_notfound_ref)
            # Keep only non-empty results, exactly as the append-based code did.
            for parts, frame in (
                (matched_parts, res),
                (mutated_parts, res_mut),
                (notfound_parts, res_notfound),
                (match_detail_parts, list_match),
                (mutated_detail_parts, list_mutated),
            ):
                if frame.shape[0] > 0:
                    parts.append(frame)
        dft_resb = _stack_setb(setb_pair_cols, matched_parts)
        dft_res_mutb = _stack_setb(setb_pair_cols, mutated_parts)
        dft_notfoundb = _stack_setb(['gene','ref_guide'], notfound_parts)
        df_matched_guides_ref = _stack_setb(setb_guide_cols, match_detail_parts)
        df_mutated_guides_ref = _stack_setb(setb_guide_cols, mutated_detail_parts)

        # NOTE(review): the mutated table is only shown when no selected guide
        # matched exactly (if / elif) -- confirm that this is intended.
        if dft_resb.shape[0]>0:
            st.write('Matched to **GRCh38** Reference Guides for **Set B**')
            tbl_disp(dft_resb,'select_genes','SetB_GRCh38',10)
        elif dft_res_mutb.shape[0]>0:
            st.write('Mutated to **GRCh38** Reference Guides for **Set B**')
            st.markdown(caution1,unsafe_allow_html=True)
            tbl_disp(dft_res_mutb,'select_genes','SetB_Mutated_GRCh38',11)
        if dft_notfoundb.shape[0]>0:
            st.write('**SetB Guides Not Found in GRCh38**')
            st.table(dft_notfoundb)

        # Now CHECK FOR CHM13
        ref_parts = []
        matched_parts, mutated_parts, notfound_parts = [], [], []
        match_detail_parts, mutated_detail_parts = [], []
        for sg_id in variant_set:
            ref_listB = listB.loc[
                listB['sgID_AB'] == sg_id,
                ['sgID_AB','guide_type','protospacer_A','protospacer_B'],
            ].rename(columns={'sgID_AB': 'gene'})
            res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
            ref_parts.append(ref_listB)
            for parts, frame in (
                (matched_parts, res),
                (mutated_parts, res_mut),
                (notfound_parts, res_notfound),
                (match_detail_parts, list_match),
                (mutated_detail_parts, list_mutated),
            ):
                if frame.shape[0] > 0:
                    parts.append(frame)
        # dft_b holds the reference rows of every selected pair (the original
        # rebuilt it in this pass, so only this pass's value survived).
        dft_b = _stack_setb(setb_ref_cols, ref_parts)
        dft_lr_resb = _stack_setb(setb_pair_cols, matched_parts)
        dft_lr_res_mutb = _stack_setb(setb_pair_cols, mutated_parts)
        dft_lr_notfoundb = _stack_setb(['gene','ref_guide'], notfound_parts)
        df_matched_guides_lr = _stack_setb(setb_guide_cols, match_detail_parts)
        df_mutated_guides_lr = _stack_setb(setb_guide_cols, mutated_detail_parts)

        if dft_lr_resb.shape[0]>0:
            st.write('Matched to **CHM13** Reference Guides for **Set B**')
            tbl_disp(dft_lr_resb,'select_genes','SetB_CHM13',12)
        elif dft_lr_res_mutb.shape[0]>0:
            st.write('Mutated to **CHM13** Reference Guides for **Set B**')
            st.markdown(caution1,unsafe_allow_html=True)
            tbl_disp(dft_lr_res_mutb,'select_genes','SetB_Mutated_CHM13',13)
        if dft_lr_notfoundb.shape[0]>0:
            st.write('**SetB Guides Not Found in CHM13**')
            st.table(dft_lr_notfoundb)

        # NOW MERGE FROM GRCh38 and LR: outer join on gene/ref_guide/chr, then
        # put the suffixed columns into a fixed display order.
        merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
        merged_mutated_set = merged_mutated_set[setb_merged_cols]
        merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
        merged_match_set = merged_match_set[setb_merged_cols]
        if merged_match_set.shape[0]>0:
            st.write('**Matched** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
            tbl_disp(merged_match_set,'select_genes','SetB_Matched_GRCh38_CHM13',14,0)
        elif merged_mutated_set.shape[0]>0:
            st.write('**Mutated** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
            tbl_disp(merged_mutated_set,'select_genes','SetB_Mutated_GRCh38_CHM13',15,0)
    else:
        # tbl_disp returned None: nothing has been selected in the guide table yet.
        st.write("**Please select genes from the above table to begin**")
# ---- Set C: look up the selected guide pairs in the GRCh38 and CHM13 tables ----
# Each get_lists() result is collected in plain lists and concatenated once per
# table: DataFrame.append was removed in pandas 2.0 and re-copied the whole
# frame on every iteration.
ListCRes = st.checkbox('Results For SetC',key=50)
if ListCRes:
    st.write('**Please select Guides From Table Below to processes from ListC**')
    get_table=tbl_disp(reflistC_concatenated,variant,'ref_guides',16,0)
    if get_table is not None:
        variant_set=get_table['gene']
        setc_ref_cols=['gene','guide_type','protospacer_A','protospacer_B']
        setc_pair_cols=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']
        setc_guide_cols=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
        setc_merged_cols=['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']

        def _stack_setc(columns, parts):
            """Concatenate parts under an empty frame that fixes the leading column order."""
            return pd.concat([pd.DataFrame(columns=columns), *parts])

        # CHECK FOR GRCh38
        matched_parts, mutated_parts, notfound_parts = [], [], []
        match_detail_parts, mutated_detail_parts = [], []
        for sg_id in variant_set:
            # Reference rows for this pair, with sgID_AB exposed as 'gene'.
            ref_listC = listC.loc[
                listC['sgID_AB'] == sg_id,
                ['sgID_AB','guide_type','protospacer_A','protospacer_B'],
            ].rename(columns={'sgID_AB': 'gene'})
            res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,listC_found_ref,listC_notfound_ref)
            # Keep only non-empty results, exactly as the append-based code did.
            for parts, frame in (
                (matched_parts, res),
                (mutated_parts, res_mut),
                (notfound_parts, res_notfound),
                (match_detail_parts, list_match),
                (mutated_detail_parts, list_mutated),
            ):
                if frame.shape[0] > 0:
                    parts.append(frame)
        dft_resc = _stack_setc(setc_pair_cols, matched_parts)
        dft_res_mutc = _stack_setc(setc_pair_cols, mutated_parts)
        dft_notfoundc = _stack_setc(['gene','ref_guide'], notfound_parts)
        df_matched_guides_ref = _stack_setc(setc_guide_cols, match_detail_parts)
        df_mutated_guides_ref = _stack_setc(setc_guide_cols, mutated_detail_parts)

        # NOTE(review): the mutated table is only shown when no selected guide
        # matched exactly (if / elif) -- confirm that this is intended.
        if dft_resc.shape[0]>0:
            st.write('Matched to **GRCh38** Reference Guides for **Set C**')
            tbl_disp(dft_resc,'select_genes','SetC_GRCh38',17)
        elif dft_res_mutc.shape[0]>0:
            st.write('Mutated to **GRCh38** Reference Guides for **Set C**')
            st.markdown(caution1,unsafe_allow_html=True)
            tbl_disp(dft_res_mutc,'select_genes','SetC_Mutated_GRCh38',18)
        if dft_notfoundc.shape[0]>0:
            st.write('**SetC Guides Not Found in GRCh38**')
            st.table(dft_notfoundc)

        # Now CHECK FOR CHM13
        ref_parts = []
        matched_parts, mutated_parts, notfound_parts = [], [], []
        match_detail_parts, mutated_detail_parts = [], []
        for sg_id in variant_set:
            ref_listC = listC.loc[
                listC['sgID_AB'] == sg_id,
                ['sgID_AB','guide_type','protospacer_A','protospacer_B'],
            ].rename(columns={'sgID_AB': 'gene'})
            res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
            ref_parts.append(ref_listC)
            for parts, frame in (
                (matched_parts, res),
                (mutated_parts, res_mut),
                (notfound_parts, res_notfound),
                (match_detail_parts, list_match),
                (mutated_detail_parts, list_mutated),
            ):
                if frame.shape[0] > 0:
                    parts.append(frame)
        # dft_c holds the reference rows of every selected pair (the original
        # rebuilt it in this pass, so only this pass's value survived).
        dft_c = _stack_setc(setc_ref_cols, ref_parts)
        dft_lr_resc = _stack_setc(setc_pair_cols, matched_parts)
        dft_lr_res_mutc = _stack_setc(setc_pair_cols, mutated_parts)
        dft_lr_notfoundc = _stack_setc(['gene','ref_guide'], notfound_parts)
        df_matched_guides_lr = _stack_setc(setc_guide_cols, match_detail_parts)
        df_mutated_guides_lr = _stack_setc(setc_guide_cols, mutated_detail_parts)

        if dft_lr_resc.shape[0]>0:
            st.write('Matched to **CHM13** Reference Guides for **Set C**')
            tbl_disp(dft_lr_resc,'select_genes','SetC_CHM13',19)
        elif dft_lr_res_mutc.shape[0]>0:
            st.write('Mutated to **CHM13** Reference Guides for **Set C**')
            st.markdown(caution1,unsafe_allow_html=True)
            tbl_disp(dft_lr_res_mutc,'select_genes','SetC_Mutated_CHM13',20)
        if dft_lr_notfoundc.shape[0]>0:
            st.write('**SetC Guides Not Found in CHM13**')
            st.table(dft_lr_notfoundc)

        # NOW MERGE FROM GRCh38 and LR: outer join on gene/ref_guide/chr, then
        # put the suffixed columns into a fixed display order.
        merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
        merged_mutated_set = merged_mutated_set[setc_merged_cols]
        merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
        merged_match_set = merged_match_set[setc_merged_cols]
        if merged_match_set.shape[0]>0:
            st.write('**Matched** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
            tbl_disp(merged_match_set,'select_genes','SetC_Matched_GRCh38_CHM13',21,0)
        elif merged_mutated_set.shape[0]>0:
            st.write('**Mutated** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
            tbl_disp(merged_mutated_set,'select_genes','SetC_Mutated_GRCh38_CHM13',22,0)
        # TODO: optional "Generate Order Ready List" checkbox once results for
        # Sets A, B and C are all available (was sketched here as commented-out code).
    else:
        # tbl_disp returned None: nothing has been selected in the guide table yet.
        st.write("**Please select genes from the above table to begin**")
elif Calc=='Not_Found':
# ---- Set A: guides listed in listA_notfound_lr ----
# Shows three tables: targeting guides, genes for which two guides are missing,
# and non-targeting guides (ids containing "non").
ListAResNotFound = st.checkbox('Results For SetA',key=30)
if ListAResNotFound and listA_notfound_lr.shape[0]>0:
    listA_notfound_LR_sorted=listA_notfound_lr.sort_values('gene')
    total_a = listA_notfound_LR_sorted.shape[0]
    non_targeting_mask_a = listA_notfound_LR_sorted['gene'].str.contains("non")

    vaild_guides_a = listA_notfound_LR_sorted[~non_targeting_mask_a]
    valid_count_a = vaild_guides_a.shape[0]
    st.write(f"{valid_count_a}/{total_a} Guides Not Found")
    # Every table gets its own key, like all other tbl_disp calls in this
    # script (previously all three tables here shared key 23).
    tbl_disp(vaild_guides_a,'all_not_found','SetA_KOLF2.1',23,0)

    # Gene name = text before the first '_' of the guide id.  Computed after
    # tbl_disp, which resets the frame's index in place.
    gene_names_a = vaild_guides_a['gene'].str.split('_').str[0]
    paired_genes_a = gene_names_a[gene_names_a.duplicated(keep=False)].unique()
    pair_lista=[]
    for g in paired_genes_a:
        # Exact match on the gene name: the former str.contains(g) was a regex
        # substring search, so e.g. TP53 also pulled in TP53BP1 guides.
        g2 = vaild_guides_a[gene_names_a == g].reset_index(drop=True)
        pair_lista.append([g2['gene'].iloc[0], g2['ref_guide'].iloc[0], g2['gene'].iloc[1], g2['ref_guide'].iloc[1]])
    pair_missmatch_a = pd.DataFrame(pair_lista, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2'])
    st.write(f"{pair_missmatch_a.shape[0]}/{valid_count_a} Paired Guides Not Found")
    tbl_disp(pair_missmatch_a,'all_not_found','SetA_KOLF2.1',24,0)

    non_targeting_guides_a = listA_notfound_LR_sorted[non_targeting_mask_a]
    st.write(f"{non_targeting_guides_a.shape[0]}/{total_a} no-targeting Guides Not Found")
    tbl_disp(non_targeting_guides_a,'all_not_found','SetA_KOLF2.1',25,0)
# ---- Set B: guides listed in listB_notfound_lr ----
# Shows three tables: targeting guides, genes for which two guides are missing,
# and non-targeting guides (ids containing "non").
ListBResNotFound = st.checkbox('Results For SetB',key=40)
# Same empty-table guard as the Set A section.
if ListBResNotFound and listB_notfound_lr.shape[0]>0:
    listB_notfound_LR_sorted=listB_notfound_lr.sort_values('gene')
    total_b = listB_notfound_LR_sorted.shape[0]
    non_targeting_mask_b = listB_notfound_LR_sorted['gene'].str.contains("non")

    vaild_guides_b = listB_notfound_LR_sorted[~non_targeting_mask_b]
    valid_count_b = vaild_guides_b.shape[0]
    st.write(f"{valid_count_b}/{total_b} Guides Not Found")
    # Tables are labelled SetB (they were copy-pasted as 'SetA_KOLF2.1') and
    # each gets its own key instead of re-using key 23 from the Set A section.
    tbl_disp(vaild_guides_b,'all_not_found','SetB_KOLF2.1',26,0)

    # Gene name = text before the first '_' of the guide id.  Computed after
    # tbl_disp, which resets the frame's index in place.
    gene_names_b = vaild_guides_b['gene'].str.split('_').str[0]
    paired_genes_b = gene_names_b[gene_names_b.duplicated(keep=False)].unique()
    pair_listb=[]
    for g in paired_genes_b:
        # Exact match on the gene name: the former str.contains(g) was a regex
        # substring search, so e.g. TP53 also pulled in TP53BP1 guides.
        g2 = vaild_guides_b[gene_names_b == g].reset_index(drop=True)
        pair_listb.append([g2['gene'].iloc[0], g2['ref_guide'].iloc[0], g2['gene'].iloc[1], g2['ref_guide'].iloc[1]])
    pair_missmatch_b = pd.DataFrame(pair_listb, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2'])
    st.write(f"{pair_missmatch_b.shape[0]}/{valid_count_b} Paired Guides Not Found")
    tbl_disp(pair_missmatch_b,'all_not_found','SetB_KOLF2.1',27,0)

    non_targeting_guides_b = listB_notfound_LR_sorted[non_targeting_mask_b]
    st.write(f"{non_targeting_guides_b.shape[0]}/{total_b} no-targeting Guides Not Found")
    tbl_disp(non_targeting_guides_b,'all_not_found','SetB_KOLF2.1',28,0)
# ---- Set C: guides listed in listC_notfound_lr ----
# Shows three tables: targeting guides, genes for which two guides are missing,
# and non-targeting guides (ids containing "non").
ListCResNotFound = st.checkbox('Results For SetC',key=50)
# Same empty-table guard as the Set A section.
if ListCResNotFound and listC_notfound_lr.shape[0]>0:
    listC_notfound_LR_sorted=listC_notfound_lr.sort_values('gene')
    total_c = listC_notfound_LR_sorted.shape[0]
    non_targeting_mask_c = listC_notfound_LR_sorted['gene'].str.contains("non")

    vaild_guides_c = listC_notfound_LR_sorted[~non_targeting_mask_c]
    valid_count_c = vaild_guides_c.shape[0]
    st.write(f"{valid_count_c}/{total_c} Guides Not Found")
    # Tables are labelled SetC (they were copy-pasted as 'SetA_KOLF2.1') and
    # each gets its own key instead of re-using key 23 from the Set A section
    # (30 is skipped: it is the key of the Set A checkbox).
    tbl_disp(vaild_guides_c,'all_not_found','SetC_KOLF2.1',31,0)

    # Gene name = text before the first '_' of the guide id.  Computed after
    # tbl_disp, which resets the frame's index in place.
    gene_names_c = vaild_guides_c['gene'].str.split('_').str[0]
    paired_genes_c = gene_names_c[gene_names_c.duplicated(keep=False)].unique()
    pair_listc=[]
    for g in paired_genes_c:
        # Exact match on the gene name: the former str.contains(g) was a regex
        # substring search, so e.g. TP53 also pulled in TP53BP1 guides.
        g2 = vaild_guides_c[gene_names_c == g].reset_index(drop=True)
        pair_listc.append([g2['gene'].iloc[0], g2['ref_guide'].iloc[0], g2['gene'].iloc[1], g2['ref_guide'].iloc[1]])
    pair_missmatch_c = pd.DataFrame(pair_listc, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2'])
    st.write(f"{pair_missmatch_c.shape[0]}/{valid_count_c} Paired Guides Not Found")
    tbl_disp(pair_missmatch_c,'all_not_found','SetC_KOLF2.1',32,0)

    non_targeting_guides_c = listC_notfound_LR_sorted[non_targeting_mask_c]
    st.write(f"{non_targeting_guides_c.shape[0]}/{total_c} no-targeting Guides Not Found")
    tbl_disp(non_targeting_guides_c,'all_not_found','SetC_KOLF2.1',33,0)
else:
st.write("**Place Holder for All**")