diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,10 +1,9 @@
-#from turtle import shape
import streamlit as st
-#from st_keyup import st_keyup
import pandas as pd
import numpy as np
from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode
-
+from iteration_utilities import duplicates
+from iteration_utilities import unique_everseen
import os
st.set_page_config(layout="wide")
@@ -24,9 +23,114 @@ caution2 = '
Pleas
table_edit = '
About Table: Please note that table can be sorted by clicking on any column and Multiple rows can be selected (by clicking check box in first column) to save only those rows.
'
caution_genes = 'Please make sure that desired genes from all three lists should be selected to generate Order Ready Table.
'
+
+#READ INPUT FILES
+
+cwd=os.getcwd()+'/'+'data/'
+
+#Here, gene column is modified for non-targeting guides in the format sgID_1|sgID_2 for coherent downstream manipulation
+listA = pd.read_csv(cwd+"guides_a_new.csv",index_col=False)
+listB = pd.read_csv(cwd+"guides_b_new.csv",index_col=False)
+listC = pd.read_csv(cwd+"guides_c_new.csv",index_col=False)
+
+lista_sz=listA.shape[0]
+listb_sz=listB.shape[0]
+listc_sz=listC.shape[0]
+#st.write(listA.shape)
+variantsa1=listA['gene'].unique()
+variantsb1=listB['gene'].unique()
+variantsc1=listC['gene'].unique()
+#Make a comprehensive list of genes in all 3 lists (please note that non-targeting guide names are not the same across the three lists)
+con = np.concatenate((variantsa1, variantsb1, variantsc1))
+variants_s=sorted(np.unique(con))
+
+#Now read GRCh38 and LR guides for set a as identified by the LR-Guides pipeline
+#Format is: gene (as many entries as the number of guides found, both matched and mutated), ref_guide, chr, position, mutated_guide (can also be the same as the reference), strand, num_mismatch (excluding leading G). Please note that each guide has a trailing NGG
+listA_found_ref = pd.read_csv(cwd+"seta_found_ref1.csv",index_col=False)
+listA_found_ref = listA_found_ref.sort_values('gene')
+lsita_ref_found_sz=listA_found_ref.shape[0]
+#keep only the first space-separated token of chr (i.e. 'chr# #' becomes 'chr#')
+listA_found_ref['chr'] = [x.split(' ')[-0] for x in listA_found_ref['chr']]
+listA_found_ref.rename(columns = {'strnad':'strand'}, inplace = True) #Also change strnad to strand (was misspelled in LR-Guides pipeline)
+#This file (and all such files) has 2 columns: gene (as given in sgID_1/2) and ref_guide.
+listA_notfound_ref = pd.read_csv(cwd+"seta_notfound_ref1.csv",index_col=False)
+listA_notfound_ref=listA_notfound_ref.sort_values('gene')
+lsita_ref_notfound_sz=listA_notfound_ref.shape[0]
+#LR guides
+listA_found_lr = pd.read_csv(cwd+"seta_found_LR1.csv",index_col=False)
+listA_found_lr=listA_found_lr.sort_values('gene')
+lsita_lr_found_sz=listA_found_lr.shape[0]
+listA_found_lr.rename(columns = {'strnad':'strand'}, inplace = True)
+listA_notfound_lr = pd.read_csv(cwd+"seta_notfound_LR1.csv",index_col=False)
+listA_notfound_lr=listA_notfound_lr.sort_values('gene')
+lsita_lr_notfound_sz=listA_notfound_lr.shape[0]
+
+#Also read GRCh38 and LR guides for set b
+listB_found_ref = pd.read_csv(cwd+"setb_found_ref1.csv",index_col=False)
+listB_found_ref=listB_found_ref.sort_values('gene')
+lsitb_ref_found_sz=listB_found_ref.shape[0]
+#keep only the first space-separated token of chr (i.e. 'chr# #' becomes 'chr#')
+listB_found_ref['chr'] = [x.split(' ')[-0] for x in listB_found_ref['chr']]
+listB_found_ref=listB_found_ref.sort_values('gene')
+listB_found_ref.rename(columns = {'strnad':'strand'}, inplace = True)
+listB_notfound_ref = pd.read_csv(cwd+"setb_notfound_ref1.csv",index_col=False)
+listB_notfound_ref=listB_notfound_ref.sort_values('gene')
+lsitb_ref_notfound_sz=listB_notfound_ref.shape[0]
+
+
+listB_found_lr = pd.read_csv(cwd+"setb_found_LR1.csv",index_col=False)
+listB_found_lr=listB_found_lr.sort_values('gene')
+lsitb_lr_found_sz=listB_found_lr.shape[0]
+listB_found_lr.rename(columns = {'strnad':'strand'}, inplace = True)
+listB_notfound_lr = pd.read_csv(cwd+"setb_notfound_LR1.csv",index_col=False)
+listB_notfound_lr=listB_notfound_lr.sort_values('gene')
+lsitb_lr_notfound_sz=listB_notfound_lr.shape[0]
+
+#Also read GRCh38 and LR guides for set c
+listC_found_ref = pd.read_csv(cwd+"setc_found_ref1.csv",index_col=False)
+listC_found_ref=listC_found_ref.sort_values('gene')
+lsitc_ref_found_sz=listC_found_ref.shape[0]
+#keep only the first space-separated token of chr (i.e. 'chr# #' becomes 'chr#')
+listC_found_ref['chr'] = [x.split(' ')[-0] for x in listC_found_ref['chr']]
+listC_found_ref.rename(columns = {'strnad':'strand'}, inplace = True)
+listC_notfound_ref = pd.read_csv(cwd+"setc_notfound_ref1.csv",index_col=False)
+listC_notfound_ref=listC_notfound_ref.sort_values('gene')
+lsitc_ref_notfound_sz=listC_notfound_ref.shape[0]
+
+listC_found_lr = pd.read_csv(cwd+"setc_found_LR1.csv",index_col=False)
+listC_found_lr=listC_found_lr.sort_values('gene')
+lsitc_lr_found_sz=listC_found_lr.shape[0]
+listC_found_lr.rename(columns = {'strnad':'strand'}, inplace = True)
+listC_notfound_lr = pd.read_csv(cwd+"setc_notfound_LR1.csv",index_col=False)
+listC_notfound_lr=listC_notfound_lr.sort_values('gene')
+lsitc_lr_notfound_sz=listC_notfound_lr.shape[0]
+
+
+#This is for the all-guides order table
+set_start=0
+
+regular_lista=listA[~listA['gene'].str.contains('non-targeting')]['sgID_AB']#[['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+regular_lista=regular_lista.sort_values()
+set_end=regular_lista.shape[0] #18905
+#regular_lista=regular_lista.iloc[set_start:set_end]
+non_targeting_lista=listA[listA['gene'].str.contains('non-targeting')]['sgID_AB']#[['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+non_targeting_lista=non_targeting_lista.sort_values()
+#regular_lista=regular_lista.reset_index()
+regular_listb=listB[~listB['gene'].str.contains('non-targeting')]['sgID_AB']#[['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+regular_listb=regular_listb.sort_values()
+#regular_listb=regular_listb.iloc[set_start:set_end]
+non_targeting_listb=listB[listB['gene'].str.contains('non-targeting')]['sgID_AB']#[['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+non_targeting_listb=non_targeting_listb.sort_values()
+
+#regular_listb=regular_listb.reset_index()
+regular_listc=listC[~listC['gene'].str.contains('non-targeting')]['sgID_AB']#[['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+regular_listc=regular_listc.sort_values()
+#regular_listc=regular_listc[set_start:set_end]
+non_targeting_listc=listC[listC['gene'].str.contains('non-targeting')]['sgID_AB']#[['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+non_targeting_listc=non_targeting_listc.sort_values()
+
+#GENERAL FUNCTIONS
def transform(df,str):
- # Select columns
- #cols = st.multiselect('Please select columns to save current Table as csv file',
cols = st.multiselect(str,
df.columns.tolist(),
df.columns.tolist()
@@ -39,19 +143,6 @@ def convert_df(df):
def convert_df1(df):
return df.to_csv(index=False).encode('utf-8')
-
-# CSS to inject contained in a string
-hide_table_row_index = """
-
- """
-
-# Inject CSS with Markdown
-st.markdown(hide_table_row_index, unsafe_allow_html=True)
-
-
#########TABLE DISPLAY
def tbl_disp(dat,var,ref,key,flg=1):
dat.reset_index(drop=True, inplace=True)
@@ -67,16 +158,11 @@ def tbl_disp(dat,var,ref,key,flg=1):
mime='text/csv',
#key=key,
)
- #st.table(dft)
- #st.markdown(table_edit,unsafe_allow_html=True)
gb = GridOptionsBuilder.from_dataframe(dat)
gb.configure_pagination(enabled=False)#,paginationAutoPageSize=False)#True) #Add pagination
gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
gb.configure_selection(selection_mode="multiple", use_checkbox=True)
gb.configure_column("gene", headerCheckboxSelection = True)
-
-
-
gb.configure_side_bar()
gridOptions = gb.build()
@@ -114,42 +200,32 @@ def tbl_disp(dat,var,ref,key,flg=1):
)
return dfs
-
-
-def assemble_tbl(t):
- dft = pd.DataFrame(columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2'])
- for i in range(0,t.shape[0],2):
- l1=t.iloc[[i]]
- l1.columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
-
- l2=t.iloc[[i+1]]
- l2.columns=['sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2','mutated_guide2', 'strand2', 'num_mismatch2']
- listA_concatenated_match_LR1=pd.concat([l1.reset_index(drop=True),l2.reset_index(drop=True)],axis=1)
- listA_concatenated_match_LR1=listA_concatenated_match_LR1[['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2']]
- listA_concatenated_match_LR1['sgRNA_1']=listA_concatenated_match_LR1['sgRNA_1'].str.slice(0, 20)
- listA_concatenated_match_LR1['sgRNA_2']=listA_concatenated_match_LR1['sgRNA_2'].str.slice(0, 20)
- listA_concatenated_match_LR1['sgID_1_2']=listA_concatenated_match_LR1['sgID_1']+"|"+listA_concatenated_match_LR1['sgID_1']
- dft=dft.append(listA_concatenated_match_LR1)
-
- return dft
-
def get_lists(ref_list,list_found_ref,list_notfound_ref):
- a_ref=[]
+ #This module retrieves guide_id and searches for guide sequences from the table
+ #st.table(ref_list)
+ a_ref=[]
+ #st.table(ref_list)
for i in range(len(ref_list)):
- a_ref.append(ref_list.gene.values[i].split('|')[0])
- a_ref.append(ref_list.gene.values[i].split('|')[1])
+ a_ref.append(ref_list.sgID_AB.values[i].split('|')[0])
+ a_ref.append(ref_list.sgID_AB.values[i].split('|')[1])
set_found0_ref=[]
+ #st.table(a_ref)
for i in range(len(a_ref)):
set_found0_ref.append(list_found_ref[list_found_ref['gene']==a_ref[i]])
+ #st.write(set_found0_ref)
list_concatenated_found_ref = pd.concat(set_found0_ref)
- list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0]
+ list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0] #only select guides with zero mismatches for match list, MISSMATCH LIST LATER
#Also remove Alternate loci's data
list_concatenated_match_ref = list_concatenated_match_ref[list_concatenated_match_ref['chr'].str.contains('chr')]
-
+ #st.table(list_concatenated_match_ref)
#also create new list with both sgRNAs in one row
dft=pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+
+ guideflg1=1
+ #st.table(list_concatenated_match_ref)
if list_concatenated_match_ref.shape[0]>0:
+ guideflg1=0
t=list_concatenated_match_ref.reset_index(drop=True)
#st.table(t)
@@ -169,17 +245,27 @@ def get_lists(ref_list,list_found_ref,list_notfound_ref):
while i 0:
- guideflg1=2
- if seta_notfound1_ref.shape[0]>0:
- guideflg1=1
list_concatenated_notfound_ref = pd.concat([seta_notfound0_ref,seta_notfound1_ref])
- #st.table(dft)
- #st.table(dft_mut)
- return dft, dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref,guideflg1
+
+ return dft.iloc[:1], dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref,guideflg1
###########
-
def get_mutated_res(list_concatenated_mutated_ref):
#########
#if list_concatenated_mutated_ref.shape[0]>0:
t=list_concatenated_mutated_ref.reset_index(drop=True)
+
#st.table(t)
dft_mut = pd.DataFrame(columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2'])
c1=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1']
@@ -241,85 +318,11 @@ def get_mutated_res(list_concatenated_mutated_ref):
list_concatenated_mutated_ref1['sgRNA_2']='G'+list_concatenated_mutated_ref1['sgRNA_2'].str.slice(1, 20)
list_concatenated_mutated_ref1['sgID_1_2']=list_concatenated_mutated_ref1['sgID_1']+"|"+list_concatenated_mutated_ref1['sgID_1']
- dft_mut=dft_mut.append(list_concatenated_mutated_ref1)
- return dft_mut
+ #dft_mut=dft_mut.append(list_concatenated_mutated_ref1)
+ dft_mut=pd.concat([dft_mut,list_concatenated_mutated_ref1])
- #########
-
-#######THIS SECTION ADDED FOR ORDER READY LIST AND REMOVE REPITION FOR NOT_FOUND ENTRUES
-def get_lists_ol(ref_list,list_found_ref,list_notfound_ref):
- a_ref=[]
- for i in range(len(ref_list)):
- a_ref.append(ref_list.gene.values[i].split('|')[0])
- a_ref.append(ref_list.gene.values[i].split('|')[1])
-
- set_found0_ref=[]
- for i in range(len(a_ref)):
- set_found0_ref.append(list_found_ref[list_found_ref['gene']==a_ref[i]])
- list_concatenated_found_ref = pd.concat(set_found0_ref)
- list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0]
- #Also remove Alternate loci's data
- list_concatenated_match_ref = list_concatenated_match_ref[list_concatenated_match_ref['chr'].str.contains('chr')]
-
- #also create new list with both sgRNAs in one row
- dft=pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- if list_concatenated_match_ref.shape[0]>0:
- t=list_concatenated_match_ref.reset_index(drop=True)
- #st.table(t)
-
- ##########
- #check even/odd entries
- if t.shape[0]==1:
- t1=t.loc[t.index.repeat(2)].reset_index(drop=True)
- #st.write(t1)
- dft=assemble_tbl(t1)
-
- elif t.shape[0]%2==0: #even
- dft=assemble_tbl(t)
-
- else: #odd
- t1 = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- i=0
- while i 0]
- list_concatenated_mutated_ref=list_concatenated_mutated_ref.sort_values('position')
-
- #Also remove Alternate loci's data
+ return dft_mut
- list_concatenated_mutated_ref = list_concatenated_mutated_ref[list_concatenated_mutated_ref['chr'].str.contains('chr')]
- dft_mut = pd.DataFrame(columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2'])
- if list_concatenated_mutated_ref.shape[0]>0:
- dft_mut = get_mutated_res(list_concatenated_mutated_ref)
- #check not found
- seta_notfound0_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[0]]
- seta_notfound1_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[1]]
- list_concatenated_notfound_ref = pd.concat([seta_notfound0_ref,seta_notfound1_ref])
- return dft, dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref
- ###########
-
-
-#THIS WILL GENERATE ORDER READY TABLE FOR GRCh38
-#THIS WILL GENERATE ORDER READY TABLE FOR CHM13
-
-#CHECK IF GUIDE ARE IN NOT FOUND LIST
def not_found_check(set12,set34,set56,listA_notfound_lr,listB_notfound_lr,listC_notfound_lr):
flg11=0
flg12=0
@@ -343,16 +346,21 @@ def not_found_check(set12,set34,set56,listA_notfound_lr,listB_notfound_lr,listC_
flg32=1
return flg11,flg12,flg21,flg22,flg31,flg32
-def order_ready_tbl_CHM13(set12,set34,set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr):
+def order_ready_tbl_CHM13(set12,set34,set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr,ref_sel):
+ # st.table(set12)
+ # st.table(set34)
+ # st.table(set56)
dft_order_table=pd.DataFrame(columns=['gene','guide_type','sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
-
+ dft_notfound_all=pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B'])
+
+ #dft_notfound=pd.DataFrame(columns=['gene','ref_guide'])
+
dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
set12=set12.reset_index(drop = True)
set34=set34.reset_index(drop = True)
set56=set56.reset_index(drop = True)
-
for i in range(set12.shape[0]):
gene_n=set12[i].split('_')[0]
f=not_found_check(set12[i],set34[i],set56[i],listA_notfound_lr,listB_notfound_lr,listC_notfound_lr)
@@ -360,43 +368,37 @@ def order_ready_tbl_CHM13(set12,set34,set56,listA_found_lr,listA_notfound_lr,lis
#st.write(set12[i],set34[i],set56[i])
#ref_listA=listA[listA['gene']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listA=listA[listA['sgID_AB']==set12.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
-
- ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ ref_listA=listA[listA['sgID_AB']==set12.iloc[i]][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listA = ref_listA[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']]
+ #st.write(ref_listA)
+ #ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
resa,res_muta,res_notfounda,list_matcha,list_mutateda,gflga1=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
- dft_a=dft_a.append(ref_listA)
+ #dft_a=dft_a.append(ref_listA)
#listb
ref_listB=listB[listB['sgID_AB']==set34.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
- ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ #ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
resb,res_mutb,res_notfoundb,list_matchb,list_mutatedb,gflgb1=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
- dft_b=dft_b.append(ref_listB)
- #st.table(not resb.empty)
- #st.table(res_mutb)
- #st.table(resb)
+ #dft_b=dft_b.append(ref_listB)
#listc
ref_listC=listC[listC['sgID_AB']==set56.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
- ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ #ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
resc,res_mutc,res_notfoundc,list_matchc,list_mutatedc,gflgc1=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
- dft_c=dft_c.append(ref_listC)
-
- # st.write(set12[i])
- # st.write(set34[i])
- # st.write(set56[i])
- # st.write(f)
+ #dft_c=dft_c.append(ref_listC)
+ #st.table(ref_listA)
# st.write(gflga1,gflgb1,gflgc1)
if gflga1==0:
#Also verigy that both guides are different
-
+ #st.table(resa)
if resa['sgID_1'][0] != resa['sgID_2'][0]:
resa['gene']=gene_n
resa['guide_type']='1-2'
- dft_order_table=dft_order_table.append(resa)
+ #dft_order_table=dft_order_table.append(resa)
+ dft_order_table=pd.concat([dft_order_table, resa]) #dft_order_table.concat(resa)
else: #it is nutation case, so check next
if f[2]==0 or f[3] == 0:
#st.write('came in 1')
@@ -405,29 +407,51 @@ def order_ready_tbl_CHM13(set12,set34,set56,listA_found_lr,listA_notfound_lr,lis
resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
if f[2]==0:
resa['gene']=gene_n
- resa['guide_type']=str(gflga1)+"-3"
- dft_order_table=dft_order_table.append(resa)
+ if f[0]==0:
+ resa['guide_type']="1-3"
+ else:
+ resa['guide_type']="2-3"
+ #dft_order_table=dft_order_table.append(resa)
+ dft_order_table=pd.concat([dft_order_table,resa])
else: # f[2]==0:
resa['gene']=gene_n
- resa['guide_type']=str(gflga1)+"-4"
- dft_order_table=dft_order_table.append(resa)
-
-
+ if f[0]==0:
+ resa['guide_type']="1-4"
+ else:
+ resa['guide_type']="2-4"
+ #dft_order_table=dft_order_table.append(resa)
+ dft_order_table=pd.concat([dft_order_table,resa])
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+
+
elif resa.shape[0] >0: #at least one guide is from seta
#if resa['sgID_1'][0] != resa['sgID_2'][0]:
if f[2]==0 or f[3] == 0:
- st.write('came in 1')
+ #st.write('came in 1')
if not resb.empty: # and resb['sgID_1'][0] != resb['sgID_2'][0]: #second guide in from setb
resa[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resb[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
if f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-3"
- dft_order_table=dft_order_table.append(resa)
+ #dft_order_table=dft_order_table.append(resa)
+ dft_order_table=pd.concat([dft_order_table,resa])
else: # f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-4"
- dft_order_table=dft_order_table.append(resa)
+ #dft_order_table=dft_order_table.append(resa)
+ dft_order_table=pd.concat([dft_order_table,resa])
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
elif f[4]==0 or f[5] == 0:
#st.write('came in 2')
@@ -439,139 +463,315 @@ def order_ready_tbl_CHM13(set12,set34,set56,listA_found_lr,listA_notfound_lr,lis
if f[4]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-5"
- dft_order_table=dft_order_table.append(resa)
+ #dft_order_table=dft_order_table.append(resa)
+ dft_order_table=pd.concat([dft_order_table,resa])
else: # f[2]==0:
resa['gene']=gene_n
resa['guide_type']=str(gflga1)+"-6"
- dft_order_table=dft_order_table.append(resa)
+ #dft_order_table=dft_order_table.append(resa)
+ dft_order_table=pd.concat([dft_order_table,resa])
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+
elif resb.shape[0]>0: #at least one guide
- #if resb['sgID_1'][0] != resb['sgID_2'][0]:
- if f[4]==0 or f[5] == 0:
+ if gflgb1==0:
+ if resb['sgID_1'][0] != resb['sgID_2'][0]:
+ resb['gene']=gene_n
+ resb['guide_type']='3-4'
+ #dft_order_table=dft_order_table.append(resb)
+ dft_order_table=pd.concat([dft_order_table,resb])
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+
+
+ elif f[4]==0 or f[5] == 0:
#if not resc.empty and resc['sgID_1'][0] != resc['sgID_2'][0]:
resb[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resc[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
resb['sgID_1_2'] = resb['sgID_1']+"|"+resb['sgID_2']
#dft_order_table=dft_order_table.append(resb)
if f[4]==0:
resb['gene']=gene_n
- resb['guide_type']=str(gflgb1+1)+"-5"
- dft_order_table=dft_order_table.append(resb)
+ resb['guide_type']=str(gflgb1+2)+"-5"
+ #dft_order_table=dft_order_table.append(resb)
+ dft_order_table=pd.concat([dft_order_table,resb])
else: # f[2]==0:
resb['gene']=gene_n
resb['guide_type']=str(gflgb1+2)+"-6"
- dft_order_table=dft_order_table.append(resb)
+ #dft_order_table=dft_order_table.append(resb)
+ dft_order_table=pd.concat([dft_order_table,resb])
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+
elif resc.shape[0]>0: #at least one guide
- #if f[4]==0 and f[5] == 0:
- if resc['sgID_1'][0] != resc['sgID_2'][0]:
- resc['gene']=gene_n
- resc['guide_type']='5-6'
- dft_order_table=dft_order_table.append(resc)
+ if gflgc1==0:
+ if resc['sgID_1'][0] != resc['sgID_2'][0]:
+ resc['gene']=gene_n
+ resc['guide_type']='5-6'
+ #dft_order_table=dft_order_table.append(resc)
+ dft_order_table=pd.concat([dft_order_table,resc])
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+
+
+ else:
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
+ dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
+
- if dft_order_table.shape[0]>0:
- st.write('Order Ready **CHM13** guides List')
- tbl_disp(dft_order_table,'select_genes','SetA_CHM13',5)
+ if dft_order_table.shape[0]>0:
+ #check total guides found
+ # st.write(str(set12.shape[0]))
+ # st.write(str(set34.shape[0]))
+ # st.write(str(set56.shape[0]))
+ st.write('**Please note that for guides matching to multiple locations (an example is ABCC6), only first pair is returned**')
+ szt=set12.shape[0]
+ szf=dft_order_table.shape[0]
+ # st.write(str(dft_order_table.shape[0]))
+ szd=szt-szf
+ if szd>0:
+ st.write('Order Ready '+ref_sel+' guides List: '+str(szd)+'/'+str(szt)+' **guides were not found**')
+ tbl_disp(dft_order_table,'select_genes','SetA_CHM13',5)
+ else:
+ st.write('Order Ready '+ref_sel+' guides List')
+ tbl_disp(dft_order_table,'select_genes','SetA_CHM13',5)
else:
st.write('**No guides found in ListA, ListB and ListC**')
- #st.table(dft_order_table)
-
-#def get_notfound():
+ if dft_notfound_all.shape[0]>0:
+ st.write('**Guides not found in any lists**')
+ tbl_disp(dft_notfound_all,'select_genes','SetA_CHM13',6)
-
-cwd=os.getcwd()+'/'+'data/'
-
-
-listA = pd.read_csv(cwd+"guides_a_new.csv",index_col=False)
-
-listB = pd.read_csv(cwd+"guides_b_new.csv",index_col=False)
-listC = pd.read_csv(cwd+"guides_c_new.csv",index_col=False)
-
-lista_sz=listA.shape[0]
-listb_sz=listB.shape[0]
-listc_sz=listC.shape[0]
-
-
-variantsa1=listA['gene'].unique()
-variantsb1=listB['gene'].unique()
-variantsc1=listC['gene'].unique()
-
-con = np.concatenate((variantsa1, variantsb1,variantsc1))
-
-
-#st.write(type(variantsc1))
-variants_s=sorted(np.unique(con))
-#st.write(len(variants_s))
-#also get names for non-targetting guides
-
-
-#Also read GRCh38 and LR guides for stea
-listA_found_ref = pd.read_csv(cwd+"seta_found_ref1.csv",index_col=False)
-
-
-
-
-
-
-lsita_ref_found_sz=listA_found_ref.shape[0]
-#remove # from chr# #
-listA_found_ref['chr'] = [x.split(' ')[-0] for x in listA_found_ref['chr']]
-listA_found_ref.rename(columns = {'strnad':'strand'}, inplace = True)
-listA_notfound_ref = pd.read_csv(cwd+"seta_notfound_ref1.csv",index_col=False)
-
-lsita_ref_notfound_sz=listA_notfound_ref.shape[0]
-
+def assemble_tbl(t):
+ dft = pd.DataFrame(columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2'])
+ #for i in range(0,t.shape[0],2):
+ mid=int(t.shape[0]/2)
+ for i in range(int(t.shape[0]/2)):
+ l1=t.iloc[[i]]
+ l1.columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
-listA_found_lr = pd.read_csv(cwd+"seta_found_LR1.csv",index_col=False)
-lsita_lr_found_sz=listA_found_lr.shape[0]
-listA_found_lr.rename(columns = {'strnad':'strand'}, inplace = True)
-listA_notfound_lr = pd.read_csv(cwd+"seta_notfound_LR1.csv",index_col=False)
-lsita_lr_notfound_sz=listA_notfound_lr.shape[0]
+ #l2=t.iloc[[i+1]]
+ l2=t.iloc[[mid]]
+ l2.columns=['sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2','mutated_guide2', 'strand2', 'num_mismatch2']
+ listA_concatenated_match_LR1=pd.concat([l1.reset_index(drop=True),l2.reset_index(drop=True)],axis=1)
+ listA_concatenated_match_LR1=listA_concatenated_match_LR1[['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2']]
+ listA_concatenated_match_LR1['sgRNA_1']=listA_concatenated_match_LR1['sgRNA_1'].str.slice(0, 20)
+ listA_concatenated_match_LR1['sgRNA_2']=listA_concatenated_match_LR1['sgRNA_2'].str.slice(0, 20)
+ listA_concatenated_match_LR1['sgID_1_2']=listA_concatenated_match_LR1['sgID_1']+"|"+listA_concatenated_match_LR1['sgID_2']
+ #dft=dft.append(listA_concatenated_match_LR1)
+ dft=pd.concat([dft,listA_concatenated_match_LR1])
+
+ mid=mid+1
+
+ return dft
+
+#Get non-targeting lists
+def get_lists_non_targeting(ref_list,list_found_ref,list_notfound_ref):
+
+ #This module retrieves guide_id and searches for guide sequences from the table
+ #st.table(ref_list)
+ a_ref=[]
+ for i in range(len(ref_list)):
+ a_ref.append(ref_list.sgID_AB.values[i].split('|')[0])
+ a_ref.append(ref_list.sgID_AB.values[i].split('|')[1])
-#Also read GRCh38 and LR guides for set b
-listB_found_ref = pd.read_csv(cwd+"setb_found_ref1.csv",index_col=False)
-lsitb_ref_found_sz=listB_found_ref.shape[0]
-#remove # from chr# #
-listB_found_ref['chr'] = [x.split(' ')[-0] for x in listB_found_ref['chr']]
-listB_found_ref.rename(columns = {'strnad':'strand'}, inplace = True)
-listB_notfound_ref = pd.read_csv(cwd+"setb_notfound_ref1.csv",index_col=False)
-lsitb_ref_notfound_sz=listB_notfound_ref.shape[0]
+ set_found0_ref=[]
+ for i in range(len(a_ref)):
+ set_found0_ref.append(list_found_ref[list_found_ref['gene']==a_ref[i]])
+ list_concatenated_found_ref = pd.concat(set_found0_ref)
+ list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0] #only select guides with zero mismatches for match list, MISSMATCH LIST LATER
+    #get the matches on alternate loci (chr values that do not contain 'chr')
+ list_concatenated_match_alt_ref = list_concatenated_match_ref[~list_concatenated_match_ref['chr'].str.contains('chr')]
+ #Also remove Alternate loci's data
+ list_concatenated_match_ref = list_concatenated_match_ref[list_concatenated_match_ref['chr'].str.contains('chr')]
+ #st.table(list_concatenated_match_ref)
+ #also create new list with both sgRNAs in one row
+ dft=pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ if list_concatenated_match_ref.shape[0]>0:
+ t=list_concatenated_match_ref.reset_index(drop=True)
+ #st.table(t)
+
+ ##########
+ #check even/odd entries
+ if t.shape[0]==1:
+
+ t1=t.loc[t.index.repeat(2)].reset_index(drop=True)
+ #st.write(t1)
+ dft=assemble_tbl(t1)
+
+ elif t.shape[0]%2==0: #even
+ dft=assemble_tbl(t)
+ else: #odd
+ t1 = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ i=0
+ while i 0]
+ list_concatenated_mutated_ref=list_concatenated_mutated_ref.sort_values('position')
+
+ #Also remove Alternate loci's data
+ list_concatenated_mutated_alt_ref = list_concatenated_mutated_ref[~list_concatenated_mutated_ref['chr'].str.contains('chr')]
+ list_concatenated_mutated_ref = list_concatenated_mutated_ref[list_concatenated_mutated_ref['chr'].str.contains('chr')]
+ dft_mut = pd.DataFrame(columns=['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2'])
+
+ if list_concatenated_mutated_ref.shape[0]>0:
+ dft_mut = get_mutated_res(list_concatenated_mutated_ref)
+ #check not found
+ seta_notfound0_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[0]]
+ seta_notfound1_ref=list_notfound_ref[list_notfound_ref['gene']==a_ref[1]]
+ #st.write(list_notfound_ref[list_notfound_ref['gene']==a_ref[0]])
+ #st.write(seta_notfound0_ref)
+ #st.write(seta_notfound1_ref)
+ #add guideflg1 to return which guide is found
+ guideflg1=0
+ if seta_notfound0_ref.shape[0]>0:
+ guideflg1=2
+ if seta_notfound1_ref.shape[0]>0:
+ guideflg1=1
+ list_concatenated_notfound_ref = pd.concat([seta_notfound0_ref,seta_notfound1_ref])
+ #st.table(a_ref)
+ #st.table(seta_notfound1_ref)
+ #st.table(dft)
+ #st.table(dft_mut)
+ return dft, dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref,list_concatenated_match_alt_ref,list_concatenated_mutated_alt_ref,guideflg1
+ ###########
+#Get All Guides Stats
+#def process_all_guides(glist,list,ref_type,guide_type):
+def process_all_guides(glist,for_list,f_list,nf_list):
+    """Classify every guide in ``glist`` and show one table per result category.
+
+    Each value of ``glist['gene']`` is matched against ``for_list['sgID_AB']``
+    and the selected rows are handed to ``get_lists_non_targeting`` together
+    with ``f_list`` and ``nf_list``. The matched, alt-loci matched, mutated,
+    alt-loci mutated and not-found frames it returns are accumulated over all
+    guides and rendered with ``st.write`` / ``tbl_disp``.
+
+    Args:
+        glist: table whose 'gene' column holds the sgID_AB values to process.
+        for_list: guide list with columns sgID_AB, guide_type, protospacer_A
+            and protospacer_B.
+        f_list: forwarded unchanged to ``get_lists_non_targeting``
+            (presumably the "found" table -- confirm against callers).
+        nf_list: forwarded unchanged to ``get_lists_non_targeting``
+            (presumably the "not found" table -- confirm against callers).
+
+    Returns:
+        None. All output is written to the Streamlit page.
+    """
+    guide_cols = ['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
+    # Pieces are collected in lists and concatenated once after the loop; the
+    # leading empty frame keeps the expected columns when a category has no rows.
+    matched_parts = [pd.DataFrame(columns=guide_cols)]
+    matched_alt_parts = [pd.DataFrame(columns=guide_cols)]
+    mutated_parts = [pd.DataFrame(columns=guide_cols)]
+    mutated_alt_parts = [pd.DataFrame(columns=guide_cols)]
+    notfound_parts = [pd.DataFrame(columns=['gene','ref_guide'])]
+
+    for sg_id in glist['gene']:
+        ref_listC = for_list[for_list['sgID_AB']==sg_id][['sgID_AB','guide_type','protospacer_A','protospacer_B']]
+        # Results 0, 1 and 7 (paired tables and guide flag) were only ever
+        # accumulated and never displayed, so they are discarded here.
+        _, _, res_notfound, list_match, list_mutated, list_match_alt, list_mutated_alt, _ = get_lists_non_targeting(ref_listC,f_list,nf_list)
+        if res_notfound.shape[0]>0:
+            notfound_parts.append(res_notfound)
+        if list_match.shape[0]>0:
+            matched_parts.append(list_match)
+        if list_mutated.shape[0]>0:
+            mutated_parts.append(list_mutated)
+        if list_match_alt.shape[0]>0:
+            # Alt-loci matches come from list_match_alt (list_mutated was appended here by mistake).
+            matched_alt_parts.append(list_match_alt)
+        if list_mutated_alt.shape[0]>0:
+            mutated_alt_parts.append(list_mutated_alt)
+
+    df_matched_guides_ref = pd.concat(matched_parts)
+    df_matched_alt_ref = pd.concat(matched_alt_parts)
+    df_mutated_guides_ref = pd.concat(mutated_parts)
+    df_mutated_guides_alt_ref = pd.concat(mutated_alt_parts)
+    dft_notfoundc = pd.concat(notfound_parts)
+
+    # NOTE(review): 17 and 18 are each passed to several tbl_disp calls below; if
+    # that argument is a widget key the tables may collide -- confirm in tbl_disp.
+    if df_matched_guides_ref.shape[0]>0:
+        gl=df_matched_guides_ref['gene']
+        dupesm=gl[gl.duplicated()]  # gene values that already appeared in an earlier row
+        if dupesm.shape[0]>0:
+            st.write('**Matched Guides**: '+str(df_matched_guides_ref.shape[0])+' and: '+str(dupesm.shape[0])+' are repeated guides (matched to multiple locations)')
+            tbl_disp(df_matched_guides_ref,'select_genes','SetC_GRCh38',17)
+            tbl_disp(dupesm,'select_genes','SetC_GRCh38',17)
+        else:
+            st.write('**Matched Guides**: '+str(df_matched_guides_ref.shape[0]))
+            tbl_disp(df_matched_guides_ref,'select_genes','SetC_GRCh38',17)
+
+    if df_matched_alt_ref.shape[0]>0:
+        st.write('**Matched Guides to Alt Loci**: '+str(df_matched_alt_ref.shape[0]))
+        tbl_disp(df_matched_alt_ref,'select_genes','SetC_GRCh38',17)
+    if df_mutated_guides_ref.shape[0]>0:
+        gl=df_mutated_guides_ref['gene']
+        dupesmu=gl[gl.duplicated()]  # gene values that already appeared in an earlier row
+        if dupesmu.shape[0]>0:
+            st.write('**Mutated Guides (some might have >1 guides)**: '+str(df_mutated_guides_ref.shape[0])+' and: '+str(dupesmu.shape[0])+' are repeated guides')
+            tbl_disp(df_mutated_guides_ref,'select_genes','SetC_GRCh38',18)
+        else:
+            st.write('**Mutated Guides (some might have >1 guides)**: '+str(df_mutated_guides_ref.shape[0]))
+            tbl_disp(df_mutated_guides_ref,'select_genes','SetC_GRCh38',18)
+
+    if df_mutated_guides_alt_ref.shape[0]>0:
+        st.write('**Mutated Guides to Alt Loci**: '+str(df_mutated_guides_alt_ref.shape[0]))
+        tbl_disp(df_mutated_guides_alt_ref,'select_genes','SetC_GRCh38',18)
-listB_found_lr = pd.read_csv(cwd+"setb_found_LR1.csv",index_col=False)
-lsitb_lr_found_sz=listB_found_lr.shape[0]
-listB_found_lr.rename(columns = {'strnad':'strand'}, inplace = True)
-listB_notfound_lr = pd.read_csv(cwd+"setb_notfound_LR1.csv",index_col=False)
-lsitb_lr_notfound_sz=listB_notfound_lr.shape[0]
+    # Guides for which get_lists_non_targeting returned not-found rows.
+    if dft_notfoundc.shape[0]>0:
+        st.write('**Guides Not Found**: '+str(dft_notfoundc.shape[0]))
+        tbl_disp(dft_notfoundc,'select_genes','SetC_GRCh38',19)
+
+#CALC BASED ON LIST, GUIDE TYPE AND REFERENCE
-#Also read GRCh38 and LR guides for set c
-listC_found_ref = pd.read_csv(cwd+"setc_found_ref1.csv",index_col=False)
-lsitc_ref_found_sz=listC_found_ref.shape[0]
-#remove # from chr# #
-listC_found_ref['chr'] = [x.split(' ')[-0] for x in listC_found_ref['chr']]
-listC_found_ref.rename(columns = {'strnad':'strand'}, inplace = True)
-listC_notfound_ref = pd.read_csv(cwd+"setc_notfound_ref1.csv",index_col=False)
-lsitc_ref_notfound_sz=listC_notfound_ref.shape[0]
+#END GENERAL FUNCTIONS
-listC_found_lr = pd.read_csv(cwd+"setc_found_LR1.csv",index_col=False)
-lsitc_lr_found_sz=listC_found_lr.shape[0]
-listC_found_lr.rename(columns = {'strnad':'strand'}, inplace = True)
-listC_notfound_lr = pd.read_csv(cwd+"setc_notfound_LR1.csv",index_col=False)
-lsitc_lr_notfound_sz=listC_notfound_lr.shape[0]
-#also load all mismatched except non-targe guides
-#listA_notfound_lr = pd.read_csv(cwd+"setc_notfound_LR1.csv",index_col=False) seta_all_notmatched_table.csv
st.title('Long Read Guides Search')
-#st.markdown('**Please select an option from the sidebar**')
-
-#st.write(variants)
+st.write('**Important:** Please note that **MTMR3** is not present in guides_c list, so we have **removed it from list a and list b**')
+#tbl_disp(regulara,'variant','ref_guides',0,1)
Calc = st.sidebar.radio(
"",
('ReadME', 'Single/Multiple Guides','All','Not_Found'))
-
if Calc == 'ReadME':
expander = st.expander("How to use this app")
#st.header('How to use this app')
@@ -608,24 +808,24 @@ elif Calc=='Single/Multiple Guides':
'Please select genes list to start processing',
variants_s)
Updated=st.form_submit_button(label = 'Update')
- listA_concatenated_orig = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- reflistA_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- reflistB_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- reflistC_concatenated = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
+ listA_concatenated_orig = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B'])
+ reflistA_concatenated = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B'])
+ reflistB_concatenated = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B'])
+ reflistC_concatenated = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B'])
for variant in multi_genes:
- ref_listA=listA[listA['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
- ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ ref_listA=listA[listA['gene']==variant][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listA = ref_listA[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']]
+ #ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
reflistA_concatenated=pd.concat([reflistA_concatenated,ref_listA])
- ref_listB=listB[listB['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
- ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ ref_listB=listB[listB['gene']==variant][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listB = ref_listB[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']]
+ #ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
reflistB_concatenated=pd.concat([reflistB_concatenated,ref_listB])
- ref_listC=listC[listC['gene']==variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
- ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ ref_listC=listC[listC['gene']==variant][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listC = ref_listC[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']]
+ #ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
reflistC_concatenated=pd.concat([reflistC_concatenated,ref_listC])
listA_concatenated_orig = pd.concat([listA_concatenated_orig,ref_listA,ref_listB,ref_listC])
@@ -637,32 +837,44 @@ elif Calc=='Single/Multiple Guides':
st.markdown(caution_genes,unsafe_allow_html=True)
with st.form(key='columns_in_form_a'):
- c2, c3 = st.columns(2)
+ c2, c3 = st.columns([10,2])
with c2:
get_table_order=tbl_disp(listA_concatenated_orig,'variant','ref_guides',111,0)
- #multi_genes = st.multiselect(
- #'Please select genes list to start processing',
- #variants_s)
+ with c3:
+ ref_sel = st.radio("Select Reference",
+ ('CHM13','GRCh38'),
+ horizontal=True)
+
Updated1=st.form_submit_button(label = 'Generate Order Ready Table')
-
- #get_table_order=tbl_disp(listA_concatenated_orig,'variant','ref_guides',1,0)
-
-
-
-
if not isinstance(get_table_order, type(None)): # and Updated1:# and get_table_order.shape[0]>0:
- #if not isinstance(get_table_order, type(None)):
- variant_set12=get_table_order[get_table_order['guide_type']=='1-2']['gene']
- variant_set34=get_table_order[get_table_order['guide_type']=='3-4']['gene']
- variant_set56=get_table_order[get_table_order['guide_type']=='5-6']['gene']
+ if ref_sel=='GRCh38':
+
+ list_founda=listA_found_ref
+ list_notfounda=listA_notfound_ref
+ list_foundb=listB_found_ref
+ list_notfoundb=listB_notfound_ref
+ list_foundc=listC_found_ref
+ list_notfoundc=listC_notfound_ref
+
+ else:
+ list_founda=listA_found_lr
+ list_notfounda=listA_notfound_lr
+ list_foundb=listB_found_lr
+ list_notfoundb=listB_notfound_lr
+ list_foundc=listC_found_lr
+ list_notfoundc=listC_notfound_lr
+
+
+ variant_set12=get_table_order[get_table_order['guide_type']=='1-2']['sgID_AB']
+ variant_set34=get_table_order[get_table_order['guide_type']=='3-4']['sgID_AB']
+ variant_set56=get_table_order[get_table_order['guide_type']=='5-6']['sgID_AB']
#st.table(variant_set12)
- #st.write(type(variant_set12))
- #if not variant_set12.equals(variant_set34):
- # st.write('**Please select Identical Genes From List A and B**')
+ #st.write(variant_set12)
if variant_set12.shape[0]==variant_set34.shape[0]==variant_set56.shape[0]:
#########Here we call order ready table
#order_ready_tbl_GRCh38(variant_set12,variant_set34,variant_set56)
- order_ready_tbl_CHM13(variant_set12,variant_set34,variant_set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr)
+ #order_ready_tbl_CHM13(variant_set12,variant_set34,variant_set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr)
+ order_ready_tbl_CHM13(variant_set12,variant_set34,variant_set56,list_founda,list_notfounda,list_foundb,list_notfoundb,list_foundc,list_notfoundc,ref_sel)
########END ORDER READY TABLE
@@ -675,332 +887,218 @@ elif Calc=='Single/Multiple Guides':
else:
st.markdown("""**Probably Mixed guides are selected from three lists, Please correct the problem and re-run**""",unsafe_allow_html=True)
-
- #Now BUILD Order Ready List
- #if dft_lr_resa.shape[0] >0 and dft_lr_resb.shape[0] >0 and dft_lr_resc.shape[0] >0:
- # for sgrna in dft_lr_resa
else:
st.write('**Please select guides and Press Update Button to Begin Processing**')
-
-
- ListARes = st.checkbox('Results For SetA',key=300)
- if ListARes:# and not isinstance(get_table, type(None)):#get_table!=None:
- #if ListARes and get_table.shape[0]>0:
- st.write('**Please select Guides From Table Below to processes from ListA**')
- get_table=tbl_disp(reflistA_concatenated,variant,'ref_guides',2,0)
- if not isinstance(get_table, type(None)):
- #variant_set=get_table[get_table['guide_type']=='1-2']['gene']
- variant_set=get_table['gene']
- dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- dft_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_notfounda=pd.DataFrame(columns=['gene','ref_guide'])
- df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- #CHECK FOR GRCh38
- for i in range(variant_set.shape[0]):
- #ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
-
- ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
- res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,listA_found_ref,listA_notfound_ref)
- dft_a=dft_a.append(ref_listA)
- if res.shape[0]>0:
- dft_resa=dft_resa.append(res)
- if res_mut.shape[0]>0:
- dft_res_muta=dft_res_muta.append(res_mut)
- if res_notfound.shape[0]>0:
- dft_notfounda= dft_notfounda.append(res_notfound)
- if list_match.shape[0]>0:
- df_matched_guides_ref= df_matched_guides_ref.append(list_match)
- if list_mutated.shape[0]>0:
- df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
+ if 'get_table_order' in locals():
+ if not isinstance(get_table_order, type(None)):
+ reflistA_concatenated=get_table_order[get_table_order['guide_type']=='1-2']
+ reflistA_concatenated.drop("_selectedRowNodeInfo",axis=1,inplace=True)
+ reflistB_concatenated=get_table_order[get_table_order['guide_type']=='3-4']
+ reflistB_concatenated.drop("_selectedRowNodeInfo",axis=1,inplace=True)
+ reflistC_concatenated=get_table_order[get_table_order['guide_type']=='5-6']
+ reflistC_concatenated.drop("_selectedRowNodeInfo",axis=1,inplace=True)
+
+ #st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**')
+ with st.form(key='columns_in_form_lists'):
+ c2, c3= st.columns([10,1])#([10,10])
+ with c2:
+ List_Selected = st.selectbox('Please select list',
+ ('','ListA','ListB','ListC'))
+ Show_ListResults=st.form_submit_button(label = 'GO')
- #st.write('Selected Reference Guides for **Set A**')
- #tbl_disp(dft_a,'All','ReferenceGuides',0)
- if dft_resa.shape[0]>0:
- st.write('Matched to **GRCh38** Reference Guides for **Set A**')
- tbl_disp(dft_resa,'select_genes','SetA_GRCh38',3)
- elif dft_res_muta.shape[0]>0:
- st.write('Mutated to **GRCh38** Reference Guides for **Set A**')
- st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(dft_res_muta,'select_genes','SetA_Mutated_GRCh38',4)
- if dft_notfounda.shape[0]>0:
- st.write('**SetA Guides Not Found in GRCh38**')
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
- st.table(dft_notfounda)
- #Now CHECK FOR CHM13
- dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- dft_lr_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_lr_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_lr_notfounda=pd.DataFrame(columns=['gene','ref_guide'])
- df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
-
- for i in range(variant_set.shape[0]):
- #ref_listA=listA[listA['gene']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
-
- ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
- res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
- dft_a=dft_a.append(ref_listA)
- if res.shape[0]>0:
- dft_lr_resa=dft_lr_resa.append(res)
- if res_mut.shape[0]>0:
- dft_lr_res_muta=dft_lr_res_muta.append(res_mut)
- if res_notfound.shape[0]>0:
- dft_lr_notfounda= dft_lr_notfounda.append(res_notfound)
- if list_match.shape[0]>0:
- df_matched_guides_lr= df_matched_guides_lr.append(list_match)
- if list_mutated.shape[0]>0:
- df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
-
- if dft_lr_resa.shape[0]>0:
- st.write('Matched to **CHM13** Reference Guides for **Set A**')
- tbl_disp(dft_lr_resa,'select_genes','SetA_CHM13',5)
- elif dft_lr_res_muta.shape[0]>0:
- st.write('Mutated to **CHM13** Reference Guides for **Set A**')
- st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(dft_lr_res_muta,'select_genes','SetA_Mutated_CHM13',6)
- if dft_lr_notfounda.shape[0]>0:
- st.write('**SetA Guides Not Found in CHM13**')
- st.table(dft_lr_notfounda)
- #NOW MERGE FROM GRCh38 and LR
- merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
- merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
- merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
- merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
- if merged_match_set.shape[0]>0:
- #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
- st.write('**Matched** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
- tbl_disp(merged_match_set,'select_genes','SetA_Matched_GRCh38_CHM13',7,0)
-
- #st.table(merged_match_seta)
- elif merged_mutated_set.shape[0]>0:
- #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
- st.write('**Mutated** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
-
- tbl_disp(merged_mutated_set,'select_genes','SetA_Mutated_GRCh38_CHM13',8,0)
- elif ListARes:
- st.write("**Please select genes from the above table to begin**")
-
- ListBRes = st.checkbox('Results For SetB',key=40)
- if ListBRes: # and not isinstance(get_table, type(None)):#get_table!=None:
- st.write('**Please select Guides From Table Below to processes from ListB**')
- get_table=tbl_disp(reflistB_concatenated,variant,'ref_guides',9,0)
- if not isinstance(get_table, type(None)):
- #variant_set=get_table[['gene']]
- variant_set=get_table['gene']
- dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- dft_resb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_res_mutb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_notfoundb=pd.DataFrame(columns=['gene','ref_guide'])
- df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- #CHECK FOR GRCh38
- for i in range(variant_set.shape[0]):
- #ref_listB=listB[listB['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listB=listB[listB['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listB =ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
-
- ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
- res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,listB_found_ref,listB_notfound_ref)
- dft_b=dft_b.append(ref_listB)
- if res.shape[0]>0:
- dft_resb=dft_resb.append(res)
- if res_mut.shape[0]>0:
- dft_res_mutb=dft_res_mutb.append(res_mut)
- if res_notfound.shape[0]>0:
- dft_notfoundb= dft_notfoundb.append(res_notfound)
- if list_match.shape[0]>0:
- df_matched_guides_ref= df_matched_guides_ref.append(list_match)
- if list_mutated.shape[0]>0:
- df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
+ #ListARes = st.checkbox('Results For SetA',key=300)
+ if List_Selected=='ListA':# and not isinstance(get_table, type(None)):#get_table!=None:
+ ref_list= listA
+ st.write('**Please select Guides From Table Below to processes from ListA**')
+ with st.form(key='columns_in_form_listsA'):
+ c2, c3= st.columns([100,2])#([10,10])
+ with c2:
+ get_table=tbl_disp(reflistA_concatenated,variant,'ref_guides',2,0)
+ #List_Selected = st.selectbox('Please select list',
+ #('ListA','ListB','ListC'))
+ Show_ListResults=st.form_submit_button(label = 'Show ListA Results')
- #st.write('Selected Reference Guides for **Set B**')
- #tbl_disp(dft_b,'All','ReferenceGuides',0)
- if dft_resb.shape[0]>0:
- st.write('Matched to **GRCh38** Reference Guides for **Set B**')
- tbl_disp(dft_resb,'select_genes','SetB_GRCh38',10)
- elif dft_res_mutb.shape[0]>0:
- st.write('Mutated to **GRCh38** Reference Guides for **Set B**')
- st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(dft_res_mutb,'select_genes','SetB_Mutated_GRCh38',11)
- if dft_notfoundb.shape[0]>0:
- st.write('**SetB Guides Not Found in GRCh38**')
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
- st.table(dft_notfoundb)
-
- #Now CHECK FOR CHM13
- dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- dft_lr_resb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_lr_res_mutb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_lr_notfoundb=pd.DataFrame(columns=['gene','ref_guide'])
- df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
-
- for i in range(variant_set.shape[0]):
- #ref_listB=listB[listB['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listB=listB[listB['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listB=ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
-
- ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
- res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
- dft_b=dft_b.append(ref_listB)
- if res.shape[0]>0:
- dft_lr_resb=dft_lr_resb.append(res)
- if res_mut.shape[0]>0:
- dft_lr_res_mutb=dft_lr_res_mutb.append(res_mut)
- if res_notfound.shape[0]>0:
- dft_lr_notfoundb= dft_lr_notfoundb.append(res_notfound)
- if list_match.shape[0]>0:
- df_matched_guides_lr= df_matched_guides_lr.append(list_match)
- if list_mutated.shape[0]>0:
- df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
+ #st.write('**Please select Guides From Table Below to processes from ListA**')
+ #get_table=tbl_disp(reflistA_concatenated,variant,'ref_guides',2,0)
+ if not isinstance(get_table, type(None)):
+ if ref_sel=='GRCh38':
+ list_found=listA_found_ref
+ list_notfound=listA_notfound_ref
+ else:
+
+ list_found=listA_found_lr
+ list_notfound=listA_notfound_lr
+
+ variant_set=get_table['sgID_AB']
+ dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
+ dft_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
+ dft_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
+ dft_notfounda=pd.DataFrame(columns=['gene','ref_guide'])
+ df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ #CHECK FOR GRCh38
+ for i in range(variant_set.shape[0]):
+ #ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listA=ref_list[ref_list['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
+
+ #ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ #st.table(ref_listA)
+ res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,list_found,list_notfound)
+ dft_a=dft_a.append(ref_listA)
+ if res.shape[0]>0:
+ dft_resa=dft_resa.append(res)
+ if res_mut.shape[0]>0:
+ dft_res_muta=dft_res_muta.append(res_mut)
+ if res_notfound.shape[0]>0:
+ dft_notfounda= dft_notfounda.append(res_notfound)
+ if list_match.shape[0]>0:
+ df_matched_guides_ref= df_matched_guides_ref.append(list_match)
+ if list_mutated.shape[0]>0:
+ df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
- if dft_lr_resb.shape[0]>0:
- st.write('Matched to **CHM13** Reference Guides for **Set B**')
- tbl_disp(dft_lr_resb,'select_genes','SetB_CHM13',12)
- elif dft_lr_res_mutb.shape[0]>0:
- st.write('Mutated to **CHM13** Reference Guides for **Set B**')
- st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(dft_lr_res_mutb,'select_genes','SetB_Mutated_CHM13',13)
- if dft_lr_notfoundb.shape[0]>0:
- st.write('**SetB Guides Not Found in CHM13**')
- st.table(dft_lr_notfoundb)
- #NOW MERGE FROM GRCh38 and LR
- merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
- merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
- merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
- merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
- if merged_match_set.shape[0]>0:
- #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
- st.write('**Matched** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
- tbl_disp(merged_match_set,'select_genes','SetB_Matched_GRCh38_CHM13',14,0)
-
- #st.table(merged_match_seta)
- elif merged_mutated_set.shape[0]>0:
- #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
- st.write('**Mutated** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
- #st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(merged_mutated_set,'select_genes','SetB_Mutated_GRCh38_CHM13',15,0)
-
- elif ListBRes:
- st.write("**Please select genes from the above table to begin**")
-
- ListCRes = st.checkbox('Results For SetC',key=50)
- if ListCRes: # and not isinstance(get_table, type(None)):#get_table!=None:
- #variant_set=get_table[['gene']]
- st.write('**Please select Guides From Table Below to processes from ListC**')
- get_table=tbl_disp(reflistC_concatenated,variant,'ref_guides',16,0)
- if not isinstance(get_table, type(None)):
- variant_set=get_table['gene']
- dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- dft_resc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_res_mutc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_notfoundc=pd.DataFrame(columns=['gene','ref_guide'])
- df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- #CHECK FOR GRCh38
- for i in range(variant_set.shape[0]):
- #ref_listC=listC[listC['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listC=listC[listC['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listC =ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
-
- ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
- res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,listC_found_ref,listC_notfound_ref)
- dft_c=dft_c.append(ref_listC)
- if res.shape[0]>0:
- dft_resc=dft_resc.append(res)
- if res_mut.shape[0]>0:
- dft_res_mutc=dft_res_mutc.append(res_mut)
- if res_notfound.shape[0]>0:
- dft_notfoundc= dft_notfoundc.append(res_notfound)
- if list_match.shape[0]>0:
- df_matched_guides_ref= df_matched_guides_ref.append(list_match)
- if list_mutated.shape[0]>0:
- df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
-
- #st.write('Selected Reference Guides for **Set C**')
- #tbl_disp(dft_c,'All','ReferenceGuides',0)
- if dft_resc.shape[0]>0:
- st.write('Matched to **GRCh38** Reference Guides for **Set C**')
- tbl_disp(dft_resc,'select_genes','SetC_GRCh38',17)
- elif dft_res_mutc.shape[0]>0:
- st.write('Mutated to **GRCh38** Reference Guides for **Set C**')
- st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(dft_res_mutc,'select_genes','SetC_Mutated_GRCh38',18)
- if dft_notfoundc.shape[0]>0:
- st.write('**SetC Guides Not Found in GRCh38**')
- #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
- st.table(dft_notfoundc)
-
- #Now CHECK FOR CHM13
- dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
- dft_lr_resc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_lr_res_mutc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
- dft_lr_notfoundc=pd.DataFrame(columns=['gene','ref_guide'])
- df_matched_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
- df_mutated_guides_lr = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
-
- for i in range(variant_set.shape[0]):
- #ref_listC=listC[listC['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listC=listC[listC['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
- ref_listC=ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
-
- ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
- res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
- dft_c=dft_c.append(ref_listC)
- if res.shape[0]>0:
- dft_lr_resc=dft_lr_resc.append(res)
- if res_mut.shape[0]>0:
- dft_lr_res_mutc=dft_lr_res_mutc.append(res_mut)
- if res_notfound.shape[0]>0:
- dft_lr_notfoundc= dft_lr_notfoundc.append(res_notfound)
- if list_match.shape[0]>0:
- df_matched_guides_lr= df_matched_guides_lr.append(list_match)
- if list_mutated.shape[0]>0:
- df_mutated_guides_lr= df_mutated_guides_lr.append(list_mutated)
+ #st.write('Selected Reference Guides for **Set A**')
+ #tbl_disp(dft_a,'All','ReferenceGuides',0)
+ st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**')
+ if dft_resa.shape[0]>0:
+ st.write('Matched to '+ref_sel+' Reference Guides for **Set A**')
+ tbl_disp(dft_resa,'select_genes','SetA_GRCh38',3)
+ elif dft_res_muta.shape[0]>0:
+ st.write('None of the guides Matched, So reporting **Mutated to** '+ref_sel+' Reference Guides for **Set A**')
+ st.markdown(caution1,unsafe_allow_html=True)
+ tbl_disp(dft_res_muta,'select_genes','SetA_Mutated_GRCh38',4)
+ if dft_notfounda.shape[0]>0:
+ st.write('**SetA Guides Not Found in '+ref_sel+' (None of the guides are Matched/Mutated)**')
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
+ st.table(dft_notfounda)
+
+ #ListB section: select guides in the reference table, then pick the found/not-found tables for the reference chosen via ref_sel
+ if List_Selected=='ListB': # and not isinstance(get_table, type(None)):#get_table!=None:
+ ref_list= listB
+ st.write('**Please select Guides From Table Below to processes from ListB**')
+ with st.form(key='columns_in_form_listsB'): #key is specific to ListB; Streamlit rejects two forms with the same key in one run
+ c2, c3= st.columns([100,2])#([10,10])
+ with c2:
+ get_table=tbl_disp(reflistB_concatenated,variant,'ref_guides',2,0)
+ Show_ListResults=st.form_submit_button(label = 'Show ListB Results')
+ if not isinstance(get_table, type(None)): #skip the lookup when tbl_disp returned None
+ if ref_sel=='GRCh38':
+
+ list_found=listB_found_ref
+ list_notfound=listB_notfound_ref
+ else: #any other ref_sel value uses the LR tables
+
+ list_found=listB_found_lr
+ list_notfound=listB_notfound_lr
- if dft_lr_resc.shape[0]>0:
- st.write('Matched to **CHM13** Reference Guides for **Set C**')
- tbl_disp(dft_lr_resc,'select_genes','SetC_CHM13',19)
- elif dft_lr_res_mutc.shape[0]>0:
- st.write('Mutated to **CHM13** Reference Guides for **Set C**')
- st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(dft_lr_res_mutc,'select_genes','SetC_Mutated_CHM13',20)
- if dft_lr_notfoundc.shape[0]>0:
- st.write('**SetC Guides Not Found in CHM13**')
- st.table(dft_lr_notfoundc)
- #NOW MERGE FROM GRCh38 and LR
- merged_mutated_set=pd.merge(df_mutated_guides_ref,df_mutated_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
- merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
- merged_match_set=pd.merge(df_matched_guides_ref,df_matched_guides_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
- merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
- if merged_match_set.shape[0]>0:
- #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
- st.write('**Matched** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
- tbl_disp(merged_match_set,'select_genes','SetC_Matched_GRCh38_CHM13',21,0)
-
- #st.table(merged_match_seta)
- elif merged_mutated_set.shape[0]>0:
- #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
- st.write('**Mutated** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
- #st.markdown(caution1,unsafe_allow_html=True)
- tbl_disp(merged_mutated_set,'select_genes','SetC_Mutated_GRCh38_CHM13',22,0)
+ #variant_set=get_table[['gene']]
+ variant_set=get_table['sgID_AB'] #sgID_AB values of the rows returned by tbl_disp
+ dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
+ dft_resb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
+ dft_res_mutb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
+ dft_notfoundb=pd.DataFrame(columns=['gene','ref_guide'])
+ df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ #CHECK EACH SELECTED GUIDE AGAINST list_found/list_notfound (GRCh38 tables when ref_sel=='GRCh38', LR tables otherwise)
+ for i in range(variant_set.shape[0]):
+ #ref_listB=listB[listB['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listB=ref_list[ref_list['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listB =ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
+
+ #ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']  (rename disabled: ref_listB keeps the 'sgID_AB' column name)
+ res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,list_found,list_notfound)
+ dft_b=dft_b.append(ref_listB) #NOTE(review): dft_b appears unused apart from the commented-out tbl_disp call below
+ if res.shape[0]>0:
+ dft_resb=dft_resb.append(res)
+ if res_mut.shape[0]>0:
+ dft_res_mutb=dft_res_mutb.append(res_mut)
+ if res_notfound.shape[0]>0:
+ dft_notfoundb= dft_notfoundb.append(res_notfound)
+ if list_match.shape[0]>0:
+ df_matched_guides_ref= df_matched_guides_ref.append(list_match)
+ if list_mutated.shape[0]>0:
+ df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
+
+ #st.write('Selected Reference Guides for **Set B**')
+ #tbl_disp(dft_b,'All','ReferenceGuides',0)
+ st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**')
+ if dft_resb.shape[0]>0: #at least one selected guide pair matched
+ st.write('Matched to '+ref_sel+' Reference Guides for **Set B**')
+ tbl_disp(dft_resb,'select_genes','SetB_GRCh38',10)
+ elif dft_res_mutb.shape[0]>0: #mutated table is shown only when the matched table is empty
+ st.write('None of the guides Matched, So reporting **Mutated to** '+ref_sel+' Reference Guides for **Set B**')
+ st.markdown(caution1,unsafe_allow_html=True)
+ tbl_disp(dft_res_mutb,'select_genes','SetB_Mutated_GRCh38',11)
+ if dft_notfoundb.shape[0]>0: #independent of the matched/mutated branch above
+ st.write('**SetB Guides Not Found in '+ref_sel+' (None of the guides are Matched/Mutated)**')
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
+ st.table(dft_notfoundb)
+
+
+
+ #ListCRes = st.checkbox('Results For SetC',key=50)
+ if List_Selected=='ListC': # and not isinstance(get_table, type(None)):#get_table!=None:
+ ref_list= listC
- # if ListARes and ListBRes and ListCRes:
- # Order_List = st.checkbox('Generate Order Ready List',key=100)
- # if Order_List:
- # if dft_lr_resa.shape[0]>0:
- # st.table(dft_lr_resa)
+ st.write('**Please select Guides From Table Below to processes from ListC**')
+ with st.form(key='columns_in_form_listsC'): #key is specific to ListC; Streamlit rejects two forms with the same key in one run
+ c2, c3= st.columns([100,2])#([10,10])
+ with c2:
+ get_table=tbl_disp(reflistC_concatenated,variant,'ref_guides',2,0)
+ Show_ListResults=st.form_submit_button(label = 'Show ListC Results')
+ if not isinstance(get_table, type(None)): #skip the lookup when tbl_disp returned None
+ if ref_sel=='GRCh38':
+
+ list_found=listC_found_ref
+ list_notfound=listC_notfound_ref
+ else: #any other ref_sel value uses the LR tables
+
+ list_found=listC_found_lr #must be the ListC tables: the guides looked up below come from listC
+ list_notfound=listC_notfound_lr
+ variant_set=get_table['sgID_AB'] #sgID_AB values of the rows returned by tbl_disp
+ dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
+ dft_resc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
+ dft_res_mutc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
+ dft_notfoundc=pd.DataFrame(columns=['gene','ref_guide'])
+ df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch'])
+ #CHECK EACH SELECTED GUIDE AGAINST list_found/list_notfound (GRCh38 tables when ref_sel=='GRCh38', LR tables otherwise)
+ for i in range(variant_set.shape[0]):
+ #ref_listC=listC[listC['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listC=ref_list[ref_list['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
+ ref_listC =ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
+
+ #ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
+ res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,list_found,list_notfound)
+ dft_c=dft_c.append(ref_listC)
+ if res.shape[0]>0:
+ dft_resc=dft_resc.append(res)
+ if res_mut.shape[0]>0:
+ dft_res_mutc=dft_res_mutc.append(res_mut)
+ if res_notfound.shape[0]>0:
+ dft_notfoundc= dft_notfoundc.append(res_notfound)
+ if list_match.shape[0]>0:
+ df_matched_guides_ref= df_matched_guides_ref.append(list_match)
+ if list_mutated.shape[0]>0:
+ df_mutated_guides_ref= df_mutated_guides_ref.append(list_mutated)
-
- elif ListCRes:
- st.write("**Please select genes from the above table to begin**")
+ #st.write('Selected Reference Guides for **Set C**')
+ #tbl_disp(dft_c,'All','ReferenceGuides',0)
+ st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**')
+ if dft_resc.shape[0]>0: #at least one selected guide pair matched
+ st.write('Matched to '+ref_sel+' Reference Guides for **Set C**')
+ tbl_disp(dft_resc,'select_genes','SetC_GRCh38',17)
+ elif dft_res_mutc.shape[0]>0: #mutated table is shown only when the matched table is empty
+ st.write('None of the guides Matched, So reporting **Mutated to** '+ref_sel+' Reference Guides for **Set C**')
+ st.markdown(caution1,unsafe_allow_html=True)
+ tbl_disp(dft_res_mutc,'select_genes','SetC_Mutated_GRCh38',18)
+ if dft_notfoundc.shape[0]>0: #independent of the matched/mutated branch above
+ st.write('**SetC Guides Not Found in '+ref_sel+' (None of the guides are Matched/Mutated)**')
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
+ st.table(dft_notfoundc)
+
+
elif Calc=='Not_Found':
ListAResNotFound = st.checkbox('Results For SetA',key=30)
if ListAResNotFound and listA_notfound_lr.shape[0]>0:
@@ -1092,4 +1190,93 @@ elif Calc=='Not_Found':
tbl_disp(non_targeting_guides_c,'all_not_found','SetA_KOLF2.1',23,0)
else:
- st.write("**Place Holder for All**")
\ No newline at end of file
+ guidetype = st.radio("Select Guide Type",('Non-targetting','Regular'),horizontal=True)
+ if guidetype=='Non-targetting':
+ with st.form(key='columns_in_form_non'):
+ c2, c3 = st.columns([5,5])#([10,10])
+ with c2:
+ guides_List = st.selectbox('Please select list',
+ ('ListA','ListB','ListC'))
+ with c3:
+ ref_type_sel_non = st.radio("Select Reference",
+ ('CHM13','GRCh38'),
+ horizontal=True)
+ Show_Results_non=st.form_submit_button(label = 'Non-targeting Guides Results')
+
+ if Show_Results_non and guides_List=='ListA':
+ for_list=listA
+ if ref_type_sel_non=='GRCh38':
+ f_list=listA_found_ref
+ nf_list=listA_notfound_ref
+ else:
+ f_list=listA_found_lr
+ nf_list=listA_notfound_lr
+
+ st.write('Total: '+str(len(non_targeting_lista))+' Non-targeting Guide pairs and '+str(2*len(non_targeting_lista))+' single guides in ListA')
+
+ process_all_guides(pd.DataFrame(pd.Series(non_targeting_lista,name='gene')),for_list,f_list,nf_list)
+ if Show_Results_non and guides_List=='ListB': #form submitted with ListB selected
+ for_list=listB
+ if ref_type_sel_non=='GRCh38':
+ f_list=listB_found_ref
+ nf_list=listB_notfound_ref
+ else: #the other radio option uses the LR tables
+ f_list=listB_found_lr
+ nf_list=listB_notfound_lr
+
+ st.write('Total: '+str(len(non_targeting_listb))+' Non-targeting Guide pairs and '+str(2*len(non_targeting_listb))+' single guides in ListB')
+ process_all_guides(pd.DataFrame(pd.Series(non_targeting_listb,name='gene')),for_list,f_list,nf_list) #names are passed as a one-column 'gene' DataFrame
+ if Show_Results_non and guides_List=='ListC': #form submitted with ListC selected
+ for_list=listC
+ if ref_type_sel_non=='GRCh38':
+ f_list=listC_found_ref
+ nf_list=listC_notfound_ref
+ else: #the other radio option uses the LR tables
+ f_list=listC_found_lr
+ nf_list=listC_notfound_lr
+
+ st.write('Total: '+str(len(non_targeting_listc))+' Non-targeting Guide pairs and '+str(2*len(non_targeting_listc))+' single guides in ListC')
+ process_all_guides(pd.DataFrame(pd.Series(non_targeting_listc,name='gene')),for_list,f_list,nf_list) #names are passed as a one-column 'gene' DataFrame
+
+ elif guidetype=='Regular':
+ st.write('**Maximum End Index=** '+str(regular_lista.shape[0])) #bound comes from regular_lista only, yet the same start/end slice all three lists below
+ with st.form(key='columns_in_form_regular'):
+ c2, c3, c4 = st.columns([5,5,5])#([10,10])
+ with c2:
+ set_start = int(st.text_input('Start Index', '0')) #int() raises ValueError if the text is not an integer literal
+ with c3:
+ set_end = int(st.text_input('End Index', str(regular_lista.shape[0]))) #defaults to the full length of regular_lista
+ with c4:
+ ref_type_sel = st.radio("Select Reference",
+ ('CHM13','GRCh38'),
+ horizontal=True)
+
+ Show_Results=st.form_submit_button(label = 'Show Regular Guides Results')
+ if Show_Results:# and guides_List=="ListA":
+
+ regular_listc=regular_listc[set_start:set_end] #NOTE(review): plain slice here but .iloc for the B/A lists below - confirm regular_listc's type
+ regular_listb=regular_listb.iloc[set_start:set_end]
+ regular_lista=regular_lista.iloc[set_start:set_end]
+ if ref_type_sel=='GRCh38':
+
+ list_founda=listA_found_ref
+ list_notfounda=listA_notfound_ref
+ list_foundb=listB_found_ref
+ list_notfoundb=listB_notfound_ref
+ list_foundc=listC_found_ref
+ list_notfoundc=listC_notfound_ref
+
+ else: #'CHM13' selects the LR tables
+ list_founda=listA_found_lr
+ list_notfounda=listA_notfound_lr
+ list_foundb=listB_found_lr
+ list_notfoundb=listB_notfound_lr
+ list_foundc=listC_found_lr
+ list_notfoundc=listC_notfound_lr
+
+ dupesq=list(duplicates(listA['gene'])) #values of listA['gene'] that repeat an earlier entry (used only for the count below)
+ non_targetinga=variantsa1[pd.Series(variantsa1).str.contains('non-targeting')] #unique ListA gene names containing 'non-targeting'
+ regulara=variantsa1[~pd.Series(variantsa1).str.contains('non-targeting')] #the remaining unique ListA gene names
+ st.write('Total: '+str(len(regulara))+' Regular Guide (unique genes only) **Excluding:** '+str(len(non_targetinga))+' Non-targeting pairs **and** '+str(len(dupesq))+' Repeated entries (same gene names)')
+ order_ready_tbl_CHM13(regular_lista,regular_listb,regular_listc,list_founda,list_notfounda,list_foundb,list_notfoundb,list_foundc,list_notfoundc,ref_type_sel) #NOTE(review): helper name says CHM13 but ref_type_sel may be 'GRCh38' - confirm it honours the argument
+
\ No newline at end of file