"""Streamlit page that plots per-variant LLR scores for a selected isoform.

The user picks an entry from ``isoform_list.csv``; the matching LLR table is
read from ``LLR_FILE``, drawn as an interactive heatmap (optionally overlaid
with ClinVar benign/pathogenic markers) and offered as a CSV download.
"""
from zipfile import ZipFile

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import streamlit as st

# Issued before any other Streamlit call in this script.
st.set_page_config(layout="wide")

LLR_FILE = 'ALL_hum_isoforms_ESM1b_LLR.zip'

# Row of the isoform list that is pre-selected when the page loads.
DEFAULT_SELECTION_INDEX = 6251

# Hover text for heatmap cells and for the ClinVar markers.
# NOTE(review): the original built these with a one-element ``join`` and a
# trailing ``+ ''``, both no-ops; markup may have been lost there - confirm
# against the upstream file.
HEATMAP_HOVER = "%{x} %{y} (%{z:.2f})"
MARKER_HOVER = "%{x} %{y} (%{customdata:.2f})"

df = pd.read_csv('isoform_list.csv', index_col=0)
uids = list(df.index.values)
clinvar = pd.read_csv('clinvar.csv.gz')


def load_LLR(uniprot_id):
    '''Loads the LLRs for a given uniprot id. Returns a 20xL dataframe
    rows are indexed by AA change,
    (AAorder=['K','R','H','E','D','N','Q','T','S','C','G','A','V','L','I','M','P','Y','F','W'])
    columns indexed by WT_AA+position e.g, "G 12"
    Usage example: load_LLR('P01116') or load_LLR('P01116-2')'''
    with ZipFile(LLR_FILE) as archive:
        # The first archive entry is used as the path prefix of every member
        # (presumably the top-level folder - verify against the archive).
        member = archive.namelist()[0] + uniprot_id + '_LLR.csv'
        # Close the member handle as soon as the CSV has been parsed.
        with archive.open(member) as handle:
            return pd.read_csv(handle, index_col=0)


def meltLLR(LLR, gene_prefix=None, ignore_pos=False):
    '''Flattens an LLR table into one row per variant.

    LLR: dataframe as returned by load_LLR (columns like "G 12", rows
        indexed by the substituted amino acid).
    gene_prefix: if given, every variant label becomes
        gene_prefix + '_' + label.
    ignore_pos: if True, the integer `pos` column is not added.

    Returns a dataframe indexed by variant label (e.g. "G12K") with a
    `score` column and, unless ignore_pos is set, a `pos` column.'''
    variants = LLR.melt(ignore_index=False)
    # "G 12" + "K" -> "G12K": drop the spaces, then append the row label.
    variants['variant'] = [
        column.replace(' ', '') + alt
        for column, alt in zip(variants['variable'], variants.index)
    ]
    variants['score'] = variants['value']
    variants = variants.set_index('variant')
    if not ignore_pos:
        # The position is everything between the first and last character.
        variants['pos'] = [int(label[1:-1]) for label in variants.index]
    del variants['variable'], variants['value']
    if gene_prefix is not None:
        variants.index = gene_prefix + '_' + variants.index
    return variants


def _variant_markers(LLR, mutations):
    '''Builds the scatter trace marking the cells of LLR whose variant label
    is in `mutations` (a set of labels such as "G12K").'''
    xs, ys, scores = [], [], []
    for column in LLR.columns:
        # "G 12" -> "G12": first character plus everything after the
        # separator at index 1.
        prefix = column[0] + column[2:]
        for alt in LLR.index:
            if prefix + alt in mutations:
                xs.append(column)
                ys.append(alt)
                scores.append(LLR.loc[alt, column])
    # NOTE(review): no marker colour is set here, so colours come from the
    # figure defaults - confirm they match the red/green wording of the
    # ClinVar checkbox label.
    return go.Scatter(
        x=xs,
        y=ys,
        customdata=scores,
        mode='markers',
        marker=dict(size=8),
        showlegend=False,
        hovertemplate=MARKER_HOVER,
    )


def plot_interactive(uniprot_id, show_clinvar=False, title=None):
    '''Returns the interactive LLR heatmap for one isoform.

    uniprot_id: id passed to load_LLR.
    show_clinvar: if True, overlay markers for the ClinVar rows whose
        LLR_file_id equals uniprot_id and whose ClinicalSignificance is
        'Benign' or 'Pathogenic'.
    title: figure title; defaults to uniprot_id. (Previously the title was
        read from the module-level `selection`, which made the function
        unusable before the selectbox had run.)'''
    primaryLLR = load_LLR(uniprot_id)

    fig = px.imshow(
        primaryLLR.values,
        x=primaryLLR.columns,
        y=primaryLLR.index,
        color_continuous_scale='Viridis_r',
        zmax=0,
        zmin=-20,
        labels=dict(y="Amino acid change", x="Protein sequence", color="LLR"),
        template='plotly_white',
        title=uniprot_id if title is None else title,
    )
    # Initial x window is [0, 99]; the range slider moves it along the axis.
    fig.update_xaxes(
        tickangle=-90,
        range=[0, 99],
        rangeslider=dict(visible=True),
        dtick=1,
    )
    fig.update_yaxes(dtick=1)
    fig.update_layout(
        {
            'plot_bgcolor': 'rgba(0, 0, 0, 0)',
            'paper_bgcolor': 'rgba(0, 0, 0, 0)',
        },
        font={'family': 'Arial', 'size': 11},
        hoverlabel=dict(font=dict(family='Arial', size=14)),
    )
    fig.update_traces(hovertemplate=HEATMAP_HOVER)

    if show_clinvar:
        iso_clinvar = clinvar[clinvar.LLR_file_id == uniprot_id]
        significance = iso_clinvar.ClinicalSignificance
        benign = set(iso_clinvar[significance == 'Benign'].variant.values)
        pathogenic = set(
            iso_clinvar[significance == 'Pathogenic'].variant.values
        )
        # A variant listed under both labels is drawn as benign only, and the
        # pathogenic trace is added before the benign one, as before.
        fig.add_trace(_variant_markers(primaryLLR, pathogenic - benign))
        fig.add_trace(_variant_markers(primaryLLR, benign))

    return fig


# Options are taken from the isoform table; presumably they are the `txt`
# column, since the selection is matched against `df.txt` below. The default
# index is clamped so a shorter isoform list does not make the widget fail.
selection = st.selectbox(
    "uniprot_id:",
    df,
    index=min(DEFAULT_SELECTION_INDEX, max(len(df) - 1, 0)),
)
uid = df[df.txt == selection].index.values[0]

show_clinvar = st.checkbox(
    'show ClinVar annotations (red: pathogenic, green: benign)',
    value=False,
)

fig = plot_interactive(uid, show_clinvar=show_clinvar, title=selection)
fig.update_layout(width=800, height=600, autosize=False)

st.plotly_chart(fig, use_container_width=True)

st.download_button(
    label="Download data as CSV",
    data=meltLLR(load_LLR(uid)).to_csv(),
    file_name=selection + '.csv',
    mime='text/csv',
)

st.markdown("""
To obtain ESM effect scores for non-missense mutations (e.g. indels) or non-human proteins, please use the [esm-variants command-line tool](https://github.com/ntranoslab/esm-variants).
""")