Spaces:
Running
Running
File size: 4,337 Bytes
985a7dc cf59915 985a7dc 073b569 985a7dc f575805 9aa0dab d37cbca 3977133 985a7dc 1ed98ce 985a7dc d37cbca 985a7dc c4368d7 985a7dc 42683c0 985a7dc 8b46687 985a7dc d37cbca ae9cb77 d37cbca ae9cb77 d37cbca ae9cb77 d37cbca ae9cb77 d37cbca ae9cb77 268ab01 ae9cb77 d37cbca 268ab01 d37cbca fc3b97a 985a7dc a087ee0 985a7dc b6f7a3c 39a9e82 8c2f1e1 b6f7a3c d37cbca 87fb83d 1ed98ce 208e06a 1ed98ce a5dc7a3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import streamlit as st
st.set_page_config(layout="wide")
import pandas as pd
import numpy as np
from zipfile import ZipFile
import plotly.express as px
import plotly.graph_objs as go
# Zip archive holding one "<uniprot_id>_LLR.csv" member per isoform (read by load_LLR).
LLR_FILE = "ALL_hum_isoforms_ESM1b_LLR.zip"

# Isoform table, indexed by its first CSV column; the index values are the ids
# later passed to load_LLR, and the `txt` column holds the selectbox labels.
df = pd.read_csv("isoform_list.csv", index_col=0)

# Plain list of every id in the isoform table.
# NOTE(review): not referenced anywhere else in the visible code - confirm before removing.
uids = list(df.index.values)

# ClinVar annotations; plot_interactive reads the LLR_file_id,
# ClinicalSignificance and variant columns.
clinvar = pd.read_csv("clinvar.csv.gz")
def load_LLR(uniprot_id, llr_file=None):
    '''Loads the LLRs for a given uniprot id. Returns a 20xL dataframe
    rows are indexed by AA change,
    (AAorder=['K','R','H','E','D','N','Q','T','S','C','G','A','V','L','I','M','P','Y','F','W'])
    columns indexed by WT_AA+position e.g, "G 12"

    Parameters:
      uniprot_id: isoform id used to build the member name "<uniprot_id>_LLR.csv"
      llr_file:   optional path (or file object) of the zip archive to read;
                  when omitted the module-level LLR_FILE is used

    Raises:
      KeyError if the archive has no member for uniprot_id (from ZipFile.open)

    Usage example: load_LLR('P01116') or load_LLR('P01116-2')'''
    archive_path = LLR_FILE if llr_file is None else llr_file
    with ZipFile(archive_path) as myzip:
        # The first archive entry is used as the path prefix of every CSV
        # member (presumably the archive's top-level folder - verify if the
        # archive layout ever changes).
        member = myzip.namelist()[0] + uniprot_id + '_LLR.csv'
        # Open the member in its own context manager so the handle is closed
        # as soon as pandas has finished parsing it.
        with myzip.open(member) as data:
            return pd.read_csv(data, index_col=0)
def meltLLR(LLR, gene_prefix=None, ignore_pos=False):
    '''Reshapes an LLR matrix into one row per variant.

    Parameters:
      LLR:         dataframe as returned by load_LLR (rows: AA change,
                   columns: WT_AA+position such as "G 12")
      gene_prefix: optional string; when given every variant id becomes
                   "<gene_prefix>_<variant>"
      ignore_pos:  when True the integer `pos` column is not added

    Returns a dataframe indexed by `variant` (column label with the spaces
    removed, followed by the row label, e.g. "G12K") with a `score` column
    and, unless ignore_pos is set, a `pos` column parsed from the variant id.'''
    melted = LLR.melt(ignore_index=False)
    # Column label "G 12" + row label "K" -> variant id "G12K".
    melted['variant'] = [
        wt_pos.replace(' ', '') + alt
        for wt_pos, alt in zip(melted['variable'], melted.index)
    ]
    melted = (
        melted.drop(columns='variable')
        .rename(columns={'value': 'score'})
        .set_index('variant')
    )
    if not ignore_pos:
        # Strip the leading WT residue and trailing alt residue: "G12K" -> 12.
        melted['pos'] = [int(variant[1:-1]) for variant in melted.index]
    if gene_prefix is not None:
        melted.index = gene_prefix + '_' + melted.index
    return melted
def _clinvar_marker_trace(x, y, scores, color):
    '''Builds one scatter overlay marking ClinVar variants on the heatmap.'''
    return go.Scatter(
        x=x,
        y=y,
        customdata=scores,
        mode='markers',
        marker=dict(size=8, color=color),
        showlegend=False,
        hovertemplate='<b>%{x} %{y}</b> (%{customdata:.2f})<extra></extra>',
    )


def plot_interactive(uniprot_id, show_clinvar=False, title=None):
    '''Builds the interactive LLR heatmap for one isoform.

    Parameters:
      uniprot_id:   isoform id passed to load_LLR
      show_clinvar: when True, overlays markers on the cells whose variant is
                    listed in the module-level `clinvar` table as Benign or
                    Pathogenic for this isoform
      title:        figure title; when omitted the module-level `selection`
                    (the label chosen in the selectbox) is used

    Returns the plotly figure.'''
    primaryLLR = load_LLR(uniprot_id)
    if title is None:
        # Default kept for existing callers, which rely on the script-level value.
        title = selection

    fig = px.imshow(
        primaryLLR.values,
        x=primaryLLR.columns,
        y=primaryLLR.index,
        color_continuous_scale='Viridis_r',
        zmax=0,
        zmin=-20,
        labels=dict(y="Amino acid change", x="Protein sequence", color="LLR"),
        template='plotly_white',
        title=title,
    )
    # Initial x-axis window is [0, 99]; the range slider scrolls the rest.
    fig.update_xaxes(tickangle=-90, range=[0, 99], rangeslider=dict(visible=True), dtick=1)
    fig.update_yaxes(dtick=1)
    fig.update_layout(
        {
            'plot_bgcolor': 'rgba(0, 0, 0, 0)',
            'paper_bgcolor': 'rgba(0, 0, 0, 0)',
        },
        font={'family': 'Arial', 'size': 11},
        hoverlabel=dict(font=dict(family='Arial', size=14)),
    )
    fig.update_traces(
        hovertemplate='<b>%{x} %{y}</b> (%{z:.2f})<extra></extra>'
    )

    if show_clinvar:
        iso_clinvar = clinvar[clinvar.LLR_file_id == uniprot_id]
        significance = iso_clinvar.ClinicalSignificance
        benign = set(iso_clinvar[significance == 'Benign'].variant.values)
        pathogenic = set(iso_clinvar[significance == 'Pathogenic'].variant.values)

        # variant id -> class; benign is written last so it wins when a
        # variant is listed under both classes.
        label_of = dict.fromkeys(pathogenic, 'Pathogenic')
        label_of.update(dict.fromkeys(benign, 'Benign'))

        # Per class: (x labels, y labels, LLR scores) of the matching cells.
        hits = {'Pathogenic': ([], [], []), 'Benign': ([], [], [])}
        for column in primaryLLR.columns:
            # "G 12" -> "G12": WT residue followed by the position.
            variant_prefix = column[0] + column[2:]
            for aa in primaryLLR.index:
                label = label_of.get(variant_prefix + aa)
                if label is None:
                    continue
                xs, ys, scores = hits[label]
                xs.append(column)
                ys.append(aa)
                scores.append(primaryLLR.loc[aa, column])

        # Colours are set explicitly (red: pathogenic, green: benign) so they
        # match the checkbox label rather than depending on whatever default
        # colour cycle is in effect. Pathogenic is added first, as before.
        fig.add_trace(_clinvar_marker_trace(*hits['Pathogenic'], color='#EF553B'))
        fig.add_trace(_clinvar_marker_trace(*hits['Benign'], color='#00cc96'))
    return fig
# Isoform picker. The options are taken from `df`; the preferred default entry
# is row 6251, clamped so a shorter isoform list does not raise.
selection = st.selectbox("uniprot_id:", df, index=min(6251, max(len(df) - 1, 0)))

# Map the chosen label (the `txt` column) back to its id in the table index.
uid = df[df.txt == selection].index.values[0]

show_clinvar = st.checkbox('show ClinVar annotations (red: pathogenic, green: benign)', value=False)

fig = plot_interactive(uid, show_clinvar=show_clinvar)
fig.update_layout(width=800, height=600, autosize=False)
st.plotly_chart(fig, use_container_width=True)

# Offer the long-format (one row per variant) scores of the shown isoform.
st.download_button(
    label="Download data as CSV",
    data=meltLLR(load_LLR(uid)).to_csv(),
    file_name=selection + '.csv',
    mime='text/csv',
)

st.markdown("""
To obtain ESM effect scores for non-missense mutations (e.g. indels) or non-human proteins,
please use the [esm-variants command-line tool](https://github.com/ntranoslab/esm-variants).
""")