Spaces:
Running
Running
File size: 5,300 Bytes
9e354ab 51f3ab1 50dc493 51f3ab1 6754b64 1b8024a 91dca1d c4df6e7 4a7e728 1b8024a d63e13e 5285068 6f4b0c9 5ac1f52 1b8024a 51f3ab1 030d03d 2be3a00 a563a6c 7c3d241 51f3ab1 4bf6260 6f4b0c9 e360f80 985f557 d0581e5 985f557 d5c422d e51f486 f782ff4 c805a33 cb783a7 6754b64 4075ce5 d321e71 b2406f8 6754b64 6f4b0c9 c805a33 e51f486 db33ed7 963261f 6390fe2 10dc756 5d5feb8 10dc756 c805a33 d63e13e 14e6680 50dc493 1b8024a f1ac32b 1b8024a f1ac32b 4d85e44 f1ac32b dadf8bd 7c3d241 f1ac32b 6f4b0c9 f1ac32b 4bf6260 6e30489 f1ac32b cfc7061 a563a6c 1cc6ef7 b18bc7c 20ec7b4 c15c34a 8aedad1 e29feff b18bc7c f1ac32b 91dca1d 030d03d f7673af df9add2 f7673af df9add2 f7673af c4df6e7 f7673af 0f7e093 f05c10f 0958527 c4df6e7 e0a8716 0f7e093 c4df6e7 0958527 c4df6e7 e0a8716 0f7e093 e360f80 f1ac32b 4bf6260 25b02ed 30e243d 4bf6260 2640a3e fac20de 4bf6260 0df0a7e 1b8024a 0df0a7e f6334c2 0df0a7e cdc71eb f6334c2 c5bc477 0ae2fe1 |
|
import streamlit as st
import pandas as pd
import numpy as np
import time
import plotly.graph_objects as go
from scipy.ndimage import gaussian_filter1d
from zipfile import ZipFile
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(2024)

# Table of selectable entries. The file has no header row, so its single
# tab-separated column is explicitly named "selection".
uids = pd.read_csv(
    "uniprot_ids_isoforms.tsv.gz",
    sep="\t",
    header=None,
    names=["selection"],
)
# del_sub_merge = pd.read_csv("del_sub_data.csv.gz")

# Archive kept open at module level; plot_interactive_scatter reads one
# "<uid>.csv" member from it per call.
zf = ZipFile("ALL_hum_isoforms_ESM1b_del_sub.zip")

# Not referenced anywhere in the visible part of this file.
width = 600
def plot_interactive_scatter(uid: str) -> "tuple[go.Scatter, pd.DataFrame]":
    """Load one entry's per-site scores and build its scatter trace.

    Reads the member ``<uid>.csv`` from the module-level ``zf`` archive and
    plots ``-log10(aPLLR)`` on the x axis against ``avg_LLR`` on the y axis.
    The ``site`` column is the only thing shown on hover.

    Args:
        uid: Identifier naming the CSV member inside the archive.

    Returns:
        A ``(trace, data)`` tuple: the orange marker trace and the DataFrame
        parsed from the CSV member.

    Raises:
        KeyError: If the archive has no member named ``<uid>.csv``
            (raised by ``ZipFile.open``).
    """
    # pandas does not close file objects it did not open itself, so release
    # the archive member explicitly once the CSV has been parsed.
    with zf.open(f"{uid}.csv") as member:
        user_data = pd.read_csv(member)

    # Create scatter plot for user-specified data
    user_trace = go.Scatter(
        x=-np.log10(user_data.aPLLR),
        y=user_data.avg_LLR,
        mode='markers',
        name=f"{uid}<br>Data",
        text=user_data.site,
        hoverinfo='text',
        marker=dict(color='orange'),
    )
    return user_trace, user_data
def plot_interactive_line(uid_data: pd.DataFrame, uid: str, score: str, mutation: str,
                          hline1: float, hline2: float) -> "go.Figure":
    """Build a per-position line figure for one score column.

    Args:
        uid_data: Table with a ``site`` column and the column named by
            ``score``; one row per plotted position.
        uid: Identifier shown in the figure title.
        score: Name of the column to plot. ``"aPLLR"`` is drawn as
            ``-log10(value)``; any other column is drawn as stored.
        mutation: Label used in the title and the y-axis title.
        hline1: y value of the first dashed horizontal reference line.
        hline2: y value of the second dashed horizontal reference line.

    Returns:
        A 300-pixel-high Plotly figure holding a single orange line whose x
        values are the 1-based row positions of ``uid_data``, plus the two
        reference lines.

    Raises:
        KeyError: If ``uid_data`` has no column named ``score``.
    """
    # Only aPLLR is transformed; the same values feed both the y axis and
    # the hover text so the two always agree.
    plot_data = -np.log10(uid_data[score]) if score == "aPLLR" else uid_data[score]
    hover_text = [f"{x}: {np.round(y, 3)}" for x, y in zip(uid_data.site, plot_data)]

    line_trace = go.Scatter(
        x=np.arange(1, len(uid_data) + 1),
        y=plot_data,
        mode='lines',
        text=hover_text,
        hoverinfo='text',
        marker=dict(color='orange'),
    )
    line_fig = go.Figure(data=[line_trace])
    line_fig.update_layout(
        title=f"{uid} {mutation} Scores by Position",
        yaxis_title=f'{mutation} Score<br>(More Negative = More Damaging)',
        yaxis=dict(showgrid=False, zeroline=False, showline=False),
        height=300,
        hoverlabel=dict(  # Set hover label font size
            font=dict(size=16)  # Specify the font size of the hover text
        ),
    )

    # xref='paper' with x0=0, x1=1 makes each reference line span the full
    # plot width regardless of the data range.
    for hline in (hline1, hline2):
        line_fig.add_shape(
            type='line',
            x0=0, x1=1, y0=hline, y1=hline,
            xref='paper', yref='y',
            line=dict(color='Black', dash='dash'),
        )
    return line_fig
# Entry picker. Streamlit discourages empty widget labels for accessibility
# reasons, so the widget gets a real label that is hidden visually instead.
# NOTE(review): 26592 is a hard-coded default row of `uids`; confirm which
# entry it is meant to preselect, and that the installed Streamlit supports
# `label_visibility`.
selection = st.selectbox(
    "UniProt ID / isoform",
    uids.selection,
    index=26592,
    label_visibility="collapsed",
)
# Only the text before the first comma is used as the identifier for lookups.
selection_uid = selection.split(",")[0]
# Background layer: a pre-sampled table drawn as grey markers with hover
# disabled, so only the selected entry's points respond to the mouse.
base_data = pd.read_csv("rand_samp_gw_del_sub.csv.gz")
base_trace = go.Scatter(
    x=-np.log10(base_data.aPLLR),
    y=base_data.avg_LLR,
    mode="markers",
    name="Sample of<br>Genome-Wide<br>Data",
    hoverinfo="none",
    marker={"color": "grey"},
)

# Foreground layer for the selected entry; `ud` (its table) is reused further
# down for the line plots and the CSV download.
ut, ud = plot_interactive_scatter(selection_uid)

# Base trace first so the selected entry's markers are drawn on top of it.
fig = go.Figure([base_trace, ut])
fig.update_layout(
    title="Deletion v Substitution Effects",
    xaxis_title="Deletion Score",
    yaxis_title="Substitution Score",
    yaxis={"showgrid": False, "showline": False, "zeroline": False},
    legend={
        "font": {"size": 15},
        "bordercolor": "grey",
        "borderwidth": 1,
    },
    hoverlabel={"font": {"size": 16}},
)
fig.update_yaxes(showgrid=False)

# Percentile cutoffs are hard-coded rather than computed here, to avoid
# reading the entire dataset into memory.
del_bot, del_top = 0.147907659054341, -0.8033614237502615
sub_bot, sub_top = -12.294105263157894, -4.898842105263157

# Deletion cutoffs: vertical lines at fixed x, spanning the full plot height.
for del_cutoff in (del_bot, del_top):
    fig.add_shape(
        type="line",
        xref="x", yref="paper",
        x0=del_cutoff, x1=del_cutoff, y0=0, y1=1,
        line={"color": "Black", "width": 2},
    )

# Substitution cutoffs: horizontal lines at fixed y, spanning the full width.
for sub_cutoff in (sub_bot, sub_top):
    fig.add_shape(
        type="line",
        xref="paper", yref="y",
        x0=0, x1=1, y0=sub_cutoff, y1=sub_cutoff,
        line={"color": "Black", "width": 2},
    )

# Two coloured text labels placed at fixed data coordinates, without arrows.
for label_x, label_y, label_text, label_color in (
    (2.5, -18, r"D<sup>+</sup>S<sup>—</sup>", "green"),
    (-1.5, 0.5, r"D<sup>—</sup>S<sup>+</sup>", "red"),
):
    fig.add_annotation(
        x=label_x,
        y=label_y,
        text=label_text,
        font={"color": label_color, "size": 24},
        showarrow=False,
    )
# Show the scatter plot
st.plotly_chart(fig)

# The per-position figures are built only when the user asks for them, so a
# rerun with the box unticked does not pay for two figures nobody sees.
show_line_plots = st.checkbox("Show Deletion and Substitution Effects Alone")
if show_line_plots:
    lt_apllr = plot_interactive_line(ud, selection_uid, "aPLLR", "Deletion", del_bot, del_top)
    lt_llr = plot_interactive_line(ud, selection_uid, "avg_LLR", "Substitution", sub_bot, sub_top)
    st.plotly_chart(lt_apllr)
    st.plotly_chart(lt_llr)

# NOTE(review): the original indentation was lost; the download button is
# assumed to sit outside the checkbox branch so it is always available.
# to_csv() is called with its defaults, so the renumbered 0..n-1 index is
# written as the first, unnamed column of the download.
st.download_button(
    label=f"Download {selection_uid} data as CSV",
    data=ud.reset_index(drop=True)[["site", "aPLLR", "avg_LLR"]].to_csv(),
    file_name=f"{selection_uid}_del_sub.csv",
    mime='text/csv',
)
# Footer notes rendered as Markdown below the widgets: how deletion scores
# are displayed and where the related datasets can be obtained.
readme_md = """
**README**:
- Deletion scores are *visualized* on the -log10 scale.
- The genome-wide dataset can be downloaded by clicking [here](https://huggingface.co/spaces/goldmangrant/diff-tol/blob/main/ALL_hum_isoforms_ESM1b_del_sub.zip) (or go to files tab).
- Non-aggregated substitution effects can be downloaded or browsed [here](https://huggingface.co/spaces/ntranoslab/esm_variants).
- Additional supplementary data from the paper can be downloaded [here](https://github.com/ntranoslab/diff-tol).
"""
st.markdown(readme_md)
|