File size: 5,300 Bytes
9e354ab
51f3ab1
 
50dc493
51f3ab1
6754b64
1b8024a
91dca1d
c4df6e7
 
4a7e728
1b8024a
 
d63e13e
5285068
 
6f4b0c9
5ac1f52
1b8024a
 
51f3ab1
 
 
 
 
030d03d
2be3a00
a563a6c
7c3d241
51f3ab1
4bf6260
6f4b0c9
e360f80
 
985f557
 
d0581e5
985f557
d5c422d
e51f486
f782ff4
c805a33
cb783a7
6754b64
4075ce5
d321e71
b2406f8
6754b64
6f4b0c9
c805a33
 
e51f486
 
db33ed7
963261f
 
 
 
6390fe2
10dc756
 
 
5d5feb8
10dc756
 
 
c805a33
d63e13e
14e6680
50dc493
1b8024a
f1ac32b
1b8024a
f1ac32b
 
 
4d85e44
 
f1ac32b
dadf8bd
7c3d241
 
f1ac32b
 
 
6f4b0c9
 
f1ac32b
4bf6260
6e30489
f1ac32b
 
cfc7061
a563a6c
 
1cc6ef7
b18bc7c
20ec7b4
c15c34a
8aedad1
e29feff
 
 
b18bc7c
f1ac32b
91dca1d
030d03d
 
f7673af
df9add2
f7673af
 
 
 
 
 
 
 
df9add2
 
f7673af
 
 
 
 
c4df6e7
f7673af
 
0f7e093
f05c10f
 
0958527
c4df6e7
e0a8716
0f7e093
 
 
 
c4df6e7
0958527
c4df6e7
e0a8716
0f7e093
 
e360f80
 
 
 
f1ac32b
4bf6260
 
25b02ed
30e243d
 
 
 
 
4bf6260
2640a3e
fac20de
4bf6260
 
0df0a7e
 
1b8024a
 
0df0a7e
f6334c2
0df0a7e
cdc71eb
f6334c2
c5bc477
0ae2fe1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import streamlit as st
import pandas as pd
import numpy as np
import time
import plotly.graph_objects as go
from scipy.ndimage import gaussian_filter1d
from zipfile import ZipFile

# Fix NumPy's global RNG seed. No random draw is visible in this file, so this
# only matters for code that relies on np.random elsewhere.
np.random.seed(2024)

# Selectable entries for the picker: the file has no header row, so its single
# tab-separated field is given the column name "selection".
uids = pd.read_csv(
    "uniprot_ids_isoforms.tsv.gz",
    sep="\t",
    header=None,
    names=["selection"],
)

# Archive holding one "<uid>.csv" member per entry; kept open at module level
# because plot_interactive_scatter reads members from it on demand.
zf = ZipFile("ALL_hum_isoforms_ESM1b_del_sub.zip")

# NOTE(review): not referenced anywhere in the visible code.
width = 600

def plot_interactive_scatter(uid: str):
    """Load one entry's scores from the archive and build its scatter trace.

    Reads the ``{uid}.csv`` member of the module-level ``zf`` archive into a
    DataFrame and wraps it in an orange marker trace.

    Args:
        uid: Name of the archive member to load, without the ``.csv`` suffix.

    Returns:
        A ``(trace, data)`` tuple. ``trace`` is a ``go.Scatter`` plotting
        ``-log10(aPLLR)`` on x against ``avg_LLR`` on y, with the ``site``
        column as hover text. ``data`` is the DataFrame parsed from the CSV.

    Raises:
        KeyError: If the archive contains no ``{uid}.csv`` member.
    """
    # pandas does not close handles it is given, so scope the member handle
    # explicitly; the frame is fully parsed before the handle is released.
    with zf.open(f"{uid}.csv") as member:
        user_data = pd.read_csv(member)

    # Create scatter plot for user-specified data
    user_trace = go.Scatter(
        x=-np.log10(user_data.aPLLR),
        y=user_data.avg_LLR,
        mode='markers',
        name=f"{uid}<br>Data",
        text=user_data.site,
        hoverinfo='text',
        marker=dict(color='orange'),
    )

    return user_trace, user_data

def plot_interactive_line(uid_data: pd.DataFrame, uid: str, score: str, mutation: str,
                          hline1: float, hline2: float):
    """Build a per-position line figure for one score column.

    Args:
        uid_data: Table with a ``site`` column and the column named by
            ``score``.
        uid: Identifier shown in the figure title.
        score: Column to plot. When it is ``"aPLLR"`` the values are
            transformed with ``-log10``; any other column is plotted as-is.
        mutation: Label used in the title and the y-axis title.
        hline1: y position of the first dashed reference line.
        hline2: y position of the second dashed reference line.

    Returns:
        A ``go.Figure`` with one orange line trace (x runs from 1 to the
        number of rows) and two dashed black horizontal lines spanning the
        full plot width.
    """
    values = -np.log10(uid_data[score]) if score == "aPLLR" else uid_data[score]

    # Hover shows "<site>: <value rounded to 3 decimals>" for each position.
    hover_text = [f"{site}: {np.round(val, 3)}" for site, val in zip(uid_data.site, values)]

    line_trace = go.Scatter(
        x=np.arange(1, len(uid_data) + 1),
        y=values,
        mode='lines',
        text=hover_text,
        hoverinfo='text',
        # No markers are drawn in 'lines' mode, so colour the line itself.
        line=dict(color='orange'),
    )
    line_fig = go.Figure(data=[line_trace])
    line_fig.update_layout(
        title=f"{uid} {mutation} Scores by Position",
        yaxis_title=f'{mutation} Score<br>(More Negative = More Damaging)',
        yaxis=dict(showgrid=False, zeroline=False, showline=False),
        height=300,
        hoverlabel=dict(font=dict(size=16)),  # hover text font size
    )
    for hline in (hline1, hline2):
        # xref='paper' with x0=0, x1=1 stretches the line across the whole
        # plotting area regardless of the x-axis range.
        line_fig.add_shape(
            type='line',
            x0=0, x1=1, y0=hline, y1=hline,
            xref='paper', yref='y',
            line=dict(color='Black', dash='dash'),
        )
    return line_fig
                           
# Entry picker. Streamlit discourages empty widget labels for accessibility,
# so give the widget a real label and hide it visually instead.
selection = st.selectbox(
    "Protein isoform",
    uids.selection,
    index=26592,
    label_visibility="collapsed",
)
# Each entry is comma-separated; the first field is the identifier used to
# look up the per-entry CSV.
selection_uid = selection.split(",")[0]
                               
# Random genome-wide sample that serves as the grey backdrop.
base_data = pd.read_csv("rand_samp_gw_del_sub.csv.gz")

# Backdrop markers: x is -log10(aPLLR), y is avg_LLR; hovering is switched
# off so only the selected entry's points respond to the cursor.
base_trace = go.Scatter(
    name='Sample of<br>Genome-Wide<br>Data',
    mode='markers',
    marker={'color': 'grey'},
    hoverinfo='none',
    x=-np.log10(base_data.aPLLR),
    y=base_data.avg_LLR,
)

# Trace and raw table for the entry picked in the selectbox.
ut, ud = plot_interactive_scatter(selection_uid)

# Backdrop sample first, then the selected entry.
fig = go.Figure([base_trace, ut])

# Titles, boxed legend and hover font; the y axis has no grid, axis line or
# zero line.
fig.update_layout(
    title='Deletion v Substitution Effects',
    xaxis_title='Deletion Score',
    yaxis_title='Substitution Score',
    yaxis={'showgrid': False, 'showline': False, 'zeroline': False},
    legend={
        'font': {'size': 15},
        'bordercolor': "grey",
        'borderwidth': 1,
    },
    hoverlabel={'font': {'size': 16}},
)

fig.update_yaxes(showgrid=False)

# Percentile cutoffs are hard-coded rather than computed here, to avoid
# reading the entire dataset into memory.
# NOTE(review): which percentiles these correspond to is not shown here.
del_bot, del_top = 0.147907659054341, -0.8033614237502615
sub_bot, sub_top = -12.294105263157894, -4.898842105263157

# Vertical lines at the deletion cutoffs, spanning the full plot height.
for del_cutoff in (del_bot, del_top):
    fig.add_shape(
        type='line',
        xref='x', yref='paper',
        x0=del_cutoff, x1=del_cutoff,
        y0=0, y1=1,
        line={'color': 'Black', 'width': 2},
    )

# Horizontal lines at the substitution cutoffs, spanning the full plot width.
for sub_cutoff in (sub_bot, sub_top):
    fig.add_shape(
        type='line',
        xref='paper', yref='y',
        x0=0, x1=1,
        y0=sub_cutoff, y1=sub_cutoff,
        line={'color': 'Black', 'width': 2},
    )

# Green label placed at (2.5, -18) in data coordinates.
fig.add_annotation(
    text=r"D<sup>+</sup>S<sup>—</sup>",
    x=2.5,
    y=-18,
    showarrow=False,
    font={'color': "green", 'size': 24},
)

# Red label placed at (-1.5, 0.5) in data coordinates.
fig.add_annotation(
    text=r"D<sup>—</sup>S<sup>+</sup>",
    x=-1.5,
    y=0.5,
    showarrow=False,
    font={'color': "red", 'size': 24},
)

# Show the scatter plot
st.plotly_chart(fig)

show_line_plots = st.checkbox("Show Deletion and Substitution Effects Alone")

if show_line_plots:
    # Build the per-position figures only when they are actually displayed,
    # instead of constructing both on every run of the script.
    lt_apllr = plot_interactive_line(ud, selection_uid, "aPLLR", "Deletion", del_bot, del_top)
    lt_llr = plot_interactive_line(ud, selection_uid, "avg_LLR", "Substitution", sub_bot, sub_top)
    st.plotly_chart(lt_apllr)
    st.plotly_chart(lt_llr)

# Offer the selected entry's three score columns as a CSV download.
# NOTE(review): to_csv() is called without index=False, so the file also
# contains a leading unnamed index column - confirm whether that is intended.
st.download_button(
    label=f"Download {selection_uid} data as CSV",
    data=ud.reset_index(drop=True)[["site", "aPLLR", "avg_LLR"]].to_csv(),
    file_name=f"{selection_uid}_del_sub.csv",
    mime='text/csv',
)



# Static README text with download links, rendered as Markdown.
st.markdown("""
**README**:
- Deletion scores are *visualized* on the -log10 scale. 
- The genome-wide dataset can be downloaded by clicking [here](https://huggingface.co/spaces/goldmangrant/diff-tol/blob/main/ALL_hum_isoforms_ESM1b_del_sub.zip) (or go to files tab).
- Non-aggregated substitution effects can be downloaded or browsed [here](https://huggingface.co/spaces/ntranoslab/esm_variants).
- Additional supplementary data from the paper can be downloaded [here](https://github.com/ntranoslab/diff-tol).
""")