import streamlit as st
import re
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
# Banner image followed by the page title.
st.image("banner.png", use_column_width=True)
# The title spans several lines, so it must be a triple-quoted literal; a
# single-quoted string broken across lines is a SyntaxError.
# NOTE(review): unsafe_allow_html=True suggests the title was originally
# wrapped in HTML heading markup that is no longer present - confirm and
# restore the intended tags.
st.markdown(
    """
CMR and Heart Failure Colocalisation Drug Interaction Viewer
""",
    unsafe_allow_html=True,
)
# Description text
st.markdown(
    """
This interactive app allows you to explore colocalising genes between cardiovascular magnetic resonance image (CMR) traits and heart failure (HF) that have interacting drugs.
You can input multiple HGNC gene names or disease terms to filter the dataset or enter a single gene for more detailed information.
Additionally, you can visualize a protein interaction network for specific genes using STRINGdb data.
""",
    unsafe_allow_html=True,
)
# Read the colocalisation table, replace every missing value with 0 and
# index the rows by the "Gene" column (same order of operations as before:
# fill first, then index).
annotations = (
    pd.read_csv("colocalisation_results.csv")
    .fillna(0)
    .set_index("Gene")
)
# Section heading for the gene/disease filters
st.markdown("### View colocalising gene drug interaction results for selected genes/diseases or the entire dataset.")
# Define a function to collect genes from input
def collect_genes(x):
    """Split free-text input into a list of gene symbols.

    Symbols may be separated by commas, whitespace, or any mix of the two.
    Empty fragments (leading/trailing/repeated separators) are discarded.

    Args:
        x: The raw text typed by the user.

    Returns:
        The non-empty tokens of ``x`` in their original order.
    """
    # A raw-string character class avoids the invalid "\s" escape of a plain
    # string literal and treats any run of separators as a single split point.
    return [token for token in re.split(r"[,\s]+", x) if token]
# Raw text typed into the multi-gene box.
input_gene_list = st.text_input("Input a list of multiple HGNC genes (enter comma separated):")
# Individual gene symbols parsed from that text by collect_genes.
gene_list = collect_genes(input_gene_list)
# Function to convert DataFrame to CSV for download
@st.cache_data
def convert_df(df):
    """Serialise ``df`` to CSV text (index omitted) and return it as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
# Show the rows for the requested genes; only runs when more than one gene
# was entered in the multi-gene box.
if len(gene_list) > 1:
    # Filter for input gene list. reset_index() returns a new frame with the
    # "Gene" index promoted to the first column, so nothing is assigned into
    # a filtered slice (which triggers pandas' SettingWithCopyWarning).
    df = annotations[annotations.index.isin(gene_list)].reset_index()
    # Display the filtered results
    st.dataframe(df)
    # CSV payload for the (currently disabled) download button below; it
    # contains the "Gene" column only.
    output = df[['Gene']]
    csv = convert_df(output)
    # st.download_button("Download Filtered Colocalisation Results", csv, "filtered_colocalisation_results.csv", "text/csv", key='download-csv')
# Add a new search box for filtering by disease name
input_disease = st.text_input("Input a disease name to search in drug terms (partial match allowed):")
if input_disease:
    # Case-insensitive substring match on the "terms_drug" column.
    # regex=False treats the user's text literally, so input containing
    # characters such as "(" or "+" no longer raises a regex error.
    disease_mask = annotations['terms_drug'].str.contains(
        input_disease, case=False, na=False, regex=False
    )
    # reset_index() returns a new frame with "Gene" as the first column,
    # avoiding assignment into a filtered slice.
    df_disease_filtered = annotations[disease_mask].reset_index()
    if not df_disease_filtered.empty:
        st.markdown(f"### Colocalisation results for disease: {input_disease}")
        # Display filtered dataframe
        st.dataframe(df_disease_filtered)
        # Convert filtered dataframe to CSV for the (currently disabled) download button
        csv_disease_filtered = convert_df(df_disease_filtered)
        # st.download_button("Download Filtered Colocalisation Results", csv_disease_filtered, "filtered_colocalisation_disease_results.csv", "text/csv", key='download-disease-csv')
    else:
        st.write(f"No results found for disease: {input_disease}")
# Display individual gene details if a single gene is input
input_gene = st.text_input("Input an individual HGNC gene:")
if input_gene:
    # Ignore surrounding whitespace (e.g. from copy/paste) so the exact-match
    # lookup below does not fail on an otherwise valid symbol.
    gene_symbol = input_gene.strip()
    # reset_index() returns a new frame with "Gene" as the first column,
    # avoiding assignment into a filtered slice.
    df2 = annotations[annotations.index == gene_symbol].reset_index()
    if not df2.empty:
        st.dataframe(df2)
        # Provide a link to the gene's DrugnomeAI page
        url = f"https://astrazeneca-cgr-publications.github.io/DrugnomeAI/geneview.html?gene={gene_symbol}"
        markdown_link = f"[{gene_symbol} druggability in DrugnomeAI]({url})"
        st.markdown(markdown_link, unsafe_allow_html=True)
    else:
        st.write("Gene not found in the dataset.")
# Show every colocalisation result in one table
st.markdown("### All Colocalisation Results Interacting with Drugs")
# reset_index() builds a fresh frame whose first column is the former
# "Gene" index and whose row labels are 0..n-1; `annotations` is untouched.
df_total_output = annotations.reset_index()
st.dataframe(df_total_output)
# CSV bytes of the full table, kept for the disabled download button below
csv = convert_df(df_total_output)
# st.download_button("Download Complete Colocalisation Results", csv, "complete_colocalisation_results.csv", "text/csv", key='download-all-csv')
# Protein interaction network visualization using STRINGDB_data.tsv
# The heading text ends with a line break, so it must be a triple-quoted
# literal; a single-quoted string broken across lines is a SyntaxError.
# NOTE(review): unsafe_allow_html=True suggests this heading was originally
# wrapped in HTML markup that is no longer present - confirm and restore the
# intended tags.
st.markdown(
    """Protein Interaction Networks of Colocalising Drug Targets
""",
    unsafe_allow_html=True,
)
# Description text
st.markdown(
    """
- The colour of each node represents its degree (number of direct connections it has with other nodes).
- The size of each node represents its betweenness centrality (larger nodes play a more central role in the network, facilitating communication between other proteins).
- Node edges/connections are colour-coded by confidence of PPI (lighter colours (brighter) represent stronger interactions).
- Genes that interact with cardiovascular drugs are highlighted with a bold black outline.
""",
    unsafe_allow_html=True,
)
# Read the tab-separated STRINGdb interaction table
ppi_data = pd.read_csv("STRINGdb_data.tsv", sep='\t')
# Undirected interaction graph: one edge per table row, in row order, with
# the row's combined_score stored as the edge "weight"
G = nx.Graph()
edge_rows = zip(ppi_data['node1'], ppi_data['node2'], ppi_data['combined_score'])
for source_node, target_node, score in edge_rows:
    G.add_edge(source_node, target_node, weight=score)
# Function to rescale values to a given range
def rescale(l, newmin, newmax):
    """Linearly map the values of ``l`` onto the interval [newmin, newmax].

    Args:
        l: Iterable of numbers; it is materialised once, so generators work.
        newmin: Output value assigned to the smallest input.
        newmax: Output value assigned to the largest input.

    Returns:
        A list of rescaled values in input order. An empty input gives
        ``[]``. If all inputs are equal there is no spread to map, so every
        output is the midpoint of the target interval.
    """
    arr = list(l)
    if not arr:
        return []
    # Compute the bounds once rather than once per element.
    lo, hi = min(arr), max(arr)
    span = hi - lo
    if span == 0:
        # Avoid dividing by zero when every value is identical.
        midpoint = (newmin + newmax) / 2
        return [midpoint] * len(arr)
    return [(x - lo) / span * (newmax - newmin) + newmin for x in arr]
# Use the plasma colormap, resampled to 12 colours
graph_colormap = plt.get_cmap('plasma', 12)
# Node color varies with degree (degrees rescaled to [0.0, 0.9] of the colormap)
c = rescale([G.degree(v) for v in G], 0.0, 0.9)
c = [graph_colormap(i) for i in c]
# Node size varies with betweenness centrality - map to range [1500, 7000]
bc = nx.betweenness_centrality(G)
s = rescale(list(bc.values()), 1500, 7000)
# Edge width and edge colour both scale directly with the stored weight
# (the STRING combined_score): larger weight -> wider edge and a colour
# further along the colormap. The weights are collected once and reused.
edge_weights = [float(G[u][v]['weight']) for u, v in G.edges]
ew = rescale(edge_weights, 0.1, 4)
ec = rescale(edge_weights, 0.1, 1)
ec = [graph_colormap(i) for i in ec]
# Adjust spring_layout parameters to bring the networks closer together.
# A fixed seed keeps node positions stable across Streamlit reruns (the
# script re-executes on every widget interaction).
pos = nx.spring_layout(G, k=0.5, seed=42)
# Prepare to highlight genes with "Cardiovascular_Drug" as "Yes"
highlighted_nodes = annotations[annotations['Cardiovascular_Drug'] == 'Yes'].index
# Draw the network plot on an explicit Figure/Axes so that the colorbar has
# an Axes to attach to and Streamlit receives a real Figure object.
fig = plt.figure(figsize=(19, 9), facecolor='white')
ax = fig.add_subplot()
# Draw the nodes with black outline for highlighted ones
highlighted = set(highlighted_nodes)
node_outlines = ['black' if node in highlighted else 'none' for node in G]
nx.draw_networkx_nodes(G, pos, node_color=c, node_size=s, edgecolors=node_outlines, linewidths=2, ax=ax)
# Draw the edges
nx.draw_networkx_edges(G, pos, edge_color=ec, width=ew, ax=ax)
# Draw node labels with customized font color based on degree.
# The median degree and the node -> size lookup are computed once, not once
# per node inside the loop.
median_degree = np.median([G.degree(n) for n in G])
size_by_node = dict(zip(G, s))  # s is ordered like iteration over G
for node, (x, y) in pos.items():
    if G.degree(node) < median_degree:
        # Below-median nodes get white text, with the font shrunk in
        # proportion to node size (capped at 10) so it fits inside the node.
        font_color = 'white'
        font_size = min(size_by_node[node] * 0.01, 10)
    else:
        font_color = 'black'
        font_size = 12  # Default size for black font
    ax.text(x, y, node, fontsize=font_size, fontweight='bold', ha='center', va='center', color=font_color)
# Add a colorbar to represent the node degree color scale
# NOTE(review): the scale runs 0-1 over the whole colormap, while node
# colours use degrees rescaled to [0.0, 0.9]; the tick values are therefore
# not actual degrees - confirm whether a degree-valued scale is wanted.
sm = plt.cm.ScalarMappable(cmap=graph_colormap, norm=plt.Normalize(vmin=0, vmax=1))
sm.set_array([])
# The mappable is not attached to any Axes, so the target Axes must be given
# explicitly; recent Matplotlib versions raise an error otherwise.
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Node Degree (Higher = More Connected)', fontsize=12)
ax.axis('off')
# Display the network plot in the Streamlit app directly
st.pyplot(fig)