# Streamlit app: browse colocalising genes between cardiovascular magnetic
# resonance (CMR) traits and heart failure (HF) that have interacting drugs,
# and draw the STRINGdb protein-interaction network of those genes.
#
# Inputs read from the working directory:
#   banner.png                  - header image
#   colocalisation_results.csv  - one row per gene; must contain the columns
#                                 "Gene", "terms_drug" and "Cardiovascular_Drug"
#   STRINGdb_data.tsv           - tab-separated edges with the columns
#                                 "node1", "node2" and "combined_score"
import re

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import streamlit as st
from matplotlib import cm

# Gene symbols may be separated by commas, whitespace, or any mix of the two.
_GENE_SEPARATORS = re.compile(r"[,\s]+")


def collect_genes(x):
    """Split free-text input into a list of gene symbols.

    Args:
        x: Raw text typed by the user.

    Returns:
        The non-empty tokens of ``x`` after splitting on runs of commas
        and/or whitespace. An empty or separator-only input gives ``[]``.
    """
    return [token for token in _GENE_SEPARATORS.split(x) if token]


# Cached so that Streamlit reruns do not re-serialise an unchanged frame.
@st.cache_data
def convert_df(df):
    """Serialise ``df`` to UTF-8 encoded CSV bytes without the index."""
    return df.to_csv(index=False).encode('utf-8')


def with_gene_column(frame):
    """Return a new frame with the gene index moved to a leading "Gene" column.

    ``reset_index`` builds a fresh DataFrame, so this never writes into a
    filtered slice of ``annotations`` (which would raise pandas'
    SettingWithCopyWarning).

    Args:
        frame: DataFrame indexed by gene symbol.

    Returns:
        A DataFrame with a default integer index whose first column is "Gene".
    """
    return frame.rename_axis("Gene").reset_index()


def rescale(l, newmin, newmax):
    """Linearly map the values of ``l`` onto the range [newmin, newmax].

    Args:
        l: Iterable of numbers.
        newmin: Output value assigned to the smallest input.
        newmax: Output value assigned to the largest input.

    Returns:
        A list with one rescaled value per input, in input order. An empty
        input gives ``[]``. If every input is equal there is no spread to
        scale, so each value maps to ``newmin`` instead of dividing by zero.
    """
    arr = list(l)
    if not arr:
        return []
    lowest, highest = min(arr), max(arr)
    spread = highest - lowest
    if spread == 0:
        return [newmin for _ in arr]
    return [(x - lowest) / spread * (newmax - newmin) + newmin for x in arr]


st.image("banner.png", use_column_width=True)

# NOTE(review): unsafe_allow_html=True suggests this heading originally carried
# HTML markup that is not present in this copy of the file - confirm and
# restore the tags if so. The visible text is kept exactly as found.
st.markdown(
    """

CMR and Heart Failure Colocalisation Drug Interaction Viewer

""",
    unsafe_allow_html=True,
)

# Description text
st.markdown(
    """
This interactive app allows you to explore colocalising genes between
cardiovascular magnetic resonance image (CMR) traits and heart failure (HF)
that have interacting drugs. You can input multiple HGNC gene names or disease
terms to filter the dataset or enter a single gene for more detailed
information. Additionally, you can visualize a protein interaction network for
specific genes using STRINGdb data.
""",
    unsafe_allow_html=True,
)

# Load and prepare colocalisation results. Missing values become 0 and the
# frame is indexed by gene symbol for the lookups below.
annotations = pd.read_csv("colocalisation_results.csv")
annotations.fillna(0, inplace=True)
annotations = annotations.set_index("Gene")

# --- Filter by a list of genes ------------------------------------------------
st.markdown("### View colocalising gene drug interaction results for selected genes/diseases or the entire dataset.")

input_gene_list = st.text_input("Input a list of multiple HGNC genes (enter comma separated):")
gene_list = collect_genes(input_gene_list)

# Only act on two or more genes; a single gene is handled by its own box below.
if len(gene_list) > 1:
    df = with_gene_column(annotations[annotations.index.isin(gene_list)])
    st.dataframe(df)
    output = df[['Gene']]
    csv = convert_df(output)
    # st.download_button("Download Filtered Colocalisation Results", csv, "filtered_colocalisation_results.csv", "text/csv", key='download-csv')

# --- Filter by disease name ---------------------------------------------------
input_disease = st.text_input("Input a disease name to search in drug terms (partial match allowed):")
if input_disease:
    # regex=False: the user's text is a literal substring, so characters such
    # as "(" or "+" cannot raise a regex error. na=False drops non-text cells
    # (e.g. the 0 written by fillna above).
    disease_mask = annotations['terms_drug'].str.contains(
        input_disease, case=False, na=False, regex=False
    )
    df_disease_filtered = annotations[disease_mask]
    if not df_disease_filtered.empty:
        st.markdown(f"### Colocalisation results for disease: {input_disease}")
        df_disease_filtered = with_gene_column(df_disease_filtered)
        st.dataframe(df_disease_filtered)
        csv_disease_filtered = convert_df(df_disease_filtered)
        # st.download_button("Download Filtered Colocalisation Results", csv_disease_filtered, "filtered_colocalisation_disease_results.csv", "text/csv", key='download-disease-csv')
    else:
        st.write(f"No results found for disease: {input_disease}")

# --- Single-gene details ------------------------------------------------------
# Surrounding whitespace is stripped so a stray space does not cause a miss.
input_gene = st.text_input("Input an individual HGNC gene:").strip()
if input_gene:
    df2 = annotations[annotations.index == input_gene]
    if not df2.empty:
        df2 = with_gene_column(df2)
        st.dataframe(df2)
        # Provide a link to the gene's DrugnomeAI page
        url = f"https://astrazeneca-cgr-publications.github.io/DrugnomeAI/geneview.html?gene={input_gene}"
        markdown_link = f"[{input_gene} druggability in DrugnomeAI]({url})"
        st.markdown(markdown_link, unsafe_allow_html=True)
    else:
        st.write("Gene not found in the dataset.")

# --- Entire dataset -----------------------------------------------------------
st.markdown("### All Colocalisation Results Interacting with Drugs")
df_total_output = with_gene_column(annotations)
st.dataframe(df_total_output)
csv = convert_df(df_total_output)
# st.download_button("Download Complete Colocalisation Results", csv, "complete_colocalisation_results.csv", "text/csv", key='download-all-csv')

# --- Protein interaction network (STRINGdb) -----------------------------------
# NOTE(review): as with the page title, this heading probably carried HTML
# markup originally - confirm. The visible text is kept exactly as found.
st.markdown(
    """

Protein Interaction Networks of Colocalising Drug Targets

""",
    unsafe_allow_html=True,
)

# Description text
st.markdown(
    """
- The colour of each node represents its degree (number of direct connections it has with other nodes).
- The size of each node represents its betweenness centrality (larger nodes play a more central role in the network, facilitating communication between other proteins).
- Node edges/connections are colour-coded by confidence of PPI (lighter colours (brighter) represent stronger interactions).
- Genes that interact with cardiovascular drugs are highlighted with a bold black outline.
""",
    unsafe_allow_html=True,
)

# Load the STRINGdb edges and build an undirected graph weighted by the
# combined score of each interaction.
ppi_data = pd.read_csv("STRINGdb_data.tsv", sep='\t')
G = nx.Graph()
for source, target, score in zip(
    ppi_data['node1'], ppi_data['node2'], ppi_data['combined_score']
):
    G.add_edge(source, target, weight=score)

# Plasma colormap quantised to 12 colours.
graph_colormap = plt.get_cmap('plasma', 12)

# Node colour varies with degree, mapped onto the lower 90% of the colormap.
node_list = list(G)
degree_by_node = {node: G.degree(node) for node in node_list}
c = [graph_colormap(i) for i in rescale(degree_by_node.values(), 0.0, 0.9)]

# Node size varies with betweenness centrality, mapped to [1500, 7000].
# betweenness_centrality returns its entries in graph node order, so sizes
# line up with node_list.
bc = nx.betweenness_centrality(G)
s = rescale(bc.values(), 1500, 7000)
size_by_node = dict(zip(node_list, s))

# Edge width and edge colour both grow with the interaction score.
edge_weights = [float(G[u][v]['weight']) for u, v in G.edges]
ew = rescale(edge_weights, 0.1, 4)
ec = [graph_colormap(i) for i in rescale(edge_weights, 0.1, 1)]

# k=0.5 controls node spacing in the spring layout.
# NOTE: the layout is unseeded, so node positions differ between reruns.
pos = nx.spring_layout(G, k=0.5)

# Genes flagged "Yes" in Cardiovascular_Drug get a black outline.
highlighted_nodes = set(annotations[annotations['Cardiovascular_Drug'] == 'Yes'].index)
outline_colors = ['black' if node in highlighted_nodes else 'none' for node in node_list]

# An explicit Figure/Axes pair lets the colorbar and Streamlit be handed the
# exact objects they need instead of relying on pyplot's implicit state.
fig, ax = plt.subplots(figsize=(19, 9), facecolor='white')

nx.draw_networkx_nodes(G, pos, node_color=c, node_size=s,
                       edgecolors=outline_colors, linewidths=2, ax=ax)
nx.draw_networkx_edges(G, pos, edge_color=ec, width=ew, ax=ax)

# Label colour depends on degree: nodes below the median degree get white
# text, the rest black. The median is computed once, not once per node.
median_degree = np.median(list(degree_by_node.values()))
for node, (x, y) in pos.items():
    if degree_by_node[node] < median_degree:
        font_color = 'white'
        # White labels sit inside the node, so cap their size at 10.
        font_size = min(size_by_node[node] * 0.01, 10)
    else:
        font_color = 'black'
        font_size = 12
    ax.text(x, y, node, fontsize=font_size, fontweight='bold',
            ha='center', va='center', color=font_color)

# Colorbar for the node-colour scale. The mappable is not attached to any
# Axes, so ax= must be given explicitly; recent Matplotlib raises otherwise.
# NOTE: the bar shows the normalised 0-1 scale, not raw degree counts.
sm = plt.cm.ScalarMappable(cmap=graph_colormap, norm=plt.Normalize(vmin=0, vmax=1))
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Node Degree (Higher = More Connected)', fontsize=12)

ax.axis('off')

# Render the figure itself (not the pyplot module), then close it so figures
# do not accumulate in pyplot's registry across Streamlit reruns.
st.pyplot(fig)
plt.close(fig)