Spaces:
Sleeping
Sleeping
########### AGENT: ClinicalTrials.gov ###################################################################################################
##Gradio App: TRIAL CONNECT | |
#Author: Tamer Chowdhury, Sept-Nov 2023
#tamer.chowdhury@gmail.com | |
################################################################################################################################## | |
import gradio as gr | |
from gradio import Interface | |
from gradio import Dropdown | |
import io | |
import re | |
import pandas as pd | |
import textwrap | |
from IPython.display import display | |
import requests | |
from IPython.core.display import display_markdown | |
########### ClinicalTrials.gov API for study fields with Recruiting Trials Only ###################################
import aiohttp | |
import asyncio | |
async def fetch(session, url, params):
    """Issue a GET request through ``session`` and return the body as text.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async
            context manager yielding the response (an
            ``aiohttp.ClientSession`` at the call site in this file).
        url: Address to request.
        params: Query parameters, forwarded unchanged to ``session.get``.

    Returns:
        The result of awaiting ``response.text()``.
    """
    request = session.get(url, params=params)
    async with request as reply:
        body = await reply.text()
    return body
############################################################################################################################################################# | |
async def get_nct_ids(lead_sponsor_name=None, disease_area=None, overall_status=None,
                      location_country=None, NCTId=None, max_records=None, blocks=30):
    """Query the ClinicalTrials.gov ``study_fields`` endpoint and return trial rows.

    When ``NCTId`` is given it is used as the search expression.  Otherwise the
    expression is built from ``disease_area`` and/or ``lead_sponsor_name``
    (spaces replaced by ``+``; both terms joined with ``+AND+``).  ``blocks``
    requests of 1000 ranks each are issued concurrently and their CSV payloads
    are concatenated into one frame.

    Args:
        lead_sponsor_name: Optional sponsor search term.
        disease_area: Optional condition search term.
        overall_status: Value a row's ``OverallStatus`` must equal for the row
            to be kept.  Not applied when ``NCTId`` is given.
        location_country: Spaces are replaced by ``+`` but the value is not
            used in the query.
        NCTId: Optional trial identifier; takes precedence over the other
            search terms.
        max_records: Accepted but not used.
        blocks: Number of 1000-rank windows to request.

    Returns:
        A list of dicts, one per retained row, keyed by the short names used
        by the rest of this file (``'NCTId'``, ``'Phase'``, ``'Status'``,
        ``'Drug'``, ``'Sponsor'``, ...).
    """
    endpoint = "https://clinicaltrials.gov/api/query/study_fields"
    requested_fields = (
        "NCTId,OrgStudyId,BriefTitle,Condition,Phase,OverallStatus,PrimaryCompletionDate,EnrollmentCount,StudyType,StudyPopulation,"
        "LocationCountry,LocationCity,DesignPrimaryPurpose,LocationFacility,ArmGroupLabel,LeadSponsorName,InterventionName,PrimaryOutcomeMeasure,"
        "StartDate,CollaboratorName"
    )
    query = {"fields": requested_fields, "fmt": "csv"}

    if NCTId:
        query["expr"] = f"{NCTId}"
    else:
        if disease_area:
            disease_area = disease_area.replace(" ", "+")
        if lead_sponsor_name:
            lead_sponsor_name = lead_sponsor_name.replace(" ", "+")
        if location_country:
            # Normalised like the other terms, but never added to the query.
            location_country = location_country.replace(" ", "+")
        terms = [term for term in (disease_area, lead_sponsor_name) if term]
        if terms:
            query["expr"] = "+AND+".join(terms)

    # Rank windows are 1-based: 1-1000, 1001-2000, ...
    window = 1000
    async with aiohttp.ClientSession() as session:
        pending = []
        for block_no in range(blocks):
            ranked = dict(
                query,
                min_rnk=block_no * window + 1,
                max_rnk=(block_no + 1) * window,
            )
            pending.append(fetch(session, endpoint, ranked))
        payloads = await asyncio.gather(*pending)

    # Number of leading lines skipped before the CSV header row.
    header_lines = 9 if NCTId else 10
    frames = []
    for position, payload in enumerate(payloads, start=1):
        if len(payload.strip().splitlines()) > 1:
            frame = pd.read_csv(io.StringIO(payload), skiprows=header_lines)
        else:
            frame = pd.DataFrame()
        print(f"Number of records in interim_df{position}: {len(frame)}")
        frames.append(frame)

    combined = pd.concat(frames, ignore_index=True)
    print(f"Number of records returned from all the Block Request: {len(combined)}")

    # (output key, CSV column) pairs, in the order the output dict is built.
    column_for = (
        ('NCTId', 'NCTId'),
        ('Phase', 'Phase'),
        ('OrgStudyId', 'OrgStudyId'),
        ('Status', 'OverallStatus'),
        ('Condition', 'Condition'),
        ('CompletionDate', 'PrimaryCompletionDate'),
        ('EnrollmentCount', 'EnrollmentCount'),
        ('StudyType', 'StudyType'),
        ('Arm', 'ArmGroupLabel'),
        ('Drug', 'InterventionName'),
        ('Country', 'LocationCountry'),
        ('City', 'LocationCity'),
        ('Site', 'LocationFacility'),
        ('StudyPopulation', 'StudyPopulation'),
        ('Sponsor', 'LeadSponsorName'),
        ('Collaborator', 'CollaboratorName'),
        ('StartDate', 'StartDate'),
        ('PrimaryMeasure', 'PrimaryOutcomeMeasure'),
        ('Purpose', 'DesignPrimaryPurpose'),
        ('BriefTitle', 'BriefTitle'),
    )

    selected = []
    for _, row in combined.iterrows():
        # A lookup by NCT id keeps every row; otherwise only rows whose
        # status equals the requested one are kept.
        if not NCTId and not (row['OverallStatus'] == overall_status):
            continue
        selected.append({key: row[column] for key, column in column_for})
    return selected
########################################################################################################################################################## | |
######################################################################################################### | |
## API For Inclusions | |
import requests | |
import re | |
def get_formatted_inclusion_criteria(nct_id):
    """Fetch a trial's eligibility text and return its inclusion criteria as a numbered list.

    The text before the first (case-sensitive) ``Exclusion Criteria`` heading
    is treated as the inclusion section.  It is split on newlines, lines that
    contain an "Inclusion Criteria" heading are dropped, blank lines are
    skipped, and the remaining lines are numbered from 1 with a trailing
    period appended to each.

    Args:
        nct_id: Trial identifier used as the ``expr`` query parameter.

    Returns:
        The numbered criteria joined with newlines, or ``None`` (after
        printing a message) when the response holds no eligibility text.

    Raises:
        requests.exceptions.RequestException: On network failure, including a
            timeout after 30 seconds.
        ValueError: If the response body is not valid JSON.
    """
    # Passing the query as ``params`` lets requests URL-encode the identifier;
    # the timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(
        "https://clinicaltrials.gov/api/query/full_studies",
        params={"expr": nct_id, "fields": "EligibilityCriteria", "fmt": "json"},
        timeout=30,
    )
    data = response.json()

    try:
        eligibility_criteria = data['FullStudiesResponse']['FullStudies'][0]['Study']['ProtocolSection']['EligibilityModule']['EligibilityCriteria']
    except (IndexError, KeyError):
        print(f"Inclusion criteria not found for Trial NCT ID: {nct_id}")
        return None

    # Everything ahead of the exclusion heading is the inclusion section.
    inclusion_criteria = re.split(r'\b(?:Exclusion Criteria)\b', eligibility_criteria)[0].strip()

    inclusions = []
    for line in re.split(r'\n+', inclusion_criteria):
        line = line.strip()
        if not line:
            # Whitespace-only lines would otherwise be numbered as "N. ."
            continue
        if re.search(r'\bInclusion\s*Criteria\b', line, flags=re.IGNORECASE):
            continue
        inclusions.append(line)

    return "\n".join(f"{number}. {line}." for number, line in enumerate(inclusions, start=1))
## ############################API For Exclusions################################################################################################################################################### | |
def get_formatted_exclusion_criteria(nct_id):
    """Fetch a trial's eligibility text and return its exclusion criteria as a numbered list.

    The text between the first and second (case-sensitive)
    ``Exclusion Criteria`` headings -- or to the end when there is only one --
    is treated as the exclusion section.  It is split on newlines, lines that
    contain an "Exclusion Criteria" heading are dropped, blank lines are
    skipped, and the remaining lines are numbered from 1 with a trailing
    period appended to each.

    Args:
        nct_id: Trial identifier used as the ``expr`` query parameter.

    Returns:
        The numbered criteria joined with newlines, or ``None`` (after
        printing a message) when the response holds no eligibility text or
        the text has no ``Exclusion Criteria`` heading.

    Raises:
        requests.exceptions.RequestException: On network failure, including a
            timeout after 30 seconds.
        ValueError: If the response body is not valid JSON.
    """
    # Passing the query as ``params`` lets requests URL-encode the identifier;
    # the timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(
        "https://clinicaltrials.gov/api/query/full_studies",
        params={"expr": nct_id, "fields": "EligibilityCriteria", "fmt": "json"},
        timeout=30,
    )
    data = response.json()

    try:
        eligibility_criteria = data['FullStudiesResponse']['FullStudies'][0]['Study']['ProtocolSection']['EligibilityModule']['EligibilityCriteria']
        # Index 1 raises IndexError when the heading is absent.
        exclusion_criteria = re.split(r'\b(?:Exclusion Criteria)\b', eligibility_criteria)[1].strip()
    except (IndexError, KeyError):
        print(f"Exclusion criteria not found for NCT ID: {nct_id}")
        return None

    exclusions = []
    for line in re.split(r'\n+', exclusion_criteria):
        line = line.strip()
        if not line:
            # Whitespace-only lines would otherwise be numbered as "N. ."
            continue
        if re.search(r'\bExclusion\s*Criteria\b', line, flags=re.IGNORECASE):
            continue
        exclusions.append(line)

    return "\n".join(f"{number}. {line}." for number, line in enumerate(exclusions, start=1))
################################# Apply CSS Style to HTML Table ############################################################################################################## | |
def dataframe_to_html_table(df):
    """Render ``df`` as a striped HTML table inside a scrollable, styled container.

    Args:
        df: Frame to render; its index is omitted from the markup.

    Returns:
        A string made of a ``<style>`` block followed by
        ``<div class="table-container">`` wrapping ``df.to_html(...)``.
    """
    stylesheet = "\n".join([
        "",
        "<style>",
        ".table-container {",
        "width: 100%;",
        "max-width: 100%;",
        "margin-bottom: 1rem;",
        "overflow-x: auto;",
        "overflow-y: auto;",
        "max-height: 400px;",
        "}",
        ".table {",
        "width: 100%;",
        "max-width: 100%;",
        "margin-bottom: 1rem;",
        "border-collapse: collapse;",
        "white-space: nowrap;",
        "}",
        ".table-striped tbody tr:nth-of-type(odd) {",
        "background-color: rgba(0, 0, 0, 0.05);",
        "}",
        "th, td {",
        "padding: 0.75rem;",
        "vertical-align: top;",
        "border-top: 1px solid #dee2e6;",
        "white-space: normal;",
        "overflow-wrap: break-word;",
        "max-width: 150px;",
        "}",
        "/* Set the width of the 'BriefTitle' and 'Arm' columns */",
        ".table td:nth-child(3), .table td:nth-child(11) {",
        "width: 300px;",
        "}",
        "thead th {",
        "position: sticky;",
        "top: 0;",
        "background-color: white;",
        "z-index: 1;",
        "}",
        "thead th::before {",
        'content: "";',
        "position: absolute;",
        "left: 0;",
        "width: 100%;",
        "height: 100%;",
        "border-right: 1px solid #dee2e6;",
        "background-color: white;",
        "z-index: -1;",
        "}",
        "</style>",
        "",
    ])
    table_markup = df.to_html(classes="table table-striped", index=False, border=0)
    return f'{stylesheet}<div class="table-container">{table_markup}</div>'
################################################################################################################################## | |
def format_summary_stats(summary):
    """Wrap ``summary`` in a bold, 20px, black ``<div id="summary-output">``.

    Args:
        summary: Text or HTML fragment to display; it is inserted as-is,
            without escaping.

    Returns:
        A string holding a ``<style>`` rule for ``#summary-output`` followed
        by the ``<div>`` containing ``summary``.
    """
    style_rule = "\n".join([
        "<style>",
        "#summary-output {",
        "font-weight: bold;",
        "font-size: 20px;",
        "color: black;",
        "}",
        "</style>",
    ])
    container = f'<div id="summary-output">{summary}</div>'
    return "\n" + style_rule + "\n" + container + "\n"
############################ End of Style ############################################################################################# | |
############### Functions to Process the Dataframes of Disease, Conditions, Trial Details#################################### | |
# parse the conditions | |
import re | |
import pandas as pd | |
####################################################################################################### | |
def split_conditions(df, column_to_split):
    """Expand a comma- or pipe-delimited column into one row per value.

    Every input row is repeated once for each piece of
    ``row[column_to_split]`` (split on ``,`` or ``|`` and stripped of
    surrounding whitespace); all other columns are copied unchanged.

    A missing value (``None``/``NaN``) is passed through as-is in a single
    row rather than being turned into the text ``"None"``/``"nan"``, so a
    later ``fillna`` by the caller can still label it.

    Args:
        df: Source frame.
        column_to_split: Name of the column holding the delimited text.

    Returns:
        A new frame with a fresh 0-based index and the same (unique) column
        names as ``df``; an input without rows yields an empty frame that
        still has those columns.

    Raises:
        KeyError: If ``column_to_split`` is not a column of ``df`` and ``df``
            has at least one row.
    """
    # Unique names in original order, matching the keys of the row dicts.
    output_columns = list(dict.fromkeys(df.columns))
    new_rows = []
    for _, row in df.iterrows():
        cell = row[column_to_split]
        if pd.api.types.is_scalar(cell) and pd.isna(cell):
            pieces = [cell]
        else:
            pieces = [piece.strip() for piece in re.split(r'[,|]', str(cell))]
        for piece in pieces:
            new_row = {col: row[col] for col in output_columns}
            new_row[column_to_split] = piece
            new_rows.append(new_row)
    return pd.DataFrame(new_rows, columns=output_columns)
######################################################################################################################### | |
def split_drug(df, column_to_split):
    """Expand a comma- or pipe-delimited drug column into one row per drug.

    Every input row is repeated once for each piece of
    ``row[column_to_split]`` (split on ``,`` or ``|`` and stripped of
    surrounding whitespace); all other columns are copied unchanged.

    A missing value (``None``/``NaN``) is passed through as-is in a single
    row rather than being turned into the text ``"None"``/``"nan"``, so a
    later ``fillna`` by the caller can still label it.

    Args:
        df: Source frame.
        column_to_split: Name of the column holding the delimited text.

    Returns:
        A new frame with a fresh 0-based index and the same (unique) column
        names as ``df``; an input without rows yields an empty frame that
        still has those columns.

    Raises:
        KeyError: If ``column_to_split`` is not a column of ``df`` and ``df``
            has at least one row.
    """
    # Unique names in original order, matching the keys of the row dicts.
    output_columns = list(dict.fromkeys(df.columns))
    new_rows = []
    for _, row in df.iterrows():
        cell = row[column_to_split]
        if pd.api.types.is_scalar(cell) and pd.isna(cell):
            drugs = [cell]
        else:
            drugs = [drug.strip() for drug in re.split(r'[,|]', str(cell))]
        for drug in drugs:
            new_row = {col: row[col] for col in output_columns}
            new_row[column_to_split] = drug
            new_rows.append(new_row)
    return pd.DataFrame(new_rows, columns=output_columns)
############################################################################################################################################ | |
############################################################################################# | |
def split_columns(df, columns_to_split):
    """Expand several pipe-delimited columns in lockstep.

    For each input row the listed columns are split on ``|``.  The row is
    repeated as many times as the longest of those lists; the i-th output row
    takes the i-th piece from every split column, or ``None`` where a
    column's list is shorter.  All other columns are copied unchanged.

    Args:
        df: Source frame.
        columns_to_split: Names of the columns holding ``|``-delimited text.
            When empty, each row is emitted once, unchanged.

    Returns:
        A new frame with a fresh 0-based index and the columns of ``df`` in
        their original order.  A frame without rows yields an empty frame
        with the same columns.
    """
    passthrough = [col for col in df.columns if col not in columns_to_split]
    new_dfs = []
    for _, row in df.iterrows():
        # Split each cell once instead of on every use.
        pieces = {col: str(row[col]).split('|') for col in columns_to_split}
        depth = max((len(values) for values in pieces.values()), default=1)
        split_rows = []
        for i in range(depth):
            split_row = {
                col: (values[i] if i < len(values) else None)
                for col, values in pieces.items()
            }
            for col in passthrough:
                split_row[col] = row[col]
            split_rows.append(split_row)
        new_dfs.append(pd.DataFrame(split_rows))

    if not new_dfs:
        # pd.concat rejects an empty list; keep the schema instead.
        return df.iloc[:0].copy()

    return pd.concat(new_dfs, ignore_index=True)[df.columns]
################## Interventional, Observational Trials Lead Sponsor Counts################################################## | |
def calculate_summary_stats(df, sponsor):
    """Summarise trials, conditions and planned enrolment per study type (lead-sponsor view).

    For "Interventional" and then "Observational" rows of ``df`` this counts
    the distinct ``NCTId`` values, the distinct ``Condition`` values other
    than ``'Healthy'`` and ``'Adult'``, and sums the first numeric
    ``EnrollmentCount`` found for each ``NCTId`` (non-numeric values are
    ignored).

    Args:
        df: Frame with ``StudyType``, ``NCTId``, ``Condition`` and
            ``EnrollmentCount`` columns.
        sponsor: Heading label; ``"All Lead Sponsors"`` is used when falsy.

    Returns:
        An HTML fragment: the heading followed by one ``<br>``-separated line
        per study type.
    """
    heading = sponsor or "All Lead Sponsors"
    lines = []
    for kind in ("Interventional", "Observational"):
        subset = df.loc[df['StudyType'] == kind].copy()
        subset['EnrollmentCount'] = pd.to_numeric(subset['EnrollmentCount'], errors='coerce')

        trial_total = len(subset['NCTId'].unique())
        condition_total = sum(
            1
            for name in subset['Condition'].unique()
            if not (name == 'Healthy' or name == 'Adult')
        )
        # One enrolment figure per trial, so repeated rows are not double counted.
        enrolled = subset.groupby('NCTId')['EnrollmentCount'].first().sum()

        lines.append(
            f"{trial_total} {kind} Trials, "
            f"{condition_total} Conditions, "
            f"{int(enrolled):,} Planned Patients."
        )
    return f"{heading} - As Lead Sponsor: <br>" + "<br>".join(lines)
############################################################################################################################################ | |
def calculate_summary_stats_collb(df, sponsor):
    """Summarise trials, conditions and planned enrolment per study type (collaborator view).

    For "Interventional" and then "Observational" rows of ``df`` this counts
    the distinct ``NCTId`` values, the distinct ``Condition`` values other
    than ``'Healthy'`` and ``'Adult'``, and sums the first numeric
    ``EnrollmentCount`` found for each ``NCTId`` (non-numeric values are
    ignored).

    Args:
        df: Frame with ``StudyType``, ``NCTId``, ``Condition`` and
            ``EnrollmentCount`` columns.
        sponsor: Heading label; ``"All Collaborators"`` is used when falsy.

    Returns:
        An HTML fragment: the heading followed by one ``<br>``-separated line
        per study type.
    """
    label = sponsor or "All Collaborators"

    def describe(kind):
        # Build the one-line summary for a single study type.
        rows = df.loc[df['StudyType'] == kind].copy()
        rows['EnrollmentCount'] = pd.to_numeric(rows['EnrollmentCount'], errors='coerce')
        trials = len(rows['NCTId'].unique())
        conditions = len([
            name for name in rows['Condition'].unique()
            if not (name == 'Healthy' or name == 'Adult')
        ])
        patients = rows.groupby('NCTId')['EnrollmentCount'].first().sum()
        return (
            f"{trials} {kind} Trials, "
            f"{conditions} Conditions, "
            f"{int(patients):,} Planned Patients."
        )

    parts = [describe(kind) for kind in ("Interventional", "Observational")]
    return f"{label} - With Collaborators Recruiting For: <br>" + "<br>".join(parts)
################################################################################################################## | |
def calculate_summary_stats_sites(df, sponsor, country):
    """Summarise trials, countries, sites and planned enrolment per study type.

    Args:
        df: Frame with ``NCTId``, ``StudyType``, ``Country``, ``City``,
            ``Site`` and ``EnrollmentCount`` columns.
        sponsor: Heading label; ``"All Sponsors"`` is used when falsy.
        country: When truthy, only rows whose ``Country`` equals it are
            considered.

    Returns:
        An HTML fragment with the heading, an Interventional line (trials,
        countries, sites, planned patients) and an Observational line
        (trials, countries, sites).
    """
    if country:
        df = df[df['Country'] == country]

    # One row per (trial, study type) so enrolment is counted once per trial.
    per_trial = df.groupby(['NCTId', 'StudyType']).first().reset_index()
    per_trial['EnrollmentCount'] = pd.to_numeric(per_trial['EnrollmentCount'], errors='coerce')

    figures = {}
    for kind in ('Interventional', 'Observational'):
        rows_of_kind = df[df['StudyType'] == kind]
        trials_of_kind = per_trial[per_trial['StudyType'] == kind]
        site_groups = rows_of_kind.groupby(['Country', 'City', 'Site'])['NCTId'].nunique()
        figures[kind] = {
            'trials': len(trials_of_kind['NCTId'].unique()),
            'countries': rows_of_kind['Country'].nunique(),
            'sites': site_groups.reset_index().shape[0],
            'patients': int(trials_of_kind['EnrollmentCount'].sum()),
        }

    heading = sponsor or "All Sponsors"
    iv = figures['Interventional']
    ob = figures['Observational']
    interventional_line = (
        f"{iv['trials']} Interventional Trials, in {iv['countries']} Country, "
        f"at {iv['sites']} Sites, Recruiting: {iv['patients']:,} Planned Patients. <br>"
    )
    observational_line = (
        f"{ob['trials']} Observational Trials, in {ob['countries']} Country, "
        f"at {ob['sites']} Sites"
    )
    return f"{heading} <br> " + interventional_line + observational_line
#{observational_count} Observational Trials, in {observational_countries} Country, at {observational_grouped} Sites, Recruiting: {formatted_observational_patients} Planned Patients." | |
################################################ GRADIO STARTS HERE ######################################################### | |
# Wrapper function called from the Gradio Interface: maps the inputs to the outputs
async def gradio_wrapper_nct(sponsor=None, condition=None, NCTId=None, country=None, status=None):
    """Fetch trials and build the six outputs shown by the interface.

    The search uses ``condition`` and/or ``sponsor`` when given, otherwise
    ``NCTId``.  The fetched rows are turned into a trial table, a
    per-condition table and a per-drug table.  When ``sponsor`` is given the
    condition and drug tables are narrowed to trials it leads, and a second
    condition table lists trials where it collaborates.

    Args:
        sponsor: Optional lead-sponsor name.
        condition: Optional condition / disease-area search term.
        NCTId: Optional trial identifier, used only when neither ``sponsor``
            nor ``condition`` is given.
        country: Accepted but not used here.
        status: Forwarded to ``get_nct_ids`` as ``overall_status``.

    Returns:
        Always a 6-tuple ``(summary_stats, summary_stats_collb,
        html_table_conditions, html_table_conditions_collb, html_table,
        html_table_drugs)``:

        * with no search input, the first item is an explanatory message and
          the rest are ``None``;
        * when the search returns nothing, all six items are ``None``.
    """
    if condition and sponsor:
        recruiting_trials = await get_nct_ids(disease_area=condition, lead_sponsor_name=sponsor, overall_status=status)
    elif condition:
        recruiting_trials = await get_nct_ids(disease_area=condition, overall_status=status)
    elif sponsor:
        recruiting_trials = await get_nct_ids(lead_sponsor_name=sponsor, overall_status=status)
    elif NCTId:
        recruiting_trials = await get_nct_ids(NCTId=NCTId, overall_status=status)
    else:
        # Same arity as every other return, so the six outputs stay aligned.
        return "No condition, sponsor, or trial NCT Id provided", None, None, None, None, None

    trial_fields = (
        'Sponsor', 'Collaborator', 'Status', 'Drug', 'StudyType', 'Phase',
        'Site', 'Country', 'City', 'NCTId', 'OrgStudyId', 'Condition',
        'StartDate', 'CompletionDate', 'EnrollmentCount', 'PrimaryMeasure',
        'Purpose', 'Arm', 'BriefTitle',
    )
    trial_info_list = [{field: trial[field] for field in trial_fields} for trial in recruiting_trials]

    if not trial_info_list:
        return None, None, None, None, None, None

    # ---- Trial-level table -------------------------------------------------
    clinical_trials_gov = pd.DataFrame(
        trial_info_list,
        columns=['NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Condition', 'Drug', 'Phase', 'StudyType',
                 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Arm', 'Purpose', 'PrimaryMeasure',
                 'Sponsor', 'Collaborator'],
    )
    clinical_trials_gov = clinical_trials_gov.fillna("Not Available")
    clinical_trials_gov = clinical_trials_gov.sort_values(
        by=['StudyType', 'Phase', 'CompletionDate', 'EnrollmentCount'],
        ascending=[True, False, True, False],
    )
    html_table = dataframe_to_html_table(clinical_trials_gov)

    # ---- One row per condition ---------------------------------------------
    clinical_trials_gov_conditions = pd.DataFrame(
        trial_info_list,
        columns=['NCTId', 'BriefTitle', 'OrgStudyId', 'Phase', 'Status', 'StudyType', 'Condition', 'Drug',
                 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Sponsor', 'Collaborator'],
    )
    clinical_trials_gov_conditions = split_conditions(clinical_trials_gov_conditions, 'Condition')
    clinical_trials_gov_conditions = clinical_trials_gov_conditions.fillna("Not Available")

    # ---- One row per drug --------------------------------------------------
    # 'Status' is listed once: a duplicated label makes row['Status'] a Series
    # and garbles the rendered column.
    clinical_trials_gov_drugs = pd.DataFrame(
        trial_info_list,
        columns=['NCTId', 'BriefTitle', 'OrgStudyId', 'Status', 'Phase', 'StudyType', 'Condition', 'Drug',
                 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Sponsor', 'Collaborator'],
    )
    clinical_trials_gov_drugs = split_conditions(clinical_trials_gov_drugs, 'Drug')
    clinical_trials_gov_drugs = clinical_trials_gov_drugs.fillna("Not Available")

    clinical_trials_gov_conditions_collb = None
    if sponsor:
        all_conditions = clinical_trials_gov_conditions

        # Trials led by the sponsor, and the subset that has collaborators.
        led = all_conditions[all_conditions['Sponsor'] == sponsor]
        led_with_collaborators = led[(led['Collaborator'] != 'Not Available') &
                                     (~led['Collaborator'].isnull())]

        # Trials where the sponsor is the only collaborator: show the actual
        # lead sponsor in the Collaborator column instead.
        sole_collaborator = all_conditions[all_conditions['Collaborator'] == sponsor].copy()
        sole_collaborator['Collaborator'] = sole_collaborator['Sponsor']

        # Trials where the sponsor is one of several pipe-separated
        # collaborators.  The name is escaped so characters such as
        # parentheses are matched literally.
        pattern = rf'(?:^|\|){re.escape(sponsor)}(?:\||$)'
        among_collaborators = all_conditions[
            all_conditions['Collaborator'].str.contains(pattern, na=False, flags=re.IGNORECASE, regex=True)
        ]
        among_collaborators = among_collaborators[among_collaborators['Collaborator'] != sponsor].copy()
        among_collaborators['Collaborator'] = (
            among_collaborators['Sponsor'] + '|' + among_collaborators['Collaborator']
        )

        clinical_trials_gov_conditions = led
        clinical_trials_gov_conditions_collb = pd.concat(
            [led_with_collaborators, sole_collaborator, among_collaborators],
            ignore_index=True,
        )
        clinical_trials_gov_drugs = clinical_trials_gov_drugs[clinical_trials_gov_drugs['Sponsor'] == sponsor]

    # ---- HTML tables -------------------------------------------------------
    html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions)
    html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs)
    if sponsor:
        html_table_conditions_collb = dataframe_to_html_table(clinical_trials_gov_conditions_collb)
    else:
        html_table_conditions_collb = pd.DataFrame().to_html(
            index=False, header=True, border=0, table_id="empty_table"
        )

    # ---- Summary statistics ------------------------------------------------
    summary_stats = format_summary_stats(calculate_summary_stats(clinical_trials_gov_conditions, sponsor))
    if sponsor:
        summary_stats_collb = format_summary_stats(
            calculate_summary_stats_collb(clinical_trials_gov_conditions_collb, sponsor)
        )
    else:
        summary_stats_collb = ''

    return (summary_stats, summary_stats_collb, html_table_conditions,
            html_table_conditions_collb, html_table, html_table_drugs)
# Wrapper function called from the Gradio Interface: maps the inputs to the outputs
async def gradio_wrapper_nct_spn(sponsor=None, condition=None, NCTId=None, country=None, status=None):
    """Fetch trials and render the sponsor-centric tables and summaries.

    The search uses ``condition`` and/or ``sponsor`` when given, otherwise
    ``NCTId``. ``status`` is forwarded to ``get_nct_ids`` in every case.

    Args:
        sponsor: Lead sponsor name. When given, the condition, drug and site
            tables keep only rows whose ``Sponsor`` equals it exactly.
        condition: Disease area forwarded to ``get_nct_ids``.
        NCTId: Trial identifier, used only when neither ``condition`` nor
            ``sponsor`` is given.
        country: When given, the site table keeps only rows whose ``Country``
            equals it exactly.
        status: Overall status forwarded to ``get_nct_ids``.

    Returns:
        ``(summary_stats, html_table_conditions, html_table,
        summary_stats_sites, html_table_add, html_table_drugs)``.
        Six ``None`` values when the query yields no trials.
        ``(empty DataFrame, message)`` when no search criterion was supplied.
    """
    # NOTE: pandas is deliberately NOT imported inside this function. A local
    # ``import pandas as pd`` makes ``pd`` function-local, and the early return
    # below would then fail with UnboundLocalError before the import runs.
    if condition and sponsor:
        recruiting_trials = await get_nct_ids(disease_area=condition, lead_sponsor_name=sponsor, overall_status=status)
    elif condition:
        recruiting_trials = await get_nct_ids(disease_area=condition, overall_status=status)
    elif sponsor:
        recruiting_trials = await get_nct_ids(lead_sponsor_name=sponsor, overall_status=status)
    elif NCTId:
        recruiting_trials = await get_nct_ids(NCTId=NCTId, overall_status=status)
    else:
        # NOTE(review): this path returns 2 values while every other path
        # returns 6 -- confirm what the Gradio caller expects.
        return pd.DataFrame(), "No condition, sponsor, or trial NCT Id provided"

    # Keep only the fields the tables below need; a missing key raises KeyError.
    trial_fields = (
        'Sponsor', 'Collaborator', 'Drug', 'StudyType', 'Phase', 'Status',
        'Site', 'Country', 'City', 'NCTId', 'OrgStudyId', 'Condition',
        'StartDate', 'CompletionDate', 'EnrollmentCount', 'PrimaryMeasure',
        'Purpose', 'Arm', 'BriefTitle',
    )
    trial_info_list = [
        {field: trial[field] for field in trial_fields}
        for trial in recruiting_trials
    ]
    if not trial_info_list:
        return None, None, None, None, None, None

    # --- Full trial table -------------------------------------------------
    clinical_trials_gov = pd.DataFrame(trial_info_list, columns=[
        'NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Condition', 'Drug',
        'Phase', 'StudyType', 'StartDate', 'CompletionDate', 'EnrollmentCount',
        'Arm', 'Purpose', 'PrimaryMeasure', 'Sponsor', 'Collaborator'])
    clinical_trials_gov.fillna("Not Available", inplace=True)
    clinical_trials_gov = clinical_trials_gov.sort_values(
        by=['StudyType', 'Phase', 'CompletionDate', 'EnrollmentCount'],
        ascending=[True, False, True, False])
    html_table = dataframe_to_html_table(clinical_trials_gov)

    # --- Site / country / city table --------------------------------------
    clinical_trials_gov_add = pd.DataFrame(trial_info_list, columns=[
        'StudyType', 'Phase', 'NCTId', 'OrgStudyId', 'Status', 'BriefTitle',
        'Site', 'Country', 'City', 'Condition', 'Sponsor', 'Collaborator',
        'Drug', 'StartDate', 'CompletionDate', 'EnrollmentCount'])
    clinical_trials_gov_add.fillna("Not Available", inplace=True)
    clinical_trials_gov_add = clinical_trials_gov_add.sort_values(
        by=['StudyType', 'Phase', 'EnrollmentCount', 'CompletionDate', 'Country'],
        ascending=[True, False, False, True, True])

    # --- One row per condition / per drug ---------------------------------
    split_view_columns = [
        'NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Phase', 'StudyType',
        'Condition', 'Drug', 'EnrollmentCount', 'Sponsor', 'Collaborator']
    clinical_trials_gov_conditions = pd.DataFrame(trial_info_list, columns=split_view_columns)
    clinical_trials_gov_conditions = split_conditions(clinical_trials_gov_conditions, 'Condition')
    clinical_trials_gov_conditions.fillna("Not Available", inplace=True)

    clinical_trials_gov_drugs = pd.DataFrame(trial_info_list, columns=split_view_columns)
    clinical_trials_gov_drugs = split_conditions(clinical_trials_gov_drugs, 'Drug')
    clinical_trials_gov_drugs.fillna("Not Available", inplace=True)

    country_site_city_df = split_columns(clinical_trials_gov_add, ['Site', 'Country', 'City'])
    # The split can reintroduce missing values, so fill again.
    country_site_city_df.fillna("Not Available", inplace=True)

    if country:
        country_site_city_df = country_site_city_df[country_site_city_df['Country'] == country]

    # Restrict every view to trials led by the requested sponsor. The
    # collaborator-side frames that used to be assembled here were never
    # returned or read, so they are no longer computed (this also removes an
    # unescaped regex built from the sponsor name).
    if sponsor:
        clinical_trials_gov_conditions = clinical_trials_gov_conditions[
            clinical_trials_gov_conditions['Sponsor'] == sponsor]
        clinical_trials_gov_drugs = clinical_trials_gov_drugs[
            clinical_trials_gov_drugs['Sponsor'] == sponsor]
        country_site_city_df = country_site_city_df[
            country_site_city_df['Sponsor'] == sponsor]

    html_table_add = dataframe_to_html_table(country_site_city_df)
    html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions)
    html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs)

    summary_stats_pre = calculate_summary_stats(clinical_trials_gov_conditions, sponsor)
    summary_stats = format_summary_stats(summary_stats_pre)

    summary_stats_sites_pre = calculate_summary_stats_sites(country_site_city_df, sponsor, country)
    summary_stats_sites = format_summary_stats(summary_stats_sites_pre)

    return summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add, html_table_drugs
############################################################################################################################################################### | |
##### ################## Start Gradio Interface ######################################################################### | |
################################## Condition Icicle and Sponsor Map ######################## | |
################################################################### | |
import plotly.graph_objects as go | |
import pandas as pd | |
import numpy as np | |
################################################ TOP 20 Conditions###################################################### | |
########################################################################################################## | |
def plot_condition_sunburst(df, top_n=20):
    """Build an icicle chart of the most frequent conditions.

    Only rows with ``StudyType == "Interventional"`` are counted. Condition
    names are upper-cased, a few generic labels are dropped, and conditions
    are ranked by their number of distinct ``NCTId`` values.

    Args:
        df: DataFrame with ``StudyType``, ``Condition`` and ``NCTId`` columns.
        top_n: How many of the highest-ranked conditions to plot (default 20).

    Returns:
        The plotly figure produced by ``px.icicle``.
    """
    # Generic labels that carry no information. "CHRONIC" previously had a
    # stray apostrophe inside the literal and therefore never matched.
    excluded_labels = ["OTHER", "OTHERS", "HEALTHY", "ADULT", "CHRONIC"]

    # Copy the filtered rows so the upper-casing below does not assign into a
    # slice of the caller's frame (SettingWithCopyWarning).
    interventional = df[df['StudyType'] == "Interventional"].copy()
    interventional['Condition'] = interventional['Condition'].str.upper()
    interventional = interventional[~interventional['Condition'].isin(excluded_labels)]

    # Distinct trials per condition, largest first.
    df_count = interventional.groupby('Condition')['NCTId'].nunique().reset_index()
    df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_conditions = df_count.head(top_n).rename(columns={'NCTId': 'Number of Trials'})

    icicle_fig = px.icicle(top_conditions, path=['Condition'], values='Number of Trials',
                           color='Condition', color_continuous_scale='RdBu',
                           custom_data=['Condition', 'Number of Trials'])
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title=f'Top {top_n} Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )
    return icicle_fig
############################################################ Conditions OTHERS ########### ############################################ | |
def plot_condition_others(df, top_n=20):
    """Build an icicle chart of the conditions that fall outside the top ranks.

    Only rows with ``StudyType == "Interventional"`` are counted. Condition
    names are upper-cased, a few generic labels are dropped, conditions are
    ranked by their number of distinct ``NCTId`` values, and everything after
    the first ``top_n`` is plotted.

    Args:
        df: DataFrame with ``StudyType``, ``Condition`` and ``NCTId`` columns.
        top_n: Number of highest-ranked conditions to leave out (default 20).

    Returns:
        The plotly figure produced by ``px.icicle``.
    """
    excluded_labels = ["OTHER", "OTHERS", "HEALTHY", "ADULT"]

    # Copy the filtered rows so the upper-casing below does not assign into a
    # slice of the caller's frame (SettingWithCopyWarning).
    interventional = df[df['StudyType'] == "Interventional"].copy()
    interventional['Condition'] = interventional['Condition'].str.upper()
    interventional = interventional[~interventional['Condition'].isin(excluded_labels)]

    # Distinct trials per condition, largest first.
    df_count = interventional.groupby('Condition')['NCTId'].nunique().reset_index()
    df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True)

    # Everything that is not one of the top-ranked conditions.
    top_names = df_count.head(top_n)['Condition']
    other_conditions = df_count[~df_count['Condition'].isin(top_names)]
    other_conditions = other_conditions.rename(columns={'NCTId': 'Number of Trials'})

    icicle_fig = px.icicle(other_conditions, path=['Condition'], values='Number of Trials',
                           color='Condition', color_continuous_scale='RdBu',
                           custom_data=['Condition', 'Number of Trials'])
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title='Other Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )
    return icicle_fig
################################################################################################################################################### | |
def wrap_text(text, max_chars_per_line):
    """Greedily wrap ``text`` on whitespace and join the lines with ``<br>``.

    Words are never split: a word longer than ``max_chars_per_line`` is placed
    on a line of its own.

    Args:
        text: String to wrap.
        max_chars_per_line: Maximum line length, in characters.

    Returns:
        The wrapped text with ``<br>`` between lines; ``""`` for blank input.
    """
    # NOTE(review): a later definition of wrap_text in this file rebinds the
    # name, so this version is only live until that definition executes.
    lines = []
    current_line = []
    for word in text.split():
        if len(' '.join(current_line + [word])) <= max_chars_per_line:
            current_line.append(word)
        else:
            # Flush only a non-empty line; otherwise an over-long first word
            # would produce a spurious leading "<br>".
            if current_line:
                lines.append(' '.join(current_line))
            current_line = [word]
    if current_line:
        lines.append(' '.join(current_line))
    return '<br>'.join(lines)
##################################################### Sponsor Counts ########################################### | |
################################################### ############################################################ | |
def wrap_text(text, max_chars_per_line):
    """Wrap ``text`` with ``textwrap.wrap`` and join the lines with ``<br>``."""
    wrapped_lines = textwrap.wrap(text, max_chars_per_line)
    line_break = '<br>'
    return line_break.join(wrapped_lines)
def plot_sponsor_collaborator_tree_map(df):
    """Build a treemap of the 30 most frequent sponsor/collaborator pairs.

    Only rows with ``StudyType == "Interventional"`` are counted; each
    (Sponsor, Collaborator) pair is sized by its number of distinct ``NCTId``
    values. Tile text shows the trial count only.

    Args:
        df: DataFrame with ``StudyType``, ``Sponsor``, ``Collaborator`` and
            ``NCTId`` columns.

    Returns:
        The plotly figure produced by ``px.treemap``.
    """
    wrap_width = 10  # characters per line in the wrapped node labels

    interventional = df[df['StudyType'] == "Interventional"]
    pair_counts = interventional.groupby(['Sponsor', 'Collaborator'])['NCTId'].nunique().reset_index()
    ranked_pairs = pair_counts.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_pairs = ranked_pairs.head(30).rename(columns={'NCTId': 'Number of Trials'})
    top_pairs['Wrapped Sponsor'] = top_pairs['Sponsor'].apply(wrap_text, args=(wrap_width,))
    top_pairs['Wrapped Collaborator'] = top_pairs['Collaborator'].apply(wrap_text, args=(wrap_width,))

    tree_map_fig = px.treemap(
        top_pairs,
        path=['Wrapped Sponsor', 'Wrapped Collaborator'],
        values='Number of Trials',
        color='Sponsor',
        color_continuous_scale='RdBu',
        custom_data=['Wrapped Sponsor', 'Wrapped Collaborator', 'Number of Trials'],
    )
    tree_map_fig.update_layout(
        title='Lead Sponsors and Collaborators',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000,
    )
    # Hover shows sponsor, collaborator and count; the tile itself only the count.
    tree_map_fig.update_traces(
        hovertemplate='%{customdata[0]}<br>%{customdata[1]}<br>Number of Trials: %{customdata[2]}',
        textinfo='value',
    )
    return tree_map_fig
######################################################################################################### | |
def plot_sponsor_tree(df):
    """Build an icicle chart of the 30 sponsors with the most trials.

    Only rows with ``StudyType == "Interventional"`` are counted. Distinct
    ``NCTId`` values are counted per (Phase, Sponsor) and then summed per
    sponsor.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Sponsor`` and ``NCTId``
            columns.

    Returns:
        The plotly figure produced by ``px.icicle``.
    """
    # Copy the filtered rows so filling Phase does not assign into a slice of
    # the caller's frame (SettingWithCopyWarning).
    interventional = df[df['StudyType'] == "Interventional"].copy()
    # groupby drops missing keys, so give trials without a phase a placeholder.
    interventional['Phase'] = interventional['Phase'].fillna('UNKNOWN')

    # Distinct trials per (Phase, Sponsor); the rows are already unique per
    # pair, so no second aggregation on the same keys is needed.
    per_phase = interventional.groupby(['Phase', 'Sponsor'])['NCTId'].nunique().reset_index()
    per_sponsor = per_phase.groupby('Sponsor')['NCTId'].sum().reset_index()
    per_sponsor = per_sponsor.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_30_sponsors = per_sponsor.head(30).rename(columns={'NCTId': 'Number of Trials'})
    max_chars_per_line = 10  # characters per line in the wrapped node labels
    top_30_sponsors['Wrapped Sponsor'] = top_30_sponsors['Sponsor'].apply(
        lambda name: wrap_text(name, max_chars_per_line))

    icicle_fig = px.icicle(top_30_sponsors, path=['Wrapped Sponsor'], values='Number of Trials',
                           color='Sponsor', color_continuous_scale='RdBu',
                           custom_data=['Wrapped Sponsor', 'Number of Trials'])
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title='Sponsor',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000
    )
    return icicle_fig
###################################################################################################################################### | |
def plot_collaborator_icicle(df):
    """Build an icicle chart of the 30 collaborators with the most trials.

    Only rows with ``StudyType == "Interventional"`` are counted. Distinct
    ``NCTId`` values are counted per (Phase, Collaborator) and then summed per
    collaborator.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Collaborator`` and
            ``NCTId`` columns.

    Returns:
        The plotly figure produced by ``px.icicle``.
    """
    # Copy the filtered rows so filling Phase does not assign into a slice of
    # the caller's frame (SettingWithCopyWarning).
    interventional = df[df['StudyType'] == "Interventional"].copy()
    # groupby drops missing keys, so give trials without a phase a placeholder.
    interventional['Phase'] = interventional['Phase'].fillna('UNKNOWN')

    # Distinct trials per (Phase, Collaborator); the rows are already unique
    # per pair, so no second aggregation on the same keys is needed.
    per_phase = interventional.groupby(['Phase', 'Collaborator'])['NCTId'].nunique().reset_index()
    per_collaborator = per_phase.groupby('Collaborator')['NCTId'].sum().reset_index()
    per_collaborator = per_collaborator.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_30_collaborators = per_collaborator.head(30).rename(columns={'NCTId': 'Number of Trials'})
    max_chars_per_line = 10  # characters per line in the wrapped node labels
    top_30_collaborators['Collaborators'] = top_30_collaborators['Collaborator'].apply(
        lambda name: wrap_text(name, max_chars_per_line))

    icicle_fig = px.icicle(top_30_collaborators, path=['Collaborators'], values='Number of Trials',
                           color='Collaborator', color_continuous_scale='RdBu',
                           custom_data=['Collaborators', 'Number of Trials'])
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title='Collaborators',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000
    )
    return icicle_fig
#################################### DRUGS ######################################################################## | |
#################### Sankey Diagram for Conditions to Drugs to Phase /NCTId############################################# | |
import pandas as pd | |
import plotly.graph_objects as go | |
import random | |
def random_color():
    """Return a random color as an ``rgb(r, g, b)`` string, each channel in 0-255."""
    red, green, blue = (random.randint(0, 255) for _ in range(3))
    return f'rgb({red}, {green}, {blue})'
############################################################################################################## | |
def plot_drug_sankey(df):
    """Build a Sankey diagram Condition -> Drug -> NCTId -> OrgStudyId -> Phase.

    Only rows with ``StudyType == "Interventional"`` are used. The
    ``Condition`` column is passed through ``split_conditions`` first. Each
    link's value is the number of rows sharing that pair of adjacent values.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``, ``Drug``,
            ``NCTId`` and ``OrgStudyId`` columns.

    Returns:
        A ``go.Figure`` holding one Sankey trace with randomly colored nodes.
    """
    # Copy the filtered rows so filling Phase does not assign into a slice of
    # the caller's frame (SettingWithCopyWarning).
    df = df[df['StudyType'] == "Interventional"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    # Node labels are the distinct values of each level, laid out level after
    # level; node_index maps a value to its position within that layout.
    levels = ['Condition', 'Drug', 'NCTId', 'OrgStudyId', 'Phase']
    labels = []
    node_index = {}
    for level in levels:
        level_values = df[level].unique().tolist()
        offset = len(labels)
        node_index[level] = {val: offset + pos for pos, val in enumerate(level_values)}
        labels.extend(level_values)

    colors = [random_color() for _ in labels]

    # One grouped count per adjacent level pair replaces a full DataFrame scan
    # for every possible node pair. Sorting by (source, target) keeps the link
    # order of the nested loops this replaces.
    source = []
    target = []
    value = []
    for left, right in zip(levels, levels[1:]):
        pair_counts = df.groupby([left, right], sort=False).size()
        links = sorted(
            (node_index[left][a], node_index[right][b], int(n))
            for (a, b), n in pair_counts.items()
        )
        for src, dst, n in links:
            source.append(src)
            target.append(dst)
            value.append(n)

    # Figure height grows with the number of conditions and is capped at 1000.
    num_conditions = len(node_index['Condition'])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])
    fig.update_layout(title_text="Conditions, Drugs, Trial IDs, Phases for Sponsor",
                      font_size=10, height=height, width=1200)
    return fig
########################################################################################### | |
########################################################################################################################### | |
#################################################################### TRIALS ############################## | |
import plotly.graph_objects as go | |
def plot_condition_treemap_nct_old(df):
    """Build an icicle chart Conditions -> Condition -> Phase -> NCTId.

    Only rows with ``StudyType == "Interventional"`` are used. Condition and
    phase nodes carry the number of matching rows as hover data.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition`` and
            ``NCTId`` columns.

    Returns:
        A ``go.Figure`` holding one Icicle trace.
    """
    # Copy the filtered rows so filling Phase does not assign into a slice of
    # the caller's frame (SettingWithCopyWarning).
    df = df[df['StudyType'] == "Interventional"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    unique_conditions = df['Condition'].unique()

    # Collect one frame per hierarchy level and concatenate once.
    # DataFrame.append was removed in pandas 2.0, and appending row by row
    # re-copied the whole table on every step.
    frames = [
        # Root node
        pd.DataFrame({
            'ids': ["Conditions"],
            'labels': ["Conditions"],
            'parents': [""]
        }),
        # Condition level
        pd.DataFrame({
            'ids': unique_conditions,
            'labels': unique_conditions,
            'parents': ["Conditions"] * len(unique_conditions)
        }),
    ]

    # Phase level: one node per (condition, phase) that occurs in the data
    condition_phase_ids = []
    for condition in unique_conditions:
        phases = df.loc[df['Condition'] == condition, 'Phase'].unique()
        phase_ids = [f"{condition}-{phase}" for phase in phases]
        condition_phase_ids.extend(phase_ids)
        frames.append(pd.DataFrame({
            'ids': phase_ids,
            'labels': phases,
            'parents': [condition] * len(phases)
        }))

    # NCTId level: one node per row, parented to its condition-phase node
    row_parents = [f"{condition}-{phase}" for condition, phase in zip(df['Condition'], df['Phase'])]
    frames.append(pd.DataFrame({
        'ids': df['NCTId'].tolist(),
        'labels': df['NCTId'].tolist(),
        'parents': row_parents
    }))

    icicle_df = pd.concat(frames, ignore_index=True)

    # Number of rows matching each condition node or condition-phase node
    # (0 for the root and for NCTId nodes, whose ids match neither form).
    icicle_df['nctid_count'] = icicle_df.apply(lambda row: len(df[(df['Condition'] == row['ids']) | (df['Condition'] + '-' + df['Phase'] == row['ids'])]), axis=1)

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial"),
        hovertemplate="<b>%{label}</b><br><br>%{customdata[0]} %{customdata[1]}<extra></extra>",
        customdata=list(zip(icicle_df['nctid_count'], icicle_df['parents'].apply(lambda x: 'Trials' if x == 'Trials' or x in unique_conditions else '')))
    ))

    # NOTE(review): these selectors compare whole ``ids``/``parents`` arrays of
    # the single trace against per-level lists, so they presumably never match
    # as intended -- kept unchanged, confirm before relying on them.
    fig.update_traces(hovertemplate="<b>%{label}</b><br><br>%{customdata[0]} %{customdata[1]}<extra></extra>",
                      selector=dict(ids=unique_conditions))
    fig.update_traces(hovertemplate="<b>%{label}</b><br><br>%{customdata[0]} %{customdata[1]}<extra></extra>",
                      selector=dict(ids=condition_phase_ids))
    fig.update_traces(hovertemplate="", hoverinfo='none',
                      selector=dict(parents=row_parents))
    fig.update_layout(width=1200, height=1000)
    return fig
######################################## | |
def random_color():
    """Return a random ``rgb(r, g, b)`` color string with channels in 0-255."""
    channels = [random.randint(0, 255) for _ in range(3)]
    return "rgb({}, {}, {})".format(*channels)
def plot_condition_treemap_nct(df):
    """Build a Sankey diagram Condition -> NCTId -> OrgStudyId -> Phase.

    Only rows with ``StudyType == "Interventional"`` are used. The
    ``Condition`` column is passed through ``split_conditions`` first. Each
    link's value is the number of rows sharing that pair of adjacent values.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``, ``NCTId``
            and ``OrgStudyId`` columns.

    Returns:
        A ``go.Figure`` holding one Sankey trace with randomly colored nodes.
    """
    # Copy the filtered rows so filling Phase does not assign into a slice of
    # the caller's frame (SettingWithCopyWarning).
    df = df[df['StudyType'] == "Interventional"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    # Node labels are the distinct values of each level, laid out level after
    # level; node_index maps a value to its position within that layout.
    levels = ['Condition', 'NCTId', 'OrgStudyId', 'Phase']
    labels = []
    node_index = {}
    for level in levels:
        level_values = df[level].unique().tolist()
        offset = len(labels)
        node_index[level] = {val: offset + pos for pos, val in enumerate(level_values)}
        labels.extend(level_values)

    colors = [random_color() for _ in labels]

    # One grouped count per adjacent level pair replaces a full DataFrame scan
    # for every possible node pair. Sorting by (source, target) keeps the link
    # order of the nested loops this replaces.
    source = []
    target = []
    value = []
    for left, right in zip(levels, levels[1:]):
        pair_counts = df.groupby([left, right], sort=False).size()
        links = sorted(
            (node_index[left][a], node_index[right][b], int(n))
            for (a, b), n in pair_counts.items()
        )
        for src, dst, n in links:
            source.append(src)
            target.append(dst)
            value.append(n)

    # Figure height grows with the number of conditions and is capped at 1000.
    num_conditions = len(node_index['Condition'])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])
    fig.update_layout(title_text="Conditions, Trial IDs, Study IDs, Phases for Sponsor",
                      font_size=10, height=height, autosize=True)
    return fig
####################################### | |
########################################################################################################################### | |
import re | |
def insert_line_break(text, max_length=30):
    """Break ``text`` into ``<br>``-separated lines shorter than ``max_length``.

    Each line is cut at the last space found before ``max_length``; when there
    is none, the text is cut hard at ``max_length``. Whitespace around each
    remaining chunk is stripped before the next cut.

    Args:
        text: String to break up.
        max_length: Line length limit; must be at least 1.

    Returns:
        The text with ``<br>`` inserted; unchanged if it already fits.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        # A non-positive limit can never be satisfied and used to recurse forever.
        raise ValueError("max_length must be at least 1")

    # Iterative rather than recursive so that very long text cannot exhaust
    # the interpreter's recursion limit.
    pieces = []
    remaining = text
    while len(remaining) > max_length:
        cut = remaining.rfind(' ', 0, max_length)
        if cut == -1:
            cut = max_length
        pieces.append(remaining[:cut])
        remaining = remaining[cut:].strip()
    pieces.append(remaining)
    return '<br>'.join(pieces)
########################################################### ####################################################################### | |
########################################################### ####################################################################### | |
def plot_nct2org_icicle(df):
    """Build an icicle chart Trials -> NCTId -> OrgStudyId -> Condition.

    NCTId nodes carry the trial's ``BriefTitle`` as hover text; every other
    node has empty hover text. One condition node is created per input row.

    Args:
        df: DataFrame with ``NCTId``, ``BriefTitle``, ``OrgStudyId`` and
            ``Condition`` columns.

    Returns:
        A ``go.Figure`` holding one Icicle trace.
    """
    nct_ids = df['NCTId'].unique()
    # NCTId -> BriefTitle lookup for the hover text
    nctid_brieftitle = df[['NCTId', 'BriefTitle']].drop_duplicates().set_index('NCTId').to_dict()['BriefTitle']

    # Node columns are accumulated in plain lists and turned into a DataFrame
    # once; concatenating a one-row frame per node re-copied the table on
    # every step.
    ids = ["Trials"]
    labels = ["Trials"]
    parents = [""]
    hovertext = [""]

    # NCTId level
    for nctid in nct_ids:
        ids.append(nctid)
        labels.append(nctid)
        parents.append("Trials")
        hovertext.append(nctid_brieftitle[nctid])

    # OrgStudyId level
    for nctid in nct_ids:
        for orgstudyid in df.loc[df['NCTId'] == nctid, 'OrgStudyId'].unique():
            ids.append(f"{nctid}-{orgstudyid}")
            labels.append(orgstudyid)
            parents.append(nctid)
            hovertext.append("")

    # Condition level; the row's index label keeps ids unique when a trial
    # lists the same condition on several rows.
    for index, nctid, orgstudyid, condition in zip(df.index, df['NCTId'], df['OrgStudyId'], df['Condition']):
        ids.append(f"{nctid}-{orgstudyid}-{condition}-{index}")
        labels.append(condition)
        parents.append(f"{nctid}-{orgstudyid}")
        hovertext.append("")

    icicle_df = pd.DataFrame({
        'ids': ids,
        'labels': labels,
        'parents': parents,
        'hovertext': hovertext
    })

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        hovertext=icicle_df.hovertext,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial")
    ))
    fig.update_layout(width=1200, height=1000)
    return fig
###################################################################################################################################### | |
################################################################################################################# | |
############################## Scatter Plot for Country Timelines ###################################### | |
import pandas as pd | |
import numpy as np | |
import plotly.express as px | |
import plotly.graph_objs as go | |
from plotly.subplots import make_subplots | |
def split_condition(text):
    """Return the part of ``text`` before the first ``,`` or ``|``, stripped."""
    before_comma, _, _ = text.partition(',')
    before_pipe, _, _ = before_comma.partition('|')
    return before_pipe.strip()
################################################################################################################################# | |
import plotly.graph_objs as go | |
import plotly.graph_objs as go | |
import plotly.subplots as sp | |
import pandas as pd | |
import numpy as np | |
################################################################### COUNTRY PLOTS ################################################################ | |
def plot_trial_country_map(df):
    """Build an icicle chart of Country -> Condition -> Trial.

    Only rows whose ``StudyType`` is "Interventional" are used. Missing
    ``Phase`` values are replaced by 'UNKNOWN', rows are sorted by phase and
    the ``Condition`` column is expanded through ``split_conditions``.

    Relies on ``split_conditions`` and ``insert_line_break`` being available
    at module level (neither is defined inside this function).

    Args:
        df: DataFrame with at least the columns ``StudyType``, ``Phase``,
            ``Country``, ``Condition``, ``NCTId`` and ``BriefTitle``.

    Returns:
        A plotly ``go.Figure`` holding a single ``go.Icicle`` trace.
    """
    # Copy the filtered slice so the column assignment below never writes
    # into a view of the caller's frame.
    df = df[df['StudyType'] == "Interventional"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    root_rows = [{
        'ids': "Country",
        'labels': "Country",
        'parents': "",
        'hover_text': "Country",
    }]
    country_rows = []
    condition_rows = []
    trial_rows = []

    # Walk the hierarchy directly instead of re-parsing "country__condition"
    # ids, which failed whenever a name itself contained "__".
    for country in df['Country'].unique():
        country_df = df[df['Country'] == country]
        country_rows.append({
            'ids': country,
            'labels': country,
            'parents': "Country",
            'hover_text': f"({len(country_df['NCTId'].unique())} Trials)",
        })

        for condition in country_df['Condition'].unique():
            condition_df = country_df[country_df['Condition'] == condition]
            condition_id = f"{country}__{condition}"
            trials = condition_df['NCTId'].unique()
            condition_rows.append({
                'ids': condition_id,
                'labels': condition,
                'parents': country,
                'hover_text': f"({len(trials)} Trials)",
            })

            for trial in trials:
                title = condition_df.loc[condition_df['NCTId'] == trial, 'BriefTitle'].iloc[0]
                trial_rows.append({
                    'ids': f"{condition_id}__{trial}",
                    'labels': trial,
                    'parents': condition_id,
                    'hover_text': f"<br>{insert_line_break(title)}",
                })

    # Same row order as before (root, countries, conditions, trials), but the
    # frame is built once rather than grown with repeated pd.concat calls.
    icicle_df = pd.DataFrame(
        root_rows + country_rows + condition_rows + trial_rows,
        columns=['ids', 'labels', 'parents', 'hover_text'],
    )

    fig = go.Figure(go.Icicle(
        ids=icicle_df['ids'],
        labels=icicle_df['labels'],
        parents=icicle_df['parents'],
        textinfo='label',
        hovertext=icicle_df['hover_text'],
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial"),
    ))
    fig.update_layout(width=1200, height=800)
    return fig
#################### | |
################################################################ SITES #################################################### | |
################################################################ TRIAL SITES ########################################### | |
def plot_trial_sites(df):
    """Build an icicle chart of City -> Site -> Trial.

    Only rows whose ``StudyType`` is "Interventional" are used. Missing
    ``Phase`` values are replaced by 'UNKNOWN' and rows are sorted by phase.
    Trial leaves are labelled ``"<NCTId><br><OrgStudyId>"``.

    Args:
        df: DataFrame with at least the columns ``StudyType``, ``Phase``,
            ``City``, ``Site``, ``NCTId`` and ``OrgStudyId``.

    Returns:
        A plotly ``go.Figure`` holding a single ``go.Icicle`` trace.
    """
    # Copy the filtered slice so the column assignment below never writes
    # into a view of the caller's frame.
    df = df[df['StudyType'] == "Interventional"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    root_rows = [{
        'ids': "Sites",
        'labels': "Sites",
        'parents': "",
        'hover_text': "Sites",
    }]
    city_rows = []
    site_rows = []
    trial_rows = []

    # Walk the hierarchy directly instead of re-parsing "city__site" ids,
    # which failed whenever a name itself contained "__".
    for city in df['City'].unique():
        city_df = df[df['City'] == city]
        city_rows.append({
            'ids': city,
            'labels': city,
            'parents': "Sites",
            'hover_text': f"({len(city_df['NCTId'].unique())} Trials)",
        })

        for site in city_df['Site'].unique():
            site_df = city_df[city_df['Site'] == site]
            site_id = f"{city}__{site}"
            site_rows.append({
                'ids': site_id,
                'labels': site,
                'parents': city,
                'hover_text': f"({len(site_df['NCTId'].unique())} Trials)",
            })

            # drop_duplicates + plain iteration is safe on an empty selection
            # (e.g. a missing site name), where DataFrame.apply(axis=1)
            # followed by .unique() used to raise.
            trial_pairs = site_df[['NCTId', 'OrgStudyId']].drop_duplicates()
            for nctid, org_study_id in trial_pairs.itertuples(index=False, name=None):
                trial_rows.append({
                    'ids': f"{site_id}__{nctid}",
                    'labels': f"{nctid}<br>{org_study_id}",
                    'parents': site_id,
                    'hover_text': "",
                })

    # Same row order as before (root, cities, sites, trials), but the frame is
    # built once rather than grown with repeated pd.concat calls.
    icicle_df = pd.DataFrame(
        root_rows + city_rows + site_rows + trial_rows,
        columns=['ids', 'labels', 'parents', 'hover_text'],
    )

    fig = go.Figure(go.Icicle(
        ids=icicle_df['ids'],
        labels=icicle_df['labels'],
        parents=icicle_df['parents'],
        textinfo='label',
        hovertext=icicle_df['hover_text'],
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial"),
    ))
    fig.update_layout(width=1200, height=800)
    return fig
############################################################################################################################################# | |
def plot_trial_site_map(df):
    """Build a treemap of Site -> NCTId -> Condition sized by row count.

    Only rows whose ``StudyType`` is "Interventional" are used. Missing
    ``Phase`` values are replaced by 'UNKNOWN', rows are sorted by phase and
    the ``Condition`` column is expanded through ``split_conditions`` (expected
    at module level).

    Args:
        df: DataFrame with at least the columns ``StudyType``, ``Phase``,
            ``Site``, ``NCTId``, ``BriefTitle`` and ``Condition``.

    Returns:
        A plotly express treemap figure, 1200x800.
    """
    # Copy the filtered slice so the column assignment below never writes
    # into a view of the caller's frame.
    df = df[df['StudyType'] == "Interventional"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    # BriefTitle takes part in the grouping but is not a level of the treemap.
    df_count = (
        df.groupby(['Site', 'NCTId', 'BriefTitle', 'Condition'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.treemap(df_count, path=['Site', 'NCTId', 'Condition'], values='Count', color='Site')

    # The previous code issued three update_traces calls selected by
    # dict(depth=0/1/2). `depth` is not a property of the treemap trace, so
    # those selectors matched nothing and the font was never applied. All
    # three calls set the same font, so one unselected call gives the
    # intended result for every level.
    fig.update_traces(textfont=dict(family="Arial", size=30, color='black'))

    fig.update_layout(width=1200, height=800)
    return fig
############################################################ | |
############################################################################################################################################################### | |
########################################################### Timelines ########################################################################################################### | |
import numpy as np | |
import plotly.graph_objs as go | |
import matplotlib.pyplot as plt | |
def generate_colors(n):
    """Return ``n`` hex colour strings sampled evenly from ``plt.cm.rainbow``.

    The colormap is evaluated at ``n`` evenly spaced points in [0, 1]; the
    alpha channel is discarded and each RGB channel is scaled to 0-255.
    """
    sample_points = np.linspace(0, 1, n)
    hex_colors = []
    for red, green, blue, _alpha in plt.cm.rainbow(sample_points):
        channels = (int(red * 255), int(green * 255), int(blue * 255))
        hex_colors.append('#%02x%02x%02x' % channels)
    return hex_colors
def get_marker_size(enrollment_count):
    """Map an enrollment count to a marker size.

    Sizes by exclusive upper bound: below 100 -> 20, below 300 -> 40,
    below 500 -> 60, below 1000 -> 70, anything else -> 100.
    """
    size_by_upper_bound = ((100, 20), (300, 40), (500, 60), (1000, 70))
    for upper_bound, size in size_by_upper_bound:
        if enrollment_count < upper_bound:
            return size
    return 100
def _condition_color(condition_id):
    """Return the deterministic ``rgb(...)`` colour string for a condition ID."""
    return f'rgb({condition_id * 10 % 256}, {(condition_id * 20) % 256}, {(condition_id * 30) % 256})'


def _timeline_height(num_trials):
    """Return the figure height in pixels for the given number of trials.

    Tiers grow with the number of rows: up to 5 trials -> 600, fewer than
    10 -> 800, fewer than 20 -> 1000, otherwise 1400. The previous ladder
    tested ``>= 10`` before ``>= 20``, which made the 1000 tier unreachable
    and gave 6-9 trials the tallest figure.
    """
    if num_trials <= 5:
        return 600
    if num_trials < 10:
        return 800
    if num_trials < 20:
        return 1000
    return 1400


def _build_trial_timeline_figure(df, first_year, last_year):
    """Build the start/end-date timeline shared by the two bubble-map views.

    Each interventional trial gets a square marker at its start date, a circle
    at its completion date (sized by ``get_marker_size`` of the enrollment
    count) and a thin black line joining the two. Markers are coloured by the
    first condition found for the trial; one legend entry is added per
    condition.

    Relies on ``split_conditions`` and ``get_marker_size`` being available at
    module level.

    Args:
        df: DataFrame with at least ``StudyType``, ``Phase``, ``StartDate``,
            ``CompletionDate``, ``Condition``, ``NCTId``, ``BriefTitle``,
            ``OrgStudyId`` and ``EnrollmentCount``.
        first_year: First year shown on the x axis (axis starts on Jan 1).
        last_year: Last year shown on the x axis (axis ends on Dec 31).

    Returns:
        A plotly ``go.Figure``.
    """
    # Copy the filtered slice so the column assignments below never write
    # into a view of the caller's frame.
    df = df[df['StudyType'] == "Interventional"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Unparseable dates become NaT instead of raising.
    df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce')
    df['CompletionDate'] = pd.to_datetime(df['CompletionDate'], errors='coerce')

    df = split_conditions(df, 'Condition')

    # Conditions are numbered from 1 in order of first appearance.
    condition_ids = {condition: i for i, condition in enumerate(df['Condition'].unique(), start=1)}
    # NCTId -> list of condition IDs, in row order.
    nctid_condition_map = (
        df.groupby('NCTId')['Condition']
        .apply(lambda conditions: [condition_ids[cond] for cond in conditions])
        .to_dict()
    )

    df['MarkerSize'] = df['EnrollmentCount'].apply(get_marker_size)

    # NOTE(review): the "Type" line of the hover text is fed by customdata[0],
    # which is the row's Condition (not StudyType) - confirm this is intended.
    hover_columns = ['Condition', 'BriefTitle', 'OrgStudyId', 'Phase', 'EnrollmentCount']
    hover_head = (
        'NCTId: %{y}<br>Conditions: %{text}<br>Type: %{customdata[0]}'
        '<br>BriefTitle: %{customdata[1]}<br>OrgStudyId: %{customdata[2]}'
        '<br>Phase: %{customdata[3]}'
    )
    hover_tail = '<br>Enrollment Count: %{customdata[4]}<extra></extra>'
    hovertemplate_start = hover_head + '<br>Start Date: %{x}' + hover_tail
    hovertemplate_end = hover_head + '<br>Completion Date: %{x}' + hover_tail

    trial_ids = df['NCTId'].unique()
    start_traces = []
    end_traces = []
    line_traces = []

    for nctid in trial_ids:
        trial_rows = df[df['NCTId'] == nctid]
        # Every point of a trial shows the full list of its conditions.
        conditions_text = ', '.join(trial_rows['Condition'].unique())
        text = [conditions_text] * len(trial_rows)
        color = _condition_color(nctid_condition_map[nctid][0])
        customdata = trial_rows[hover_columns]

        # Start marker (square).
        start_traces.append(go.Scatter(
            x=trial_rows['StartDate'],
            y=trial_rows['NCTId'],
            mode='markers',
            marker=dict(size=10, symbol='square', color=color),
            text=text,
            customdata=customdata,
            hovertemplate=hovertemplate_start,
            showlegend=False,
        ))

        # Completion marker (circle sized by enrollment).
        end_traces.append(go.Scatter(
            x=trial_rows['CompletionDate'],
            y=trial_rows['NCTId'],
            mode='markers',
            marker=dict(size=trial_rows['MarkerSize'], symbol='circle', color=color, sizemode='diameter'),
            text=text,
            customdata=customdata,
            hovertemplate=hovertemplate_end,
            showlegend=False,
        ))

        # Connector between the first row's start and completion dates.
        line_traces.append(go.Scatter(
            x=[trial_rows['StartDate'].iloc[0], trial_rows['CompletionDate'].iloc[0]],
            y=[nctid, nctid],
            mode='lines',
            line=dict(color='black', width=1),
            showlegend=False,
        ))

    # Data-less traces whose only job is to create one legend entry per condition.
    legend_traces = [
        go.Scatter(
            x=[None],
            y=[None],
            mode='markers',
            marker=dict(size=10, symbol='circle', color=_condition_color(condition_id)),
            name=f'{condition_id}: {condition}',
            showlegend=True,
        )
        for condition, condition_id in condition_ids.items()
    ]

    layout = go.Layout(
        yaxis=dict(
            title='NCTId',
            showgrid=False,
            tickvals=trial_ids,
            ticktext=trial_ids,
            tickangle=0,
        ),
        xaxis=dict(
            title='Start-End Dates',
            showgrid=False,
            range=[pd.to_datetime(f'{first_year}-01-01'), pd.to_datetime(f'{last_year}-12-31')],
            # One tick per year, including the last year of the range.
            tickvals=[pd.to_datetime(f'{year}-01-01') for year in range(first_year, last_year + 1)],
        ),
        showlegend=True,
        legend=dict(
            title='Conditions',
            x=1.05,
            y=1,
            traceorder='normal',
            bgcolor='rgba(255,255,255,0.5)',
            font=dict(color='#000000'),
        ),
        margin=dict(l=150),
        plot_bgcolor='#ffffff',
        paper_bgcolor='#ffffff',
        font=dict(family='Segoe UI', color='#000000'),
    )

    fig = go.Figure(data=start_traces + end_traces + line_traces + legend_traces, layout=layout)
    fig.update_layout(
        title='Trial Start and End Dates by Conditions',
        width=1200,
        height=_timeline_height(len(trial_ids)),
    )
    return fig


def plot_trial_bubblemap(df):
    """Timeline of interventional trials on a 2020-2028 x axis.

    See ``_build_trial_timeline_figure`` for the required columns and the
    layout of the returned plotly figure.
    """
    return _build_trial_timeline_figure(df, 2020, 2028)


########################################################################################################################################################
def plot_trial_bubblemap_comp(df):
    """Timeline of interventional trials on a 2010-2023 x axis.

    Identical to ``plot_trial_bubblemap`` apart from the date window. Yearly
    ticks now include 2023; the previous tick list stopped at 2022 although
    the axis ran to the end of 2023.
    """
    return _build_trial_timeline_figure(df, 2010, 2023)
####################################################################################### | |
####################################################################################### | |
############################################ Trial Site Map without Zip code now ############## | |
import geopandas as gpd | |
def plot_trial_site_world_map(df, country_filter=None):
    """Plot trial sites on a world map, one bubble per site/condition/trial row.

    City coordinates are looked up with ``gpd.tools.geocode`` (one call per
    distinct City/Country pair), so this function performs network requests.

    Args:
        df: DataFrame with at least ``City``, ``Country``, ``Site``,
            ``Condition``, ``NCTId`` and ``BriefTitle``.
        country_filter: When truthy, only rows whose ``Country`` equals this
            value are plotted.

    Returns:
        A plotly express ``scatter_geo`` figure, 1200x800.
    """
    # Work on a copy: the City rewrite below used to modify the caller's frame.
    df = df.copy()
    # "Multiple Locations" is not geocodable; German rows are pinned to Berlin.
    df.loc[(df['City'] == 'Multiple Locations') & (df['Country'] == 'Germany'), 'City'] = 'Berlin'

    # Filter first so that cities which would be discarded anyway are never
    # sent to the geocoder. The plotted rows are the same as filtering last.
    if country_filter:
        df = df[df['Country'] == country_filter]

    # The pairs are already distinct, so each one is geocoded exactly once and
    # no separate lookup cache is needed.
    unique_cities = df[['City', 'Country']].drop_duplicates().copy()
    points = [
        gpd.tools.geocode(f"{city}, {country}").geometry[0]
        for city, country in zip(unique_cities['City'], unique_cities['Country'])
    ]
    # List assignment also works when there are no cities at all.
    unique_cities['Latitude'] = [point.y for point in points]
    unique_cities['Longitude'] = [point.x for point in points]

    df = df.merge(unique_cities, on=['City', 'Country'])

    # Site names are combined with the country for labelling and colouring.
    df['SiteCountry'] = df['Site'] + ', ' + df['Country']

    df_count = (
        df.groupby(['Country', 'City', 'SiteCountry', 'Condition', 'NCTId', 'BriefTitle', 'Latitude', 'Longitude'])
        .size()
        .reset_index(name='Count')
    )

    fig = px.scatter_geo(
        df_count,
        lat='Latitude',
        lon='Longitude',
        hover_name='SiteCountry',
        hover_data={'Latitude': False, 'Longitude': False, 'NCTId': False, 'BriefTitle': False,
                    'Condition': False, 'City': True, 'Country': True},
        size='Count',
        color='SiteCountry',
        projection='mercator',
    )
    fig.update_layout(
        title='Trial Sites Map',
        geo=dict(showframe=False, showcoastlines=False, showcountries=True),
        width=1200,
        height=800,
    )
    return fig
def plot_trial_site_world_map_old(df, country_filter=None):
    """Older world-map view: one bubble per site/condition/trial row, coloured by site.

    NOTE(review): a second function with this same name is defined further
    down in this file; while both exist, the later one replaces this one at
    import time.

    City coordinates come from ``gpd.tools.geocode``. The ``City`` column of
    the passed-in frame is modified in place for German "Multiple Locations"
    rows.

    Args:
        df: DataFrame with ``City``, ``Country``, ``Site``, ``Condition``,
            ``NCTId`` and ``BriefTitle``.
        country_filter: When truthy, only this country's rows are plotted.

    Returns:
        A plotly express ``scatter_geo`` figure, 1200x800.
    """
    german_multi_site = (df['City'] == 'Multiple Locations') & (df['Country'] == 'Germany')
    df.loc[german_multi_site, 'City'] = 'Berlin'

    city_table = df[['City', 'Country']].drop_duplicates()
    resolved_points = {}  # (city, country) -> geocoded point

    def lookup_point(row):
        place = (row['City'], row['Country'])
        try:
            return resolved_points[place]
        except KeyError:
            point = gpd.tools.geocode(f"{place[0]}, {place[1]}").geometry[0]
            resolved_points[place] = point
            return point

    city_table['Coordinates'] = city_table.apply(lookup_point, axis=1)
    city_table['Latitude'] = city_table['Coordinates'].map(lambda point: point.y)
    city_table['Longitude'] = city_table['Coordinates'].map(lambda point: point.x)

    df = df.merge(city_table, on=['City', 'Country'])

    group_columns = ['Country', 'City', 'Site', 'Condition', 'NCTId', 'BriefTitle', 'Latitude', 'Longitude']
    df_count = df.groupby(group_columns).size().reset_index(name='Count')
    if country_filter:
        df_count = df_count[df_count['Country'] == country_filter]

    hidden_and_shown = {'Latitude': False, 'Longitude': False, 'NCTId': False, 'BriefTitle': False,
                        'Condition': False, 'City': True, 'Country': True}
    fig = px.scatter_geo(
        df_count,
        lat='Latitude',
        lon='Longitude',
        hover_name='Site',
        hover_data=hidden_and_shown,
        size='Count',
        color='Site',
        projection='mercator',
    )
    geo_options = dict(showframe=False, showcoastlines=False, showcountries=True)
    fig.update_layout(title='Trial Sites Map', geo=geo_options, width=1200, height=800)
    return fig
def plot_trial_site_world_map_old(df, country_filter=None):
    """Older world-map view: one bubble per site/condition/trial row, coloured by site.

    City coordinates come from ``gpd.tools.geocode`` (one call per distinct
    City/Country pair). The ``City`` column of the passed-in frame is modified
    in place for German "Multiple Locations" rows.

    Args:
        df: DataFrame with ``City``, ``Country``, ``Site``, ``Condition``,
            ``NCTId`` and ``BriefTitle``.
        country_filter: When truthy, only this country's rows are plotted.

    Returns:
        A plotly express ``scatter_geo`` figure, 1200x800, legend enabled.
    """
    # "Multiple Locations" in Germany is replaced by the capital so that it
    # can be geocoded.
    german_multi_site = (df['City'] == 'Multiple Locations') & (df['Country'] == 'Germany')
    df.loc[german_multi_site, 'City'] = 'Berlin'

    city_table = df[['City', 'Country']].drop_duplicates()

    def geocode_row(row):
        return gpd.tools.geocode(f"{row['City']}, {row['Country']}").geometry[0]

    # Approximate coordinates for every distinct city.
    city_table['Coordinates'] = city_table.apply(geocode_row, axis=1)
    city_table['Latitude'] = city_table['Coordinates'].map(lambda point: point.y)
    city_table['Longitude'] = city_table['Coordinates'].map(lambda point: point.x)

    # Attach the coordinates to every original row.
    df = df.merge(city_table, on=['City', 'Country'])

    # Row counts per Country/City/Site/Condition/Trial combination.
    group_columns = ['Country', 'City', 'Site', 'Condition', 'NCTId', 'BriefTitle', 'Latitude', 'Longitude']
    df_count = df.groupby(group_columns).size().reset_index(name='Count')

    if country_filter:
        df_count = df_count[df_count['Country'] == country_filter]

    hidden_and_shown = {'Latitude': False, 'Longitude': False, 'NCTId': False, 'BriefTitle': False,
                        'Condition': False, 'City': True, 'Country': True}
    fig = px.scatter_geo(
        df_count,
        lat='Latitude',
        lon='Longitude',
        hover_name='Site',
        hover_data=hidden_and_shown,
        size='Count',
        color='Site',  # one colour per site
        projection='mercator',
    )

    # Legend entries are explicitly left enabled on every trace.
    fig.update_traces(showlegend=True)

    geo_options = dict(showframe=False, showcoastlines=False, showcountries=True)
    fig.update_layout(title='Trial Sites Map', geo=geo_options, width=1200, height=800)
    return fig
################################################################################ | |
def plot_trial_site_world_map_without_api(df):
    """World map of trial sites including the sponsor in the hover text.

    NOTE(review): despite the name, coordinates are still obtained through
    ``gpd.tools.geocode`` (one call per distinct City/Country pair).

    Args:
        df: DataFrame with ``Sponsor``, ``Country``, ``City``, ``Site``,
            ``Condition`` and ``NCTId``.

    Returns:
        A plotly express ``scatter_geo`` figure (default size).
    """
    city_table = df[['City', 'Country']].drop_duplicates()

    def geocode_row(row):
        return gpd.tools.geocode(f"{row['City']}, {row['Country']}").geometry[0]

    # Approximate coordinates for every distinct city.
    city_table['Coordinates'] = city_table.apply(geocode_row, axis=1)
    city_table['Latitude'] = city_table['Coordinates'].map(lambda point: point.y)
    city_table['Longitude'] = city_table['Coordinates'].map(lambda point: point.x)

    # Attach the coordinates to every original row.
    df = df.merge(city_table, on=['City', 'Country'])

    # Row counts per Sponsor/Country/City/Site/Condition/Trial combination.
    group_columns = ['Sponsor', 'Country', 'City', 'Site', 'Condition', 'NCTId', 'Latitude', 'Longitude']
    df_count = df.groupby(group_columns).size().reset_index(name='Count')

    hidden_and_shown = {'Latitude': False, 'Longitude': False, 'Sponsor': True,
                        'Condition': True, 'City': True, 'Country': True}
    fig = px.scatter_geo(
        df_count,
        lat='Latitude',
        lon='Longitude',
        color='Site',
        hover_name='Site',
        hover_data=hidden_and_shown,
        size='Count',
        projection='mercator',
    )

    geo_options = dict(showframe=False, showcoastlines=False, showcountries=True)
    fig.update_layout(title='Trial Sites Map', geo=geo_options)
    return fig
############################################################################################################# | |
############################################################# Gradio Function as Views #################################### | |
### ######################### Find Sponsors
############################################################################################################################################# | |
def select_sponsor(sponsor_input, academia_input):
    """Return ``sponsor_input`` when it is truthy, otherwise ``academia_input``."""
    return sponsor_input or academia_input
def select_disease(disease_input, disease_input_text):
    """Choose between the free-text disease entry and the selected one.

    Args:
        disease_input: Fallback value, returned unchanged when no free text
            was entered.
        disease_input_text: Free-text entry; may be ``None`` or blank.

    Returns:
        The stripped free text when it is non-empty, otherwise
        ``disease_input``.
    """
    # Treat a missing text value like an empty one instead of raising on
    # ``None.strip()``.
    typed = (disease_input_text or "").strip()
    return typed or disease_input
#summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drug | |
async def disease_view(condition, condition_text, sponsor_input, academia_input):
    """Gradio view: fetch recruiting trials and build the condition/sponsor plots.

    The sponsor and condition are resolved with ``select_sponsor`` and
    ``select_disease``; ``gradio_wrapper_nct`` is then awaited with whichever
    of them is set and ``status="Recruiting"``.

    Args:
        condition: Selected condition.
        condition_text: Free-text condition; takes precedence when non-blank.
        sponsor_input: Selected sponsor; takes precedence when truthy.
        academia_input: Alternative sponsor, used when ``sponsor_input`` is empty.

    Returns:
        An 8-tuple ``(summary_stats, summary_stats_collb,
        html_table_conditions, html_table_conditions_collb, condition_other,
        condition_sunburst, sponsor_tree, collaborator_tree)``.
        ``collaborator_tree`` is ``None`` when there is no collaborator table.
        When nothing can be shown, the first element is a message and the
        other seven are ``None``.
    """
    no_data = (
        "No data was matched from Clinical Trials.Gov, Please try with new selection again!",
        None, None, None, None, None, None, None,
    )

    def first_table(html):
        # Wrap the markup in a buffer: passing literal HTML straight to
        # pd.read_html is deprecated in recent pandas releases.
        return pd.read_html(io.StringIO(html))[0]

    sponsor = select_sponsor(sponsor_input, academia_input)
    condition = select_disease(condition, condition_text)

    # Only pass the filters that were actually provided.
    query = {}
    if condition:
        query['condition'] = condition
    if sponsor:
        query['sponsor'] = sponsor
    if not query:
        # Previously this case fell through all branches and failed on
        # unassigned result variables.
        return no_data

    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, _html_table, _html_table_drugs) = await gradio_wrapper_nct(
        status="Recruiting", **query)

    # Every plot below needs the conditions table, so its absence alone is
    # enough to stop (the old guard also required the collaborator table to
    # be None and otherwise crashed while parsing).
    if not html_table_conditions:
        return no_data

    df = first_table(html_table_conditions)

    # The collaborator table is optional.
    df2 = pd.DataFrame()
    if html_table_conditions_collb:
        try:
            df2 = first_table(html_table_conditions_collb)
        except (ValueError, IndexError):
            df2 = pd.DataFrame()

    condition_other = plot_condition_others(df)
    condition_sunburst = plot_condition_sunburst(df)
    sponsor_tree = plot_sponsor_tree(df)
    collaborator_tree = plot_collaborator_icicle(df2) if not df2.empty else None

    return (summary_stats, summary_stats_collb, html_table_conditions, html_table_conditions_collb,
            condition_other, condition_sunburst, sponsor_tree, collaborator_tree)
##################### Assets ################################################################################### | |
def select_sponsor(s_sponsor_input, s_academia_input):
    """Return ``s_sponsor_input`` when it is truthy, otherwise ``s_academia_input``.

    NOTE(review): this re-binds the name ``select_sponsor`` that is already
    defined earlier in this file with the same logic.
    """
    return s_sponsor_input if s_sponsor_input else s_academia_input
def select_condition(s_disease_input, s_disease_input_type):
    """Return the condition to query for the drug view.

    A non-blank typed value (second argument) takes precedence and is
    returned stripped of surrounding whitespace; otherwise the first
    argument is returned unchanged.

    A typed value of None is treated like an empty string instead of
    raising AttributeError on ``.strip()``.
    """
    typed_name = (s_disease_input_type or "").strip()
    return typed_name or s_disease_input
async def drug_view(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the drug view for trials with status "Recruiting".

    The effective sponsor and condition are resolved with select_sponsor /
    select_condition, and gradio_wrapper_nct is queried with whichever of
    the two is set.

    Returns:
        (summary_stats, html_table_drugs, sankey) on success, where sankey is
        whatever plot_drug_sankey returns, or (message, None, None) when
        nothing was selected or no drug table came back.
    """
    no_data = ("No data matched from Clinical Trials.Gov, Please try with new selection !", None, None)

    sponsor = select_sponsor(s_sponsor_input, s_academia_input)
    condition = select_condition(condition, condition_type)

    # Only pass the filters that are actually set; status is always sent.
    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter chosen: previously no branch ran and the code below
        # crashed with UnboundLocalError.
        return no_data

    results = await gradio_wrapper_nct(**query)
    summary_stats, _, _, _, _, html_table_drugs = results

    if html_table_drugs is None:
        return no_data

    # Wrap in StringIO: passing literal HTML to read_html is deprecated.
    drugs_df = pd.read_html(io.StringIO(html_table_drugs))[0]
    sankey_map_drug = plot_drug_sankey(drugs_df)
    return summary_stats, html_table_drugs, sankey_map_drug
########################### Condition################### | |
################## ######################################################################################## | |
def select_sponsor_phc(s_sponsor_input_phc, s_academia_input_phc):
    """Return the first argument when truthy, else the second one unchanged."""
    return s_sponsor_input_phc or s_academia_input_phc
def select_condition_phc(s_disease_input_phc, s_disease_input_type_phc):
    """Return the condition to query for the condition view.

    A non-blank typed value (second argument) is returned stripped;
    otherwise the first argument is returned unchanged. A typed value of
    None is treated as blank instead of raising AttributeError.
    """
    typed_name = (s_disease_input_type_phc or "").strip()
    return typed_name or s_disease_input_phc
async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the condition tree-map view for trials with status "Recruiting".

    Returns:
        (summary_stats, html_table_conditions, tree_map) on success, where
        tree_map is whatever plot_condition_treemap_nct returns, or
        (message, None, None) when nothing was selected or no conditions
        table came back. Both paths return three values.
    """
    no_data = ("No data matched from Clinical Trials.Gov, Please try with new selection !", None, None)

    sponsor = select_sponsor_phc(s_sponsor_input, s_academia_input)
    condition = select_condition_phc(condition, condition_type)

    # Only pass the filters that are actually set; status is always sent.
    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter chosen: previously this path hit UnboundLocalError.
        return no_data

    results = await gradio_wrapper_nct(**query)
    summary_stats, _, html_table_conditions, _, _, _ = results

    if html_table_conditions is None:
        return no_data

    # Wrap in StringIO: passing literal HTML to read_html is deprecated.
    conditions_df = pd.read_html(io.StringIO(html_table_conditions))[0]
    tree_map_cond_nct = plot_condition_treemap_nct(conditions_df)
    return summary_stats, html_table_conditions, tree_map_cond_nct
# return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map | |
################## Trial ######################################################################################## | |
def select_sponsor_phs(s_sponsor_input_phs, s_academia_input_phs):
    """Return the first argument when truthy, else the second one unchanged."""
    return s_sponsor_input_phs or s_academia_input_phs
def select_condition_phs(s_disease_input_phs, s_disease_input_type_phs):
    """Return the condition to query for the trial view.

    A non-blank typed value (second argument) is returned stripped;
    otherwise the first argument is returned unchanged. A typed value of
    None is treated as blank instead of raising AttributeError.
    """
    typed_name = (s_disease_input_type_phs or "").strip()
    return typed_name or s_disease_input_phs
async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the trial-to-organisation view for trials with status "Recruiting".

    Returns:
        (summary_stats, html_table_conditions, nct_org_map) on success, where
        nct_org_map is whatever plot_nct2org_icicle returns, or
        (message, None, None) when nothing was selected or no conditions
        table came back. Both paths return three values.
    """
    no_data = ("No data matched from Clinical Trials.Gov, Please try with new selection !", None, None)

    sponsor = select_sponsor_phs(s_sponsor_input, s_academia_input)
    condition = select_condition_phs(condition, condition_type)

    # Only pass the filters that are actually set; status is always sent.
    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter chosen: previously this path hit UnboundLocalError.
        return no_data

    results = await gradio_wrapper_nct(**query)
    summary_stats, _, html_table_conditions, _, _, _ = results

    if html_table_conditions is None:
        return no_data

    # Wrap in StringIO: passing literal HTML to read_html is deprecated.
    conditions_df = pd.read_html(io.StringIO(html_table_conditions))[0]
    nct_org_map = plot_nct2org_icicle(conditions_df)
    return summary_stats, html_table_conditions, nct_org_map
# return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map | |
##################################################### New Trials ###################################### | |
def select_sponsor_phs_n(s_sponsor_input_phs, s_academia_input_phs):
    """Return the first argument when truthy, else the second one unchanged."""
    return s_sponsor_input_phs or s_academia_input_phs
def select_condition_phs_n(s_disease_input_phs, s_disease_input_type_phs):
    """Return the condition to query for the new-trials view.

    A non-blank typed value (second argument) is returned stripped;
    otherwise the first argument is returned unchanged. A typed value of
    None is treated as blank instead of raising AttributeError.
    """
    typed_name = (s_disease_input_type_phs or "").strip()
    return typed_name or s_disease_input_phs
#################################################################################### | |
async def disease_view_phs_n(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the view for trials with status "Not yet recruiting".

    Returns:
        (summary_stats, html_table_conditions, tree_map, bubble_map) on
        success, where the last two are whatever plot_condition_treemap_nct
        and plot_trial_bubblemap return, or (message, None, None, None) when
        nothing was selected or no conditions table came back.
    """
    no_data = ("No data matched from Clinical Trials.Gov, Please try with new selection !", None, None, None)

    sponsor = select_sponsor_phs_n(s_sponsor_input, s_academia_input)
    condition = select_condition_phs_n(condition, condition_type)

    # Only pass the filters that are actually set; status is always sent.
    query = {"status": "Not yet recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter chosen: previously this path hit UnboundLocalError.
        return no_data

    results = await gradio_wrapper_nct(**query)
    summary_stats, _, html_table_conditions, _, _, _ = results

    if html_table_conditions is None:
        return no_data

    # Parse once (StringIO avoids the deprecated literal-HTML path) and hand
    # the second plot an untouched copy, as the original did by re-parsing.
    conditions_df = pd.read_html(io.StringIO(html_table_conditions))[0]
    bubble_df = conditions_df.copy()

    tree_map_cond_nct = plot_condition_treemap_nct(conditions_df)
    bubble_map_trials = plot_trial_bubblemap(bubble_df)
    return summary_stats, html_table_conditions, tree_map_cond_nct, bubble_map_trials
############################################### Completed Trials #################################################### | |
def select_sponsor_phs_c(s_sponsor_input_phs, s_academia_input_phs):
    """Return the first argument when truthy, else the second one unchanged."""
    return s_sponsor_input_phs or s_academia_input_phs
def select_condition_phs_c(s_disease_input_phs, s_disease_input_type_phs):
    """Return the condition to query for the completed-trials view.

    A non-blank typed value (second argument) is returned stripped;
    otherwise the first argument is returned unchanged. A typed value of
    None is treated as blank instead of raising AttributeError.
    """
    typed_name = (s_disease_input_type_phs or "").strip()
    return typed_name or s_disease_input_phs
async def disease_view_phs_c(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the view for trials with status "Completed".

    Returns:
        (summary_stats, html_table_conditions, tree_map, nct_org_map,
        sankey, bubble_map) on success; the four plot objects are whatever
        plot_condition_treemap_nct, plot_nct2org_icicle, plot_drug_sankey and
        plot_trial_bubblemap_comp return. sankey is None when no drug table
        came back. When nothing was selected or no conditions table came
        back, a message followed by five None values is returned, so both
        paths yield six values.
    """
    no_data = (
        "No data matched from Clinical Trials.Gov, Please try with new selection !",
        None, None, None, None, None,
    )

    sponsor = select_sponsor_phs_c(s_sponsor_input, s_academia_input)
    condition = select_condition_phs_c(condition, condition_type)

    # Only pass the filters that are actually set; status is always sent.
    query = {"status": "Completed"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter chosen: previously this path hit UnboundLocalError.
        return no_data

    results = await gradio_wrapper_nct(**query)
    summary_stats, _, html_table_conditions, _, _, html_table_drugs = results

    if html_table_conditions is None:
        return no_data

    # Parse once (StringIO avoids the deprecated literal-HTML path); the
    # bubble map gets an untouched copy, as the original did by re-parsing.
    conditions_df = pd.read_html(io.StringIO(html_table_conditions))[0]
    bubble_df = conditions_df.copy()

    tree_map_cond_nct = plot_condition_treemap_nct(conditions_df)
    nct_org_map = plot_nct2org_icicle(conditions_df)
    bubble_map_trials = plot_trial_bubblemap_comp(bubble_df)

    # The drug table may be missing even when conditions exist; skip the
    # Sankey plot instead of crashing inside read_html.
    sankey_map_drug = None
    if html_table_drugs is not None:
        drugs_df = pd.read_html(io.StringIO(html_table_drugs))[0]
        sankey_map_drug = plot_drug_sankey(drugs_df)

    return (summary_stats, html_table_conditions, tree_map_cond_nct,
            nct_org_map, sankey_map_drug, bubble_map_trials)
### ############### Country ######################################################### | |
def select_sponsor_con(sponsor_input_con, academia_input_con):
    """Return the first argument when truthy, else the second one unchanged."""
    return sponsor_input_con or academia_input_con
def select_condition_con(condition_input, condition_input_type):
    """Return the condition to query for the country view.

    A non-blank typed value (second argument) is returned stripped;
    otherwise the first argument is returned unchanged. A typed value of
    None is treated as blank instead of raising AttributeError.
    """
    typed_name = (condition_input_type or "").strip()
    return typed_name or condition_input
async def condition_view(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the country view for trials with status "Recruiting".

    Queries gradio_wrapper_nct_spn with the resolved condition and sponsor
    plus the given country.

    Returns:
        (summary_stats_sites, html_table_add, trial_country) on success,
        where trial_country is whatever plot_trial_country_map returns, or
        (message, None, None) when no site table came back.
    """
    condition = select_condition_con(condition, condition_type)
    sponsor = select_sponsor_con(sponsor_input_con, academia_input_con)

    results = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting"
    )
    _, _, _, summary_stats_sites, html_table_add, _ = results

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Parse once (the original parsed the same table twice and discarded the
    # first result); StringIO avoids the deprecated literal-HTML path.
    sites_df = pd.read_html(io.StringIO(html_table_add))[0]
    trial_country = plot_trial_country_map(sites_df)
    return summary_stats_sites, html_table_add, trial_country
############### Site ######################################################################################################### | |
def select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s):
    """Return the first argument when truthy, else the second one unchanged."""
    return sponsor_input_con_s or academia_input_con_s
def select_condition_con(condition_input, condition_input_type):
    """Return the condition to query for the site view.

    NOTE: this re-defines the identically named helper from the Country
    section with the same logic; the name is kept because condition_view_s
    calls it.

    A non-blank typed value (second argument) is returned stripped;
    otherwise the first argument is returned unchanged. A typed value of
    None is treated as blank instead of raising AttributeError.
    """
    typed_name = (condition_input_type or "").strip()
    return typed_name or condition_input
async def condition_view_s(condition, country, condition_type, sponsor_input_con_s, academia_input_con_s):
    """Build the site view for trials with status "Recruiting".

    Returns:
        (summary_stats_sites, html_table_add, site_cond, country_site) on
        success, where the last two are whatever plot_trial_sites and
        plot_trial_site_map return, or (message, None, None, None) when no
        site table came back.
    """
    condition = select_condition_con(condition, condition_type)
    sponsor = select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s)

    results = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting"
    )
    _, _, _, summary_stats_sites, html_table_add, _ = results

    # Single guard; the original repeated this same check three times.
    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None, None

    # Wrap in StringIO: passing literal HTML to read_html is deprecated.
    sites_df = pd.read_html(io.StringIO(html_table_add))[0]
    country_site = plot_trial_site_map(sites_df)
    site_cond = plot_trial_sites(sites_df)
    return summary_stats_sites, html_table_add, site_cond, country_site
###################################### Timelines ################################################################### | |
def select_sponsor_cont(sponsor_input_con, academia_input_con):
    """Return the first argument when truthy, else the second one unchanged."""
    return sponsor_input_con or academia_input_con
def select_condition_cont(condition_input, condition_input_type):
    """Return the condition to query for the timelines view.

    A non-blank typed value (second argument) is returned stripped;
    otherwise the first argument is returned unchanged. A typed value of
    None is treated as blank instead of raising AttributeError.
    """
    typed_name = (condition_input_type or "").strip()
    return typed_name or condition_input
async def condition_viewt(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the timelines view for trials with status "Recruiting".

    Returns (summary_stats_sites, html_table, bubble_map) where bubble_map
    is whatever plot_trial_bubblemap returns for the parsed site table, or
    (message, None, None) when no site table came back.
    """
    chosen_condition = select_condition_cont(condition, condition_type)
    chosen_sponsor = select_sponsor_cont(sponsor_input_con, academia_input_con)

    fetched = await gradio_wrapper_nct_spn(
        condition=chosen_condition,
        sponsor=chosen_sponsor,
        country=country,
        status="Recruiting",
    )
    _, _, html_table, summary_stats_sites, html_table_add, _ = fetched

    # Nothing to plot without the site table.
    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    sites_frame = pd.read_html(html_table_add)[0]
    return summary_stats_sites, html_table, plot_trial_bubblemap(sites_frame)
############### Find Site Map ######################################################################################################### | |
def select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map):
    """Return the first argument when truthy, else the second one unchanged."""
    return sponsor_input_con_map or academia_input_con_map
async def condition_view_map(condition, country, sponsor_input_con_map, academia_input_con_map):
    """Build the site world-map view for trials with status "Recruiting".

    The condition is passed through as given. Returns
    (summary_stats_sites, html_table_add, world_map), or a message followed
    by two None values when no site table came back or when
    plot_trial_site_world_map returns None.
    """
    chosen_sponsor = select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map)

    fetched = await gradio_wrapper_nct_spn(
        condition=condition,
        sponsor=chosen_sponsor,
        country=country,
        status="Recruiting",
    )
    _, _, _, summary_stats_sites, html_table_add, _ = fetched

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    sites_frame = pd.read_html(html_table_add)[0]
    world_map = plot_trial_site_world_map(sites_frame)
    if world_map is not None:
        return summary_stats_sites, html_table_add, world_map

    return "Sorry, the plot could not be generated. Please try again by slecting a country!", None, None
### ########################################Find Trial Eligibility########################################################################### | |
############################################################################ END VIEWS######################## | |
#### To remove the inclusion exclusion numbers duplicating in text | |
import re | |
def format_html_list(html_string):
    """Re-number a run of "N. text" items and join them with "<br>".

    The input is split on every "<digits>. " marker (the regex accepts any
    whitespace after the period). For each item, ":." and "General." are
    removed, blank items are dropped, a leading item that has a number but
    no text is dropped, and the remaining items are renumbered from 1. Runs
    of two or more periods are collapsed to one.

    Returns an empty string when the input is empty/None or contains no
    numbered items (the original raised IndexError in those cases). Items
    that lack a ". " separator are kept as-is.
    """
    if not html_string:
        return ""

    # With a capture group, re.split yields [lead, marker, text, marker, text, ...].
    pieces = re.split(r'(\d+\.\s)', html_string)

    entries = []
    for marker, body in zip(pieces[1::2], pieces[2::2]):
        entry = re.sub(r':\.', '', marker + body)
        entry = re.sub(r'General\.', '', entry)
        if entry.strip():
            entries.append(entry)

    if not entries:
        return ""

    # Drop a leading item that consists of a number only.
    _, separator, remainder = entries[0].partition('. ')
    if separator and not remainder.strip():
        entries = entries[1:]

    renumbered = []
    for position, entry in enumerate(entries, start=1):
        _, separator, remainder = entry.partition('. ')
        renumbered.append(f"{position}. {remainder}" if separator else entry)

    collapsed = [re.sub(r'\.{2,}', '.', entry) for entry in renumbered]
    return "<br>".join(collapsed)
######################################################################################## | |
def format_html_list_old(html_string):
    """Join the "N. text" items of html_string with "<br>".

    Each item keeps its original number. ":." and "General." are removed
    from every item and items that end up blank are dropped.
    """
    # With a capture group, re.split yields [lead, marker, text, marker, text, ...];
    # walk the tail two elements at a time to pair each marker with its text.
    tail = iter(re.split(r'(\d+\.\s)', html_string)[1:])

    kept = []
    for marker, body in zip(tail, tail):
        entry = re.sub(r'General\.', '', re.sub(r':\.', '', marker + body))
        if entry.strip():
            kept.append(entry)

    return "<br>".join(kept)
# Function to convert a list of formatted criteria to a dictionary | |
# ############################# Hugging Face Model Invoke #################################### | |
import os | |
import io | |
from IPython.display import Image, display, HTML | |
from PIL import Image | |
import base64 | |
import gradio as gr | |
import requests, json | |
################################################################ NLP Model ####################################### | |
# API Token and Model Name | |
# SECURITY: a Hugging Face access token is committed in source below. Prefer
# supplying it through the HF_API_TOKEN environment variable; the literal is
# kept only as a backward-compatible fallback.
# TODO: revoke this token and remove the literal.
API_TOKEN = os.environ.get("HF_API_TOKEN") or "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"
# Model id used for the hosted inference endpoint and the tokenizer.
MODEL_NAME = "d4data/biomedical-ner-all"
#MODEL_NAME = "kormilitzin/en_core_spancat_med7_lg" | |
############################################################################################################################ | |
def merge_tokens(tokens):
    """Merge consecutive tokens that share the same 'entity_group'.

    For each run of neighbouring tokens with an equal 'entity_group', the
    'word' values are concatenated (with any "##" removed from the appended
    piece), 'end' is taken from the last token of the run, and 'score' is
    updated as the average of the running score and the new token's score
    (a pairwise running average, not the arithmetic mean of the run).

    The input dicts are not modified: each merged entry starts from a
    shallow copy, whereas the original implementation mutated the caller's
    dicts in place.

    Returns a new list; an empty or None input yields [].
    """
    merged = []
    for token in tokens or []:
        previous = merged[-1] if merged else None
        if previous is not None and token['entity_group'] == previous['entity_group']:
            previous['word'] += token['word'].replace('##', '')
            previous['end'] = token['end']
            previous['score'] = (previous['score'] + token['score']) / 2
        else:
            # Copy so later merges never write into the caller's dict.
            merged.append(dict(token))
    return merged
# Function to call Hugging Face API################################################################################ | |
def get_completion(text):
    """POST text to the hosted inference endpoint for MODEL_NAME.

    Sends {"inputs": text} as JSON with a bearer token and a 90 second
    timeout, prints the raw response body, and returns the decoded JSON.
    """
    endpoint = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
    auth_headers = {"Authorization": f"Bearer {API_TOKEN}"}
    # Only the input text is sent; no max_tokens parameter is included.
    payload = {"inputs": text}

    reply = requests.post(endpoint, headers=auth_headers, json=payload, timeout=90)
    print(f"From Hugging Face API: {reply.text}")
    return reply.json()
# Split texts when longer than 2048 tokens | |
from transformers import AutoTokenizer

# Tokenizer for MODEL_NAME; split_input_text uses it to count and slice tokens.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


#####################################################################################
def split_input_text(text, max_tokens):
    """Split text into chunks of at most max_tokens tokenizer tokens.

    The text is encoded with the module-level tokenizer, the token ids are
    cut into consecutive slices of max_tokens, and each slice is decoded
    back to a string. The token length of every chunk is printed for
    debugging.

    Encoding is done without special tokens. With the default settings the
    tokenizer's special tokens were added by encode() and then written into
    the decoded chunk text, so they ended up in the strings sent to the
    model. As a consequence, empty text now yields no chunks.

    Returns the list of decoded chunk strings.
    """
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    token_chunks = []
    for start in range(0, len(token_ids), max_tokens):
        id_slice = token_ids[start:start + max_tokens]
        token_chunks.append(tokenizer.decode(id_slice))
        # Debug: Print the token length of the current chunk
        print(f"Token length of chunk {len(token_chunks)}: {len(id_slice)}")
    return token_chunks
# Function to remove HTML tags from the input text | |
def remove_html_tags(text):
    """Replace every "<...>" tag in text with a single space."""
    tag_pattern = re.compile('<[^>]*>')
    return tag_pattern.sub(' ', text)
def ner_oll(input):
    """Run NER over input through the hosted inference API (get_completion).

    HTML tags are removed, the text is split into chunks of up to 500
    tokens, each chunk is sent to get_completion, and the returned entities
    are merged with merge_tokens.

    Each chunk gets its own budget of 10 attempts. Previously the retry
    counter and the last output were shared across chunks, so once the
    budget was spent every later chunk was skipped and the stale output of
    an earlier chunk was merged again. An empty list is accepted as a valid
    "no entities" answer instead of being retried.

    Returns:
        {"text": input, "entities": [...]}; the entity list is empty when
        any chunk could not be processed within its attempts.

    NOTE(review): entity offsets presumably refer to the chunk text sent to
    the API, not to the original `input` returned under "text" - confirm
    before relying on them for highlighting.
    """
    max_retries = 10

    # Remove HTML tags from the input text, then split it into chunks
    input_no_html = remove_html_tags(input)
    input_chunks = split_input_text(input_no_html, 500)
    all_merged_tokens = []

    # Debug: Print the number of chunks created
    print(f"Number of input chunks: {len(input_chunks)}")

    for chunk_number, input_chunk in enumerate(input_chunks, start=1):
        output = None
        for _attempt in range(max_retries):
            try:
                candidate = get_completion(input_chunk)
            except Exception as e:
                print(f"Error in API call: {e}")
                continue
            if isinstance(candidate, dict) and 'error' in candidate:
                print("Error in API response, retrying...")
                continue
            if not isinstance(candidate, list):
                # Anything but a list of entities cannot be merged.
                print("Error in API call: Empty output")
                continue
            output = candidate
            break

        if output is None:
            print("Failed to get API response after maximum 10 retries.")
            return {"text": input, "entities": []}

        merged_tokens = merge_tokens(output)
        # Debug: Print the merged tokens for the current output
        print(f"Merged tokens for chunk {chunk_number}: {merged_tokens}")
        all_merged_tokens.extend(merged_tokens)

    print(all_merged_tokens)
    return {"text": input, "entities": all_merged_tokens}
########################################################################################################## | |
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification

# Load the tokenizer and model for the local pipeline. MODEL_NAME holds the
# same model id that was previously repeated here as a literal.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
# Create the NER pipeline
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
############################################################################################## | |
def ner(input):
    """Run the local NER pipeline over input and return filtered entities.

    HTML tags are removed, the text is split into chunks of up to 500
    tokens, each chunk is passed to `pipe`, entities whose 'entity_group' is
    in the exclusion set below are dropped, and the rest are merged with
    merge_tokens.

    Each chunk gets its own budget of 10 attempts, and an empty entity list
    is a valid result. Previously an empty result was retried until the
    shared retry budget ran out, after which every later chunk was skipped
    and the stale output of an earlier chunk was merged again.

    Returns:
        {"text": input, "entities": [...]}; the entity list is empty when
        the pipeline failed for a chunk on every attempt.

    NOTE(review): entity offsets presumably refer to the chunk text passed
    to the pipeline, not to the original `input` returned under "text" -
    confirm before relying on them for highlighting.
    """
    # Entity groups that are not shown.
    excluded_groups = {
        'Coreference',
        'Detailed_description',
        'Lab_value',
        # 'Diagnostic_procedure',
        'Personal_background',
        'History',
        'Family_history',
        'Outcome',
        'Subject',
        'Date',
        'Distance',
        'Severity',
        'Activity',
        'Duration',
        'Administration',
        'Sex',
        'Age',
        'Sign_symptom',
        'Therapeutic_procedure',
        'Biological_structure',
    }
    max_retries = 10

    # Remove HTML tags from the input text, then split it into chunks
    input_no_html = remove_html_tags(input)
    input_chunks = split_input_text(input_no_html, 500)
    all_merged_tokens = []

    # Debug: Print the number of chunks created
    print(f"Number of input chunks: {len(input_chunks)}")

    for chunk_number, input_chunk in enumerate(input_chunks, start=1):
        output = None
        for _attempt in range(max_retries):
            try:
                output = pipe(input_chunk)
                break
            except Exception as e:
                print(f"Error in pipeline call: {e}")

        if output is None:
            print("Failed to get pipeline output after maximum 10 retries.")
            return {"text": input, "entities": []}

        filtered_output = [
            entity for entity in output
            if entity['entity_group'] not in excluded_groups
        ]
        # Debug: Print the entities for the current output after filtering
        print(f"Filtered entities for chunk {chunk_number}: {filtered_output}")
        all_merged_tokens.extend(merge_tokens(filtered_output))

    print(all_merged_tokens)
    return {"text": input, "entities": all_merged_tokens}
############################################################################## | |
def ner_unflitered(input):
    """Run the local NER pipeline over input and return all merged entities.

    HTML tags are removed, the text is split into chunks of up to 500
    tokens, each chunk is passed to `pipe`, and the entities are merged with
    merge_tokens without filtering any entity group.

    Each chunk gets its own budget of 10 attempts, and an empty entity list
    is a valid result. Previously an empty result was retried until the
    shared retry budget ran out, after which every later chunk was skipped
    and the stale output of an earlier chunk was merged again.

    Returns:
        {"text": input, "entities": [...]}; the entity list is empty when
        the pipeline failed for a chunk on every attempt.
    """
    max_retries = 10

    # Remove HTML tags from the input text, then split it into chunks
    input_no_html = remove_html_tags(input)
    input_chunks = split_input_text(input_no_html, 500)
    all_merged_tokens = []

    # Debug: Print the number of chunks created
    print(f"Number of input chunks: {len(input_chunks)}")

    for chunk_number, input_chunk in enumerate(input_chunks, start=1):
        output = None
        for _attempt in range(max_retries):
            try:
                output = pipe(input_chunk)
                break
            except Exception as e:
                print(f"Error in pipeline call: {e}")

        if output is None:
            print("Failed to get pipeline output after maximum 10 retries.")
            return {"text": input, "entities": []}

        # Debug: Print the entities for the current output
        print(f"Entities for chunk {chunk_number}: {output}")
        all_merged_tokens.extend(merge_tokens(output))

    print(all_merged_tokens)
    return {"text": input, "entities": all_merged_tokens}
############################################################################################################################################# | |
async def trial_view_map(nctID):
    """Build the site world map for a single trial id.

    The id is stripped and must start with "NCT" and be 10 to 12 characters
    long; otherwise a message and two None values are returned. On success
    returns (summary_stats_sites, world_map, html_table_add); a message and
    two None values are also returned when no site table came back or when
    plot_trial_site_world_map returns None.
    """
    trial_id = nctID.strip()

    looks_valid = trial_id.startswith('NCT') and 10 <= len(trial_id) <= 12
    if not looks_valid:
        return "Not a Valid NCT ID has been entered", None, None

    fetched = await gradio_wrapper_nct_spn(NCTId=trial_id, status="Recruiting")
    _, _, _, summary_stats_sites, html_table_add, _ = fetched

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    sites_frame = pd.read_html(html_table_add)[0]
    world_map = plot_trial_site_world_map(sites_frame)
    if world_map is not None:
        return summary_stats_sites, world_map, html_table_add

    return "Sorry, the plot could not be generated. Please try again by slecting a country!", None, None
#return html_table, formatted_html_inclusions,formatted_html_exclusions,world_map | |
#################################################################################################################################################### | |
async def trial_view(nctID):
    """Build the eligibility view (trial table plus NER concepts) for a trial id.

    The id is stripped and must start with "NCT" and be 10 to 12 characters
    long; otherwise a message and two None values are returned. The
    inclusion and exclusion criteria are fetched with
    get_formatted_inclusion_criteria / get_formatted_exclusion_criteria and
    passed through ner().

    When only one of the two criteria texts is missing, the missing one is
    passed to ner() as an empty string instead of its falsy original value,
    which would fail inside the HTML-stripping step if it were None.

    Returns:
        (html_table, inclusion_concepts, exclusion_concepts), or
        (message, None, None) when the id is invalid or both criteria texts
        are empty.
    """
    nctID = nctID.strip()

    # Check if nctID is valid
    if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12):
        return "Not a Valid NCT ID has been entered", None, None

    results = await gradio_wrapper_nct(NCTId=nctID, status="Recruiting")
    _, _, _, _, html_table, _ = results

    formatted_inclusions = get_formatted_inclusion_criteria(nctID)
    print(formatted_inclusions)
    formatted_exclusions = get_formatted_exclusion_criteria(nctID)
    print(formatted_exclusions)

    # Check if both formatted_inclusions and formatted_exclusions are empty
    if not formatted_inclusions and not formatted_exclusions:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    inclusion_concepts = ner(formatted_inclusions or "")
    exclusion_concepts = ner(formatted_exclusions or "")
    return html_table, inclusion_concepts, exclusion_concepts
############################### Design the interface#################################################################################### | |
## Added after Sept 27 Failure
from gradio.components import Dropdown | |
############################################################################################################################################################################### | |
trial_app = gr.Blocks() | |
with trial_app: | |
gr.Markdown("<center style='font-size: 34px;'><b>Trial Connect</b></center>") | |
with gr.Tabs(): | |
############################################################ Sponsors ###################################################################### | |
# ---------------------------------------------------------------------------
# Choice lists shared by the dropdowns of the Sponsors, Conditions, Trials,
# Countries and Sites tabs built below. Each list used to be copy-pasted into
# every tab; keeping a single definition means a correction is made once.
# Tuples are used so the shared data cannot be mutated by accident; every
# dropdown receives its own list copy (see _choice_dropdown).
#
# NOTE(review): the nesting of rows/columns below was reconstructed from a
# source whose indentation had been flattened -- confirm the layout.
# ---------------------------------------------------------------------------
_PHARMA_CHOICES = (
    "AbbVie", "Amgen", "AstraZeneca", "Bayer", "BioNTech SE", "Biogen",
    "Bristol-Myers Squibb", "Boehringer Ingelheim",
    "CSL Behring", "Daiichi Sankyo, Inc.",
    "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences",
    "GlaxoSmithKline", "Hoffmann-La Roche",
    "Janssen Research & Development, LLC", "Merck Sharp & Dohme LLC",
    "ModernaTX, Inc.",
    "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer",
    "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
)

# Fix: the institution is "Johns Hopkins University" (was "John Hopkins").
_INSTITUTE_CHOICES = (
    "Baylor Breast Cancer Center", "Beth Israel Deaconess Medical Center",
    "City of Hope Medical Center", "Cornell University",
    "Columbia University", "Children's Oncology Group",
    "Dana-Farber Cancer Institute", "Dartmouth College",
    "Duke University", "European Institute of Oncology",
    "Fred Hutchinson Cancer Center", "Harvard University",
    "H. Lee Moffitt Cancer Center and Research Institute",
    "Johns Hopkins University", "Kaiser Permanente",
    "Massachusetts General Hospital", "Mayo Clinic",
    "M.D. Anderson Cancer Center", "Memorial Sloan Kettering Cancer Center",
    "National Cancer Institute",
    "Northwestern University", "NYU Langone Health",
    "Ohio State University Comprehensive Cancer Center",
    "Rutgers, The State University of New Jersey", "Stanford University",
    "Tufts University", "University of Washington",
    "Vanderbilt-Ingram Cancer Center", "Yale University",
)

# NOTE(review): " Major", "Metabolic", "Interstitial", "Renal", "Respiratory"
# and "Wet" look like fragments of longer condition names; they are kept
# verbatim (including the leading space of " Major") -- confirm the intent.
_CONDITION_CHOICES = (
    "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
    "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer",
    "Endometrial Cancer",
    "Esophageal Cancer", "Gallbladder Carcinoma", "Gastric Cancer",
    "Glioblastoma", "Head and Neck Cancer",
    "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
    "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma",
    "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
    "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer", "Renal Cancer",
    "Solid Tumor", "Stomach Cancer", "Rectal Cancer",
    "Triple Negative Breast Cancer", "Thyroid Cancer",
    "Urothelial Carcinoma",
    "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder",
    "Bronchiectasis", "Cognitive Deficit", "COPD",
    "Chronic Kidney Diseases", "Crohn Disease", "Diabetes",
    "Diabetic Retinopathy", "Depression", "Depressive Disorder",
    " Major", "Metabolic", "Generalized Pustular Psoriasis",
    "Heart Failure", "Hepatic Insufficiency", "Hypertension",
    "Idiopathic Pulmonary Fibrosis", "Interstitial",
    "Liver Cirrhosis",
    "NASH", "Non-alcoholic Fatty Liver Disease", "Obesity",
    "Pancreatic Diseases", "Psoriasis",
    "Psychological Trauma", "Renal", "Respiratory",
    "Schizophrenia", "PTSD",
    "Venous Thromboembolism", "Wet",
)

# Fix: the country is spelled "Colombia" (was "Columbia", which is not a
# country name and therefore cannot match a trial location country).
_COUNTRY_CHOICES = (
    "United States", "Argentina", "Australia", "Austria", "Belgium", "Brazil",
    "Bulgaria", "Canada", "Colombia", "China", "Chile", "Croatia", "Czechia",
    "Denmark", "Finland", "France", "Greece", "Germany", "Hungary",
    "India", "Ireland", "Israel", "Italy", "Japan", "Korea", "Latvia",
    "Malaysia", "Mexico", "Netherlands",
    "New Zealand", "Norway", "Poland", "Portugal", "Romania", "Serbia",
    "Singapore", "Slovakia", "Spain", "South Africa", "Sweden",
    "Switzerland", "Taiwan", "Turkey",
    "United Kingdom",
)


def _choice_dropdown(options, label):
    """Create a gr.Dropdown offering *options* under the given *label*.

    The dropdown is created at the point of the call, so call this inside
    the Row/Column block where the widget should appear. A fresh list is
    passed to Gradio so the shared tuples above are never handed out.
    """
    return gr.Dropdown(choices=list(options), label=label)


def _register_clear_targets(button, *components):
    """Call ``button.add`` once per component, in the order given.

    One ``add`` call per component mirrors the original code exactly, so
    the same sequence of registrations is made on the clear button.
    """
    for component in components:
        button.add(component)


with gr.TabItem("Sponsors"):
    # Input row: pharma sponsor, disease category, free-text disease, institute.
    with gr.Row():
        with gr.Column():
            sponsor_input = _choice_dropdown(_PHARMA_CHOICES, "Choose a Pharma ")
        with gr.Column():
            disease_input = gr.Dropdown(
                choices=[
                    "Cardiovascular Diseases",
                    "Depressive Disorder", "Digestive System Diseases",
                    "Endocrine System Diseases",
                    "Eye Diseases", "Heart Diseases", "Immune System Diseases",
                    "Infections", "Liver Diseases",
                    "Metabolic Diseases", "Neoplasms",
                    "Nervous System Diseases", "Oncology",
                    "Renal Diseases", "Respiratory Tract Diseases",
                    "Skin Diseases", "Stress Disorder", "Virology",
                ],
                label="Choose a Disease Category",
            )
        with gr.Column():
            disease_input_text = gr.Textbox(lines=1, label="Or Type the Disease Name:")
        with gr.Column():
            academia_input = _choice_dropdown(
                _INSTITUTE_CHOICES, "Or Choose a Research Institute"
            )

    # Action row: submit button plus a clear button for the four inputs.
    with gr.Row():
        sponsor_button = gr.Button("Show Sponsor Collaborators")
        clear_btn = gr.ClearButton()
        _register_clear_targets(
            clear_btn,
            sponsor_input,
            academia_input,
            disease_input,
            disease_input_text,
        )

    # Summary row: lead sponsors on the left, collaborators on the right.
    with gr.Row():
        with gr.Column():
            summary_block = gr.HTML(label="Lead Sponsors for Recruiting Clinical Trials:")
        with gr.Column():
            summary_block_collbs = gr.HTML(label="Collaborators in Recruiting Clinical Trials:")

    # Plot row.
    with gr.Row():
        with gr.Column():
            condition_others = gr.Plot()
        with gr.Column():
            condition_sunbursts = gr.Plot()
        with gr.Column():
            sponsor_trees = gr.Plot()
        with gr.Column():
            collaborator_trees = gr.Plot()

    with gr.Row():
        gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Sponsor Only Trials</h1>')
    with gr.Row():
        output_block_conditions = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")
    with gr.Row():
        gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Collaborator and Sponsor Trials</h1>')
    with gr.Row():
        output_block_conditions_collbs = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Collaborators")

    # Outputs reset by the clear button. collaborator_trees is left
    # unregistered, as in the original code where its add() call was
    # commented out.
    _register_clear_targets(
        clear_btn,
        summary_block,
        summary_block_collbs,
        output_block_conditions,
        output_block_conditions_collbs,
        condition_sunbursts,
        sponsor_trees,
        condition_others,
    )

with gr.TabItem("Conditions"):
    # Input row: pharma sponsor, condition, free-text condition, institute.
    with gr.Row():
        with gr.Column():
            s_sponsor_input_phc = _choice_dropdown(_PHARMA_CHOICES, "Choose a Pharma ")
        with gr.Column():
            s_disease_input_phc = _choice_dropdown(_CONDITION_CHOICES, "Choose a Condition")
        with gr.Column():
            s_disease_input_type_phc = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            s_academia_input_phc = _choice_dropdown(
                _INSTITUTE_CHOICES, "Or Choose a Research Institute"
            )

    # Action row: submit button plus a clear button for the four inputs.
    with gr.Row():
        s_button_phc = gr.Button("Show Conditions")
        clear_btn_phc = gr.ClearButton()
        _register_clear_targets(
            clear_btn_phc,
            s_sponsor_input_phc,
            s_academia_input_phc,
            s_disease_input_phc,
            s_disease_input_type_phc,
        )

    with gr.Row():
        summary_block_phc = gr.HTML(label="Conditions Now Recruiting for Clinical Trials:")
    with gr.Row():
        tree_map_cond_nct = gr.Plot()
    with gr.Row():
        output_block_conditions_phc = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")

    # Outputs reset by the clear button.
    _register_clear_targets(
        clear_btn_phc,
        summary_block_phc,
        output_block_conditions_phc,
        tree_map_cond_nct,
    )

with gr.TabItem("Trials"):
    # Input row: pharma sponsor, condition, free-text condition, institute.
    with gr.Row():
        with gr.Column():
            s_sponsor_input_phs = _choice_dropdown(_PHARMA_CHOICES, "Choose a Pharma ")
        with gr.Column():
            s_disease_input_phs = _choice_dropdown(_CONDITION_CHOICES, "Choose a Condition")
        with gr.Column():
            s_disease_input_type_phs = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            s_academia_input_phs = _choice_dropdown(
                _INSTITUTE_CHOICES, "Or Choose a Research Institute"
            )

    # Action row: submit button plus a clear button for the four inputs.
    with gr.Row():
        s_button_phs = gr.Button("Show Trials")
        clear_btn_phs = gr.ClearButton()
        _register_clear_targets(
            clear_btn_phs,
            s_sponsor_input_phs,
            s_academia_input_phs,
            s_disease_input_phs,
            s_disease_input_type_phs,
        )

    with gr.Row():
        summary_block_phs = gr.HTML(label="Conditions and Sponsors Now Recruiting for Clinical Trials:")
    with gr.Row():
        nct_org_map = gr.Plot()
    with gr.Row():
        output_block_conditions_phs = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")

    # Outputs reset by the clear button.
    _register_clear_targets(
        clear_btn_phs,
        summary_block_phs,
        output_block_conditions_phs,
        nct_org_map,
    )

with gr.TabItem("Countries"):
    # Input row: pharma sponsor, condition, country, free-text condition,
    # institute.
    with gr.Row():
        with gr.Column():
            sponsor_input_con = _choice_dropdown(_PHARMA_CHOICES, "Choose a Pharma ")
        with gr.Column():
            condition_input_con = _choice_dropdown(_CONDITION_CHOICES, "Choose a Condition")
        with gr.Column():
            country_input_tr = _choice_dropdown(_COUNTRY_CHOICES, "Choose a Country")
        with gr.Column():
            condition_input_type = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            # The leading space in this label is present in the original.
            academia_input_con = _choice_dropdown(
                _INSTITUTE_CHOICES, " Or Choose a Research Institute"
            )

    # Action row: submit button plus a clear button for the five inputs.
    with gr.Row():
        condition_button = gr.Button("Show Trial Countries")
        clear_cn_btn = gr.ClearButton()
        _register_clear_targets(
            clear_cn_btn,
            condition_input_con,
            sponsor_input_con,
            academia_input_con,
            condition_input_type,
            country_input_tr,
        )

    with gr.Row():
        summary_block_cond = gr.HTML(label="Countries with Recruiting Clinical Trials:")
    with gr.Row():
        trial_countries = gr.Plot()
    with gr.Row():
        condition_output = gr.HTML(label="List of Recruiting Trials")

    # Outputs reset by the clear button.
    _register_clear_targets(
        clear_cn_btn,
        summary_block_cond,
        trial_countries,
        condition_output,
    )

with gr.TabItem("Sites"):
    # Input row: pharma sponsor, condition, country, free-text condition,
    # institute.
    with gr.Row():
        with gr.Column():
            sponsor_input_con_s = _choice_dropdown(_PHARMA_CHOICES, "Choose a Pharma ")
        with gr.Column():
            condition_input_s = _choice_dropdown(_CONDITION_CHOICES, "Choose a Condition")
        with gr.Column():
            country_input_s = _choice_dropdown(_COUNTRY_CHOICES, "Choose a Country")
        with gr.Column():
            condition_input_site = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            academia_input_con_s = _choice_dropdown(
                _INSTITUTE_CHOICES, "Or Choose a Research Institute"
            )

    # Action row: submit button plus a clear button for the five inputs.
    with gr.Row():
        condition_button_s = gr.Button("Show Trial Sites")
        # NOTE: this rebinds the name clear_cn_btn, which the Countries tab
        # also uses. The Countries button is a separate object that was fully
        # configured above; the name is kept unchanged here in case code
        # further down the file refers to it.
        clear_cn_btn = gr.ClearButton()
        _register_clear_targets(
            clear_cn_btn,
            condition_input_s,
            condition_input_site,
            sponsor_input_con_s,
            academia_input_con_s,
            country_input_s,
        )

    with gr.Row():
        summary_block_cond_s = gr.HTML(label="Sites where Sponsors Now Recruiting for Clinical Trials:")
    with gr.Row():
        site_cond = gr.Plot()
    with gr.Row():
        gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Recruiting Sites with Trial Ids and Conditions </h1>')
    with gr.Row():
        country_site = gr.Plot()
    with gr.Row():
        condition_output_s = gr.HTML(label="List of Recruiting Trials for Country, Sites")

    # Outputs reset by the clear button.
    _register_clear_targets(
        clear_cn_btn,
        summary_block_cond_s,
        condition_output_s,
        country_site,
        site_cond,
    )
############################################################ ASSETS ############################################################### | |
with gr.TabItem("Drugs"):
    # "Drugs" tab: pick a sponsor / condition / institute, then show the
    # drug summary, a plot and the detailed HTML listing.

    # Option lists for the dropdowns of this tab.
    _drugs_pharma_options = [
        "AbbVie", "Amgen", "AstraZeneca", "Bayer", "BioNTech SE", "Biogen",
        "Bristol-Myers Squibb", "Boehringer Ingelheim",
        "CSL Behring", "Daiichi Sankyo, Inc.",
        "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences",
        "GlaxoSmithKline", "Hoffmann-La Roche",
        "Janssen Research & Development, LLC", "Merck Sharp & Dohme LLC",
        "ModernaTX, Inc.",
        "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer",
        "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
    ]
    _drugs_condition_options = [
        "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
        "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer",
        "Endometrial Cancer",
        "Esophageal Cancer", "Gallbladder Carcinoma", "Gastric Cancer",
        "Glioblastoma", "Head and Neck Cancer",
        "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
        "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma",
        "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
        "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer",
        "Renal Cancer",
        "Solid Tumor", "Stomach Cancer", "Rectal Cancer",
        "Triple Negative Breast Cancer", "Thyroid Cancer",
        "Urothelial Carcinoma",
        "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder",
        "Bronchiectasis", "Cognitive Deficit", "COPD",
        "Chronic Kidney Diseases", "Crohn Disease", "Diabetes",
        "Diabetic Retinopathy", "Depression", "Depressive Disorder",
        " Major", "Metabolic", "Generalized Pustular Psoriasis",
        "Heart Failure", "Hepatic Insufficiency", "Hypertension",
        "Idiopathic Pulmonary Fibrosis", "Interstitial",
        "Liver Cirrhosis",
        "NASH", "Non-alcoholic Fatty Liver Disease", "Obesity",
        "Pancreatic Diseases", "Psoriasis",
        "Psychological Trauma", "Renal", "Respiratory",
        "Schizophrenia", "PTSD",
        "Venous Thromboembolism", "Wet",
    ]
    _drugs_institute_options = [
        "Baylor Breast Cancer Center", "Beth Israel Deaconess Medical Center",
        "City of Hope Medical Center", "Cornell University",
        "Columbia University", "Children's Oncology Group",
        "Dana-Farber Cancer Institute", "Dartmouth College",
        "Duke University", "European Institute of Oncology",
        "Fred Hutchinson Cancer Center", "Harvard University",
        "H. Lee Moffitt Cancer Center and Research Institute",
        "John Hopkins University", "Kaiser Permanente",
        "Massachusetts General Hospital", "Mayo Clinic",
        "M.D. Anderson Cancer Center",
        "Memorial Sloan Kettering Cancer Center", "National Cancer Institute",
        "Northwestern University", "NYU Langone Health",
        "Ohio State University Comprehensive Cancer Center",
        "Rutgers, The State University of New Jersey", "Stanford University",
        "Tufts University", "University of Washington",
        "Vanderbilt-Ingram Cancer Center", "Yale University",
    ]

    # Filter row: one column per search criterion.
    with gr.Row():
        with gr.Column():
            s_sponsor_input = gr.Dropdown(
                choices=_drugs_pharma_options,
                label="Choose a Pharma ",
            )
        with gr.Column():
            s_disease_input = gr.Dropdown(
                choices=_drugs_condition_options,
                label="Choose a Condition",
            )
        with gr.Column():
            s_disease_input_type = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            s_academia_input = gr.Dropdown(
                choices=_drugs_institute_options,
                label="Or Choose a Research Institute",
            )

    # Action row: run the search or reset the tab.
    with gr.Row():
        s_drug_button = gr.Button("Show Drugs")
        clear_btn = gr.ClearButton()
        for _drugs_input in (
            s_sponsor_input,
            s_academia_input,
            s_disease_input,
            s_disease_input_type,
        ):
            clear_btn.add(_drugs_input)

    # Result area: summary text, plot, detailed listing.
    with gr.Row():
        drug_summary_block = gr.HTML(
            label="Conditions and Drug Assets, Sponsors Now Recruiting for Clinical Trials:"
        )
    with gr.Row():
        sankey_map_drug = gr.Plot()
    with gr.Row():
        drug_output_block_conditions = gr.HTML(
            label="Outputs: List of Conditions, Trial Ids and Sponsors"
        )

    # The clear button also wipes the rendered results.
    for _drugs_output in (
        drug_summary_block,
        drug_output_block_conditions,
        sankey_map_drug,
    ):
        clear_btn.add(_drugs_output)
############################################################################################################################################################################################ | |
############################################################################# TIMELINES ############################################################################# | |
with gr.TabItem("Timeline"):
    # "Timeline" tab: filter by sponsor / condition / country / institute and
    # render a summary, a plot and the list of trials.

    _timeline_pharma_options = [
        "AbbVie", "Amgen", "AstraZeneca", "Bayer", "BioNTech SE", "Biogen",
        "Bristol-Myers Squibb", "Boehringer Ingelheim",
        "CSL Behring", "Daiichi Sankyo, Inc.",
        "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences",
        "GlaxoSmithKline", "Hoffmann-La Roche",
        "Janssen Research & Development, LLC", "Merck Sharp & Dohme LLC",
        "ModernaTX, Inc.",
        "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer",
        "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
    ]
    _timeline_condition_options = [
        "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
        "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer",
        "Endometrial Cancer",
        "Esophageal Cancer", "Gallbladder Carcinoma", "Gastric Cancer",
        "Glioblastoma", "Head and Neck Cancer",
        "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
        "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma",
        "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
        "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer",
        "Renal Cancer",
        "Solid Tumor", "Stomach Cancer", "Rectal Cancer",
        "Triple Negative Breast Cancer", "Thyroid Cancer",
        "Urothelial Carcinoma",
        "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder",
        "Bronchiectasis", "Cognitive Deficit", "COPD",
        "Chronic Kidney Diseases", "Crohn Disease", "Diabetes",
        "Diabetic Retinopathy", "Depression", "Depressive Disorder",
        " Major", "Metabolic", "Generalized Pustular Psoriasis",
        "Heart Failure", "Hepatic Insufficiency", "Hypertension",
        "Idiopathic Pulmonary Fibrosis", "Interstitial",
        "Liver Cirrhosis",
        "NASH", "Non-alcoholic Fatty Liver Disease", "Obesity",
        "Pancreatic Diseases", "Psoriasis",
        "Psychological Trauma", "Renal", "Respiratory",
        "Schizophrenia", "PTSD",
        "Venous Thromboembolism", "Wet",
    ]
    # Country names as offered to the user. "Colombia" replaces the former
    # misspelling "Columbia", which is not a country name and therefore could
    # never match a trial location.
    # NOTE(review): confirm no downstream lookup is keyed on the old spelling.
    _timeline_country_options = [
        "United States", "Argentina", "Australia", "Austria", "Belgium",
        "Brazil", "Bulgaria", "Canada", "Colombia", "China", "Chile",
        "Croatia", "Czechia", "Denmark", "Finland", "France", "Greece",
        "Germany", "Hungary",
        "India", "Ireland", "Israel", "Italy", "Japan", "Korea", "Latvia",
        "Malaysia", "Mexico", "Netherlands",
        "New Zealand", "Norway", "Poland", "Portugal", "Romania", "Serbia",
        "Singapore", "Slovakia", "Spain", "South Africa", "Sweden",
        "Switzerland", "Taiwan", "Turkey",
        "United Kingdom",
    ]
    _timeline_institute_options = [
        "Baylor Breast Cancer Center", "Beth Israel Deaconess Medical Center",
        "City of Hope Medical Center", "Cornell University",
        "Columbia University", "Children's Oncology Group",
        "Dana-Farber Cancer Institute", "Dartmouth College",
        "Duke University", "European Institute of Oncology",
        "Fred Hutchinson Cancer Center", "Harvard University",
        "H. Lee Moffitt Cancer Center and Research Institute",
        "John Hopkins University", "Kaiser Permanente",
        "Massachusetts General Hospital", "Mayo Clinic",
        "M.D. Anderson Cancer Center",
        "Memorial Sloan Kettering Cancer Center", "National Cancer Institute",
        "Northwestern University", "NYU Langone Health",
        "Ohio State University Comprehensive Cancer Center",
        "Rutgers, The State University of New Jersey", "Stanford University",
        "Tufts University", "University of Washington",
        "Vanderbilt-Ingram Cancer Center", "Yale University",
    ]

    # Filter row: one column per search criterion.
    with gr.Row():
        with gr.Column():
            sponsor_input_cont = gr.Dropdown(
                choices=_timeline_pharma_options,
                label="Choose a Pharma ",
            )
        with gr.Column():
            condition_input_cont = gr.Dropdown(
                choices=_timeline_condition_options,
                label="Choose a Condition",
            )
        with gr.Column():
            country_input_trt = gr.Dropdown(
                choices=_timeline_country_options,
                label="Choose a Country",
            )
        with gr.Column():
            condition_input_typet = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            academia_input_cont = gr.Dropdown(
                choices=_timeline_institute_options,
                label=" Or Choose a Research Institute",
            )

    # Action row: run the search or reset the tab.
    with gr.Row():
        condition_button_t = gr.Button("Show Timelines")
        # A fresh ClearButton for this tab; the name is rebound here, so the
        # registrations below only affect the Timeline widgets.
        clear_cn_btn = gr.ClearButton()
        clear_cn_btn.add(condition_input_cont)
        clear_cn_btn.add(sponsor_input_cont)
        clear_cn_btn.add(academia_input_cont)
        clear_cn_btn.add(condition_input_typet)
        clear_cn_btn.add(country_input_trt)

    # Result area.
    with gr.Row():
        summary_block_condt = gr.HTML(label="Countries with Recruiting Clinical Trials:")
    with gr.Row():
        bubble_map_trial = gr.Plot()
    with gr.Row():
        condition_outputt = gr.HTML(label="List of Recruiting Trials")

    # The clear button also wipes the rendered results.
    clear_cn_btn.add(summary_block_condt)
    clear_cn_btn.add(bubble_map_trial)
    clear_cn_btn.add(condition_outputt)
############################################################ Eligibility ############## | |
with gr.TabItem("Eligibility"):
    # "Eligibility" tab: the user types one NCT id; the tab shows the trial
    # details plus highlighted inclusion / exclusion concepts.
    with gr.Row():
        nctID_input = gr.Textbox(
            label="Type Trial NCT Id,For Example: NCT05512377 or NCT04924075 or NCT04419506 etc.",
            lines=1,
        )
        trial_button = gr.Button("Show Eligibility - 30 Seconds Wait Time")
        clear_tn_btn = gr.ClearButton()
        # The clear button resets the NCT id field ...
        clear_tn_btn.add(nctID_input)

    with gr.Row():
        trial_output = gr.HTML(label="Detail of Recruiting Trials")

    # Inclusion and exclusion concepts are shown side by side.
    with gr.Row():
        with gr.Column():
            concept_inclusion = gr.HighlightedText(label="Display of Inclusion Concepts")
        with gr.Column():
            concept_exclusion = gr.HighlightedText(label="Display of Exclusion Concepts")

    # ... and every output rendered for the trial.
    for _eligibility_output in (trial_output, concept_inclusion, concept_exclusion):
        clear_tn_btn.add(_eligibility_output)
############################################################################################################################################## | |
############################################################ Trial Map ############## | |
with gr.TabItem("Trial Sites"):
    # "Trial Sites" tab: the user types one NCT id; the tab shows a summary,
    # a plot and the list of recruiting countries / sites.
    with gr.Row():
        nctID_inputs = gr.Textbox(lines=1, label="Type Trial NCT Id,For Example: NCT05512377")
        trial_buttons = gr.Button("Show Sites Map: Wait Time 45 seconds")
        clear_tn_btns = gr.ClearButton()
        # Register THIS tab's textbox (nctID_inputs). The previous code
        # registered nctID_input, the Eligibility tab's field, so pressing
        # Clear here wiped the other tab's input and left this one untouched.
        clear_tn_btns.add(nctID_inputs)

    with gr.Row():
        summary_block_trial_map = gr.HTML(label="Site Map for Recruiting Clinical Trials:")
    with gr.Row():
        world_map = gr.Plot()
    with gr.Row():
        trial_output_map = gr.HTML(label="List of Recruiting Country, Sites")

    # The clear button also wipes the rendered results.
    clear_tn_btns.add(summary_block_trial_map)
    clear_tn_btns.add(world_map)
    clear_tn_btns.add(trial_output_map)
############################################################################################################################################## | |
################################################################ Future Trials ############################################################################################### | |
with gr.TabItem("Upcoming Trials"):
    # "Upcoming Trials" tab: filter by sponsor / condition / institute and
    # render a summary, two plots and the detailed HTML listing.

    _upcoming_pharma_options = [
        "AbbVie", "Amgen", "AstraZeneca", "Bayer", "BioNTech SE", "Biogen",
        "Bristol-Myers Squibb", "Boehringer Ingelheim",
        "CSL Behring", "Daiichi Sankyo, Inc.",
        "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences",
        "GlaxoSmithKline", "Hoffmann-La Roche",
        "Janssen Research & Development, LLC", "Merck Sharp & Dohme LLC",
        "ModernaTX, Inc.",
        "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer",
        "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
    ]
    _upcoming_condition_options = [
        "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
        "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer",
        "Endometrial Cancer",
        "Esophageal Cancer", "Gallbladder Carcinoma", "Gastric Cancer",
        "Glioblastoma", "Head and Neck Cancer",
        "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
        "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma",
        "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
        "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer",
        "Renal Cancer",
        "Solid Tumor", "Stomach Cancer", "Rectal Cancer",
        "Triple Negative Breast Cancer", "Thyroid Cancer",
        "Urothelial Carcinoma",
        "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder",
        "Bronchiectasis", "Cognitive Deficit", "COPD",
        "Chronic Kidney Diseases", "Crohn Disease", "Diabetes",
        "Diabetic Retinopathy", "Depression", "Depressive Disorder",
        " Major", "Metabolic", "Generalized Pustular Psoriasis",
        "Heart Failure", "Hepatic Insufficiency", "Hypertension",
        "Idiopathic Pulmonary Fibrosis", "Interstitial",
        "Liver Cirrhosis",
        "NASH", "Non-alcoholic Fatty Liver Disease", "Obesity",
        "Pancreatic Diseases", "Psoriasis",
        "Psychological Trauma", "Renal", "Respiratory",
        "Schizophrenia", "PTSD",
        "Venous Thromboembolism", "Wet",
    ]
    _upcoming_institute_options = [
        "Baylor Breast Cancer Center", "Beth Israel Deaconess Medical Center",
        "City of Hope Medical Center", "Cornell University",
        "Columbia University", "Children's Oncology Group",
        "Dana-Farber Cancer Institute", "Dartmouth College",
        "Duke University", "European Institute of Oncology",
        "Fred Hutchinson Cancer Center", "Harvard University",
        "H. Lee Moffitt Cancer Center and Research Institute",
        "John Hopkins University", "Kaiser Permanente",
        "Massachusetts General Hospital", "Mayo Clinic",
        "M.D. Anderson Cancer Center",
        "Memorial Sloan Kettering Cancer Center", "National Cancer Institute",
        "Northwestern University", "NYU Langone Health",
        "Ohio State University Comprehensive Cancer Center",
        "Rutgers, The State University of New Jersey", "Stanford University",
        "Tufts University", "University of Washington",
        "Vanderbilt-Ingram Cancer Center", "Yale University",
    ]

    # Filter row: one column per search criterion.
    with gr.Row():
        with gr.Column():
            s_sponsor_input_phs_n = gr.Dropdown(
                choices=_upcoming_pharma_options,
                label="Choose a Pharma ",
            )
        with gr.Column():
            s_disease_input_phs_n = gr.Dropdown(
                choices=_upcoming_condition_options,
                label="Choose a Condition",
            )
        with gr.Column():
            s_disease_input_type_phs_n = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            s_academia_input_phs_n = gr.Dropdown(
                choices=_upcoming_institute_options,
                label="Or Choose a Research Institute",
            )

    # Action row: run the search or reset the tab.
    with gr.Row():
        s_button_phs_n = gr.Button("Show Trials")
        clear_btn_phs = gr.ClearButton()
        clear_btn_phs.add(s_sponsor_input_phs_n)
        clear_btn_phs.add(s_academia_input_phs_n)
        clear_btn_phs.add(s_disease_input_phs_n)
        clear_btn_phs.add(s_disease_input_type_phs_n)

    # Result area.
    with gr.Row():
        summary_block_phs_n = gr.HTML(
            label="Conditions and Sponsors Will Recruit for Clinical Trials:"
        )
    with gr.Row():
        tree_map_cond_nct_n = gr.Plot()
    with gr.Row():
        gr.HTML(
            '<h1 style="font-size:24px; color:black; font-weight:bold;">'
            'Upcoming Trials With Timelines </h1>'
        )
    with gr.Row():
        trial_plot = gr.Plot()
    with gr.Row():
        output_block_conditions_phs_n = gr.HTML(
            label="Outputs: List of Conditions, Trial Ids and Sponsors"
        )

    # The clear button wipes every output the "Show Trials" handler fills.
    clear_btn_phs.add(summary_block_phs_n)
    clear_btn_phs.add(output_block_conditions_phs_n)
    # tree_map_cond_nct_n was previously never registered, so Clear left a
    # stale tree map on screen.
    clear_btn_phs.add(tree_map_cond_nct_n)
    clear_btn_phs.add(trial_plot)
############################################################################################################################################## | |
################################################################ Completed Trials ############################################################################################### | |
with gr.TabItem("Completed Trials"):
    # "Completed Trials" tab: filter by sponsor / condition / institute and
    # render a summary, four plots and the detailed HTML listing.

    # Option lists for the dropdowns of this tab.
    _completed_pharma_options = [
        "AbbVie", "Amgen", "AstraZeneca", "Bayer", "BioNTech SE", "Biogen",
        "Bristol-Myers Squibb", "Boehringer Ingelheim",
        "CSL Behring", "Daiichi Sankyo, Inc.",
        "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences",
        "GlaxoSmithKline", "Hoffmann-La Roche",
        "Janssen Research & Development, LLC", "Merck Sharp & Dohme LLC",
        "ModernaTX, Inc.",
        "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer",
        "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
    ]
    _completed_condition_options = [
        "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
        "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer",
        "Endometrial Cancer",
        "Esophageal Cancer", "Gallbladder Carcinoma", "Gastric Cancer",
        "Glioblastoma", "Head and Neck Cancer",
        "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
        "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma",
        "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
        "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer",
        "Renal Cancer",
        "Solid Tumor", "Stomach Cancer", "Rectal Cancer",
        "Triple Negative Breast Cancer", "Thyroid Cancer",
        "Urothelial Carcinoma",
        "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder",
        "Bronchiectasis", "Cognitive Deficit", "COPD",
        "Chronic Kidney Diseases", "Crohn Disease", "Diabetes",
        "Diabetic Retinopathy", "Depression", "Depressive Disorder",
        " Major", "Metabolic", "Generalized Pustular Psoriasis",
        "Heart Failure", "Hepatic Insufficiency", "Hypertension",
        "Idiopathic Pulmonary Fibrosis", "Interstitial",
        "Liver Cirrhosis",
        "NASH", "Non-alcoholic Fatty Liver Disease", "Obesity",
        "Pancreatic Diseases", "Psoriasis",
        "Psychological Trauma", "Renal", "Respiratory",
        "Schizophrenia", "PTSD",
        "Venous Thromboembolism", "Wet",
    ]
    _completed_institute_options = [
        "Baylor Breast Cancer Center", "Beth Israel Deaconess Medical Center",
        "City of Hope Medical Center", "Cornell University",
        "Columbia University", "Children's Oncology Group",
        "Dana-Farber Cancer Institute", "Dartmouth College",
        "Duke University", "European Institute of Oncology",
        "Fred Hutchinson Cancer Center", "Harvard University",
        "H. Lee Moffitt Cancer Center and Research Institute",
        "John Hopkins University", "Kaiser Permanente",
        "Massachusetts General Hospital", "Mayo Clinic",
        "M.D. Anderson Cancer Center",
        "Memorial Sloan Kettering Cancer Center", "National Cancer Institute",
        "Northwestern University", "NYU Langone Health",
        "Ohio State University Comprehensive Cancer Center",
        "Rutgers, The State University of New Jersey", "Stanford University",
        "Tufts University", "University of Washington",
        "Vanderbilt-Ingram Cancer Center", "Yale University",
    ]

    # Filter row: one column per search criterion.
    with gr.Row():
        with gr.Column():
            s_sponsor_input_phs_c = gr.Dropdown(
                choices=_completed_pharma_options,
                label="Choose a Pharma ",
            )
        with gr.Column():
            s_disease_input_phs_c = gr.Dropdown(
                choices=_completed_condition_options,
                label="Choose a Condition",
            )
        with gr.Column():
            s_disease_input_type_phs_c = gr.Textbox(lines=1, label="Or Type a Condition:")
        with gr.Column():
            s_academia_input_phs_c = gr.Dropdown(
                choices=_completed_institute_options,
                label="Or Choose a Research Institute",
            )

    # Action row: run the search or reset the tab.
    with gr.Row():
        s_button_phs_c = gr.Button("Show Trials")
        clear_btn_phs = gr.ClearButton()
        for _completed_input in (
            s_sponsor_input_phs_c,
            s_academia_input_phs_c,
            s_disease_input_phs_c,
            s_disease_input_type_phs_c,
        ):
            clear_btn_phs.add(_completed_input)

    # Result area: summary, tree map, heading, three more plots, listing.
    with gr.Row():
        summary_block_phs_c = gr.HTML(
            label="Conditions and Sponsors Will Recruit for Clinical Trials:"
        )
    with gr.Row():
        tree_map_cond_nct_c = gr.Plot()
    with gr.Row():
        gr.HTML(
            '<h1 style="font-size:24px; color:black; font-weight:bold;">'
            'Recruiting Trials With Organization Study Ids</h1>'
        )
    with gr.Row():
        nct_org_map_c = gr.Plot()
    with gr.Row():
        trial_plot_c = gr.Plot()
    with gr.Row():
        time_plot_c = gr.Plot()
    with gr.Row():
        output_block_conditions_phs_c = gr.HTML(
            label="Outputs: List of Conditions, Trial Ids and Sponsors"
        )

    # The clear button also wipes every rendered result.
    for _completed_output in (
        summary_block_phs_c,
        output_block_conditions_phs_c,
        tree_map_cond_nct_c,
        nct_org_map_c,
        trial_plot_c,
        time_plot_c,
    ):
        clear_btn_phs.add(_completed_output)
################################ EVENT BUTTONS at GRADIO ################################################################################################################################ | |
# Click bindings, one entry per "Show ..." button:
#   (button, handler, input components, output components)
# The entries are registered in list order, which is the same order in which
# the individual .click() calls used to appear.
_click_bindings = [
    # Sponsors
    (
        sponsor_button,
        disease_view,
        [disease_input, disease_input_text, sponsor_input, academia_input],
        [
            summary_block,
            summary_block_collbs,
            output_block_conditions,
            output_block_conditions_collbs,
            condition_others,
            condition_sunbursts,
            sponsor_trees,
            collaborator_trees,
        ],
    ),
    # Conditions
    (
        s_button_phc,
        disease_view_phc,
        [s_disease_input_phc, s_disease_input_type_phc, s_sponsor_input_phc, s_academia_input_phc],
        [summary_block_phc, output_block_conditions_phc, tree_map_cond_nct],
    ),
    # Trials
    (
        s_button_phs,
        disease_view_phs,
        [s_disease_input_phs, s_disease_input_type_phs, s_sponsor_input_phs, s_academia_input_phs],
        [summary_block_phs, output_block_conditions_phs, nct_org_map],
    ),
    # Upcoming trials
    (
        s_button_phs_n,
        disease_view_phs_n,
        [s_disease_input_phs_n, s_disease_input_type_phs_n, s_sponsor_input_phs_n, s_academia_input_phs_n],
        [summary_block_phs_n, output_block_conditions_phs_n, tree_map_cond_nct_n, trial_plot],
    ),
    # Completed trials
    (
        s_button_phs_c,
        disease_view_phs_c,
        [s_disease_input_phs_c, s_disease_input_type_phs_c, s_sponsor_input_phs_c, s_academia_input_phs_c],
        [
            summary_block_phs_c,
            output_block_conditions_phs_c,
            tree_map_cond_nct_c,
            nct_org_map_c,
            trial_plot_c,
            time_plot_c,
        ],
    ),
    # Drugs
    (
        s_drug_button,
        drug_view,
        [s_disease_input, s_disease_input_type, s_sponsor_input, s_academia_input],
        [drug_summary_block, drug_output_block_conditions, sankey_map_drug],
    ),
    # Country
    (
        condition_button,
        condition_view,
        [condition_input_con, country_input_tr, condition_input_type, sponsor_input_con, academia_input_con],
        [summary_block_cond, condition_output, trial_countries],
    ),
    # Site
    (
        condition_button_s,
        condition_view_s,
        [condition_input_s, country_input_s, condition_input_site, sponsor_input_con_s, academia_input_con_s],
        [summary_block_cond_s, condition_output_s, site_cond, country_site],
    ),
    # Timelines
    (
        condition_button_t,
        condition_viewt,
        [condition_input_cont, country_input_trt, condition_input_typet, sponsor_input_cont, academia_input_cont],
        [summary_block_condt, condition_outputt, bubble_map_trial],
    ),
    # Eligibility (manual check: NCT04419506)
    (
        trial_button,
        trial_view,
        [nctID_input],
        [trial_output, concept_inclusion, concept_exclusion],
    ),
    # Trial site map
    (
        trial_buttons,
        trial_view_map,
        [nctID_inputs],
        [summary_block_trial_map, world_map, trial_output_map],
    ),
]

for _button, _handler, _inputs, _outputs in _click_bindings:
    _button.click(_handler, inputs=_inputs, outputs=_outputs)

# Start the app with a public share link.
trial_app.launch(share=True)
#trial_app.launch(share=True, debug = "TRUE")