Spaces:
Sleeping
Sleeping
#pip install gradio | |
#transformers | |
########### AGENT: ClinicalTrials.gov ################################################################################################### | |
##Gradio App: TRIAL CONNECT | |
#Author: Tamer Chowdhury' Nov 2024 | |
#tamer.chowdhury@gmail.com | |
################################################################################################################################## | |
import gradio as gr | |
from gradio import Interface | |
from gradio import Dropdown | |
import io | |
import re | |
import pandas as pd | |
import textwrap | |
from IPython.display import display | |
import requests | |
#from _plotly_utils.utils import NotEncodable | |
from IPython.core.display import display_markdown | |
######################### from CLAUDE########################################### | |
import aiohttp | |
import asyncio | |
import pandas as pd | |
import io | |
import json | |
async def fetch(session, url, params):
    """GET ``url`` with ``params`` and return the decoded JSON body.

    Every failure mode (non-200 status, undecodable body, any exception
    raised while talking to the server) is reported on stdout and signalled
    to the caller by returning ``None``; nothing is raised.
    """
    try:
        async with session.get(url, params=params) as response:
            if response.status != 200:
                print(f"HTTP Error: {response.status}")
                print(f"Response text: {await response.text()}")
                return None

            body = await response.text()
            try:
                payload = json.loads(body)
            except json.JSONDecodeError:
                print(f"Failed to decode JSON: {body[:200]}...")
                # Re-raised on purpose: the outer handler logs it and returns None.
                raise
            return payload
    except Exception as exc:
        print(f"Error in fetch: {str(exc)}")
        return None
def _build_study_query(lead_sponsor_name, disease_area, overall_status,
                       location_country, NCTId, page_size):
    """Assemble the query-string parameters for the v2 ``/studies`` endpoint."""
    # Fields requested from the API (v2 response structure).
    fields = [
        "protocolSection.identificationModule.nctId",
        "protocolSection.identificationModule.orgStudyIdInfo",
        "protocolSection.identificationModule.briefTitle",
        "protocolSection.conditionsModule.conditions",
        "protocolSection.designModule.phases",
        "protocolSection.statusModule.overallStatus",
        "protocolSection.statusModule.primaryCompletionDateStruct",
        "protocolSection.designModule.enrollmentInfo",
        "protocolSection.designModule.studyType",
        "protocolSection.eligibilityModule.studyPopulation",
        "protocolSection.contactsLocationsModule.locations",
        "protocolSection.designModule.designInfo",
        "protocolSection.armsInterventionsModule.armGroups",
        "protocolSection.sponsorCollaboratorsModule.leadSponsor",
        "protocolSection.armsInterventionsModule.interventions",
        "protocolSection.outcomesModule.primaryOutcomes",
        "protocolSection.statusModule.startDateStruct",
    ]
    params = {
        "format": "json",
        "fields": ",".join(fields),
        "pageSize": str(page_size),
        "countTotal": "true",
    }

    if NCTId:
        # An explicit NCT ID replaces the condition/sponsor/location terms.
        params["query.id"] = NCTId
    else:
        # Values are passed verbatim: the HTTP client URL-encodes query
        # parameters itself, so hand-replacing spaces with "+" would put a
        # literal plus sign into the search term.
        if disease_area:
            params["query.cond"] = disease_area
        if lead_sponsor_name:
            params["query.lead"] = lead_sponsor_name
        if location_country:
            params["query.locn"] = location_country

    # NOTE(review): the status filter is applied to NCT-ID lookups as well;
    # confirm this matches the intended behaviour.
    if overall_status:
        params["filter.overallStatus"] = overall_status.upper()
    return params


def _study_to_trial_info(study):
    """Flatten one v2 study record into the flat dict used by the UI tables."""
    return {
        'NCTId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'nctId']),
        'Phase': _get_first_item(study, ['protocolSection', 'designModule', 'phases']),
        'OrgStudyId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'orgStudyIdInfo', 'id']),
        'Status': _get_nested_value(study, ['protocolSection', 'statusModule', 'overallStatus']),
        'Condition': '|'.join(_get_nested_value(study, ['protocolSection', 'conditionsModule', 'conditions'], [])),
        'CompletionDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'primaryCompletionDateStruct', 'date']),
        'EnrollmentCount': _get_nested_value(study, ['protocolSection', 'designModule', 'enrollmentInfo', 'count']),
        'StudyType': _get_nested_value(study, ['protocolSection', 'designModule', 'studyType']),
        'Arm': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'armGroups'], 'label'),
        'Drug': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'interventions'], 'name'),
        'Country': _get_location_info(study, 'country'),
        'City': _get_location_info(study, 'city'),
        'Site': _get_location_info(study, 'facility'),
        'StudyPopulation': _get_nested_value(study, ['protocolSection', 'eligibilityModule', 'studyPopulation']),
        'Sponsor': _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'leadSponsor', 'name']),
        'Collaborator': _get_collaborators(study),
        'StartDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'startDateStruct', 'date']),
        'PrimaryMeasure': _get_first_item(study, ['protocolSection', 'outcomesModule', 'primaryOutcomes'], 'measure'),
        'Purpose': _get_nested_value(study, ['protocolSection', 'designModule', 'designInfo', 'primaryPurpose']),
        'BriefTitle': _get_nested_value(study, ['protocolSection', 'identificationModule', 'briefTitle']),
    }


async def get_nct_ids(lead_sponsor_name=None, disease_area=None, overall_status=None,
                      location_country=None, NCTId=None, max_records=None, blocks=30):
    """Search ClinicalTrials.gov (API v2) and return one flat dict per study.

    Args:
        lead_sponsor_name: Lead-sponsor search term (``query.lead``).
        disease_area: Condition search term (``query.cond``).
        overall_status: Status filter; upper-cased and sent as
            ``filter.overallStatus``.
        location_country: Location search term (``query.locn``).
        NCTId: When given, sent as ``query.id`` and the sponsor, condition
            and location terms are ignored.
        max_records: Optional cap on the number of studies returned. Paging
            stops once the cap is reached and the result is truncated to it.
        blocks: Unused; kept so existing callers keep working.

    Returns:
        list[dict]: One entry per study with the keys ``NCTId``, ``Phase``,
        ``OrgStudyId``, ``Status``, ``Condition``, ``CompletionDate``,
        ``EnrollmentCount``, ``StudyType``, ``Arm``, ``Drug``, ``Country``,
        ``City``, ``Site``, ``StudyPopulation``, ``Sponsor``,
        ``Collaborator``, ``StartDate``, ``PrimaryMeasure``, ``Purpose`` and
        ``BriefTitle``. Empty when nothing was found or the first request
        failed (failures are printed, not raised).
    """
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    # Never request a bigger page than the caller is willing to receive.
    page_size = max(1, min(1000, max_records)) if max_records else 1000

    print("Constructing query...")
    params = _build_study_query(lead_sponsor_name, disease_area, overall_status,
                                location_country, NCTId, page_size)
    print(f"Full parameters: {params}")

    all_studies = []
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                response_data = await fetch(session, base_url, params)
                if not response_data or not isinstance(response_data, dict):
                    print("Invalid response data")
                    break

                studies = response_data.get('studies', [])
                if not studies:
                    print("No more studies found")
                    break

                first_page = not all_studies
                all_studies.extend(studies)
                print(f"Retrieved {len(studies)} studies. Total so far: {len(all_studies)}")

                if first_page:
                    # Debug aid: show the first study once, not on every page.
                    first_study = all_studies[0]
                    print("\nFirst study example:")
                    print(f"NCT ID: {_get_nested_value(first_study, ['protocolSection', 'identificationModule', 'nctId'])}")
                    print(f"Status: {_get_nested_value(first_study, ['protocolSection', 'statusModule', 'overallStatus'])}")

                next_page_token = response_data.get('nextPageToken')
                if not next_page_token or (max_records and len(all_studies) >= max_records):
                    break
                params["pageToken"] = next_page_token
            except Exception as e:
                # Best effort: keep whatever pages were already retrieved.
                print(f"Error processing page: {str(e)}")
                break

    if max_records:
        # The last page may overshoot the cap; trim to what was asked for.
        all_studies = all_studies[:max_records]

    recruiting_trials_list = []
    for study in all_studies:
        try:
            recruiting_trials_list.append(_study_to_trial_info(study))
        except Exception as e:
            # A malformed study must not discard the rest of the result set.
            print(f"Error processing study: {str(e)}")

    print(f"Total studies processed: {len(recruiting_trials_list)}")
    return recruiting_trials_list
# Helper functions for pulling values out of a v2 study record | |
def _get_nested_value(obj, path, default=None):
    """Walk ``obj`` along the keys in ``path`` using ``.get`` at each level.

    Returns ``default`` when any level is ``None``, when the final value is
    ``None``, or when a level does not support ``.get``.
    """
    node = obj
    try:
        for step in path:
            if node is None:
                break
            node = node.get(step)
    except (KeyError, TypeError, AttributeError):
        return default
    return default if node is None else node
def _get_first_item(obj, path, field=None):
    """Return the first element of the list found at ``path`` in ``obj``.

    When ``field`` is given, the first element's ``field`` entry is returned
    instead of the element itself. Returns ``None`` when the value at
    ``path`` is missing, empty or not a list.
    """
    try:
        items = _get_nested_value(obj, path, [])
        if not items or not isinstance(items, list):
            return None
        head = items[0]
        return head.get(field) if field else head
    except (IndexError, AttributeError):
        return None
def _get_location_info(study, info_type):
    """Join the ``info_type`` value of every study location with ``|``.

    Locations whose ``info_type`` entry is missing or empty are skipped.
    Returns ``None`` if the locations cannot be processed.
    """
    try:
        locations = _get_nested_value(
            study, ['protocolSection', 'contactsLocationsModule', 'locations'], []
        )
        # 'facility' is looked up exactly like every other key.
        present = [
            loc.get(info_type, '') for loc in locations if loc.get(info_type)
        ]
        return '|'.join(present)
    except Exception:
        return None
def _get_collaborators(study):
    """Return the study's collaborator names joined with ``|``.

    Yields an empty string when the study lists no collaborators and
    ``None`` if the collaborator list cannot be processed.
    """
    try:
        entries = _get_nested_value(
            study, ['protocolSection', 'sponsorCollaboratorsModule', 'collaborators'], []
        )
        names = []
        for entry in entries:
            name = entry.get('name')
            if name:
                names.append(name)
        return '|'.join(names)
    except Exception:
        return None
########### ClinicalTrials.gov API for study fields with Recruiting Trials Only ################################### | |
################# FROM CLAUDE API FOR ELIGIBILITY############################### | |
import requests | |
import re | |
def get_formatted_inclusion_criteria(nct_id, session=None, timeout=30):
    """
    Get and format inclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    Args:
        nct_id (str): The NCT ID of the trial
        session: Optional object exposing a ``requests``-style ``get`` method
            (for example a ``requests.Session``). Defaults to the ``requests``
            module itself.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        str: Formatted inclusion criteria as a numbered list, or None if the
        trial was not found or the request/processing failed
    """
    # V2 API endpoint
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    # Parameters for the API request
    params = {
        "format": "json",
        "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
        "query.id": nct_id
    }
    http = session if session is not None else requests
    try:
        response = http.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        studies = data.get('studies')
        if not studies:
            print(f"No data found for Trial NCT ID: {nct_id}")
            return None
        eligibility_criteria = studies[0]['protocolSection']['eligibilityModule']['eligibilityCriteria']

        # Everything before the first "Exclusion Criteria" mention is the
        # inclusion section.
        inclusion_criteria = re.split(
            r'\bExclusion\s+Criteria\b', eligibility_criteria,
            maxsplit=1, flags=re.IGNORECASE,
        )[0].strip()

        header = re.compile(r'^\s*inclusion\s+criteria:?\s*$', re.IGNORECASE)
        leading_bullet = re.compile(r'^\s*[-•*]\s*')

        cleaned_inclusions = []
        for line in re.split(r'\r?\n+', inclusion_criteria):
            line = line.strip()
            # Drop blank lines and the "Inclusion Criteria" heading itself.
            if not line or header.search(line):
                continue
            # Strip the bullet marker; a bullet-only line becomes empty.
            line = leading_bullet.sub('', line)
            if line:
                cleaned_inclusions.append(line)

        # Number the items and make sure each one ends with a period.
        return "\n".join(
            f"{i}. {item if item.endswith('.') else item + '.'}"
            for i, item in enumerate(cleaned_inclusions, 1)
        )
    except (IndexError, KeyError) as e:
        print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
        return None
######################################################################################################### | |
## ############################API For Exclusions################################################################################################################################################### | |
def get_formatted_exclusion_criteria(nct_id, session=None, timeout=30):
    """
    Get and format exclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    Args:
        nct_id (str): The NCT ID of the trial
        session: Optional object exposing a ``requests``-style ``get`` method
            (for example a ``requests.Session``). Defaults to the ``requests``
            module itself.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        str: Formatted exclusion criteria as a numbered list, or None if the
        trial or its exclusion section was not found, or the
        request/processing failed
    """
    # V2 API endpoint
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    # Parameters for the API request
    params = {
        "format": "json",
        "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
        "query.id": nct_id
    }
    http = session if session is not None else requests
    try:
        response = http.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        studies = data.get('studies')
        if not studies:
            print(f"No data found for Trial NCT ID: {nct_id}")
            return None
        eligibility_criteria = studies[0]['protocolSection']['eligibilityModule']['eligibilityCriteria']

        # Split once, at the first "Exclusion Criteria" mention. The optional
        # colon is consumed here so it cannot leak into the list as an item,
        # and maxsplit=1 keeps any text that follows a later mention of the
        # phrase inside the exclusion section.
        parts = re.split(
            r'\bExclusion\s+Criteria\b\s*:?', eligibility_criteria,
            maxsplit=1, flags=re.IGNORECASE,
        )
        if len(parts) < 2:
            print(f"Could not find exclusion criteria section for Trial NCT ID: {nct_id}")
            return None
        exclusion_criteria = parts[1].strip()

        leading_bullet = re.compile(r'^\s*[-•*]\s*')

        cleaned_exclusions = []
        for line in re.split(r'\r?\n+', exclusion_criteria):
            line = line.strip()
            if not line:
                continue
            # Strip the bullet marker; a bullet-only line becomes empty.
            line = leading_bullet.sub('', line)
            if line:
                cleaned_exclusions.append(line)

        # Number the items and make sure each one ends with a period.
        return "\n".join(
            f"{i}. {item if item.endswith('.') else item + '.'}"
            for i, item in enumerate(cleaned_exclusions, 1)
        )
    except (IndexError, KeyError) as e:
        print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
        return None
################################# Apply CSS Style to HTML Table ############################################################################################################## | |
def dataframe_to_html_table(df):
    """Render ``df`` as a striped HTML table inside a scrollable container.

    The returned markup is self-contained: an inline ``<style>`` block
    followed by a ``<div class="table-container">`` wrapping the table that
    ``DataFrame.to_html`` produces (index omitted, ``border=0``).
    """
    stylesheet = """
    <style>
    .table-container {
    width: 100%;
    max-width: 100%;
    margin-bottom: 1rem;
    overflow-x: auto;
    overflow-y: auto;
    max-height: 400px;
    }
    .table {
    width: 100%;
    max-width: 100%;
    margin-bottom: 1rem;
    border-collapse: collapse;
    white-space: nowrap;
    }
    .table-striped tbody tr:nth-of-type(odd) {
    background-color: rgba(0, 0, 0, 0.05);
    }
    th, td {
    padding: 0.75rem;
    vertical-align: top;
    border-top: 1px solid #dee2e6;
    white-space: normal;
    overflow-wrap: break-word;
    max-width: 150px;
    }
    /* Set the width of the 'BriefTitle' and 'Arm' columns */
    .table td:nth-child(3), .table td:nth-child(11) {
    width: 300px;
    }
    thead th {
    position: sticky;
    top: 0;
    background-color: white;
    z-index: 1;
    }
    thead th::before {
    content: "";
    position: absolute;
    left: 0;
    width: 100%;
    height: 100%;
    border-right: 1px solid #dee2e6;
    background-color: white;
    z-index: -1;
    }
    </style>
    """
    table_markup = df.to_html(classes="table table-striped", index=False, border=0)
    return f'{stylesheet}<div class="table-container">{table_markup}</div>'
################################################################################################################################## | |
def format_summary_stats(summary):
    """Wrap ``summary`` in a ``#summary-output`` div styled bold, 20px, black.

    ``summary`` is inserted as-is, so any HTML it contains is kept.
    """
    style_block = """
    <style>
    #summary-output {
    font-weight: bold;
    font-size: 20px;
    color: black;
    }
    </style>
    """
    return f'{style_block}<div id="summary-output">{summary}</div>\n    '
############################ End of Style ############################################################################################# | |
############### Functions to Process the Dataframes of Disease, Conditions, Trial Details#################################### | |
# parse the conditions | |
import re | |
import pandas as pd | |
####################################################################################################### | |
def split_conditions(df, column_to_split):
    """Explode ``column_to_split`` into one row per comma- or pipe-separated value.

    Every other column is copied unchanged onto each generated row and the
    split values are stripped of surrounding whitespace.

    A missing value (``None``/NaN) is kept as a single missing cell rather
    than being turned into the text ``'None'``/``'nan'``, so a later
    ``fillna`` can still see it. An empty ``df`` yields an empty frame that
    keeps the original column labels.

    Args:
        df: Source DataFrame.
        column_to_split: Label of the column holding the delimited values.

    Returns:
        pandas.DataFrame: New frame with a fresh 0..n-1 index.
    """
    # De-duplicated labels, in their original order.
    columns = list(dict.fromkeys(df.columns))
    delimiter = re.compile(r'[,|]')

    records = []
    for _, row in df.iterrows():
        base = {col: row[col] for col in columns}
        value = row[column_to_split]
        if pd.api.types.is_scalar(value) and pd.isna(value):
            parts = [value]
        else:
            parts = [part.strip() for part in delimiter.split(str(value))]
        # Re-assigning an existing key keeps its original column position.
        records.extend({**base, column_to_split: part} for part in parts)

    return pd.DataFrame(records, columns=columns)
######################################################################################################################### | |
def split_drug(df, column_to_split):
    """Explode ``column_to_split`` into one row per comma- or pipe-separated drug.

    Every other column is copied unchanged onto each generated row and the
    split values are stripped of surrounding whitespace.

    A missing value (``None``/NaN) is kept as a single missing cell rather
    than being turned into the text ``'None'``/``'nan'``, so a later
    ``fillna`` can still see it. An empty ``df`` yields an empty frame that
    keeps the original column labels.

    Args:
        df: Source DataFrame.
        column_to_split: Label of the column holding the delimited values.

    Returns:
        pandas.DataFrame: New frame with a fresh 0..n-1 index.
    """
    # De-duplicated labels, in their original order.
    columns = list(dict.fromkeys(df.columns))
    delimiter = re.compile(r'[,|]')

    records = []
    for _, row in df.iterrows():
        base = {col: row[col] for col in columns}
        value = row[column_to_split]
        if pd.api.types.is_scalar(value) and pd.isna(value):
            drugs = [value]
        else:
            drugs = [drug.strip() for drug in delimiter.split(str(value))]
        # Re-assigning an existing key keeps its original column position.
        records.extend({**base, column_to_split: drug} for drug in drugs)

    return pd.DataFrame(records, columns=columns)
############################################################################################################################################ | |
############################################################################################# | |
def split_columns(df, columns_to_split):
    """Explode several pipe-delimited columns in lockstep.

    For each input row, the i-th output row carries the i-th ``|``-separated
    piece of every column in ``columns_to_split``; a column with fewer
    pieces than the longest one is padded with ``None``. All other columns
    are repeated unchanged. A missing value (``None``/NaN) counts as a
    single missing piece instead of the text ``'None'``/``'nan'``.

    An empty ``df`` returns an empty frame with the same columns, and an
    empty ``columns_to_split`` leaves every row as it is.

    Args:
        df: Source DataFrame.
        columns_to_split: Labels of the pipe-delimited columns.

    Returns:
        pandas.DataFrame: New frame with the columns of ``df`` in their
        original order and a fresh 0..n-1 index.
    """
    columns = list(df.columns)
    split_cols = list(columns_to_split)
    passthrough = [col for col in columns if col not in split_cols]

    records = []
    for _, row in df.iterrows():
        # Split each value once per row instead of once per output cell.
        pieces = {}
        for col in split_cols:
            value = row[col]
            if pd.api.types.is_scalar(value) and pd.isna(value):
                pieces[col] = [None]
            else:
                pieces[col] = str(value).split('|')
        depth = max((len(parts) for parts in pieces.values()), default=1)
        shared = {col: row[col] for col in passthrough}

        for i in range(depth):
            record = {
                col: (parts[i] if i < len(parts) else None)
                for col, parts in pieces.items()
            }
            record.update(shared)
            records.append(record)

    # A single constructor call restores the column order and also copes
    # with an empty record list.
    return pd.DataFrame(records, columns=columns)
################## INTERVENTIONAL, OBSERVATIONAL Trials Lead Sponsor Counts################################################## | |
def calculate_summary_stats(df, sponsor):
    """Summarise trials, conditions and planned patients per study type.

    Produces one line for INTERVENTIONAL and one for OBSERVATIONAL studies,
    joined with ``<br>`` under a "<sponsor> - As Lead Sponsor" heading.
    Conditions named 'Healthy' or 'Adult' are left out of the condition
    count; enrollment is counted once per NCT ID.
    """
    heading = sponsor or "All Lead Sponsors"
    lines = []
    for study_type in ("INTERVENTIONAL", "OBSERVATIONAL"):
        subset = df.loc[df['StudyType'] == study_type].copy()
        # Non-numeric enrollment values become NaN and are ignored by sum().
        subset['EnrollmentCount'] = pd.to_numeric(subset['EnrollmentCount'], errors='coerce')

        trial_count = len(subset['NCTId'].unique())
        condition_count = sum(
            1 for name in subset['Condition'].unique()
            if name != 'Healthy' and name != 'Adult'
        )
        planned = int(subset.groupby('NCTId')['EnrollmentCount'].first().sum())

        lines.append(
            f"{trial_count} {study_type} Trials, "
            f"{condition_count} Conditions, "
            f"{planned:,} Planned Patients."
        )
    return f"{heading} - As Lead Sponsor: <br>" + "<br>".join(lines)
############################################################################################################################################ | |
def calculate_summary_stats_collb(df, sponsor):
    """Summarise collaborator trials, conditions and planned patients.

    Produces one line for INTERVENTIONAL and one for OBSERVATIONAL studies,
    joined with ``<br>`` under a "<sponsor> - With Collaborators Recruiting
    For" heading. Conditions named 'Healthy' or 'Adult' are left out of the
    condition count; enrollment is counted once per NCT ID.
    """
    heading = sponsor or "All Collaborators"
    lines = []
    for study_type in ("INTERVENTIONAL", "OBSERVATIONAL"):
        subset = df.loc[df['StudyType'] == study_type].copy()
        # Non-numeric enrollment values become NaN and are ignored by sum().
        subset['EnrollmentCount'] = pd.to_numeric(subset['EnrollmentCount'], errors='coerce')

        trial_count = len(subset['NCTId'].unique())
        condition_count = sum(
            1 for name in subset['Condition'].unique()
            if name != 'Healthy' and name != 'Adult'
        )
        planned = int(subset.groupby('NCTId')['EnrollmentCount'].first().sum())

        lines.append(
            f"{trial_count} {study_type} Trials, "
            f"{condition_count} Conditions, "
            f"{planned:,} Planned Patients."
        )
    return f"{heading} - With Collaborators Recruiting For: <br>" + "<br>".join(lines)
################################################################################################################## | |
def calculate_summary_stats_sites(df, sponsor, country):
    """Summarise trials, countries and sites per study type.

    When ``country`` is given, only rows for that country are considered.
    The INTERVENTIONAL line also reports planned patients (enrollment is
    counted once per NCT ID); the OBSERVATIONAL line reports no patient
    total.
    """
    if country:
        df = df[df['Country'] == country]

    # One row per (trial, study type) so enrollment is not double counted.
    per_trial = df.groupby(['NCTId', 'StudyType']).first().reset_index()
    per_trial['EnrollmentCount'] = pd.to_numeric(per_trial['EnrollmentCount'], errors='coerce')

    def _tally(study_type):
        """Return (trial count, country count, site count) for one study type."""
        trials = per_trial[per_trial['StudyType'] == study_type]
        rows = df[df['StudyType'] == study_type]
        site_count = len(rows.groupby(['Country', 'City', 'Site'])['NCTId'].nunique())
        return len(trials['NCTId'].unique()), rows['Country'].nunique(), site_count

    int_trials, int_countries, int_sites = _tally('INTERVENTIONAL')
    obs_trials, obs_countries, obs_sites = _tally('OBSERVATIONAL')
    int_patients = int(
        per_trial.loc[per_trial['StudyType'] == 'INTERVENTIONAL', 'EnrollmentCount'].sum()
    )

    sponsor_name = sponsor or "All Sponsors"
    return (
        f"{sponsor_name} <br> {int_trials} INTERVENTIONAL Trials, in {int_countries} Country, at {int_sites} Sites, "
        f"Recruiting: {int_patients:,} Planned Patients. <br>"
        f"{obs_trials} OBSERVATIONAL Trials, in {obs_countries} Country, at {obs_sites} Sites"
    )
################################################ GRADIO STARTS HERE ######################################################### | |
# Wrapper function called from the Gradio Interface: takes the form inputs and returns the outputs | |
def _partition_by_sponsor(df, sponsor):
    """Split ``df`` into trials led by ``sponsor`` and trials it shares with others.

    Returns ``(led, collaborations)``. ``collaborations`` concatenates the
    trials ``sponsor`` leads that list at least one collaborator, the trials
    where ``sponsor`` is the only collaborator (the lead sponsor is moved
    into the Collaborator column), and the trials where ``sponsor`` is one
    of several collaborators (the lead sponsor is prepended to the list).
    """
    led = df[df['Sponsor'] == sponsor]
    # An empty string means "no collaborators" just as much as a missing value.
    has_collaborator = (
        led['Collaborator'].notnull()
        & (led['Collaborator'] != 'Not Available')
        & (led['Collaborator'].astype(str).str.strip() != '')
    )
    led_with_collaborators = led[has_collaborator]

    # .copy() so the column assignments below never write into a view of df.
    sole = df[df['Collaborator'] == sponsor].copy()
    sole['Collaborator'] = sole['Sponsor']

    # The sponsor name is matched literally as one '|'-delimited entry;
    # re.escape keeps characters such as '(' or '.' from acting as regex syntax.
    pattern = rf'(?:^|\|){re.escape(sponsor)}(?:\||$)'
    among = df[df['Collaborator'].str.contains(pattern, na=False, flags=re.IGNORECASE, regex=True)]
    among = among[among['Collaborator'] != sponsor].copy()
    among['Collaborator'] = among['Sponsor'] + '|' + among['Collaborator']

    collaborations = pd.concat([led_with_collaborators, sole, among], ignore_index=True)
    return led, collaborations


async def gradio_wrapper_nct(sponsor=None, condition=None, NCTId=None, country=None, status=None):
    """Query ClinicalTrials.gov and build the summaries and HTML tables for the UI.

    The search uses ``condition`` and/or ``sponsor`` when either is given,
    otherwise ``NCTId``. ``status`` is forwarded as the overall-status
    filter. ``country`` is accepted but not used here.

    Returns:
        A 6-tuple ``(summary_stats, summary_stats_collb, html_table_conditions,
        html_table_conditions_collb, html_table, html_table_drugs)``.
        The collaborator entries are only populated when ``sponsor`` is
        given. When no search input is provided the first element is an
        explanatory message and the rest are ``None``; when the search
        returns nothing all six are ``None``.
    """
    if condition or sponsor:
        recruiting_trials = await get_nct_ids(
            disease_area=condition or None,
            lead_sponsor_name=sponsor or None,
            overall_status=status,
        )
    elif NCTId:
        recruiting_trials = await get_nct_ids(NCTId=NCTId, overall_status=status)
    else:
        # Same arity as every other return so all outputs receive a value.
        return "No condition, sponsor, or trial NCT Id provided", None, None, None, None, None

    if not recruiting_trials:
        return None, None, None, None, None, None

    # --- Trial overview table -------------------------------------------
    clinical_trials_gov = pd.DataFrame(recruiting_trials, columns=[
        'NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Condition', 'Drug', 'Phase',
        'StudyType', 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Arm',
        'Purpose', 'PrimaryMeasure', 'Sponsor', 'Collaborator',
    ])
    clinical_trials_gov = clinical_trials_gov.fillna("Not Available")
    clinical_trials_gov = clinical_trials_gov.sort_values(
        by=['StudyType', 'Phase', 'CompletionDate', 'EnrollmentCount'],
        ascending=[True, False, True, False],
    )
    html_table = dataframe_to_html_table(clinical_trials_gov)

    # --- One row per condition ------------------------------------------
    clinical_trials_gov_conditions = pd.DataFrame(recruiting_trials, columns=[
        'NCTId', 'BriefTitle', 'OrgStudyId', 'Phase', 'Status', 'StudyType',
        'Condition', 'Drug', 'StartDate', 'CompletionDate', 'EnrollmentCount',
        'Sponsor', 'Collaborator',
    ])
    clinical_trials_gov_conditions = split_conditions(clinical_trials_gov_conditions, 'Condition')
    clinical_trials_gov_conditions = clinical_trials_gov_conditions.fillna("Not Available")

    # --- One row per drug -----------------------------------------------
    # 'Status' is listed once; a repeated label would create duplicate columns.
    clinical_trials_gov_drugs = pd.DataFrame(recruiting_trials, columns=[
        'Status', 'NCTId', 'BriefTitle', 'OrgStudyId', 'Phase', 'StudyType',
        'Condition', 'Drug', 'StartDate', 'CompletionDate', 'EnrollmentCount',
        'Sponsor', 'Collaborator',
    ])
    clinical_trials_gov_drugs = split_conditions(clinical_trials_gov_drugs, 'Drug')
    clinical_trials_gov_drugs = clinical_trials_gov_drugs.fillna("Not Available")

    # --- Restrict to the requested sponsor ------------------------------
    if sponsor:
        clinical_trials_gov_conditions, clinical_trials_gov_conditions_collb = _partition_by_sponsor(
            clinical_trials_gov_conditions, sponsor
        )
        clinical_trials_gov_drugs = clinical_trials_gov_drugs[
            clinical_trials_gov_drugs['Sponsor'] == sponsor
        ]

    html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions)
    html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs)

    summary_stats = format_summary_stats(
        calculate_summary_stats(clinical_trials_gov_conditions, sponsor)
    )

    if sponsor:
        html_table_conditions_collb = dataframe_to_html_table(clinical_trials_gov_conditions_collb)
        summary_stats_collb = format_summary_stats(
            calculate_summary_stats_collb(clinical_trials_gov_conditions_collb, sponsor)
        )
    else:
        # No sponsor selected: nothing to show in the collaborator outputs.
        html_table_conditions_collb = pd.DataFrame().to_html(
            index=False, header=True, border=0, table_id="empty_table"
        )
        summary_stats_collb = ''

    return (summary_stats, summary_stats_collb, html_table_conditions,
            html_table_conditions_collb, html_table, html_table_drugs)
# Wrapper function called from the interface: takes the user inputs and returns the outputs
async def gradio_wrapper_nct_spn(sponsor=None, condition=None, NCTId=None, country=None, status = None):
    """Fetch trials and build the summary texts and HTML tables for the UI.

    The search key is picked in priority order: condition together with
    sponsor, condition alone, sponsor alone, then NCTId alone. ``status`` is
    always forwarded to ``get_nct_ids`` as ``overall_status``.

    Args:
        sponsor: Lead sponsor name. When given, the condition, drug and site
            tables keep only rows whose ``Sponsor`` equals it exactly.
        condition: Disease area forwarded to ``get_nct_ids``.
        NCTId: Trial identifier; only used when neither ``condition`` nor
            ``sponsor`` is supplied.
        country: When given, the site table keeps only rows whose ``Country``
            equals it exactly.
        status: Overall status filter forwarded to ``get_nct_ids``.

    Returns:
        * ``(empty DataFrame, message)`` when no search key was supplied.
        * A tuple of six ``None`` values when the search produced no trials.
        * Otherwise ``(summary_stats, html_table_conditions, html_table,
          summary_stats_sites, html_table_add, html_table_drugs)``.
    """
    if condition and sponsor:
        recruiting_trials = await get_nct_ids(disease_area=condition, lead_sponsor_name=sponsor, overall_status=status)
    elif condition:
        recruiting_trials = await get_nct_ids(disease_area=condition, overall_status=status)
    elif sponsor:
        recruiting_trials = await get_nct_ids(lead_sponsor_name=sponsor, overall_status=status)
    elif NCTId:
        recruiting_trials = await get_nct_ids(NCTId=NCTId, overall_status=status)
    else:
        # `pd` must be the module-level pandas import here: a function-level
        # `import pandas as pd` further down would make `pd` a local name and
        # turn this line into an UnboundLocalError.
        return pd.DataFrame(), "No condition, sponsor, or trial NCT Id provided"

    # Copy only the fields the tables below need from each trial record.
    trial_fields = (
        'Sponsor', 'Collaborator', 'Drug', 'StudyType', 'Phase', 'Status',
        'Site', 'Country', 'City', 'NCTId', 'OrgStudyId', 'Condition',
        'StartDate', 'CompletionDate', 'EnrollmentCount', 'PrimaryMeasure',
        'Purpose', 'Arm', 'BriefTitle',
    )
    trial_info_list = [
        {field: trial[field] for field in trial_fields}
        for trial in recruiting_trials
    ]
    if not trial_info_list:
        return None, None, None, None, None, None

    # --- Trial overview table -------------------------------------------
    clinical_trials_gov = pd.DataFrame(trial_info_list, columns=[
        'NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Condition', 'Drug',
        'Phase', 'StudyType', 'StartDate', 'CompletionDate',
        'EnrollmentCount', 'Arm', 'Purpose', 'PrimaryMeasure',
        'Sponsor', 'Collaborator'])
    clinical_trials_gov = clinical_trials_gov.fillna("Not Available")
    clinical_trials_gov = clinical_trials_gov.sort_values(
        by=['StudyType', 'Phase', 'CompletionDate', 'EnrollmentCount'],
        ascending=[True, False, True, False])
    html_table = dataframe_to_html_table(clinical_trials_gov)

    # --- Frame carrying the site / country / city columns ----------------
    clinical_trials_gov_add = pd.DataFrame(trial_info_list, columns=[
        'StudyType', 'Phase', 'NCTId', 'OrgStudyId', 'Status', 'BriefTitle',
        'Site', 'Country', 'City', 'Condition', 'Sponsor', 'Collaborator',
        'Drug', 'StartDate', 'CompletionDate', 'EnrollmentCount'])
    clinical_trials_gov_add = clinical_trials_gov_add.fillna("Not Available")
    clinical_trials_gov_add = clinical_trials_gov_add.sort_values(
        by=['StudyType', 'Phase', 'EnrollmentCount', 'CompletionDate', 'Country'],
        ascending=[True, False, False, True, True])

    # --- Per-condition and per-drug frames --------------------------------
    detail_columns = ['NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Phase',
                      'StudyType', 'Condition', 'Drug', 'EnrollmentCount',
                      'Sponsor', 'Collaborator']
    clinical_trials_gov_conditions = pd.DataFrame(trial_info_list, columns=detail_columns)
    clinical_trials_gov_conditions = split_conditions(clinical_trials_gov_conditions, 'Condition')
    clinical_trials_gov_conditions = clinical_trials_gov_conditions.fillna("Not Available")

    clinical_trials_gov_drugs = pd.DataFrame(trial_info_list, columns=detail_columns)
    clinical_trials_gov_drugs = split_conditions(clinical_trials_gov_drugs, 'Drug')
    clinical_trials_gov_drugs = clinical_trials_gov_drugs.fillna("Not Available")

    # --- Site table --------------------------------------------------------
    country_site_city_df = split_columns(clinical_trials_gov_add, ['Site', 'Country', 'City'])
    # The split can introduce new missing values, so fill again afterwards.
    country_site_city_df = country_site_city_df.fillna("Not Available")
    if country:
        country_site_city_df = country_site_city_df[country_site_city_df['Country'] == country]

    if sponsor:
        # Keep only trials led by the requested sponsor. The collaborator
        # frames that used to be assembled here were never returned or used
        # by this wrapper, so they are no longer built.
        clinical_trials_gov_conditions = clinical_trials_gov_conditions[
            clinical_trials_gov_conditions['Sponsor'] == sponsor]
        clinical_trials_gov_drugs = clinical_trials_gov_drugs[
            clinical_trials_gov_drugs['Sponsor'] == sponsor]
        country_site_city_df = country_site_city_df[
            country_site_city_df['Sponsor'] == sponsor]

    html_table_add = dataframe_to_html_table(country_site_city_df)
    html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions)
    html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs)

    # Summary texts for the trial and site views.
    summary_stats_pre = calculate_summary_stats(clinical_trials_gov_conditions, sponsor)
    summary_stats = format_summary_stats(summary_stats_pre)
    summary_stats_sites_pre = calculate_summary_stats_sites(country_site_city_df, sponsor, country)
    summary_stats_sites = format_summary_stats(summary_stats_sites_pre)

    return summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add, html_table_drugs
############################################################################################################################################################### | |
##### ################## Start Gradio Interface ######################################################################### | |
################################## Condition Icicle and Sponsor Map ######################## | |
################################################################### | |
import plotly.graph_objects as go | |
import pandas as pd | |
import numpy as np | |
################################################ TOP 20 Conditions###################################################### | |
########################################################################################################## | |
def plot_condition_sunburst (df):
    """Build an icicle chart of the 20 conditions with the most trials.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are counted.
    Condition names are upper-cased, a few generic names are dropped, and
    the distinct ``NCTId`` values per condition are counted.

    Args:
        df: DataFrame with ``StudyType``, ``Condition`` and ``NCTId`` columns.

    Returns:
        The figure produced by ``px.icicle`` (``px`` is expected to be
        ``plotly.express``, imported elsewhere in this file).
    """
    # Generic fragments that are not useful as chart entries. The last entry
    # previously carried a stray apostrophe ("CHRONIC'") and never matched.
    excluded_conditions = ["OTHER", "OTHERS", "HEALTHY", "ADULT", "CHRONIC"]

    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the caller's data is never written to
    # and pandas does not emit SettingWithCopyWarning.
    interventional = interventional.assign(
        Condition=interventional['Condition'].str.upper())
    interventional = interventional[
        ~interventional['Condition'].isin(excluded_conditions)]

    # Distinct trials per condition, most frequent first.
    trial_counts = interventional.groupby('Condition')['NCTId'].nunique().reset_index()
    trial_counts = trial_counts.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_conditions = trial_counts.head(20).rename(columns={'NCTId': 'Number of Trials'})

    icicle_fig = px.icicle(top_conditions, path=['Condition'], values='Number of Trials',
                           color='Condition', color_continuous_scale='RdBu',
                           custom_data=['Condition', 'Number of Trials'])
    # Hover shows the condition name and its trial count.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title='Top 20 Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )
    return icicle_fig
############################################################ Conditions OTHERS ########### ############################################ | |
def plot_condition_others (df):
    """Build an icicle chart of the conditions ranked below the top 20.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are counted.
    Condition names are upper-cased, a few generic names are dropped, the
    distinct ``NCTId`` values per condition are counted, and every
    condition after the 20 most frequent ones is charted.

    Args:
        df: DataFrame with ``StudyType``, ``Condition`` and ``NCTId`` columns.

    Returns:
        The figure produced by ``px.icicle`` (``px`` is expected to be
        ``plotly.express``, imported elsewhere in this file).
    """
    excluded_conditions = ["OTHER", "OTHERS", "HEALTHY", "ADULT"]

    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the caller's data is never written to
    # and pandas does not emit SettingWithCopyWarning.
    interventional = interventional.assign(
        Condition=interventional['Condition'].str.upper())
    interventional = interventional[
        ~interventional['Condition'].isin(excluded_conditions)]

    # Distinct trials per condition, most frequent first.
    trial_counts = interventional.groupby('Condition')['NCTId'].nunique().reset_index()
    trial_counts = trial_counts.sort_values('NCTId', ascending=False).reset_index(drop=True)

    # Each condition appears once in trial_counts, so "not in the first 20
    # rows" is the same as "every row after the first 20".
    other_conditions = trial_counts.iloc[20:].rename(columns={'NCTId': 'Number of Trials'})

    icicle_fig = px.icicle(other_conditions, path=['Condition'], values='Number of Trials',
                           color='Condition', color_continuous_scale='RdBu',
                           custom_data=['Condition', 'Number of Trials'])
    # Hover shows the condition name and its trial count.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title='Other Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )
    return icicle_fig
################################################################################################################################################### | |
def wrap_text(text, max_chars_per_line):
    """Greedily wrap ``text`` on whitespace and join the lines with ``<br>``.

    Words are never split: a word longer than ``max_chars_per_line`` is
    placed on a line of its own.

    NOTE: a later ``wrap_text`` definition in this file (based on
    ``textwrap.wrap``) rebinds the name, so that one is used at runtime.

    Args:
        text: String to wrap.
        max_chars_per_line: Maximum line length, in characters.

    Returns:
        The wrapped text with ``<br>`` between lines; ``''`` for empty input.
    """
    lines = []
    current_line = []
    for word in text.split():
        if len(' '.join(current_line + [word])) <= max_chars_per_line:
            current_line.append(word)
        else:
            # Flush only a non-empty line; otherwise an over-long first word
            # would produce an empty line and a leading "<br>".
            if current_line:
                lines.append(' '.join(current_line))
            current_line = [word]
    lines.append(' '.join(current_line))
    return '<br>'.join(lines)
##################################################### Sponsor Counts ########################################### | |
################################################### ############################################################ | |
def wrap_text(text, max_chars_per_line):
    """Wrap ``text`` with ``textwrap.wrap`` and join the lines with ``<br>``.

    Args:
        text: String to wrap.
        max_chars_per_line: Width passed to ``textwrap.wrap``.

    Returns:
        The wrapped lines joined by ``<br>``; ``''`` for empty input.
    """
    wrapped_lines = textwrap.wrap(text, width=max_chars_per_line)
    return '<br>'.join(wrapped_lines)
def plot_sponsor_collaborator_tree_map(df):
    """Build a treemap of the 30 busiest sponsor/collaborator pairs.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are counted; the
    size of each tile is the number of distinct ``NCTId`` values for that
    (Sponsor, Collaborator) pair.

    Args:
        df: DataFrame with ``StudyType``, ``Sponsor``, ``Collaborator`` and
            ``NCTId`` columns.

    Returns:
        The figure produced by ``px.treemap`` (``px`` is expected to be
        ``plotly.express``, imported elsewhere in this file).
    """
    wrap_width = 10  # characters per line in the wrapped tile labels

    interventional_only = df[df['StudyType'] == "INTERVENTIONAL"]

    # Distinct trials per pair, largest first.
    pair_counts = interventional_only.groupby(['Sponsor', 'Collaborator'])['NCTId'].nunique()
    ranked_pairs = (
        pair_counts.reset_index()
        .sort_values('NCTId', ascending=False)
        .reset_index(drop=True)
    )
    leading_pairs = ranked_pairs.head(30).rename(columns={'NCTId': 'Number of Trials'})

    # Add <br>-wrapped copies of both name columns for the tile paths.
    for plain_column, wrapped_column in (('Sponsor', 'Wrapped Sponsor'),
                                         ('Collaborator', 'Wrapped Collaborator')):
        leading_pairs[wrapped_column] = leading_pairs[plain_column].apply(
            wrap_text, args=(wrap_width,))

    tree_map_fig = px.treemap(
        leading_pairs,
        path=['Wrapped Sponsor', 'Wrapped Collaborator'],
        values='Number of Trials',
        color='Sponsor',
        color_continuous_scale='RdBu',
        custom_data=['Wrapped Sponsor', 'Wrapped Collaborator', 'Number of Trials'],
    )
    tree_map_fig.update_traces(
        hovertemplate='%{customdata[0]}<br>%{customdata[1]}<br>Number of Trials: %{customdata[2]}')
    tree_map_fig.update_layout(
        title='Lead Sponsors and Collaborators',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000,
    )
    # Tiles display only the trial count; names are available on hover.
    tree_map_fig.update_traces(textinfo='value')
    return tree_map_fig
######################################################################################################### | |
def plot_sponsor_tree(df):
    """Build an icicle chart of the 30 sponsors with the most trials.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are counted.
    Distinct ``NCTId`` values are counted per (Phase, Sponsor) and then
    summed per sponsor, so a trial listed under several phases is counted
    once per phase.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Sponsor`` and
            ``NCTId`` columns.

    Returns:
        The figure produced by ``px.icicle`` (``px`` is expected to be
        ``plotly.express``, imported elsewhere in this file).
    """
    max_chars_per_line = 10  # wrap width for the sponsor labels

    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the caller's data is never written to
    # and pandas does not emit SettingWithCopyWarning. Missing phases get a
    # placeholder so groupby does not drop those rows.
    interventional = interventional.assign(
        Phase=interventional['Phase'].fillna('UNKNOWN'))

    # Distinct trials per (Phase, Sponsor). The former second groupby on the
    # same keys summed one row per group and changed nothing.
    per_phase = interventional.groupby(['Phase', 'Sponsor'])['NCTId'].nunique().reset_index()
    per_sponsor = per_phase.groupby('Sponsor')['NCTId'].sum().reset_index()
    per_sponsor = per_sponsor.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_30_sponsors = per_sponsor.head(30).rename(columns={'NCTId': 'Number of Trials'})
    top_30_sponsors['Wrapped Sponsor'] = top_30_sponsors['Sponsor'].apply(
        lambda name: wrap_text(name, max_chars_per_line))

    icicle_fig = px.icicle(top_30_sponsors, path=['Wrapped Sponsor'], values='Number of Trials',
                           color='Sponsor', color_continuous_scale='RdBu',
                           custom_data=['Wrapped Sponsor', 'Number of Trials'])
    # Hover shows the wrapped sponsor name and its trial count.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title='Sponsor',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000
    )
    return icicle_fig
###################################################################################################################################### | |
def plot_collaborator_icicle(df):
    """Build an icicle chart of the 30 collaborators with the most trials.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are counted.
    Distinct ``NCTId`` values are counted per (Phase, Collaborator) and then
    summed per collaborator, so a trial listed under several phases is
    counted once per phase.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Collaborator`` and
            ``NCTId`` columns.

    Returns:
        The figure produced by ``px.icicle`` (``px`` is expected to be
        ``plotly.express``, imported elsewhere in this file).
    """
    max_chars_per_line = 10  # wrap width for the collaborator labels

    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the caller's data is never written to
    # and pandas does not emit SettingWithCopyWarning. Missing phases get a
    # placeholder so groupby does not drop those rows.
    interventional = interventional.assign(
        Phase=interventional['Phase'].fillna('UNKNOWN'))

    # Distinct trials per (Phase, Collaborator). The former second groupby on
    # the same keys summed one row per group and changed nothing.
    per_phase = interventional.groupby(['Phase', 'Collaborator'])['NCTId'].nunique().reset_index()
    per_collaborator = per_phase.groupby('Collaborator')['NCTId'].sum().reset_index()
    per_collaborator = per_collaborator.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_30_collaborators = per_collaborator.head(30).rename(columns={'NCTId': 'Number of Trials'})
    # 'Collaborators' holds the <br>-wrapped label used as the chart path.
    top_30_collaborators['Collaborators'] = top_30_collaborators['Collaborator'].apply(
        lambda name: wrap_text(name, max_chars_per_line))

    icicle_fig = px.icicle(top_30_collaborators, path=['Collaborators'], values='Number of Trials',
                           color='Collaborator', color_continuous_scale='RdBu',
                           custom_data=['Collaborators', 'Number of Trials'])
    # Hover shows the wrapped collaborator name and its trial count.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')
    icicle_fig.update_layout(
        title='Collaborators',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000
    )
    return icicle_fig
#################################### DRUGS ######################################################################## | |
#################### Sankey Diagram for Conditions to Drugs to Phase /NCTId############################################# | |
import pandas as pd | |
import plotly.graph_objects as go | |
import random | |
def random_color():
    """Return a random colour as an ``rgb(r, g, b)`` string, channels 0-255."""
    red, green, blue = (random.randint(0, 255) for _ in range(3))
    return f'rgb({red}, {green}, {blue})'
############################################################################################################## | |
def plot_drug_sankey(df):
    """Build a Sankey diagram: Condition -> Drug -> NCTId -> OrgStudyId -> Phase.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are used. Missing
    phases are labelled ``'UNKNOWN'``, rows are sorted by phase, and the
    ``Condition`` column is expanded with ``split_conditions`` (defined
    elsewhere in this file). Every distinct value of each column becomes a
    node; a link's value is the number of rows in which the two adjacent
    columns take that pair of values.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``, ``Drug``,
            ``NCTId`` and ``OrgStudyId`` columns.

    Returns:
        A ``go.Figure`` holding one ``go.Sankey`` trace.
    """
    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the caller's data is never written to
    # and pandas does not emit SettingWithCopyWarning.
    interventional = interventional.assign(
        Phase=interventional['Phase'].fillna('UNKNOWN'))
    interventional = interventional.sort_values(by='Phase')
    trials = split_conditions(interventional, 'Condition')

    # One node group per column, laid out left to right. Each entry records
    # the column, its distinct values and the index of its first node.
    level_columns = ['Condition', 'Drug', 'NCTId', 'OrgStudyId', 'Phase']
    levels = []
    labels = []
    for column in level_columns:
        distinct_values = trials[column].unique().tolist()
        levels.append((column, distinct_values, len(labels)))
        labels.extend(distinct_values)

    # Every node gets its own random colour.
    colors = [random_color() for _ in labels]

    source = []
    target = []
    value = []
    for left_level, right_level in zip(levels, levels[1:]):
        left_column, left_values, left_start = left_level
        right_column, right_values, right_start = right_level
        # Count each co-occurring pair once up front instead of re-filtering
        # the whole frame for every (left, right) combination.
        pair_counts = trials.groupby([left_column, right_column], sort=False).size().to_dict()
        # Walk the pairs in node order so the links keep a stable ordering.
        for i, left_value in enumerate(left_values, start=left_start):
            for j, right_value in enumerate(right_values, start=right_start):
                count = pair_counts.get((left_value, right_value), 0)
                if count > 0:
                    source.append(i)
                    target.append(j)
                    value.append(int(count))

    # Figure height grows with the number of conditions and tops out at 1000.
    num_conditions = len(levels[0][1])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])
    fig.update_layout(title_text="Conditions, Drugs, Trial IDs, Phases for Sponsor",
                      font_size=10, height=height, autosize=True)
    return fig
########################################################################################### | |
########################################################################################################################### | |
#################################################################### TRIALS ############################## | |
######################################## | |
def random_color():
    """Return a random colour formatted as ``rgb(r, g, b)`` with 0-255 channels."""
    channels = [str(random.randint(0, 255)) for _ in range(3)]
    return "rgb(" + ", ".join(channels) + ")"
def plot_condition_treemap_nct_old(df):
    """Build a Sankey diagram: Condition -> NCTId -> OrgStudyId -> Phase.

    NOTE: a later function with the same name in this file (a table view)
    rebinds ``plot_condition_treemap_nct_old``, so that one is used at
    runtime.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are used. Missing
    phases are labelled ``'UNKNOWN'``, rows are sorted by phase, and the
    ``Condition`` column is expanded with ``split_conditions`` (defined
    elsewhere in this file). Every distinct value of each column becomes a
    node; a link's value is the number of rows in which the two adjacent
    columns take that pair of values.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``,
            ``NCTId`` and ``OrgStudyId`` columns.

    Returns:
        A ``go.Figure`` holding one ``go.Sankey`` trace.
    """
    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the caller's data is never written to
    # and pandas does not emit SettingWithCopyWarning.
    interventional = interventional.assign(
        Phase=interventional['Phase'].fillna('UNKNOWN'))
    interventional = interventional.sort_values(by='Phase')
    trials = split_conditions(interventional, 'Condition')

    # One node group per column, laid out left to right. Each entry records
    # the column, its distinct values and the index of its first node.
    level_columns = ['Condition', 'NCTId', 'OrgStudyId', 'Phase']
    levels = []
    labels = []
    for column in level_columns:
        distinct_values = trials[column].unique().tolist()
        levels.append((column, distinct_values, len(labels)))
        labels.extend(distinct_values)

    # Every node gets its own random colour.
    colors = [random_color() for _ in labels]

    source = []
    target = []
    value = []
    for left_level, right_level in zip(levels, levels[1:]):
        left_column, left_values, left_start = left_level
        right_column, right_values, right_start = right_level
        # Count each co-occurring pair once up front instead of re-filtering
        # the whole frame for every (left, right) combination.
        pair_counts = trials.groupby([left_column, right_column], sort=False).size().to_dict()
        # Walk the pairs in node order so the links keep a stable ordering.
        for i, left_value in enumerate(left_values, start=left_start):
            for j, right_value in enumerate(right_values, start=right_start):
                count = pair_counts.get((left_value, right_value), 0)
                if count > 0:
                    source.append(i)
                    target.append(j)
                    value.append(int(count))

    # Figure height grows with the number of conditions and tops out at 1000.
    num_conditions = len(levels[0][1])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])
    fig.update_layout(title_text="Conditions, Trial IDs, Study IDs, Phases for Sponsor",
                      font_size=10, height=height, autosize=True)
    return fig
######################################### Conditions############################### | |
##################################################################################### | |
import plotly.graph_objects as go | |
def plot_condition_treemap_nct_old(df):
    """Render conditions with their trial ids, titles and phases as a table.

    Only rows whose ``StudyType`` is ``"INTERVENTIONAL"`` are shown. Missing
    phases are labelled ``'UNKNOWN'`` and rows are sorted by ``Condition``.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``,
            ``NCTId``, ``BriefTitle`` and ``OrgStudyId`` columns.

    Returns:
        A ``go.Figure`` holding one ``go.Table`` trace.
    """
    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the caller's data is never written to
    # and pandas does not emit SettingWithCopyWarning.
    interventional = interventional.assign(
        Phase=interventional['Phase'].fillna('UNKNOWN'))

    # NCTId -> title / sponsor study id lookups. If an NCTId occurs on
    # several rows, the value from the last row is kept.
    nctid_to_brieftitle = interventional.set_index('NCTId')['BriefTitle'].to_dict()
    nctid_to_orgstudyid = interventional.set_index('NCTId')['OrgStudyId'].to_dict()

    # Copy the column subset so the new columns are added to an independent
    # frame rather than to a slice of the filtered data.
    table_df = interventional[['Condition', 'NCTId', 'Phase']].copy()
    table_df['BriefTitle'] = table_df['NCTId'].map(nctid_to_brieftitle)
    table_df['OrgStudyId'] = table_df['NCTId'].map(nctid_to_orgstudyid)
    table_df = table_df.sort_values('Condition')

    fig = go.Figure(data=[go.Table(
        header=dict(
            values=['Condition', 'NCTId', 'OrgStudyId', 'BriefTitle', 'Phase'],
            fill_color='paleturquoise',
            align='left',
            font=dict(size=16, color='black')
        ),
        cells=dict(
            values=[table_df['Condition'], table_df['NCTId'], table_df['OrgStudyId'],
                    table_df['BriefTitle'], table_df['Phase']],
            align='left',
            font=dict(size=14, color='black')
        )
    )])
    fig.update_layout(
        autosize=True,
        height=1000,
        title_text="Conditions with NCTIds and Phases",
        title_x=0.5,
        font=dict(size=18)
    )
    return fig
###################### Claude modified Jan 14/2025############################# | |
def plot_condition_sunburst_nct(df):
    """Build a sunburst of interventional trials: Conditions > trial > phase.

    The hierarchy has a single "Conditions" root, one ring segment per
    condition (sorted case-insensitively), one child per trial labelled
    ``"<NCTId> (<OrgStudyId>)"`` and one leaf per phase of that trial.
    Trial segments show the trial's BriefTitle on hover.

    Args:
        df: DataFrame with the columns ``StudyType``, ``Phase``, ``NCTId``,
            ``BriefTitle``, ``OrgStudyId`` and ``Condition``.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the sunburst trace.
    """
    # Copy so the Phase fill below does not write into a slice of the input.
    trials = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    trials['Phase'] = trials['Phase'].fillna('UNKNOWN')

    title_by_nct = trials.set_index('NCTId')['BriefTitle'].to_dict()
    org_id_by_nct = trials.set_index('NCTId')['OrgStudyId'].to_dict()

    # dropna: a missing condition is a float NaN, which str.lower cannot sort.
    conditions = sorted(trials['Condition'].dropna().unique(), key=str.lower)

    # Nodes are collected as (id, label, parent, hover title) tuples and turned
    # into a frame once, instead of growing a DataFrame with pd.concat per row.
    nodes = [("Conditions", "Conditions", "", None)]
    nodes.extend((condition, condition, "Conditions", None) for condition in conditions)

    # Trial level
    for condition in conditions:
        in_condition = trials[trials['Condition'] == condition]
        for nctid in sorted(in_condition['NCTId'].unique()):
            nodes.append((
                f"{condition}-{nctid}",
                f"{nctid} ({org_id_by_nct[nctid]})",
                condition,
                title_by_nct[nctid],
            ))

    # Phase level. The same (condition, trial, phase) can sit on many input
    # rows; emit it once so every node id stays unique.
    for condition in conditions:
        leaves = (
            trials.loc[trials['Condition'] == condition, ['NCTId', 'Phase']]
            .drop_duplicates()
            .sort_values('NCTId')
        )
        for nctid, phase in leaves.itertuples(index=False, name=None):
            nodes.append((
                f"{condition}-{nctid}-{phase}",
                phase,
                f"{condition}-{nctid}",
                None,
            ))

    sunburst_df = pd.DataFrame(nodes, columns=['ids', 'labels', 'parents', 'brieftitle'])

    fig = go.Figure(go.Sunburst(
        ids=sunburst_df.ids,
        labels=sunburst_df.labels,
        parents=sunburst_df.parents,
        maxdepth=3,  # Limit the depth to 3 levels
        branchvalues="total",
        hovertext=sunburst_df['brieftitle'],
        hoverinfo='text',
        hoverlabel=dict(namelength=-1),
        textfont=dict(size=14, family="Arial"),
    ))
    fig.update_layout(
        width=1200,
        height=1200,
        title={
            'text': "Clinical Trials by Condition, NCTId, and Phase",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=20),
        },
    )
    return fig
#################################################################################### | |
def plot_condition_treemap_nct(df):
    """Build an icicle chart of interventional trials: Conditions > trial > phase.

    Same hierarchy as the sunburst view: a "Conditions" root, one node per
    condition, one child per trial labelled ``"<NCTId> (<OrgStudyId>)"`` and
    one leaf per phase. Nodes are ordered by depth level and then by
    lower-cased label before being handed to Plotly.

    Args:
        df: DataFrame with the columns ``StudyType``, ``Phase``, ``NCTId``,
            ``BriefTitle``, ``OrgStudyId`` and ``Condition``.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the icicle trace.
    """
    # Copy so the Phase fill below does not write into a slice of the input.
    trials = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    trials['Phase'] = trials['Phase'].fillna('UNKNOWN')

    title_by_nct = trials.set_index('NCTId')['BriefTitle'].to_dict()
    org_id_by_nct = trials.set_index('NCTId')['OrgStudyId'].to_dict()

    # dropna: a missing condition is a float NaN, which str.lower cannot sort.
    conditions = sorted(trials['Condition'].dropna().unique(), key=str.lower)

    # (id, label, parent, hover title, level) tuples, assembled into one frame
    # at the end rather than concatenated row by row.
    nodes = [("Conditions", "Conditions", "", None, 0)]
    nodes.extend((condition, condition, "Conditions", None, 1) for condition in conditions)

    # Trial level
    for condition in conditions:
        in_condition = trials[trials['Condition'] == condition]
        for nctid in sorted(in_condition['NCTId'].unique()):
            nodes.append((
                f"{condition}-{nctid}",
                f"{nctid} ({org_id_by_nct[nctid]})",
                condition,
                title_by_nct[nctid],
                2,
            ))

    # Phase level, de-duplicated so repeated input rows for the same
    # (condition, trial, phase) do not produce repeated node ids.
    for condition in conditions:
        leaves = (
            trials.loc[trials['Condition'] == condition, ['NCTId', 'Phase']]
            .drop_duplicates()
            .sort_values('NCTId')
        )
        for nctid, phase in leaves.itertuples(index=False, name=None):
            nodes.append((
                f"{condition}-{nctid}-{phase}",
                phase,
                f"{condition}-{nctid}",
                None,
                3,
            ))

    icicle_df = pd.DataFrame(
        nodes, columns=['ids', 'labels', 'parents', 'brieftitle', 'level']
    )
    # Level first keeps parents ahead of children; labels are compared
    # case-insensitively so conditions appear in alphabetical order.
    icicle_df = icicle_df.sort_values(
        ['level', 'labels'],
        key=lambda col: col.str.lower() if col.name == 'labels' else col,
    )

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial"),
        hovertext=icicle_df['brieftitle'],
        hoverinfo='text',
        hoverlabel=dict(namelength=-1),
    ))
    fig.update_layout(autosize=True, height=1000)
    return fig
############################################################ | |
########################################################################################################################### | |
import re | |
def insert_line_break(text, max_length=30):
    """Wrap ``text`` for Plotly labels by inserting ``<br>`` between chunks.

    Each chunk is cut at the last space found before ``max_length``; when a
    chunk has no such space it is cut hard at ``max_length`` characters. The
    remainder is stripped of surrounding whitespace before the next cut.

    Args:
        text: String to wrap.
        max_length: Maximum characters per line; must be at least 1 whenever
            ``text`` is longer than ``max_length``.

    Returns:
        ``text`` unchanged if it already fits, otherwise the chunks joined
        with ``<br>``.

    Raises:
        ValueError: If ``text`` needs wrapping but ``max_length`` is below 1.
    """
    if len(text) <= max_length:
        return text
    if max_length < 1:
        # A non-positive width can never make progress on the remainder.
        raise ValueError("max_length must be at least 1")

    # Iterative rather than recursive so very long strings cannot exhaust the
    # interpreter's recursion limit.
    chunks = []
    remaining = text
    while len(remaining) > max_length:
        cut = remaining.rfind(' ', 0, max_length)
        if cut == -1:
            cut = max_length
        chunks.append(remaining[:cut])
        remaining = remaining[cut:].strip()
    chunks.append(remaining)
    return '<br>'.join(chunks)
########################################################### ####################################################################### | |
def plot_nct2org_icicle(df):
    """Build an icicle chart: Trials > NCTId > OrgStudyId > Condition.

    Trial nodes carry the trial's BriefTitle as hover text. One condition
    leaf is emitted per input row; the row's index label is appended to the
    leaf id so that repeated conditions still get distinct ids.

    Args:
        df: DataFrame with the columns ``NCTId``, ``BriefTitle``,
            ``OrgStudyId`` and ``Condition``.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the icicle trace.
    """
    nct_ids = df['NCTId'].unique()
    title_by_nct = (
        df[['NCTId', 'BriefTitle']]
        .drop_duplicates()
        .set_index('NCTId')['BriefTitle']
        .to_dict()
    )

    # (id, label, parent, hovertext) tuples; collected in a list and converted
    # once, which avoids re-copying the whole frame on every appended row.
    nodes = [("Trials", "Trials", "", "")]

    # NCTId level, hover shows the brief title
    nodes.extend((nctid, nctid, "Trials", title_by_nct[nctid]) for nctid in nct_ids)

    # OrgStudyId level
    for nctid in nct_ids:
        for org_id in df.loc[df['NCTId'] == nctid, 'OrgStudyId'].unique():
            nodes.append((f"{nctid}-{org_id}", org_id, nctid, ""))

    # Condition level: one leaf per row
    for index, nctid, org_id, condition in zip(
        df.index, df['NCTId'], df['OrgStudyId'], df['Condition']
    ):
        nodes.append((
            f"{nctid}-{org_id}-{condition}-{index}",
            condition,
            f"{nctid}-{org_id}",
            "",
        ))

    icicle_df = pd.DataFrame(nodes, columns=['ids', 'labels', 'parents', 'hovertext'])

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        hovertext=icicle_df.hovertext,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial"),
    ))
    fig.update_layout(autosize=True, height=1000)
    return fig
###################################################################################################################################### | |
################################################################################################################# | |
############################## Scatter Plot for Country Timelines ###################################### | |
import pandas as pd | |
import numpy as np | |
import plotly.express as px | |
import plotly.graph_objs as go | |
from plotly.subplots import make_subplots | |
def split_condition(text):
    """Return the leading condition: the text before the first ',' and '|', trimmed."""
    before_comma, _, _ = text.partition(',')
    first_condition, _, _ = before_comma.partition('|')
    return first_condition.strip()
################################################################################################################################# | |
import plotly.graph_objs as go | |
import plotly.graph_objs as go | |
import plotly.subplots as sp | |
import pandas as pd | |
import numpy as np | |
################################################################### COUNTRY PLOTS ################################################################ | |
def plot_trial_country_map(df):
    """Build an icicle chart: Country > condition > trial (NCTId).

    Country and condition nodes show the number of distinct trials on hover;
    trial nodes show the line-wrapped BriefTitle.

    Args:
        df: DataFrame with the columns ``StudyType``, ``Phase``, ``Condition``,
            ``Country``, ``NCTId`` and ``BriefTitle``.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the icicle trace.
    """
    # Copy so the Phase fill below does not write into a slice of the input.
    trials = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    trials['Phase'] = trials['Phase'].fillna('UNKNOWN')
    trials = trials.sort_values(by='Phase')
    # Split the conditions
    trials = split_conditions(trials, 'Condition')

    countries = trials['Country'].unique()

    # (id, label, parent, hover_text) tuples, converted to a frame once.
    nodes = [("Country", "Country", "", "Country")]

    for country in countries:
        trial_count = len(trials.loc[trials['Country'] == country, 'NCTId'].unique())
        nodes.append((country, country, "Country", f"({trial_count} Trials)"))

    # Country > condition. The (country, condition) pairs are remembered so
    # the trial level below does not have to recover them by splitting the
    # composite id on '__', which fails when a name itself contains '__'.
    country_conditions = []
    for country in countries:
        in_country = trials[trials['Country'] == country]
        for condition in in_country['Condition'].unique():
            trial_count = len(
                in_country.loc[in_country['Condition'] == condition, 'NCTId'].unique()
            )
            nodes.append((
                f"{country}__{condition}",
                condition,
                country,
                f"({trial_count} Trials)",
            ))
            country_conditions.append((country, condition))

    # Country > condition > trial
    for country, condition in country_conditions:
        parent_id = f"{country}__{condition}"
        subset = trials[(trials['Country'] == country) & (trials['Condition'] == condition)]
        for nctid in subset['NCTId'].unique():
            title = subset.loc[subset['NCTId'] == nctid, 'BriefTitle'].iloc[0]
            nodes.append((
                f"{parent_id}__{nctid}",
                nctid,
                parent_id,
                f"<br>{insert_line_break(title)}",
            ))

    icicle_df = pd.DataFrame(nodes, columns=['ids', 'labels', 'parents', 'hover_text'])

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        textinfo='label',
        hovertext=icicle_df.hover_text,
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial"),
    ))
    fig.update_layout(autosize=True, height=800)
    return fig
#################### | |
################################################################ SITES ##################################################### | |
################################################################ TRIAL SITES ########################################### | |
def plot_trial_sites(df):
    """Build an icicle chart: Sites > city > site > trial.

    City and site nodes show the number of distinct trials on hover; trial
    nodes are labelled ``"<NCTId><br><OrgStudyId>"``.

    Args:
        df: DataFrame with the columns ``StudyType``, ``Phase``, ``City``,
            ``Site``, ``NCTId`` and ``OrgStudyId``.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the icicle trace.
    """
    # Copy so the Phase fill below does not write into a slice of the input.
    trials = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    trials['Phase'] = trials['Phase'].fillna('UNKNOWN')
    trials = trials.sort_values(by='Phase')

    cities = trials['City'].unique()

    # (id, label, parent, hover_text) tuples, converted to a frame once.
    nodes = [("Sites", "Sites", "", "Sites")]

    for city in cities:
        trial_count = len(trials.loc[trials['City'] == city, 'NCTId'].unique())
        nodes.append((city, city, "Sites", f"({trial_count} Trials)"))

    # City > site. The (city, site) pairs are kept so the trial level does not
    # need to split the composite id on '__' to find them again.
    city_sites = []
    for city in cities:
        in_city = trials[trials['City'] == city]
        for site in in_city['Site'].unique():
            trial_count = len(in_city.loc[in_city['Site'] == site, 'NCTId'].unique())
            nodes.append((f"{city}__{site}", site, city, f"({trial_count} Trials)"))
            city_sites.append((city, site))

    # City > site > trial (NCTId, OrgStudyId)
    for city, site in city_sites:
        parent_id = f"{city}__{site}"
        subset = trials[(trials['City'] == city) & (trials['Site'] == site)]
        trial_pairs = subset[['NCTId', 'OrgStudyId']].drop_duplicates()
        for nctid, org_id in trial_pairs.itertuples(index=False, name=None):
            nodes.append((
                f"{parent_id}__{nctid}",
                f"{nctid}<br>{org_id}",
                parent_id,
                "",
            ))

    icicle_df = pd.DataFrame(nodes, columns=['ids', 'labels', 'parents', 'hover_text'])

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        textinfo='label',
        hovertext=icicle_df.hover_text,
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial"),
    ))
    fig.update_layout(autosize=True, height=800)
    return fig
############################################################################################################################################# | |
def plot_trial_site_map(df):
    """Build a treemap of trial rows nested as Site > City > NCTId > Condition.

    Tile sizes are the number of input rows in each
    (Site, City, NCTId, Condition) group; tiles are coloured by Site.

    Args:
        df: DataFrame with the columns ``StudyType``, ``Phase``, ``Condition``,
            ``Site``, ``City`` and ``NCTId``.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the treemap.
    """
    # Copy so the Phase fill below does not write into a slice of the input.
    trials = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    trials['Phase'] = trials['Phase'].fillna('UNKNOWN')
    trials = trials.sort_values(by='Phase')
    # Split the conditions
    trials = split_conditions(trials, 'Condition')

    df_count = (
        trials.groupby(['Site', 'City', 'NCTId', 'Condition'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.treemap(
        df_count,
        path=['Site', 'City', 'NCTId', 'Condition'],
        values='Count',
        color='Site',
    )

    # Apply the label font to the treemap trace. The earlier per-level calls
    # used selector=dict(depth=N); 'depth' is not a treemap trace attribute,
    # so those selectors matched nothing and the font was never applied.
    fig.update_traces(textfont=dict(family="Arial", size=30, color='black'))

    fig.update_layout(autosize=True, height=800)
    return fig
############################################################ | |
############################################################################################################################################################### | |
########################################################### Timelines ########################################################################################################### | |
import numpy as np | |
import plotly.graph_objs as go | |
import matplotlib.pyplot as plt | |
def generate_colors(n):
    """Return ``n`` ``#rrggbb`` strings sampled at even steps from matplotlib's rainbow colormap."""
    palette = []
    for red, green, blue, _alpha in plt.cm.rainbow(np.linspace(0, 1, n)):
        # Channels are floats in the colormap output; scale to 0-255 and truncate.
        channels = (int(red * 255), int(green * 255), int(blue * 255))
        palette.append('#%02x%02x%02x' % channels)
    return palette
def get_marker_size(enrollment_count):
    """Map an enrollment count to a marker diameter.

    Counts below 100 give 20, below 300 give 40, below 500 give 60, below
    1000 give 70, and anything else (including values that compare false
    against every bound) gives 100.
    """
    size_by_upper_bound = ((100, 20), (300, 40), (500, 60), (1000, 70))
    for upper_bound, size in size_by_upper_bound:
        if enrollment_count < upper_bound:
            return size
    return 100
def _timeline_height(num_trials):
    """Return the figure height in pixels for a timeline with ``num_trials`` rows.

    The previous ladder tested ``>= 10`` before ``>= 20``, so the 1000 px tier
    was unreachable and 6-9 trials fell through to 1400 px. The tiers below
    grow monotonically: 600 (up to 5), 700 (6-9), 800 (10-19), 1000 (20+).
    """
    if num_trials <= 5:
        return 600
    if num_trials < 10:
        return 700
    if num_trials < 20:
        return 800
    return 1000


def _condition_color(condition_id):
    """Derive a deterministic ``rgb(...)`` colour string from a condition id."""
    return (
        f'rgb({condition_id * 10 % 256}, '
        f'{(condition_id * 20) % 256}, '
        f'{(condition_id * 30) % 256})'
    )


def _plot_trial_timeline(df, first_year, last_year):
    """Draw one start/completion timeline row per trial for a fixed year window.

    For every NCTId a square marker is placed at the start date, a circle
    sized by enrollment (via ``get_marker_size``) at the completion date, and
    a black line joins the first row's start and completion dates. Markers
    are coloured by the trial's first condition; a legend lists each
    condition with its numeric id.

    Args:
        df: DataFrame with the columns ``StudyType``, ``Phase``, ``StartDate``,
            ``CompletionDate``, ``Condition``, ``NCTId``, ``BriefTitle``,
            ``OrgStudyId`` and ``EnrollmentCount``.
        first_year: First year shown on the x axis (axis starts on 1 January).
        last_year: Last year shown on the x axis (axis ends on 31 December).

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    # Copy so the column assignments below do not write into a slice of the input.
    trials = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    trials['Phase'] = trials['Phase'].fillna('UNKNOWN')
    trials = trials.sort_values(by='Phase')
    # Unparseable dates become NaT instead of raising.
    trials['StartDate'] = pd.to_datetime(trials['StartDate'], errors='coerce')
    trials['CompletionDate'] = pd.to_datetime(trials['CompletionDate'], errors='coerce')
    # Split the conditions
    trials = split_conditions(trials, 'Condition')

    # Number the unique conditions from 1; the id drives colour and legend text.
    condition_ids = {
        condition: number
        for number, condition in enumerate(trials['Condition'].unique(), start=1)
    }
    trials['MarkerSize'] = trials['EnrollmentCount'].apply(get_marker_size)

    # customdata[0] is the row's Condition column, so it is labelled as such
    # (it was previously captioned "Type").
    hover_head = (
        'NCTId: %{y}<br>Conditions: %{text}<br>Condition: %{customdata[0]}'
        '<br>BriefTitle: %{customdata[1]}<br>OrgStudyId: %{customdata[2]}'
        '<br>Phase: %{customdata[3]}'
    )
    hover_tail = '<br>Enrollment Count: %{customdata[4]}<extra></extra>'
    hovertemplate_start = hover_head + '<br>Start Date: %{x}' + hover_tail
    hovertemplate_end = hover_head + '<br>Completion Date: %{x}' + hover_tail
    hover_columns = ['Condition', 'BriefTitle', 'OrgStudyId', 'Phase', 'EnrollmentCount']

    start_traces = []
    end_traces = []
    line_traces = []
    trial_ids = trials['NCTId'].unique()

    for nctid in trial_ids:
        rows = trials[trials['NCTId'] == nctid]
        # Every point of the trial shows the full list of its conditions.
        text = [', '.join(rows['Condition'].unique())] * len(rows)
        # Colour by the condition on the trial's first row.
        color = _condition_color(condition_ids[rows['Condition'].iloc[0]])

        # Start traces (square)
        start_traces.append(go.Scatter(
            x=rows['StartDate'],
            y=rows['NCTId'],
            mode='markers',
            marker=dict(size=10, symbol='square', color=color),
            text=text,
            customdata=rows[hover_columns],
            hovertemplate=hovertemplate_start,
            showlegend=False,
        ))
        # End traces (circle)
        end_traces.append(go.Scatter(
            x=rows['CompletionDate'],
            y=rows['NCTId'],
            mode='markers',
            marker=dict(size=rows['MarkerSize'], symbol='circle', color=color,
                        sizemode='diameter'),
            text=text,
            customdata=rows[hover_columns],
            hovertemplate=hovertemplate_end,
            showlegend=False,
        ))
        # Line traces connecting start and end dates
        line_traces.append(go.Scatter(
            x=[rows['StartDate'].iloc[0], rows['CompletionDate'].iloc[0]],
            y=[nctid, nctid],
            mode='lines',
            line=dict(color='black', width=1),
            showlegend=False,
        ))

    # Legend-only traces (no data points), one per condition.
    legend_traces = [
        go.Scatter(
            x=[None],
            y=[None],
            mode='markers',
            marker=dict(size=10, symbol='circle', color=_condition_color(condition_id)),
            name=f'{condition_id}: {condition}',
            showlegend=True,
        )
        for condition, condition_id in condition_ids.items()
    ]

    layout = go.Layout(
        yaxis=dict(
            title='NCTId',
            showgrid=False,
            tickvals=trial_ids,
            ticktext=trial_ids,
            tickangle=0,
        ),
        xaxis=dict(
            title='Start-End Dates',
            showgrid=False,
            range=[pd.to_datetime(f'{first_year}-01-01'),
                   pd.to_datetime(f'{last_year}-12-31')],
            # One tick per year, including the last year of the window.
            tickvals=[pd.to_datetime(f'{year}-01-01')
                      for year in range(first_year, last_year + 1)],
        ),
        showlegend=True,
        legend=dict(title='Conditions', x=1.05, y=1, traceorder='normal',
                    bgcolor='rgba(255,255,255,0.5)', font=dict(color='#000000')),
        margin=dict(l=150),
        plot_bgcolor='#ffffff',
        paper_bgcolor='#ffffff',
        font=dict(family='Segoe UI', color='#000000'),
    )
    fig = go.Figure(
        data=start_traces + end_traces + line_traces + legend_traces,
        layout=layout,
    )
    fig.update_layout(
        title='Trial Start and End Dates by Conditions',
        autosize=True,
        height=_timeline_height(len(trial_ids)),
    )
    return fig


def plot_trial_bubblemap(df):
    """Plot the trial start/completion timeline over the 2020-2028 window.

    Args:
        df: Trial DataFrame; see ``_plot_trial_timeline`` for the columns used.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    return _plot_trial_timeline(df, 2020, 2028)


########################################################################################################################################################
def plot_trial_bubblemap_comp(df):
    """Plot the trial start/completion timeline over the 2010-2023 window.

    Args:
        df: Trial DataFrame; see ``_plot_trial_timeline`` for the columns used.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    return _plot_trial_timeline(df, 2010, 2023)
####################################################################################### | |
####################################################################################### | |
############################################ Trial Site Map without Zip code now ############## | |
import geopandas as gpd | |
def plot_trial_site_world_map(df, country_filter=None):
    """Plot trial sites on a world map, one bubble per site/condition/trial group.

    Each distinct (City, Country) pair is geocoded through
    ``gpd.tools.geocode`` (a network lookup). Bubbles are sized by the number
    of rows in each group and coloured by ``"<Site>, <Country>"``.

    Args:
        df: DataFrame with the columns ``City``, ``Country``, ``Site``,
            ``Condition``, ``NCTId`` and ``BriefTitle``. It is not modified.
        country_filter: Optional country name; when truthy only rows of that
            country are geocoded and plotted.

    Returns:
        A ``plotly.graph_objects.Figure`` with the scatter-geo trace.
    """
    # Work on a copy: the Berlin substitution below must not leak into the
    # caller's DataFrame.
    sites = df.copy()
    if country_filter:
        # Filter before geocoding so no lookups are spent on discarded rows.
        sites = sites[sites['Country'] == country_filter].copy()

    # "Multiple Locations" is not a geocodable place name; pin German entries
    # to Berlin.
    is_generic_german_site = (
        (sites['City'] == 'Multiple Locations') & (sites['Country'] == 'Germany')
    )
    sites.loc[is_generic_german_site, 'City'] = 'Berlin'

    # One lookup per distinct city; drop_duplicates already guarantees that,
    # so no additional cache is needed.
    unique_cities = sites[['City', 'Country']].drop_duplicates().copy()

    def locate(city, country):
        """Return (latitude, longitude) for a city, or NaNs if it is not found."""
        point = gpd.tools.geocode(f"{city}, {country}").geometry.iloc[0]
        if point is None or point.is_empty:
            # A failed lookup yields an empty geometry with no coordinates.
            return float('nan'), float('nan')
        return point.y, point.x

    coordinates = [
        locate(city, country)
        for city, country in zip(unique_cities['City'], unique_cities['Country'])
    ]
    unique_cities['Latitude'] = [latitude for latitude, _ in coordinates]
    unique_cities['Longitude'] = [longitude for _, longitude in coordinates]

    sites = sites.merge(unique_cities, on=['City', 'Country'])
    # Create a new column combining 'Site' and 'Country'
    sites['SiteCountry'] = sites['Site'] + ', ' + sites['Country']

    # Rows whose city could not be located have NaN coordinates and are left
    # out by groupby's default handling of missing keys.
    df_count = (
        sites.groupby(['Country', 'City', 'SiteCountry', 'Condition', 'NCTId',
                       'BriefTitle', 'Latitude', 'Longitude'])
        .size()
        .reset_index(name='Count')
    )

    fig = px.scatter_geo(
        df_count,
        lat='Latitude',
        lon='Longitude',
        hover_name='SiteCountry',
        hover_data={'Latitude': False, 'Longitude': False, 'NCTId': False,
                    'BriefTitle': False, 'Condition': False, 'City': True,
                    'Country': True},
        size='Count',
        color='SiteCountry',
        projection='mercator',
    )
    fig.update_layout(
        title='Trial Sites Map',
        geo=dict(showframe=False, showcoastlines=False, showcountries=True),
        width=1200,
        height=800,
    )
    return fig
############################################################################################################# | |
############################################################# Gradio Function as Views #################################### | |
### ######################### Find Sponsors
############################################################################################################################################# | |
def select_sponsor(sponsor_input, academia_input):
    """Return the sponsor to search for.

    ``sponsor_input`` is returned whenever it is truthy; otherwise the
    function falls back to ``academia_input`` (whatever its value is).
    """
    return sponsor_input or academia_input
def select_disease(disease_input, disease_input_text):
    """Return the disease to search for, preferring free-typed text.

    Args:
        disease_input: Value picked from the predefined choices.
        disease_input_text: Free-typed text; may be ``None`` or blank.

    Returns:
        The stripped typed text when it contains something other than
        whitespace, otherwise ``disease_input``.
    """
    # Strip before testing: a whitespace-only entry must not override the
    # picked value with an empty string.
    typed = (disease_input_text or "").strip()
    return typed or disease_input
#summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drug | |
#async def disease_view (condition, condition_text, sponsor_input, academia_input): | |
async def disease_view(condition, sponsor_input):
    """Build the sponsor / collaborator view for recruiting trials.

    The inputs are validated, ``gradio_wrapper_nct`` is queried with status
    "Recruiting", and the returned condition tables are turned into a
    sponsor tree and (when collaborator rows exist) a collaborator icicle.

    Args:
        condition: Condition to search for; a string, a list of strings
            (joined with spaces) or an empty value.
        sponsor_input: Sponsor to search for; same accepted forms.

    Returns:
        A 6-tuple ``(summary_stats, summary_stats_collb,
        html_table_conditions, html_table_conditions_collb, sponsor_tree,
        collaborator_tree)``. On failure the first element is a user-facing
        message and the other five are ``None``.
    """
    def failure(message):
        # Keep the same arity as the success return so every path yields
        # one value per output.
        return (message,) + (None,) * 5

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_match = "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!"

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor_input) if isinstance(sponsor_input, list) else sponsor_input
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    # Without this guard none of the query branches would run and the
    # result names below would be unbound.
    if not condition and not sponsor:
        return failure("Please select a Sponsor or a Condition and try again!")

    # Only pass the filters that were actually provided.
    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, _html_table, _html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_conditions is None or html_table_conditions_collb is None:
        return failure(no_match)

    df = pd.read_html(html_table_conditions)[0]
    try:
        df2 = pd.read_html(html_table_conditions_collb)[0]
    except (ValueError, IndexError):
        # No parsable collaborator table: treat as "no collaborators".
        df2 = pd.DataFrame()

    if df.empty and df2.empty:
        return failure(no_match)

    sponsor_tree = plot_sponsor_tree(df)
    collaborator_tree = plot_collaborator_icicle(df2) if not df2.empty else None
    return (summary_stats, summary_stats_collb, html_table_conditions,
            html_table_conditions_collb, sponsor_tree, collaborator_tree)
##################### Assets ################################################################################### | |
def select_sponsor(s_sponsor_input, s_academia_input):
    """Choose between the two sponsor inputs.

    Gives back ``s_sponsor_input`` if it is truthy, else ``s_academia_input``.
    """
    chosen = s_sponsor_input if s_sponsor_input else s_academia_input
    return chosen
def select_condition(s_disease_input, s_disease_input_type):
    """Return the condition to search for, preferring free-typed text.

    Args:
        s_disease_input: Value picked from the predefined choices.
        s_disease_input_type: Free-typed text; ``None`` is treated like an
            empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise ``s_disease_input``.
    """
    typed = (s_disease_input_type or "").strip()
    return typed or s_disease_input
async def drug_view(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the drug (assets) view for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct`` with status
    "Recruiting" and renders a Sankey chart from the returned drug table.

    Args:
        condition: Condition picked from the predefined choices.
        condition_type: Free-typed condition; when non-blank it overrides
            ``condition``. ``None`` is treated as blank.
        s_sponsor_input: First sponsor input; used when truthy.
        s_academia_input: Fallback sponsor input.

    Returns:
        A 3-tuple ``(summary_stats, html_table_drugs, sankey_map_drug)``.
        On failure the first element is a user-facing message and the other
        two are ``None``.
    """
    def failure(message):
        return message, None, None

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    # Same selection rules as the select_* helpers, made None-safe.
    sponsor = s_sponsor_input or s_academia_input
    condition = (condition_type or "").strip() or condition

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    # Without this guard the result names below would be unbound.
    if not condition and not sponsor:
        return failure("Please select a Sponsor or a Condition and try again!")

    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    (summary_stats, _summary_stats_collb, _html_table_conditions,
     _html_table_conditions_collb, _html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_drugs is None:
        return failure("No data matched from Clinical Trials.Gov, Please try with new selection !")

    df = pd.read_html(html_table_drugs)[0]
    if df.empty:
        return failure("The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!")

    sankey_map_drug = plot_drug_sankey(df)
    return summary_stats, html_table_drugs, sankey_map_drug
########################### Condition################### | |
################## ######################################################################################## | |
def select_sponsor_phc(s_sponsor_input_phc, s_academia_input_phc):
    """Return ``s_sponsor_input_phc`` when truthy, else ``s_academia_input_phc``."""
    if not s_sponsor_input_phc:
        return s_academia_input_phc
    return s_sponsor_input_phc
def select_condition_phc(s_disease_input_phc, s_disease_input_type_phc):
    """Return the condition to search for, preferring free-typed text.

    Args:
        s_disease_input_phc: Value picked from the predefined choices.
        s_disease_input_type_phc: Free-typed text; ``None`` is treated like
            an empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise
        ``s_disease_input_phc``.
    """
    typed = (s_disease_input_type_phc or "").strip()
    return typed or s_disease_input_phc
#async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input): | |
async def disease_view_phc(condition, s_sponsor_input):
    """Build the condition view (treemap + sunburst) for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct`` with status
    "Recruiting" and plots the returned conditions table.

    Args:
        condition: Condition to search for; a string, a list of strings
            (joined with spaces) or an empty value.
        s_sponsor_input: Sponsor to search for; same accepted forms.

    Returns:
        A 4-tuple ``(summary_stats, html_table_conditions,
        tree_map_cond_nct, sunburst_map_cond_nct)``. On failure the first
        element is a user-facing message and the other three are ``None``.
    """
    def failure(message):
        # Same arity as the success return (four values).
        return (message,) + (None,) * 3

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    # List values must become a single string before the regex check.
    sponsor = ' '.join(s_sponsor_input) if isinstance(s_sponsor_input, list) else s_sponsor_input
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    # Without this guard the result names below would be unbound.
    if not condition and not sponsor:
        return failure("Please select a Sponsor or a Condition and try again!")

    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, _html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_conditions is None:
        return failure("No data matched from Clinical Trials.Gov, Please try with new selection !")

    df = pd.read_html(html_table_conditions)[0]
    if df.empty:
        return failure("The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!")

    tree_map_cond_nct = plot_condition_treemap_nct(df)
    sunburst_map_cond_nct = plot_condition_sunburst_nct(df)
    return summary_stats, html_table_conditions, tree_map_cond_nct, sunburst_map_cond_nct
################## Trial ######################################################################################## | |
def select_sponsor_phs(s_sponsor_input_phs, s_academia_input_phs):
    """Pick the sponsor value: the first argument if truthy, else the second."""
    return s_sponsor_input_phs or s_academia_input_phs
def select_condition_phs(s_disease_input_phs, s_disease_input_type_phs):
    """Return the condition to search for, preferring free-typed text.

    Args:
        s_disease_input_phs: Value picked from the predefined choices.
        s_disease_input_type_phs: Free-typed text; ``None`` is treated like
            an empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise
        ``s_disease_input_phs``.
    """
    typed = (s_disease_input_type_phs or "").strip()
    return typed or s_disease_input_phs
async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the trial view (trial-to-organisation icicle) for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct`` with status
    "Recruiting" and plots the returned conditions table.

    Args:
        condition: Condition picked from the predefined choices.
        condition_type: Free-typed condition; when non-blank it overrides
            ``condition``. ``None`` is treated as blank.
        s_sponsor_input: First sponsor input; used when truthy.
        s_academia_input: Fallback sponsor input.

    Returns:
        A 3-tuple ``(summary_stats, html_table_conditions, nct_org_map)``.
        On failure the first element is a user-facing message and the other
        two are ``None``.
    """
    def failure(message):
        return message, None, None

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    # Same selection rules as the select_* helpers, made None-safe.
    sponsor = s_sponsor_input or s_academia_input
    condition = (condition_type or "").strip() or condition

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    # Without this guard the result names below would be unbound.
    if not condition and not sponsor:
        return failure("Please select a Sponsor or a Condition and try again!")

    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, _html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_conditions is None:
        return failure("No data matched from Clinical Trials.Gov, Please try with new selection !")

    df = pd.read_html(html_table_conditions)[0]
    if df.empty:
        return failure("The Sponsor Name did not match with ClinicalTrials.Gov, Please try with new selection again!")

    nct_org_map = plot_nct2org_icicle(df)
    return summary_stats, html_table_conditions, nct_org_map
##################################################### New Trials ###################################### | |
def select_sponsor_phs_n(s_sponsor_input_phs, s_academia_input_phs):
    """Return the first sponsor input when truthy, otherwise the second one."""
    picked = s_academia_input_phs
    if s_sponsor_input_phs:
        picked = s_sponsor_input_phs
    return picked
def select_condition_phs_n(s_disease_input_phs, s_disease_input_type_phs):
    """Return the condition to search for, preferring free-typed text.

    Args:
        s_disease_input_phs: Value picked from the predefined choices.
        s_disease_input_type_phs: Free-typed text; ``None`` is treated like
            an empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise
        ``s_disease_input_phs``.
    """
    typed = (s_disease_input_type_phs or "").strip()
    return typed or s_disease_input_phs
#################################################################################### | |
async def disease_view_phs_n(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the "new trials" view for trials that are not yet recruiting.

    Validates the inputs, queries ``gradio_wrapper_nct`` with status
    "Not yet recruiting", then renders a drug Sankey chart from the drug
    table and a bubble map from the conditions table.

    Args:
        condition: Condition picked from the predefined choices.
        condition_type: Free-typed condition; when non-blank it overrides
            ``condition``. ``None`` is treated as blank.
        s_sponsor_input: First sponsor input; used when truthy.
        s_academia_input: Fallback sponsor input.

    Returns:
        A 4-tuple ``(summary_stats, html_table_conditions, drug_sankey,
        bubble_map_trials)``. On failure the first element is a user-facing
        message and the other three are ``None``.
    """
    def failure(message):
        return (message,) + (None,) * 3

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    # Same selection rules as the select_* helpers, made None-safe.
    sponsor = s_sponsor_input or s_academia_input
    condition = (condition_type or "").strip() or condition

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    # Without this guard the result names below would be unbound.
    if not condition and not sponsor:
        return failure("Please select a Sponsor or a Condition and try again!")

    query = {"status": "Not yet recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    # Both tables are parsed below, so both must be present.
    if html_table_conditions is None or html_table_drugs is None:
        return failure(no_data)

    drugs_df = pd.read_html(html_table_drugs)[0]
    if drugs_df.empty:
        return failure("The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!")

    drug_sankey = plot_drug_sankey(drugs_df)

    conditions_df = pd.read_html(html_table_conditions)[0]
    bubble_map_trials = plot_trial_bubblemap(conditions_df)
    return summary_stats, html_table_conditions, drug_sankey, bubble_map_trials
############################################### Completed Trials #################################################### | |
def select_sponsor_phs_c(s_sponsor_input_phs, s_academia_input_phs):
    """Prefer ``s_sponsor_input_phs``; fall back to ``s_academia_input_phs`` when it is falsy."""
    return s_academia_input_phs if not s_sponsor_input_phs else s_sponsor_input_phs
def select_condition_phs_c(s_disease_input_phs, s_disease_input_type_phs):
    """Return the condition to search for, preferring free-typed text.

    Args:
        s_disease_input_phs: Value picked from the predefined choices.
        s_disease_input_type_phs: Free-typed text; ``None`` is treated like
            an empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise
        ``s_disease_input_phs``.
    """
    typed = (s_disease_input_type_phs or "").strip()
    return typed or s_disease_input_phs
async def disease_view_phs_c(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the completed-trials view.

    Validates the inputs, queries ``gradio_wrapper_nct`` with status
    "Completed" and renders a condition treemap, a trial-to-organisation
    icicle, a drug Sankey chart and a bubble map.

    Args:
        condition: Condition picked from the predefined choices.
        condition_type: Free-typed condition; when non-blank it overrides
            ``condition``. ``None`` is treated as blank.
        s_sponsor_input: First sponsor input; used when truthy.
        s_academia_input: Fallback sponsor input.

    Returns:
        A 6-tuple ``(summary_stats, html_table_conditions,
        tree_map_cond_nct, nct_org_map, sankey_map_drug,
        bubble_map_trials)``. On failure the first element is a user-facing
        message and the other five are ``None``.
    """
    def failure(message):
        return (message,) + (None,) * 5

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    # Same selection rules as the select_* helpers, made None-safe.
    sponsor = s_sponsor_input or s_academia_input
    condition = (condition_type or "").strip() or condition

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    # Without this guard the result names below would be unbound.
    if not condition and not sponsor:
        return failure("Please select a Sponsor or a Condition and try again!")

    query = {"status": "Completed"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    # Both tables are parsed below, so both must be present.
    if html_table_conditions is None or html_table_drugs is None:
        return failure(no_data)

    df = pd.read_html(html_table_conditions)[0]
    df2 = pd.read_html(html_table_drugs)[0]

    # Check for empty results before building any chart.
    if df.empty and df2.empty:
        return failure("The selection did not match with ClinicalTrials.Gov, Please try with new selection again!")

    # The bubble map gets its own untouched copy of the conditions table,
    # taken before the other plot helpers see ``df`` (replaces a second parse
    # of the same HTML).
    bubble_source = df.copy()

    tree_map_cond_nct = plot_condition_treemap_nct(df)
    nct_org_map = plot_nct2org_icicle(df)
    bubble_map_trials = plot_trial_bubblemap_comp(bubble_source)
    sankey_map_drug = plot_drug_sankey(df2)
    return (summary_stats, html_table_conditions, tree_map_cond_nct,
            nct_org_map, sankey_map_drug, bubble_map_trials)
### ############### Country ######################################################### | |
def select_sponsor_con(sponsor_input_con, academia_input_con):
    """Return ``sponsor_input_con`` if truthy, otherwise ``academia_input_con``."""
    return sponsor_input_con or academia_input_con
def select_condition_con(condition_input, condition_input_type):
    """Return the condition to search for, preferring free-typed text.

    Args:
        condition_input: Value picked from the predefined choices.
        condition_input_type: Free-typed text; ``None`` is treated like an
            empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise ``condition_input``.
    """
    typed = (condition_input_type or "").strip()
    return typed or condition_input
async def condition_view(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the country view for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct_spn`` with status
    "Recruiting" and plots the returned sites table on a country map.

    Args:
        condition: Condition picked from the predefined choices.
        country: Country filter, forwarded unchanged to the query.
        condition_type: Free-typed condition; when non-blank it overrides
            ``condition``. ``None`` is treated as blank.
        sponsor_input_con: First sponsor input; used when truthy.
        academia_input_con: Fallback sponsor input.

    Returns:
        A 3-tuple ``(summary_stats_sites, html_table_add, trial_country)``.
        On failure the first element is a user-facing message and the other
        two are ``None``.
    """
    def failure(message):
        return message, None, None

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    # Same selection rules as the select_* helpers, made None-safe.
    condition = (condition_type or "").strip() or condition
    sponsor = sponsor_input_con or academia_input_con

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    (_summary_stats, _html_table_conditions, _html_table, summary_stats_sites,
     html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting")

    if html_table_add is None:
        return failure("No data matched from Clinical Trials.Gov, Please try with new selection !")

    # One parse is enough; a second parse of the same HTML yields the same
    # table.
    df = pd.read_html(html_table_add)[0]
    if df.empty:
        return failure("The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!")

    trial_country = plot_trial_country_map(df)
    return summary_stats_sites, html_table_add, trial_country
############### Site ######################################################################################################### | |
def select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s):
    """Pick ``sponsor_input_con_s`` when truthy; otherwise ``academia_input_con_s``."""
    if not sponsor_input_con_s:
        return academia_input_con_s
    return sponsor_input_con_s
def select_condition_con(condition_input, condition_input_type):
    """Return the condition to search for, preferring free-typed text.

    Args:
        condition_input: Value picked from the predefined choices.
        condition_input_type: Free-typed text; ``None`` is treated like an
            empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise ``condition_input``.
    """
    typed_text = (condition_input_type or "").strip()
    return typed_text if typed_text else condition_input
async def condition_view_s(condition, country, condition_type, sponsor_input_con_s, academia_input_con_s):
    """Build the site view for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct_spn`` with status
    "Recruiting" and renders the returned sites table as a site chart and a
    site map.

    Args:
        condition: Condition picked from the predefined choices.
        country: Country filter, forwarded unchanged to the query.
        condition_type: Free-typed condition; when non-blank it overrides
            ``condition``. ``None`` is treated as blank.
        sponsor_input_con_s: First sponsor input; used when truthy.
        academia_input_con_s: Fallback sponsor input.

    Returns:
        A 4-tuple ``(summary_stats_sites, html_table_add, site_cond,
        country_site)``. On failure the first element is a user-facing
        message and the other three are ``None``.
    """
    def failure(message):
        return (message,) + (None,) * 3

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    # Same selection rules as the select_* helpers, made None-safe.
    condition = (condition_type or "").strip() or condition
    sponsor = sponsor_input_con_s or academia_input_con_s

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    (_summary_stats, _html_table_conditions, _html_table, summary_stats_sites,
     html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting")

    # A single check replaces three identical consecutive traps.
    if html_table_add is None:
        return failure("No data matched from Clinical Trials.Gov, Please try with new selection !")

    df = pd.read_html(html_table_add)[0]
    if df.empty:
        return failure("The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!")

    site_cond = plot_trial_sites(df)
    country_site = plot_trial_site_map(df)
    return summary_stats_sites, html_table_add, site_cond, country_site
###################################### Timelines ################################################################### | |
def select_sponsor_cont(sponsor_input_con, academia_input_con):
    """Return the first sponsor input when truthy, else the second."""
    result = sponsor_input_con if sponsor_input_con else academia_input_con
    return result
def select_condition_cont(condition_input, condition_input_type):
    """Return the condition to search for, preferring free-typed text.

    Args:
        condition_input: Value picked from the predefined choices.
        condition_input_type: Free-typed text; ``None`` is treated like an
            empty string instead of raising ``AttributeError``.

    Returns:
        The stripped typed text when non-blank, otherwise ``condition_input``.
    """
    typed = (condition_input_type or "").strip()
    return typed or condition_input
async def condition_viewt(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the timelines view for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct_spn`` with status
    "Recruiting" and renders a bubble map from the returned sites table.

    Args:
        condition: Condition picked from the predefined choices.
        country: Country filter, forwarded unchanged to the query.
        condition_type: Free-typed condition; when non-blank it overrides
            ``condition``. ``None`` is treated as blank.
        sponsor_input_con: First sponsor input; used when truthy.
        academia_input_con: Fallback sponsor input.

    Returns:
        A 3-tuple ``(summary_stats_sites, html_table, bubble_map_trials)``.
        On failure the first element is a user-facing message and the other
        two are ``None``.
    """
    def failure(message):
        return message, None, None

    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    # Same selection rules as the select_* helpers, made None-safe.
    condition = (condition_type or "").strip() or condition
    sponsor = sponsor_input_con or academia_input_con

    # List values must become a single string before the regex check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return failure(invalid_input)

    (_summary_stats, _html_table_conditions, html_table, summary_stats_sites,
     html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting")

    if html_table_add is None:
        return failure("No data matched from Clinical Trials.Gov, Please try with new selection !")

    df = pd.read_html(html_table_add)[0]
    if df.empty:
        return failure("The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!")

    bubble_map_trials = plot_trial_bubblemap(df)
    # NOTE(review): the chart is built from html_table_add but the table
    # returned for display is html_table -- kept as in the original; confirm
    # this is intended.
    return summary_stats_sites, html_table, bubble_map_trials
############### Find Site Map ######################################################################################################### | |
def select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map):
    """Pick which of the two sponsor inputs to use for the site-map view.

    The first argument wins whenever it is truthy; otherwise the second
    argument is returned unchanged (even if it is itself empty or ``None``).
    """
    return sponsor_input_con_map or academia_input_con_map
async def condition_view_map(condition, country, sponsor_input_con_map, academia_input_con_map):
    """Build the trial-site world map for a condition / country / sponsor.

    Parameters
    ----------
    condition, country
        Forwarded unchanged to ``gradio_wrapper_nct_spn``.
    sponsor_input_con_map, academia_input_con_map
        Two alternative sponsor inputs; ``select_sponsor_con_map`` decides
        which one is used.

    Returns
    -------
    tuple
        ``(summary_stats_sites, html_table_add, world_map)`` on success, or
        ``(message, None, None)`` when the sponsor fails validation, no data
        comes back, the parsed table is empty, or the map cannot be drawn.
    """
    import re
    from io import StringIO

    sponsor = select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map)

    # A list value is flattened to one string so it can be validated with a
    # regular expression below.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)

    # Validate the sponsor text: at most 50 characters drawn from letters,
    # space and . , & / ( ) -  (the pattern also allows parentheses, which the
    # message does not list). fullmatch is used so that a trailing newline
    # cannot slip past a "$" anchor.
    if isinstance(sponsor, str):
        if len(sponsor) > 50 or not re.fullmatch(r'[A-Za-z .,&/()-]*', sponsor):
            return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None

    # Only the site summary and the site table are used by this view.
    _, _, _, summary_stats_sites, html_table_add, _ = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting"
    )

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Wrap the HTML in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas releases.
    df = pd.read_html(StringIO(html_table_add))[0]

    if df.empty:
        # NOTE(review): this view takes no trial id as input; the wording looks
        # copied from the trial-id view. Left unchanged pending confirmation.
        return "The Trial Id did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    world_map = plot_trial_site_world_map(df)
    if world_map is None:
        return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None

    return summary_stats_sites, html_table_add, world_map
### ########################################Find Trial Eligibility########################################################################### | |
############################################################################ END VIEWS######################## | |
#### Helper to remove duplicated inclusion/exclusion numbering from criteria text
import re | |
def format_html_list(html_string):
    """Renumber a run-together numbered list and join it with ``<br>`` tags.

    The text is split wherever digits are followed by a period and one
    whitespace character (``"1. "``, ``"2.\\n"`` ...). Text before the first
    number is discarded. From each item the sequences ``":."`` and
    ``"General."`` are removed, items left with no text are dropped, and the
    remaining items are renumbered from 1. Runs of two or more periods are
    collapsed to a single period.

    Parameters
    ----------
    html_string : str or None
        Raw criteria text containing numbered items.

    Returns
    -------
    str
        The items joined by ``"<br>"``; an empty string when the input is
        empty/``None`` or contains no numbered items.
    """
    if not html_string:
        return ""

    # With a capturing group, re.split yields
    # [prefix, number, text, number, text, ...]; the texts sit at 2, 4, 6, ...
    pieces = re.split(r'(\d+\.\s)', html_string)

    entries = []
    for text in pieces[2::2]:
        text = re.sub(r':\.', '', text)
        text = re.sub(r'General\.', '', text)
        # Drop items whose text is blank once the markers are removed. The
        # number prefix is deliberately excluded from this check, otherwise
        # no item would ever count as empty.
        if text.strip():
            entries.append(text)

    # Renumber from the text alone, so items whose number was followed by a
    # newline or tab are handled the same way as "N. " items.
    numbered = [
        re.sub(r'\.{2,}', '.', f"{position}. {text}")
        for position, text in enumerate(entries, start=1)
    ]
    return "<br>".join(numbered)
######################################################################################## | |
##################################################################################### | |
############################################################################################################################################# | |
async def trial_view_map(nctID):
    """Build the trial-site world map for a single NCT id.

    Parameters
    ----------
    nctID : str or None
        Trial identifier as entered by the user. Surrounding whitespace is
        ignored; it must start with ``"NCT"`` and be 10 to 12 characters long.

    Returns
    -------
    tuple
        ``(summary_stats_sites, world_map, html_table_add)`` on success, or
        ``(message, None, None)`` when the id is invalid, no data comes back,
        or the map cannot be drawn.
    """
    from io import StringIO

    # Guard against a None value (e.g. an empty or cleared input) so the
    # validation below reports an invalid id instead of raising.
    nctID = (nctID or "").strip()
    if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12):
        return "Not a Valid NCT ID has been entered", None, None

    # Only the site summary and the site table are used by this view.
    _, _, _, summary_stats_sites, html_table_add, _ = await gradio_wrapper_nct_spn(
        NCTId=nctID, status="Recruiting"
    )

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Wrap the HTML in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas releases.
    df = pd.read_html(StringIO(html_table_add))[0]

    world_map = plot_trial_site_world_map(df)
    if world_map is None:
        return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None

    return summary_stats_sites, world_map, html_table_add
#################################################################################################################################################### | |
import plotly.graph_objects as go | |
def split_numbered_criteria(text):
    """Break criteria text into one string per numbered entry.

    The text is read line by line (blank lines are ignored, every line is
    stripped). A line opens a new entry when its first character is a digit
    and ``". "`` occurs within its first four characters; any other line is
    appended, space-separated, to the entry currently being built.

    Returns an empty list for falsy input, and ``["No criteria available"]``
    when the text contains no non-blank lines.
    """
    if not text:
        return []

    entries = []
    buffer = []
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue

        opens_entry = stripped[0].isdigit() and '. ' in stripped[:4]
        if opens_entry and buffer:
            # Flush the entry collected so far before starting the next one
            entries.append(' '.join(buffer))
            buffer = []
        buffer.append(stripped)

    # Whatever is still buffered is the final entry
    if buffer:
        entries.append(' '.join(buffer))

    return entries or ["No criteria available"]
def display_criteria_table(inclusion_text, exclusion_text):
    """
    Render inclusion and exclusion criteria side by side as a Plotly table.

    Each text is broken into individual criteria with
    ``split_numbered_criteria``; the shorter column is padded with empty
    strings so both columns have the same number of rows. Returns the
    resulting ``go.Figure``, or ``None`` (after printing the error) if
    anything raises while building it.
    """
    try:
        included = split_numbered_criteria(inclusion_text)
        excluded = split_numbered_criteria(exclusion_text)

        # Pad the shorter column so the two columns line up row for row
        row_count = max(len(included), len(excluded))
        included = included + [''] * (row_count - len(included))
        excluded = excluded + [''] * (row_count - len(excluded))

        header_spec = dict(
            values=['<b>Inclusion Criteria</b>', '<b>Exclusion Criteria</b>'],
            fill_color='#e6f3ff',
            align=['left', 'left'],
            font=dict(size=14, color='black'),
            height=40,
        )
        cell_spec = dict(
            values=[included, excluded],
            fill_color=[['white', '#f9f9f9'] * row_count],  # Alternating row colors
            align=['left', 'left'],
            font=dict(size=12),
            height=None,
            line=dict(color='lightgrey', width=1),  # Light borders
        )
        table = go.Table(
            columnwidth=[500, 500],  # Equal width columns
            header=header_spec,
            cells=cell_spec,
        )

        fig = go.Figure(data=[table])
        fig.update_layout(
            title="Trial Eligibility Criteria",
            width=1200,
            height=max(400, row_count * 30 + 100),  # Height grows with row count
            margin=dict(l=20, r=20, t=40, b=20),
        )
        return fig
    except Exception as e:
        print(f"Error in display_criteria_table: {str(e)}")
        return None
async def trial_view(nctID):
    """Fetch a trial's table and its eligibility-criteria table by NCT id.

    Parameters
    ----------
    nctID : str or None
        Trial identifier as entered by the user. Surrounding whitespace is
        ignored; it must start with ``"NCT"`` and be 10 to 12 characters long.

    Returns
    -------
    tuple
        ``(html_table, criteria_table)`` on success, or
        ``(message, None, None)`` when the id is invalid or neither inclusion
        nor exclusion criteria are found.
    """
    # Guard against a None value (e.g. an empty or cleared input) so the
    # validation below reports an invalid id instead of raising.
    nctID = (nctID or "").strip()
    if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12):
        return "Not a Valid NCT ID has been entered", None, None

    # Only the trial table is used from the six values returned.
    _, _, _, _, html_table, _ = await gradio_wrapper_nct(NCTId=nctID, status="Recruiting")

    formatted_inclusions = get_formatted_inclusion_criteria(nctID)
    formatted_exclusions = get_formatted_exclusion_criteria(nctID)
    if not formatted_inclusions and not formatted_exclusions:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Single table holding both inclusion and exclusion criteria
    criteria_table = display_criteria_table(formatted_inclusions, formatted_exclusions)

    # NOTE(review): the error paths above return three values while this
    # success path returns two; confirm against the number of outputs wired
    # to this handler before changing either side.
    return html_table, criteria_table
############################### Design the interface#################################################################################### | |
## Added after the Sept 27 failure
from gradio.components import Dropdown | |
############################################################################################################################################################################### | |
trial_app = gr.Blocks() | |
with trial_app: | |
gr.Markdown("<center style='font-size: 36px;'><b style='color: green;'>Trial Connect</b></center>") | |
gr.Markdown("<center style='font-size: 20px;'><b style='color: green;'>Data Source: ClinicalTrials.gov </b></center>") | |
#gr.Markdown("<p style='text-align: left; font-size: 20px; color: green;'>Now Recruiting Trials:</p>") | |
with gr.Tabs(): | |
############################################################################################################################################## | |
################################################################ Conditions ############################################################################################### | |
with gr.TabItem("Trials"): | |
# 1st Row | |
#################################################################################################################################################### | |
##################################################################################################################################################### | |
with gr.Row(): | |
gr.HTML(''' | |
<h1 style="font-size:16px;font-weight:normal;color:green; ">'Now Recruiting' Trials for Conditions:</h1> | |
<p style="font-size:16px;color:green; ">1. Select a Condition, for example, 'Pancreatic Cancer', 'Chronic Kidney Disease', 'MASH' etc.</p> | |
<p style="font-size:16px;color:green; ">2. Select a Sponsor'. </p> | |
<p style="font-size:16px;color:green; ">3. Click 'Show Trials'. </p> | |
''') | |
##################################################################################################################################################### | |
with gr.Row(): | |
################################################################### | |
with gr.Column(): | |
s_disease_input_phc = gr.Dropdown( | |
choices=["Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ | |
"Cancer","Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ | |
"Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ | |
"Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ | |
"Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ | |
"Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ | |
"Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ | |
"Urothelial Carcinoma",\ | |
"Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ | |
"Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ | |
" Major","Metabolic", "Generalized Pustular Psoriasis",\ | |
"Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ | |
"Liver Cirrhosis", \ | |
"MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ | |
"Psychological Trauma","Renal", "Respiratory",\ | |
"Schizophrenia", "PTSD", \ | |
"Venous Thromboembolism", "Wet"], | |
label="Select Condition" | |
) | |
################################################################### | |
with gr.Column(): | |
#### ######################################################################################################################################################################################################### | |
s_sponsor_input_phc = gr.Dropdown( | |
choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ | |
"CSL Behring", "Daiichi Sankyo, Inc.",\ | |
"Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ | |
"Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ | |
"Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], | |
label="Select Sponsor" | |
) | |
###################################################################################################################################################################### | |
# 3rd Row | |
with gr.Row(): #academia_input = gr.inputs.Dropdown( | |
s_button_phc = gr.Button("Show Trials") | |
# Then, create the clear button and add the dropdown input to it | |
clear_btn_phc = gr.ClearButton() | |
clear_btn_phc.add(s_sponsor_input_phc) | |
clear_btn_phc.add(s_disease_input_phc) | |
# with gr.Column(): | |
################# # 3rd row################################################################# | |
# with gr.Row(): | |
################################################################### | |
# with gr.Column(): | |
# s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:") | |
# clear_btn_phc.add(s_academia_input_phc) | |
################################################################### | |
# with gr.Column(): | |
# s_disease_input_type_phc = gr.Textbox(lines=1, label="Filter by typing a Condition:") | |
# clear_btn_phc.add(s_disease_input_type_phc) | |
############################################################################################################################################ | |
###################################################################################################################################################################### | |
######################################################################################################################################################################### | |
with gr.Row(): | |
summary_block_phc = gr.HTML(label="Clinical Trials Now Recruiting for Conditions :" ) | |
############################################################################################################################################################# | |
# with gr.Row(): | |
# nct_org_map = gr.Plot() | |
########################################################################################################################################################## | |
#################################################################################################################################################### | |
# with gr.Row(): | |
# gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Conditions by Trials and Phase</h1>') | |
with gr.Row(): | |
# with gr.Column(): | |
tree_map_cond_nct = gr.Plot() | |
#################################################################################################################################################### | |
# with gr.Row(): | |
# gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Conditions by Trials and Phase</h1>') | |
with gr.Row(): | |
# with gr.Column(): | |
sunburst_map_cond_nct = gr.Plot() | |
with gr.Row(): | |
output_block_conditions_phc = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") | |
clear_btn_phc.add(summary_block_phc) | |
clear_btn_phc.add(output_block_conditions_phc) | |
clear_btn_phc.add(tree_map_cond_nct) | |
clear_btn_phc.add(sunburst_map_cond_nct) | |
#clear_btn_phs.add(nct_org_map) | |
######################################################################### | |
################################################################ Trials ############################################################################################### | |
# with gr.TabItem("Trials"): | |
# 1st Row | |
#################################################################################################################################################### | |
# with gr.Row(): | |
# gr.HTML(''' | |
# <h1 style="font-size:16px;font-weight:normal;color:green; ">Trials 'Now Recruiting':</h1> | |
# <p style="font-size:16px;color:green; ">1. Select a Sponsor and click 'Show Trials'. </p> | |
# <p style="font-size:16px;color:green; ">2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.</p> | |
# ''') | |
##################################################################################################################################################### | |
# with gr.Row(): | |
# with gr.Column(): | |
#### ######################################################################################################################################################################################################### | |
# s_sponsor_input_phs = gr.Dropdown( | |
############################################################################ | |
# choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ | |
# "CSL Behring", "Daiichi Sankyo, Inc.",\ | |
# "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ | |
# "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ | |
# "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], | |
# label="Select a Sponsor" | |
# ) | |
############################################################################################################## | |
# with gr.Column(): | |
###################################################################################################################################################################### | |
# 3rd Row | |
# with gr.Row(): #academia_input = gr.inputs.Dropdown( | |
# s_button_phs = gr.Button("Show Trials") | |
# Then, create the clear button and add the dropdown input to it | |
# clear_btn_phs = gr.ClearButton() | |
# clear_btn_phs.add(s_sponsor_input_phs) | |
# clear_btn_phs.add(s_disease_input_phs) | |
###################################################################################################################################################################### | |
################# # 3rd row################################################################# | |
# with gr.Row(): | |
################################################################################################################################################################# | |
# with gr.Column(): | |
# s_academia_input_phs = gr.Textbox(lines=1, label="Type a Sponsor Name:") | |
# clear_btn_phs.add(s_academia_input_phs) | |
################################################################################################################################################################# | |
# with gr.Column(): | |
# s_disease_input_type_phs = gr.Textbox(lines=1, label="Filter by typing a Condition:") | |
# clear_btn_phs.add(s_disease_input_type_phs) | |
############################################################################################################################################ | |
######################################################################################################################################################################### | |
# with gr.Row(): | |
# summary_block_phs = gr.HTML(label="Conditions and Sponsors Now Recruiting for Clinical Trials:" ) | |
############################################################################################################################################################# | |
#with gr.Row(): | |
# nct_org_map = gr.Plot() | |
########################################################################################################################################################## | |
#################################################################################################################################################### | |
# with gr.Row(): | |
# output_block_conditions_phs = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") | |
# clear_btn_phs.add(summary_block_phs) | |
# clear_btn_phs.add(output_block_conditions_phs) | |
# clear_btn_phs.add(nct_org_map) | |
######################################################################### | |
############################################################ ASSETS ############################################################### | |
with gr.TabItem("Drugs"): | |
############################################################################################ | |
# 1st Row | |
#################################################################################################################################################### | |
with gr.Row(): | |
#################################################################################################################################################### | |
gr.HTML(''' | |
<h1 style="font-size:16px;font-weight:normal;color:green; ">Drugs for 'Now Recruiting' Trials:</h1> | |
<p style="font-size:16px;color:green; ">1. Select a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc. </p> | |
<p style="font-size:16px;color:green; ">2. Select a Sponsor and click 'Show Drugs'. </p> | |
''') | |
##################################################################################################################################################### | |
with gr.Row(): | |
##################################################################################################################################################################################### | |
with gr.Column(): | |
s_disease_input = gr.Dropdown( | |
choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ | |
"Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ | |
"Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ | |
"Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ | |
"Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ | |
"Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ | |
"Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ | |
"Urothelial Carcinoma",\ | |
"Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ | |
"Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ | |
" Major","Metabolic", "Generalized Pustular Psoriasis",\ | |
"Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ | |
"Liver Cirrhosis", \ | |
"MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ | |
"Psychological Trauma","Renal", "Respiratory",\ | |
"Schizophrenia", "PTSD", \ | |
"Venous Thromboembolism", "Wet"], | |
label= "Filter by a Condition" | |
) | |
######################################################################################################################################## | |
with gr.Column(): | |
############################################################################################################################################# | |
s_sponsor_input = gr.Dropdown( | |
############################################################################ | |
choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ | |
"CSL Behring", "Daiichi Sankyo, Inc.",\ | |
"Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ | |
"Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ | |
"Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], | |
label="Select a Sponsor" | |
) | |
############################################################################################ | |
############################################################################################ | |
with gr.Row(): | |
s_drug_button = gr.Button("Show Drugs") | |
# Then, create the clear button and add the dropdown input to it | |
clear_btn = gr.ClearButton() | |
clear_btn.add(s_sponsor_input) | |
clear_btn.add(s_disease_input) | |
## with gr.Row(): | |
##################################################################################################################################################################################### | |
################# # 3rd row################################################################# | |
with gr.Row(): | |
##################################################################################################################################################################### | |
with gr.Column(): | |
s_academia_input = gr.Textbox(lines=1, label="Type a Sponsor Name:") | |
clear_btn.add(s_academia_input) | |
################################################################################################################################################################# | |
with gr.Column(): | |
s_disease_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:") | |
clear_btn.add(s_disease_input_type) | |
##################################################################################################################################################################################### | |
with gr.Row(): | |
drug_summary_block = gr.HTML(label="Conditions and Drug Assets, Sponsors Now Recruiting for Clinical Trials:" ) | |
with gr.Row(): | |
sankey_map_drug = gr.Plot() | |
with gr.Row(): | |
drug_output_block_conditions = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") | |
clear_btn.add(drug_summary_block) | |
clear_btn.add(drug_output_block_conditions) | |
clear_btn.add(sankey_map_drug) | |
############################################################################################################################################################################################ | |
##################################################################### Country##################################################### | |
# with gr.TabItem("Countries"): | |
########################################################################## | |
# 1st Row | |
#################################################################################################################################################### | |
# with gr.Row(): | |
# gr.HTML(''' | |
# <h1 style="font-size:16px;font-weight:normal;color:green; ">Countries 'Now Recruiting':</h1> | |
# <p style="font-size:16px;color:green; ">1. Select a Sponsor, a Condition Name and click 'Show Countries'. </p> | |
# <p style="font-size:16px;color:green; ">2. Filter by a Country, for example, 'United States','Germany' etc. </p> | |
# ''') | |
##################################################################################################################################################### | |
# with gr.Row(): | |
############################################################################### | |
# with gr.Column(): | |
# sponsor_input_con = gr.Dropdown( | |
############################################################################ | |
# choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ | |
# "CSL Behring", "Daiichi Sankyo, Inc.",\ | |
# "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ | |
# "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ | |
# "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], | |
# label="Select a Sponsor" | |
# ) | |
############################################################################################################################################################################################### | |
# with gr.Column(): | |
# condition_input_con = gr.Dropdown( | |
# choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ | |
# "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ | |
# "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ | |
# "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ | |
# "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ | |
# "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ | |
## "Urothelial Carcinoma",\ | |
# "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ | |
## " Major","Metabolic", "Generalized Pustular Psoriasis",\ | |
# "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ | |
# "Liver Cirrhosis", \ | |
# "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ | |
# "Psychological Trauma","Renal", "Respiratory",\ | |
# "Schizophrenia", "PTSD", \ | |
# "Venous Thromboembolism", "Wet"], | |
# label= "Select a Condition") | |
############################################################################### | |
# with gr.Column(): | |
# country_input_tr = gr.Dropdown( | |
# choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\ | |
# "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\ | |
# "Malaysia","Mexico","Netherlands", \ | |
# "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\ | |
# "United Kingdom"\ | |
# ], | |
# label="Filter by a Country") | |
########################################################################################################################################################################################### | |
########################################################################################################################################################### | |
# with gr.Row(): | |
# condition_button = gr.Button("Show Countries") | |
# Then, create the clear button and add the dropdown input to it | |
# clear_cn_btn = gr.ClearButton() | |
# clear_cn_btn.add(condition_input_con) | |
#clear_cn_btn.add(sponsor_input_con) | |
# clear_cn_btn.add(country_input_tr) | |
################# # 3rd row################################################################# | |
# with gr.Row(): | |
################################################################ ############## ############################################################################### | |
# with gr.Column(): | |
# academia_input_con = gr.Textbox(lines=1, label="Type a Sponsor Name:") | |
# clear_cn_btn.add(academia_input_con) | |
################################################################ ############## ############################################################################### | |
# with gr.Column(): | |
# condition_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:") | |
# clear_cn_btn.add(condition_input_type) | |
############################################################################### | |
############################################################################################################################################################################## | |
# with gr.Row(): | |
# summary_block_cond = gr.HTML(label="Countries with Recruiting Clinical Trials:" ) | |
# with gr.Row(): | |
#bubble_map_trial = gr.Plot() | |
# with gr.Row(): | |
# trial_countries = gr.Plot() | |
# with gr.Row(): | |
# condition_output = gr.HTML(label="List of Recruiting Trials") | |
# condition_output = gr.Textbox(label="List of Recruiting Trials") | |
## clear output ? | |
# clear_cn_btn.add(summary_block_cond) | |
# clear_cn_btn.add(trial_countries) | |
#clear_cn_btn.add(bubble_map_trial) | |
# clear_cn_btn.add(condition_output) | |
############################################################ Site ############################# ##################################################################### | |
with gr.TabItem("Locations"):
    # "Locations" tab: the user picks a condition, a sponsor and a country
    # (or types a free-text sponsor / condition) and presses "Show Sites".
    # The components created here are passed to condition_view_s by the
    # condition_button_s.click(...) call in the event-wiring section below.

    # Row 1: static usage instructions.
    with gr.Row():
        gr.HTML('''
            <h1 style="font-size:16px;font-weight:normal;color:green; ">Sites 'Now Recruiting':</h1>
            <p style="font-size:16px;color:green; ">1. Select a Condition, Sponsor and Country and click 'Show Sites'. </p>
            <p style="font-size:16px;color:green; ">2. Review each Site and Cities with the Trial Ids and the Conditions. </p>
            ''')

    # Row 2: the three dropdown filters, one per column.
    with gr.Row():
        with gr.Column():
            # NOTE(review): " Major" (leading space) looks like the tail of
            # "Depressive Disorder, Major" split at its comma, and "Wet" /
            # "Interstitial" look like truncated terms -- confirm with the
            # author before changing them; they are kept verbatim here.
            condition_input_s = gr.Dropdown(
                choices=[
                    "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
                    "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer", "Endometrial Cancer",
                    "Esophageal Cancer", "Gallbladder Carcinoma", "Gastric Cancer", "Glioblastoma", "Head and Neck Cancer",
                    "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
                    "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma", "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
                    "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer", "Renal Cancer",
                    "Solid Tumor", "Stomach Cancer", "Rectal Cancer", "Triple Negative Breast Cancer", "Thyroid Cancer",
                    "Urothelial Carcinoma",
                    "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder", "Bronchiectasis", "Cognitive Deficit", "COPD",
                    "Chronic Kidney Diseases", "Crohn Disease", "Diabetes", "Diabetic Retinopathy", "Depression", "Depressive Disorder",
                    " Major", "Metabolic", "Generalized Pustular Psoriasis",
                    "Heart Failure", "Hepatic Insufficiency", "Hypertension", "Idiopathic Pulmonary Fibrosis", "Interstitial",
                    "Liver Cirrhosis",
                    "MASH", "Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases", "Psoriasis",
                    "Psychological Trauma", "Renal", "Respiratory",
                    "Schizophrenia", "PTSD",
                    "Venous Thromboembolism", "Wet",
                ],
                label="Select a Condition",
            )

        with gr.Column():
            sponsor_input_con_s = gr.Dropdown(
                choices=[
                    "AbbVie", "Amgen", "AstraZeneca", "Bayer", "Biogen", "Bristol-Myers Squibb", "Boehringer Ingelheim",
                    "CSL Behring", "Daiichi Sankyo, Inc.",
                    "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences", "GlaxoSmithKline", "Hoffmann-La Roche",
                    "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center", "Merck Sharp & Dohme LLC", "ModernaTX, Inc.", "National Cancer Institute",
                    "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer", "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
                ],
                label="Select a Sponsor",
            )

        with gr.Column():
            # "Colombia" was previously misspelled "Columbia", which is not a
            # country name and therefore could not select Colombian sites.
            country_input_s = gr.Dropdown(
                choices=[
                    "United States", "Argentina", "Australia", "Austria", "Belgium", "Brazil", "Bulgaria", "Canada", "Colombia", "China", "Chile", "Croatia", "Czechia", "Denmark", "Finland", "France", "Greece", "Germany", "Hungary",
                    "India", "Ireland", "Israel", "Italy", "Japan", "Korea", "Latvia",
                    "Malaysia", "Mexico", "Netherlands",
                    "New Zealand", "Norway", "Poland", "Portugal", "Romania", "Serbia", "Singapore", "Slovakia", "Spain", "South Africa", "Sweden", "Switzerland", "Taiwan", "Turkey",
                    "United Kingdom",
                ],
                label="Select a Country",
            )

    # Row 3: action button and the clear button.  Every input and output of
    # this tab is registered on clear_cn_btn so one click resets the tab.
    with gr.Row():
        condition_button_s = gr.Button("Show Sites")
        clear_cn_btn = gr.ClearButton()
        clear_cn_btn.add(condition_input_s)
        clear_cn_btn.add(sponsor_input_con_s)
        clear_cn_btn.add(country_input_s)

    # Row 4: free-text alternatives to the sponsor / condition dropdowns.
    with gr.Row():
        with gr.Column():
            academia_input_con_s = gr.Textbox(lines=1, label="Type a Sponsor Name:")
            clear_cn_btn.add(academia_input_con_s)
        with gr.Column():
            condition_input_site = gr.Textbox(lines=1, label="Filter by typing a Condition:")
            clear_cn_btn.add(condition_input_site)

    # Output area: summary HTML, two plots separated by a heading, then the
    # detailed HTML listing.  Creation order determines on-screen order.
    with gr.Row():
        summary_block_cond_s = gr.HTML(label="Sites where Sponsors Now Recruiting for Clinical Trials:")
    with gr.Row():
        site_cond = gr.Plot()
    with gr.Row():
        gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Recruiting Sites with Trial Ids and Conditions </h1>')
    with gr.Row():
        country_site = gr.Plot()
    with gr.Row():
        condition_output_s = gr.HTML(label="List of Recruiting Trials for Country, Sites")

    # Outputs are cleared together with the inputs registered above.
    clear_cn_btn.add(summary_block_cond_s)
    clear_cn_btn.add(condition_output_s)
    clear_cn_btn.add(country_site)
    clear_cn_btn.add(site_cond)
############################################################################# TIMELINES ############################################################################# | |
# with gr.TabItem("Timeline"): | |
############################################################## | |
# with gr.Row(): | |
#################################################################################################################################################### | |
# gr.HTML(''' | |
# <h1 style="font-size:16px;font-weight:normal;color:green; ">Timelines for 'Now Recruiting' Trials:</h1> | |
# <p style="font-size:16px;color:green; ">1. Select a Sponsor and click 'Show Timelines'. </p> | |
# <p style="font-size:16px;color:green; ">2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.</p> | |
# ''') | |
########################################################################################### | |
# with gr.Row(): | |
# with gr.Column(): | |
########################################################################################################################################## | |
# sponsor_input_cont = gr.Dropdown( | |
############################################################################ | |
# choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ | |
# "CSL Behring", "Daiichi Sankyo, Inc.",\ | |
# "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ | |
# "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ | |
# "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], | |
# label="Select a Sponsor" | |
# ) | |
############################################################################################################################################################### | |
# with gr.Column(): | |
# condition_input_cont= gr.Dropdown( | |
# choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ | |
# "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ | |
# "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ | |
# "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ | |
# "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ | |
# "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ | |
## "Urothelial Carcinoma",\ | |
# "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ | |
# "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ | |
# " Major","Metabolic", "Generalized Pustular Psoriasis",\ | |
# "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ | |
# "Liver Cirrhosis", \ | |
# "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ | |
# "Psychological Trauma","Renal", "Respiratory",\ | |
# "Schizophrenia", "PTSD", \ | |
# "Venous Thromboembolism", "Wet"], | |
# label="Filter by a Condition") | |
############################################################################################################################################################### | |
# with gr.Column(): | |
# country_input_trt = gr.Dropdown( | |
# choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\ | |
# "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\ | |
# "Malaysia","Mexico","Netherlands", \ | |
# "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\ | |
# "United Kingdom"\ | |
# ], | |
# label="Filter by a Country") | |
########################################################################################### | |
# with gr.Row(): | |
# condition_button_t = gr.Button("Show Timelines") | |
# Then, create the clear button and add the dropdown input to it | |
# clear_cn_btn = gr.ClearButton() | |
# clear_cn_btn.add(condition_input_cont) | |
# clear_cn_btn.add(sponsor_input_cont) | |
# clear_cn_btn.add(country_input_trt) | |
########################################################################################### | |
# with gr.Row(): | |
############################################################################################################################################################### | |
# with gr.Column(): | |
# academia_input_cont = gr.Textbox(lines=1, label="Type a Sponsor Name:") | |
# clear_cn_btn.add(academia_input_cont) | |
############################################################################################################################################################### | |
#with gr.Column(): | |
# condition_input_typet = gr.Textbox(lines=1, label="Filter by typing a Condition:") | |
# clear_cn_btn.add(condition_input_typet) | |
################################################################################################################################## | |
########################################################################################### | |
# with gr.Row(): | |
# summary_block_condt = gr.HTML(label="Countries with Recruiting Clinical Trials:" ) | |
########################################################################################### | |
# with gr.Row(): | |
# bubble_map_trial = gr.Plot() | |
########################################################################################### | |
# with gr.Row(): | |
# condition_outputt = gr.HTML(label="List of Recruiting Trials") | |
# condition_output = gr.Textbox(label="List of Recruiting Trials") | |
## clear output ? | |
# clear_cn_btn.add(summary_block_condt) | |
# clear_cn_btn.add(bubble_map_trial) | |
# clear_cn_btn.add(condition_outputt) | |
############################################################ Eligibility ############## | |
with gr.TabItem("Eligibility"):
    # "Eligibility" tab: the user types one NCT id and presses
    # "Show Eligibility".  nctID_input is passed to trial_view, whose results
    # are shown in trial_output and eligibilities_plot (see the
    # trial_button.click(...) call in the event-wiring section below).

    # Row 1: static usage instructions (heading typo "Crietria" corrected).
    with gr.Row():
        gr.HTML('''
            <h1 style="font-size:16px;font-weight:normal;color:green; ">Eligibility Criteria for a Trial:</h1>
            <p style="font-size:16px;color:green; ">1. Type a single Trial's NCT Id,For Example: NCT05512377 or NCT04924075 or NCT04419506 etc. and click 'Show Eligibility'. </p>
            <p style="font-size:16px;color:green; ">2. Inclusion and Exclusion Criteria for that single Trial are displayed with the Diseases, Diagnostic Procedures and Medications highlighted. </p>
            <p style="font-size:16px;color:green; ">3. Wait time approximately 30 seconds for the model to run and highlight eligibility text. </p>
            ''')

    # Row 2: NCT id input, action button and clear button.
    with gr.Row():
        nctID_input = gr.Textbox(lines=1, label="Type a Trial NCT Id: ")
        trial_button = gr.Button("Show Eligibility")
        clear_tn_btn = gr.ClearButton()
        clear_tn_btn.add(nctID_input)

    # Row 3: HTML detail view of the trial.
    with gr.Row():
        trial_output = gr.HTML(label="Detail of Recruiting Trials")

    # Row 4: plot output.  The separate inclusion/exclusion HTML panes and
    # the exclusion-concept HighlightedText that used to live here are
    # disabled, so only these two outputs exist.
    with gr.Row():
        eligibilities_plot = gr.Plot()

    # Outputs are cleared together with the input registered above.
    clear_tn_btn.add(trial_output)
    clear_tn_btn.add(eligibilities_plot)
############################################################################################################################################## | |
################################ EVENT BUTTONS at GRADIO ################################################################################################################################ | |
## Sponsors | |
#sponsor_button.click(disease_view, inputs=[disease_input,disease_input_text, sponsor_input, academia_input], outputs=[summary_block,summary_block_collbs,\ | |
# sponsor_button.click(disease_view, inputs=[disease_input, sponsor_input], outputs=[summary_block,summary_block_collbs,\ | |
# output_block_conditions,output_block_conditions_collbs,\ | |
#condition_others,\ | |
#condition_sunbursts, | |
# sponsor_trees\ | |
# ,collaborator_trees\ | |
# ]) | |
## Conditions | |
# s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc,s_disease_input_type_phc, s_sponsor_input_phc,s_academia_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\ | |
# Conditions tab: run disease_view_phc on click with the selected disease and
# sponsor, and route its four results to the summary, the conditions block
# and the two plot components.
s_button_phc.click(
    fn=disease_view_phc,
    inputs=[s_disease_input_phc, s_sponsor_input_phc],
    outputs=[
        summary_block_phc,
        output_block_conditions_phc,
        tree_map_cond_nct,
        sunburst_map_cond_nct,
    ],
)
## Trials | |
# s_button_phs.click(disease_view_phs, inputs=[s_disease_input_phs,s_disease_input_type_phs, s_sponsor_input_phs,s_academia_input_phs], outputs=[summary_block_phs, output_block_conditions_phs,\ | |
# nct_org_map]) | |
#s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\ | |
# tree_map_cond_nct_n, nct_org_map_n,trial_plot]) | |
# s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\ | |
# tree_map_cond_nct_n, trial_plot]) | |
# s_button_phs_c.click(disease_view_phs_c, inputs=[s_disease_input_phs_c,s_disease_input_type_phs_c, s_sponsor_input_phs_c,s_academia_input_phs_c], outputs=[summary_block_phs_c, output_block_conditions_phs_c,\ | |
# tree_map_cond_nct_c, nct_org_map_c,trial_plot_c, time_plot_c]) | |
### Drugs | |
# Drugs tab: run drug_view on click with the four filter inputs and route its
# three results to the summary, the conditions block and the sankey plot.
s_drug_button.click(
    fn=drug_view,
    inputs=[
        s_disease_input,
        s_disease_input_type,
        s_sponsor_input,
        s_academia_input,
    ],
    outputs=[
        drug_summary_block,
        drug_output_block_conditions,
        sankey_map_drug,
    ],
)
## Country | |
# condition_button.click(condition_view, inputs=[condition_input_con, country_input_tr,condition_input_type, sponsor_input_con, academia_input_con], outputs=[summary_block_cond,condition_output,trial_countries]) | |
## Site | |
# Locations tab: run condition_view_s on click.  The input order (condition,
# country, typed condition, sponsor, typed sponsor) and the output order are
# positional, so they must stay exactly as listed.
condition_button_s.click(
    fn=condition_view_s,
    inputs=[
        condition_input_s,
        country_input_s,
        condition_input_site,
        sponsor_input_con_s,
        academia_input_con_s,
    ],
    outputs=[
        summary_block_cond_s,
        condition_output_s,
        site_cond,
        country_site,
    ],
)
##Timelines | |
# condition_button_t.click(condition_viewt, inputs=[condition_input_cont, country_input_trt,condition_input_typet, sponsor_input_cont, academia_input_cont], outputs=[summary_block_condt,condition_outputt,bubble_map_trial]) | |
## Map | |
# Test this way NCT04419506 | |
# trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output, formatted_inclusions_output,formatted_exclusions_output,concept_inclusion,concept_exclusion]) | |
# Test this way NCT04419506 | |
# trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output,concept_inclusion,concept_exclusion]) | |
# Eligibility tab: run trial_view on click with the typed NCT id and route
# its two results to the HTML detail view and the eligibility plot.
trial_button.click(
    fn=trial_view,
    inputs=[nctID_input],
    outputs=[trial_output, eligibilities_plot],
)
# trial_buttons.click(trial_view_map, inputs=[nctID_inputs], outputs=[summary_block_trial_map, world_map,trial_output_map]) | |
# Start the Gradio server with a public share link.
# NOTE(review): per the Gradio docs, launch() blocks the main thread when run
# as a script unless prevent_thread_lock=True, so the snippet below presumably
# only runs once the server has stopped -- confirm whether it is still needed.
trial_app.launch(share=True)
#trial_app.launch(share=True, debug = "TRUE")
import requests
import json

# ---------------------------------------------------------------------------
# Stand-alone check of the ClinicalTrials.gov v2 "studies" endpoint: request
# the NCT ids of recruiting studies for one lead sponsor and print them as a
# one-column DataFrame.
# ---------------------------------------------------------------------------

# Base URL for the v2 API
base_url = "https://clinicaltrials.gov/api/v2/studies"

# Query parameters for the API call
params = {
    "query.lead": "Boehringer Ingelheim",  # Query the lead sponsor field
    "filter.overallStatus": "RECRUITING",  # Filter by the overall status
    "fields": "protocolSection.identificationModule.nctId",  # Only return the NCT id
}

# Send the request.  A timeout is mandatory: without one, requests waits
# forever on a stalled connection.  Network failures are reported instead of
# crashing the script with a traceback.
try:
    response = requests.get(base_url, params=params, timeout=30)
except requests.RequestException as exc:
    response = None
    print(f"ClinicalTrials.gov request failed: {exc}")

if response is not None and response.status_code == 200:
    data = response.json()
    # Walk the payload defensively: a missing "studies" list or a study
    # without an identification module is skipped rather than raising KeyError.
    nct_ids = [
        study.get('protocolSection', {}).get('identificationModule', {}).get('nctId')
        for study in data.get('studies', [])
    ]
    nct_ids = [nct_id for nct_id in nct_ids if nct_id is not None]
    # Create a DataFrame from the list and show it
    df = pd.DataFrame(nct_ids, columns=['NCTId'])
    print(df)
elif response is not None:
    # Previously a non-200 answer was ignored silently.
    print(f"ClinicalTrials.gov returned HTTP {response.status_code}")