diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,7 +1,12 @@
+# Dependency: install gradio before launching (shell: pip install gradio); the IPython-only "!pip" form is a SyntaxError in app.py
+#transformers
+
+
+
########### AGENT: Clincialtrial.gov ###################################################################################################
##Gradio App: TRIAL CONNECT
-#Author: Tamer Chowdhury' Dec 2023
+#Author: Tamer Chowdhury' Nov 2024
#tamer.chowdhury@gmail.com
##################################################################################################################################
@@ -18,254 +23,377 @@ import requests
from IPython.core.display import display_markdown
-
-########### Clinical Trials. gov API for study fileds with Recruiting Trials Only ###################################
-
+######################### from CLAUDE###########################################
import aiohttp
import asyncio
-
+import pandas as pd
+import io
+import json
async def fetch(session, url, params):
- async with session.get(url, params=params) as response:
- return await response.text()
-#############################################################################################################################################################
-async def get_nct_ids (lead_sponsor_name=None, disease_area=None, overall_status= None, location_country=None, NCTId=None, max_records=None, blocks=30):
- base_url = "https://clinicaltrials.gov/api/query/study_fields"
- fields = "NCTId,OrgStudyId,BriefTitle,Condition,Phase,OverallStatus,PrimaryCompletionDate,EnrollmentCount,StudyType,StudyPopulation,\
-LocationCountry,LocationCity,DesignPrimaryPurpose,LocationFacility,ArmGroupLabel,LeadSponsorName,InterventionName,PrimaryOutcomeMeasure,\
-StartDate,CollaboratorName"
+ try:
+ async with session.get(url, params=params) as response:
+ if response.status == 200:
+ text = await response.text()
+ try:
+ return json.loads(text)
+ except json.JSONDecodeError as e:
+ print(f"Failed to decode JSON: {text[:200]}...")
+ raise
+ else:
+ print(f"HTTP Error: {response.status}")
+ print(f"Response text: {await response.text()}")
+ return None
+ except Exception as e:
+ print(f"Error in fetch: {str(e)}")
+ return None
+
+async def get_nct_ids(lead_sponsor_name=None, disease_area=None, overall_status=None,
+ location_country=None, NCTId=None, max_records=None, blocks=30):
+ base_url = "https://clinicaltrials.gov/api/v2/studies"
+
+ # Define the fields we want to retrieve (mapped to v2 API structure)
+ fields = [
+ "protocolSection.identificationModule.nctId",
+ "protocolSection.identificationModule.orgStudyIdInfo",
+ "protocolSection.identificationModule.briefTitle",
+ "protocolSection.conditionsModule.conditions",
+ "protocolSection.designModule.phases",
+ "protocolSection.statusModule.overallStatus",
+ "protocolSection.statusModule.primaryCompletionDateStruct",
+ "protocolSection.designModule.enrollmentInfo",
+ "protocolSection.designModule.studyType",
+ "protocolSection.eligibilityModule.studyPopulation",
+ "protocolSection.contactsLocationsModule.locations",
+ "protocolSection.designModule.designInfo",
+ "protocolSection.armsInterventionsModule.armGroups",
+ "protocolSection.sponsorCollaboratorsModule.leadSponsor",
+ "protocolSection.armsInterventionsModule.interventions",
+ "protocolSection.outcomesModule.primaryOutcomes",
+ "protocolSection.statusModule.startDateStruct"
+ ]
+
+ # Build base parameters
params = {
- "fields": fields,
- "fmt": "csv"
+ "format": "json",
+ "fields": ",".join(fields),
+ "pageSize": "1000",
+ "countTotal": "true"
}
- ## Status is Recruiting
- #overall_status='Recruiting'
- #overall_status='Not yet recruiting'
+ print("Constructing query...")
-#############################
+ # Build query parameters
if NCTId:
- params["expr"] = f"{NCTId}"
+ params["query.id"] = NCTId
else:
- # overall_status = overall_status.replace(" ", "+")
if disease_area:
- disease_area = disease_area.replace(" ", "+")
+ params["query.cond"] = disease_area.replace(" ", "+")
if lead_sponsor_name:
- lead_sponsor_name = lead_sponsor_name.replace(" ", "+")
+ params["query.lead"] = lead_sponsor_name.replace(" ", "+")
if location_country:
- location_country = location_country.replace(" ", "+")
+ params["query.locn"] = location_country.replace(" ", "+")
+ if overall_status:
+ params["filter.overallStatus"] = overall_status.upper()
- if disease_area and lead_sponsor_name:
- # params["expr"] = f"{disease_area}+AND+{overall_status}+AND+{lead_sponsor_name}"
- params["expr"] = f"{disease_area}+AND+{lead_sponsor_name}"
+ print(f"Full parameters: {params}")
- elif disease_area:
- # params["expr"] = f"{disease_area}+AND+{overall_status}"
- params["expr"] = f"{disease_area}"
-
- elif lead_sponsor_name:
- # params["expr"] = f"{lead_sponsor_name}+AND+{overall_status}"
- params["expr"] = f"{lead_sponsor_name}"
-
-
- ### to ensure it starts from 1 to 1000 and increment
- all_trials = []
- max_trials_per_request = 1000
+ all_studies = []
+ next_page_token = None
async with aiohttp.ClientSession() as session:
- tasks = []
- for i in range(1, blocks + 1): # Change the range to start from 1
- min_rank = (i - 1) * max_trials_per_request + 1 # Subtract 1 from i to get the correct min_rank
- # print( min_rank )
- max_rank = i * max_trials_per_request # Simplify the max_rank calculation
- # print( max_rank )
- params_copy = params.copy()
- params_copy["min_rnk"] = min_rank
- params_copy["max_rnk"] = max_rank
- task = fetch(session, base_url, params_copy)
- tasks.append(task)
- responses = await asyncio.gather(*tasks)
- # Create a list to store the DataFrames
- trials_dfs = []
-
- # Fetch data for each block and store it in a separate DataFrame
- for i, response in enumerate(responses):
- skip_rows = 10 if not NCTId else 9
- if len(response.strip().splitlines()) > 1:
- interim_df = pd.read_csv(io.StringIO(response), skiprows=skip_rows)
- else:
- interim_df = pd.DataFrame()
-
- # Print the number of records in the current DataFrame
- print(f"Number of records in interim_df{i + 1}: {len(interim_df)}") # renamed to interim_df
-
- # Add the current DataFrame to the list
- trials_dfs.append(interim_df) # renamed to interim_df
-
- # Concatenate all the DataFrames
- trials_final_df = pd.concat(trials_dfs, ignore_index=True)
-
- # Print the number of records in the final DataFrame
- print(f"Number of records returned from all the Block Request: {len(trials_final_df)}")
-
- recruiting_trials = trials_final_df
+ while True:
+ try:
+ if next_page_token:
+ params["pageToken"] = next_page_token
- recruiting_trials_list = []
+ response_data = await fetch(session, base_url, params)
- #############################
- if NCTId:
+ if not response_data or not isinstance(response_data, dict):
+ print(f"Invalid response data")
+ break
+
+ studies = response_data.get('studies', [])
+ if not studies:
+ print("No more studies found")
+ break
+ all_studies.extend(studies)
+ print(f"Retrieved {len(studies)} studies. Total so far: {len(all_studies)}")
+ # Print first study details for debugging
+ if len(all_studies) > 0:
+ first_study = all_studies[0]
+ print("\nFirst study example:")
+ print(f"NCT ID: {_get_nested_value(first_study, ['protocolSection', 'identificationModule', 'nctId'])}")
+ print(f"Status: {_get_nested_value(first_study, ['protocolSection', 'statusModule', 'overallStatus'])}")
-##############################################
- for index, row in recruiting_trials.iterrows():
- # print(f"Checking row {index}: OverallStatus={row['OverallStatus']}, provided overall_status={overall_status}")
- #if not NCTId or (NCTId and row['OverallStatus'] == overall_status):
- # if row['OverallStatus'] == overall_status:
- trial_info = {'NCTId': row['NCTId'],
- 'Phase': row['Phase'],
- 'OrgStudyId': row['OrgStudyId'],
- 'Status': row['OverallStatus'],
- 'Condition': row['Condition'],
- 'CompletionDate': row['PrimaryCompletionDate'],
- 'EnrollmentCount': row['EnrollmentCount'],
- 'StudyType': row['StudyType'],
- 'Arm': row['ArmGroupLabel'],
- 'Drug': row['InterventionName'],
- 'Country': row['LocationCountry'],
- 'City': row['LocationCity'],
- 'Site': row['LocationFacility'],
- 'StudyPopulation': row['StudyPopulation'],
- 'Sponsor': row['LeadSponsorName'],
- 'Collaborator': row['CollaboratorName'],
- 'StartDate': row['StartDate'],
- 'PrimaryMeasure': row['PrimaryOutcomeMeasure'],
- 'Purpose': row['DesignPrimaryPurpose'],
- 'BriefTitle': row['BriefTitle']}
- # Print the overall_status and the length of recruiting_trials_list
- #print(f"Overall status: {overall_status}")
- #print(f"Number of trials with status '{overall_status}': {len(recruiting_trials_list)}")
- recruiting_trials_list.append(trial_info)
+ next_page_token = response_data.get('nextPageToken')
+ if not next_page_token or (max_records and len(all_studies) >= max_records):
+ break
+ except Exception as e:
+ print(f"Error processing page: {str(e)}")
+ break
- else:
- ##############################################
- for index, row in recruiting_trials.iterrows():
- # print(f"Checking row {index}: OverallStatus={row['OverallStatus']}, provided overall_status={overall_status}")
- #if not NCTId or (NCTId and row['OverallStatus'] == overall_status):
- if row['OverallStatus'] == overall_status:
- trial_info = {'NCTId': row['NCTId'],
- 'Phase': row['Phase'],
- 'OrgStudyId': row['OrgStudyId'],
- 'Status': row['OverallStatus'],
- 'Condition': row['Condition'],
- 'CompletionDate': row['PrimaryCompletionDate'],
- 'EnrollmentCount': row['EnrollmentCount'],
- 'StudyType': row['StudyType'],
- 'Arm': row['ArmGroupLabel'],
- 'Drug': row['InterventionName'],
- 'Country': row['LocationCountry'],
- 'City': row['LocationCity'],
- 'Site': row['LocationFacility'],
- 'StudyPopulation': row['StudyPopulation'],
- 'Sponsor': row['LeadSponsorName'],
- 'Collaborator': row['CollaboratorName'],
- 'StartDate': row['StartDate'],
- 'PrimaryMeasure': row['PrimaryOutcomeMeasure'],
- 'Purpose': row['DesignPrimaryPurpose'],
- 'BriefTitle': row['BriefTitle']}
- # Print the overall_status and the length of recruiting_trials_list
- #print(f"Overall status: {overall_status}")
- #print(f"Number of trials with status '{overall_status}': {len(recruiting_trials_list)}")
- recruiting_trials_list.append(trial_info)
+ # Convert all studies to the required format
+ recruiting_trials_list = []
+ for study in all_studies:
+ try:
+ # Status filtering is now handled by the API, so we don't need to filter here
+ trial_info = {
+ 'NCTId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'nctId']),
+ 'Phase': _get_first_item(study, ['protocolSection', 'designModule', 'phases']),
+ 'OrgStudyId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'orgStudyIdInfo', 'id']),
+ 'Status': _get_nested_value(study, ['protocolSection', 'statusModule', 'overallStatus']),
+ 'Condition': '|'.join(_get_nested_value(study, ['protocolSection', 'conditionsModule', 'conditions'], [])),
+ 'CompletionDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'primaryCompletionDateStruct', 'date']),
+ 'EnrollmentCount': _get_nested_value(study, ['protocolSection', 'designModule', 'enrollmentInfo', 'count']),
+ 'StudyType': _get_nested_value(study, ['protocolSection', 'designModule', 'studyType']),
+ 'Arm': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'armGroups'], 'label'),
+ 'Drug': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'interventions'], 'name'),
+ 'Country': _get_location_info(study, 'country'),
+ 'City': _get_location_info(study, 'city'),
+ 'Site': _get_location_info(study, 'facility'),
+ 'StudyPopulation': _get_nested_value(study, ['protocolSection', 'eligibilityModule', 'studyPopulation']),
+ 'Sponsor': _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'leadSponsor', 'name']),
+ 'Collaborator': _get_collaborators(study),
+ 'StartDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'startDateStruct', 'date']),
+ 'PrimaryMeasure': _get_first_item(study, ['protocolSection', 'outcomesModule', 'primaryOutcomes'], 'measure'),
+ 'Purpose': _get_nested_value(study, ['protocolSection', 'designModule', 'designInfo', 'primaryPurpose']),
+ 'BriefTitle': _get_nested_value(study, ['protocolSection', 'identificationModule', 'briefTitle'])
+ }
+ recruiting_trials_list.append(trial_info)
+
+ except Exception as e:
+ print(f"Error processing study: {str(e)}")
+ continue
+
+ print(f"Total studies processed: {len(recruiting_trials_list)}")
return recruiting_trials_list
-##########################################################################################################################################################
+# Helper functions for extracting fields from v2 API study records
+def _get_nested_value(obj, path, default=None):
+ try:
+ current = obj
+ for key in path:
+ if current is None:
+ return default
+ current = current.get(key)
+ return current if current is not None else default
+ except (KeyError, TypeError, AttributeError):
+ return default
+
+def _get_first_item(obj, path, field=None):
+ try:
+ items = _get_nested_value(obj, path, [])
+ if items and isinstance(items, list):
+ if field:
+ return items[0].get(field)
+ return items[0]
+ return None
+ except (IndexError, AttributeError):
+ return None
-#########################################################################################################
-## API For Inclusions
+def _get_location_info(study, info_type):
+ try:
+ locations = _get_nested_value(study, ['protocolSection', 'contactsLocationsModule', 'locations'], [])
+ if info_type == 'facility':
+ values = [loc.get('facility', '') for loc in locations if loc.get('facility')]
+ else:
+ values = [loc.get(info_type, '') for loc in locations if loc.get(info_type)]
+ return '|'.join(filter(None, values))
+ except Exception:
+ return None
+
+def _get_collaborators(study):
+ try:
+ collaborators = _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'collaborators'], [])
+ return '|'.join(collab.get('name', '') for collab in collaborators if collab.get('name'))
+ except Exception:
+ return None
+########### ClinicalTrials.gov API for study fields with Recruiting Trials Only ###################################
+################# FROM CLAUDE API FOR ELIGIBILITY###############################
import requests
import re
def get_formatted_inclusion_criteria(nct_id):
- base_url = "https://clinicaltrials.gov/api/query/full_studies?expr="
- study_fields = "&fields=EligibilityCriteria"
- fmt = "&fmt=json"
-
- # Construct the API URL
- api_url = f"{base_url}{nct_id}{study_fields}{fmt}"
-
- # Send the API request and parse the JSON response
- response = requests.get(api_url)
- data = response.json()
+ """
+ Get and format inclusion criteria for a clinical trial using ClinicalTrials.gov API v2
+
+ Args:
+ nct_id (str): The NCT ID of the trial
+
+ Returns:
+ str: Formatted inclusion criteria as a numbered list, or None if not found
+ """
+ # V2 API endpoint
+ base_url = "https://clinicaltrials.gov/api/v2/studies"
+
+ # Parameters for the API request
+ params = {
+ "format": "json",
+ "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
+ "query.id": nct_id
+ }
- # Extract the inclusion criteria text
try:
- eligibility_criteria = data['FullStudiesResponse']['FullStudies'][0]['Study']['ProtocolSection']['EligibilityModule']['EligibilityCriteria']
-
- #inclusion_criteria = re.split(r'\b(?:Exclusion Criteria:|exclusion criteria)\b', eligibility_criteria, flags=re.IGNORECASE)[0].strip()
- #inclusion_criteria = re.split(r'\b(?:Exclusion Criteria)\b', eligibility_criteria, flags=re.IGNORECASE)[0].strip()
- inclusion_criteria = re.split(r'\b(?:Exclusion Criteria)\b', eligibility_criteria)[0].strip()
-
-
-
- # Split the inclusion criteria into a list
- inclusions = re.split('\n+', inclusion_criteria)
-
- # Remove "Inclusion criteria" text if it's present in the list
- inclusions = [inclusion for inclusion in inclusions if not re.search(r'\bInclusion\s*Criteria\b', inclusion, flags=re.IGNORECASE)]
+ # Send the API request
+ response = requests.get(base_url, params=params)
+ response.raise_for_status() # Raise an exception for bad status codes
+ data = response.json()
+
+ # Extract the eligibility criteria text from the v2 API response
+ if not data.get('studies') or len(data['studies']) == 0:
+ print(f"No data found for Trial NCT ID: {nct_id}")
+ return None
+
+ eligibility_criteria = data['studies'][0]['protocolSection']['eligibilityModule']['eligibilityCriteria']
+
+ # Split at "Exclusion Criteria" to get only inclusion criteria
+ # Using a more robust splitting approach
+ inclusion_criteria = re.split(r'\b(?:Exclusion\s+Criteria:?)\b', eligibility_criteria, flags=re.IGNORECASE)[0].strip()
+
+ # Split the inclusion criteria into a list by line breaks
+ # Handle different types of line breaks
+ inclusions = re.split(r'\r?\n+', inclusion_criteria)
+
+ # Clean up the inclusions:
+ # 1. Remove "Inclusion criteria" header
+ # 2. Remove empty lines
+ # 3. Remove lines that are just whitespace or punctuation
+ cleaned_inclusions = []
+ for inclusion in inclusions:
+ inclusion = inclusion.strip()
+ if (inclusion and
+ not re.search(r'^\s*inclusion\s+criteria:?\s*$', inclusion, flags=re.IGNORECASE) and
+ not re.search(r'^\s*[-•*]\s*$', inclusion)):
+
+ # Remove bullet points and dashes at the start of lines
+ inclusion = re.sub(r'^\s*[-•*]\s*', '', inclusion)
+
+ # Add to cleaned list if not empty after cleanup
+ if inclusion:
+ cleaned_inclusions.append(inclusion)
# Format the list as a numbered list with periods
- formatted_inclusions = [f"{i+1}. {inclusion.strip()}." for i, inclusion in enumerate(inclusions)]
+ formatted_inclusions = []
+ for i, inclusion in enumerate(cleaned_inclusions, 1):
+ # Ensure the line ends with a period
+ if not inclusion.endswith('.'):
+ inclusion = inclusion + '.'
+ formatted_inclusions.append(f"{i}. {inclusion}")
# Join the list into a single string
return "\n".join(formatted_inclusions)
- except (IndexError, KeyError):
- print(f"Inclusion criteria not found for Trial NCT ID: {nct_id}")
+ except requests.exceptions.RequestException as e:
+ print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
+ return None
+ except (IndexError, KeyError) as e:
+ print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
+ return None
+ except Exception as e:
+ print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
return None
-## ############################API For Exclusions###################################################################################################################################################
-
-def get_formatted_exclusion_criteria(nct_id):
- base_url = "https://clinicaltrials.gov/api/query/full_studies?expr="
- study_fields = "&fields=EligibilityCriteria"
- fmt = "&fmt=json"
-
- # Construct the API URL
- api_url = f"{base_url}{nct_id}{study_fields}{fmt}"
-
- # Send the API request and parse the JSON response
- response = requests.get(api_url)
- data = response.json()
- # Extract the exclusion criteria text
- try:
- eligibility_criteria = data['FullStudiesResponse']['FullStudies'][0]['Study']['ProtocolSection']['EligibilityModule']['EligibilityCriteria']
- #exclusion_criteria = re.split("(?i)(?:^|\n)exclusion criteria", eligibility_criteria)[-1].strip()
- #exclusion_criteria = re.split(r'\b(?:Exclusion Criteria|exclusion criteria)\b', eligibility_criteria, flags=re.IGNORECASE)[1].strip()
- #exclusion_criteria = re.split(r'\b(?:Exclusion Criteria)\b', eligibility_criteria, flags=re.IGNORECASE)[1].strip()
- exclusion_criteria = re.split(r'\b(?:Exclusion Criteria)\b', eligibility_criteria)[1].strip()
+#########################################################################################################
- # Split the exclusion criteria into a list
- exclusions = re.split('\n+', exclusion_criteria)
+## ############################API For Exclusions###################################################################################################################################################
+def get_formatted_exclusion_criteria(nct_id):
+ """
+ Get and format exclusion criteria for a clinical trial using ClinicalTrials.gov API v2
+
+ Args:
+ nct_id (str): The NCT ID of the trial
+
+ Returns:
+ str: Formatted exclusion criteria as a numbered list, or None if not found
+ """
+ # V2 API endpoint
+ base_url = "https://clinicaltrials.gov/api/v2/studies"
+
+ # Parameters for the API request
+ params = {
+ "format": "json",
+ "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
+ "query.id": nct_id
+ }
- # Remove "Exclusion criteria" text if it's present in the list
- exclusions = [exclusion for exclusion in exclusions if not re.search(r'\bExclusion\s*Criteria\b', exclusion, flags=re.IGNORECASE)]
+ try:
+ # Send the API request
+ response = requests.get(base_url, params=params)
+ response.raise_for_status() # Raise an exception for bad status codes
+ data = response.json()
+
+ # Extract the eligibility criteria text from the v2 API response
+ if not data.get('studies') or len(data['studies']) == 0:
+ print(f"No data found for Trial NCT ID: {nct_id}")
+ return None
+
+ eligibility_criteria = data['studies'][0]['protocolSection']['eligibilityModule']['eligibilityCriteria']
+
+ # Split once at the "Exclusion Criteria" heading, consuming its optional colon, and keep everything after it
+ try:
+ exclusion_criteria = re.split(r'\bExclusion\s+Criteria\b\s*:?', eligibility_criteria, maxsplit=1, flags=re.IGNORECASE)[1].strip()
+ except IndexError:
+ # Try alternative patterns if the first one doesn't work
+ try:
+ exclusion_criteria = re.split(r'(?i)(?:^|\n)\s*exclusion criteria\s*[:|-]?', eligibility_criteria)[1].strip()
+ except IndexError:
+ print(f"Could not find exclusion criteria section for Trial NCT ID: {nct_id}")
+ return None
+
+ # Split the exclusion criteria into a list by line breaks
+ # Handle different types of line breaks
+ exclusions = re.split(r'\r?\n+', exclusion_criteria)
+
+ # Clean up the exclusions:
+ # 1. Remove empty lines
+ # 2. Remove lines that are just whitespace or punctuation
+ # 3. Clean up formatting
+ cleaned_exclusions = []
+ for exclusion in exclusions:
+ exclusion = exclusion.strip()
+ if (exclusion and
+ not re.search(r'^\s*$', exclusion) and # Skip empty lines
+ not re.search(r'^\s*[-•*]\s*$', exclusion)): # Skip lines with just bullets
+
+ # Remove bullet points and dashes at the start of lines
+ exclusion = re.sub(r'^\s*[-•*]\s*', '', exclusion)
+
+ # Add to cleaned list if not empty after cleanup
+ if exclusion:
+ cleaned_exclusions.append(exclusion)
# Format the list as a numbered list with periods
- formatted_exclusions = [f"{i+1}. {exclusion.strip()}." for i, exclusion in enumerate(exclusions)]
+ formatted_exclusions = []
+ for i, exclusion in enumerate(cleaned_exclusions, 1):
+ # Ensure the line ends with a period
+ if not exclusion.endswith('.'):
+ exclusion = exclusion + '.'
+ formatted_exclusions.append(f"{i}. {exclusion}")
# Join the list into a single string
return "\n".join(formatted_exclusions)
- except (IndexError, KeyError):
- print(f"Exclusion criteria not found for NCT ID: {nct_id}")
+ except requests.exceptions.RequestException as e:
+ print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
return None
+ except (IndexError, KeyError) as e:
+ print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
+ return None
+ except Exception as e:
+ print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
+ return None
+
+
+
################################# Apply CSS Style to HTML Table ##############################################################################################################
@@ -419,9 +547,9 @@ def split_columns(df, columns_to_split):
return temp_df
-################## Interventional, Observational Trials Lead Sponsor Counts##################################################
+################## INTERVENTIONAL, OBSERVATIONAL Trials Lead Sponsor Counts##################################################
def calculate_summary_stats(df, sponsor):
- study_types = ["Interventional", "Observational"]
+ study_types = ["INTERVENTIONAL", "OBSERVATIONAL"]
summary_stats = []
sponsor_name = sponsor if sponsor else "All Lead Sponsors"
@@ -453,7 +581,7 @@ def calculate_summary_stats(df, sponsor):
############################################################################################################################################
def calculate_summary_stats_collb(df, sponsor):
- study_types = ["Interventional", "Observational"]
+ study_types = ["INTERVENTIONAL", "OBSERVATIONAL"]
summary_stats = []
sponsor_name = sponsor if sponsor else "All Collaborators"
@@ -500,32 +628,32 @@ def calculate_summary_stats_sites(df, sponsor, country):
grouped_df['EnrollmentCount'] = pd.to_numeric(grouped_df['EnrollmentCount'], errors='coerce')
# Count the number of unique NCTIds for each StudyType
- interventional_count = len(grouped_df[grouped_df['StudyType'] == 'Interventional']['NCTId'].unique())
- observational_count = len(grouped_df[grouped_df['StudyType'] == 'Observational']['NCTId'].unique())
+ INTERVENTIONAL_count = len(grouped_df[grouped_df['StudyType'] == 'INTERVENTIONAL']['NCTId'].unique())
+ OBSERVATIONAL_count = len(grouped_df[grouped_df['StudyType'] == 'OBSERVATIONAL']['NCTId'].unique())
# Count the number of unique countries for each StudyType
- interventional_countries = df[df['StudyType'] == 'Interventional']['Country'].nunique()
- observational_countries = df[df['StudyType'] == 'Observational']['Country'].nunique()
+ INTERVENTIONAL_countries = df[df['StudyType'] == 'INTERVENTIONAL']['Country'].nunique()
+ OBSERVATIONAL_countries = df[df['StudyType'] == 'OBSERVATIONAL']['Country'].nunique()
# Count the number of unique sites for each StudyType, grouped by Country, City, and Site
- interventional_grouped = df[df['StudyType'] == 'Interventional'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0]
- observational_grouped = df[df['StudyType'] == 'Observational'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0]
+ INTERVENTIONAL_grouped = df[df['StudyType'] == 'INTERVENTIONAL'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0]
+ OBSERVATIONAL_grouped = df[df['StudyType'] == 'OBSERVATIONAL'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0]
# Calculate the sum of enrollment counts for each StudyType
- interventional_patients = int(grouped_df[grouped_df['StudyType'] == 'Interventional']['EnrollmentCount'].sum())
- observational_patients = int(grouped_df[grouped_df['StudyType'] == 'Observational']['EnrollmentCount'].sum())
+ INTERVENTIONAL_patients = int(grouped_df[grouped_df['StudyType'] == 'INTERVENTIONAL']['EnrollmentCount'].sum())
+ OBSERVATIONAL_patients = int(grouped_df[grouped_df['StudyType'] == 'OBSERVATIONAL']['EnrollmentCount'].sum())
- formatted_interventional_patients = format(interventional_patients, ',')
- formatted_observational_patients = format(observational_patients, ',')
+ formatted_INTERVENTIONAL_patients = format(INTERVENTIONAL_patients, ',')
+ formatted_OBSERVATIONAL_patients = format(OBSERVATIONAL_patients, ',')
sponsor_name = sponsor if sponsor else "All Sponsors"
country_name = country if country else "All Countries"
- return f"{sponsor_name}
{interventional_count} Interventional Trials, in {interventional_countries} Country, at {interventional_grouped} Sites, \
- Recruiting: {formatted_interventional_patients} Planned Patients.
\
- {observational_count} Observational Trials, in {observational_countries} Country, at {observational_grouped} Sites"
+ return f"{sponsor_name}
{INTERVENTIONAL_count} INTERVENTIONAL Trials, in {INTERVENTIONAL_countries} Country, at {INTERVENTIONAL_grouped} Sites, \
+ Recruiting: {formatted_INTERVENTIONAL_patients} Planned Patients.
\
+ {OBSERVATIONAL_count} OBSERVATIONAL Trials, in {OBSERVATIONAL_countries} Country, at {OBSERVATIONAL_grouped} Sites"
- #{observational_count} Observational Trials, in {observational_countries} Country, at {observational_grouped} Sites, Recruiting: {formatted_observational_patients} Planned Patients."
+ #{OBSERVATIONAL_count} OBSERVATIONAL Trials, in {OBSERVATIONAL_countries} Country, at {OBSERVATIONAL_grouped} Sites, Recruiting: {formatted_OBSERVATIONAL_patients} Planned Patients."
################################################ GRADIO STARTS HERE #########################################################
@@ -951,8 +1079,8 @@ import numpy as np
def plot_condition_sunburst (df):
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
# Convert 'Condition' names to uppercase
df['Condition'] = df['Condition'].str.upper()
@@ -998,8 +1126,8 @@ def plot_condition_sunburst (df):
############################################################ Conditions OTHERS ########### ############################################
def plot_condition_others (df):
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
# Convert 'Condition' names to uppercase
df['Condition'] = df['Condition'].str.upper()
@@ -1081,8 +1209,8 @@ def wrap_text(text, max_chars_per_line):
def plot_sponsor_collaborator_tree_map(df):
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
# Group the data by 'Sponsor' and 'Collaborator' and count the number of unique NCTId
df_count = df.groupby(['Sponsor', 'Collaborator'])['NCTId'].nunique().reset_index()
@@ -1124,8 +1252,8 @@ def plot_sponsor_collaborator_tree_map(df):
def plot_sponsor_tree(df):
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
df['Phase'] = df['Phase'].fillna('UNKNOWN')
@@ -1168,9 +1296,9 @@ def plot_sponsor_tree(df):
icicle_fig.update_layout(
title='Sponsor',
font=dict(family="Arial", size=14, color='black'),
- width= 400,
+ width= 600,
height=1000
- # autosize=True,
+ # autosize=True
# margin=dict(t=50, l=25, r=25, b=25)
)
@@ -1182,8 +1310,8 @@ def plot_sponsor_tree(df):
def plot_collaborator_icicle(df):
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
df['Phase'] = df['Phase'].fillna('UNKNOWN')
@@ -1221,8 +1349,9 @@ def plot_collaborator_icicle(df):
icicle_fig.update_layout(
title='Collaborators',
font=dict(family="Arial", size=14, color='black'),
- width= 400,
+ width= 600,
height=1000
+ #autosize=True
)
@@ -1244,15 +1373,15 @@ def random_color():
return f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})'
##############################################################################################################
def plot_drug_sankey(df):
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
# Fill missing values in the 'Phase' column with a placeholder string
df['Phase'] = df['Phase'].fillna('UNKNOWN')
# Sort by Phase
df = df.sort_values(by='Phase')
-
+ #print(df)
# Split the conditions
df = split_conditions(df, 'Condition')
@@ -1356,7 +1485,7 @@ def random_color():
def plot_condition_treemap_nct_old(df):
- df = df[df['StudyType'] == "Interventional"]
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
df['Phase'] = df['Phase'].fillna('UNKNOWN')
df = df.sort_values(by='Phase')
df = split_conditions(df, 'Condition')
@@ -1426,71 +1555,142 @@ def plot_condition_treemap_nct_old(df):
fig.update_layout(title_text="Conditions, Trial IDs, Study IDs, Phases for Sponsor",
font_size=10, height=height, autosize=True)
return fig
+######################################### Conditions###############################
+
+#####################################################################################
+
+import plotly.graph_objects as go
+
+def plot_condition_treemap_nct_old(df):
+    """Render interventional trials as a Plotly table sorted by condition.
+
+    Despite the "treemap" in its name, this builds a go.Table with the
+    columns Condition, NCTId, OrgStudyId, BriefTitle and Phase.
+
+    Args:
+        df: DataFrame with StudyType, Condition, NCTId, OrgStudyId,
+            BriefTitle and Phase columns. The caller's frame is not modified.
+
+    Returns:
+        plotly.graph_objects.Figure containing the table.
+    """
+    # Copy the filtered rows so the assignments below never write into a
+    # slice of the caller's frame (avoids pandas SettingWithCopyWarning).
+    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
+    # Fill missing values in the 'Phase' column with a placeholder string
+    df['Phase'] = df['Phase'].fillna('UNKNOWN')
+
+    # Look up BriefTitle and OrgStudyId per NCTId
+    nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict()
+    nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict()
+
+    # Build the table frame on its own copy, sorted by Condition (A to Z)
+    table_df = df[['Condition', 'NCTId', 'Phase']].copy()
+    table_df['BriefTitle'] = table_df['NCTId'].map(nctid_to_brieftitle)
+    table_df['OrgStudyId'] = table_df['NCTId'].map(nctid_to_orgstudyid)
+    table_df = table_df.sort_values('Condition')
+
+    columns = ['Condition', 'NCTId', 'OrgStudyId', 'BriefTitle', 'Phase']
+    fig = go.Figure(data=[go.Table(
+        header=dict(
+            values=columns,
+            fill_color='paleturquoise',
+            align='left',
+            font=dict(size=16, color='black')
+        ),
+        cells=dict(
+            values=[table_df[name] for name in columns],
+            align='left',
+            font=dict(size=14, color='black')
+        )
+    )])
+
+    fig.update_layout(autosize=True, height=1000,
+                      title_text="Conditions with NCTIds and Phases",
+                      title_x=0.5, font=dict(size=18))
+
+    return fig
-#######################################
-def plot_condition_treemap_nct (df):
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+
+
+
+####################################################################################
+def plot_condition_treemap_nct(df):
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
# Fill missing values in the 'Phase' column with a placeholder string
df['Phase'] = df['Phase'].fillna('UNKNOWN')
- # Sort by Phase
- df = df.sort_values(by='Phase')
-
# Map NCTId to its Title
# Create a dictionary to map NCTId to BriefTitle
nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict()
- # Create a dictionary to map NCTId to BriefTitle
+ # Create a dictionary to map NCTId to OrgStudyId
nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict()
- icicle_df = pd.DataFrame(columns=['ids', 'labels', 'parents'])
+ # Create separate dataframes for each level
+ condition_df = pd.DataFrame(columns=['ids', 'labels', 'parents'])
+ nctid_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle'])
+ phase_df = pd.DataFrame(columns=['ids', 'labels', 'parents'])
- # Add the "Trials" root node
- icicle_df = pd.concat([icicle_df, pd.DataFrame({
- 'ids': ["Trials"],
- 'labels': ["Trials"],
+ # Add the "Conditions" root node
+ condition_df = pd.concat([condition_df, pd.DataFrame({
+ 'ids': ["Conditions"],
+ 'labels': ["Conditions"],
'parents': [""]
})], ignore_index=True)
# Add the Condition level
- icicle_df = pd.concat([icicle_df, pd.DataFrame({
- 'ids': df['Condition'].unique(),
- 'labels': df['Condition'].unique(),
- 'parents': ["Trials"] * len(df['Condition'].unique())
- })], ignore_index=True)
-
- # Add the Phase level
- for condition in df['Condition'].unique():
- temp_df = df[df['Condition'] == condition]
- phases = temp_df['Phase'].unique()
- icicle_df = pd.concat([icicle_df, pd.DataFrame({
- 'ids': [f"{condition}-{phase}" for phase in phases],
- 'labels': phases,
- 'parents': [condition] * len(phases)
+ conditions = df['Condition'].unique()
+ for condition in conditions:
+ condition_df = pd.concat([condition_df, pd.DataFrame({
+ 'ids': [condition],
+ 'labels': [condition],
+ 'parents': ["Conditions"]
})], ignore_index=True)
# Add the NCTId level
+ for condition in conditions:
+ temp_df = df[df['Condition'] == condition]
+ nctids = temp_df['NCTId'].unique()
+ for nctid in nctids:
+ nctid_df = pd.concat([nctid_df, pd.DataFrame({
+ 'ids': [f"{condition}-{nctid}"],
+ 'labels': [f"{nctid} ({nctid_to_orgstudyid[nctid]})"],
+ 'parents': [condition],
+ 'brieftitle': [nctid_to_brieftitle[nctid]]
+ })], ignore_index=True)
+
+ # Sort the Conditions alphabetically from A to Z
+ condition_df = condition_df.sort_values('labels')
+ nctid_df['parents'] = pd.Categorical(nctid_df['parents'], categories=condition_df['ids'], ordered=True)
+ nctid_df = nctid_df.sort_values('parents')
+
+ # Add the Phase level
for _, row in df.iterrows():
- icicle_df = pd.concat([icicle_df, pd.DataFrame({
- 'ids': [row['NCTId']],
- 'labels': [f"{row['NCTId']} ({nctid_to_orgstudyid[row['NCTId']]}) ({nctid_to_brieftitle[row['NCTId']]})"],
- 'parents': [f"{row['Condition']}-{row['Phase']}"]
+ phase_df = pd.concat([phase_df, pd.DataFrame({
+ 'ids': [f"{row['Condition']}-{row['NCTId']}-{row['Phase']}"],
+ 'labels': [row['Phase']],
+ 'parents': [f"{row['Condition']}-{row['NCTId']}"]
})], ignore_index=True)
+ # Concatenate the dataframes in the desired order
+ icicle_df = pd.concat([condition_df, nctid_df, phase_df], ignore_index=True)
+
fig = go.Figure(go.Icicle(
ids=icicle_df.ids,
labels=icicle_df.labels,
parents=icicle_df.parents,
root_color="lightgrey",
textfont=dict(size=34, family="Arial"),
- hovertemplate="NCTId: %{id}
OrgStudyId: %{customdata[0]}
BriefTitle: %{customdata[1]}
' + insert_line_break(text[nearest_space:].strip(), max_length)
- df = df[df['StudyType'] == "Interventional"]
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
df['Phase'] = df['Phase'].fillna('UNKNOWN')
df = df.sort_values(by='Phase')
@@ -1772,7 +1974,7 @@ def plot_trial_site_map(df):
return text[:nearest_space] + '
' + insert_line_break(text[nearest_space:].strip(), max_length)
- df = df[df['StudyType'] == "Interventional"]
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
df['Phase'] = df['Phase'].fillna('UNKNOWN')
df = df.sort_values(by='Phase')
@@ -1840,8 +2042,8 @@ def plot_trial_bubblemap(df):
scatter_plot_end_traces = []
scatter_plot_lines = []
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
# Fill missing values in the 'Phase' column with a placeholder string
df['Phase'] = df['Phase'].fillna('UNKNOWN')
@@ -1992,8 +2194,8 @@ def plot_trial_bubblemap_comp(df):
scatter_plot_end_traces = []
scatter_plot_lines = []
- # Filter the dataframe for 'StudyType' equal to "Interventional"
- df = df[df['StudyType'] == "Interventional"]
+ # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL"
+ df = df[df['StudyType'] == "INTERVENTIONAL"]
# Fill missing values in the 'Phase' column with a placeholder string
df['Phase'] = df['Phase'].fillna('UNKNOWN')
@@ -2207,26 +2409,29 @@ def select_disease(disease_input, disease_input_text):
#summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drug
-async def disease_view (condition, condition_text, sponsor_input, academia_input):
+#async def disease_view (condition, condition_text, sponsor_input, academia_input):
+async def disease_view (condition, sponsor_input):
# condition = condition.strip() # Remove leading and trailing spaces
- sponsor = select_sponsor(sponsor_input, academia_input)
- condition = select_disease(condition, condition_text)
+ #sponsor = select_sponsor(sponsor_input, academia_input)
+ #condition = select_disease(condition, condition_text)
+ sponsor = sponsor_input
+ condition = condition
################# ### List data type errors in type conversion to string needed for regualr expression
sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None,None, None, None, None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None,None, None, None, None, None
@@ -2269,9 +2474,9 @@ async def disease_view (condition, condition_text, sponsor_input, academia_input
# Display the DataFrame
# evaluate if need to change to collaborator other than top 20 ????
- condition_other = plot_condition_others(df)
+ # condition_other = plot_condition_others(df)
#### Sponsor Only
- condition_sunburst = plot_condition_sunburst(df)
+ # condition_sunburst = plot_condition_sunburst(df)
################################################################################
sponsor_tree = plot_sponsor_tree(df)
@@ -2281,7 +2486,9 @@ async def disease_view (condition, condition_text, sponsor_input, academia_input
if not df2.empty:
collaborator_tree = plot_collaborator_icicle(df2)
- return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb, condition_other, condition_sunburst ,sponsor_tree, collaborator_tree
+ return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb,sponsor_tree, collaborator_tree
+
+ # return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb, condition_other, condition_sunburst ,sponsor_tree, collaborator_tree
##################### Assets ###################################################################################
@@ -2308,23 +2515,23 @@ async def drug_view(condition, condition_type, s_sponsor_input, s_academia_input
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-################################################################################################
-
-
+################################################################################################
+
+
status = "Recruiting"
# Call gradio_wrapper_nct with appropriate arguments
if condition and sponsor:
@@ -2367,34 +2574,37 @@ def select_condition_phc(s_disease_input_phc, s_disease_input_type_phc):
return s_disease_input_phc
+#async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input):
+async def disease_view_phc(condition, s_sponsor_input):
+ #sponsor = select_sponsor_phc(s_sponsor_input, s_academia_input )
+ # condition = select_condition_phc(condition, condition_type)
+ sponsor = s_sponsor_input
+ condition = condition
+
-async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input):
- sponsor = select_sponsor_phc(s_sponsor_input, s_academia_input )
- condition = select_condition_phc(condition, condition_type)
-
################# ### List data type errors in type conversion to string needed for regualr expression
sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-################################################################################################
-
-
-
+################################################################################################
+
+
+
status = "Recruiting"
# Call gradio_wrapper_nct with appropriate arguments
if condition and sponsor:
@@ -2412,6 +2622,7 @@ async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academi
# Convert the HTML table to a pandas DataFrame
df = pd.read_html(html_table_conditions)[0]
+ #print(df)
#### error traps
if df.empty :
return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None
@@ -2453,23 +2664,23 @@ async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academi
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-################################################################################################
-
-
+################################################################################################
+
+
status = "Recruiting"
# Call gradio_wrapper_nct with appropriate arguments
if condition and sponsor:
@@ -2485,9 +2696,10 @@ async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academi
# Convert the HTML table to a pandas DataFrame
df = pd.read_html(html_table_conditions)[0]
+ #print(df)
#### error traps
if df.empty :
- return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None
+ return "The Sponsor Name did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None
####### Address Runtime API Issue to not connecting or fteching from Clinical Trials.gov
#tree_map_cond_nct = plot_condition_treemap_nct(df)
@@ -2525,25 +2737,25 @@ async def disease_view_phs_n(condition, condition_type, s_sponsor_input, s_acade
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None
-################################################################################################
-
-
-
-
+################################################################################################
+
+
+
+
status = "Not yet recruiting"
# Call gradio_wrapper_nct with appropriate arguments
if condition and sponsor:
@@ -2608,22 +2820,22 @@ async def disease_view_phs_c(condition, condition_type, s_sponsor_input, s_acade
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None, None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None, None, None
-################################################################################################
-
+################################################################################################
+
status = "Completed"
# Call gradio_wrapper_nct with appropriate arguments
@@ -2694,23 +2906,23 @@ async def condition_view(condition, country, condition_type, sponsor_input_con,
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-################################################################################################
-
-
+################################################################################################
+
+
status = "Recruiting"
summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status)
# Convert the HTML table to a pandas DataFrame
@@ -2759,25 +2971,25 @@ async def condition_view_s(condition, country, condition_type, sponsor_input_con
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None
-################################################################################################
-
-
-
-
+################################################################################################
+
+
+
+
status = "Recruiting"
summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status)
@@ -2795,13 +3007,13 @@ async def condition_view_s(condition, country, condition_type, sponsor_input_con
# print(html_table_add)
df = pd.read_html(html_table_add)[0]
#print(df)
-
+
#### error traps
if df.empty :
return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None
-
-
-
+
+
+
country_site = plot_trial_site_map(df)
site_cond = plot_trial_sites(df)
@@ -2837,23 +3049,23 @@ async def condition_viewt(condition, country, condition_type, sponsor_input_con,
#print(type(sponsor))
condition = ' '.join(condition) if isinstance(condition, list) else condition
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
+
if condition is not None and isinstance(condition, str):
if len(condition) > 50 or not re.match(allowed_chars, condition):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-################################################################################################
-
-
-
+################################################################################################
+
+
+
status = "Recruiting"
summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status)
# Convert the HTML table to a pandas DataFrame
@@ -2892,25 +3104,25 @@ async def condition_view_map(condition, country, sponsor_input_con_map, academia
sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
#print(type(condition))
-############################CHECK Sponsor and Condition Inputs #########################################################
+############################CHECK Sponsor and Condition Inputs #########################################################
import re
################ # def check_input(condition, sponsor):
allowed_chars = r'^[A-Za-z .,&/()-]*$'
-
-
+
+
if sponsor is not None and isinstance(sponsor, str):
if len(sponsor) > 50 or not re.match(allowed_chars, sponsor):
return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None
-################################################################################################
-################################################################################################
-
-
-
-
-
+################################################################################################
+################################################################################################
+
+
+
+
+
status = "Recruiting"
summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status )
# print(html_table_add)
@@ -2976,301 +3188,12 @@ def format_html_list(html_string):
return formatted_html
########################################################################################
-def format_html_list_old(html_string):
- # Split the input string by numbers followed by a period and a space
- items = re.split(r'(\d+\.\s)', html_string)
-
- # Combine the split items into a list of strings, removing the original numbers
- formatted_items = [number + text for number, text in zip(items[1::2], items[2::2])]
-
- # Remove unwanted characters from each item
- formatted_items = [re.sub(r':\.', '', item) for item in formatted_items]
- formatted_items = [re.sub(r'General\.', '', item) for item in formatted_items]
-
- # Filter out empty list items
- formatted_items = [item for item in formatted_items if item.strip()]
-
- # Join the list items with line breaks to create an HTML string
- formatted_html = "
".join(formatted_items)
-
- return formatted_html
-
-# Function to convert a list of formatted criteria to a dictionary
-
-# ############################# Hugging Face Model Invoke ####################################
-
-import os
-import io
-from IPython.display import Image, display, HTML
-from PIL import Image
-import base64
-import gradio as gr
-import requests, json
-
-################################################################ NLP Model #######################################
-# API Token and Model Name
-API_TOKEN = "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"
-MODEL_NAME = "d4data/biomedical-ner-all"
-#MODEL_NAME = "kormilitzin/en_core_spancat_med7_lg"
-
-
-############################################################################################################################
-
-def merge_tokens(tokens):
- if not tokens:
- return []
-
- merged_tokens = []
- for token in tokens:
- if (merged_tokens and
- token['entity_group'] == merged_tokens[-1]['entity_group']):
- # If current token continues the entity of the last one, merge them
- last_token = merged_tokens[-1]
- last_token['word'] += token['word'].replace('##', '')
- last_token['end'] = token['end']
- last_token['score'] = (last_token['score'] + token['score']) / 2
- else:
- # Otherwise, add the token to the list
- merged_tokens.append(token)
-
- return merged_tokens
-
-# Function to call Hugging Face API################################################################################
-def get_completion(text):
- headers = {"Authorization": f"Bearer {API_TOKEN}"}
- data = {"inputs": text, "max_tokens": 2048} # Set the max_tokens parameter
- #data = {"inputs": text, "max_tokens": 512} # Set the max_tokens parameter
- data = {"inputs": text} # Set the max_tokens parameter
- response = requests.post(f"https://api-inference.huggingface.co/models/{MODEL_NAME}", headers=headers, json=data, timeout= 90)
- # Print the response content
- print(f"From Hugging Face API: {response.text}")
- return response.json()
-# Split texts when longer than 2048 tokens
-from transformers import AutoTokenizer
-# Load the tokenizer for the model
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
#####################################################################################
-# Function to split the input text into chunks
-def split_input_text(text, max_tokens):
- tokens = tokenizer.encode(text)
- token_chunks = []
-
- for i in range(0, len(tokens), max_tokens):
- token_chunk = tokens[i:i + max_tokens]
- token_chunks.append(tokenizer.decode(token_chunk))
-
- # Debug: Print the token length of the current chunk
- print(f"Token length of chunk {len(token_chunks)}: {len(token_chunk)}")
-
- return token_chunks
-
-# Function to remove HTML tags from the input text
-def remove_html_tags(text):
- clean_text = re.sub('<[^>]*>', ' ', text)
- return clean_text
-
-def ner_oll (input):
- max_retries = 10
- retries = 0
- output = None
-
- # Remove HTML tags from the input text
- input_no_html = remove_html_tags(input)
-
- # Split the input text into chunks
- input_chunks = split_input_text(input_no_html, 500)
-
- # Initialize an empty list to store the merged tokens from all chunks
- all_merged_tokens = []
-
- # Debug: Print the number of chunks created
- print(f"Number of input chunks: {len(input_chunks)}")
-
- api_calls = 0 # Counter for API calls
-
- for input_chunk in input_chunks:
- while retries < max_retries:
- try:
- output = get_completion(input_chunk)
- #print(output)
- api_calls += 1 # Increment the API calls counter
- # Check if the output is empty
- if output:
- # Check if the output contains an error message
- if 'error' in output:
- print("Error in API response, retrying...")
- retries += 1
- continue
- break
- else:
- raise ValueError("Empty output")
- except Exception as e:
- print(f"Error in API call: {e}")
- retries += 1
-
- if output is None or 'error' in output:
- print("Failed to get API response after maximum 10 retries.")
- return {"text": input, "entities": []}
-
- merged_tokens = merge_tokens(output)
- # Debug: Print the merged tokens for the current output
- print(f"Merged tokens for chunk {api_calls}: {merged_tokens}")
-
- all_merged_tokens.extend(merged_tokens)
-
- print(all_merged_tokens)
-
- # Debug: Print the number of API calls made
- # print(f"Number of API calls made: {api_calls}")
-
- return {"text": input, "entities": all_merged_tokens}
-
-##########################################################################################################
-
-from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
-
-# Load the tokenizer and model for the pipeline
-tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
-model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
-
-# Create the NER pipeline
-pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
-##############################################################################################
-
-def ner(input):
- max_retries = 10
- retries = 0
- output = None
- # Initialize an empty list to store the merged tokens from all chunks
- all_merged_tokens = []
- # Remove HTML tags from the input text
- input_no_html = remove_html_tags(input)
-
- # Split the input text into chunks
- input_chunks = split_input_text(input_no_html, 500)
-
- # Initialize an empty list to store the entities from all chunks
- all_entities = []
-
- # Debug: Print the number of chunks created
- print(f"Number of input chunks: {len(input_chunks)}")
-
- for input_chunk in input_chunks:
- while retries < max_retries:
- try:
- output = pipe(input_chunk)
- if output:
- break
- else:
- raise ValueError("Empty output")
- except Exception as e:
- print(f"Error in pipeline call: {e}")
- retries += 1
-
- if output is None:
- print("Failed to get pipeline output after maximum 10 retries.")
- return {"text": input, "entities": []}
-
- # Remove unwanted entity groups
- filtered_output = [
- entity for entity in output
- if entity['entity_group'] not in [
- 'Coreference',
- 'Detailed_description',
- 'Lab_value',
- # 'Diagnostic_procedure',
- 'Personal_background',
- 'History',
- 'Family_history',
- 'Outcome',
- 'Subject',
- 'Date',
- 'Distance',
- 'Severity',
- 'Activity',
- 'Duration',
- 'Administration',
- 'Sex',
- 'Age',
- 'Sign_symptom',
- 'Therapeutic_procedure',
- 'Biological_structure'
- ]
- ]
-
- # Debug: Print the entities for the current output after filtering
- print(f"Filtered entities for chunk {len(all_entities) + 1}: {filtered_output}")
-
- merged_tokens = merge_tokens(filtered_output)
- all_merged_tokens.extend(merged_tokens)
-
- print(all_entities)
-
-
- return {"text": input, "entities": all_merged_tokens}
-
-
-
-
-
-
-
-##############################################################################
-def ner_unflitered(input):
- max_retries = 10
- retries = 0
- output = None
- # Initialize an empty list to store the merged tokens from all chunks
- all_merged_tokens = []
- # Remove HTML tags from the input text
- input_no_html = remove_html_tags(input)
-
- # Split the input text into chunks
- input_chunks = split_input_text(input_no_html, 500)
-
- # Initialize an empty list to store the entities from all chunks
- all_entities = []
-
- # Debug: Print the number of chunks created
- print(f"Number of input chunks: {len(input_chunks)}")
-
- for input_chunk in input_chunks:
- while retries < max_retries:
- try:
- output = pipe(input_chunk)
- # Check if the output is empty
- if output:
- #print(output)
- break
- else:
- raise ValueError("Empty output")
- except Exception as e:
- print(f"Error in pipeline call: {e}")
- retries += 1
-
- if output is None:
- print("Failed to get pipeline output after maximum 10 retries.")
- return {"text": input, "entities": []}
-
- # Debug: Print the entities for the current output
- print(f"Entities for chunk {len(all_entities) + 1}: {output}")
-
- merged_tokens = merge_tokens(output)
- # Debug: Print the merged tokens for the current output
- #print(f"Merged tokens for chunk {api_calls}: {merged_tokens}")
-
- all_merged_tokens.extend(merged_tokens)
- #all_entities.extend(output)
-
- print(all_entities)
-
- return {"text": input, "entities": all_merged_tokens}
-
#############################################################################################################################################
async def trial_view_map(nctID):
nctID = nctID.strip() # Remove leading and trailing spaces
@@ -3292,41 +3215,108 @@ async def trial_view_map(nctID):
df = pd.read_html(html_table_add)[0]
world_map = plot_trial_site_world_map(df)
if world_map is None:
- return "Sorry, the plot could not be generated. Please try again by slecting a country!", None, None
+ return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None
return summary_stats_sites, world_map, html_table_add
#return html_table, formatted_html_inclusions,formatted_html_exclusions,world_map
####################################################################################################################################################
+import plotly.graph_objects as go
-async def trial_view (nctID):
- nctID = nctID.strip() # Remove leading and trailing spaces
-###### # Check if nctID is valid
+def split_numbered_criteria(text):
+    """Split eligibility text into one string per numbered criterion.
+
+    A new criterion starts at every line that, once stripped, begins with
+    one or more digits immediately followed by ". " (for example "1. " or
+    "100. ").  Any other non-blank line is treated as a continuation and is
+    joined to the current criterion with a single space.  Lines that appear
+    before the first numbered line form a criterion of their own.
+
+    Args:
+        text: Raw criteria text with one item per line; may be None or "".
+
+    Returns:
+        list[str]: The criteria in input order.  An empty list when `text`
+        is falsy, and ["No criteria available"] when `text` contains only
+        blank lines.
+    """
+    if not text:
+        return []
+
+    criteria = []
+    current = []
+
+    for line in text.split('\n'):
+        line = line.strip()
+        if not line:
+            continue
+
+        # Strip the whole leading run of digits so numbering of any width is
+        # recognised, and so tokens such as "1g. " are not mistaken for it.
+        remainder = line.lstrip('0123456789')
+        if remainder != line and remainder.startswith('. '):
+            if current:
+                criteria.append(' '.join(current))
+            current = [line]
+        else:
+            current.append(line)
+
+    # Flush the criterion that was still being accumulated.
+    if current:
+        criteria.append(' '.join(current))
+
+    return criteria if criteria else ["No criteria available"]
+
+def display_criteria_table(inclusion_text, exclusion_text):
+    """
+    Render inclusion and exclusion criteria side by side as a Plotly table.
+
+    Each text is split with split_numbered_criteria, and the shorter column
+    is padded with empty strings so both columns have the same row count.
+    Returns the Plotly figure, or None (after printing the error) if
+    anything raises while the figure is being built.
+    """
+    try:
+        # One list per table column: inclusion first, exclusion second
+        columns = [
+            split_numbered_criteria(inclusion_text),
+            split_numbered_criteria(exclusion_text),
+        ]
+
+        # Pad the shorter column so the rows line up
+        row_count = max(len(column) for column in columns)
+        for column in columns:
+            column.extend([''] * (row_count - len(column)))
+
+        header_style = dict(
+            values=['Inclusion Criteria', 'Exclusion Criteria'],
+            fill_color='#e6f3ff',
+            align=['left', 'left'],
+            font=dict(size=14, color='black'),
+            height=40,
+        )
+        cell_style = dict(
+            values=columns,
+            # Two colours repeated row_count times, passed as a single colour column
+            fill_color=[['white', '#f9f9f9'] * row_count],
+            align=['left', 'left'],
+            font=dict(size=12),
+            height=None,
+            line=dict(color='lightgrey', width=1),
+        )
+
+        table = go.Table(
+            columnwidth=[500, 500],
+            header=header_style,
+            cells=cell_style,
+        )
+        fig = go.Figure(data=[table])
+
+        # Figure height grows with the number of rows, with a floor of 400
+        fig.update_layout(
+            title="Trial Eligibility Criteria",
+            width=1200,
+            height=max(400, row_count * 30 + 100),
+            margin=dict(l=20, r=20, t=40, b=20),
+        )
+        return fig
+
+    except Exception as e:
+        print(f"Error in display_criteria_table: {str(e)}")
+        return None
+
+async def trial_view(nctID):
+    """Return the trial table and eligibility-criteria figure for one NCT ID.
+
+    Args:
+        nctID: NCT identifier as typed by the user; surrounding whitespace
+            is stripped before validation.
+
+    Returns:
+        A 2-tuple on every path: (html_table, criteria_table) on success,
+        where html_table comes from gradio_wrapper_nct and criteria_table
+        from display_criteria_table; or (message, None) when the ID fails
+        validation or neither inclusion nor exclusion criteria were found.
+    """
+    nctID = nctID.strip()
     if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12):
-        return "Not a Valid NCT ID has been entered", None, None
+        # Keep the same arity as the success return at the end of the function.
+        return "Not a Valid NCT ID has been entered", None
     status = "Recruiting"
     summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(NCTId=nctID, status = status)
-    #### error traps
     formatted_inclusions = get_formatted_inclusion_criteria(nctID)
-    print(formatted_inclusions)
     formatted_exclusions = get_formatted_exclusion_criteria(nctID)
-    print( formatted_exclusions)
-    # Check if both formatted_inclusions and formatted_exclusions are empty
     if not formatted_inclusions and not formatted_exclusions:
-        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None
+        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None
-    inclusion_concepts = ner(formatted_inclusions)
-    exclusion_concepts = ner(formatted_exclusions)
-
-
+    # Create single table with both criteria
+    criteria_table = display_criteria_table(formatted_inclusions, formatted_exclusions)
-    #return html_table, formatted_html_inclusions, formatted_html_exclusions,inclusion_concepts,exclusion_concepts
-    return html_table, inclusion_concepts,exclusion_concepts
+    return html_table, criteria_table
@@ -3345,137 +3335,7 @@ with trial_app:
#gr.Markdown("
Now Recruiting Trials:
") with gr.Tabs(): - ############################################################ Sponsors ###################################################################### - with gr.TabItem("Sponsors"): - # 1st Row -#################################################################################################################################################### - with gr.Row(): - gr.HTML(''' -1. Select a Sponsor and click 'Show Sponsor'.
-2. Filter Conditions by selecting a 'Disease Area', for example, Oncology.
- ''') - #3. If typing a Sponsor, the name needs to fully match with ClinicalTrials.gov.
- #4. To find more Sponsors, only type a 'Disease Name', for example, 'Pancreatic Cancer' and click 'Show Sponsor'.
- - # ''') -# #4. When typing a Sponsor Name, needs to fully match with ClinicalTrials.gov reporting, for example, 'Seagen Inc.' and not'Seagen'
-################################################################################################################################### -################################################################################################################################################################################## - with gr.Row(): - with gr.Column(): - summary_block = gr.HTML(label="Lead Sponsors for Recruiting Clinical Trials:" ) - # with gr.Column(): - # summary_block_collbs = gr.HTML(label="Collaborators in Recruiting Clinical Trials:" ) - with gr.Row(): - with gr.Column(): - sponsor_trees = gr.Plot() - with gr.Column(): - collaborator_trees = gr.Plot() - with gr.Column(): - condition_sunbursts = gr.Plot() - with gr.Column(): - condition_others = gr.Plot() - - - -#################################################################################################################################################### - with gr.Row(): - gr.HTML('1. Select a Sponsor and click 'Show Conditions'.
-2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.
+1. Select a Sponsor.
+2. Select a Condition Name, for example, 'Pancreatic Cancer', 'Chronic Kidney Disease', 'MASH' etc.
+3. Click 'Show Conditions'.
''') @@ -3518,8 +3379,8 @@ with trial_app: ################################################################### with gr.Column(): s_disease_input_phc = gr.Dropdown( - choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ - "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ + choices=["Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ + "Cancer","Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ @@ -3531,7 +3392,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3551,18 +3412,18 @@ with trial_app: # with gr.Column(): ################# # 3rd row################################################################# - with gr.Row(): + # with gr.Row(): ################################################################### - with gr.Column(): - s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:") - clear_btn_phc.add(s_academia_input_phc) + # with gr.Column(): + # s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:") + # clear_btn_phc.add(s_academia_input_phc) ################################################################### - with gr.Column(): + # with gr.Column(): - s_disease_input_type_phc = gr.Textbox(lines=1, 
label="Filter by typing a Condition:") - clear_btn_phc.add(s_disease_input_type_phc) + # s_disease_input_type_phc = gr.Textbox(lines=1, label="Filter by typing a Condition:") + # clear_btn_phc.add(s_disease_input_type_phc) ############################################################################################################################################ ###################################################################################################################################################################### @@ -3602,7 +3463,7 @@ with trial_app: gr.HTML('''1. Select a Sponsor and click 'Show Trials'.
-2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.
+2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.
''') @@ -3639,7 +3500,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3701,7 +3562,7 @@ with trial_app: gr.HTML('''1. Select a Sponsor and click 'Show Drugs'.
-2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.
+2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.
''') ##################################################################################################################################################### @@ -3734,7 +3595,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3827,7 +3688,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3930,7 +3791,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -4020,7 +3881,7 @@ with trial_app: gr.HTML('''1. Select a Sponsor and click 'Show Timelines'.
-2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.
+2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.
''') @@ -4056,7 +3917,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -4150,295 +4011,39 @@ with trial_app: trial_output = gr.HTML(label="Detail of Recruiting Trials") ################################################ with gr.Row(): - with gr.Column(): - concept_inclusion= gr.HighlightedText(label="Display of Inclusion Concepts") - with gr.Column(): - concept_exclusion= gr.HighlightedText(label="Display of Exclusion Concepts") + # with gr.Column(): + eligibilities_plot = gr.Plot() + # with gr.Column(): + # concept_exclusion= gr.HighlightedText(label="Display of Exclusion Concepts") - clear_tn_btn.add(trial_output) + clear_tn_btn.add(trial_output) # clear_tn_btn.add(formatted_inclusions_output) # clear_tn_btn.add(formatted_exclusions_output) - clear_tn_btn.add(concept_inclusion) - clear_tn_btn.add(concept_exclusion) - - - ############################################################################################################################################## - ############################################################ Trial Map ############## - - - - with gr.TabItem("Trial Sites"): -################################################################################################################################################### - with gr.Row(): - gr.HTML(''' -1. Type a single Trial's NCT Id,For Example: NCT05512377 or NCT04924075 or NCT04419506 etc. and click 'Show Sites Map'.
-3. Wait time approximately 45 seconds to display all Trial Sites in a Map.
- ''') - - - - with gr.Row(): - - #nctID_input = gr.inputs.Textbox(lines=1, label="Type Trial NctId:") - nctID_inputs = gr.Textbox(lines=1, label="Type Trial NCT Id: ") - trial_buttons = gr.Button("Show Sites Map") - #Then, create the clear button and add the dropdown input to it - clear_tn_btns = gr.ClearButton() - clear_tn_btns.add(nctID_inputs ) -################################################ - with gr.Row(): - - summary_block_trial_map = gr.HTML(label="Site Map for Recruiting Clinical Trials:" ) - with gr.Row(): - - world_map = gr.Plot() - - - with gr.Row(): - - trial_output_map = gr.HTML(label="List of Recruiting Country, Sites") - - clear_tn_btns.add(summary_block_trial_map) - clear_tn_btns.add(world_map) - clear_tn_btns.add(trial_output_map) + clear_tn_btn.add(eligibilities_plot) + # clear_tn_btn.add(concept_exclusion) ############################################################################################################################################## - ################################################################ Future Trials ############################################################################################### - with gr.TabItem("Future Trials"): - # 1st Row -#################################################################################################################################################### - with gr.Row(): - gr.HTML(''' -1. Select a Sponsor and click 'Show Future Trials'.
-2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease','NASH' etc.
- ''') - - -##################################################################################################################################################### - - with gr.Row(): - with gr.Column(): - -#### ######################################################################################################################################################################################################### - - s_sponsor_input_phs_n = gr.Dropdown( -############################################################################ - choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "BioNTech SE","Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ - "CSL Behring", "Daiichi Sankyo, Inc.",\ - "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ - "Janssen Research & Development, LLC","Merck Sharp & Dohme LLC","ModernaTX, Inc.", \ - "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], - label="Select a Sponsor " - ) - -################################################################################################################################################################################################################## - with gr.Column(): - s_disease_input_phs_n = gr.Dropdown( - choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ - "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ - "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ - "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ - "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ - "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ - "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ - "Urothelial Carcinoma",\ - "Alzheimer","Asthma","Attention Deficit 
Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ - "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ - " Major","Metabolic", "Generalized Pustular Psoriasis",\ - "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ - "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ - "Psychological Trauma","Renal", "Respiratory",\ - "Schizophrenia", "PTSD", \ - "Venous Thromboembolism", "Wet"], - label="Filter by a Condition" - ) - -###################################################################################################################################################################### - # 3rd Row - with gr.Row(): #academia_input = gr.inputs.Dropdown( - s_button_phs_n = gr.Button("Show Future Trials") - - # Then, create the clear button and add the dropdown input to it - clear_btn_phs = gr.ClearButton() - clear_btn_phs.add(s_sponsor_input_phs_n) - - clear_btn_phs.add(s_disease_input_phs_n) - -################# # 3rd row################################################################# - with gr.Row(): -##################################################################################################################################################################### - with gr.Column(): - s_academia_input_phs_n = gr.Textbox(lines=1, label="Type a Sponsor Name:") - clear_btn_phs.add(s_academia_input_phs_n) -################################################################################################################################################################# - with gr.Column(): - s_disease_input_type_phs_n = gr.Textbox(lines=1, label="Filter by typing a Condition:") - clear_btn_phs.add(s_disease_input_type_phs_nwith gr.Row(): - - summary_block_phs_n = gr.HTML(label="Conditions and Sponsors Will Recruit for Clinical Trials:" ) - - - with gr.Row(): - # with 
gr.Column(): - tree_map_cond_nct_n = gr.Plot() -################################################################ -#################################################################################################################################################### - with gr.Row(): - gr.HTML('1. Select a Sponsor and a Condition name and click 'Show Completed Trials'.
- ''' - ) - with gr.Row(): - with gr.Column(): - -#### ######################################################################################################################################################################################################### - - s_sponsor_input_phs_c = gr.Dropdown( -############################################################################ - choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "BioNTech SE","Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ - "CSL Behring", "Daiichi Sankyo, Inc.",\ - "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ - "Janssen Research & Development, LLC","Merck Sharp & Dohme LLC","ModernaTX, Inc.", \ - "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], - label="Select a Sponsor " - ) - -################################################################################################################################################################################################################## - with gr.Column(): - s_disease_input_phs_c= gr.Dropdown( - choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ - "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ - "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ - "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ - "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ - "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ - "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ - "Urothelial Carcinoma",\ - "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ - "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic 
Retinopathy","Depression","Depressive Disorder",\ - " Major","Metabolic", "Generalized Pustular Psoriasis",\ - "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ - "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ - "Psychological Trauma","Renal", "Respiratory",\ - "Schizophrenia", "PTSD", \ - "Venous Thromboembolism", "Wet"], - label="Filter by a Condition" - ) - -###################################################################################################################################################################### - # 3rd Row - with gr.Row(): #academia_input = gr.inputs.Dropdown( - s_button_phs_c = gr.Button("Show Completed Trials") - - # Then, create the clear button and add the dropdown input to it - clear_btn_phs = gr.ClearButton() - clear_btn_phs.add(s_sponsor_input_phs_c) - - clear_btn_phs.add(s_disease_input_phs_c) -################# # 3rd row################################################################# - with gr.Row(): -##################################################################################################################################################################### - with gr.Column(): - s_academia_input_phs_c = gr.Textbox(lines=1, label="Type a Sponsor Name:") - clear_btn_phs.add(s_academia_input_phs_c) -################################################################################################################################################################# - with gr.Column(): - s_disease_input_type_phs_c = gr.Textbox(lines=1, label="Filter by typing a Condition:") - clear_btn_phs.add(s_disease_input_type_phs_c) -##################################################################################################################################################################################### - - - - 
-######################################################################################################################################################################### - with gr.Row(): - - summary_block_phs_c = gr.HTML(label="Conditions and Sponsors Will Recruit for Clinical Trials:" ) - - - with gr.Row(): - # with gr.Column(): - tree_map_cond_nct_c = gr.Plot() -################################################################ -#################################################################################################################################################### - with gr.Row(): - gr.HTML('