import json


async def fetch(session, url, params):
    """GET ``url`` and return the decoded JSON document, or ``None`` on failure.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async context
            manager yielding a response with a ``status`` attribute and an
            awaitable ``text()`` (in this file, an ``aiohttp.ClientSession``).
        url: Endpoint to query.
        params: Mapping of query-string parameters.

    Returns:
        The parsed JSON payload when the server answers 200 with valid JSON;
        ``None`` for transport errors, non-200 statuses and undecodable bodies.
        Each failure is reported exactly once on stdout.
    """
    # Step 1: transport. This is a deliberate best-effort boundary: callers
    # treat ``None`` as "stop paging", so no transport error may escape.
    try:
        async with session.get(url, params=params) as response:
            status = response.status
            text = await response.text()
    except Exception as e:
        print(f"Error in fetch: {str(e)}")
        return None

    # Step 2: HTTP status.
    if status != 200:
        print(f"HTTP Error: {status}")
        print(f"Response text: {text}")
        return None

    # Step 3: decoding. Handled outside the transport ``try`` so that a bad
    # body is not reported a second time as a generic fetch error.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print(f"Failed to decode JSON: {text[:200]}...")
        return None
# Endpoint of the ClinicalTrials.gov v2 "studies" resource.
_STUDIES_URL = "https://clinicaltrials.gov/api/v2/studies"

# Largest page this module asks for in one request.
_MAX_PAGE_SIZE = 1000

# Field paths requested from the API. The response only contains what is
# listed here, so every path read by the converter below must appear.
_STUDY_FIELDS = (
    "protocolSection.identificationModule.nctId",
    "protocolSection.identificationModule.orgStudyIdInfo",
    "protocolSection.identificationModule.briefTitle",
    "protocolSection.conditionsModule.conditions",
    "protocolSection.designModule.phases",
    "protocolSection.statusModule.overallStatus",
    "protocolSection.statusModule.primaryCompletionDateStruct",
    "protocolSection.designModule.enrollmentInfo",
    "protocolSection.designModule.studyType",
    "protocolSection.eligibilityModule.studyPopulation",
    "protocolSection.contactsLocationsModule.locations",
    "protocolSection.designModule.designInfo",
    "protocolSection.armsInterventionsModule.armGroups",
    "protocolSection.sponsorCollaboratorsModule.leadSponsor",
    # Needed by the 'Collaborator' column; it was missing from the request,
    # which left that column permanently empty.
    "protocolSection.sponsorCollaboratorsModule.collaborators",
    "protocolSection.armsInterventionsModule.interventions",
    "protocolSection.outcomesModule.primaryOutcomes",
    "protocolSection.statusModule.startDateStruct",
)


def _normalize_overall_status(overall_status):
    """Turn a display label into the upper-case, underscore-joined status token.

    "Not yet recruiting" -> "NOT_YET_RECRUITING"
    "Active, not recruiting" -> "ACTIVE_NOT_RECRUITING"

    A comma followed by a space is treated as part of a label. Bare commas
    and ``|`` are left untouched so a caller can still pass several tokens.
    """
    return "_".join(overall_status.replace(", ", " ").split()).upper()


def _build_query_params(lead_sponsor_name=None, disease_area=None, overall_status=None,
                        location_country=None, NCTId=None, max_records=None):
    """Build the query-string mapping for one ``/studies`` request."""
    page_size = _MAX_PAGE_SIZE
    if max_records:
        # No point downloading a full page when the caller wants fewer rows.
        page_size = max(1, min(_MAX_PAGE_SIZE, int(max_records)))

    params = {
        "format": "json",
        "fields": ",".join(_STUDY_FIELDS),
        "pageSize": str(page_size),
        "countTotal": "true",
    }

    if NCTId:
        # A lookup by id ignores every other criterion, as before.
        params["query.id"] = NCTId
        return params

    # Values are passed verbatim: the HTTP client percent-encodes them, so a
    # hand-inserted "+" would reach the server as a literal plus sign.
    if disease_area:
        params["query.cond"] = disease_area
    if lead_sponsor_name:
        params["query.lead"] = lead_sponsor_name
    if location_country:
        params["query.locn"] = location_country
    if overall_status:
        params["filter.overallStatus"] = _normalize_overall_status(overall_status)
    return params


def _study_to_trial_info(study):
    """Flatten one v2 study record into the column layout used by the app."""
    return {
        'NCTId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'nctId']),
        'Phase': _get_first_item(study, ['protocolSection', 'designModule', 'phases']),
        'OrgStudyId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'orgStudyIdInfo', 'id']),
        'Status': _get_nested_value(study, ['protocolSection', 'statusModule', 'overallStatus']),
        'Condition': '|'.join(_get_nested_value(study, ['protocolSection', 'conditionsModule', 'conditions'], [])),
        'CompletionDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'primaryCompletionDateStruct', 'date']),
        'EnrollmentCount': _get_nested_value(study, ['protocolSection', 'designModule', 'enrollmentInfo', 'count']),
        'StudyType': _get_nested_value(study, ['protocolSection', 'designModule', 'studyType']),
        'Arm': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'armGroups'], 'label'),
        'Drug': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'interventions'], 'name'),
        'Country': _get_location_info(study, 'country'),
        'City': _get_location_info(study, 'city'),
        'Site': _get_location_info(study, 'facility'),
        'StudyPopulation': _get_nested_value(study, ['protocolSection', 'eligibilityModule', 'studyPopulation']),
        'Sponsor': _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'leadSponsor', 'name']),
        'Collaborator': _get_collaborators(study),
        'StartDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'startDateStruct', 'date']),
        'PrimaryMeasure': _get_first_item(study, ['protocolSection', 'outcomesModule', 'primaryOutcomes'], 'measure'),
        'Purpose': _get_nested_value(study, ['protocolSection', 'designModule', 'designInfo', 'primaryPurpose']),
        'BriefTitle': _get_nested_value(study, ['protocolSection', 'identificationModule', 'briefTitle']),
    }


async def get_nct_ids(lead_sponsor_name=None, disease_area=None, overall_status=None,
                      location_country=None, NCTId=None, max_records=None, blocks=30):
    """Query ClinicalTrials.gov (API v2) and return one flat dict per study.

    Args:
        lead_sponsor_name: Free-text lead sponsor filter.
        disease_area: Free-text condition filter.
        overall_status: Status label or token ("Recruiting",
            "Not yet recruiting", "RECRUITING", ...). Ignored when ``NCTId``
            is given.
        location_country: Free-text location filter.
        NCTId: When set, look up this id and ignore the other criteria.
        max_records: Optional upper bound on the number of studies returned.
        blocks: Unused. Kept only so existing callers that pass it keep
            working; paging now follows the API's ``nextPageToken``.

    Returns:
        A list of dicts with the keys produced by ``_study_to_trial_info``.
        The list is empty when nothing matched or the first request failed.
    """
    # Imported here so the pure helpers above can be used without the HTTP stack.
    import aiohttp

    params = _build_query_params(
        lead_sponsor_name=lead_sponsor_name,
        disease_area=disease_area,
        overall_status=overall_status,
        location_country=location_country,
        NCTId=NCTId,
        max_records=max_records,
    )
    limit = int(max_records) if max_records else None
    print(f"Full parameters: {params}")

    all_studies = []
    async with aiohttp.ClientSession() as session:
        while True:
            response_data = await fetch(session, _STUDIES_URL, params)
            if not isinstance(response_data, dict):
                print("Invalid response data")
                break

            studies = response_data.get('studies') or []
            if not studies:
                print("No more studies found")
                break

            all_studies.extend(studies)
            print(f"Retrieved {len(studies)} studies. Total so far: {len(all_studies)}")

            next_page_token = response_data.get('nextPageToken')
            if not next_page_token or (limit and len(all_studies) >= limit):
                break
            params["pageToken"] = next_page_token

    if limit:
        # The last page may overshoot the requested maximum.
        del all_studies[limit:]

    recruiting_trials_list = []
    for study in all_studies:
        # One malformed record must not discard the whole result set.
        try:
            recruiting_trials_list.append(_study_to_trial_info(study))
        except Exception as e:
            print(f"Error processing study: {str(e)}")

    print(f"Total studies processed: {len(recruiting_trials_list)}")
    return recruiting_trials_list
def _get_nested_value(obj, path, default=None):
    """Walk ``path`` through nested dicts and return the value found.

    Args:
        obj: Root mapping (a study record).
        path: Sequence of keys to follow, outermost first.
        default: Returned when a key is missing, an intermediate value is
            not a dict, or the final value is ``None``.
    """
    current = obj
    for key in path:
        if not isinstance(current, dict):
            return default
        current = current.get(key)
    return default if current is None else current


def _get_first_item(obj, path, field=None):
    """Return the first element of the list at ``path``.

    When ``field`` is given, return that key of the first element instead.
    Returns ``None`` if the list is missing or empty, or if ``field`` is
    requested from a first element that is not a dict.
    """
    items = _get_nested_value(obj, path, [])
    if not isinstance(items, list) or not items:
        return None
    first = items[0]
    if not field:
        return first
    return first.get(field) if isinstance(first, dict) else None


def _join_field(entries, field):
    """Join the non-empty ``field`` values of the dict entries with ``|``.

    Entries that are not dicts are skipped, so one malformed element does not
    discard the values of all the others. Returns ``None`` when ``entries``
    itself is not a list.
    """
    if not isinstance(entries, list):
        return None
    return '|'.join(
        str(entry[field])
        for entry in entries
        if isinstance(entry, dict) and entry.get(field)
    )


def _get_location_info(study, info_type):
    """Return the ``|``-joined ``info_type`` of every study location.

    ``info_type`` is the location key to collect ('country', 'city' or
    'facility' in this file). Locations without that key are omitted; the
    result is ``''`` when the study lists no locations.
    """
    locations = _get_nested_value(
        study, ['protocolSection', 'contactsLocationsModule', 'locations'], [])
    return _join_field(locations, info_type)


def _get_collaborators(study):
    """Return the ``|``-joined collaborator names, or ``''`` if there are none."""
    collaborators = _get_nested_value(
        study, ['protocolSection', 'sponsorCollaboratorsModule', 'collaborators'], [])
    return _join_field(collaborators, 'name')
import re

# Endpoint of the ClinicalTrials.gov v2 "studies" resource.
_ELIGIBILITY_URL = "https://clinicaltrials.gov/api/v2/studies"

# Seconds to wait for the eligibility request before giving up.
_ELIGIBILITY_TIMEOUT = 30

# Preferred split point: an "Exclusion Criteria" header that starts a line
# (optionally "Key Exclusion Criteria"), including its trailing colon.
_EXCLUSION_HEADER_LINE = re.compile(
    r"^[ \t]*(?:key[ \t]+)?exclusion[ \t]+criteria\b[ \t]*:?",
    re.IGNORECASE | re.MULTILINE,
)
# Fallback split point: the phrase anywhere in the text.
_EXCLUSION_HEADER_ANY = re.compile(r"\bexclusion\s+criteria\b[ \t]*:?", re.IGNORECASE)
# A line that is nothing but a section header.
_SECTION_HEADER = re.compile(r"^(?:key\s+)?(?:inclusion|exclusion)\s+criteria\s*:?$", re.IGNORECASE)
# Leading bullet marker of a criterion line.
_BULLET_PREFIX = re.compile(r"^\s*[-•*]\s*")


def _fetch_eligibility_text(nct_id):
    """Download the raw eligibility-criteria text of one trial.

    Returns the text, or ``None`` (after printing the reason) when the
    request fails, the trial is unknown or the record has no criteria text.
    """
    # Imported here so the text helpers below work without the HTTP stack.
    import requests

    params = {
        "format": "json",
        "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
        "query.id": nct_id,
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(_ELIGIBILITY_URL, params=params, timeout=_ELIGIBILITY_TIMEOUT)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
        return None

    studies = data.get('studies') if isinstance(data, dict) else None
    if not studies:
        print(f"No data found for Trial NCT ID: {nct_id}")
        return None

    try:
        text = studies[0]['protocolSection']['eligibilityModule']['eligibilityCriteria']
    except (IndexError, KeyError, TypeError) as e:
        print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    if not isinstance(text, str):
        print(f"Error processing data for Trial NCT ID {nct_id}: eligibility criteria is not text")
        return None
    return text


def _split_eligibility(eligibility_criteria):
    """Split criteria text into ``(inclusion_text, exclusion_text)``.

    The split happens at the first "Exclusion Criteria" header that starts a
    line, or failing that at the first occurrence of the phrase. Everything
    after the split point is the exclusion part, so a later mention of the
    phrase does not truncate it. ``exclusion_text`` is ``None`` when the
    phrase never occurs.
    """
    header = (_EXCLUSION_HEADER_LINE.search(eligibility_criteria)
              or _EXCLUSION_HEADER_ANY.search(eligibility_criteria))
    if header is None:
        return eligibility_criteria, None
    return eligibility_criteria[:header.start()], eligibility_criteria[header.end():]


def _format_numbered_criteria(section):
    """Turn one criteria section into a numbered, period-terminated list.

    Blank lines, bullet-only lines and bare section headers are dropped, and
    a single leading bullet marker is removed from each remaining line.
    Returns ``''`` when nothing is left.
    """
    items = []
    for raw_line in re.split(r'\r?\n+', section.strip()):
        line = _BULLET_PREFIX.sub('', raw_line.strip()).strip()
        if not line or _SECTION_HEADER.match(line):
            continue
        if not line.endswith('.'):
            line += '.'
        items.append(line)
    return "\n".join(f"{i}. {item}" for i, item in enumerate(items, 1))


def get_formatted_inclusion_criteria(nct_id):
    """
    Get and format inclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    Args:
        nct_id (str): The NCT ID of the trial

    Returns:
        str: Formatted inclusion criteria as a numbered list, or None if not found
    """
    eligibility_criteria = _fetch_eligibility_text(nct_id)
    if eligibility_criteria is None:
        return None
    inclusion_criteria, _ = _split_eligibility(eligibility_criteria)
    return _format_numbered_criteria(inclusion_criteria)


def get_formatted_exclusion_criteria(nct_id):
    """
    Get and format exclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    Args:
        nct_id (str): The NCT ID of the trial

    Returns:
        str: Formatted exclusion criteria as a numbered list, or None if not found
    """
    eligibility_criteria = _fetch_eligibility_text(nct_id)
    if eligibility_criteria is None:
        return None
    _, exclusion_criteria = _split_eligibility(eligibility_criteria)
    if exclusion_criteria is None:
        print(f"Could not find exclusion criteria section for Trial NCT ID: {nct_id}")
        return None
    return _format_numbered_criteria(exclusion_criteria)
{exclusion}") # Join the list into a single string return "\n".join(formatted_exclusions) - except (IndexError, KeyError): - print(f"Exclusion criteria not found for NCT ID: {nct_id}") + except requests.exceptions.RequestException as e: + print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}") return None + except (IndexError, KeyError) as e: + print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}") + return None + except Exception as e: + print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}") + return None + + + ################################# Apply CSS Style to HTML Table ############################################################################################################## @@ -419,9 +547,9 @@ def split_columns(df, columns_to_split): return temp_df -################## Interventional, Observational Trials Lead Sponsor Counts################################################## +################## INTERVENTIONAL, OBSERVATIONAL Trials Lead Sponsor Counts################################################## def calculate_summary_stats(df, sponsor): - study_types = ["Interventional", "Observational"] + study_types = ["INTERVENTIONAL", "OBSERVATIONAL"] summary_stats = [] sponsor_name = sponsor if sponsor else "All Lead Sponsors" @@ -453,7 +581,7 @@ def calculate_summary_stats(df, sponsor): ############################################################################################################################################ def calculate_summary_stats_collb(df, sponsor): - study_types = ["Interventional", "Observational"] + study_types = ["INTERVENTIONAL", "OBSERVATIONAL"] summary_stats = [] sponsor_name = sponsor if sponsor else "All Collaborators" @@ -500,32 +628,32 @@ def calculate_summary_stats_sites(df, sponsor, country): grouped_df['EnrollmentCount'] = pd.to_numeric(grouped_df['EnrollmentCount'], errors='coerce') # Count the number of unique NCTIds for each StudyType - interventional_count = 
len(grouped_df[grouped_df['StudyType'] == 'Interventional']['NCTId'].unique()) - observational_count = len(grouped_df[grouped_df['StudyType'] == 'Observational']['NCTId'].unique()) + INTERVENTIONAL_count = len(grouped_df[grouped_df['StudyType'] == 'INTERVENTIONAL']['NCTId'].unique()) + OBSERVATIONAL_count = len(grouped_df[grouped_df['StudyType'] == 'OBSERVATIONAL']['NCTId'].unique()) # Count the number of unique countries for each StudyType - interventional_countries = df[df['StudyType'] == 'Interventional']['Country'].nunique() - observational_countries = df[df['StudyType'] == 'Observational']['Country'].nunique() + INTERVENTIONAL_countries = df[df['StudyType'] == 'INTERVENTIONAL']['Country'].nunique() + OBSERVATIONAL_countries = df[df['StudyType'] == 'OBSERVATIONAL']['Country'].nunique() # Count the number of unique sites for each StudyType, grouped by Country, City, and Site - interventional_grouped = df[df['StudyType'] == 'Interventional'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0] - observational_grouped = df[df['StudyType'] == 'Observational'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0] + INTERVENTIONAL_grouped = df[df['StudyType'] == 'INTERVENTIONAL'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0] + OBSERVATIONAL_grouped = df[df['StudyType'] == 'OBSERVATIONAL'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0] # Calculate the sum of enrollment counts for each StudyType - interventional_patients = int(grouped_df[grouped_df['StudyType'] == 'Interventional']['EnrollmentCount'].sum()) - observational_patients = int(grouped_df[grouped_df['StudyType'] == 'Observational']['EnrollmentCount'].sum()) + INTERVENTIONAL_patients = int(grouped_df[grouped_df['StudyType'] == 'INTERVENTIONAL']['EnrollmentCount'].sum()) + OBSERVATIONAL_patients = int(grouped_df[grouped_df['StudyType'] == 'OBSERVATIONAL']['EnrollmentCount'].sum()) - 
formatted_interventional_patients = format(interventional_patients, ',') - formatted_observational_patients = format(observational_patients, ',') + formatted_INTERVENTIONAL_patients = format(INTERVENTIONAL_patients, ',') + formatted_OBSERVATIONAL_patients = format(OBSERVATIONAL_patients, ',') sponsor_name = sponsor if sponsor else "All Sponsors" country_name = country if country else "All Countries" - return f"{sponsor_name}
{interventional_count} Interventional Trials, in {interventional_countries} Country, at {interventional_grouped} Sites, \ - Recruiting: {formatted_interventional_patients} Planned Patients.
\ - {observational_count} Observational Trials, in {observational_countries} Country, at {observational_grouped} Sites" + return f"{sponsor_name}
{INTERVENTIONAL_count} INTERVENTIONAL Trials, in {INTERVENTIONAL_countries} Country, at {INTERVENTIONAL_grouped} Sites, \ + Recruiting: {formatted_INTERVENTIONAL_patients} Planned Patients.
\ + {OBSERVATIONAL_count} OBSERVATIONAL Trials, in {OBSERVATIONAL_countries} Country, at {OBSERVATIONAL_grouped} Sites" - #{observational_count} Observational Trials, in {observational_countries} Country, at {observational_grouped} Sites, Recruiting: {formatted_observational_patients} Planned Patients." + #{OBSERVATIONAL_count} OBSERVATIONAL Trials, in {OBSERVATIONAL_countries} Country, at {OBSERVATIONAL_grouped} Sites, Recruiting: {formatted_OBSERVATIONAL_patients} Planned Patients." ################################################ GRADIO STARTS HERE ######################################################### @@ -951,8 +1079,8 @@ import numpy as np def plot_condition_sunburst (df): - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] # Convert 'Condition' names to uppercase df['Condition'] = df['Condition'].str.upper() @@ -998,8 +1126,8 @@ def plot_condition_sunburst (df): ############################################################ Conditions OTHERS ########### ############################################ def plot_condition_others (df): - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] # Convert 'Condition' names to uppercase df['Condition'] = df['Condition'].str.upper() @@ -1081,8 +1209,8 @@ def wrap_text(text, max_chars_per_line): def plot_sponsor_collaborator_tree_map(df): - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] # Group the data by 'Sponsor' and 'Collaborator' and count the number of unique NCTId df_count = df.groupby(['Sponsor', 
'Collaborator'])['NCTId'].nunique().reset_index() @@ -1124,8 +1252,8 @@ def plot_sponsor_collaborator_tree_map(df): def plot_sponsor_tree(df): - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] df['Phase'] = df['Phase'].fillna('UNKNOWN') @@ -1168,9 +1296,9 @@ def plot_sponsor_tree(df): icicle_fig.update_layout( title='Sponsor', font=dict(family="Arial", size=14, color='black'), - width= 400, + width= 600, height=1000 - # autosize=True, + # autosize=True # margin=dict(t=50, l=25, r=25, b=25) ) @@ -1182,8 +1310,8 @@ def plot_sponsor_tree(df): def plot_collaborator_icicle(df): - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] df['Phase'] = df['Phase'].fillna('UNKNOWN') @@ -1221,8 +1349,9 @@ def plot_collaborator_icicle(df): icicle_fig.update_layout( title='Collaborators', font=dict(family="Arial", size=14, color='black'), - width= 400, + width= 600, height=1000 + #autosize=True ) @@ -1244,15 +1373,15 @@ def random_color(): return f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})' ############################################################################################################## def plot_drug_sankey(df): - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] # Fill missing values in the 'Phase' column with a placeholder string df['Phase'] = df['Phase'].fillna('UNKNOWN') # Sort by Phase df = df.sort_values(by='Phase') - + #print(df) # Split the conditions df = split_conditions(df, 'Condition') @@ -1356,7 
+1485,7 @@ def random_color(): def plot_condition_treemap_nct_old(df): - df = df[df['StudyType'] == "Interventional"] + df = df[df['StudyType'] == "INTERVENTIONAL"] df['Phase'] = df['Phase'].fillna('UNKNOWN') df = df.sort_values(by='Phase') df = split_conditions(df, 'Condition') @@ -1426,71 +1555,142 @@ def plot_condition_treemap_nct_old(df): fig.update_layout(title_text="Conditions, Trial IDs, Study IDs, Phases for Sponsor", font_size=10, height=height, autosize=True) return fig +######################################### Conditions############################### + +##################################################################################### + +import plotly.graph_objects as go + +def plot_condition_treemap_nct_old(df): + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + #print("tamer") + df = df[df['StudyType'] == "INTERVENTIONAL"] + #print(df) + # Fill missing values in the 'Phase' column with a placeholder string + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Map NCTId to its Title + # Create a dictionary to map NCTId to BriefTitle + nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict() + # Create a dictionary to map NCTId to OrgStudyId + nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict() + + # Create a new dataframe with the required columns + table_df = df[['Condition', 'NCTId', 'Phase']] + + # Add the BriefTitle and OrgStudyId columns + table_df['BriefTitle'] = table_df['NCTId'].map(nctid_to_brieftitle) + table_df['OrgStudyId'] = table_df['NCTId'].map(nctid_to_orgstudyid) + print(table_df) + # Sort the dataframe by Condition alphabetically + table_df = table_df.sort_values('Condition') + + + # Create a Plotly Table + fig = go.Figure(data=[go.Table( + header=dict( + values=['Condition', 'NCTId', 'OrgStudyId', 'BriefTitle', 'Phase'], + fill_color='paleturquoise', + align='left', + font=dict(size=16, color='black') + ), + cells=dict( + values=[table_df.Condition, table_df.NCTId, table_df.OrgStudyId, 
table_df.BriefTitle, table_df.Phase], + align='left', + font=dict(size=14, color='black') + ) + )]) + + fig.update_layout( + autosize=True, + height=1000, + title_text="Conditions with NCTIds and Phases", + title_x=0.5, + font=dict(size=18) + ) + + return fig -####################################### -def plot_condition_treemap_nct (df): - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + + + +#################################################################################### +def plot_condition_treemap_nct(df): + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] # Fill missing values in the 'Phase' column with a placeholder string df['Phase'] = df['Phase'].fillna('UNKNOWN') - # Sort by Phase - df = df.sort_values(by='Phase') - # Map NCTId to its Title # Create a dictionary to map NCTId to BriefTitle nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict() - # Create a dictionary to map NCTId to BriefTitle + # Create a dictionary to map NCTId to OrgStudyId nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict() - icicle_df = pd.DataFrame(columns=['ids', 'labels', 'parents']) + # Create separate dataframes for each level + condition_df = pd.DataFrame(columns=['ids', 'labels', 'parents']) + nctid_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle']) + phase_df = pd.DataFrame(columns=['ids', 'labels', 'parents']) - # Add the "Trials" root node - icicle_df = pd.concat([icicle_df, pd.DataFrame({ - 'ids': ["Trials"], - 'labels': ["Trials"], + # Add the "Conditions" root node + condition_df = pd.concat([condition_df, pd.DataFrame({ + 'ids': ["Conditions"], + 'labels': ["Conditions"], 'parents': [""] })], ignore_index=True) # Add the Condition level - icicle_df = pd.concat([icicle_df, pd.DataFrame({ - 'ids': df['Condition'].unique(), - 'labels': df['Condition'].unique(), - 'parents': ["Trials"] * 
len(df['Condition'].unique()) - })], ignore_index=True) - - # Add the Phase level - for condition in df['Condition'].unique(): - temp_df = df[df['Condition'] == condition] - phases = temp_df['Phase'].unique() - icicle_df = pd.concat([icicle_df, pd.DataFrame({ - 'ids': [f"{condition}-{phase}" for phase in phases], - 'labels': phases, - 'parents': [condition] * len(phases) + conditions = df['Condition'].unique() + for condition in conditions: + condition_df = pd.concat([condition_df, pd.DataFrame({ + 'ids': [condition], + 'labels': [condition], + 'parents': ["Conditions"] })], ignore_index=True) # Add the NCTId level + for condition in conditions: + temp_df = df[df['Condition'] == condition] + nctids = temp_df['NCTId'].unique() + for nctid in nctids: + nctid_df = pd.concat([nctid_df, pd.DataFrame({ + 'ids': [f"{condition}-{nctid}"], + 'labels': [f"{nctid} ({nctid_to_orgstudyid[nctid]})"], + 'parents': [condition], + 'brieftitle': [nctid_to_brieftitle[nctid]] + })], ignore_index=True) + + # Sort the Conditions alphabetically from A to Z + condition_df = condition_df.sort_values('labels') + nctid_df['parents'] = pd.Categorical(nctid_df['parents'], categories=condition_df['ids'], ordered=True) + nctid_df = nctid_df.sort_values('parents') + + # Add the Phase level for _, row in df.iterrows(): - icicle_df = pd.concat([icicle_df, pd.DataFrame({ - 'ids': [row['NCTId']], - 'labels': [f"{row['NCTId']} ({nctid_to_orgstudyid[row['NCTId']]}) ({nctid_to_brieftitle[row['NCTId']]})"], - 'parents': [f"{row['Condition']}-{row['Phase']}"] + phase_df = pd.concat([phase_df, pd.DataFrame({ + 'ids': [f"{row['Condition']}-{row['NCTId']}-{row['Phase']}"], + 'labels': [row['Phase']], + 'parents': [f"{row['Condition']}-{row['NCTId']}"] })], ignore_index=True) + # Concatenate the dataframes in the desired order + icicle_df = pd.concat([condition_df, nctid_df, phase_df], ignore_index=True) + fig = go.Figure(go.Icicle( ids=icicle_df.ids, labels=icicle_df.labels, parents=icicle_df.parents, 
root_color="lightgrey", textfont=dict(size=34, family="Arial"), - hovertemplate="NCTId: %{id}
OrgStudyId: %{customdata[0]}
BriefTitle: %{customdata[1]}", - customdata=list(zip(icicle_df.ids.map(nctid_to_orgstudyid).fillna(''), icicle_df.ids.map(nctid_to_brieftitle).fillna(''))) + hovertext=icicle_df['brieftitle'], + hoverinfo='text', + hoverlabel=dict(namelength=-1) )) fig.update_layout(autosize=True, height=1000) @@ -1498,7 +1698,9 @@ def plot_condition_treemap_nct (df): return fig -####################################### +############################################################ + + ########################################################################################################################### import re @@ -1610,7 +1812,7 @@ import numpy as np ################################################################### COUNTRY PLOTS ################################################################ def plot_trial_country_map(df): - df = df[df['StudyType'] == "Interventional"] + df = df[df['StudyType'] == "INTERVENTIONAL"] df['Phase'] = df['Phase'].fillna('UNKNOWN') df = df.sort_values(by='Phase') @@ -1696,7 +1898,7 @@ def plot_trial_sites(df): return text[:nearest_space] + '
' + insert_line_break(text[nearest_space:].strip(), max_length) - df = df[df['StudyType'] == "Interventional"] + df = df[df['StudyType'] == "INTERVENTIONAL"] df['Phase'] = df['Phase'].fillna('UNKNOWN') df = df.sort_values(by='Phase') @@ -1772,7 +1974,7 @@ def plot_trial_site_map(df): return text[:nearest_space] + '
' + insert_line_break(text[nearest_space:].strip(), max_length) - df = df[df['StudyType'] == "Interventional"] + df = df[df['StudyType'] == "INTERVENTIONAL"] df['Phase'] = df['Phase'].fillna('UNKNOWN') df = df.sort_values(by='Phase') @@ -1840,8 +2042,8 @@ def plot_trial_bubblemap(df): scatter_plot_end_traces = [] scatter_plot_lines = [] - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] # Fill missing values in the 'Phase' column with a placeholder string df['Phase'] = df['Phase'].fillna('UNKNOWN') @@ -1992,8 +2194,8 @@ def plot_trial_bubblemap_comp(df): scatter_plot_end_traces = [] scatter_plot_lines = [] - # Filter the dataframe for 'StudyType' equal to "Interventional" - df = df[df['StudyType'] == "Interventional"] + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] # Fill missing values in the 'Phase' column with a placeholder string df['Phase'] = df['Phase'].fillna('UNKNOWN') @@ -2207,26 +2409,29 @@ def select_disease(disease_input, disease_input_text): #summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drug -async def disease_view (condition, condition_text, sponsor_input, academia_input): +#async def disease_view (condition, condition_text, sponsor_input, academia_input): +async def disease_view (condition, sponsor_input): # condition = condition.strip() # Remove leading and trailing spaces - sponsor = select_sponsor(sponsor_input, academia_input) - condition = select_disease(condition, condition_text) + #sponsor = select_sponsor(sponsor_input, academia_input) + #condition = select_disease(condition, condition_text) + sponsor = sponsor_input + condition = condition ################# ### List data type errors in type conversion to string needed for regualr expression 
sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None,None, None, None, None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None,None, None, None, None, None @@ -2269,9 +2474,9 @@ async def disease_view (condition, condition_text, sponsor_input, academia_input # Display the DataFrame # evaluate if need to change to collaborator other than top 20 ???? 
- condition_other = plot_condition_others(df) + # condition_other = plot_condition_others(df) #### Sponsor Only - condition_sunburst = plot_condition_sunburst(df) + # condition_sunburst = plot_condition_sunburst(df) ################################################################################ sponsor_tree = plot_sponsor_tree(df) @@ -2281,7 +2486,9 @@ async def disease_view (condition, condition_text, sponsor_input, academia_input if not df2.empty: collaborator_tree = plot_collaborator_icicle(df2) - return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb, condition_other, condition_sunburst ,sponsor_tree, collaborator_tree + return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb,sponsor_tree, collaborator_tree + + # return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb, condition_other, condition_sunburst ,sponsor_tree, collaborator_tree ##################### Assets ################################################################################### @@ -2308,23 +2515,23 @@ async def drug_view(condition, condition_type, s_sponsor_input, s_academia_input #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. 
, & / -].", None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None -################################################################################################ - - +################################################################################################ + + status = "Recruiting" # Call gradio_wrapper_nct with appropriate arguments if condition and sponsor: @@ -2367,34 +2574,37 @@ def select_condition_phc(s_disease_input_phc, s_disease_input_type_phc): return s_disease_input_phc +#async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input): +async def disease_view_phc(condition, s_sponsor_input): + #sponsor = select_sponsor_phc(s_sponsor_input, s_academia_input ) + # condition = select_condition_phc(condition, condition_type) + sponsor = s_sponsor_input + condition = condition + -async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input): - sponsor = select_sponsor_phc(s_sponsor_input, s_academia_input ) - condition = select_condition_phc(condition, condition_type) - ################# ### List data type errors in type conversion to string needed for regualr expression sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if 
len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None -################################################################################################ - - - +################################################################################################ + + + status = "Recruiting" # Call gradio_wrapper_nct with appropriate arguments if condition and sponsor: @@ -2412,6 +2622,7 @@ async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academi # Convert the HTML table to a pandas DataFrame df = pd.read_html(html_table_conditions)[0] + #print(df) #### error traps if df.empty : return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None @@ -2453,23 +2664,23 @@ async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academi #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. 
Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None -################################################################################################ - - +################################################################################################ + + status = "Recruiting" # Call gradio_wrapper_nct with appropriate arguments if condition and sponsor: @@ -2485,9 +2696,10 @@ async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academi # Convert the HTML table to a pandas DataFrame df = pd.read_html(html_table_conditions)[0] + #print(df) #### error traps if df.empty : - return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None + return "The Sponsor Name did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None ####### Address Runtime API Issue to not connecting or fteching from Clinical Trials.gov #tree_map_cond_nct = plot_condition_treemap_nct(df) @@ -2525,25 +2737,25 @@ async def disease_view_phs_n(condition, condition_type, s_sponsor_input, s_acade #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 
or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None -################################################################################################ - - - - +################################################################################################ + + + + status = "Not yet recruiting" # Call gradio_wrapper_nct with appropriate arguments if condition and sponsor: @@ -2608,22 +2820,22 @@ async def disease_view_phs_c(condition, condition_type, s_sponsor_input, s_acade #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None, None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. 
, & / -].", None, None, None, None, None -################################################################################################ - +################################################################################################ + status = "Completed" # Call gradio_wrapper_nct with appropriate arguments @@ -2694,23 +2906,23 @@ async def condition_view(condition, country, condition_type, sponsor_input_con, #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. 
, & / -].", None, None -################################################################################################ - - +################################################################################################ + + status = "Recruiting" summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status) # Convert the HTML table to a pandas DataFrame @@ -2759,25 +2971,25 @@ async def condition_view_s(condition, country, condition_type, sponsor_input_con #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. 
, & / -].", None, None, None -################################################################################################ - - - - +################################################################################################ + + + + status = "Recruiting" summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status) @@ -2795,13 +3007,13 @@ async def condition_view_s(condition, country, condition_type, sponsor_input_con # print(html_table_add) df = pd.read_html(html_table_add)[0] #print(df) - + #### error traps if df.empty : return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None - - - + + + country_site = plot_trial_site_map(df) site_cond = plot_trial_sites(df) @@ -2837,23 +3049,23 @@ async def condition_viewt(condition, country, condition_type, sponsor_input_con, #print(type(sponsor)) condition = ' '.join(condition) if isinstance(condition, list) else condition #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - + if condition is not None and isinstance(condition, str): if len(condition) > 50 or not re.match(allowed_chars, condition): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None - + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. 
Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None -################################################################################################ - - - +################################################################################################ + + + status = "Recruiting" summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status) # Convert the HTML table to a pandas DataFrame @@ -2892,25 +3104,25 @@ async def condition_view_map(condition, country, sponsor_input_con_map, academia sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor #print(type(condition)) -############################CHECK Sponsor and Condition Inputs ######################################################### +############################CHECK Sponsor and Condition Inputs ######################################################### import re ################ # def check_input(condition, sponsor): allowed_chars = r'^[A-Za-z .,&/()-]*$' - - + + if sponsor is not None and isinstance(sponsor, str): if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. 
, & / -].", None, None -################################################################################################ -################################################################################################ - - - - - +################################################################################################ +################################################################################################ + + + + + status = "Recruiting" summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status ) # print(html_table_add) @@ -2976,301 +3188,12 @@ def format_html_list(html_string): return formatted_html ######################################################################################## -def format_html_list_old(html_string): - # Split the input string by numbers followed by a period and a space - items = re.split(r'(\d+\.\s)', html_string) - - # Combine the split items into a list of strings, removing the original numbers - formatted_items = [number + text for number, text in zip(items[1::2], items[2::2])] - - # Remove unwanted characters from each item - formatted_items = [re.sub(r':\.', '', item) for item in formatted_items] - formatted_items = [re.sub(r'General\.', '', item) for item in formatted_items] - - # Filter out empty list items - formatted_items = [item for item in formatted_items if item.strip()] - - # Join the list items with line breaks to create an HTML string - formatted_html = "
".join(formatted_items) - - return formatted_html - -# Function to convert a list of formatted criteria to a dictionary - -# ############################# Hugging Face Model Invoke #################################### - -import os -import io -from IPython.display import Image, display, HTML -from PIL import Image -import base64 -import gradio as gr -import requests, json - -################################################################ NLP Model ####################################### -# API Token and Model Name -API_TOKEN = "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" -MODEL_NAME = "d4data/biomedical-ner-all" -#MODEL_NAME = "kormilitzin/en_core_spancat_med7_lg" - - -############################################################################################################################ - -def merge_tokens(tokens): - if not tokens: - return [] - - merged_tokens = [] - for token in tokens: - if (merged_tokens and - token['entity_group'] == merged_tokens[-1]['entity_group']): - # If current token continues the entity of the last one, merge them - last_token = merged_tokens[-1] - last_token['word'] += token['word'].replace('##', '') - last_token['end'] = token['end'] - last_token['score'] = (last_token['score'] + token['score']) / 2 - else: - # Otherwise, add the token to the list - merged_tokens.append(token) - - return merged_tokens - -# Function to call Hugging Face API################################################################################ -def get_completion(text): - headers = {"Authorization": f"Bearer {API_TOKEN}"} - data = {"inputs": text, "max_tokens": 2048} # Set the max_tokens parameter - #data = {"inputs": text, "max_tokens": 512} # Set the max_tokens parameter - data = {"inputs": text} # Set the max_tokens parameter - response = requests.post(f"https://api-inference.huggingface.co/models/{MODEL_NAME}", headers=headers, json=data, timeout= 90) - # Print the response content - print(f"From Hugging Face API: {response.text}") - return response.json() 
-# Split texts when longer than 2048 tokens -from transformers import AutoTokenizer -# Load the tokenizer for the model -tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) ##################################################################################### -# Function to split the input text into chunks -def split_input_text(text, max_tokens): - tokens = tokenizer.encode(text) - token_chunks = [] - - for i in range(0, len(tokens), max_tokens): - token_chunk = tokens[i:i + max_tokens] - token_chunks.append(tokenizer.decode(token_chunk)) - - # Debug: Print the token length of the current chunk - print(f"Token length of chunk {len(token_chunks)}: {len(token_chunk)}") - - return token_chunks - -# Function to remove HTML tags from the input text -def remove_html_tags(text): - clean_text = re.sub('<[^>]*>', ' ', text) - return clean_text - -def ner_oll (input): - max_retries = 10 - retries = 0 - output = None - - # Remove HTML tags from the input text - input_no_html = remove_html_tags(input) - - # Split the input text into chunks - input_chunks = split_input_text(input_no_html, 500) - - # Initialize an empty list to store the merged tokens from all chunks - all_merged_tokens = [] - - # Debug: Print the number of chunks created - print(f"Number of input chunks: {len(input_chunks)}") - - api_calls = 0 # Counter for API calls - - for input_chunk in input_chunks: - while retries < max_retries: - try: - output = get_completion(input_chunk) - #print(output) - api_calls += 1 # Increment the API calls counter - # Check if the output is empty - if output: - # Check if the output contains an error message - if 'error' in output: - print("Error in API response, retrying...") - retries += 1 - continue - break - else: - raise ValueError("Empty output") - except Exception as e: - print(f"Error in API call: {e}") - retries += 1 - - if output is None or 'error' in output: - print("Failed to get API response after maximum 10 retries.") - return {"text": input, "entities": []} - - 
merged_tokens = merge_tokens(output) - # Debug: Print the merged tokens for the current output - print(f"Merged tokens for chunk {api_calls}: {merged_tokens}") - - all_merged_tokens.extend(merged_tokens) - - print(all_merged_tokens) - - # Debug: Print the number of API calls made - # print(f"Number of API calls made: {api_calls}") - - return {"text": input, "entities": all_merged_tokens} - -########################################################################################################## - -from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification - -# Load the tokenizer and model for the pipeline -tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all") -model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all") - -# Create the NER pipeline -pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple") -############################################################################################## - -def ner(input): - max_retries = 10 - retries = 0 - output = None - # Initialize an empty list to store the merged tokens from all chunks - all_merged_tokens = [] - # Remove HTML tags from the input text - input_no_html = remove_html_tags(input) - - # Split the input text into chunks - input_chunks = split_input_text(input_no_html, 500) - - # Initialize an empty list to store the entities from all chunks - all_entities = [] - - # Debug: Print the number of chunks created - print(f"Number of input chunks: {len(input_chunks)}") - - for input_chunk in input_chunks: - while retries < max_retries: - try: - output = pipe(input_chunk) - if output: - break - else: - raise ValueError("Empty output") - except Exception as e: - print(f"Error in pipeline call: {e}") - retries += 1 - - if output is None: - print("Failed to get pipeline output after maximum 10 retries.") - return {"text": input, "entities": []} - - # Remove unwanted entity groups - filtered_output = [ - entity for entity 
in output - if entity['entity_group'] not in [ - 'Coreference', - 'Detailed_description', - 'Lab_value', - # 'Diagnostic_procedure', - 'Personal_background', - 'History', - 'Family_history', - 'Outcome', - 'Subject', - 'Date', - 'Distance', - 'Severity', - 'Activity', - 'Duration', - 'Administration', - 'Sex', - 'Age', - 'Sign_symptom', - 'Therapeutic_procedure', - 'Biological_structure' - ] - ] - - # Debug: Print the entities for the current output after filtering - print(f"Filtered entities for chunk {len(all_entities) + 1}: {filtered_output}") - - merged_tokens = merge_tokens(filtered_output) - all_merged_tokens.extend(merged_tokens) - - print(all_entities) - - - return {"text": input, "entities": all_merged_tokens} - - - - - - - -############################################################################## -def ner_unflitered(input): - max_retries = 10 - retries = 0 - output = None - # Initialize an empty list to store the merged tokens from all chunks - all_merged_tokens = [] - # Remove HTML tags from the input text - input_no_html = remove_html_tags(input) - - # Split the input text into chunks - input_chunks = split_input_text(input_no_html, 500) - - # Initialize an empty list to store the entities from all chunks - all_entities = [] - - # Debug: Print the number of chunks created - print(f"Number of input chunks: {len(input_chunks)}") - - for input_chunk in input_chunks: - while retries < max_retries: - try: - output = pipe(input_chunk) - # Check if the output is empty - if output: - #print(output) - break - else: - raise ValueError("Empty output") - except Exception as e: - print(f"Error in pipeline call: {e}") - retries += 1 - - if output is None: - print("Failed to get pipeline output after maximum 10 retries.") - return {"text": input, "entities": []} - - # Debug: Print the entities for the current output - print(f"Entities for chunk {len(all_entities) + 1}: {output}") - - merged_tokens = merge_tokens(output) - # Debug: Print the merged tokens for the 
current output - #print(f"Merged tokens for chunk {api_calls}: {merged_tokens}") - - all_merged_tokens.extend(merged_tokens) - #all_entities.extend(output) - - print(all_entities) - - return {"text": input, "entities": all_merged_tokens} - ############################################################################################################################################# async def trial_view_map(nctID): nctID = nctID.strip() # Remove leading and trailing spaces @@ -3292,41 +3215,108 @@ async def trial_view_map(nctID): df = pd.read_html(html_table_add)[0] world_map = plot_trial_site_world_map(df) if world_map is None: - return "Sorry, the plot could not be generated. Please try again by slecting a country!", None, None + return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None return summary_stats_sites, world_map, html_table_add #return html_table, formatted_html_inclusions,formatted_html_exclusions,world_map #################################################################################################################################################### +import plotly.graph_objects as go -async def trial_view (nctID): - nctID = nctID.strip() # Remove leading and trailing spaces -###### # Check if nctID is valid +def split_numbered_criteria(text): + """Split text into list of criteria based on numbered lines""" + if not text: + return [] + + criteria = [] + current = [] + + for line in text.split('\n'): + line = line.strip() + if line: + # Check if line starts with a number followed by period + if line[0].isdigit() and '. 
' in line[:4]: + if current: + criteria.append(' '.join(current)) + current = [line] + else: + current.append(line) + + # Add the last criteria + if current: + criteria.append(' '.join(current)) + + return criteria if criteria else ["No criteria available"] + +def display_criteria_table(inclusion_text, exclusion_text): + """ + Create a two-column Plotly table with inclusion and exclusion criteria + split into separate rows based on numbering + """ + try: + # Split both texts into lists of criteria + inclusion_list = split_numbered_criteria(inclusion_text) + exclusion_list = split_numbered_criteria(exclusion_text) + + # Make lists equal length by padding with empty strings + max_length = max(len(inclusion_list), len(exclusion_list)) + inclusion_list.extend([''] * (max_length - len(inclusion_list))) + exclusion_list.extend([''] * (max_length - len(exclusion_list))) + + # Create the table + fig = go.Figure(data=[go.Table( + columnwidth=[500, 500], # Equal width columns + header=dict( + values=['Inclusion Criteria', 'Exclusion Criteria'], + fill_color='#e6f3ff', + align=['left', 'left'], + font=dict(size=14, color='black'), + height=40 + ), + cells=dict( + values=[inclusion_list, exclusion_list], + fill_color=[['white', '#f9f9f9'] * max_length], # Alternating row colors + align=['left', 'left'], + font=dict(size=12), + height=None, + line=dict(color='lightgrey', width=1) # Add light borders + ) + )]) + + # Update layout + fig.update_layout( + title="Trial Eligibility Criteria", + width=1200, + height=max(400, max_length * 30 + 100), # Dynamic height based on content + margin=dict(l=20, r=20, t=40, b=20) + ) + + return fig + + except Exception as e: + print(f"Error in display_criteria_table: {str(e)}") + return None + +async def trial_view(nctID): + nctID = nctID.strip() if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12): return "Not a Valid NCT ID has been entered", None, None status = "Recruiting" summary_stats,summary_stats_collb, html_table_conditions, 
html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(NCTId=nctID, status = status) - #### error traps formatted_inclusions = get_formatted_inclusion_criteria(nctID) - print(formatted_inclusions) formatted_exclusions = get_formatted_exclusion_criteria(nctID) - print( formatted_exclusions) - # Check if both formatted_inclusions and formatted_exclusions are empty if not formatted_inclusions and not formatted_exclusions: return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None - inclusion_concepts = ner(formatted_inclusions) - exclusion_concepts = ner(formatted_exclusions) - - + # Create single table with both criteria + criteria_table = display_criteria_table(formatted_inclusions, formatted_exclusions) - #return html_table, formatted_html_inclusions, formatted_html_exclusions,inclusion_concepts,exclusion_concepts - return html_table, inclusion_concepts,exclusion_concepts + return html_table, criteria_table @@ -3345,137 +3335,7 @@ with trial_app: #gr.Markdown("

Now Recruiting Trials:

") with gr.Tabs(): - ############################################################ Sponsors ###################################################################### - with gr.TabItem("Sponsors"): - # 1st Row -#################################################################################################################################################### - with gr.Row(): - gr.HTML(''' -

Sponsors for 'Now Recruiting' Trials:

-

1. Select a Sponsor and click 'Show Sponsor'.

-

2. Filter Conditions by selecting a 'Disease Area', for example, Oncology.

- ''') - #

Instructions on finding Sponsors with Now Recruiting Trials:

-##################################################################################################################### - with gr.Row(): -########################################################################################################## - with gr.Column(): - sponsor_input = gr.Dropdown( -############################################################################ - choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ - "CSL Behring", "Daiichi Sankyo, Inc.",\ - "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ - "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ - "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], - label="Select a Sponsor" - ) - -############################################################################################################################################################################################################# - - -####################################################################################################################################################### - with gr.Column(): - #disease_input = gr.inputs.Dropdown( - disease_input = gr.Dropdown( - choices=[ "Cardiovascular Diseases", \ - "Depressive Disorder","Digestive System Diseases","Endocrine System Diseases",\ - "Eye Diseases","Heart Diseases", "Immune System Diseases", "Infections","Liver Diseases", \ - "Metabolic Diseases","Neoplasms","Nervous System Diseases","Oncology" , \ - "Renal Diseases", "Respiratory Tract Diseases", \ - "Skin Diseases","Stress Disorder", "Virology" \ - ], - label="Filter by a Disease Area" - ) - -############################################################################################################################################## 
-########################################################################################################## - # with gr.Column(): - # disease_input_text = gr.Textbox(lines=1, label="Or Type a Disease Area:") - - -################# # 3rd row################################################################# - with gr.Row(): - #with gr.Column(): - sponsor_button = gr.Button("Show Sponsor") -##################################################################################################################### - - - # Then, create the clear button and add the dropdown input to it - clear_btn = gr.ClearButton() - clear_btn.add(sponsor_input) - clear_btn.add(disease_input) - -#################################################################################################### - - - with gr.Row(): -############################################## - - with gr.Column(): - academia_input = gr.Textbox(lines=1, label="Type a Sponsor Name: ") - clear_btn.add(academia_input) -########################################################################################################## - with gr.Column(): - disease_input_text = gr.Textbox(lines=1, label="Type a Disease Area:") - clear_btn.add(disease_input_text) - -################# # 3rd row################################################################# -#################################################################################################### - - - # with gr.Row(): - # gr.HTML(''' - #

3. If typing a Sponsor, the name needs to fully match with ClinicalTrials.gov.

- #

4. To find more Sponsors, only type a 'Disease Name', for example, 'Pancreatic Cancer' and click 'Show Sponsor'.

- - # ''') -# #

4. When typing a Sponsor Name, needs to fully match with ClinicalTrials.gov reporting, for example, 'Seagen Inc.' and not'Seagen'

-################################################################################################################################### -################################################################################################################################################################################## - with gr.Row(): - with gr.Column(): - summary_block = gr.HTML(label="Lead Sponsors for Recruiting Clinical Trials:" ) - # with gr.Column(): - # summary_block_collbs = gr.HTML(label="Collaborators in Recruiting Clinical Trials:" ) - with gr.Row(): - with gr.Column(): - sponsor_trees = gr.Plot() - with gr.Column(): - collaborator_trees = gr.Plot() - with gr.Column(): - condition_sunbursts = gr.Plot() - with gr.Column(): - condition_others = gr.Plot() - - - -#################################################################################################################################################### - with gr.Row(): - gr.HTML('

Sponsor Only Trials

') - with gr.Row(): - output_block_conditions = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") - - - - with gr.Row(): - gr.HTML('

Sponsor with Collaborator Trials

') - - with gr.Row(): - summary_block_collbs = gr.HTML(label="Sponsor with Collaborators in Recruiting Clinical Trials:" ) - - - with gr.Row(): - output_block_conditions_collbs = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Collaborators") - - clear_btn.add(summary_block) - clear_btn.add(summary_block_collbs) - clear_btn.add(output_block_conditions) - clear_btn.add(output_block_conditions_collbs) - clear_btn.add(condition_sunbursts) - clear_btn.add(sponsor_trees) - clear_btn.add(collaborator_trees) - clear_btn.add(condition_others) ############################################################################################################################################## ################################################################ Conditions ############################################################################################### @@ -3490,8 +3350,9 @@ with trial_app: with gr.Row(): gr.HTML('''

Conditions for 'Now Recruiting' Trials:

-

1. Select a Sponsor and click 'Show Conditions'.

-

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.

+

1. Select a Sponsor.

+

2. Select a Condition Name, for example, 'Pancreatic Cancer', 'Chronic Kidney Disease', 'MASH' etc.

+

3. Click 'Show Conditions'.

''') @@ -3518,8 +3379,8 @@ with trial_app: ################################################################### with gr.Column(): s_disease_input_phc = gr.Dropdown( - choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ - "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ + choices=["Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ + "Cancer","Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ @@ -3531,7 +3392,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3551,18 +3412,18 @@ with trial_app: # with gr.Column(): ################# # 3rd row################################################################# - with gr.Row(): + # with gr.Row(): ################################################################### - with gr.Column(): - s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:") - clear_btn_phc.add(s_academia_input_phc) + # with gr.Column(): + # s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:") + # clear_btn_phc.add(s_academia_input_phc) ################################################################### - with gr.Column(): + # with gr.Column(): - s_disease_input_type_phc = gr.Textbox(lines=1, 
label="Filter by typing a Condition:") - clear_btn_phc.add(s_disease_input_type_phc) + # s_disease_input_type_phc = gr.Textbox(lines=1, label="Filter by typing a Condition:") + # clear_btn_phc.add(s_disease_input_type_phc) ############################################################################################################################################ ###################################################################################################################################################################### @@ -3602,7 +3463,7 @@ with trial_app: gr.HTML('''

Trials 'Now Recruiting':

1. Select a Sponsor and click 'Show Trials'.

-

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.

+

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.

''') @@ -3639,7 +3500,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3701,7 +3562,7 @@ with trial_app: gr.HTML('''

Drugs for 'Now Recruiting' Trials:

1. Select a Sponsor and click 'Show Drugs'.

-

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.

+

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.

''') ##################################################################################################################################################### @@ -3734,7 +3595,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3827,7 +3688,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -3930,7 +3791,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -4020,7 +3881,7 @@ with trial_app: gr.HTML('''

Timelines for 'Now Recruiting' Trials:

1. Select a Sponsor and click 'Show Timelines'.

-

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'NASH' etc.

+

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.

''') @@ -4056,7 +3917,7 @@ with trial_app: " Major","Metabolic", "Generalized Pustular Psoriasis",\ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ "Psychological Trauma","Renal", "Respiratory",\ "Schizophrenia", "PTSD", \ "Venous Thromboembolism", "Wet"], @@ -4150,295 +4011,39 @@ with trial_app: trial_output = gr.HTML(label="Detail of Recruiting Trials") ################################################ with gr.Row(): - with gr.Column(): - concept_inclusion= gr.HighlightedText(label="Display of Inclusion Concepts") - with gr.Column(): - concept_exclusion= gr.HighlightedText(label="Display of Exclusion Concepts") + # with gr.Column(): + eligibilities_plot = gr.Plot() + # with gr.Column(): + # concept_exclusion= gr.HighlightedText(label="Display of Exclusion Concepts") - clear_tn_btn.add(trial_output) + clear_tn_btn.add(trial_output) # clear_tn_btn.add(formatted_inclusions_output) # clear_tn_btn.add(formatted_exclusions_output) - clear_tn_btn.add(concept_inclusion) - clear_tn_btn.add(concept_exclusion) - - - ############################################################################################################################################## - ############################################################ Trial Map ############## - - - - with gr.TabItem("Trial Sites"): -################################################################################################################################################### - with gr.Row(): - gr.HTML(''' -

Site Map for a Trial:

-

1. Type a single Trial's NCT Id,For Example: NCT05512377 or NCT04924075 or NCT04419506 etc. and click 'Show Sites Map'.

-

3. Wait time approximately 45 seconds to display all Trial Sites in a Map.

- ''') - - - - with gr.Row(): - - #nctID_input = gr.inputs.Textbox(lines=1, label="Type Trial NctId:") - nctID_inputs = gr.Textbox(lines=1, label="Type Trial NCT Id: ") - trial_buttons = gr.Button("Show Sites Map") - #Then, create the clear button and add the dropdown input to it - clear_tn_btns = gr.ClearButton() - clear_tn_btns.add(nctID_inputs ) -################################################ - with gr.Row(): - - summary_block_trial_map = gr.HTML(label="Site Map for Recruiting Clinical Trials:" ) - with gr.Row(): - - world_map = gr.Plot() - - - with gr.Row(): - - trial_output_map = gr.HTML(label="List of Recruiting Country, Sites") - - clear_tn_btns.add(summary_block_trial_map) - clear_tn_btns.add(world_map) - clear_tn_btns.add(trial_output_map) + clear_tn_btn.add(eligibilities_plot) + # clear_tn_btn.add(concept_exclusion) ############################################################################################################################################## - ################################################################ Future Trials ############################################################################################### - with gr.TabItem("Future Trials"): - # 1st Row -#################################################################################################################################################### - with gr.Row(): - gr.HTML(''' -

Future Trials, 'Not Yet Recruiting':

-

1. Select a Sponsor and click 'Show Future Trials'.

-

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease','NASH' etc.

- ''') - - -##################################################################################################################################################### - - with gr.Row(): - with gr.Column(): - -#### ######################################################################################################################################################################################################### - - s_sponsor_input_phs_n = gr.Dropdown( -############################################################################ - choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "BioNTech SE","Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ - "CSL Behring", "Daiichi Sankyo, Inc.",\ - "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ - "Janssen Research & Development, LLC","Merck Sharp & Dohme LLC","ModernaTX, Inc.", \ - "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], - label="Select a Sponsor " - ) - -################################################################################################################################################################################################################## - with gr.Column(): - s_disease_input_phs_n = gr.Dropdown( - choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ - "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ - "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ - "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ - "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ - "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ - "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ - "Urothelial Carcinoma",\ - "Alzheimer","Asthma","Attention Deficit 
Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ - "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ - " Major","Metabolic", "Generalized Pustular Psoriasis",\ - "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ - "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ - "Psychological Trauma","Renal", "Respiratory",\ - "Schizophrenia", "PTSD", \ - "Venous Thromboembolism", "Wet"], - label="Filter by a Condition" - ) - -###################################################################################################################################################################### - # 3rd Row - with gr.Row(): #academia_input = gr.inputs.Dropdown( - s_button_phs_n = gr.Button("Show Future Trials") - - # Then, create the clear button and add the dropdown input to it - clear_btn_phs = gr.ClearButton() - clear_btn_phs.add(s_sponsor_input_phs_n) - - clear_btn_phs.add(s_disease_input_phs_n) - -################# # 3rd row################################################################# - with gr.Row(): -##################################################################################################################################################################### - with gr.Column(): - s_academia_input_phs_n = gr.Textbox(lines=1, label="Type a Sponsor Name:") - clear_btn_phs.add(s_academia_input_phs_n) -################################################################################################################################################################# - with gr.Column(): - s_disease_input_type_phs_n = gr.Textbox(lines=1, label="Filter by typing a Condition:") - clear_btn_phs.add(s_disease_input_type_phs_n) 
-##################################################################################################################################################################################### - - -############################################################################################################################################ - -######################################################################################################################################################################### - with gr.Row(): - - summary_block_phs_n = gr.HTML(label="Conditions and Sponsors Will Recruit for Clinical Trials:" ) - - - with gr.Row(): - # with gr.Column(): - tree_map_cond_nct_n = gr.Plot() -################################################################ -#################################################################################################################################################### - with gr.Row(): - gr.HTML('

Upcoming Trials With Timelines

') - - # with gr.Row(): - # nct_org_map_n = gr.Plot() - ################################################################ - with gr.Row(): - trial_plot = gr.Plot() - - with gr.Row(): - output_block_conditions_phs_n = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") - - clear_btn_phs.add(summary_block_phs_n) - clear_btn_phs.add(output_block_conditions_phs_n) - clear_btn_phs.add(tree_map_cond_nct_n) - clear_btn_phs.add(trial_plot) -############################################################################################################################################## - ################################################################ Completed Trials ############################################################################################### - with gr.TabItem("Completed Trials"): - # 1st Row -#################################################################################################################################################### - with gr.Row(): - gr.HTML(''' -

'Completed' Trials for a Condition:

-

1. Select a Sponsor and a Condition name and click 'Show Completed Trials'.

- ''' - ) - with gr.Row(): - with gr.Column(): - -#### ######################################################################################################################################################################################################### - - s_sponsor_input_phs_c = gr.Dropdown( -############################################################################ - choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "BioNTech SE","Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ - "CSL Behring", "Daiichi Sankyo, Inc.",\ - "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ - "Janssen Research & Development, LLC","Merck Sharp & Dohme LLC","ModernaTX, Inc.", \ - "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], - label="Select a Sponsor " - ) - -################################################################################################################################################################################################################## - with gr.Column(): - s_disease_input_phs_c= gr.Dropdown( - choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ - "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ - "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ - "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ - "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ - "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ - "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ - "Urothelial Carcinoma",\ - "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ - "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic 
Retinopathy","Depression","Depressive Disorder",\ - " Major","Metabolic", "Generalized Pustular Psoriasis",\ - "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ - "Liver Cirrhosis", \ - "NASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ - "Psychological Trauma","Renal", "Respiratory",\ - "Schizophrenia", "PTSD", \ - "Venous Thromboembolism", "Wet"], - label="Filter by a Condition" - ) - -###################################################################################################################################################################### - # 3rd Row - with gr.Row(): #academia_input = gr.inputs.Dropdown( - s_button_phs_c = gr.Button("Show Completed Trials") - - # Then, create the clear button and add the dropdown input to it - clear_btn_phs = gr.ClearButton() - clear_btn_phs.add(s_sponsor_input_phs_c) - - clear_btn_phs.add(s_disease_input_phs_c) -################# # 3rd row################################################################# - with gr.Row(): -##################################################################################################################################################################### - with gr.Column(): - s_academia_input_phs_c = gr.Textbox(lines=1, label="Type a Sponsor Name:") - clear_btn_phs.add(s_academia_input_phs_c) -################################################################################################################################################################# - with gr.Column(): - s_disease_input_type_phs_c = gr.Textbox(lines=1, label="Filter by typing a Condition:") - clear_btn_phs.add(s_disease_input_type_phs_c) -##################################################################################################################################################################################### - - - - 
-######################################################################################################################################################################### - with gr.Row(): - - summary_block_phs_c = gr.HTML(label="Conditions and Sponsors Will Recruit for Clinical Trials:" ) - - - with gr.Row(): - # with gr.Column(): - tree_map_cond_nct_c = gr.Plot() -################################################################ -#################################################################################################################################################### - with gr.Row(): - gr.HTML('

Recruiting Trials With Organization Study Ids

') - - with gr.Row(): - nct_org_map_c = gr.Plot() -####################################################################### - with gr.Row(): - trial_plot_c = gr.Plot() -####################################################################### - with gr.Row(): - time_plot_c = gr.Plot() - - - - with gr.Row(): - output_block_conditions_phs_c = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") - - clear_btn_phs.add(summary_block_phs_c) - clear_btn_phs.add(output_block_conditions_phs_c) - clear_btn_phs.add(tree_map_cond_nct_c) - clear_btn_phs.add(nct_org_map_c) - clear_btn_phs.add(trial_plot_c) - clear_btn_phs.add(time_plot_c) - - - ################################ EVENT BUTTONS at GRADIO ################################################################################################################################ ## Sponsors - - sponsor_button.click(disease_view, inputs=[disease_input,disease_input_text, sponsor_input, academia_input], outputs=[summary_block,summary_block_collbs,\ - output_block_conditions,output_block_conditions_collbs,condition_others,\ - condition_sunbursts,sponsor_trees\ - ,collaborator_trees\ - ]) + #sponsor_button.click(disease_view, inputs=[disease_input,disease_input_text, sponsor_input, academia_input], outputs=[summary_block,summary_block_collbs,\ + # sponsor_button.click(disease_view, inputs=[disease_input, sponsor_input], outputs=[summary_block,summary_block_collbs,\ + # output_block_conditions,output_block_conditions_collbs,\ + #condition_others,\ + #condition_sunbursts, + # sponsor_trees\ + # ,collaborator_trees\ + # ]) ## Conditions - - s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc,s_disease_input_type_phc, s_sponsor_input_phc,s_academia_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\ + # s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc,s_disease_input_type_phc, s_sponsor_input_phc,s_academia_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\ + 
s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc, s_sponsor_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\ tree_map_cond_nct]) @@ -4452,15 +4057,15 @@ with trial_app: #s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\ # tree_map_cond_nct_n, nct_org_map_n,trial_plot]) - s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\ - tree_map_cond_nct_n, trial_plot]) + # s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\ + # tree_map_cond_nct_n, trial_plot]) - s_button_phs_c.click(disease_view_phs_c, inputs=[s_disease_input_phs_c,s_disease_input_type_phs_c, s_sponsor_input_phs_c,s_academia_input_phs_c], outputs=[summary_block_phs_c, output_block_conditions_phs_c,\ - tree_map_cond_nct_c, nct_org_map_c,trial_plot_c, time_plot_c]) + # s_button_phs_c.click(disease_view_phs_c, inputs=[s_disease_input_phs_c,s_disease_input_type_phs_c, s_sponsor_input_phs_c,s_academia_input_phs_c], outputs=[summary_block_phs_c, output_block_conditions_phs_c,\ + # tree_map_cond_nct_c, nct_org_map_c,trial_plot_c, time_plot_c]) ### Drugs @@ -4484,13 +4089,16 @@ with trial_app: # Test this way NCT04419506 # trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output, formatted_inclusions_output,formatted_exclusions_output,concept_inclusion,concept_exclusion]) # Test this way NCT04419506 - trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output,concept_inclusion,concept_exclusion]) + # trial_button.click(trial_view, inputs=[nctID_input], 
outputs=[trial_output,concept_inclusion,concept_exclusion]) + trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output,eligibilities_plot]) - trial_buttons.click(trial_view_map, inputs=[nctID_inputs], outputs=[summary_block_trial_map, world_map,trial_output_map]) + # trial_buttons.click(trial_view_map, inputs=[nctID_inputs], outputs=[summary_block_trial_map, world_map,trial_output_map]) trial_app.launch(share=True) -#trial_app.launch(share=True, debug = "TRUE") +###END + + \ No newline at end of file