Spaces:
Sleeping
Sleeping
import re | |
import ast | |
import requests | |
from typing import Union | |
def _find_nctid(text: str) -> Union[str,None]: | |
"return nct string if found in text else none" | |
match = re.search(r"[Nn][Cc][Tt]0*[1-9]\d{0,7}", text) | |
return match[0] if match is not None else match | |
def _get_registry_outcomes(nct_id: str, timeout: float = 30.0) -> Union[dict, None]:
    """Fetch a study's outcomes module from the ClinicalTrials.gov API v2.

    Args:
        nct_id: Study identifier interpolated into the request URL.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``; when
            exceeded, ``requests`` raises instead of waiting forever.

    Returns:
        The ``protocolSection.outcomesModule`` object of the response, or
        None when the status code is not 200 or the module is absent.
    """
    response = requests.get(
        f"https://clinicaltrials.gov/api/v2/studies/{nct_id}",
        params={"fields": "OutcomesModule"},
        timeout=timeout,
    )
    if response.status_code != 200:
        return None
    # Decode the body as JSON exactly once. ast.literal_eval is not a JSON
    # parser: it raises on the JSON literals true/false/null.
    payload = response.json()
    # Tolerate a missing or null protocolSection rather than raising KeyError.
    protocol_section = payload.get("protocolSection") or {}
    return protocol_section.get("outcomesModule")
def _reformat_outcomes(outcomes: dict) -> list[dict[str,str]]: | |
new_outcomes = [] | |
for outcome_type, outcome_list in outcomes.items() : | |
outcome_type = outcome_type.replace("Outcomes","") | |
for outcome_item in outcome_list : | |
outcome_item["type"] = outcome_type | |
new_outcomes.append(outcome_item) | |
return new_outcomes | |
def extract_nct_outcomes(text: str) -> Union[None, list[dict[str, str]]]:
    """Extract registry outcomes for the NCT id mentioned in ``text``.

    Looks for an NCT identifier in ``text``, fetches its outcomes module via
    ``_get_registry_outcomes`` and flattens it with ``_reformat_outcomes``.

    Args:
        text: Free text that may contain an NCT identifier; may be None.

    Returns:
        The flattened list of outcomes, or None when ``text`` is empty/None,
        contains no NCT identifier, or the registry returned no outcomes.
    """
    if not text:
        return None
    nct_id = _find_nctid(text)
    if nct_id is None:
        return None
    outcomes = _get_registry_outcomes(nct_id)
    # The registry lookup yields None on a failed request or a study without
    # an outcomes module; reformatting None would raise AttributeError.
    if outcomes is None:
        return None
    return _reformat_outcomes(outcomes)