# presidio/transformers_rec/configuration.py
# Uploaded by omri374 ("Upload 2 files", commit 46c6fe7, 3.43 kB).
## Taken from https://github.com/microsoft/presidio/blob/main/docs/samples/python/transformers_recognizer/configuration.py
# Settings for using the StanfordAIMI/stanford-deidentifier-base NER model
# with Presidio entity names.
# NOTE: the constant's name is misspelled ("COFIGURATION"). It is kept as-is
# so existing imports keep working; a correctly spelled alias follows the dict.
STANFORD_COFIGURATION = {
    # Model identifier/path used when the caller does not supply one.
    "DEFAULT_MODEL_PATH": "StanfordAIMI/stanford-deidentifier-base",
    # Presidio entity names this configuration may produce.
    "PRESIDIO_SUPPORTED_ENTITIES": [
        "LOCATION",
        "PERSON",
        "ORGANIZATION",
        "AGE",
        "PHONE_NUMBER",
        "EMAIL",
        "DATE_TIME",
        "DEVICE",
        "ZIP",
        "PROFESSION",
        "USERNAME",
        "ID",
    ],
    # Labels that should be dropped rather than reported ("O" = outside).
    "LABELS_TO_IGNORE": ["O"],
    # Explanation template; "{}" is presumably filled with the entity label
    # by the consumer -- confirm against the recognizer that reads this.
    "DEFAULT_EXPLANATION": "Identified as {} by the StanfordAIMI/stanford-deidentifier-base NER model",
    # NOTE(review): presumably forwarded as the token-classification
    # aggregation strategy -- confirm against the recognizer.
    "SUB_WORD_AGGREGATION": "simple",
    # Dataset annotation label -> Presidio entity name.
    "DATASET_TO_PRESIDIO_MAPPING": {
        "DATE": "DATE_TIME",
        "DOCTOR": "PERSON",
        "PATIENT": "PERSON",
        "HOSPITAL": "LOCATION",
        "MEDICALRECORD": "ID",
        "IDNUM": "ID",
        "ORGANIZATION": "ORGANIZATION",
        "ZIP": "ZIP",
        "PHONE": "PHONE_NUMBER",
        "USERNAME": "USERNAME",
        "STREET": "LOCATION",
        "PROFESSION": "PROFESSION",
        "COUNTRY": "LOCATION",
        "LOCATION-OTHER": "LOCATION",
        "FAX": "PHONE_NUMBER",
        "EMAIL": "EMAIL",
        "STATE": "LOCATION",
        "DEVICE": "DEVICE",
        "ORG": "ORGANIZATION",
        "AGE": "AGE",
    },
    # Model output label -> Presidio entity name.
    "MODEL_TO_PRESIDIO_MAPPING": {
        "PER": "PERSON",
        "PERSON": "PERSON",
        "LOC": "LOCATION",
        "ORG": "ORGANIZATION",
        "AGE": "AGE",
        "PATIENT": "PERSON",
        "HCW": "PERSON",
        "HOSPITAL": "LOCATION",
        "PATORG": "ORGANIZATION",
        "DATE": "DATE_TIME",
        "PHONE": "PHONE_NUMBER",
        "VENDOR": "ORGANIZATION",
        # "ID" is listed in PRESIDIO_SUPPORTED_ENTITIES and is the
        # ID_ENTITY_NAME below, so give it an explicit mapping instead of
        # relying on whatever fallback the consumer applies to unknown labels.
        "ID": "ID",
    },
    # NOTE(review): chunking parameters for long inputs; the unit
    # (characters vs. tokens) is not visible here -- confirm with the consumer.
    "CHUNK_OVERLAP_SIZE": 40,
    "CHUNK_SIZE": 600,
    # NOTE(review): presumably a factor applied to the score of results whose
    # entity is ID_ENTITY_NAME -- confirm against the recognizer.
    "ID_SCORE_MULTIPLIER": 0.4,
    "ID_ENTITY_NAME": "ID",
}

# Correctly spelled alias; both names refer to the very same dict object.
STANFORD_CONFIGURATION = STANFORD_COFIGURATION
# Settings for using the obi/deid_roberta_i2b2 NER model with Presidio
# entity names.
BERT_DEID_CONFIGURATION = {
    # Presidio entity names this configuration may produce.
    "PRESIDIO_SUPPORTED_ENTITIES": [
        "LOCATION",
        "PERSON",
        "ORGANIZATION",
        "AGE",
        "PHONE_NUMBER",
        "EMAIL",
        "DATE_TIME",
        "ZIP",
        "PROFESSION",
        "USERNAME",
        "ID",
    ],
    # Model identifier/path used when the caller does not supply one.
    "DEFAULT_MODEL_PATH": "obi/deid_roberta_i2b2",
    # Labels that should be dropped rather than reported ("O" = outside).
    "LABELS_TO_IGNORE": ["O"],
    # Explanation template; "{}" is presumably filled with the entity label
    # by the consumer -- confirm against the recognizer that reads this.
    "DEFAULT_EXPLANATION": "Identified as {} by the obi/deid_roberta_i2b2 NER model",
    # NOTE(review): presumably forwarded as the token-classification
    # aggregation strategy -- confirm against the recognizer.
    "SUB_WORD_AGGREGATION": "simple",
    # Dataset annotation label -> Presidio entity name. Labels this
    # configuration does not report are mapped to "O" (see LABELS_TO_IGNORE).
    "DATASET_TO_PRESIDIO_MAPPING": {
        "DATE": "DATE_TIME",
        "DOCTOR": "PERSON",
        "PATIENT": "PERSON",
        "HOSPITAL": "ORGANIZATION",
        "MEDICALRECORD": "O",
        "IDNUM": "O",
        "ORGANIZATION": "ORGANIZATION",
        "ZIP": "O",
        "PHONE": "PHONE_NUMBER",
        # Was "" -- an empty string is neither an ignored label nor a
        # supported entity. Use "O" like the other dropped labels here.
        # NOTE(review): if USERNAME should be reported instead, map it to
        # "USERNAME" -- confirm the intended behavior.
        "USERNAME": "O",
        "STREET": "LOCATION",
        "PROFESSION": "PROFESSION",
        "COUNTRY": "LOCATION",
        "LOCATION-OTHER": "LOCATION",
        "FAX": "PHONE_NUMBER",
        "EMAIL": "EMAIL",
        "STATE": "LOCATION",
        "DEVICE": "O",
        "ORG": "ORGANIZATION",
        "AGE": "AGE",
    },
    # Model output label -> Presidio entity name.
    "MODEL_TO_PRESIDIO_MAPPING": {
        "PER": "PERSON",
        "LOC": "LOCATION",
        "ORG": "ORGANIZATION",
        "AGE": "AGE",
        "ID": "ID",
        "EMAIL": "EMAIL",
        "PATIENT": "PERSON",
        "STAFF": "PERSON",
        "HOSP": "ORGANIZATION",
        "PATORG": "ORGANIZATION",
        "DATE": "DATE_TIME",
        "PHONE": "PHONE_NUMBER",
    },
    # NOTE(review): chunking parameters for long inputs; the unit
    # (characters vs. tokens) is not visible here -- confirm with the consumer.
    "CHUNK_OVERLAP_SIZE": 40,
    "CHUNK_SIZE": 600,
    # NOTE(review): presumably a factor applied to the score of results whose
    # entity is ID_ENTITY_NAME -- confirm against the recognizer.
    "ID_SCORE_MULTIPLIER": 0.4,
    "ID_ENTITY_NAME": "ID",
}