"""Configuration for the MIRA models service.

Defines the pydantic ``Settings`` object (model paths, server parameters,
thresholds), module-level model configuration, and cached accessors for the
settings and for the annotation-ranking weight catalog.
"""
from functools import lru_cache

import spacy
from pydantic import BaseSettings, Field

from source.datamodel.common import CountryCode, LineOfBusiness
from source.datamodel.annotation_ranking import Weights, WeightCatalog


class Settings(BaseSettings):
    """Service settings.

    As a pydantic ``BaseSettings`` subclass, every field can be overridden
    through an environment variable; the defaults below apply otherwise.
    """

    # --- Server -----------------------------------------------------------
    SERVER_HOST: str = '0.0.0.0'
    PORT: int = 3000
    # Explicitly annotated so these stay regular, ordered fields (pydantic
    # only infers un-annotated attributes in v1 and rejects them in v2).
    STOP_TIMEOUT: int = 120  # NOTE(review): unit not shown here - presumably seconds
    SLEEP_DURATION: float = 1e-4  # 0.1 ms sleep
    APP_NAME: str = "MIRA MODELS"

    # --- Model storage locations --------------------------------------------
    MIRA_MODELS_BLOB_PATH: str = "Mira/ml_models"
    LOCAL_MIRA_MODELS: str = "ml_models"
    MIRA_INTENT_MODEL: str = "ml_models/intent_classifier/2021-04-09"
    MARINE_NL_NER_MODEL: str = "ml_models/ner_marine_nl/2021-04-09"
    MARINE_NL_RB_MODEL: str = "ml_models/ner_marine_nl/rule_based_annotator/rb_annotator.pkl"
    PROPERTY_NL_NER_MODEL: str = "ml_models/ner_property_nl/ner_v10"
    PROPERTY_BE_NER_MODEL: str = "ml_models/ner_property_be/ner_v10"
    # The two UW model paths name their environment variable explicitly.
    PROPERTY_BE_UW_MODEL: str = Field("ml_models/ner_property_be/uw_property_be_dev", env='PROPERTY_BE_UW_MODEL')
    PROPERTY_NL_UW_MODEL: str = Field("ml_models/ner_property_nl/uw_property_nl_dev", env='PROPERTY_NL_UW_MODEL')

    # --- Address detection --------------------------------------------------
    ADDRESS_DETECTION_LAXONS: str = "ml_models/address_detection/laxons.json"
    ADDRESS_DETECTION_TERMS: str = "ml_models/address_detection/terms.json"
    ADDRESS_DETECTION_BROKER_ADDRESSES: str = "ml_models/address_detection/broker_addresses.json"

    # --- LayoutLM -----------------------------------------------------------
    LAYOUTLM_MODEL: str = "ml_models/layoutlm/layoutlm_model.pth"
    LAYOUTLM_LABEL_MAPPING: str = "ml_models/layoutlm/labels_mapping.json"
    LAYOUTLM_TOKENIZER: str = "ml_models/layoutlm/tokenizer"

    ADDRESS_DETECTION_MAX_LEN: int = 60
    ADDRESS_INDEX_MIN: int = 40
    DEEPPARSE_ROOT_DIR: str = "ml_models/deepparse"
    TSI_THRESHOLD: int = 100000

    # CRF broker model: file path plus the positions of the features it uses.
    # The POS and NER entries are disabled, so LEMMA takes position 2.
    BROKER_MODEL: dict = {
        'CRF_BROKER_MODEL_PATH': r"source/services/ner_crf/model/crf/30_Nov_2023-14h-broker_pycrf.crfsuite",
        'WORD_POSITION': 1,
        #'POS_POSITION': 2,
        'LEMMA_POSITION': 2,
        #'NER_POSITION': 3
    }


# NOTE(review): the names below are laid out as module-level globals; the
# original formatting did not show whether `si_model` was meant to be a
# `Settings` field - confirm against the modules that import it.

# CRF SI model: file path plus the positions of the features it uses.
si_model: dict = {
    'CRF_SI_MODEL_PATH': r"ml_models/si/crf_23_Jun_2022-11h_inclu_lemma_n_amount_with_eur_gt10k_amount.joblib",
    'WORD_POSITION': 1,
    'LEMMA_POSITION': 2,
    'NER_POSITION': 3,
    'POS_POSITION': 4
}

# Loaded once at import time, so importing this module requires the
# 'nl_core_news_md' spaCy package to be installed.
#spacy_pretrained_model_nl_sm = spacy.load('nl_core_news_sm')
spacy_pretrained_model_nl_md = spacy.load('nl_core_news_md')

layoutlm_config: dict = {
    'local_rank': -1,
    'overwrite_cache': True,
    'max_seq_length': 512,
    'model_type': 'layoutlm',
    'cls_token_box': [0, 0, 0, 0],
    'sep_token_box': [1000, 1000, 1000, 1000],
    'pad_token_box': [0, 0, 0, 0],
}


def loss_ratio_params() -> tuple:
    """Return the ``(url, login, password)`` triple for the claim-experience endpoint."""
    # NOTE(review): endpoint and credentials are hard-coded in source;
    # consider moving them into Settings / environment variables.
    url = "http://0.0.0.0:3000/claim-experience-risk-level/"
    login = "clerk"
    pw = "asdfgh"
    return url, login, pw


@lru_cache()
def get_weight_catalog() -> WeightCatalog:
    """Build the weight catalog for property BE/NL; cached, so built only once.

    Registers subject/body/attachment weights for the 'POLICYHOLDER' and
    'BROKER' entries of each line-of-business / country pair.
    """
    weight_catalog = WeightCatalog()

    # NOTE(review): the BROKER weights sum to 0.9 while the POLICYHOLDER
    # weights sum to 1.0 - confirm this is intentional.

    # PROPERTY BE WEIGHTS
    weight_catalog.set_weights(
        LineOfBusiness.property, CountryCode.belgium, 'POLICYHOLDER',
        Weights(subject=0.7, body=0.2, attachment=0.1))
    weight_catalog.set_weights(
        LineOfBusiness.property, CountryCode.belgium, 'BROKER',
        Weights(subject=0.1, body=0.6, attachment=0.2))

    # PROPERTY NL WEIGHTS
    weight_catalog.set_weights(
        LineOfBusiness.property, CountryCode.netherlands, 'POLICYHOLDER',
        Weights(subject=0.7, body=0.2, attachment=0.1))
    weight_catalog.set_weights(
        LineOfBusiness.property, CountryCode.netherlands, 'BROKER',
        Weights(subject=0.1, body=0.6, attachment=0.2))

    return weight_catalog


@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide ``Settings`` instance (created on first call, then cached)."""
    return Settings()


# Instantiate the settings
settings = get_settings()