File size: 3,804 Bytes
1d6faef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import spacy
from functools import lru_cache
from pydantic import BaseSettings, Field
from source.datamodel.common import CountryCode, LineOfBusiness
from source.datamodel.annotation_ranking import Weights, WeightCatalog


class Settings(BaseSettings):
    """Central configuration for the MIRA models service.

    Declares the server binding, the locations of the ML model artefacts,
    a few numeric thresholds and the feature-layout dictionaries used by the
    CRF and LayoutLM components.  Because this is a pydantic ``BaseSettings``
    subclass, field values can be supplied from the environment; the values
    written here are the defaults.
    """

    # --- Server / process -------------------------------------------------
    SERVER_HOST: str = '0.0.0.0'
    PORT: int = 3000
    # NOTE(review): the next two attributes carry no type annotation, unlike
    # every other scalar in this class — confirm this is intentional.
    STOP_TIMEOUT = 120
    SLEEP_DURATION = 1e-4  # 0.1 ms sleep
    APP_NAME: str = "MIRA MODELS"

    # --- Model artefact locations ----------------------------------------
    # Blob-side prefix and the local directory the other paths are rooted at.
    MIRA_MODELS_BLOB_PATH: str = "Mira/ml_models"
    LOCAL_MIRA_MODELS: str = "ml_models"
    MIRA_INTENT_MODEL: str = "ml_models/intent_classifier/2021-04-09"
    MARINE_NL_NER_MODEL: str = "ml_models/ner_marine_nl/2021-04-09"
    MARINE_NL_RB_MODEL: str = "ml_models/ner_marine_nl/rule_based_annotator/rb_annotator.pkl"
    PROPERTY_NL_NER_MODEL: str = "ml_models/ner_property_nl/ner_v10"
    PROPERTY_BE_NER_MODEL: str = "ml_models/ner_property_be/ner_v10"
    # These two name their environment variable explicitly through ``env=``.
    PROPERTY_BE_UW_MODEL: str = Field("ml_models/ner_property_be/uw_property_be_dev", env='PROPERTY_BE_UW_MODEL')
    PROPERTY_NL_UW_MODEL: str = Field("ml_models/ner_property_nl/uw_property_nl_dev", env='PROPERTY_NL_UW_MODEL')
    ADDRESS_DETECTION_LAXONS: str = "ml_models/address_detection/laxons.json"
    ADDRESS_DETECTION_TERMS: str = "ml_models/address_detection/terms.json"
    ADDRESS_DETECTION_BROKER_ADDRESSES: str = "ml_models/address_detection/broker_addresses.json"
    LAYOUTLM_MODEL: str = "ml_models/layoutlm/layoutlm_model.pth"
    LAYOUTLM_LABEL_MAPPING: str = "ml_models/layoutlm/labels_mapping.json"
    LAYOUTLM_TOKENIZER: str = "ml_models/layoutlm/tokenizer"

    # --- Numeric limits / thresholds -------------------------------------
    ADDRESS_DETECTION_MAX_LEN: int = 60
    ADDRESS_INDEX_MIN: int = 40
    DEEPPARSE_ROOT_DIR: str = "ml_models/deepparse"
    TSI_THRESHOLD: int = 100000

    # --- CRF model descriptors -------------------------------------------
    # Each dict pairs a model path with ``*_POSITION`` integers; presumably
    # these are column indices of the features in the CRF input rows —
    # TODO confirm against the CRF services.
    # Note that the broker model path lives under ``source/`` whereas the
    # SI model path lives under ``ml_models/``.
    BROKER_MODEL: dict = {
        'CRF_BROKER_MODEL_PATH': r"source/services/ner_crf/model/crf/30_Nov_2023-14h-broker_pycrf.crfsuite",
        'WORD_POSITION': 1,
        #'POS_POSITION': 2,
        'LEMMA_POSITION': 2,
        #'NER_POSITION': 3
        }
    si_model: dict = {
        'CRF_SI_MODEL_PATH': r"ml_models/si/crf_23_Jun_2022-11h_inclu_lemma_n_amount_with_eur_gt10k_amount.joblib",
        'WORD_POSITION': 1,
        'LEMMA_POSITION': 2,
        'NER_POSITION': 3,
        'POS_POSITION': 4
        }

    # --- spaCy pipeline ----------------------------------------------------
    # NOTE(review): ``spacy.load`` is called while the class body is being
    # evaluated, i.e. when this module is imported, and the attribute has no
    # type annotation — confirm that the import-time load and pydantic's
    # handling of this value are what is wanted.
    #spacy_pretrained_model_nl_sm = spacy.load('nl_core_news_sm')
    spacy_pretrained_model_nl_md = spacy.load('nl_core_news_md')

    # --- LayoutLM run configuration ---------------------------------------
    layoutlm_config: dict = {'local_rank': -1,
                             'overwrite_cache': True,
                             'max_seq_length': 512,
                             'model_type': 'layoutlm',
                             'cls_token_box': [0, 0, 0, 0],
                             'sep_token_box': [1000, 1000, 1000, 1000],
                             'pad_token_box': [0, 0, 0, 0]}


def loss_ratio_params():
    """Return connection parameters for the claim-experience risk-level endpoint.

    Each value may be overridden through an environment variable so that
    credentials do not have to live in source control.  When a variable is
    unset or empty, the value that was historically hard-coded here is used,
    so existing deployments behave exactly as before.

    Environment variables:
        LOSS_RATIO_URL: endpoint URL.
        LOSS_RATIO_LOGIN: login name.
        LOSS_RATIO_PASSWORD: password.

    Returns:
        tuple: ``(url, login, pw)`` as three strings.
    """
    import os  # local import keeps the block self-contained

    # NOTE(review): the fallbacks are the previously hard-coded credentials,
    # kept only for backward compatibility; real deployments should set the
    # environment variables instead.
    url = os.environ.get("LOSS_RATIO_URL") or "http://0.0.0.0:3000/claim-experience-risk-level/"
    login = os.environ.get("LOSS_RATIO_LOGIN") or "clerk"
    pw = os.environ.get("LOSS_RATIO_PASSWORD") or "asdfgh"
    return url, login, pw


@lru_cache()
def get_weight_catalog():
    """Build the annotation-ranking weight catalog for the property line of business.

    Registers the same pair of entity weightings (``POLICYHOLDER`` and
    ``BROKER``) for Belgium and then for the Netherlands.  The result is
    memoised by ``lru_cache``, so repeated calls return the same
    ``WeightCatalog`` object.
    """
    # Entity label -> keyword arguments for ``Weights``, in registration order.
    # NOTE(review): the BROKER weights sum to 0.9 while the POLICYHOLDER
    # weights sum to 1.0 — confirm this is intended.
    entity_weightings = (
        ('POLICYHOLDER', {'subject': 0.7, 'body': 0.2, 'attachment': 0.1}),
        ('BROKER', {'subject': 0.1, 'body': 0.6, 'attachment': 0.2}),
    )
    property_countries = (CountryCode.belgium, CountryCode.netherlands)

    catalog = WeightCatalog()
    for country in property_countries:
        for entity_label, weight_kwargs in entity_weightings:
            # A fresh ``Weights`` instance per registration, as before.
            catalog.set_weights(
                LineOfBusiness.property, country, entity_label,
                Weights(**weight_kwargs))
    return catalog


@lru_cache()
def get_settings():
    """Create the application ``Settings`` and reuse it on later calls.

    ``lru_cache`` memoises this zero-argument call, so every caller receives
    the same ``Settings`` instance.
    """
    app_settings = Settings()
    return app_settings


# Module-level settings object, created when this module is imported.
# It comes from the cached ``get_settings()`` call, so it is the same
# instance that later ``get_settings()`` calls return.
settings: Settings = get_settings()