aimlnerd committed on
Commit
f7558d2
1 Parent(s): c09f7da

update config

Browse files
Files changed (3) hide show
  1. configuration/config.py +3 -80
  2. gradio_ner.py +3 -3
  3. requirements.txt +2 -1
configuration/config.py CHANGED
@@ -1,88 +1,11 @@
1
- import spacy
2
  from functools import lru_cache
3
- from pydantic import BaseSettings, Field
4
- from source.datamodel.common import CountryCode, LineOfBusiness
5
- from source.datamodel.annotation_ranking import Weights, WeightCatalog
6
 
7
 
8
  class Settings(BaseSettings):
9
  SERVER_HOST: str = '0.0.0.0'
10
- PORT: int = 3000
11
- STOP_TIMEOUT = 120
12
- SLEEP_DURATION = 1e-4 # 0.1 ms sleep
13
- APP_NAME: str = "MIRA MODELS"
14
- MIRA_MODELS_BLOB_PATH: str = "Mira/ml_models"
15
- LOCAL_MIRA_MODELS: str = "ml_models"
16
- MIRA_INTENT_MODEL: str = "ml_models/intent_classifier/2021-04-09"
17
- MARINE_NL_NER_MODEL: str = "ml_models/ner_marine_nl/2021-04-09"
18
- MARINE_NL_RB_MODEL: str = "ml_models/ner_marine_nl/rule_based_annotator/rb_annotator.pkl"
19
- PROPERTY_NL_NER_MODEL: str = "ml_models/ner_property_nl/ner_v10"
20
- PROPERTY_BE_NER_MODEL: str = "ml_models/ner_property_be/ner_v10"
21
- PROPERTY_BE_UW_MODEL: str = Field("ml_models/ner_property_be/uw_property_be_dev", env='PROPERTY_BE_UW_MODEL')
22
- PROPERTY_NL_UW_MODEL: str = Field("ml_models/ner_property_nl/uw_property_nl_dev", env='PROPERTY_NL_UW_MODEL')
23
- ADDRESS_DETECTION_LAXONS: str = "ml_models/address_detection/laxons.json"
24
- ADDRESS_DETECTION_TERMS: str = "ml_models/address_detection/terms.json"
25
- ADDRESS_DETECTION_BROKER_ADDRESSES: str = "ml_models/address_detection/broker_addresses.json"
26
- LAYOUTLM_MODEL: str = "ml_models/layoutlm/layoutlm_model.pth"
27
- LAYOUTLM_LABEL_MAPPING: str = "ml_models/layoutlm/labels_mapping.json"
28
- LAYOUTLM_TOKENIZER: str = "ml_models/layoutlm/tokenizer"
29
- ADDRESS_DETECTION_MAX_LEN: int = 60
30
- ADDRESS_INDEX_MIN: int = 40
31
- DEEPPARSE_ROOT_DIR: str = "ml_models/deepparse"
32
- TSI_THRESHOLD: int = 100000
33
- BROKER_MODEL: dict = {
34
- 'CRF_BROKER_MODEL_PATH': r"source/services/ner_crf/model/crf/30_Nov_2023-14h-broker_pycrf.crfsuite",
35
- 'WORD_POSITION': 1,
36
- #'POS_POSITION': 2,
37
- 'LEMMA_POSITION': 2,
38
- #'NER_POSITION': 3
39
- }
40
- si_model: dict = {
41
- 'CRF_SI_MODEL_PATH': r"ml_models/si/crf_23_Jun_2022-11h_inclu_lemma_n_amount_with_eur_gt10k_amount.joblib",
42
- 'WORD_POSITION': 1,
43
- 'LEMMA_POSITION': 2,
44
- 'NER_POSITION': 3,
45
- 'POS_POSITION': 4
46
- }
47
- #spacy_pretrained_model_nl_sm = spacy.load('nl_core_news_sm')
48
- spacy_pretrained_model_nl_md = spacy.load('nl_core_news_md')
49
- layoutlm_config: dict = {'local_rank': -1,
50
- 'overwrite_cache': True,
51
- 'max_seq_length': 512,
52
- 'model_type': 'layoutlm',
53
- 'cls_token_box': [0, 0, 0, 0],
54
- 'sep_token_box': [1000, 1000, 1000, 1000],
55
- 'pad_token_box': [0, 0, 0, 0]}
56
-
57
-
58
- def loss_ratio_params():
59
- url = "http://0.0.0.0:3000/claim-experience-risk-level/"
60
- login = "clerk"
61
- pw = "asdfgh"
62
- return url, login, pw
63
-
64
-
65
- @lru_cache()
66
- def get_weight_catalog():
67
- weight_catalog = WeightCatalog()
68
-
69
- # PROPERTY BE WEIGHTS
70
- weight_catalog.set_weights(
71
- LineOfBusiness.property, CountryCode.belgium, 'POLICYHOLDER',
72
- Weights(subject=0.7, body=0.2, attachment=0.1))
73
- weight_catalog.set_weights(
74
- LineOfBusiness.property, CountryCode.belgium, 'BROKER',
75
- Weights(subject=0.1, body=0.6, attachment=0.2))
76
-
77
- # PROPERTY NL WEIGHTS
78
- weight_catalog.set_weights(
79
- LineOfBusiness.property, CountryCode.netherlands, 'POLICYHOLDER',
80
- Weights(subject=0.7, body=0.2, attachment=0.1))
81
- weight_catalog.set_weights(
82
- LineOfBusiness.property, CountryCode.netherlands, 'BROKER',
83
- Weights(subject=0.1, body=0.6, attachment=0.2))
84
-
85
- return weight_catalog
86
 
87
 
88
  @lru_cache()
 
 
1
  from functools import lru_cache
2
+ from pydantic_settings import BaseSettings
 
 
3
 
4
 
5
  class Settings(BaseSettings):
6
  SERVER_HOST: str = '0.0.0.0'
7
+ PORT: int = 7860
8
+ MODEL_CHECKPOINT: str = "aimlnerd/bert-finetuned-legalentity-ner-accelerate"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  @lru_cache()
gradio_ner.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import logging
3
  from transformers import pipeline
4
-
5
  examples = [
6
  """
7
  Notice of Representation
@@ -73,7 +73,7 @@ Notice of Representation
73
  John D Locke, Esq"""
74
  ]
75
  # Replace this with your own checkpoint
76
- model_checkpoint = "aimlnerd/bert-finetuned-legalentity-ner-accelerate"
77
  ner_pipeline = pipeline(
78
  "token-classification", model=model_checkpoint, aggregation_strategy="simple"
79
  )
@@ -89,4 +89,4 @@ demo = gr.Interface(ner,
89
  gr.HighlightedText(),
90
  examples=examples)
91
 
92
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  import logging
3
  from transformers import pipeline
4
+ from configuration.config import settings
5
  examples = [
6
  """
7
  Notice of Representation
 
73
  John D Locke, Esq"""
74
  ]
75
  # Replace this with your own checkpoint
76
+ model_checkpoint = settings.MODEL_CHECKPOINT
77
  ner_pipeline = pipeline(
78
  "token-classification", model=model_checkpoint, aggregation_strategy="simple"
79
  )
 
89
  gr.HighlightedText(),
90
  examples=examples)
91
 
92
+ demo.launch(server_name=settings.SERVER_HOST, server_port=settings.PORT)
requirements.txt CHANGED
@@ -7,4 +7,5 @@ evaluate==0.4.1
7
  accelerate==0.25.0
8
  seqeval==1.2.2
9
  pandas==2.1.4
10
- gradio==4.13.0
 
 
7
  accelerate==0.25.0
8
  seqeval==1.2.2
9
  pandas==2.1.4
10
+ gradio==4.13.0
11
+ pydantic_settings==2.1.0