Spaces:
Runtime error
Runtime error
update config
Browse files
- configuration/config.py +3 -80
- gradio_ner.py +3 -3
- requirements.txt +2 -1
configuration/config.py
CHANGED
@@ -1,88 +1,11 @@
|
|
1 |
-
import spacy
|
2 |
from functools import lru_cache
|
3 |
-
from
|
4 |
-
from source.datamodel.common import CountryCode, LineOfBusiness
|
5 |
-
from source.datamodel.annotation_ranking import Weights, WeightCatalog
|
6 |
|
7 |
|
8 |
class Settings(BaseSettings):
|
9 |
SERVER_HOST: str = '0.0.0.0'
|
10 |
-
PORT: int =
|
11 |
-
|
12 |
-
SLEEP_DURATION = 1e-4 # 0.1 ms sleep
|
13 |
-
APP_NAME: str = "MIRA MODELS"
|
14 |
-
MIRA_MODELS_BLOB_PATH: str = "Mira/ml_models"
|
15 |
-
LOCAL_MIRA_MODELS: str = "ml_models"
|
16 |
-
MIRA_INTENT_MODEL: str = "ml_models/intent_classifier/2021-04-09"
|
17 |
-
MARINE_NL_NER_MODEL: str = "ml_models/ner_marine_nl/2021-04-09"
|
18 |
-
MARINE_NL_RB_MODEL: str = "ml_models/ner_marine_nl/rule_based_annotator/rb_annotator.pkl"
|
19 |
-
PROPERTY_NL_NER_MODEL: str = "ml_models/ner_property_nl/ner_v10"
|
20 |
-
PROPERTY_BE_NER_MODEL: str = "ml_models/ner_property_be/ner_v10"
|
21 |
-
PROPERTY_BE_UW_MODEL: str = Field("ml_models/ner_property_be/uw_property_be_dev", env='PROPERTY_BE_UW_MODEL')
|
22 |
-
PROPERTY_NL_UW_MODEL: str = Field("ml_models/ner_property_nl/uw_property_nl_dev", env='PROPERTY_NL_UW_MODEL')
|
23 |
-
ADDRESS_DETECTION_LAXONS: str = "ml_models/address_detection/laxons.json"
|
24 |
-
ADDRESS_DETECTION_TERMS: str = "ml_models/address_detection/terms.json"
|
25 |
-
ADDRESS_DETECTION_BROKER_ADDRESSES: str = "ml_models/address_detection/broker_addresses.json"
|
26 |
-
LAYOUTLM_MODEL: str = "ml_models/layoutlm/layoutlm_model.pth"
|
27 |
-
LAYOUTLM_LABEL_MAPPING: str = "ml_models/layoutlm/labels_mapping.json"
|
28 |
-
LAYOUTLM_TOKENIZER: str = "ml_models/layoutlm/tokenizer"
|
29 |
-
ADDRESS_DETECTION_MAX_LEN: int = 60
|
30 |
-
ADDRESS_INDEX_MIN: int = 40
|
31 |
-
DEEPPARSE_ROOT_DIR: str = "ml_models/deepparse"
|
32 |
-
TSI_THRESHOLD: int = 100000
|
33 |
-
BROKER_MODEL: dict = {
|
34 |
-
'CRF_BROKER_MODEL_PATH': r"source/services/ner_crf/model/crf/30_Nov_2023-14h-broker_pycrf.crfsuite",
|
35 |
-
'WORD_POSITION': 1,
|
36 |
-
#'POS_POSITION': 2,
|
37 |
-
'LEMMA_POSITION': 2,
|
38 |
-
#'NER_POSITION': 3
|
39 |
-
}
|
40 |
-
si_model: dict = {
|
41 |
-
'CRF_SI_MODEL_PATH': r"ml_models/si/crf_23_Jun_2022-11h_inclu_lemma_n_amount_with_eur_gt10k_amount.joblib",
|
42 |
-
'WORD_POSITION': 1,
|
43 |
-
'LEMMA_POSITION': 2,
|
44 |
-
'NER_POSITION': 3,
|
45 |
-
'POS_POSITION': 4
|
46 |
-
}
|
47 |
-
#spacy_pretrained_model_nl_sm = spacy.load('nl_core_news_sm')
|
48 |
-
spacy_pretrained_model_nl_md = spacy.load('nl_core_news_md')
|
49 |
-
layoutlm_config: dict = {'local_rank': -1,
|
50 |
-
'overwrite_cache': True,
|
51 |
-
'max_seq_length': 512,
|
52 |
-
'model_type': 'layoutlm',
|
53 |
-
'cls_token_box': [0, 0, 0, 0],
|
54 |
-
'sep_token_box': [1000, 1000, 1000, 1000],
|
55 |
-
'pad_token_box': [0, 0, 0, 0]}
|
56 |
-
|
57 |
-
|
58 |
-
def loss_ratio_params():
|
59 |
-
url = "http://0.0.0.0:3000/claim-experience-risk-level/"
|
60 |
-
login = "clerk"
|
61 |
-
pw = "asdfgh"
|
62 |
-
return url, login, pw
|
63 |
-
|
64 |
-
|
65 |
-
@lru_cache()
|
66 |
-
def get_weight_catalog():
|
67 |
-
weight_catalog = WeightCatalog()
|
68 |
-
|
69 |
-
# PROPERTY BE WEIGHTS
|
70 |
-
weight_catalog.set_weights(
|
71 |
-
LineOfBusiness.property, CountryCode.belgium, 'POLICYHOLDER',
|
72 |
-
Weights(subject=0.7, body=0.2, attachment=0.1))
|
73 |
-
weight_catalog.set_weights(
|
74 |
-
LineOfBusiness.property, CountryCode.belgium, 'BROKER',
|
75 |
-
Weights(subject=0.1, body=0.6, attachment=0.2))
|
76 |
-
|
77 |
-
# PROPERTY NL WEIGHTS
|
78 |
-
weight_catalog.set_weights(
|
79 |
-
LineOfBusiness.property, CountryCode.netherlands, 'POLICYHOLDER',
|
80 |
-
Weights(subject=0.7, body=0.2, attachment=0.1))
|
81 |
-
weight_catalog.set_weights(
|
82 |
-
LineOfBusiness.property, CountryCode.netherlands, 'BROKER',
|
83 |
-
Weights(subject=0.1, body=0.6, attachment=0.2))
|
84 |
-
|
85 |
-
return weight_catalog
|
86 |
|
87 |
|
88 |
@lru_cache()
|
|
|
|
|
1 |
from functools import lru_cache
|
2 |
+
from pydantic_settings import BaseSettings
|
|
|
|
|
3 |
|
4 |
|
5 |
class Settings(BaseSettings):
|
6 |
SERVER_HOST: str = '0.0.0.0'
|
7 |
+
PORT: int = 7860
|
8 |
+
MODEL_CHECKPOINT: str = "aimlnerd/bert-finetuned-legalentity-ner-accelerate"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
@lru_cache()
|
gradio_ner.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import logging
|
3 |
from transformers import pipeline
|
4 |
-
|
5 |
examples = [
|
6 |
"""
|
7 |
Notice of Representation
|
@@ -73,7 +73,7 @@ Notice of Representation
|
|
73 |
John D Locke, Esq"""
|
74 |
]
|
75 |
# Replace this with your own checkpoint
|
76 |
-
model_checkpoint =
|
77 |
ner_pipeline = pipeline(
|
78 |
"token-classification", model=model_checkpoint, aggregation_strategy="simple"
|
79 |
)
|
@@ -89,4 +89,4 @@ demo = gr.Interface(ner,
|
|
89 |
gr.HighlightedText(),
|
90 |
examples=examples)
|
91 |
|
92 |
-
demo.launch(server_name=
|
|
|
1 |
import gradio as gr
|
2 |
import logging
|
3 |
from transformers import pipeline
|
4 |
+
from configuration.config import settings
|
5 |
examples = [
|
6 |
"""
|
7 |
Notice of Representation
|
|
|
73 |
John D Locke, Esq"""
|
74 |
]
|
75 |
# Replace this with your own checkpoint
|
76 |
+
model_checkpoint = settings.MODEL_CHECKPOINT
|
77 |
ner_pipeline = pipeline(
|
78 |
"token-classification", model=model_checkpoint, aggregation_strategy="simple"
|
79 |
)
|
|
|
89 |
gr.HighlightedText(),
|
90 |
examples=examples)
|
91 |
|
92 |
+
demo.launch(server_name=settings.SERVER_HOST, server_port=settings.PORT)
|
requirements.txt
CHANGED
@@ -7,4 +7,5 @@ evaluate==0.4.1
|
|
7 |
accelerate==0.25.0
|
8 |
seqeval==1.2.2
|
9 |
pandas==2.1.4
|
10 |
-
gradio==4.13.0
|
|
|
|
7 |
accelerate==0.25.0
|
8 |
seqeval==1.2.2
|
9 |
pandas==2.1.4
|
10 |
+
gradio==4.13.0
|
11 |
+
pydantic_settings==2.1.0
|