duoquote committed on
Commit d1aed23
1 Parent(s): e50ef50

Update model files and tokenizer configuration

Files changed (8)
  1. README.md +36 -36
  2. config.json +66 -36
  3. model/config.json +66 -36
  4. model/model.safetensors +2 -2
  5. model/tokenizer.json +16 -2
  6. model/training_args.bin +1 -1
  7. predict.py +4 -9
  8. train.py +19 -16
README.md CHANGED
@@ -30,55 +30,55 @@ The model is based on [dbmdz/bert-base-turkish-cased](https://huggingface.co/dbm
 ```
 (g:\projects\address-extraction\venv) G:\projects\address-extraction>python predict.py
 Osmangazi Mahallesi, Hoca Ahmet Yesevi Cd. No:34, 16050 Osmangazi/Bursa
-Osmangazi Mahalle 98.65%
-Hoca Ahmet Yesevi Cadde 97.63%
-34 Bina Numarası 98.92%
-16050 Posta Kodu 97.83%
-Osmangazi İlçe 98.97%
+Osmangazi Mahalle 98.80%
+Hoca Ahmet Yesevi Cadde 98.55%
+34 Bina Numarası 99.50%
+16050 Posta Kodu 98.49%
+Osmangazi İlçe 98.71%
 Bursa İl 99.21%
-Average Score: 0.9902257982053255
+Average Score: 0.9874102413654328
 Labels Found: 6
 ----------------------------------------------------------------------
 Karşıyaka Mahallesi, Mavişehir Caddesi No: 91, Daire 4, 35540 Karşıyaka/İzmir
-Karşıyaka Mahalle 99.11%
-Mavişehir Cadde 97.16%
-91 Bina Numarası 98.73%
-4 Kat 29.06%
-35540 Posta Kodu 98.65%
-Karşıyaka İlçe 99.17%
-İzmir İl 99.16%
-Average Score: 0.9237866433043229
+Karşıyaka Mahalle 98.93%
+Mavişehir Cadde 96.90%
+91 Bina Numarası 99.25%
+4 Bina Numarası 30.75%
+35540 Posta Kodu 98.97%
+Karşıyaka İlçe 98.84%
+İzmir İl 98.86%
+Average Score: 0.9173339426517486
 Labels Found: 7
 ----------------------------------------------------------------------
 Selçuklu Mahallesi, Atatürk Bulvarı No: 55, 42050 Selçuklu/Konya
-Selçuklu Mahalle 98.67%
-Atatürk Cadde 57.06%
-55 Bina Numarası 98.94%
-42050 Posta Kodu 98.81%
-Selçuklu İlçe 99.06%
-Konya İl 99.22%
-Average Score: 0.9659512996673584
+Selçuklu Mahalle 98.53%
+Atatürk Cadde 47.01%
+55 Bina Numarası 99.49%
+42050 Posta Kodu 98.78%
+Selçuklu İlçe 98.74%
+Konya İl 99.16%
+Average Score: 0.9240859523415565
 Labels Found: 6
 ----------------------------------------------------------------------
 Alsancak Mahallesi, 1475. Sk. No:3, 35220 Konak/İzmir
-Alsancak Mahalle 99.38%
-1475 Sokak 96.04%
-3 Bina Numarası 98.06%
-35220 Posta Kodu 98.75%
-Konak İlçe 99.23%
-İzmir İl 99.16%
-Average Score: 0.9909308176291617
+Alsancak Mahalle 99.35%
+1475 Sokak 97.71%
+3 Bina Numarası 99.18%
+35220 Posta Kodu 99.00%
+Konak İlçe 98.90%
+İzmir İl 98.95%
+Average Score: 0.9881603717803955
 Labels Found: 6
 ----------------------------------------------------------------------
 Kocatepe Mahallesi, Yaşam Caddesi 3. Sokak No:4, 06420 Bayrampaşa/İstanbul
-Kocatepe Mahalle 99.46%
-Yaşam Cadde 94.07%
-3 Sokak 84.07%
-4 Bina Numarası 98.42%
-06420 Posta Kodu 98.54%
-Bayrampaşa İlçe 98.97%
-İstanbul İl 98.98%
-Average Score: 0.9832726591511777
+Kocatepe Mahalle 99.44%
+Yaşam Cadde 92.45%
+3 Sokak 70.61%
+4 Bina Numarası 99.18%
+06420 Posta Kodu 99.00%
+Bayrampaşa İlçe 98.86%
+İstanbul İl 98.90%
+Average Score: 0.9558616995811462
 Labels Found: 7
 ----------------------------------------------------------------------
 ```
config.json CHANGED
@@ -9,46 +9,76 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": "[PAD]",
-    "1": "\u00dclke",
-    "2": "\u0130l",
-    "3": "\u0130l\u00e7e",
-    "4": "Mahalle",
-    "5": "Cadde",
-    "6": "Sokak",
-    "7": "Bina Ad\u0131",
-    "8": "Bina Numaras\u0131",
-    "9": "Yer Ad\u0131",
-    "10": "Site",
-    "11": "Adres Detay",
-    "12": "Blok No",
-    "13": "Bulvar",
-    "14": "Daire No",
-    "15": "Posta Kodu",
-    "16": "Kat",
-    "17": "[UNK]"
+    "0": "O",
+    "1": "B-\u00dclke",
+    "2": "I-\u00dclke",
+    "3": "B-\u0130l",
+    "4": "I-\u0130l",
+    "5": "B-\u0130l\u00e7e",
+    "6": "I-\u0130l\u00e7e",
+    "7": "B-Mahalle",
+    "8": "I-Mahalle",
+    "9": "B-Cadde",
+    "10": "I-Cadde",
+    "11": "B-Sokak",
+    "12": "I-Sokak",
+    "13": "B-Bina Ad\u0131",
+    "14": "I-Bina Ad\u0131",
+    "15": "B-Bina Numaras\u0131",
+    "16": "I-Bina Numaras\u0131",
+    "17": "B-Yer Ad\u0131",
+    "18": "I-Yer Ad\u0131",
+    "19": "B-Site",
+    "20": "I-Site",
+    "21": "B-Adres Detay",
+    "22": "I-Adres Detay",
+    "23": "B-Blok No",
+    "24": "I-Blok No",
+    "25": "B-Bulvar",
+    "26": "I-Bulvar",
+    "27": "B-Daire No",
+    "28": "I-Daire No",
+    "29": "B-Posta Kodu",
+    "30": "I-Posta Kodu",
+    "31": "B-Kat",
+    "32": "I-Kat"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "Adres Detay": 11,
-    "Bina Ad\u0131": 7,
-    "Bina Numaras\u0131": 8,
-    "Blok No": 12,
-    "Bulvar": 13,
-    "Cadde": 5,
-    "Daire No": 14,
-    "Kat": 16,
-    "Mahalle": 4,
-    "Posta Kodu": 15,
-    "Site": 10,
-    "Sokak": 6,
-    "Yer Ad\u0131": 9,
-    "[PAD]": 0,
-    "[UNK]": 17,
-    "\u00dclke": 1,
-    "\u0130l": 2,
-    "\u0130l\u00e7e": 3
+    "B-Adres Detay": 21,
+    "B-Bina Ad\u0131": 13,
+    "B-Bina Numaras\u0131": 15,
+    "B-Blok No": 23,
+    "B-Bulvar": 25,
+    "B-Cadde": 9,
+    "B-Daire No": 27,
+    "B-Kat": 31,
+    "B-Mahalle": 7,
+    "B-Posta Kodu": 29,
+    "B-Site": 19,
+    "B-Sokak": 11,
+    "B-Yer Ad\u0131": 17,
+    "B-\u00dclke": 1,
+    "B-\u0130l": 3,
+    "B-\u0130l\u00e7e": 5,
+    "I-Adres Detay": 22,
+    "I-Bina Ad\u0131": 14,
+    "I-Bina Numaras\u0131": 16,
+    "I-Blok No": 24,
+    "I-Bulvar": 26,
+    "I-Cadde": 10,
+    "I-Daire No": 28,
+    "I-Kat": 32,
+    "I-Mahalle": 8,
+    "I-Posta Kodu": 30,
+    "I-Site": 20,
+    "I-Sokak": 12,
+    "I-Yer Ad\u0131": 18,
+    "I-\u00dclke": 2,
+    "I-\u0130l": 4,
+    "I-\u0130l\u00e7e": 6,
+    "O": 0
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
model/config.json CHANGED
@@ -9,46 +9,76 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": "[PAD]",
-    "1": "\u00dclke",
-    "2": "\u0130l",
-    "3": "\u0130l\u00e7e",
-    "4": "Mahalle",
-    "5": "Cadde",
-    "6": "Sokak",
-    "7": "Bina Ad\u0131",
-    "8": "Bina Numaras\u0131",
-    "9": "Yer Ad\u0131",
-    "10": "Site",
-    "11": "Adres Detay",
-    "12": "Blok No",
-    "13": "Bulvar",
-    "14": "Daire No",
-    "15": "Posta Kodu",
-    "16": "Kat",
-    "17": "[UNK]"
+    "0": "O",
+    "1": "B-\u00dclke",
+    "2": "I-\u00dclke",
+    "3": "B-\u0130l",
+    "4": "I-\u0130l",
+    "5": "B-\u0130l\u00e7e",
+    "6": "I-\u0130l\u00e7e",
+    "7": "B-Mahalle",
+    "8": "I-Mahalle",
+    "9": "B-Cadde",
+    "10": "I-Cadde",
+    "11": "B-Sokak",
+    "12": "I-Sokak",
+    "13": "B-Bina Ad\u0131",
+    "14": "I-Bina Ad\u0131",
+    "15": "B-Bina Numaras\u0131",
+    "16": "I-Bina Numaras\u0131",
+    "17": "B-Yer Ad\u0131",
+    "18": "I-Yer Ad\u0131",
+    "19": "B-Site",
+    "20": "I-Site",
+    "21": "B-Adres Detay",
+    "22": "I-Adres Detay",
+    "23": "B-Blok No",
+    "24": "I-Blok No",
+    "25": "B-Bulvar",
+    "26": "I-Bulvar",
+    "27": "B-Daire No",
+    "28": "I-Daire No",
+    "29": "B-Posta Kodu",
+    "30": "I-Posta Kodu",
+    "31": "B-Kat",
+    "32": "I-Kat"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "Adres Detay": 11,
-    "Bina Ad\u0131": 7,
-    "Bina Numaras\u0131": 8,
-    "Blok No": 12,
-    "Bulvar": 13,
-    "Cadde": 5,
-    "Daire No": 14,
-    "Kat": 16,
-    "Mahalle": 4,
-    "Posta Kodu": 15,
-    "Site": 10,
-    "Sokak": 6,
-    "Yer Ad\u0131": 9,
-    "[PAD]": 0,
-    "[UNK]": 17,
-    "\u00dclke": 1,
-    "\u0130l": 2,
-    "\u0130l\u00e7e": 3
+    "B-Adres Detay": 21,
+    "B-Bina Ad\u0131": 13,
+    "B-Bina Numaras\u0131": 15,
+    "B-Blok No": 23,
+    "B-Bulvar": 25,
+    "B-Cadde": 9,
+    "B-Daire No": 27,
+    "B-Kat": 31,
+    "B-Mahalle": 7,
+    "B-Posta Kodu": 29,
+    "B-Site": 19,
+    "B-Sokak": 11,
+    "B-Yer Ad\u0131": 17,
+    "B-\u00dclke": 1,
+    "B-\u0130l": 3,
+    "B-\u0130l\u00e7e": 5,
+    "I-Adres Detay": 22,
+    "I-Bina Ad\u0131": 14,
+    "I-Bina Numaras\u0131": 16,
+    "I-Blok No": 24,
+    "I-Bulvar": 26,
+    "I-Cadde": 10,
+    "I-Daire No": 28,
+    "I-Kat": 32,
+    "I-Mahalle": 8,
+    "I-Posta Kodu": 30,
+    "I-Site": 20,
+    "I-Sokak": 12,
+    "I-Yer Ad\u0131": 18,
+    "I-\u00dclke": 2,
+    "I-\u0130l": 4,
+    "I-\u0130l\u00e7e": 6,
+    "O": 0
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
model/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f61796d22b89ac6c4b5bf7cd5932198148f721b23b684a10950709b692328c6
-size 440185728
+oid sha256:2ff0f793d2c61260659c6a327c27dd0ea1d632bc0e5fc51da60d20d3caf3f7f3
+size 440231868
model/tokenizer.json CHANGED
@@ -1,7 +1,21 @@
 {
   "version": "1.0",
-  "truncation": null,
-  "padding": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 128,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
+  "padding": {
+    "strategy": {
+      "Fixed": 128
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 0,
+    "pad_type_id": 0,
+    "pad_token": "[PAD]"
+  },
   "added_tokens": [
     {
       "id": 0,
model/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:300abae98dafa01f4daa08ba322e5f0ec434e9a6823866fb12dde9fb1397ba62
+oid sha256:551b4a0b8523f76d65879932b7a7ba98935984c8de39d14af0fd2659e2aadadc
 size 4664
predict.py CHANGED
@@ -5,9 +5,6 @@ from transformers import BertTokenizerFast, AutoTokenizer
 
 tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
 
-with open("labels.json", "r") as f:
-    id_to_label = {int(k): v for k, v in orjson.loads(f.read()).items()}
-
 nlp = pipeline(
     "ner",
     model="./model",
@@ -19,20 +16,18 @@ def get_entities(tokens):
     entities = []
     entity = None
     for token in tokens:
-        label_id = int(token["entity"][6:])
-        label = id_to_label[label_id]
-        if label.startswith("B-"):
+        if token["entity"].startswith("B-"):
             if entity:
                 entity["score"] /= entity["token_count"]
                 entities.append(entity)
             entity = {
-                "label": label[2:],
+                "label": token["entity"][2:],
                 "ranges": [token["start"], token["end"]],
                 "score": token["score"],
                 "token_count": 1,
             }
-        elif label.startswith("I-"):
-            if entity and entity["label"] == label[2:]:
+        elif token["entity"].startswith("I-"):
+            if entity and entity["label"] == token["entity"][2:]:
                 entity["ranges"][1] = token["end"]
                 entity["token_count"] += 1
                 entity["score"] += token["score"]
train.py CHANGED
@@ -1,4 +1,5 @@
 import io
+import shutil
 import requests
 import json
 import time
@@ -63,14 +64,11 @@ def load_data():
     return labels, [orjson.loads(line) for line in data.split("\n") if line]
 
 labels, data = load_data()
-# label_to_id = {}
-# for i, label in enumerate(labels):
-#     label_to_id["B-" + label["text"]] = i * 2 + 1
-#     label_to_id["I-" + label["text"]] = i * 2 + 2
-# label_to_id["O"] = 0
-label_to_id = {label["text"]: i + 1 for i, label in enumerate(labels)}
-label_to_id["[PAD]"] = 0
-label_to_id["[UNK]"] = len(label_to_id)
+label_to_id = {}
+for i, label in enumerate(labels):
+    label_to_id["B-" + label["text"]] = i * 2 + 1
+    label_to_id["I-" + label["text"]] = i * 2 + 2
+label_to_id["O"] = 0
 id_to_label = {v: k for k, v in label_to_id.items()}
 
 tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
@@ -97,18 +95,20 @@ def preprocess_data(item, tokenizer, label_to_id):
     attention_mask = inputs["attention_mask"]
     offset_mapping = inputs["offset_mapping"]
 
-    labels = ["[PAD]"] * 128
+    labels = ["O"] * 128
+    last_label = "O"
     for token_idx, [off_start, off_end] in enumerate(offset_mapping[0]):
         if off_start == off_end:
             continue
 
         for start, end, label in item['label']:
             if start <= off_start and off_end <= end:
-                labels[token_idx] = label
+                if last_label == label:
+                    labels[token_idx] = "I-" + label
+                else:
+                    labels[token_idx] = "B-" + label
+                last_label = label
                 break
-
-        if labels[token_idx] == "[PAD]":
-            labels[token_idx] = "[UNK]"
 
     # Convert labels to ids
     labels = [label_to_id[label] for label in labels]
@@ -132,6 +132,7 @@ class AddressDataset(Dataset):
         return {key: torch.tensor(val) for key, val in item.items()}
 
 
+
 dataset = Dataset.from_generator(
     lambda: (preprocess_data(item, tokenizer, label_to_id) for item in data),
 )
@@ -166,8 +167,8 @@ def compute_metrics(pred, id_to_label):
     labels = [[id_to_label[label_id] for label_id in label_ids] for label_ids in labels]
     preds = [[id_to_label[pred] for pred in preds] for preds in preds]
 
-    labels = [set(label) for label in labels]
-    preds = [set(pred) for pred in preds]
+    labels = [label for label in labels if label != "O"]
+    preds = [pred for pred in preds if pred != "O"]
 
     mlb = MultiLabelBinarizer()
     mlb.fit([id_to_label.values()])
@@ -194,4 +195,6 @@
 trainer.train()
 trainer.evaluate()
 
-trainer.save_model("./model")
+trainer.save_model("./model")
+
+shutil.copy("./model/config.json", "./config.json")
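The heart of the training change is the offset alignment added to preprocess_data: the first sub-token inside a labelled character span gets a B- tag, following sub-tokens of the same span get I-, and everything else stays O. A small self-contained sketch of that logic; the text, spans and sub-token offsets below are made up for illustration (a real run takes the offsets from tokenizer(..., return_offsets_mapping=True)):

```python
# Toy illustration of the BIO assignment added to preprocess_data.
text = "Osmangazi Mahallesi 16050"
spans = [(0, 19, "Mahalle"), (20, 25, "Posta Kodu")]  # (char_start, char_end, label)

# Pretend sub-token offsets; special tokens map to (0, 0) and are skipped.
offsets = [(0, 0), (0, 9), (10, 19), (20, 25), (0, 0)]

labels = ["O"] * len(offsets)
last_label = "O"
for idx, (off_start, off_end) in enumerate(offsets):
    if off_start == off_end:
        continue
    for start, end, label in spans:
        if start <= off_start and off_end <= end:
            labels[idx] = ("I-" if last_label == label else "B-") + label
            last_label = label
            break

print(labels)  # ['O', 'B-Mahalle', 'I-Mahalle', 'B-Posta Kodu', 'O']
```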