Update spaCy pipeline
Browse files- .gitattributes +8 -0
- README.md +0 -0
- config.cfg +228 -0
- frequency_lemmatizer/config.json +1 -0
- frequency_lemmatizer/lookup.json +0 -0
- frequency_lemmatizer/table.json +3 -0
- grc_dep_treebanks_xlm-any-py3-none-any.whl +3 -0
- lemmatizer.py +271 -0
- meta.json +0 -0
- morphologizer/cfg +0 -0
- morphologizer/model +3 -0
- parser/cfg +13 -0
- parser/model +3 -0
- parser/moves +1 -0
- tagger/cfg +839 -0
- tagger/model +3 -0
- tokenizer +4 -0
- trainable_lemmatizer/cfg +0 -0
- trainable_lemmatizer/model +3 -0
- trainable_lemmatizer/trees +3 -0
- transformer/cfg +3 -0
- transformer/model +3 -0
- vocab/key2row +1 -0
- vocab/lookups.bin +3 -0
- vocab/strings.json +0 -0
- vocab/vectors +0 -0
- vocab/vectors.cfg +3 -0
.gitattributes
CHANGED
@@ -32,3 +32,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
grc_dep_treebanks_xlm-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
36 |
+
frequency_lemmatizer/table.json filter=lfs diff=lfs merge=lfs -text
|
37 |
+
morphologizer/model filter=lfs diff=lfs merge=lfs -text
|
38 |
+
trainable_lemmatizer/model filter=lfs diff=lfs merge=lfs -text
|
39 |
+
trainable_lemmatizer/trees filter=lfs diff=lfs merge=lfs -text
|
40 |
+
tagger/model filter=lfs diff=lfs merge=lfs -text
|
41 |
+
parser/model filter=lfs diff=lfs merge=lfs -text
|
42 |
+
transformer/model filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
The diff for this file is too large to render.
See raw diff
|
|
config.cfg
ADDED
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[paths]
|
2 |
+
train = "corpus/joint/train.spacy"
|
3 |
+
test = null
|
4 |
+
dev = "corpus/joint/dev.spacy"
|
5 |
+
vectors = null
|
6 |
+
init_tok2vec = null
|
7 |
+
|
8 |
+
[system]
|
9 |
+
gpu_allocator = "pytorch"
|
10 |
+
seed = 0
|
11 |
+
|
12 |
+
[nlp]
|
13 |
+
lang = "grc"
|
14 |
+
pipeline = ["transformer","tagger","morphologizer","parser","trainable_lemmatizer","frequency_lemmatizer"]
|
15 |
+
batch_size = 8
|
16 |
+
disabled = []
|
17 |
+
before_creation = null
|
18 |
+
after_creation = null
|
19 |
+
after_pipeline_creation = null
|
20 |
+
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
21 |
+
|
22 |
+
[components]
|
23 |
+
|
24 |
+
[components.frequency_lemmatizer]
|
25 |
+
factory = "frequency_lemmatizer"
|
26 |
+
fallback_priority = "lookup"
|
27 |
+
overwrite = true
|
28 |
+
|
29 |
+
[components.morphologizer]
|
30 |
+
factory = "morphologizer"
|
31 |
+
extend = false
|
32 |
+
overwrite = true
|
33 |
+
scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
|
34 |
+
|
35 |
+
[components.morphologizer.model]
|
36 |
+
@architectures = "spacy.Tagger.v2"
|
37 |
+
nO = null
|
38 |
+
normalize = false
|
39 |
+
|
40 |
+
[components.morphologizer.model.tok2vec]
|
41 |
+
@architectures = "spacy-transformers.TransformerListener.v1"
|
42 |
+
grad_factor = 1.0
|
43 |
+
pooling = {"@layers":"reduce_mean.v1"}
|
44 |
+
upstream = "*"
|
45 |
+
|
46 |
+
[components.parser]
|
47 |
+
factory = "parser"
|
48 |
+
learn_tokens = false
|
49 |
+
min_action_freq = 30
|
50 |
+
moves = null
|
51 |
+
scorer = {"@scorers":"spacy.parser_scorer.v1"}
|
52 |
+
update_with_oracle_cut_size = 100
|
53 |
+
|
54 |
+
[components.parser.model]
|
55 |
+
@architectures = "spacy.TransitionBasedParser.v2"
|
56 |
+
state_type = "parser"
|
57 |
+
extra_state_tokens = false
|
58 |
+
hidden_width = 128
|
59 |
+
maxout_pieces = 3
|
60 |
+
use_upper = true
|
61 |
+
nO = null
|
62 |
+
|
63 |
+
[components.parser.model.tok2vec]
|
64 |
+
@architectures = "spacy-transformers.TransformerListener.v1"
|
65 |
+
grad_factor = 1.0
|
66 |
+
pooling = {"@layers":"reduce_mean.v1"}
|
67 |
+
upstream = "*"
|
68 |
+
|
69 |
+
[components.tagger]
|
70 |
+
factory = "tagger"
|
71 |
+
neg_prefix = "!"
|
72 |
+
overwrite = false
|
73 |
+
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
74 |
+
|
75 |
+
[components.tagger.model]
|
76 |
+
@architectures = "spacy.Tagger.v2"
|
77 |
+
nO = null
|
78 |
+
normalize = false
|
79 |
+
|
80 |
+
[components.tagger.model.tok2vec]
|
81 |
+
@architectures = "spacy-transformers.TransformerListener.v1"
|
82 |
+
grad_factor = 1.0
|
83 |
+
pooling = {"@layers":"reduce_mean.v1"}
|
84 |
+
upstream = "*"
|
85 |
+
|
86 |
+
[components.trainable_lemmatizer]
|
87 |
+
factory = "trainable_lemmatizer"
|
88 |
+
backoff = "orth"
|
89 |
+
min_tree_freq = 1
|
90 |
+
overwrite = false
|
91 |
+
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
92 |
+
top_k = 3
|
93 |
+
|
94 |
+
[components.trainable_lemmatizer.model]
|
95 |
+
@architectures = "spacy.Tagger.v2"
|
96 |
+
nO = null
|
97 |
+
normalize = false
|
98 |
+
|
99 |
+
[components.trainable_lemmatizer.model.tok2vec]
|
100 |
+
@architectures = "spacy-transformers.TransformerListener.v1"
|
101 |
+
grad_factor = 1.0
|
102 |
+
pooling = {"@layers":"reduce_mean.v1"}
|
103 |
+
upstream = "*"
|
104 |
+
|
105 |
+
[components.transformer]
|
106 |
+
factory = "transformer"
|
107 |
+
max_batch_items = 4096
|
108 |
+
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
|
109 |
+
|
110 |
+
[components.transformer.model]
|
111 |
+
@architectures = "spacy-transformers.TransformerModel.v3"
|
112 |
+
name = "xlm-roberta-base"
|
113 |
+
mixed_precision = false
|
114 |
+
|
115 |
+
[components.transformer.model.get_spans]
|
116 |
+
@span_getters = "spacy-transformers.strided_spans.v1"
|
117 |
+
window = 128
|
118 |
+
stride = 96
|
119 |
+
|
120 |
+
[components.transformer.model.grad_scaler_config]
|
121 |
+
|
122 |
+
[components.transformer.model.tokenizer_config]
|
123 |
+
use_fast = true
|
124 |
+
|
125 |
+
[components.transformer.model.transformer_config]
|
126 |
+
|
127 |
+
[corpora]
|
128 |
+
|
129 |
+
[corpora.dev]
|
130 |
+
@readers = "spacy.Corpus.v1"
|
131 |
+
path = ${paths.dev}
|
132 |
+
max_length = 0
|
133 |
+
gold_preproc = false
|
134 |
+
limit = 0
|
135 |
+
augmenter = null
|
136 |
+
|
137 |
+
[corpora.train]
|
138 |
+
@readers = "spacy.Corpus.v1"
|
139 |
+
path = ${paths.train}
|
140 |
+
max_length = 0
|
141 |
+
gold_preproc = false
|
142 |
+
limit = 0
|
143 |
+
augmenter = null
|
144 |
+
|
145 |
+
[training]
|
146 |
+
accumulate_gradient = 3
|
147 |
+
dev_corpus = "corpora.dev"
|
148 |
+
train_corpus = "corpora.train"
|
149 |
+
seed = ${system.seed}
|
150 |
+
gpu_allocator = ${system.gpu_allocator}
|
151 |
+
dropout = 0.1
|
152 |
+
patience = 1600
|
153 |
+
max_epochs = 0
|
154 |
+
max_steps = 20000
|
155 |
+
eval_frequency = 200
|
156 |
+
frozen_components = []
|
157 |
+
annotating_components = []
|
158 |
+
before_to_disk = null
|
159 |
+
before_update = null
|
160 |
+
|
161 |
+
[training.batcher]
|
162 |
+
@batchers = "spacy.batch_by_padded.v1"
|
163 |
+
discard_oversize = true
|
164 |
+
size = 500
|
165 |
+
buffer = 256
|
166 |
+
get_length = null
|
167 |
+
|
168 |
+
[training.logger]
|
169 |
+
@loggers = "spacy.WandbLogger.v3"
|
170 |
+
project_name = "homerCy"
|
171 |
+
remove_config_values = []
|
172 |
+
model_log_interval = null
|
173 |
+
log_dataset_dir = null
|
174 |
+
entity = null
|
175 |
+
run_name = null
|
176 |
+
|
177 |
+
[training.optimizer]
|
178 |
+
@optimizers = "Adam.v1"
|
179 |
+
beta1 = 0.9
|
180 |
+
beta2 = 0.999
|
181 |
+
L2_is_weight_decay = true
|
182 |
+
L2 = 0.01
|
183 |
+
grad_clip = 1.0
|
184 |
+
use_averages = true
|
185 |
+
eps = 0.00000001
|
186 |
+
|
187 |
+
[training.optimizer.learn_rate]
|
188 |
+
@schedules = "warmup_linear.v1"
|
189 |
+
warmup_steps = 250
|
190 |
+
total_steps = 20000
|
191 |
+
initial_rate = 0.00005
|
192 |
+
|
193 |
+
[training.score_weights]
|
194 |
+
tag_acc = 0.21
|
195 |
+
pos_acc = 0.1
|
196 |
+
morph_acc = 0.1
|
197 |
+
morph_per_feat = null
|
198 |
+
dep_uas = 0.1
|
199 |
+
dep_las = 0.1
|
200 |
+
dep_las_per_type = null
|
201 |
+
sents_p = null
|
202 |
+
sents_r = null
|
203 |
+
sents_f = 0.0
|
204 |
+
lemma_acc = 0.4
|
205 |
+
|
206 |
+
[pretraining]
|
207 |
+
|
208 |
+
[initialize]
|
209 |
+
vectors = ${paths.vectors}
|
210 |
+
init_tok2vec = ${paths.init_tok2vec}
|
211 |
+
vocab_data = null
|
212 |
+
lookups = null
|
213 |
+
before_init = null
|
214 |
+
after_init = null
|
215 |
+
|
216 |
+
[initialize.components]
|
217 |
+
|
218 |
+
[initialize.components.frequency_lemmatizer]
|
219 |
+
|
220 |
+
[initialize.components.frequency_lemmatizer.lookup]
|
221 |
+
@readers = "srsly.read_json.v1"
|
222 |
+
path = "assets/lemmas/lemma_lookup.json"
|
223 |
+
|
224 |
+
[initialize.components.frequency_lemmatizer.table]
|
225 |
+
@readers = "srsly.read_json.v1"
|
226 |
+
path = "assets/lemmas/lemma_table.json"
|
227 |
+
|
228 |
+
[initialize.tokenizer]
|
frequency_lemmatizer/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"overwrite": true, "fallback_priority": "lookup"}
|
frequency_lemmatizer/lookup.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
frequency_lemmatizer/table.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b4678164e1be8f5de58fea33c1a25f57c348012986f69762dc6fde547f955ad
|
3 |
+
size 26885581
|
grc_dep_treebanks_xlm-any-py3-none-any.whl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba03e52d8246f27d441c3db872cc65f100eae01e2e6c95c52f6fcc5b250d58b4
|
3 |
+
size 910823635
|
lemmatizer.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Dict, List, Literal, Optional, Union, Iterable
|
5 |
+
from typing_extensions import TypedDict, NotRequired
|
6 |
+
|
7 |
+
from spacy.language import Language
|
8 |
+
from spacy.pipeline import Pipe
|
9 |
+
from spacy.pipeline.lemmatizer import lemmatizer_score
|
10 |
+
from spacy.util import ensure_path
|
11 |
+
from spacy.tokens import Doc, Token
|
12 |
+
|
13 |
+
# Properties compared, in this order, when narrowing the candidate entries
# for a token: the part-of-speech tag first, then the morphological features.
MATCH_ORDER = [
    "upos", "Tense", "VerbForm", "Voice",
    "Case", "Gender", "Number", "Degree",
    "Mood", "Person", "Aspect", "Definite",
    "PronType", "Polarity", "Poss", "Reflex",
]
|
31 |
+
|
32 |
+
|
33 |
+
class TableEntry(TypedDict):
|
34 |
+
form: str
|
35 |
+
lemma: str
|
36 |
+
upos: str
|
37 |
+
frequency: int
|
38 |
+
Tense: NotRequired[str]
|
39 |
+
VerbForm: NotRequired[str]
|
40 |
+
Voice: NotRequired[str]
|
41 |
+
Case: NotRequired[str]
|
42 |
+
Gender: NotRequired[str]
|
43 |
+
Number: NotRequired[str]
|
44 |
+
Degree: NotRequired[str]
|
45 |
+
Mood: NotRequired[str]
|
46 |
+
Person: NotRequired[str]
|
47 |
+
Aspect: NotRequired[str]
|
48 |
+
Definite: NotRequired[str]
|
49 |
+
PronType: NotRequired[str]
|
50 |
+
Polarity: NotRequired[str]
|
51 |
+
Poss: NotRequired[str]
|
52 |
+
Reflex: NotRequired[str]
|
53 |
+
|
54 |
+
|
55 |
+
FrequencyTable = Dict[str, List[TableEntry]]
|
56 |
+
|
57 |
+
LookupTable = Dict[str, str]
|
58 |
+
|
59 |
+
|
60 |
+
@Language.factory(
    "frequency_lemmatizer",
    assigns=["token.lemma"],
    default_config=dict(overwrite=True, fallback_priority="lookup"),
    default_score_weights=dict(lemma_acc=1.0),
)
def make_lemmatizer(
    nlp: Language,
    name: str,
    overwrite: bool,
    fallback_priority: Literal["lemma", "lookup"],
):
    """Factory registered as ``frequency_lemmatizer``.

    Builds a ``FrequencyLemmatizer`` from the pipeline object, the component
    name and the two config values, and returns it.
    """
    component = FrequencyLemmatizer(
        nlp,
        name,
        overwrite=overwrite,
        fallback_priority=fallback_priority,
    )  # type: ignore
    return component
|
81 |
+
|
82 |
+
|
83 |
+
def max_freq_lemma(entries: List[TableEntry]) -> str:
    """Return the lemma of the entry with the largest ``frequency``.

    A later entry replaces the current best only when its frequency is
    strictly higher, so ties go to the entry that comes first.
    Indexing the first element raises ``IndexError`` for an empty list.
    """
    best = entries[0]
    for candidate in entries[1:]:
        if candidate["frequency"] > best["frequency"]:
            best = candidate
    return best["lemma"]
|
91 |
+
|
92 |
+
|
93 |
+
def match_lemma(
    token_entry: TableEntry, table: FrequencyTable
) -> Optional[str]:
    """Look the token up in the frequency table.

    Returns ``None`` when the table has no (or only an empty) list of
    entries for the token's form. Otherwise the entries are narrowed
    property by property, following ``MATCH_ORDER``; a missing property
    counts as the empty string on both sides. As soon as a property would
    eliminate every remaining entry, narrowing stops, and the most frequent
    of the entries still standing supplies the lemma.
    """
    candidates = table.get(token_entry["form"], [])
    if not candidates:
        return None
    for feature in MATCH_ORDER:
        wanted = token_entry.get(feature, "")
        narrowed = [
            candidate
            for candidate in candidates
            if candidate.get(feature, "") == wanted
        ]
        if not narrowed:
            # Nothing agrees on this feature: keep the previous candidates.
            break
        candidates = narrowed
    return max_freq_lemma(entries=candidates)
|
115 |
+
|
116 |
+
|
117 |
+
def read_json(path: str) -> Dict:
    """Load and return the JSON document stored at ``path``.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default text encoding, so documents containing non-ASCII
    text are read the same way on every system.

    Raises ``FileNotFoundError`` (from ``open``) when the file is missing.
    """
    with open(path, encoding="utf-8") as file:
        return json.load(file)
|
121 |
+
|
122 |
+
|
123 |
+
def write_json(object: Dict, path: str) -> None:
    """Serialize ``object`` as JSON into the file at ``path``.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(path, mode="w") as sink:
        json.dump(obj=object, fp=sink)
|
126 |
+
|
127 |
+
|
128 |
+
class FrequencyLemmatizer(Pipe):
    """
    Part-of-speech, morphology and frequency
    sensitive rule-based lemmatizer.

    Parameters
    ----------
    nlp: Language
        Pipeline object handed in by the factory; it is not stored.
    name: str, default 'freq_lemmatizer'
        Name of the component instance.
    overwrite: bool, default True
        Specifies whether the frequency lemmatizer should overwrite
        already assigned lemmas.
    fallback_priority: 'lemma' or 'lookup', default 'lookup'
        Specifies which fallback should have higher priority
        if the lemma is not found in
        the primary table.
    """

    def __init__(
        self,
        nlp: Language,
        name: str = "freq_lemmatizer",
        *,
        overwrite: bool = True,
        fallback_priority: Literal["lemma", "lookup"] = "lookup",
    ):
        self.name = name
        self.overwrite = overwrite
        self.scorer = lemmatizer_score
        self.fallback_priority = fallback_priority
        # Start without data so the component can be called or saved before
        # initialize()/from_disk() ran, instead of failing on a missing
        # attribute.
        self.table: Optional[FrequencyTable] = None
        self.lookup: Optional[LookupTable] = None

    def initialize(
        self,
        get_examples=None,
        *,
        nlp=None,
        table: Optional[FrequencyTable] = None,
        lookup: Optional[LookupTable] = None,
    ) -> None:
        """Initializes the frequency lemmatizer from given lemma table and lookup.

        Parameters
        ----------
        get_examples: ignored
            Accepted but not used.
        nlp: ignored
            Accepted but not used.
        table: dict of str to list of entries or None, default None
            Entries of the lemma table grouped by form,
            with pos tags, morph features and frequencies.
        lookup: dict of str to str or None, default None
            Backoff lookup table for simple token-lemma lookup.
        """
        self.table = table
        self.lookup = lookup

    def backoff(self, token: Token) -> str:
        """Gets the backoff lemma for a token based on priority.

        Two fallbacks are available: the entry of the lowercased token text
        in the lookup table, and the lemma already set on the token (only
        counted when it is set and differs from the token text).
        ``fallback_priority`` decides which one is tried first; the other
        one is tried second. If neither applies, the token text itself is
        returned.
        """
        orth = token.orth_.lower()
        lookup = self.lookup
        in_lookup = (lookup is not None) and (orth in lookup)
        has_lemma = (token.lemma != 0) and (token.lemma_ != token.orth_)
        if self.fallback_priority == "lookup":
            if in_lookup:
                return lookup[orth]  # type: ignore
            if has_lemma:
                return token.lemma_
        else:
            if has_lemma:
                return token.lemma_
            # The lower-priority fallback is still better than echoing the
            # token text back.
            if in_lookup:
                return lookup[orth]  # type: ignore
        return token.orth_

    def lemmatize(self, token: Token) -> str:
        """Lemmatizes token.

        The lowercased token text, its part-of-speech tag and its
        morphological features are matched against the frequency table.
        Without a table, or without a match, the backoff lemma is returned.
        """
        # Without a table only the fallbacks are available.
        if self.table is None:
            return self.backoff(token)
        # I only add frequency for type compatibility
        token_entry: TableEntry = TableEntry(
            form=token.orth_.lower(),
            upos=token.pos_,
            frequency=-1,
            **token.morph.to_dict(),
        )
        lemma = match_lemma(token_entry=token_entry, table=self.table)
        if lemma is None:
            return self.backoff(token)
        return lemma

    def __call__(self, doc: Doc) -> Doc:
        """Apply the lemmatization to a document.

        Tokens that already have a lemma are left alone unless
        ``overwrite`` is set. Any exception is passed to the component's
        error handler; if that handler returns instead of raising, nothing
        is returned from this call.
        """
        error_handler = self.get_error_handler()
        try:
            for token in doc:
                if self.overwrite or token.lemma == 0:
                    token.lemma_ = self.lemmatize(token)
            return doc
        except Exception as e:
            error_handler(self.name, self, [doc], e)

    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
    ):
        """Save frequency lemmatizer data to a directory.

        Writes ``config.json`` always, and ``table.json`` / ``lookup.json``
        only when the respective data is present. ``exclude`` is accepted
        but not used.
        """
        path = ensure_path(path)
        Path(path).mkdir(parents=True, exist_ok=True)
        config = dict(
            overwrite=self.overwrite, fallback_priority=self.fallback_priority
        )
        write_json(config, path=os.path.join(path, "config.json"))
        if self.table is not None:
            write_json(self.table, path=os.path.join(path, "table.json"))
        if self.lookup is not None:
            write_json(self.lookup, path=os.path.join(path, "lookup.json"))

    @staticmethod
    def _read_optional_json(path: str) -> Optional[Dict]:
        """Reads a JSON file, returning None when the file does not exist."""
        try:
            return read_json(path)
        except FileNotFoundError:
            return None

    def from_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
    ) -> "FrequencyLemmatizer":
        """Load component from disk.

        ``config.json`` is required; settings missing from it keep their
        current value. ``table.json`` and ``lookup.json`` are optional and
        result in ``None`` when absent. ``exclude`` is accepted but not used.
        """
        path = ensure_path(path)
        config = read_json(os.path.join(path, "config.json"))
        self.overwrite = config.get("overwrite", self.overwrite)
        self.fallback_priority = config.get(
            "fallback_priority", self.fallback_priority
        )
        table: Optional[FrequencyTable] = self._read_optional_json(
            os.path.join(path, "table.json")
        )
        lookup: Optional[LookupTable] = self._read_optional_json(
            os.path.join(path, "lookup.json")
        )
        self.initialize(table=table, lookup=lookup)
        return self
|
meta.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
morphologizer/cfg
ADDED
The diff for this file is too large to render.
See raw diff
|
|
morphologizer/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac79b4ca383de69726f4b7d6b1906789e6f863c8f832f042eb440666b52a2e41
|
3 |
+
size 4408561
|
parser/cfg
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"moves":null,
|
3 |
+
"update_with_oracle_cut_size":100,
|
4 |
+
"multitasks":[
|
5 |
+
|
6 |
+
],
|
7 |
+
"min_action_freq":30,
|
8 |
+
"learn_tokens":false,
|
9 |
+
"beam_width":1,
|
10 |
+
"beam_density":0.0,
|
11 |
+
"beam_update_prob":0.0,
|
12 |
+
"incorrect_spans_key":null
|
13 |
+
}
|
parser/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4935f74b412dc41ba2bc69cd62cb760f1aa2f1024d8a8be348417e8cb8272e0f
|
3 |
+
size 2075321
|
parser/moves
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
��moves�3{"0":{"":181951},"1":{"":138489},"2":{"det":30514,"advmod":25856,"case":19705,"nsubj":14893,"obj":13046,"obl":9420,"advcl":8875,"nmod":8747,"discourse":8140,"punct":6644,"cc":5133,"mark":4895,"iobj":2542,"xcomp":2221,"cop":1869,"amod":1626,"vocative":1404,"det||nsubj":1381,"nmod||obj":1144,"nmod||nsubj":1039,"nsubj:pass":966,"case||obl":853,"nummod":639,"mark||advcl":633,"obj||advcl":615,"det||obj":507,"obl||advcl":389,"nsubj||ccomp":335,"acl":330,"obj||xcomp":307,"nmod||obl":298,"advmod||advcl":287,"nsubj||advcl":274,"xcomp||nsubj":272,"ccomp":268,"dislocated":261,"orphan":206,"cc||advcl":160,"conj||advcl":158,"det||obl":149,"amod||obj":149,"advmod||advmod":148,"det||nsubj:pass":140,"xcomp||obj":135,"nmod||nmod":135,"obl||xcomp":133,"det||nmod":133,"advmod||xcomp":128,"amod||nsubj":127,"obl:agent":126,"xcomp||advcl":107,"parataxis":106,"obj||ccomp":102,"det||iobj":100,"nmod||xcomp":94,"advmod||ccomp":89,"csubj":82,"iobj||xcomp":81,"iobj||advcl":79,"det||advmod":76,"advmod||nsubj":76,"advmod||obj":74,"obl||ccomp":72,"cc||nsubj":66,"conj||nsubj":60,"nmod||iobj":57,"advmod||nmod":55,"appos||nsubj":53,"advmod||obl":47,"conj||obj":46,"ccomp||advcl":46,"acl||nsubj":45,"amod||obl":42,"cc||obj":41,"obj||csubj":40,"det||xcomp":40,"iobj||ccomp":38,"nsubj||csubj":35,"advcl||ccomp":35,"nsubj:pass||advcl":34,"acl||obj":33,"xcomp||xcomp":32,"nsubj||nsubj":32,"nmod||det":32,"obl||obj":30,"dep":0},"3":{"conj":15241,"cc":14143,"punct":13626,"nmod":11545,"obj":11482,"obl":11254,"advcl":7395,"nsubj":6834,"advmod":6645,"xcomp":4281,"det":4188,"iobj":4162,"ccomp":2904,"cop":2189,"discourse":2107,"acl":1999,"appos":1772,"amod":1681,"nmod||obj":826,"nsubj:pass":710,"advmod||conj":678,"case":573,"nsubj||conj":568,"obj||conj":564,"vocative":527,"nmod||nsubj":510,"orphan":452,"acl||obj":422,"conj||obj":329,"obl||conj":300,"acl||nsubj":287,"nummod":282,"obl:agent":273,"nmod||conj":268,"cc||obj":258,"flat:name":248,"csubj":223,"conj||nsubj":218,"case||conj":213,"xcomp||obj":205,"para
taxis":201,"advcl||conj":196,"cc||nsubj":186,"nmod||obl":178,"appos||nsubj":178,"amod||obj":174,"case||obl":170,"appos||obj":165,"xcomp||nsubj":148,"fixed":148,"nmod||nmod":146,"det||nsubj":137,"xcomp||conj":132,"amod||nsubj":131,"conj||obl":130,"cc||obl":120,"csubj:pass":118,"det||obj":117,"obl||xcomp":103,"conj||xcomp":95,"acl||obl":85,"obj||xcomp":81,"conj||nmod":79,"iobj||conj":73,"cc||xcomp":71,"cc||nmod":71,"acl||nmod":71,"advmod||obj":69,"nmod||xcomp":64,"dislocated":59,"appos||obl":58,"advcl||advmod":53,"nsubj||ccomp":52,"det||conj":52,"obj||advcl":51,"advmod||xcomp":49,"iobj||xcomp":45,"nmod||iobj":44,"conj||iobj":44,"advmod||advmod":44,"cop||xcomp":42,"advcl||xcomp":42,"cc||iobj":41,"acl||iobj":36,"cop||ccomp":34,"advmod||nsubj":34,"conj||advcl":33,"obl||advcl":32,"ccomp||conj":32,"obj||ccomp":31,"conj||amod":31,"cop||obj":30,"dep":0},"4":{"ROOT":26490}}�cfg��neg_key�
|
tagger/cfg
ADDED
@@ -0,0 +1,839 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"labels":[
|
3 |
+
"---------",
|
4 |
+
"--p---fa-",
|
5 |
+
"--s---ma-",
|
6 |
+
"-3paia---",
|
7 |
+
"-3paim---",
|
8 |
+
"-3siia---",
|
9 |
+
"A-",
|
10 |
+
"C-",
|
11 |
+
"Df",
|
12 |
+
"Dq",
|
13 |
+
"Du",
|
14 |
+
"F-",
|
15 |
+
"G-",
|
16 |
+
"I-",
|
17 |
+
"Ma",
|
18 |
+
"Mo",
|
19 |
+
"Nb",
|
20 |
+
"Ne",
|
21 |
+
"Pc",
|
22 |
+
"Pd",
|
23 |
+
"Pi",
|
24 |
+
"Pk",
|
25 |
+
"Pp",
|
26 |
+
"Pr",
|
27 |
+
"Ps",
|
28 |
+
"Px",
|
29 |
+
"R-",
|
30 |
+
"S-",
|
31 |
+
"V-",
|
32 |
+
"a--------",
|
33 |
+
"a-------s",
|
34 |
+
"a-d---fa-",
|
35 |
+
"a-d---fd-",
|
36 |
+
"a-d---fg-",
|
37 |
+
"a-d---fn-",
|
38 |
+
"a-d---ma-",
|
39 |
+
"a-d---md-",
|
40 |
+
"a-d---mg-",
|
41 |
+
"a-d---mn-",
|
42 |
+
"a-d---mnc",
|
43 |
+
"a-d---mv-",
|
44 |
+
"a-d---na-",
|
45 |
+
"a-d---ng-",
|
46 |
+
"a-d---nn-",
|
47 |
+
"a-p----dc",
|
48 |
+
"a-p---fa-",
|
49 |
+
"a-p---fac",
|
50 |
+
"a-p---fas",
|
51 |
+
"a-p---fd-",
|
52 |
+
"a-p---fdc",
|
53 |
+
"a-p---fds",
|
54 |
+
"a-p---fg-",
|
55 |
+
"a-p---fgc",
|
56 |
+
"a-p---fn-",
|
57 |
+
"a-p---fnc",
|
58 |
+
"a-p---fns",
|
59 |
+
"a-p---fv-",
|
60 |
+
"a-p---m--",
|
61 |
+
"a-p---m-c",
|
62 |
+
"a-p---ma-",
|
63 |
+
"a-p---mac",
|
64 |
+
"a-p---mas",
|
65 |
+
"a-p---md-",
|
66 |
+
"a-p---mdc",
|
67 |
+
"a-p---mds",
|
68 |
+
"a-p---mg-",
|
69 |
+
"a-p---mgc",
|
70 |
+
"a-p---mgs",
|
71 |
+
"a-p---mn-",
|
72 |
+
"a-p---mnc",
|
73 |
+
"a-p---mns",
|
74 |
+
"a-p---mv-",
|
75 |
+
"a-p---mvs",
|
76 |
+
"a-p---na-",
|
77 |
+
"a-p---nac",
|
78 |
+
"a-p---nas",
|
79 |
+
"a-p---nd-",
|
80 |
+
"a-p---ndc",
|
81 |
+
"a-p---nds",
|
82 |
+
"a-p---ng-",
|
83 |
+
"a-p---ngs",
|
84 |
+
"a-p---nn-",
|
85 |
+
"a-p---nnc",
|
86 |
+
"a-p---nns",
|
87 |
+
"a-p---nv-",
|
88 |
+
"a-s----d-",
|
89 |
+
"a-s----dc",
|
90 |
+
"a-s----g-",
|
91 |
+
"a-s----gc",
|
92 |
+
"a-s---fa-",
|
93 |
+
"a-s---fac",
|
94 |
+
"a-s---fas",
|
95 |
+
"a-s---fd-",
|
96 |
+
"a-s---fds",
|
97 |
+
"a-s---fg-",
|
98 |
+
"a-s---fgc",
|
99 |
+
"a-s---fgs",
|
100 |
+
"a-s---fn-",
|
101 |
+
"a-s---fnc",
|
102 |
+
"a-s---fns",
|
103 |
+
"a-s---fv-",
|
104 |
+
"a-s---m--",
|
105 |
+
"a-s---ma-",
|
106 |
+
"a-s---mac",
|
107 |
+
"a-s---mas",
|
108 |
+
"a-s---md-",
|
109 |
+
"a-s---mdc",
|
110 |
+
"a-s---mds",
|
111 |
+
"a-s---mg-",
|
112 |
+
"a-s---mgc",
|
113 |
+
"a-s---mgs",
|
114 |
+
"a-s---mn-",
|
115 |
+
"a-s---mnc",
|
116 |
+
"a-s---mns",
|
117 |
+
"a-s---mv-",
|
118 |
+
"a-s---mvc",
|
119 |
+
"a-s---mvs",
|
120 |
+
"a-s---na-",
|
121 |
+
"a-s---nac",
|
122 |
+
"a-s---nas",
|
123 |
+
"a-s---nd-",
|
124 |
+
"a-s---ndc",
|
125 |
+
"a-s---nds",
|
126 |
+
"a-s---ng-",
|
127 |
+
"a-s---nn-",
|
128 |
+
"a-s---nnc",
|
129 |
+
"a-s---nns",
|
130 |
+
"a-s---nv-",
|
131 |
+
"a-s---nvs",
|
132 |
+
"c--------",
|
133 |
+
"d--------",
|
134 |
+
"d-------c",
|
135 |
+
"d-------s",
|
136 |
+
"g--------",
|
137 |
+
"i--------",
|
138 |
+
"l--------",
|
139 |
+
"l-d---fa-",
|
140 |
+
"l-d---fg-",
|
141 |
+
"l-d---mg-",
|
142 |
+
"l-d---mn-",
|
143 |
+
"l-d---na-",
|
144 |
+
"l-d---nn-",
|
145 |
+
"l-p---fa-",
|
146 |
+
"l-p---fd-",
|
147 |
+
"l-p---fg-",
|
148 |
+
"l-p---fn-",
|
149 |
+
"l-p---ma-",
|
150 |
+
"l-p---md-",
|
151 |
+
"l-p---mg-",
|
152 |
+
"l-p---mn-",
|
153 |
+
"l-p---na-",
|
154 |
+
"l-p---nd-",
|
155 |
+
"l-p---ng-",
|
156 |
+
"l-p---nn-",
|
157 |
+
"l-s---fa-",
|
158 |
+
"l-s---fd-",
|
159 |
+
"l-s---fg-",
|
160 |
+
"l-s---fn-",
|
161 |
+
"l-s---ma-",
|
162 |
+
"l-s---md-",
|
163 |
+
"l-s---mg-",
|
164 |
+
"l-s---mn-",
|
165 |
+
"l-s---na-",
|
166 |
+
"l-s---nd-",
|
167 |
+
"l-s---ng-",
|
168 |
+
"l-s---nn-",
|
169 |
+
"m--------",
|
170 |
+
"m-p---m--",
|
171 |
+
"m-p---md-",
|
172 |
+
"m-p---nn-",
|
173 |
+
"n-----fg-",
|
174 |
+
"n-----na-",
|
175 |
+
"n-----nn-",
|
176 |
+
"n-d----a-",
|
177 |
+
"n-d---fa-",
|
178 |
+
"n-d---fd-",
|
179 |
+
"n-d---fg-",
|
180 |
+
"n-d---fn-",
|
181 |
+
"n-d---ma-",
|
182 |
+
"n-d---md-",
|
183 |
+
"n-d---mg-",
|
184 |
+
"n-d---mn-",
|
185 |
+
"n-d---mv-",
|
186 |
+
"n-d---na-",
|
187 |
+
"n-d---nn-",
|
188 |
+
"n-p----d-",
|
189 |
+
"n-p----g-",
|
190 |
+
"n-p---fa-",
|
191 |
+
"n-p---fd-",
|
192 |
+
"n-p---fg-",
|
193 |
+
"n-p---fn-",
|
194 |
+
"n-p---fv-",
|
195 |
+
"n-p---ma-",
|
196 |
+
"n-p---md-",
|
197 |
+
"n-p---mg-",
|
198 |
+
"n-p---mn-",
|
199 |
+
"n-p---mv-",
|
200 |
+
"n-p---na-",
|
201 |
+
"n-p---nd-",
|
202 |
+
"n-p---ng-",
|
203 |
+
"n-p---nn-",
|
204 |
+
"n-p---nv-",
|
205 |
+
"n-s----d-",
|
206 |
+
"n-s----g-",
|
207 |
+
"n-s----n-",
|
208 |
+
"n-s----v-",
|
209 |
+
"n-s---fa-",
|
210 |
+
"n-s---fd-",
|
211 |
+
"n-s---fg-",
|
212 |
+
"n-s---fn-",
|
213 |
+
"n-s---fv-",
|
214 |
+
"n-s---m--",
|
215 |
+
"n-s---ma-",
|
216 |
+
"n-s---md-",
|
217 |
+
"n-s---mg-",
|
218 |
+
"n-s---mn-",
|
219 |
+
"n-s---mv-",
|
220 |
+
"n-s---na-",
|
221 |
+
"n-s---nd-",
|
222 |
+
"n-s---ng-",
|
223 |
+
"n-s---nn-",
|
224 |
+
"n-s---nv-",
|
225 |
+
"p--------",
|
226 |
+
"p-d----d-",
|
227 |
+
"p-d----n-",
|
228 |
+
"p-d---fa-",
|
229 |
+
"p-d---fd-",
|
230 |
+
"p-d---fg-",
|
231 |
+
"p-d---fn-",
|
232 |
+
"p-d---ma-",
|
233 |
+
"p-d---md-",
|
234 |
+
"p-d---mg-",
|
235 |
+
"p-d---mn-",
|
236 |
+
"p-d---mv-",
|
237 |
+
"p-p----a-",
|
238 |
+
"p-p----d-",
|
239 |
+
"p-p----g-",
|
240 |
+
"p-p----n-",
|
241 |
+
"p-p---fa-",
|
242 |
+
"p-p---fd-",
|
243 |
+
"p-p---fg-",
|
244 |
+
"p-p---fn-",
|
245 |
+
"p-p---ma-",
|
246 |
+
"p-p---md-",
|
247 |
+
"p-p---mg-",
|
248 |
+
"p-p---mn-",
|
249 |
+
"p-p---na-",
|
250 |
+
"p-p---nd-",
|
251 |
+
"p-p---ng-",
|
252 |
+
"p-p---nn-",
|
253 |
+
"p-s----a-",
|
254 |
+
"p-s----d-",
|
255 |
+
"p-s----g-",
|
256 |
+
"p-s----n-",
|
257 |
+
"p-s---fa-",
|
258 |
+
"p-s---fd-",
|
259 |
+
"p-s---fg-",
|
260 |
+
"p-s---fn-",
|
261 |
+
"p-s---ma-",
|
262 |
+
"p-s---md-",
|
263 |
+
"p-s---mg-",
|
264 |
+
"p-s---mn-",
|
265 |
+
"p-s---mv-",
|
266 |
+
"p-s---na-",
|
267 |
+
"p-s---nd-",
|
268 |
+
"p-s---ng-",
|
269 |
+
"p-s---nn-",
|
270 |
+
"p1p---fa-",
|
271 |
+
"p1p---ma-",
|
272 |
+
"p1p---md-",
|
273 |
+
"p1p---mg-",
|
274 |
+
"p1p---mn-",
|
275 |
+
"p1s---fa-",
|
276 |
+
"p1s---fd-",
|
277 |
+
"p1s---fg-",
|
278 |
+
"p1s---fn-",
|
279 |
+
"p1s---ma-",
|
280 |
+
"p1s---md-",
|
281 |
+
"p1s---mg-",
|
282 |
+
"p1s---mn-",
|
283 |
+
"p2p----a-",
|
284 |
+
"p2p----d-",
|
285 |
+
"p2p---ma-",
|
286 |
+
"p2p---mg-",
|
287 |
+
"p2p---mn-",
|
288 |
+
"p2s----a-",
|
289 |
+
"p2s----d-",
|
290 |
+
"p2s----g-",
|
291 |
+
"p2s----n-",
|
292 |
+
"p2s---ma-",
|
293 |
+
"p2s---md-",
|
294 |
+
"p2s---mg-",
|
295 |
+
"p3s---fa-",
|
296 |
+
"p3s---ma-",
|
297 |
+
"r--------",
|
298 |
+
"u--------",
|
299 |
+
"v---na---",
|
300 |
+
"v--amm---",
|
301 |
+
"v--an----",
|
302 |
+
"v--ana---",
|
303 |
+
"v--ane---",
|
304 |
+
"v--anm---",
|
305 |
+
"v--anp---",
|
306 |
+
"v--fna---",
|
307 |
+
"v--fne---",
|
308 |
+
"v--fnm---",
|
309 |
+
"v--fnp---",
|
310 |
+
"v--pna---",
|
311 |
+
"v--pnd---",
|
312 |
+
"v--pne---",
|
313 |
+
"v--pnp---",
|
314 |
+
"v--ppefa-",
|
315 |
+
"v--ppemn-",
|
316 |
+
"v--rn----",
|
317 |
+
"v--rna---",
|
318 |
+
"v--rne---",
|
319 |
+
"v--rnp---",
|
320 |
+
"v--tna---",
|
321 |
+
"v-dapafn-",
|
322 |
+
"v-dapama-",
|
323 |
+
"v-dapamg-",
|
324 |
+
"v-dapamn-",
|
325 |
+
"v-dapmfn-",
|
326 |
+
"v-dapmmn-",
|
327 |
+
"v-dappma-",
|
328 |
+
"v-dappmn-",
|
329 |
+
"v-dppafg-",
|
330 |
+
"v-dppama-",
|
331 |
+
"v-dppamn-",
|
332 |
+
"v-dppefn-",
|
333 |
+
"v-dppema-",
|
334 |
+
"v-dppemd-",
|
335 |
+
"v-dppemn-",
|
336 |
+
"v-dpppmn-",
|
337 |
+
"v-drpama-",
|
338 |
+
"v-drpamn-",
|
339 |
+
"v-drpefn-",
|
340 |
+
"v-drpemn-",
|
341 |
+
"v-p-pmma-",
|
342 |
+
"v-pap-mn-",
|
343 |
+
"v-papafa-",
|
344 |
+
"v-papafg-",
|
345 |
+
"v-papafn-",
|
346 |
+
"v-papama-",
|
347 |
+
"v-papamd-",
|
348 |
+
"v-papamg-",
|
349 |
+
"v-papamn-",
|
350 |
+
"v-papana-",
|
351 |
+
"v-papand-",
|
352 |
+
"v-papann-",
|
353 |
+
"v-papefn-",
|
354 |
+
"v-papema-",
|
355 |
+
"v-papemn-",
|
356 |
+
"v-papmfa-",
|
357 |
+
"v-papmfg-",
|
358 |
+
"v-papmfn-",
|
359 |
+
"v-papmma-",
|
360 |
+
"v-papmmd-",
|
361 |
+
"v-papmmg-",
|
362 |
+
"v-papmmn-",
|
363 |
+
"v-papmna-",
|
364 |
+
"v-papmng-",
|
365 |
+
"v-papmnn-",
|
366 |
+
"v-pappfd-",
|
367 |
+
"v-pappfg-",
|
368 |
+
"v-pappfn-",
|
369 |
+
"v-pappma-",
|
370 |
+
"v-pappmd-",
|
371 |
+
"v-pappmg-",
|
372 |
+
"v-pappmn-",
|
373 |
+
"v-pappna-",
|
374 |
+
"v-pappng-",
|
375 |
+
"v-pappnn-",
|
376 |
+
"v-pfpama-",
|
377 |
+
"v-pfpamg-",
|
378 |
+
"v-pfpamn-",
|
379 |
+
"v-pfpema-",
|
380 |
+
"v-pfpemn-",
|
381 |
+
"v-pfpmfa-",
|
382 |
+
"v-pfpmfn-",
|
383 |
+
"v-pfpmma-",
|
384 |
+
"v-pfpmmd-",
|
385 |
+
"v-pfpmmg-",
|
386 |
+
"v-pfpmmn-",
|
387 |
+
"v-pfpmnn-",
|
388 |
+
"v-pfppmn-",
|
389 |
+
"v-ppp-mn-",
|
390 |
+
"v-pppafa-",
|
391 |
+
"v-pppafd-",
|
392 |
+
"v-pppafg-",
|
393 |
+
"v-pppafn-",
|
394 |
+
"v-pppafv-",
|
395 |
+
"v-pppama-",
|
396 |
+
"v-pppamd-",
|
397 |
+
"v-pppamg-",
|
398 |
+
"v-pppamn-",
|
399 |
+
"v-pppamv-",
|
400 |
+
"v-pppana-",
|
401 |
+
"v-pppand-",
|
402 |
+
"v-pppang-",
|
403 |
+
"v-pppann-",
|
404 |
+
"v-pppefa-",
|
405 |
+
"v-pppefd-",
|
406 |
+
"v-pppefg-",
|
407 |
+
"v-pppefn-",
|
408 |
+
"v-pppefv-",
|
409 |
+
"v-pppema-",
|
410 |
+
"v-pppemd-",
|
411 |
+
"v-pppemg-",
|
412 |
+
"v-pppemn-",
|
413 |
+
"v-pppemv-",
|
414 |
+
"v-pppena-",
|
415 |
+
"v-pppend-",
|
416 |
+
"v-pppeng-",
|
417 |
+
"v-pppenn-",
|
418 |
+
"v-ppppma-",
|
419 |
+
"v-ppppmd-",
|
420 |
+
"v-ppppmn-",
|
421 |
+
"v-prp-mn-",
|
422 |
+
"v-prpafa-",
|
423 |
+
"v-prpafd-",
|
424 |
+
"v-prpafn-",
|
425 |
+
"v-prpama-",
|
426 |
+
"v-prpamd-",
|
427 |
+
"v-prpamg-",
|
428 |
+
"v-prpamn-",
|
429 |
+
"v-prpana-",
|
430 |
+
"v-prpang-",
|
431 |
+
"v-prpefa-",
|
432 |
+
"v-prpefd-",
|
433 |
+
"v-prpefg-",
|
434 |
+
"v-prpefn-",
|
435 |
+
"v-prpema-",
|
436 |
+
"v-prpemd-",
|
437 |
+
"v-prpemg-",
|
438 |
+
"v-prpemn-",
|
439 |
+
"v-prpena-",
|
440 |
+
"v-prpend-",
|
441 |
+
"v-prpeng-",
|
442 |
+
"v-prpenn-",
|
443 |
+
"v-prppfn-",
|
444 |
+
"v-prppmn-",
|
445 |
+
"v-sagamn-",
|
446 |
+
"v-saiamn-",
|
447 |
+
"v-samp---",
|
448 |
+
"v-sap-mg-",
|
449 |
+
"v-sap-mn-",
|
450 |
+
"v-sapafa-",
|
451 |
+
"v-sapafd-",
|
452 |
+
"v-sapafg-",
|
453 |
+
"v-sapafn-",
|
454 |
+
"v-sapama-",
|
455 |
+
"v-sapamd-",
|
456 |
+
"v-sapamg-",
|
457 |
+
"v-sapamn-",
|
458 |
+
"v-sapamv-",
|
459 |
+
"v-sapana-",
|
460 |
+
"v-sapang-",
|
461 |
+
"v-sapann-",
|
462 |
+
"v-sapanv-",
|
463 |
+
"v-sapema-",
|
464 |
+
"v-sapemn-",
|
465 |
+
"v-sapmfa-",
|
466 |
+
"v-sapmfd-",
|
467 |
+
"v-sapmfg-",
|
468 |
+
"v-sapmfn-",
|
469 |
+
"v-sapmma-",
|
470 |
+
"v-sapmmd-",
|
471 |
+
"v-sapmmg-",
|
472 |
+
"v-sapmmn-",
|
473 |
+
"v-sapmna-",
|
474 |
+
"v-sapmng-",
|
475 |
+
"v-sapmnn-",
|
476 |
+
"v-sappfa-",
|
477 |
+
"v-sappfd-",
|
478 |
+
"v-sappfg-",
|
479 |
+
"v-sappfn-",
|
480 |
+
"v-sappma-",
|
481 |
+
"v-sappmd-",
|
482 |
+
"v-sappmg-",
|
483 |
+
"v-sappmn-",
|
484 |
+
"v-sappna-",
|
485 |
+
"v-sappng-",
|
486 |
+
"v-sappnn-",
|
487 |
+
"v-sappnv-",
|
488 |
+
"v-sfpafa-",
|
489 |
+
"v-sfpafd-",
|
490 |
+
"v-sfpafn-",
|
491 |
+
"v-sfpama-",
|
492 |
+
"v-sfpamd-",
|
493 |
+
"v-sfpamg-",
|
494 |
+
"v-sfpamn-",
|
495 |
+
"v-sfpmfa-",
|
496 |
+
"v-sfpmfd-",
|
497 |
+
"v-sfpmfg-",
|
498 |
+
"v-sfpmfn-",
|
499 |
+
"v-sfpmma-",
|
500 |
+
"v-sfpmmg-",
|
501 |
+
"v-sfpmmn-",
|
502 |
+
"v-sfpmna-",
|
503 |
+
"v-sfppma-",
|
504 |
+
"v-spiamn-",
|
505 |
+
"v-spp-mn-",
|
506 |
+
"v-spp-nn-",
|
507 |
+
"v-sppa---",
|
508 |
+
"v-sppafa-",
|
509 |
+
"v-sppafd-",
|
510 |
+
"v-sppafg-",
|
511 |
+
"v-sppafn-",
|
512 |
+
"v-sppafv-",
|
513 |
+
"v-sppama-",
|
514 |
+
"v-sppamd-",
|
515 |
+
"v-sppamg-",
|
516 |
+
"v-sppamn-",
|
517 |
+
"v-sppamv-",
|
518 |
+
"v-sppana-",
|
519 |
+
"v-sppand-",
|
520 |
+
"v-sppang-",
|
521 |
+
"v-sppann-",
|
522 |
+
"v-sppanv-",
|
523 |
+
"v-sppefa-",
|
524 |
+
"v-sppefd-",
|
525 |
+
"v-sppefg-",
|
526 |
+
"v-sppefn-",
|
527 |
+
"v-sppema-",
|
528 |
+
"v-sppemd-",
|
529 |
+
"v-sppemg-",
|
530 |
+
"v-sppemn-",
|
531 |
+
"v-sppemv-",
|
532 |
+
"v-sppena-",
|
533 |
+
"v-sppend-",
|
534 |
+
"v-sppeng-",
|
535 |
+
"v-sppenn-",
|
536 |
+
"v-spppfa-",
|
537 |
+
"v-spppfd-",
|
538 |
+
"v-spppfg-",
|
539 |
+
"v-spppfn-",
|
540 |
+
"v-spppma-",
|
541 |
+
"v-spppmn-",
|
542 |
+
"v-srp-mn-",
|
543 |
+
"v-srpafa-",
|
544 |
+
"v-srpafd-",
|
545 |
+
"v-srpafg-",
|
546 |
+
"v-srpafn-",
|
547 |
+
"v-srpama-",
|
548 |
+
"v-srpamd-",
|
549 |
+
"v-srpamg-",
|
550 |
+
"v-srpamn-",
|
551 |
+
"v-srpamv-",
|
552 |
+
"v-srpana-",
|
553 |
+
"v-srpand-",
|
554 |
+
"v-srpang-",
|
555 |
+
"v-srpann-",
|
556 |
+
"v-srpefa-",
|
557 |
+
"v-srpefd-",
|
558 |
+
"v-srpefg-",
|
559 |
+
"v-srpefn-",
|
560 |
+
"v-srpema-",
|
561 |
+
"v-srpemd-",
|
562 |
+
"v-srpemg-",
|
563 |
+
"v-srpemn-",
|
564 |
+
"v-srpemv-",
|
565 |
+
"v-srpena-",
|
566 |
+
"v-srpend-",
|
567 |
+
"v-srpeng-",
|
568 |
+
"v-srpenn-",
|
569 |
+
"v-srppfn-",
|
570 |
+
"v-srppma-",
|
571 |
+
"v-srppmn-",
|
572 |
+
"v-srppmv-",
|
573 |
+
"v1paia---",
|
574 |
+
"v1paim---",
|
575 |
+
"v1paip---",
|
576 |
+
"v1paoa---",
|
577 |
+
"v1paom---",
|
578 |
+
"v1paop---",
|
579 |
+
"v1pasa---",
|
580 |
+
"v1pase---",
|
581 |
+
"v1pasm---",
|
582 |
+
"v1pasp---",
|
583 |
+
"v1pfia---",
|
584 |
+
"v1pfim---",
|
585 |
+
"v1pfom---",
|
586 |
+
"v1piia---",
|
587 |
+
"v1piie---",
|
588 |
+
"v1plia---",
|
589 |
+
"v1plie---",
|
590 |
+
"v1ppia---",
|
591 |
+
"v1ppie---",
|
592 |
+
"v1ppip---",
|
593 |
+
"v1ppoa---",
|
594 |
+
"v1ppoe---",
|
595 |
+
"v1ppsa---",
|
596 |
+
"v1ppse---",
|
597 |
+
"v1pria---",
|
598 |
+
"v1prie---",
|
599 |
+
"v1prsa---",
|
600 |
+
"v1prse---",
|
601 |
+
"v1ptie---",
|
602 |
+
"v1s-sa---",
|
603 |
+
"v1sa-a---",
|
604 |
+
"v1saia---",
|
605 |
+
"v1saie---",
|
606 |
+
"v1saim---",
|
607 |
+
"v1saip---",
|
608 |
+
"v1sao----",
|
609 |
+
"v1saoa---",
|
610 |
+
"v1saoe---",
|
611 |
+
"v1saom---",
|
612 |
+
"v1saop---",
|
613 |
+
"v1sasa---",
|
614 |
+
"v1sase---",
|
615 |
+
"v1sasm---",
|
616 |
+
"v1sasp---",
|
617 |
+
"v1sfi----",
|
618 |
+
"v1sfia---",
|
619 |
+
"v1sfie---",
|
620 |
+
"v1sfim---",
|
621 |
+
"v1sfip---",
|
622 |
+
"v1siia---",
|
623 |
+
"v1siie---",
|
624 |
+
"v1slia---",
|
625 |
+
"v1slie---",
|
626 |
+
"v1slim---",
|
627 |
+
"v1spia---",
|
628 |
+
"v1spie---",
|
629 |
+
"v1spoa---",
|
630 |
+
"v1spoe---",
|
631 |
+
"v1spsa---",
|
632 |
+
"v1spse---",
|
633 |
+
"v1sria---",
|
634 |
+
"v1srie---",
|
635 |
+
"v1sroa---",
|
636 |
+
"v1sroe---",
|
637 |
+
"v1srsa---",
|
638 |
+
"v1stie---",
|
639 |
+
"v1stim---",
|
640 |
+
"v2daia---",
|
641 |
+
"v2dama---",
|
642 |
+
"v2dasa---",
|
643 |
+
"v2dase---",
|
644 |
+
"v2dfia---",
|
645 |
+
"v2dfim---",
|
646 |
+
"v2diia---",
|
647 |
+
"v2diie---",
|
648 |
+
"v2dpia---",
|
649 |
+
"v2dpma---",
|
650 |
+
"v2dpme---",
|
651 |
+
"v2dria---",
|
652 |
+
"v2drma---",
|
653 |
+
"v2paia---",
|
654 |
+
"v2paim---",
|
655 |
+
"v2paip---",
|
656 |
+
"v2pama---",
|
657 |
+
"v2pame---",
|
658 |
+
"v2pamm---",
|
659 |
+
"v2paoa---",
|
660 |
+
"v2paom---",
|
661 |
+
"v2paop---",
|
662 |
+
"v2pasa---",
|
663 |
+
"v2pase---",
|
664 |
+
"v2pasm---",
|
665 |
+
"v2pasp---",
|
666 |
+
"v2pfia---",
|
667 |
+
"v2pfim---",
|
668 |
+
"v2piia---",
|
669 |
+
"v2piie---",
|
670 |
+
"v2ppia---",
|
671 |
+
"v2ppie---",
|
672 |
+
"v2ppma---",
|
673 |
+
"v2ppme---",
|
674 |
+
"v2ppoa---",
|
675 |
+
"v2ppoe---",
|
676 |
+
"v2ppsa---",
|
677 |
+
"v2pria---",
|
678 |
+
"v2prie---",
|
679 |
+
"v2prma---",
|
680 |
+
"v2prmp---",
|
681 |
+
"v2proa---",
|
682 |
+
"v2prsa---",
|
683 |
+
"v2saia---",
|
684 |
+
"v2saie---",
|
685 |
+
"v2saim---",
|
686 |
+
"v2saip---",
|
687 |
+
"v2sam----",
|
688 |
+
"v2sama---",
|
689 |
+
"v2same---",
|
690 |
+
"v2samm---",
|
691 |
+
"v2samp---",
|
692 |
+
"v2saoa---",
|
693 |
+
"v2saoe---",
|
694 |
+
"v2saom---",
|
695 |
+
"v2saop---",
|
696 |
+
"v2sasa---",
|
697 |
+
"v2sase---",
|
698 |
+
"v2sasm---",
|
699 |
+
"v2sasp---",
|
700 |
+
"v2sfi----",
|
701 |
+
"v2sfia---",
|
702 |
+
"v2sfie---",
|
703 |
+
"v2sfim---",
|
704 |
+
"v2sfip---",
|
705 |
+
"v2siia---",
|
706 |
+
"v2siie---",
|
707 |
+
"v2siip---",
|
708 |
+
"v2slia---",
|
709 |
+
"v2slie---",
|
710 |
+
"v2slim---",
|
711 |
+
"v2spia---",
|
712 |
+
"v2spie---",
|
713 |
+
"v2spma---",
|
714 |
+
"v2spme---",
|
715 |
+
"v2spoa---",
|
716 |
+
"v2spoe---",
|
717 |
+
"v2spsa---",
|
718 |
+
"v2spse---",
|
719 |
+
"v2sria---",
|
720 |
+
"v2srie---",
|
721 |
+
"v2srma---",
|
722 |
+
"v2srme---",
|
723 |
+
"v2sroa---",
|
724 |
+
"v2srsa---",
|
725 |
+
"v2stie---",
|
726 |
+
"v3-roe---",
|
727 |
+
"v3daia---",
|
728 |
+
"v3daim---",
|
729 |
+
"v3daip---",
|
730 |
+
"v3daoa---",
|
731 |
+
"v3dfia---",
|
732 |
+
"v3dfim---",
|
733 |
+
"v3diia---",
|
734 |
+
"v3diie---",
|
735 |
+
"v3dlia---",
|
736 |
+
"v3dlie---",
|
737 |
+
"v3dlim---",
|
738 |
+
"v3dpia---",
|
739 |
+
"v3dpie---",
|
740 |
+
"v3dpma---",
|
741 |
+
"v3dpme---",
|
742 |
+
"v3dpsa---",
|
743 |
+
"v3dria---",
|
744 |
+
"v3pai----",
|
745 |
+
"v3paia---",
|
746 |
+
"v3paie---",
|
747 |
+
"v3paim---",
|
748 |
+
"v3paip---",
|
749 |
+
"v3pamm---",
|
750 |
+
"v3paoa---",
|
751 |
+
"v3paoe---",
|
752 |
+
"v3paom---",
|
753 |
+
"v3paop---",
|
754 |
+
"v3pasa---",
|
755 |
+
"v3pase---",
|
756 |
+
"v3pasm---",
|
757 |
+
"v3pasp---",
|
758 |
+
"v3pfia---",
|
759 |
+
"v3pfie---",
|
760 |
+
"v3pfim---",
|
761 |
+
"v3piia---",
|
762 |
+
"v3piie---",
|
763 |
+
"v3piip---",
|
764 |
+
"v3plia---",
|
765 |
+
"v3plie---",
|
766 |
+
"v3plim---",
|
767 |
+
"v3plip---",
|
768 |
+
"v3ppia---",
|
769 |
+
"v3ppie---",
|
770 |
+
"v3ppip---",
|
771 |
+
"v3ppma---",
|
772 |
+
"v3ppme---",
|
773 |
+
"v3ppoa---",
|
774 |
+
"v3ppoe---",
|
775 |
+
"v3ppsa---",
|
776 |
+
"v3ppse---",
|
777 |
+
"v3pria---",
|
778 |
+
"v3prie---",
|
779 |
+
"v3prip---",
|
780 |
+
"v3sai----",
|
781 |
+
"v3saia---",
|
782 |
+
"v3saie---",
|
783 |
+
"v3saim---",
|
784 |
+
"v3saip---",
|
785 |
+
"v3sama---",
|
786 |
+
"v3samm---",
|
787 |
+
"v3samp---",
|
788 |
+
"v3sana---",
|
789 |
+
"v3sao----",
|
790 |
+
"v3saoa---",
|
791 |
+
"v3saoe---",
|
792 |
+
"v3saom---",
|
793 |
+
"v3saop---",
|
794 |
+
"v3sas----",
|
795 |
+
"v3sasa---",
|
796 |
+
"v3sase---",
|
797 |
+
"v3sasm---",
|
798 |
+
"v3sasp---",
|
799 |
+
"v3sfi----",
|
800 |
+
"v3sfia---",
|
801 |
+
"v3sfie---",
|
802 |
+
"v3sfim---",
|
803 |
+
"v3sfip---",
|
804 |
+
"v3sfoa---",
|
805 |
+
"v3sii----",
|
806 |
+
"v3siia---",
|
807 |
+
"v3siie---",
|
808 |
+
"v3siip---",
|
809 |
+
"v3sli----",
|
810 |
+
"v3slia---",
|
811 |
+
"v3slie---",
|
812 |
+
"v3slim---",
|
813 |
+
"v3slip---",
|
814 |
+
"v3spia---",
|
815 |
+
"v3spie---",
|
816 |
+
"v3spip---",
|
817 |
+
"v3spma---",
|
818 |
+
"v3spme---",
|
819 |
+
"v3spoa---",
|
820 |
+
"v3spoe---",
|
821 |
+
"v3spop---",
|
822 |
+
"v3spsa---",
|
823 |
+
"v3spse---",
|
824 |
+
"v3sria---",
|
825 |
+
"v3srie---",
|
826 |
+
"v3srip---",
|
827 |
+
"v3srma---",
|
828 |
+
"v3sroa---",
|
829 |
+
"v3srsa---",
|
830 |
+
"v3stie---",
|
831 |
+
"v3stim---",
|
832 |
+
"v3stip---",
|
833 |
+
"x--------",
|
834 |
+
"x-p----d-",
|
835 |
+
"x-p---nn-"
|
836 |
+
],
|
837 |
+
"neg_prefix":"!",
|
838 |
+
"overwrite":false
|
839 |
+
}
|
tagger/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63bfc381f7bf45ed55fe9ae93417b5915f60dcd0d621f6b61c13cb02e58108b5
|
3 |
+
size 2562961
|
tokenizer
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
��prefix_search�{^†|^⸏|^…|^……|^,|^:|^;|^\!|^\?|^¿|^؟|^¡|^\(|^\)|^\[|^\]|^\{|^\}|^<|^>|^_|^#|^\*|^&|^。|^?|^!|^,|^、|^;|^:|^~|^·|^।|^،|^۔|^؛|^٪|^\.\.+|^…|^\'|^"|^”|^“|^`|^‘|^´|^’|^‚|^,|^„|^»|^«|^「|^」|^『|^』|^(|^)|^〔|^〕|^【|^】|^《|^》|^〈|^〉|^〈|^〉|^⟦|^⟧|^\$|^£|^€|^¥|^฿|^US\$|^C\$|^A\$|^₽|^﷼|^₴|^₠|^₡|^₢|^₣|^₤|^₥|^₦|^₧|^₨|^₩|^₪|^₫|^€|^₭|^₮|^₯|^₰|^₱|^₲|^₳|^₴|^₵|^₶|^₷|^₸|^₹|^₺|^₻|^₼|^₽|^₾|^₿|^[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U000
1DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]�suffix_search�
|
2 |
+
�…$|……$|,$|:$|;$|\!$|\?$|¿$|؟$|¡$|\($|\)$|\[$|\]$|\{$|\}$|<$|>$|_$|#$|\*$|&$|。$|?$|!$|,$|、$|;$|:$|~$|·$|।$|،$|۔$|؛$|٪$|\.\.+$|…$|\'$|"$|”$|“$|`$|‘$|´$|’$|‚$|,$|„$|»$|«$|「$|」$|『$|』$|($|)$|〔$|〕$|【$|】$|《$|》$|〈$|〉$|〈$|〉$|⟦$|⟧$|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B
1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]$|†$|⸎$|(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]$�infix_finditer�?!\.\.+|…|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U0
00101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]|(?<=[0-9])[+\-\*^](?=[0-9-])|(?<=[a-z\uFF41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0
221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U000
2A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])\.(?=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u
1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u304
0-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F]),(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0
980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])(?:-|–|—|--|---|——|~)(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u0
0FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])[:<>=/](?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002
B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[\u1F00-\u1FFF\u0370-\u03FF])—�token_match��url_match�
|
3 |
+
��A�
|
4 |
+
� ��A� �'��A�'�''��A�''�(*_*)��A�(*_*)�(-8��A�(-8�(-:��A�(-:�(-;��A�(-;�(-_-)��A�(-_-)�(._.)��A�(._.)�(:��A�(:�(;��A�(;�(=��A�(=�(>_<)��A�(>_<)�(^_^)��A�(^_^)�(o:��A�(o:�(¬_¬)��A�(¬_¬)�(ಠ_ಠ)��A�(ಠ_ಠ)�(╯°□°)╯︵┻━┻��A�(╯°□°)╯︵┻━┻�)-:��A�)-:�):��A�):�-_-��A�-_-�-__-��A�-__-�._.��A�._.�0.0��A�0.0�0.o��A�0.o�0_0��A�0_0�0_o��A�0_o�8)��A�8)�8-)��A�8-)�8-D��A�8-D�8D��A�8D�:'(��A�:'(�:')��A�:')�:'-(��A�:'-(�:'-)��A�:'-)�:(��A�:(�:((��A�:((�:(((��A�:(((�:()��A�:()�:)��A�:)�:))��A�:))�:)))��A�:)))�:*��A�:*�:-(��A�:-(�:-((��A�:-((�:-(((��A�:-(((�:-)��A�:-)�:-))��A�:-))�:-)))��A�:-)))�:-*��A�:-*�:-/��A�:-/�:-0��A�:-0�:-3��A�:-3�:->��A�:->�:-D��A�:-D�:-O��A�:-O�:-P��A�:-P�:-X��A�:-X�:-]��A�:-]�:-o��A�:-o�:-p��A�:-p�:-x��A�:-x�:-|��A�:-|�:-}��A�:-}�:/��A�:/�:0��A�:0�:1��A�:1�:3��A�:3�:>��A�:>�:D��A�:D�:O��A�:O�:P��A�:P�:X��A�:X�:]��A�:]�:o��A�:o�:o)��A�:o)�:p��A�:p�:x��A�:x�:|��A�:|�:}��A�:}�:’(��A�:’(�:’)��A�:’)�:’-(��A�:’-(�:’-)��A�:’-)�;)��A�;)�;-)��A�;-)�;-D��A�;-D�;D��A�;D�;_;��A�;_;�<.<��A�<.<�</3��A�</3�<3��A�<3�<33��A�<33�<333��A�<333�<space>��A�<space>�=(��A�=(�=)��A�=)�=/��A�=/�=3��A�=3�=D��A�=D�=[��A�=[�=]��A�=]�=|��A�=|�>.<��A�>.<�>.>��A�>.>�>:(��A�>:(�>:o��A�>:o�><(((*>��A�><(((*>�@_@��A�@_@�C++��A�C++�O.O��A�O.O�O.o��A�O.o�O_O��A�O_O�O_o��A�O_o�V.V��A�V.V�V_V��A�V_V�XD��A�XD�XDD��A�XDD�[-:��A�[-:�[:��A�[:�[=��A�[=�\")��A�\")�\n��A�\n�\t��A�\t�]=��A�]=�^_^��A�^_^�^__^��A�^__^�^___^��A�^___^�a.��A�a.�b.��A�b.�c.��A�c.�d.��A�d.�e.��A�e.�f.��A�f.�g.��A�g.�h.��A�h.�i.��A�i.�j.��A�j.�k.��A�k.�l.��A�l.�m.��A�m.�n.��A�n.�o.��A�o.�o.0��A�o.0�o.O��A�o.O�o.o��A�o.o�o_0��A�o_0�o_O��A�o_O�o_o��A�o_o�p.��A�p.�q.��A�q.�r.��A�r.�s.��A�s.�t.��A�t.�u.��A�u.�v.��A�v.�v.v��A�v.v�v_v��A�v_v�w.��A�w.�x.��A�x.�xD��A�xD�xDD��A�xDD�y.��A�y.�z.��A�z.� ��A� C� 
�¯\(ツ)/¯��A�¯\(ツ)/¯�°C.��A�°�A�C�A�.�°F.��A�°�A�F�A�.�°K.��A�°�A�K�A�.�°c.��A�°�A�c�A�.�°f.��A�°�A�f�A�.�°k.��A�°�A�k�A�.�ä.��A�ä.�ö.��A�ö.�ü.��A�ü.�Δ'��A�Δ'C�δέ�Δι'��A�Δι'C�διά�Δι’��A�Δι’C�διά�Δ’��A�Δ’C�δέ�Εφ'��A�Εφ'C�επί�Εφ’��A�Εφ’C�επί�Καθ'��A�Καθ'C�κατά�Καθ’��A�Καθ’C�κατά�Κατ'��A�Κατ'C�κατά�Κατ’��A�Κατ’C�κατά�Μ'��A�Μ'C�με�Μετ'��A�Μετ'C�μετά�Μετ’��A�Μετ’C�μετά�Μ’��A�Μ’C�με�Παρ'��A�Παρ'C�παρά�Παρ’��A�Παρ’C�παρά�Σ'��A�Σ'C�σε�Σ’��A�Σ’C�σε�Τ'��A�Τ'C�τε�Τ’��A�Τ’C�τε�αὑτός��A�αὑC�ὁ�A�τόςC�αὐτός�αὑτὸς��A�αὑC�ὁ�A�τὸςC�αὐτός�δ'��A�δ'C�δέ�δι'��A�δι'C�διά�διὰ��A�διὰC�διά�δι’��A�δι’C�διά�δὲ��A�δὲC�δέ�δ’��A�δ’C�δέ�εφ'��A�εφ'C�επί�εφ’��A�εφ’C�επί�θοἰμάτιον��A�θοC�τό�A�ἰμάτιον�θἡμέρᾳ��A�θC�τῇ�A�ἡμέρᾳ�καθ'��A�καθ'C�κατά�καθ’��A�καθ’C�κατά�κατ'��A�κατ'C�κατά�κατὰ��A�κατὰC�κατά�κατ’��A�κατ’C�κατά�καὐτός��A�κC�καί�A�αὐτός�καὐτὸς��A�κC�καί�A�αὐτὸςC�αὐτός�καὶ��A�καὶC�καί�κεἰ��A�κC�καί�A�εἰ�κεἰς��A�κC�καί�A�εἰς�κοὐ��A�κC�καί�A�οὐ�κἀγώ��A�κἀC�καί�A�γώC�ἐγώ�κἀγὼ��A�κἀC�καί�A�γὼC�ἐγώ�κἀν��A�κC�καί�A�ἀνC�ἐν�κἀς��A�κC�καί�A�ἀςC�ἐς�κᾆτα��A�κC�καί�A�ᾆταC�εἶτα�μ'��A�μ'C�με�μέ��A�μέC�με�μεθ'��A�μεθ'C�μετά�μεθ’��A�μεθ’C�μετά�μετ'��A�μετ'C�μετά�μετὰ��A�μετὰC�μετά�μετ’��A�μετ’C�μετά�μοὔστι��A�μοὔC�μοί�A�στιC�ἐστι�μοὖστι��A�μοὖC�μοί�A�στιC�ἐστι�μὲ��A�μὲC�με�μὲν��A�μὲνC�μέν�μὴν��A�μὴνC�μήν�μ’��A�μ’C�με�οὑμοί��A�οὑC�οἱ�A�μοίC�ἐμoί�οὑμοὶ��A�οὑC�οἱ�A�μοὶC�ἐμoί�οὑμός��A�οὑC�ὁ�A�μόςC�ἐμός�οὑμὸς��A�οὑC�ὁ�A�μὸςC�ἐμός�οὑν��A�οὑC�ὁ�A�νC�ἐν�παρ��A�παρC�παρά�παρ'��A�παρ'C�παρά�παρὰ��A�παρὰC�παρά�παρ’��A�παρ’C�παρά�προὔχοντα��A�προὔC�πρό�A�χονταC�ἔχοντα�προὔχων��A�προὔC�πρό�A�χωνC�ἔχων�σ'��A�σ'C�σε�σέ��A�σέC�σε�σοὐστί��A�σοὐC�σοί�A�στίC�ἐστί�σοὐστὶ��A�σοὐC�σοί�A�στὶC�ἐστί�σοὔστι��A�σοὔC�σοί�A�στιC�ἐστι�σὲ��A�σὲC�σε�σ’��A�σ’C�σε�τ'��A�τ'C�τε�τέ��A�τέC�τε�ταὐτοῦ��A�τC�τοῦ�A�αὐτοῦ�τοὔνομα��A�τοὔC�τό�A�νομαC�ὄνομα�τἀνδρί��A�τC�τῷ�A�ἀνδρί�τἀνδρός��A�τC�τοῦ�A�ἀνδρός�τἀνδρὶ��A�τC�τῷ�A�ἀνδρὶC�ἀνδρί�τἀνδρὸς��A�τC�τοῦ�A�ἀνδρὸςC�ἀνδρός�τἄλλα��A�τC�τὰ�A�ἄλλα�τἆλλα��A�τἆC�τὰ�A�λλαC�ἄλλα�τὠληθές��A�τὠC�τὸ�A�ληθέςC�ἀληθές�τὲ
��A�τὲC�τε�τὴν��A�τὴνC�τήν�τὸν��A�τὸνC�τόν�τ’��A�τ’C�τε�χοἱ��A�χC�καί�A�οἱ�χἡ��A�χC�καί�A�ἡ�χἱκετεύετε��A�χC�καί�A�ἱκετεύετε�χὤπως��A�χC�καί�A�ὤπωςC�ὅπως�χὤταν��A�χC�καί�A�ὤτανC�ὅταν�χὤτε��A�χC�καί�A�ὤτεC�ὅτε�χὤτι��A�χC�καί�A�ὤτιC�ὅτι�ಠ_ಠ��A�ಠ_ಠ�ಠ︵ಠ��A�ಠ︵ಠ�ἀλλ'��A�ἀλλ'C�ἀλλά�ἀλλὰ��A�ἀλλὰC�ἀλλά�ἀλλ’��A�ἀλλ’C�ἀλλά�ἀπὸ��A�ἀπὸC�από�ἀφ'��A�ἀφ'C�από�ἀφ’��A�ἀφ’C�από�ἁγαθαί��A�ἁC�αἱ�A�γαθαίC�ἀγαθαί�ἁγαθαὶ��A�ἁC�αἱ�A�γαθαὶC�ἀγαθαί�ἁγώ��A�ἁC�ἃ�A�γώC�ἐγώ�ἁγὼ��A�ἁC�ἃ�A�γὼC�ἐγώ�ἁλήθεια��A�ἁC�ἡ�A�λήθειαC�ἀλήθεια�ἁνήρ��A�ἁC�ὁ�A�νήρC�ἀνήρ�ἁνὴρ��A�ἁC�ὁ�A�νὴρC�ἀνήρ�ἅνδρες��A�ἅC�οἱ�A�νδρεςC�ἄνδρες�ἅνθρωπος��A�ἅC�ὁ�A�νθρωποςC�ἄνθρωπος�ἐγᾦδα��A�ἐγC�ἐγώ�A�ᾦδαC�οἶδα�ἐγᾦμαι��A�ἐγC�ἐγώ�A�ᾦμαιC�οἶμαι�ἐπ'��A�ἐπ'C�επί�ἐπὶ��A�ἐπὶC�επί�ἐπ’��A�ἐπ’C�επί�Ἐπ'��A�Ἐπ'C�επί�Ἐπ’��A�Ἐπ’C�επί�ὑπ'��A�ὑπ'C�ὑπό�ὑπ’��A�ὑπ’C�ὑπό�ὑφ'��A�ὑφ'C�ὑπό�ὑφ’��A�ὑφ’C�ὑπό�Ὑπ'��A�Ὑπ'C�ὑπό�Ὑπ’��A�Ὑπ’C�ὑπό�ὥνεκα��A�ὥC�οὗ�A�νεκαC�ἕνεκα�ὦνδρες��A�ὦC�ὦ�A�νδρεςC�ἄνδρες�ὦνερ��A�ὦC�ὦ�A�νερC�ἄνερ�᾽ΑΠ'��A�᾽ΑΠ'C�από�᾽ΑΠ’��A�᾽ΑΠ’C�από�᾽Αλλ'��A�᾽Αλλ'C�ἀλλά�᾽Αλλ’��A�᾽Αλλ’C�ἀλλά�᾽Απ'��A�᾽Απ'C�από�᾽Απ’��A�᾽Απ’C�από�᾽Αφ��A�᾽ΑφC�από�—��A�—�’��A�’�’’��A�’’�faster_heuristics�
|
trainable_lemmatizer/cfg
ADDED
The diff for this file is too large to render.
See raw diff
|
|
trainable_lemmatizer/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d373ea41f35545993049be1488bd1e1e7d4d3d830d7b080f7bcfacf4ba3b99e
|
3 |
+
size 71957523
|
trainable_lemmatizer/trees
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82dcb591674572ec7124a40c85666b79e6c71f5bdedaf91dd76459b773fee605
|
3 |
+
size 2400258
|
transformer/cfg
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_batch_items":4096
|
3 |
+
}
|
transformer/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f75dc46a394d2e2b7ab298a757a318d7222d401127fe5408a17cb40c4c8f524b
|
3 |
+
size 1134411305
|
vocab/key2row
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
�
|
vocab/lookups.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
|
3 |
+
size 1
|
vocab/strings.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
vocab/vectors
ADDED
Binary file (128 Bytes). View file
|
|
vocab/vectors.cfg
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"mode":"default"
|
3 |
+
}
|