adrianeboyd committed on
Commit
6084989
1 Parent(s): 62d5a10

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,13 +14,13 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.7244623656
18
  - name: NER Recall
19
  type: recall
20
- value: 0.6779874214
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.7004548408
24
  - task:
25
  name: TAG
26
  type: token-classification
@@ -34,7 +34,7 @@ model-index:
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
- value: 0.9733311518
38
  - task:
39
  name: MORPH
40
  type: token-classification
@@ -55,21 +55,21 @@ model-index:
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
- value: 0.9198487032
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
- value: 0.9061782838
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
- value: 0.9901380671
73
  ---
74
  ### Details: https://spacy.io/models/ja#ja_core_news_md
75
 
@@ -78,8 +78,8 @@ Japanese pipeline optimized for CPU. Components: tok2vec, morphologizer, parser,
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `ja_core_news_md` |
81
- | **Version** | `3.4.0` |
82
- | **spaCy** | `>=3.4.0,<3.5.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `ner` |
85
  | **Vectors** | 480443 keys, 20000 unique vectors (300 dimensions) |
@@ -105,22 +105,22 @@ Japanese pipeline optimized for CPU. Components: tok2vec, morphologizer, parser,
105
 
106
  | Type | Score |
107
  | --- | --- |
108
- | `TOKEN_ACC` | 99.69 |
109
  | `TOKEN_P` | 97.65 |
110
  | `TOKEN_R` | 97.90 |
111
  | `TOKEN_F` | 97.77 |
112
- | `POS_ACC` | 97.33 |
113
  | `MORPH_ACC` | 0.00 |
114
  | `MORPH_MICRO_P` | 34.01 |
115
  | `MORPH_MICRO_R` | 98.04 |
116
  | `MORPH_MICRO_F` | 50.51 |
117
- | `SENTS_P` | 99.01 |
118
- | `SENTS_R` | 99.01 |
119
- | `SENTS_F` | 99.01 |
120
- | `DEP_UAS` | 91.98 |
121
- | `DEP_LAS` | 90.62 |
122
  | `TAG_ACC` | 97.12 |
123
  | `LEMMA_ACC` | 96.71 |
124
- | `ENTS_P` | 72.45 |
125
- | `ENTS_R` | 67.80 |
126
- | `ENTS_F` | 70.05 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.7121418827
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.6566037736
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.6832460733
24
  - task:
25
  name: TAG
26
  type: token-classification
 
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
+ value: 0.9721881391
38
  - task:
39
  name: MORPH
40
  type: token-classification
 
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
+ value: 0.9203675345
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
+ value: 0.9074140723
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
+ value: 0.9774730656
73
  ---
74
  ### Details: https://spacy.io/models/ja#ja_core_news_md
75
 
 
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `ja_core_news_md` |
81
+ | **Version** | `3.5.0` |
82
+ | **spaCy** | `>=3.5.0,<3.6.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `ner` |
85
  | **Vectors** | 480443 keys, 20000 unique vectors (300 dimensions) |
 
105
 
106
  | Type | Score |
107
  | --- | --- |
108
+ | `TOKEN_ACC` | 99.37 |
109
  | `TOKEN_P` | 97.65 |
110
  | `TOKEN_R` | 97.90 |
111
  | `TOKEN_F` | 97.77 |
112
+ | `POS_ACC` | 97.22 |
113
  | `MORPH_ACC` | 0.00 |
114
  | `MORPH_MICRO_P` | 34.01 |
115
  | `MORPH_MICRO_R` | 98.04 |
116
  | `MORPH_MICRO_F` | 50.51 |
117
+ | `SENTS_P` | 97.08 |
118
+ | `SENTS_R` | 98.42 |
119
+ | `SENTS_F` | 97.75 |
120
+ | `DEP_UAS` | 92.04 |
121
+ | `DEP_LAS` | 90.74 |
122
  | `TAG_ACC` | 97.12 |
123
  | `LEMMA_ACC` | 96.71 |
124
+ | `ENTS_P` | 71.21 |
125
+ | `ENTS_R` | 65.66 |
126
+ | `ENTS_F` | 68.32 |
accuracy.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "token_acc": 0.9968649485,
3
  "token_p": 0.9764591282,
4
  "token_r": 0.9790021974,
5
  "token_f": 0.9777290092,
6
- "pos_acc": 0.9733311518,
7
  "morph_acc": 0.0,
8
  "morph_micro_p": 0.3401360544,
9
  "morph_micro_r": 0.9803921569,
@@ -25,101 +25,101 @@
25
  "f": 0.0
26
  }
27
  },
28
- "sents_p": 0.9901380671,
29
- "sents_r": 0.9901380671,
30
- "sents_f": 0.9901380671,
31
- "dep_uas": 0.9198487032,
32
- "dep_las": 0.9061782838,
33
  "dep_las_per_type": {
34
  "cc": {
35
- "p": 0.7115384615,
36
- "r": 0.7708333333,
37
- "f": 0.74
38
  },
39
  "compound": {
40
- "p": 0.9336051252,
41
- "r": 0.9036076663,
42
- "f": 0.918361501
43
  },
44
  "obl": {
45
- "p": 0.8171500631,
46
- "r": 0.808988764,
47
- "f": 0.8130489335
48
  },
49
  "case": {
50
- "p": 0.9892720307,
51
- "r": 0.9810030395,
52
- "f": 0.9851201831
53
  },
54
  "dislocated": {
55
- "p": 0.6428571429,
56
- "r": 0.6923076923,
57
- "f": 0.6666666667
58
  },
59
  "nsubj": {
60
- "p": 0.8233009709,
61
- "r": 0.8138195777,
62
- "f": 0.8185328185
63
  },
64
  "nmod": {
65
- "p": 0.8746898263,
66
- "r": 0.8245614035,
67
- "f": 0.8488862131
68
  },
69
  "root": {
70
- "p": 0.9679358717,
71
- "r": 0.9526627219,
72
- "f": 0.9602385686
73
  },
74
  "aux": {
75
- "p": 0.9787037037,
76
- "r": 0.9814298979,
77
- "f": 0.980064905
78
  },
79
  "advcl": {
80
- "p": 0.6944444444,
81
- "r": 0.6741573034,
82
- "f": 0.6841505131
83
  },
84
  "mark": {
85
- "p": 0.9775967413,
86
- "r": 0.96,
87
- "f": 0.9687184662
88
  },
89
  "fixed": {
90
- "p": 0.9588550984,
91
- "r": 0.9745454545,
92
- "f": 0.9666366096
93
  },
94
  "acl": {
95
- "p": 0.8315334773,
96
- "r": 0.8461538462,
97
- "f": 0.8387799564
98
  },
99
  "obj": {
100
- "p": 0.9480122324,
101
- "r": 0.9365558912,
102
- "f": 0.9422492401
103
  },
104
  "nummod": {
105
- "p": 0.9805194805,
106
- "r": 0.8934911243,
107
- "f": 0.9349845201
108
  },
109
  "advmod": {
110
- "p": 0.6917293233,
111
  "r": 0.6571428571,
112
- "f": 0.673992674
113
  },
114
  "amod": {
115
- "p": 0.9642857143,
116
- "r": 0.7297297297,
117
- "f": 0.8307692308
118
  },
119
  "cop": {
120
- "p": 0.9523809524,
121
- "r": 0.9302325581,
122
- "f": 0.9411764706
123
  },
124
  "ccomp": {
125
  "p": 0.95,
@@ -127,56 +127,51 @@
127
  "f": 0.9047619048
128
  },
129
  "det": {
130
- "p": 0.9615384615,
131
- "r": 0.9433962264,
132
- "f": 0.9523809524
133
  },
134
  "csubj": {
135
- "p": 0.7142857143,
136
- "r": 0.8333333333,
137
- "f": 0.7692307692
138
  },
139
  "dep": {
140
- "p": 0.2,
141
  "r": 0.1428571429,
142
- "f": 0.1666666667
143
  }
144
  },
145
  "tag_acc": 0.9712488769,
146
  "lemma_acc": 0.9670526831,
147
- "ents_p": 0.7244623656,
148
- "ents_r": 0.6779874214,
149
- "ents_f": 0.7004548408,
150
  "ents_per_type": {
151
  "DATE": {
152
- "p": 0.9545454545,
153
- "r": 0.9633027523,
154
- "f": 0.9589041096
155
- },
156
- "PERSON": {
157
- "p": 0.7152777778,
158
- "r": 0.7410071942,
159
- "f": 0.7279151943
160
  },
161
  "ORG": {
162
- "p": 0.6333333333,
163
- "r": 0.5547445255,
164
- "f": 0.5914396887
165
  },
166
  "TITLE_AFFIX": {
167
- "p": 0.8333333333,
168
- "r": 0.6666666667,
169
- "f": 0.7407407407
170
  },
171
- "GPE": {
172
- "p": 0.6741573034,
173
- "r": 0.6382978723,
174
- "f": 0.6557377049
175
  },
176
- "PRODUCT": {
177
- "p": 0.3636363636,
178
- "r": 0.2857142857,
179
- "f": 0.32
180
  },
181
  "TIME": {
182
  "p": 0.6666666667,
@@ -184,29 +179,34 @@
184
  "f": 0.8
185
  },
186
  "QUANTITY": {
187
- "p": 0.8194444444,
188
- "r": 0.8939393939,
189
- "f": 0.8550724638
190
  },
191
  "NORP": {
192
- "p": 0.72,
193
- "r": 0.5625,
194
- "f": 0.6315789474
195
  },
196
  "ORDINAL": {
197
- "p": 0.5833333333,
198
- "r": 0.6363636364,
199
- "f": 0.6086956522
200
  },
201
  "WORK_OF_ART": {
202
- "p": 0.6842105263,
203
- "r": 0.7647058824,
204
- "f": 0.7222222222
 
 
 
 
 
205
  },
206
  "FAC": {
207
- "p": 0.6538461538,
208
- "r": 0.4594594595,
209
- "f": 0.5396825397
210
  },
211
  "PERCENT": {
212
  "p": 1.0,
@@ -214,29 +214,29 @@
214
  "f": 0.4444444444
215
  },
216
  "EVENT": {
217
- "p": 0.7368421053,
218
- "r": 0.5384615385,
219
- "f": 0.6222222222
220
- },
221
- "CARDINAL": {
222
- "p": 0.0,
223
- "r": 0.0,
224
- "f": 0.0
225
  },
226
  "LOC": {
227
- "p": 0.6153846154,
228
- "r": 0.8,
229
- "f": 0.6956521739
230
  },
231
  "MOVEMENT": {
232
  "p": 0.0,
233
  "r": 0.0,
234
  "f": 0.0
235
  },
 
 
 
 
 
236
  "LAW": {
237
- "p": 1.0,
238
- "r": 0.3333333333,
239
- "f": 0.5
240
  },
241
  "MONEY": {
242
  "p": 1.0,
@@ -244,9 +244,9 @@
244
  "f": 1.0
245
  },
246
  "LANGUAGE": {
247
- "p": 1.0,
248
  "r": 1.0,
249
- "f": 1.0
250
  },
251
  "PET_NAME": {
252
  "p": 0.0,
@@ -254,5 +254,5 @@
254
  "f": 0.0
255
  }
256
  },
257
- "speed": 7222.6711221355
258
  }
 
1
  {
2
+ "token_acc": 0.9937494927,
3
  "token_p": 0.9764591282,
4
  "token_r": 0.9790021974,
5
  "token_f": 0.9777290092,
6
+ "pos_acc": 0.9721881391,
7
  "morph_acc": 0.0,
8
  "morph_micro_p": 0.3401360544,
9
  "morph_micro_r": 0.9803921569,
 
25
  "f": 0.0
26
  }
27
  },
28
+ "sents_p": 0.9708171206,
29
+ "sents_r": 0.9842209073,
30
+ "sents_f": 0.9774730656,
31
+ "dep_uas": 0.9203675345,
32
+ "dep_las": 0.9074140723,
33
  "dep_las_per_type": {
34
  "cc": {
35
+ "p": 0.829787234,
36
+ "r": 0.8125,
37
+ "f": 0.8210526316
38
  },
39
  "compound": {
40
+ "p": 0.9382284382,
41
+ "r": 0.9075535513,
42
+ "f": 0.9226361032
43
  },
44
  "obl": {
45
+ "p": 0.8116480793,
46
+ "r": 0.8177278402,
47
+ "f": 0.8146766169
48
  },
49
  "case": {
50
+ "p": 0.9888632873,
51
+ "r": 0.978343465,
52
+ "f": 0.9835752483
53
  },
54
  "dislocated": {
55
+ "p": 0.5384615385,
56
+ "r": 0.5384615385,
57
+ "f": 0.5384615385
58
  },
59
  "nsubj": {
60
+ "p": 0.8103448276,
61
+ "r": 0.8119001919,
62
+ "f": 0.8111217641
63
  },
64
  "nmod": {
65
+ "p": 0.8861076345,
66
+ "r": 0.8280701754,
67
+ "f": 0.8561064087
68
  },
69
  "root": {
70
+ "p": 0.9703557312,
71
+ "r": 0.9684418146,
72
+ "f": 0.9693978282
73
  },
74
  "aux": {
75
+ "p": 0.9823747681,
76
+ "r": 0.9832869081,
77
+ "f": 0.9828306265
78
  },
79
  "advcl": {
80
+ "p": 0.6825396825,
81
+ "r": 0.6764044944,
82
+ "f": 0.6794582393
83
  },
84
  "mark": {
85
+ "p": 0.967611336,
86
+ "r": 0.956,
87
+ "f": 0.9617706237
88
  },
89
  "fixed": {
90
+ "p": 0.946714032,
91
+ "r": 0.9690909091,
92
+ "f": 0.957771788
93
  },
94
  "acl": {
95
+ "p": 0.8470066519,
96
+ "r": 0.8395604396,
97
+ "f": 0.8432671082
98
  },
99
  "obj": {
100
+ "p": 0.9569230769,
101
+ "r": 0.9395770393,
102
+ "f": 0.9481707317
103
  },
104
  "nummod": {
105
+ "p": 0.987012987,
106
+ "r": 0.899408284,
107
+ "f": 0.9411764706
108
  },
109
  "advmod": {
110
+ "p": 0.6715328467,
111
  "r": 0.6571428571,
112
+ "f": 0.6642599278
113
  },
114
  "amod": {
115
+ "p": 0.962962963,
116
+ "r": 0.7027027027,
117
+ "f": 0.8125
118
  },
119
  "cop": {
120
+ "p": 0.9704142012,
121
+ "r": 0.9534883721,
122
+ "f": 0.9618768328
123
  },
124
  "ccomp": {
125
  "p": 0.95,
 
127
  "f": 0.9047619048
128
  },
129
  "det": {
130
+ "p": 0.9807692308,
131
+ "r": 0.9622641509,
132
+ "f": 0.9714285714
133
  },
134
  "csubj": {
135
+ "p": 0.7272727273,
136
+ "r": 0.6666666667,
137
+ "f": 0.6956521739
138
  },
139
  "dep": {
140
+ "p": 0.25,
141
  "r": 0.1428571429,
142
+ "f": 0.1818181818
143
  }
144
  },
145
  "tag_acc": 0.9712488769,
146
  "lemma_acc": 0.9670526831,
147
+ "ents_p": 0.7121418827,
148
+ "ents_r": 0.6566037736,
149
+ "ents_f": 0.6832460733,
150
  "ents_per_type": {
151
  "DATE": {
152
+ "p": 0.954954955,
153
+ "r": 0.9724770642,
154
+ "f": 0.9636363636
 
 
 
 
 
155
  },
156
  "ORG": {
157
+ "p": 0.5675675676,
158
+ "r": 0.4598540146,
159
+ "f": 0.5080645161
160
  },
161
  "TITLE_AFFIX": {
162
+ "p": 0.8181818182,
163
+ "r": 0.6,
164
+ "f": 0.6923076923
165
  },
166
+ "PERSON": {
167
+ "p": 0.7410071942,
168
+ "r": 0.7410071942,
169
+ "f": 0.7410071942
170
  },
171
+ "GPE": {
172
+ "p": 0.6875,
173
+ "r": 0.7021276596,
174
+ "f": 0.6947368421
175
  },
176
  "TIME": {
177
  "p": 0.6666666667,
 
179
  "f": 0.8
180
  },
181
  "QUANTITY": {
182
+ "p": 0.8695652174,
183
+ "r": 0.9090909091,
184
+ "f": 0.8888888889
185
  },
186
  "NORP": {
187
+ "p": 0.6785714286,
188
+ "r": 0.59375,
189
+ "f": 0.6333333333
190
  },
191
  "ORDINAL": {
192
+ "p": 0.619047619,
193
+ "r": 0.5909090909,
194
+ "f": 0.6046511628
195
  },
196
  "WORK_OF_ART": {
197
+ "p": 0.6666666667,
198
+ "r": 0.5882352941,
199
+ "f": 0.625
200
+ },
201
+ "CARDINAL": {
202
+ "p": 1.0,
203
+ "r": 0.5,
204
+ "f": 0.6666666667
205
  },
206
  "FAC": {
207
+ "p": 0.5384615385,
208
+ "r": 0.3783783784,
209
+ "f": 0.4444444444
210
  },
211
  "PERCENT": {
212
  "p": 1.0,
 
214
  "f": 0.4444444444
215
  },
216
  "EVENT": {
217
+ "p": 0.9285714286,
218
+ "r": 0.5,
219
+ "f": 0.65
 
 
 
 
 
220
  },
221
  "LOC": {
222
+ "p": 0.4117647059,
223
+ "r": 0.7,
224
+ "f": 0.5185185185
225
  },
226
  "MOVEMENT": {
227
  "p": 0.0,
228
  "r": 0.0,
229
  "f": 0.0
230
  },
231
+ "PRODUCT": {
232
+ "p": 0.2857142857,
233
+ "r": 0.2380952381,
234
+ "f": 0.2597402597
235
+ },
236
  "LAW": {
237
+ "p": 0.0,
238
+ "r": 0.0,
239
+ "f": 0.0
240
  },
241
  "MONEY": {
242
  "p": 1.0,
 
244
  "f": 1.0
245
  },
246
  "LANGUAGE": {
247
+ "p": 0.75,
248
  "r": 1.0,
249
+ "f": 0.8571428571
250
  },
251
  "PET_NAME": {
252
  "p": 0.0,
 
254
  "f": 0.0
255
  }
256
  },
257
+ "speed": 9629.4441452948
258
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -135,8 +135,8 @@ factory = "tok2vec"
135
  [components.tok2vec.model.embed]
136
  @architectures = "spacy.MultiHashEmbed.v2"
137
  width = ${components.tok2vec.model.encode:width}
138
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
139
- rows = [5000,1000,2500,2500]
140
  include_static_vectors = true
141
 
142
  [components.tok2vec.model.encode]
@@ -178,6 +178,7 @@ eval_frequency = 1000
178
  frozen_components = []
179
  before_to_disk = null
180
  annotating_components = []
 
181
 
182
  [training.batcher]
183
  @batchers = "spacy.batch_by_words.v1"
 
135
  [components.tok2vec.model.embed]
136
  @architectures = "spacy.MultiHashEmbed.v2"
137
  width = ${components.tok2vec.model.encode:width}
138
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","IS_SPACE"]
139
+ rows = [5000,1000,2500,2500,50]
140
  include_static_vectors = true
141
 
142
  [components.tok2vec.model.encode]
 
178
  frozen_components = []
179
  before_to_disk = null
180
  annotating_components = []
181
+ before_update = null
182
 
183
  [training.batcher]
184
  @batchers = "spacy.batch_by_words.v1"
ja_core_news_md-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df45e0c8206a077ef983bb413aca7b81b29bba454856225d2f6f0fe66b27c632
3
- size 41990789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d24b2118902b57d62271e12c28a6071e1c08dd653523da336bbb824b3e8b3bf
3
+ size 42105228
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"ja",
3
  "name":"core_news_md",
4
- "version":"3.4.0",
5
  "description":"Japanese pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
- "spacy_version":">=3.4.0,<3.5.0",
11
- "spacy_git_version":"dd038b536",
12
  "vectors":{
13
  "width":300,
14
  "vectors":20000,
@@ -113,11 +113,11 @@
113
  "senter"
114
  ],
115
  "performance":{
116
- "token_acc":0.9968649485,
117
  "token_p":0.9764591282,
118
  "token_r":0.9790021974,
119
  "token_f":0.9777290092,
120
- "pos_acc":0.9733311518,
121
  "morph_acc":0.0,
122
  "morph_micro_p":0.3401360544,
123
  "morph_micro_r":0.9803921569,
@@ -139,101 +139,101 @@
139
  "f":0.0
140
  }
141
  },
142
- "sents_p":0.9901380671,
143
- "sents_r":0.9901380671,
144
- "sents_f":0.9901380671,
145
- "dep_uas":0.9198487032,
146
- "dep_las":0.9061782838,
147
  "dep_las_per_type":{
148
  "cc":{
149
- "p":0.7115384615,
150
- "r":0.7708333333,
151
- "f":0.74
152
  },
153
  "compound":{
154
- "p":0.9336051252,
155
- "r":0.9036076663,
156
- "f":0.918361501
157
  },
158
  "obl":{
159
- "p":0.8171500631,
160
- "r":0.808988764,
161
- "f":0.8130489335
162
  },
163
  "case":{
164
- "p":0.9892720307,
165
- "r":0.9810030395,
166
- "f":0.9851201831
167
  },
168
  "dislocated":{
169
- "p":0.6428571429,
170
- "r":0.6923076923,
171
- "f":0.6666666667
172
  },
173
  "nsubj":{
174
- "p":0.8233009709,
175
- "r":0.8138195777,
176
- "f":0.8185328185
177
  },
178
  "nmod":{
179
- "p":0.8746898263,
180
- "r":0.8245614035,
181
- "f":0.8488862131
182
  },
183
  "root":{
184
- "p":0.9679358717,
185
- "r":0.9526627219,
186
- "f":0.9602385686
187
  },
188
  "aux":{
189
- "p":0.9787037037,
190
- "r":0.9814298979,
191
- "f":0.980064905
192
  },
193
  "advcl":{
194
- "p":0.6944444444,
195
- "r":0.6741573034,
196
- "f":0.6841505131
197
  },
198
  "mark":{
199
- "p":0.9775967413,
200
- "r":0.96,
201
- "f":0.9687184662
202
  },
203
  "fixed":{
204
- "p":0.9588550984,
205
- "r":0.9745454545,
206
- "f":0.9666366096
207
  },
208
  "acl":{
209
- "p":0.8315334773,
210
- "r":0.8461538462,
211
- "f":0.8387799564
212
  },
213
  "obj":{
214
- "p":0.9480122324,
215
- "r":0.9365558912,
216
- "f":0.9422492401
217
  },
218
  "nummod":{
219
- "p":0.9805194805,
220
- "r":0.8934911243,
221
- "f":0.9349845201
222
  },
223
  "advmod":{
224
- "p":0.6917293233,
225
  "r":0.6571428571,
226
- "f":0.673992674
227
  },
228
  "amod":{
229
- "p":0.9642857143,
230
- "r":0.7297297297,
231
- "f":0.8307692308
232
  },
233
  "cop":{
234
- "p":0.9523809524,
235
- "r":0.9302325581,
236
- "f":0.9411764706
237
  },
238
  "ccomp":{
239
  "p":0.95,
@@ -241,56 +241,51 @@
241
  "f":0.9047619048
242
  },
243
  "det":{
244
- "p":0.9615384615,
245
- "r":0.9433962264,
246
- "f":0.9523809524
247
  },
248
  "csubj":{
249
- "p":0.7142857143,
250
- "r":0.8333333333,
251
- "f":0.7692307692
252
  },
253
  "dep":{
254
- "p":0.2,
255
  "r":0.1428571429,
256
- "f":0.1666666667
257
  }
258
  },
259
  "tag_acc":0.9712488769,
260
  "lemma_acc":0.9670526831,
261
- "ents_p":0.7244623656,
262
- "ents_r":0.6779874214,
263
- "ents_f":0.7004548408,
264
  "ents_per_type":{
265
  "DATE":{
266
- "p":0.9545454545,
267
- "r":0.9633027523,
268
- "f":0.9589041096
269
- },
270
- "PERSON":{
271
- "p":0.7152777778,
272
- "r":0.7410071942,
273
- "f":0.7279151943
274
  },
275
  "ORG":{
276
- "p":0.6333333333,
277
- "r":0.5547445255,
278
- "f":0.5914396887
279
  },
280
  "TITLE_AFFIX":{
281
- "p":0.8333333333,
282
- "r":0.6666666667,
283
- "f":0.7407407407
284
  },
285
- "GPE":{
286
- "p":0.6741573034,
287
- "r":0.6382978723,
288
- "f":0.6557377049
289
  },
290
- "PRODUCT":{
291
- "p":0.3636363636,
292
- "r":0.2857142857,
293
- "f":0.32
294
  },
295
  "TIME":{
296
  "p":0.6666666667,
@@ -298,29 +293,34 @@
298
  "f":0.8
299
  },
300
  "QUANTITY":{
301
- "p":0.8194444444,
302
- "r":0.8939393939,
303
- "f":0.8550724638
304
  },
305
  "NORP":{
306
- "p":0.72,
307
- "r":0.5625,
308
- "f":0.6315789474
309
  },
310
  "ORDINAL":{
311
- "p":0.5833333333,
312
- "r":0.6363636364,
313
- "f":0.6086956522
314
  },
315
  "WORK_OF_ART":{
316
- "p":0.6842105263,
317
- "r":0.7647058824,
318
- "f":0.7222222222
 
 
 
 
 
319
  },
320
  "FAC":{
321
- "p":0.6538461538,
322
- "r":0.4594594595,
323
- "f":0.5396825397
324
  },
325
  "PERCENT":{
326
  "p":1.0,
@@ -328,29 +328,29 @@
328
  "f":0.4444444444
329
  },
330
  "EVENT":{
331
- "p":0.7368421053,
332
- "r":0.5384615385,
333
- "f":0.6222222222
334
- },
335
- "CARDINAL":{
336
- "p":0.0,
337
- "r":0.0,
338
- "f":0.0
339
  },
340
  "LOC":{
341
- "p":0.6153846154,
342
- "r":0.8,
343
- "f":0.6956521739
344
  },
345
  "MOVEMENT":{
346
  "p":0.0,
347
  "r":0.0,
348
  "f":0.0
349
  },
 
 
 
 
 
350
  "LAW":{
351
- "p":1.0,
352
- "r":0.3333333333,
353
- "f":0.5
354
  },
355
  "MONEY":{
356
  "p":1.0,
@@ -358,9 +358,9 @@
358
  "f":1.0
359
  },
360
  "LANGUAGE":{
361
- "p":1.0,
362
  "r":1.0,
363
- "f":1.0
364
  },
365
  "PET_NAME":{
366
  "p":0.0,
@@ -368,7 +368,7 @@
368
  "f":0.0
369
  }
370
  },
371
- "speed":7222.6711221355
372
  },
373
  "sources":[
374
  {
 
1
  {
2
  "lang":"ja",
3
  "name":"core_news_md",
4
+ "version":"3.5.0",
5
  "description":"Japanese pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
+ "spacy_version":">=3.5.0,<3.6.0",
11
+ "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":300,
14
  "vectors":20000,
 
113
  "senter"
114
  ],
115
  "performance":{
116
+ "token_acc":0.9937494927,
117
  "token_p":0.9764591282,
118
  "token_r":0.9790021974,
119
  "token_f":0.9777290092,
120
+ "pos_acc":0.9721881391,
121
  "morph_acc":0.0,
122
  "morph_micro_p":0.3401360544,
123
  "morph_micro_r":0.9803921569,
 
139
  "f":0.0
140
  }
141
  },
142
+ "sents_p":0.9708171206,
143
+ "sents_r":0.9842209073,
144
+ "sents_f":0.9774730656,
145
+ "dep_uas":0.9203675345,
146
+ "dep_las":0.9074140723,
147
  "dep_las_per_type":{
148
  "cc":{
149
+ "p":0.829787234,
150
+ "r":0.8125,
151
+ "f":0.8210526316
152
  },
153
  "compound":{
154
+ "p":0.9382284382,
155
+ "r":0.9075535513,
156
+ "f":0.9226361032
157
  },
158
  "obl":{
159
+ "p":0.8116480793,
160
+ "r":0.8177278402,
161
+ "f":0.8146766169
162
  },
163
  "case":{
164
+ "p":0.9888632873,
165
+ "r":0.978343465,
166
+ "f":0.9835752483
167
  },
168
  "dislocated":{
169
+ "p":0.5384615385,
170
+ "r":0.5384615385,
171
+ "f":0.5384615385
172
  },
173
  "nsubj":{
174
+ "p":0.8103448276,
175
+ "r":0.8119001919,
176
+ "f":0.8111217641
177
  },
178
  "nmod":{
179
+ "p":0.8861076345,
180
+ "r":0.8280701754,
181
+ "f":0.8561064087
182
  },
183
  "root":{
184
+ "p":0.9703557312,
185
+ "r":0.9684418146,
186
+ "f":0.9693978282
187
  },
188
  "aux":{
189
+ "p":0.9823747681,
190
+ "r":0.9832869081,
191
+ "f":0.9828306265
192
  },
193
  "advcl":{
194
+ "p":0.6825396825,
195
+ "r":0.6764044944,
196
+ "f":0.6794582393
197
  },
198
  "mark":{
199
+ "p":0.967611336,
200
+ "r":0.956,
201
+ "f":0.9617706237
202
  },
203
  "fixed":{
204
+ "p":0.946714032,
205
+ "r":0.9690909091,
206
+ "f":0.957771788
207
  },
208
  "acl":{
209
+ "p":0.8470066519,
210
+ "r":0.8395604396,
211
+ "f":0.8432671082
212
  },
213
  "obj":{
214
+ "p":0.9569230769,
215
+ "r":0.9395770393,
216
+ "f":0.9481707317
217
  },
218
  "nummod":{
219
+ "p":0.987012987,
220
+ "r":0.899408284,
221
+ "f":0.9411764706
222
  },
223
  "advmod":{
224
+ "p":0.6715328467,
225
  "r":0.6571428571,
226
+ "f":0.6642599278
227
  },
228
  "amod":{
229
+ "p":0.962962963,
230
+ "r":0.7027027027,
231
+ "f":0.8125
232
  },
233
  "cop":{
234
+ "p":0.9704142012,
235
+ "r":0.9534883721,
236
+ "f":0.9618768328
237
  },
238
  "ccomp":{
239
  "p":0.95,
 
241
  "f":0.9047619048
242
  },
243
  "det":{
244
+ "p":0.9807692308,
245
+ "r":0.9622641509,
246
+ "f":0.9714285714
247
  },
248
  "csubj":{
249
+ "p":0.7272727273,
250
+ "r":0.6666666667,
251
+ "f":0.6956521739
252
  },
253
  "dep":{
254
+ "p":0.25,
255
  "r":0.1428571429,
256
+ "f":0.1818181818
257
  }
258
  },
259
  "tag_acc":0.9712488769,
260
  "lemma_acc":0.9670526831,
261
+ "ents_p":0.7121418827,
262
+ "ents_r":0.6566037736,
263
+ "ents_f":0.6832460733,
264
  "ents_per_type":{
265
  "DATE":{
266
+ "p":0.954954955,
267
+ "r":0.9724770642,
268
+ "f":0.9636363636
 
 
 
 
 
269
  },
270
  "ORG":{
271
+ "p":0.5675675676,
272
+ "r":0.4598540146,
273
+ "f":0.5080645161
274
  },
275
  "TITLE_AFFIX":{
276
+ "p":0.8181818182,
277
+ "r":0.6,
278
+ "f":0.6923076923
279
  },
280
+ "PERSON":{
281
+ "p":0.7410071942,
282
+ "r":0.7410071942,
283
+ "f":0.7410071942
284
  },
285
+ "GPE":{
286
+ "p":0.6875,
287
+ "r":0.7021276596,
288
+ "f":0.6947368421
289
  },
290
  "TIME":{
291
  "p":0.6666666667,
 
293
  "f":0.8
294
  },
295
  "QUANTITY":{
296
+ "p":0.8695652174,
297
+ "r":0.9090909091,
298
+ "f":0.8888888889
299
  },
300
  "NORP":{
301
+ "p":0.6785714286,
302
+ "r":0.59375,
303
+ "f":0.6333333333
304
  },
305
  "ORDINAL":{
306
+ "p":0.619047619,
307
+ "r":0.5909090909,
308
+ "f":0.6046511628
309
  },
310
  "WORK_OF_ART":{
311
+ "p":0.6666666667,
312
+ "r":0.5882352941,
313
+ "f":0.625
314
+ },
315
+ "CARDINAL":{
316
+ "p":1.0,
317
+ "r":0.5,
318
+ "f":0.6666666667
319
  },
320
  "FAC":{
321
+ "p":0.5384615385,
322
+ "r":0.3783783784,
323
+ "f":0.4444444444
324
  },
325
  "PERCENT":{
326
  "p":1.0,
 
328
  "f":0.4444444444
329
  },
330
  "EVENT":{
331
+ "p":0.9285714286,
332
+ "r":0.5,
333
+ "f":0.65
 
 
 
 
 
334
  },
335
  "LOC":{
336
+ "p":0.4117647059,
337
+ "r":0.7,
338
+ "f":0.5185185185
339
  },
340
  "MOVEMENT":{
341
  "p":0.0,
342
  "r":0.0,
343
  "f":0.0
344
  },
345
+ "PRODUCT":{
346
+ "p":0.2857142857,
347
+ "r":0.2380952381,
348
+ "f":0.2597402597
349
+ },
350
  "LAW":{
351
+ "p":0.0,
352
+ "r":0.0,
353
+ "f":0.0
354
  },
355
  "MONEY":{
356
  "p":1.0,
 
358
  "f":1.0
359
  },
360
  "LANGUAGE":{
361
+ "p":0.75,
362
  "r":1.0,
363
+ "f":0.8571428571
364
  },
365
  "PET_NAME":{
366
  "p":0.0,
 
368
  "f":0.0
369
  }
370
  },
371
+ "speed":9629.4441452948
372
  },
373
  "sources":[
374
  {
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40a61d03889a18d216ecec75486609a43994214da406a8ba2b09bb0a8b94b20d
3
  size 8189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c78232fbe8991c452f468171439f4683fe130cada00b20bb0ea2ca2cbaf510b
3
  size 8189
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:249b11cd2cf32cc4699b57696d3a0543d260f2ff581c141a5513e5c557f0b887
3
  size 6385103
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b06b4e265c7a59eff524135673037708034fd704d38e9c55e1093a57fe66251
3
  size 6385103
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a834891908c8a11925ec2e94ca6facfafc6f51a681c351de5a44a9c2026e5af
3
  size 299888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc9186de82f1115f839a0c69175cca31c32d74feb49e9281ca76ed2a0ce4bc0b
3
  size 299888
senter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20165fdde644486787c8444e6a7827e6c2b3977d25dc9f26b96136e953bcff2e
3
  size 213263
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6af0ba198ff60abfb8ece4f9cd9ac3844ac12dd6f168592d654466689c3cd3
3
  size 213263
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bc127f8ea7af91f3b35f536e0a565690a02b641115a04d3417d717d4db5d02b
3
- size 6235418
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5642f6138a8a9afb76a959c05875cbda74372ad8eb11a4602d39c000e19b9a2
3
+ size 6365607
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a812f79413801d2858a16f71168326eb951a7d0c53a7bda8acde7a3af273859
3
- size 15615308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:061bc23618b4494defb44e746ab7ff52bce6b2035c3d31b4121b7142939a8777
3
+ size 15614069