Jacobo committed on
Commit
123ba2e
1 Parent(s): c12ea21

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -13,42 +13,42 @@ model-index:
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
- value: 0.973255026
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
- value: 0.9719448837
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
- value: 0.9470973571
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
- value: 0.9687634105
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
- value: 0.8300239687
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Labeled Attachment Score (LAS)
50
  type: f_score
51
- value: 0.7853160086
52
  - task:
53
  name: SENTS
54
  type: token-classification
@@ -60,8 +60,8 @@ model-index:
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_perseus_trf` |
63
- | **Version** | `3.7` |
64
- | **spaCy** | `>=3.7.4,<3.8.0` |
65
  | **Default Pipeline** | `transformer`, `morphologizer`, `tagger`, `senter`, `parser`, `lemmatizer`, `attribute_ruler` |
66
  | **Components** | `transformer`, `morphologizer`, `tagger`, `senter`, `parser`, `lemmatizer`, `attribute_ruler` |
67
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -87,17 +87,17 @@ model-index:
87
 
88
  | Type | Score |
89
  | --- | --- |
90
- | `POS_ACC` | 96.52 |
91
- | `MORPH_ACC` | 95.04 |
92
- | `TAG_ACC` | 96.60 |
93
  | `SENTS_F` | 99.34 |
94
  | `SENTS_P` | 99.30 |
95
  | `SENTS_R` | 99.38 |
96
- | `DEP_UAS` | 81.72 |
97
- | `DEP_LAS` | 76.93 |
98
- | `LEMMA_ACC` | 97.23 |
99
- | `TRANSFORMER_LOSS` | 99065.36 |
100
- | `MORPHOLOGIZER_LOSS` | 7157.58 |
101
- | `TAGGER_LOSS` | 3968.99 |
102
- | `SENTER_LOSS` | 13391.75 |
103
- | `PARSER_LOSS` | 1908487.82 |
 
13
  metrics:
14
  - name: TAG (XPOS) Accuracy
15
  type: accuracy
16
+ value: 0.9723063022
17
  - task:
18
  name: POS
19
  type: token-classification
20
  metrics:
21
  - name: POS (UPOS) Accuracy
22
  type: accuracy
23
+ value: 0.9717189971
24
  - task:
25
  name: MORPH
26
  type: token-classification
27
  metrics:
28
  - name: Morph (UFeats) Accuracy
29
  type: accuracy
30
+ value: 0.9466455839
31
  - task:
32
  name: LEMMA
33
  type: token-classification
34
  metrics:
35
  - name: Lemma Accuracy
36
  type: accuracy
37
+ value: 0.9714737436
38
  - task:
39
  name: UNLABELED_DEPENDENCIES
40
  type: token-classification
41
  metrics:
42
  - name: Unlabeled Attachment Score (UAS)
43
  type: f_score
44
+ value: 0.8278332829
45
  - task:
46
  name: LABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Labeled Attachment Score (LAS)
50
  type: f_score
51
+ value: 0.7808557877
52
  - task:
53
  name: SENTS
54
  type: token-classification
 
60
  | Feature | Description |
61
  | --- | --- |
62
  | **Name** | `grc_perseus_trf` |
63
+ | **Version** | `3.7.5` |
64
+ | **spaCy** | `>=3.7.5,<3.8.0` |
65
  | **Default Pipeline** | `transformer`, `morphologizer`, `tagger`, `senter`, `parser`, `lemmatizer`, `attribute_ruler` |
66
  | **Components** | `transformer`, `morphologizer`, `tagger`, `senter`, `parser`, `lemmatizer`, `attribute_ruler` |
67
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
87
 
88
  | Type | Score |
89
  | --- | --- |
90
+ | `POS_ACC` | 97.17 |
91
+ | `MORPH_ACC` | 94.66 |
92
+ | `TAG_ACC` | 97.23 |
93
  | `SENTS_F` | 99.34 |
94
  | `SENTS_P` | 99.30 |
95
  | `SENTS_R` | 99.38 |
96
+ | `DEP_UAS` | 82.78 |
97
+ | `DEP_LAS` | 78.09 |
98
+ | `LEMMA_ACC` | 97.15 |
99
+ | `TRANSFORMER_LOSS` | 181871.36 |
100
+ | `MORPHOLOGIZER_LOSS` | 20683.74 |
101
+ | `TAGGER_LOSS` | 16994.14 |
102
+ | `SENTER_LOSS` | 68301.80 |
103
+ | `PARSER_LOSS` | 9682034.86 |
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = "corpus/train/grc_perseus-ud-train.spacy"
3
- dev = "corpus/dev/grc_perseus-ud-dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
@@ -178,7 +178,7 @@ dropout = 0.1
178
  patience = 5000
179
  max_epochs = 0
180
  max_steps = 20000
181
- eval_frequency = 200
182
  frozen_components = ["lemmatizer"]
183
  annotating_components = ["lemmatizer"]
184
  before_to_disk = null
 
1
  [paths]
2
+ train = "corpus/parser/perseus/train/grc_perseus-ud-train.spacy"
3
+ dev = "corpus/parser/perseus/dev/grc_perseus-ud-dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
 
178
  patience = 5000
179
  max_epochs = 0
180
  max_steps = 20000
181
+ eval_frequency = 1000
182
  frozen_components = ["lemmatizer"]
183
  annotating_components = ["lemmatizer"]
184
  before_to_disk = null
grc_perseus_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e70a98ec79ba1af612fe44d91d006efce93e923504cb3bf81e135d9e13727597
3
- size 579700077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:560ad38f98987bb12b60d71efd8004e515859fbd19aad26af9b0b4982c8ec8b0
3
+ size 497307169
lemmatizer/cfg CHANGED
The diff for this file is too large to render. See raw diff
 
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee3567720a57a22b28099f9ccd9c39ee5b455f448e531763e00d59219f0b4671
3
- size 27089840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:becf7d37bd570f1a89a157da3184d2710dccc79804ec6f0ad3cae090aafe5c2a
3
+ size 29215702
lemmatizer/trees CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a53607dd846f3e82212b030f5fde5e8487e55fe146f7f90da6078e87837bc0a
3
- size 6009899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86606a185ea3cbd4c6f071029121d17265dfe9d5a468c6dbeb071a5808cf49f9
3
+ size 6516433
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"grc",
3
  "name":"perseus_trf",
4
- "version":"3.7",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.7.4,<3.8.0",
11
- "spacy_git_version":"bff8725f4",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -963,156 +963,156 @@
963
 
964
  ],
965
  "performance":{
966
- "pos_acc":0.9719448837,
967
- "morph_acc":0.9470973571,
968
  "morph_per_feat":{
969
  "Case":{
970
- "p":0.9787910923,
971
- "r":0.9768057148,
972
- "f":0.9777973957
973
  },
974
  "Gender":{
975
- "p":0.9614159292,
976
- "r":0.9597173145,
977
- "f":0.9605658709
978
  },
979
  "Number":{
980
- "p":0.98920054,
981
- "r":0.9875711291,
982
- "f":0.988385163
983
  },
984
  "Degree":{
985
- "p":0.8441558442,
986
- "r":0.7647058824,
987
- "f":0.8024691358
988
  },
989
  "Tense":{
990
- "p":0.9648143654,
991
- "r":0.9634116792,
992
- "f":0.9641125121
993
  },
994
  "VerbForm":{
995
- "p":0.9927202135,
996
- "r":0.9912769566,
997
- "f":0.9919980601
998
  },
999
  "Voice":{
1000
- "p":0.965525613,
1001
- "r":0.9643549952,
1002
- "f":0.964939949
1003
  },
1004
  "Aspect":{
1005
- "p":0.9528061224,
1006
- "r":0.9098660171,
1007
- "f":0.9308411215
1008
  },
1009
  "Mood":{
1010
- "p":0.982160555,
1011
- "r":0.9826474963,
1012
- "f":0.9824039653
1013
  },
1014
  "Person":{
1015
- "p":0.9853729888,
1016
- "r":0.9642175573,
1017
- "f":0.9746804919
1018
  }
1019
  },
1020
- "tag_acc":0.973255026,
1021
  "sents_f":0.9934065934,
1022
  "sents_p":0.992970123,
1023
  "sents_r":0.9938434477,
1024
- "dep_uas":0.8300239687,
1025
- "dep_las":0.7853160086,
1026
  "dep_las_per_type":{
1027
  "det":{
1028
- "p":0.9101460127,
1029
- "r":0.9033816425,
1030
- "f":0.9067512122
1031
  },
1032
  "advmod":{
1033
- "p":0.8054195233,
1034
- "r":0.7952933591,
1035
- "f":0.800324412
1036
  },
1037
  "case":{
1038
- "p":0.941826215,
1039
- "r":0.9502228826,
1040
- "f":0.9460059172
1041
  },
1042
  "nmod":{
1043
- "p":0.717268623,
1044
- "r":0.6848060345,
1045
- "f":0.7006615215
1046
  },
1047
  "cc":{
1048
- "p":0.7668292683,
1049
- "r":0.7631067961,
1050
- "f":0.7649635036
1051
  },
1052
  "conj":{
1053
- "p":0.7571035747,
1054
- "r":0.7441441441,
1055
- "f":0.7505679237
1056
  },
1057
  "csubj":{
1058
- "p":0.6588235294,
1059
- "r":0.5436893204,
1060
- "f":0.5957446809
1061
  },
1062
  "obl":{
1063
- "p":0.7193347193,
1064
- "r":0.7330508475,
1065
- "f":0.7261280168
1066
  },
1067
  "root":{
1068
- "p":0.9516695958,
1069
- "r":0.9525065963,
1070
- "f":0.9520879121
1071
  },
1072
  "cop":{
1073
- "p":0.7964912281,
1074
- "r":0.8021201413,
1075
- "f":0.7992957746
1076
  },
1077
  "obj":{
1078
- "p":0.7563704164,
1079
- "r":0.7821336761,
1080
- "f":0.7690363349
1081
  },
1082
  "acl":{
1083
- "p":0.4836065574,
1084
- "r":0.3641975309,
1085
- "f":0.4154929577
1086
  },
1087
  "xcomp":{
1088
- "p":0.5787965616,
1089
- "r":0.5923753666,
1090
- "f":0.5855072464
1091
  },
1092
  "nsubj":{
1093
- "p":0.7847498014,
1094
- "r":0.7767295597,
1095
- "f":0.7807190834
1096
  },
1097
  "amod":{
1098
- "p":0.4468085106,
1099
- "r":0.3088235294,
1100
- "f":0.3652173913
1101
  },
1102
  "advcl":{
1103
- "p":0.7131849315,
1104
- "r":0.7256097561,
1105
- "f":0.719343696
1106
  },
1107
  "mark":{
1108
- "p":0.8833333333,
1109
  "r":0.8857938719,
1110
- "f":0.8845618915
1111
  },
1112
  "iobj":{
1113
- "p":0.5955882353,
1114
- "r":0.5955882353,
1115
- "f":0.5955882353
1116
  },
1117
  "dep":{
1118
  "p":0.0,
@@ -1120,19 +1120,19 @@
1120
  "f":0.0
1121
  },
1122
  "ccomp":{
1123
- "p":0.6203208556,
1124
- "r":0.5576923077,
1125
- "f":0.5873417722
1126
  },
1127
  "appos":{
1128
- "p":0.2,
1129
- "r":0.12,
1130
- "f":0.15
1131
  },
1132
  "nummod":{
1133
- "p":0.3157894737,
1134
- "r":0.375,
1135
- "f":0.3428571429
1136
  },
1137
  "discourse":{
1138
  "p":1.0,
@@ -1140,9 +1140,9 @@
1140
  "f":0.9696969697
1141
  },
1142
  "vocative":{
1143
- "p":0.8421052632,
1144
- "r":0.6956521739,
1145
- "f":0.7619047619
1146
  },
1147
  "parataxis":{
1148
  "p":0.0,
@@ -1150,14 +1150,14 @@
1150
  "f":0.0
1151
  }
1152
  },
1153
- "lemma_acc":0.9687634105,
1154
- "transformer_loss":229.1886046889,
1155
- "morphologizer_loss":31.0409947152,
1156
- "tagger_loss":27.2031119848,
1157
- "senter_loss":134.9575767298,
1158
- "parser_loss":19260.8797536265
1159
  },
1160
  "requirements":[
1161
- "spacy-transformers>=1.3.4,<1.4.0"
1162
  ]
1163
  }
 
1
  {
2
  "lang":"grc",
3
  "name":"perseus_trf",
4
+ "version":"3.7.5",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.7.5,<3.8.0",
11
+ "spacy_git_version":"a6d0fc360",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
963
 
964
  ],
965
  "performance":{
966
+ "pos_acc":0.9717189971,
967
+ "morph_acc":0.9466455839,
968
  "morph_per_feat":{
969
  "Case":{
970
+ "p":0.9781647808,
971
+ "r":0.9758356116,
972
+ "f":0.976998808
973
  },
974
  "Gender":{
975
+ "p":0.9618449008,
976
+ "r":0.9598056537,
977
+ "f":0.9608241953
978
  },
979
  "Number":{
980
+ "p":0.9891948676,
981
+ "r":0.9870470201,
982
+ "f":0.9881197766
983
  },
984
  "Degree":{
985
+ "p":0.8048780488,
986
+ "r":0.7764705882,
987
+ "f":0.7904191617
988
  },
989
  "Tense":{
990
+ "p":0.9640252795,
991
+ "r":0.9609886116,
992
+ "f":0.9625045504
993
  },
994
  "VerbForm":{
995
+ "p":0.9934370442,
996
+ "r":0.9903077296,
997
+ "f":0.9918699187
998
  },
999
  "Voice":{
1000
+ "p":0.9654753221,
1001
+ "r":0.962900097,
1002
+ "f":0.96418599
1003
  },
1004
  "Aspect":{
1005
+ "p":0.9555273189,
1006
+ "r":0.915956151,
1007
+ "f":0.9353233831
1008
  },
1009
  "Mood":{
1010
+ "p":0.982630273,
1011
+ "r":0.9816559246,
1012
+ "f":0.9821428571
1013
  },
1014
  "Person":{
1015
+ "p":0.9868099658,
1016
+ "r":0.963740458,
1017
+ "f":0.9751387883
1018
  }
1019
  },
1020
+ "tag_acc":0.9723063022,
1021
  "sents_f":0.9934065934,
1022
  "sents_p":0.992970123,
1023
  "sents_r":0.9938434477,
1024
+ "dep_uas":0.8278332829,
1025
+ "dep_las":0.7808557877,
1026
  "dep_las_per_type":{
1027
  "det":{
1028
+ "p":0.9116104869,
1029
+ "r":0.9044964697,
1030
+ "f":0.9080395449
1031
  },
1032
  "advmod":{
1033
+ "p":0.7959779436,
1034
+ "r":0.7911025145,
1035
+ "f":0.7935327405
1036
  },
1037
  "case":{
1038
+ "p":0.9423076923,
1039
+ "r":0.9465081724,
1040
+ "f":0.9444032617
1041
  },
1042
  "nmod":{
1043
+ "p":0.7173666288,
1044
+ "r":0.6810344828,
1045
+ "f":0.6987285793
1046
  },
1047
  "cc":{
1048
+ "p":0.7729516288,
1049
+ "r":0.7601941748,
1050
+ "f":0.7665198238
1051
  },
1052
  "conj":{
1053
+ "p":0.7578268877,
1054
+ "r":0.7414414414,
1055
+ "f":0.7495446266
1056
  },
1057
  "csubj":{
1058
+ "p":0.6352941176,
1059
+ "r":0.5242718447,
1060
+ "f":0.5744680851
1061
  },
1062
  "obl":{
1063
+ "p":0.7283687943,
1064
+ "r":0.7252824859,
1065
+ "f":0.7268223638
1066
  },
1067
  "root":{
1068
+ "p":0.9525483304,
1069
+ "r":0.9533861038,
1070
+ "f":0.952967033
1071
  },
1072
  "cop":{
1073
+ "p":0.7665505226,
1074
+ "r":0.777385159,
1075
+ "f":0.7719298246
1076
  },
1077
  "obj":{
1078
+ "p":0.7562972292,
1079
+ "r":0.7718508997,
1080
+ "f":0.7639949109
1081
  },
1082
  "acl":{
1083
+ "p":0.4910714286,
1084
+ "r":0.3395061728,
1085
+ "f":0.401459854
1086
  },
1087
  "xcomp":{
1088
+ "p":0.5683139535,
1089
+ "r":0.573313783,
1090
+ "f":0.5708029197
1091
  },
1092
  "nsubj":{
1093
+ "p":0.7786499215,
1094
+ "r":0.7798742138,
1095
+ "f":0.7792615868
1096
  },
1097
  "amod":{
1098
+ "p":0.3461538462,
1099
+ "r":0.2647058824,
1100
+ "f":0.3
1101
  },
1102
  "advcl":{
1103
+ "p":0.6948160535,
1104
+ "r":0.7238675958,
1105
+ "f":0.7090443686
1106
  },
1107
  "mark":{
1108
+ "p":0.8808864266,
1109
  "r":0.8857938719,
1110
+ "f":0.8833333333
1111
  },
1112
  "iobj":{
1113
+ "p":0.5414012739,
1114
+ "r":0.625,
1115
+ "f":0.5802047782
1116
  },
1117
  "dep":{
1118
  "p":0.0,
 
1120
  "f":0.0
1121
  },
1122
  "ccomp":{
1123
+ "p":0.5606060606,
1124
+ "r":0.5336538462,
1125
+ "f":0.5467980296
1126
  },
1127
  "appos":{
1128
+ "p":0.2222222222,
1129
+ "r":0.16,
1130
+ "f":0.1860465116
1131
  },
1132
  "nummod":{
1133
+ "p":0.3,
1134
+ "r":0.1875,
1135
+ "f":0.2307692308
1136
  },
1137
  "discourse":{
1138
  "p":1.0,
 
1140
  "f":0.9696969697
1141
  },
1142
  "vocative":{
1143
+ "p":0.8333333333,
1144
+ "r":0.652173913,
1145
+ "f":0.7317073171
1146
  },
1147
  "parataxis":{
1148
  "p":0.0,
 
1150
  "f":0.0
1151
  }
1152
  },
1153
+ "lemma_acc":0.9714737436,
1154
+ "transformer_loss":1818.713642158,
1155
+ "morphologizer_loss":206.8374374373,
1156
+ "tagger_loss":169.9414253123,
1157
+ "senter_loss":683.018002257,
1158
+ "parser_loss":96820.3485642595
1159
  },
1160
  "requirements":[
1161
+ "spacy-transformers>=1.3.5,<1.4.0"
1162
  ]
1163
  }
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78c2bc2a75f6085499efe7c84594cd2232135b286f2eca55ddfb2f68a8379a82
3
  size 2667545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cdd86aab4cb28269444ee5f9a8f0a4d3baadd01c84efb99427a1bb4a405928
3
  size 2667545
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c6b00df2b710485e686eb62d9bf45131fd68ead6b96e52b1d862e8ec9104f01
3
  size 2030887
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a2083eafe099d28ec478c0d6b52d9f3301a16f6aeedbf8268566d39a5533755
3
  size 2030887
senter/model CHANGED
Binary files a/senter/model and b/senter/model differ
 
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16599b137b7ab585bba7999d13bd1559892a4bd38935584999484c5c7eece319
3
  size 500074867
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5489b0e1b432464fe8d2c201b8dd529bad88671d36dd08451a8d0d23413e35c0
3
  size 500074867
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44cfaf0d411aaad34b0a67e353cfe8f85fbaf34513e489542dee0409fd3d1fd7
3
- size 23346083
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:991bdcde1c93f43861f5213f350babb9194774a93df0b4c0f106e28876ee2b3b
3
+ size 22428003