KoichiYasuoka committed on
Commit 2e5a751
1 Parent(s): a7a2a59

model improved for transformers 4.42

config.json CHANGED
@@ -4,22 +4,11 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "auto_map": {
-    "AutoModelForTokenClassification": "upos.LlamaForTokenClassification"
-  },
   "bos_token_id": 1,
   "custom_pipelines": {
     "upos": {
       "impl": "upos.BellmanFordTokenClassificationPipeline",
       "pt": "AutoModelForTokenClassification"
-    },
-    "token-classification": {
-      "impl": "upos.RawTokenClassificationPipeline",
-      "pt": "AutoModelForTokenClassification"
-    },
-    "ner": {
-      "impl": "upos.RawTokenClassificationPipeline",
-      "pt": "AutoModelForTokenClassification"
     }
   },
   "eos_token_id": 2,
@@ -166,7 +155,7 @@
   "tie_word_embeddings": false,
   "tokenizer_class": "LlamaTokenizerFast",
   "torch_dtype": "float32",
-  "transformers_version": "4.41.2",
+  "transformers_version": "4.42.4",
   "use_cache": true,
   "vocab_size": 43744
 }
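
With auto_map removed and transformers_version bumped to 4.42.4, the stock auto class resolves the architecture without remote code; only the custom "upos" pipeline still needs trust_remote_code. A minimal loading sketch (assuming transformers 4.42 or later, which ships LlamaForTokenClassification):

from transformers import AutoModelForTokenClassification

# Sketch: the model class itself now loads without trust_remote_code,
# since the architecture is a stock transformers class.
mdl = AutoModelForTokenClassification.from_pretrained("KoichiYasuoka/Swallow-7b-plus-char-upos")
print(type(mdl).__name__)  # LlamaForTokenClassification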
maker.sh CHANGED
@@ -1,49 +1,17 @@
 #! /bin/sh
 test -f ja_gsd_modern.conllu || curl -LO https://github.com/KoichiYasuoka/SuPar-UniDic/raw/main/suparunidic/suparmodels/ja_gsd_modern.conllu
-curl -L https://huggingface.co/KoichiYasuoka/Swallow-7b-plus-upos/resolve/main/tokenizer.json | env LANG=ja_JP.utf8 egrep -v '"[ぁ-ん] [ぁ-ん]",$' > newtokenizer.json
+( if [ -f KoichiYasuoka/Swallow-7b-plus-upos/tokenizer.json ]
+ then cat KoichiYasuoka/Swallow-7b-plus-upos/tokenizer.json
+ else curl -L https://huggingface.co/KoichiYasuoka/Swallow-7b-plus-upos/resolve/main/tokenizer.json
+ fi
+) | env LANG=ja_JP.utf8 egrep -v '"[ぁ-ん] [ぁ-ん]",$' > newtokenizer.json
 
 TMP=./maker$$.py
 cat << 'EOF' > $TMP
 #! /usr/bin/env deepspeed
 src="KoichiYasuoka/Swallow-7b-plus-upos"
 tgt="KoichiYasuoka/Swallow-7b-plus-char-upos"
-from transformers import LlamaTokenizerFast,LlamaModel,LlamaPreTrainedModel,AutoConfig,DataCollatorForTokenClassification,TrainingArguments,Trainer
-from transformers.modeling_outputs import TokenClassifierOutput
-
-class LlamaForTokenClassification(LlamaPreTrainedModel):
- def __init__(self,config):
-  from torch import nn
-  super().__init__(config)
-  self.num_labels=config.num_labels
-  self.model=LlamaModel(config)
-  if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
-   classifier_dropout=config.classifier_dropout
-  elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
-   classifier_dropout=config.hidden_dropout
-  else:
-   classifier_dropout=0.1
-  self.dropout=nn.Dropout(classifier_dropout)
-  self.classifier=nn.Linear(config.hidden_size,config.num_labels)
-  self.post_init()
- def get_input_embeddings(self):
-  return self.model.embed_tokens
- def set_input_embeddings(self,value):
-  self.model.embed_tokens=value
- def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
-  return_dict=return_dict if return_dict is not None else self.config.use_return_dict
-  transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
-  hidden_states=transformer_outputs[0]
-  hidden_states=self.dropout(hidden_states)
-  logits=self.classifier(hidden_states)
-  loss=None
-  if labels is not None:
-   from torch import nn
-   loss_fct=nn.CrossEntropyLoss()
-   loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
-  if not return_dict:
-   output=(logits,)+transformer_outputs[1:]
-   return ((loss,)+output) if loss is not None else output
-  return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
+from transformers import LlamaTokenizerFast,LlamaForTokenClassification,AutoConfig,DataCollatorForTokenClassification,TrainingArguments,Trainer
 
 class UPOSFileDataset(object):
  def __init__(self,conllu,tokenizer):
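
The block deleted from maker.sh was effectively upstreamed: transformers' own LlamaForTokenClassification also falls back through config.classifier_dropout and config.hidden_dropout before defaulting to 0.1, and ends in a Linear head, now named score rather than classifier. A quick environment-check sketch (assuming transformers 4.42 or later, the series this commit targets):

import transformers
print(transformers.__version__)  # config.json now records 4.42.4
# Built-in replacement for the deleted custom class:
from transformers import LlamaForTokenClassification
print(LlamaForTokenClassification.__module__)  # transformers.models.llama.modeling_llama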
pytorch_model-00001-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b13ae6b0fd6bf2779fb4e4df457e3c693b91369869b0fff330130c4f99b2c20e
+oid sha256:692bdb261ef8779cd58b516450b18d082d7094f1ac2ba46db95454f8c4197da5
 size 4965712452
pytorch_model-00002-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d896a30f76e9b1fee9f183ad8638a6cedc1da563f3d2d01278a7dc9b46a3d32a
+oid sha256:09d0e2bcffb1ce5e9cd95ea6977d474eb8d2c7eebdbfe1ce8f802c1db0096d65
 size 4924328556
pytorch_model-00003-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdf4712dba3e338ead5f8c8c0559890d2e0fe568da1dfd1d0f58b62c9dbf97a1
+oid sha256:d25051601bcf446f572c009773090c033538dd7b810beb52271e957494ba562f
 size 4857219294
pytorch_model-00004-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7aabf1ca7f7963a9b2901bff5c30ccf6454e91ac374f7fed7fd6e443ddb069d7
+oid sha256:15db31b8c730a7df1599e3121316de388024aee68ae6d543092e93a130dcb555
 size 4857219294
pytorch_model-00005-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc2f6bb378f573913af9194eca2339fb5077cfe849f9ed9e8f36fa46877b3ba1
+oid sha256:989af73b70f3c362fdda3f67e5044496814128c3d133f8872977dd07d13c48a6
 size 4857219294
pytorch_model-00006-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9aa4184bf98e7e4595cc57088ac56eef0943927147a29ae4b60c14559d7090e3
+oid sha256:e8483e76dc3e91fed1ab66073cbcd439d62342414eee644239b38c637e7bec8b
 size 2161173694
pytorch_model.bin.index.json CHANGED
@@ -3,8 +3,6 @@
     "total_size": 26622771440
   },
   "weight_map": {
-    "classifier.bias": "pytorch_model-00006-of-00006.bin",
-    "classifier.weight": "pytorch_model-00006-of-00006.bin",
     "model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
     "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
     "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
@@ -294,6 +292,8 @@
     "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
     "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
     "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
-    "model.norm.weight": "pytorch_model-00006-of-00006.bin"
+    "model.norm.weight": "pytorch_model-00006-of-00006.bin",
+    "score.bias": "pytorch_model-00006-of-00006.bin",
+    "score.weight": "pytorch_model-00006-of-00006.bin"
   }
 }
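
The only weight-map change is the head rename from classifier.* to score.*, matching the attribute name in the built-in class. For a checkpoint saved locally with the old custom head, a hypothetical remap sketch (the file names here are illustrative, not from this repo):

import torch

sd = torch.load("old_checkpoint.bin", map_location="cpu")  # hypothetical local file
# Rename the classification-head keys to the built-in class's naming:
sd = {("score."+k[len("classifier."):] if k.startswith("classifier.") else k): v
      for k, v in sd.items()}
torch.save(sd, "renamed_checkpoint.bin")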
tokenizer.json CHANGED
@@ -31,14 +31,13 @@
       "special": true
     }
   ],
-  "normalizer": {
-    "type": "Replace",
-    "pattern": {
-      "String": " "
-    },
-    "content": "▁"
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Metaspace",
+    "replacement": "▁",
+    "prepend_scheme": "never",
+    "split": false
   },
-  "pre_tokenizer": null,
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [
@@ -108,12 +107,6 @@
       },
       {
         "type": "Fuse"
-      },
-      {
-        "type": "Strip",
-        "content": " ",
-        "start": 1,
-        "stop": 0
       }
     ]
   },
@@ -124,7 +117,7 @@
   "continuing_subword_prefix": null,
   "end_of_word_suffix": null,
   "fuse_unk": true,
-  "byte_fallback": false,
+  "byte_fallback": true,
   "ignore_merges": false,
   "vocab": {
     "<unk>": 0,
@@ -43303,574 +43296,574 @@
     "趙": 43173,
     "弛": 43174,
     "徊": 43175,
-    … 568 removed single-character vocab entries for ids 43176–43743 (most glyphs did not survive extraction; legible survivors: "麿": 43285, "𠮟": 43316, "滿": 43499, "婿": 43580, "忿": 43685) …
+    … 568 added single-character vocab entries re-sorting the same ids 43176–43743 (most glyphs did not survive extraction; legible survivors: "滿": 43227, "婿": 43313, "忿": 43416, "麿": 43528, "𠮟": 43632) …
   },
   "merges": [
     "▁ t",
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
upos.py CHANGED
@@ -1,5 +1,4 @@
-from transformers import TokenClassificationPipeline,LlamaModel,LlamaPreTrainedModel
-from transformers.modeling_outputs import TokenClassifierOutput
+from transformers import TokenClassificationPipeline
 
 class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
  def __init__(self,**kwargs):
@@ -40,41 +39,3 @@ class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
    t["text"]=model_outputs["sentence"][t["start"]:t["end"]]
   return w
 
-class RawTokenClassificationPipeline(TokenClassificationPipeline):
- def check_model_type(self,supported_models):
-  pass
-
-class LlamaForTokenClassification(LlamaPreTrainedModel):
- def __init__(self,config):
-  from torch import nn
-  super().__init__(config)
-  self.num_labels=config.num_labels
-  self.model=LlamaModel(config)
-  if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
-   classifier_dropout=config.classifier_dropout
-  elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
-   classifier_dropout=config.hidden_dropout
-  else:
-   classifier_dropout=0.1
-  self.dropout=nn.Dropout(classifier_dropout)
-  self.classifier=nn.Linear(config.hidden_size,config.num_labels)
-  self.post_init()
- def get_input_embeddings(self):
-  return self.model.embed_tokens
- def set_input_embeddings(self,value):
-  self.model.embed_tokens=value
- def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
-  return_dict=return_dict if return_dict is not None else self.config.use_return_dict
-  transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
-  hidden_states=transformer_outputs[0]
-  hidden_states=self.dropout(hidden_states)
-  logits=self.classifier(hidden_states)
-  loss=None
-  if labels is not None:
-   from torch import nn
-   loss_fct=nn.CrossEntropyLoss()
-   loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
-  if not return_dict:
-   output=(logits,)+transformer_outputs[2:]
-   return ((loss,)+output) if loss is not None else output
-  return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
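
After this commit upos.py carries only the pipeline code; the model class comes from transformers itself. A minimal end-to-end usage sketch (trust_remote_code is still required so that transformers can import the custom "upos" pipeline registered in config.json):

from transformers import pipeline

nlp = pipeline("upos", model="KoichiYasuoka/Swallow-7b-plus-char-upos",
               trust_remote_code=True, aggregation_strategy="simple")
print(nlp("国境の長いトンネルを抜けると雪国であった。"))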