TwentyNine committed on
Commit
88c2b0e
1 Parent(s): 6d0cacf

Update model reference

Browse files
Files changed (1) hide show
  1. README.md +4 -3
README.md CHANGED
@@ -20,7 +20,7 @@ The following is adapted from [slone/nllb-rus-tyv-v1](https://huggingface.co/slo
20
  import torch
21
  from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
22
 
23
- def fix_tokenizer(tokenizer, new_lang='ain_Latn'):
24
  """ Add a new language token to the tokenizer vocabulary (this should be done each time after its initialization) """
25
  old_len = len(tokenizer) - int(new_lang in tokenizer.added_tokens_encoder)
26
  tokenizer.lang_code_to_id[new_lang] = old_len-1
@@ -36,10 +36,11 @@ def fix_tokenizer(tokenizer, new_lang='ain_Latn'):
36
  tokenizer.added_tokens_encoder = {}
37
  tokenizer.added_tokens_decoder = {}
38
 
39
- MODEL_URL = "TwentyNine/nllb-jpn-ain-v1"
40
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
41
  tokenizer = NllbTokenizer.from_pretrained(MODEL_URL)
42
- fix_tokenizer(tokenizer)
 
43
 
44
  def translate(
45
  text,
 
20
  import torch
21
  from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
22
 
23
+ def fix_tokenizer(tokenizer, new_lang):
24
  """ Add a new language token to the tokenizer vocabulary (this should be done each time after its initialization) """
25
  old_len = len(tokenizer) - int(new_lang in tokenizer.added_tokens_encoder)
26
  tokenizer.lang_code_to_id[new_lang] = old_len-1
 
36
  tokenizer.added_tokens_encoder = {}
37
  tokenizer.added_tokens_decoder = {}
38
 
39
+ MODEL_URL = "TwentyNine/nllb-ain-kana-latin-converter-v1"
40
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
41
  tokenizer = NllbTokenizer.from_pretrained(MODEL_URL)
42
+ fix_tokenizer(tokenizer, 'ain_Jpan')
43
+ fix_tokenizer(tokenizer, 'ain_Latn')
44
 
45
  def translate(
46
  text,