TwentyNine committed on
Commit
908620a
β€’
1 Parent(s): 88c2b0e

Correct incorrect code in model card.

Browse files
Files changed (1) hide show
  1. README.md +10 -6
README.md CHANGED
@@ -16,7 +16,7 @@ The following is adapted from [slone/nllb-rus-tyv-v1](https://huggingface.co/slo
16
 
17
  ```Python
18
  # the version of transformers is important!
19
- !pip install sentencepiece transformers==4.33
20
  import torch
21
  from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
22
 
@@ -39,14 +39,14 @@ def fix_tokenizer(tokenizer, new_lang):
39
  MODEL_URL = "TwentyNine/nllb-ain-kana-latin-converter-v1"
40
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
41
  tokenizer = NllbTokenizer.from_pretrained(MODEL_URL)
42
- fix_tokenizer(tokenizer, 'ain_Jpan')
43
  fix_tokenizer(tokenizer, 'ain_Latn')
44
 
45
- def translate(
46
  text,
47
  model,
48
  tokenizer,
49
- src_lang='ain_Jpan',
50
  tgt_lang='ain_Latn',
51
  max_length='auto',
52
  num_beams=4,
@@ -69,8 +69,12 @@ def translate(
69
  out = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
70
  if isinstance(text, str) and n_out is None:
71
  return out[0]
72
- return
73
 
74
- translate("ポむ セタ クコン ルスむ", model=model, tokenizer=tokenizer)
75
  # 'pon seta ku=kor rusuy'
 
 
 
 
76
  ```
 
16
 
17
  ```Python
18
  # the version of transformers is important!
19
+ !pip install sentencepiece transformers==4.33 > /dev/null
20
  import torch
21
  from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
22
 
 
39
  MODEL_URL = "TwentyNine/nllb-ain-kana-latin-converter-v1"
40
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
41
  tokenizer = NllbTokenizer.from_pretrained(MODEL_URL)
42
+ fix_tokenizer(tokenizer, 'ain_Japn')
43
  fix_tokenizer(tokenizer, 'ain_Latn')
44
 
45
+ def convert(
46
  text,
47
  model,
48
  tokenizer,
49
+ src_lang='ain_Japn',
50
  tgt_lang='ain_Latn',
51
  max_length='auto',
52
  num_beams=4,
 
69
  out = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
70
  if isinstance(text, str) and n_out is None:
71
  return out[0]
72
+ return
73
 
74
+ convert("ポむ セタ クコン ルスむ", model=model, tokenizer=tokenizer)
75
  # 'pon seta ku=kor rusuy'
76
+
77
+ convert("γ‚Ώγƒ³γƒˆ γŒγ£γ“γ†γ€€γ‚ͺルン パむェ", model=model, tokenizer=tokenizer)
78
+ # 'tanto γŒγ£γ“γ† or un paye'
79
+ # ideal: 'tanto GAKKO or un paye' or 'tanto GAKKOU or un paye'
80
  ```