alirezamsh committed on
Commit
be35656
•
1 Parent(s): cc83fc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -6,14 +6,28 @@ os.system("pip install transformers sentencepiece torch")
6
  from transformers import M2M100ForConditionalGeneration
7
  from tokenization_small100 import SMALL100Tokenizer
8
 
 
 
 
 
 
 
9
  model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
10
  tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
11
 
12
- def fn(text, lang):
13
  tokenizer.tgt_lang = lang
14
- encoded = tokenizer(text, return_tensors="pt")
15
- generated_tokens = model.generate(**encoded)
16
- return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
 
 
 
 
 
17
 
18
- demo = gr.Interface(fn=fn, inputs=["text", "text"], outputs="text")
19
- demo.launch()
 
 
 
 
6
  from transformers import M2M100ForConditionalGeneration
7
  from tokenization_small100 import SMALL100Tokenizer
8
 
9
+ langs = """Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijani (az), Bashkir (ba), Belarusian (be), Bulgarian (bg), Bengali (bn), Breton (br), Bosnian (bs), Catalan; Valencian (ca), Cebuano (ceb), Czech (cs), Welsh (cy), Danish (da), German (de), Greek (el), English (en), Spanish (es), Estonian (et), Persian (fa), Fulah (ff), Finnish (fi), French (fr), Western Frisian (fy), Irish (ga), Gaelic; Scottish Gaelic (gd), Galician (gl), Gujarati (gu), Hausa (ha), Hebrew (he), Hindi (hi), Croatian (hr), Haitian; Haitian Creole (ht), Hungarian (hu), Armenian (hy), Indonesian (id), Igbo (ig), Iloko (ilo), Icelandic (is), Italian (it), Japanese (ja), Javanese (jv), Georgian (ka), Kazakh (kk), Central Khmer (km), Kannada (kn),
10
+ Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk),
11
+ Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn),
12
+ Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)"""
13
+ lang_list = [lang.strip() for lang in langs.split(',')]
14
+
15
  model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
16
  tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
17
 
18
+ def small100_tr(text, lang):
19
  tokenizer.tgt_lang = lang
20
+ encoded_text = tokenizer(text, return_tensors="pt")
21
+ generated_tokens = model.generate(**encoded_text)
22
+ return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
23
+
24
+ examples = [["French (fr)", "엄마판다는 새끼가 있네."]]
25
+
26
+ demo = gr.Interface(fn=small100_tr, inputs=["text", "text"], outputs="text")
27
+ demo.launch()
28
 
29
+ output_text = gr.outputs.Textbox()
30
+ gr.Interface(small100_tr, inputs=[gr.inputs.Dropdown(lang_list, label=" Target Language"), 'text'], outputs=output_text, title="SMaLL100: Translate Between 100 languages much faster",
31
+ description="Demo page for SMaLL100 model",
32
+ examples=examples
33
+ ).launch()