Spaces:
Running
Running
alirezamsh
committed on
Commit
•
be35656
1
Parent(s):
cc83fc4
Update app.py
Browse files
app.py
CHANGED
@@ -6,14 +6,28 @@ os.system("pip install transformers sentencepiece torch")
|
|
6 |
from transformers import M2M100ForConditionalGeneration
|
7 |
from tokenization_small100 import SMALL100Tokenizer
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
|
10 |
tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
|
11 |
|
12 |
-
def
|
13 |
tokenizer.tgt_lang = lang
|
14 |
-
|
15 |
-
generated_tokens = model.generate(**
|
16 |
-
return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
6 |
from transformers import M2M100ForConditionalGeneration
|
7 |
from tokenization_small100 import SMALL100Tokenizer
|
8 |
|
9 |
+
langs = """Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijani (az), Bashkir (ba), Belarusian (be), Bulgarian (bg), Bengali (bn), Breton (br), Bosnian (bs), Catalan; Valencian (ca), Cebuano (ceb), Czech (cs), Welsh (cy), Danish (da), German (de), Greeek (el), English (en), Spanish (es), Estonian (et), Persian (fa), Fulah (ff), Finnish (fi), French (fr), Western Frisian (fy), Irish (ga), Gaelic; Scottish Gaelic (gd), Galician (gl), Gujarati (gu), Hausa (ha), Hebrew (he), Hindi (hi), Croatian (hr), Haitian; Haitian Creole (ht), Hungarian (hu), Armenian (hy), Indonesian (id), Igbo (ig), Iloko (ilo), Icelandic (is), Italian (it), Japanese (ja), Javanese (jv), Georgian (ka), Kazakh (kk), Central Khmer (km), Kannada (kn),
|
10 |
+
Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk),
|
11 |
+
Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn),
|
12 |
+
Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)"""
|
13 |
+
lang_list = [lang.strip() for lang in langs.split(',')]
|
14 |
+
|
15 |
model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
|
16 |
tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100")
|
17 |
|
18 |
+
def small100_tr(text, lang):
|
19 |
tokenizer.tgt_lang = lang
|
20 |
+
encoded_text = tokenizer(text, return_tensors="pt")
|
21 |
+
generated_tokens = model.generate(**encoded_text)
|
22 |
+
return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
23 |
+
|
24 |
+
examples = [["French (fr)", "μλ§νλ€λ μλΌκ° μλ€."]]
|
25 |
+
|
26 |
+
demo = gr.Interface(fn=small100_tr, inputs=["text", "text"], outputs="text")
|
27 |
+
demo.launch()
|
28 |
|
29 |
+
output_text = gr.outputs.Textbox()
|
30 |
+
gr.Interface(small100_tr, inputs=[gr.inputs.Dropdown(lang_list, label=" Target Language"), 'text'], outputs=output_text, title="SMaLL100: Translate Between 100 languages much faster",
|
31 |
+
description="Demo page for SMaLL100 model",
|
32 |
+
examples=examples
|
33 |
+
).launch()
|