NeuralFalcon
committed on
Commit
•
9d78489
1
Parent(s):
84d8fc6
Create utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Lookup table keyed by English language name. Each entry carries two codes:
#   "lang_code": a short language identifier (two letters for most entries).
#   "meta_code": "<3-letter language>_<4-letter ISO 15924 script>".
# NOTE(review): the meta_code shape matches the FLORES-200 / NLLB language
# codes; the entries below are normalised to that list on that assumption --
# confirm against whichever model/tokenizer actually consumes them.
# NOTE(review): "Akan" uses the three-letter lang_code "aka" while nearly all
# other entries use two-letter codes; left unchanged because the consumer of
# lang_code is not visible here -- verify.
language_dict = {
    "Akan": {"lang_code": "aka", "meta_code": "aka_Latn"},
    "Albanian": {"lang_code": "sq", "meta_code": "als_Latn"},
    "Amharic": {"lang_code": "am", "meta_code": "amh_Ethi"},
    "Arabic": {"lang_code": "ar", "meta_code": "arb_Arab"},
    "Armenian": {"lang_code": "hy", "meta_code": "hye_Armn"},
    "Assamese": {"lang_code": "as", "meta_code": "asm_Beng"},
    "Azerbaijani": {"lang_code": "az", "meta_code": "azj_Latn"},
    "Basque": {"lang_code": "eu", "meta_code": "eus_Latn"},
    "Bashkir": {"lang_code": "ba", "meta_code": "bak_Cyrl"},
    "Bengali": {"lang_code": "bn", "meta_code": "ben_Beng"},
    "Bosnian": {"lang_code": "bs", "meta_code": "bos_Latn"},
    "Bulgarian": {"lang_code": "bg", "meta_code": "bul_Cyrl"},
    "Burmese": {"lang_code": "my", "meta_code": "mya_Mymr"},
    "Catalan": {"lang_code": "ca", "meta_code": "cat_Latn"},
    # Was "zh_Hans": two-letter prefix broke the 3-letter pattern.
    "Chinese": {"lang_code": "zh", "meta_code": "zho_Hans"},
    "Croatian": {"lang_code": "hr", "meta_code": "hrv_Latn"},
    "Czech": {"lang_code": "cs", "meta_code": "ces_Latn"},
    "Danish": {"lang_code": "da", "meta_code": "dan_Latn"},
    "Dutch": {"lang_code": "nl", "meta_code": "nld_Latn"},
    "English": {"lang_code": "en", "meta_code": "eng_Latn"},
    "Estonian": {"lang_code": "et", "meta_code": "est_Latn"},
    "Faroese": {"lang_code": "fo", "meta_code": "fao_Latn"},
    "Finnish": {"lang_code": "fi", "meta_code": "fin_Latn"},
    "French": {"lang_code": "fr", "meta_code": "fra_Latn"},
    "Galician": {"lang_code": "gl", "meta_code": "glg_Latn"},
    "Georgian": {"lang_code": "ka", "meta_code": "kat_Geor"},
    "German": {"lang_code": "de", "meta_code": "deu_Latn"},
    "Greek": {"lang_code": "el", "meta_code": "ell_Grek"},
    "Gujarati": {"lang_code": "gu", "meta_code": "guj_Gujr"},
    "Haitian Creole": {"lang_code": "ht", "meta_code": "hat_Latn"},
    "Hausa": {"lang_code": "ha", "meta_code": "hau_Latn"},
    "Hebrew": {"lang_code": "he", "meta_code": "heb_Hebr"},
    "Hindi": {"lang_code": "hi", "meta_code": "hin_Deva"},
    "Hungarian": {"lang_code": "hu", "meta_code": "hun_Latn"},
    "Icelandic": {"lang_code": "is", "meta_code": "isl_Latn"},
    "Indonesian": {"lang_code": "id", "meta_code": "ind_Latn"},
    "Italian": {"lang_code": "it", "meta_code": "ita_Latn"},
    "Japanese": {"lang_code": "ja", "meta_code": "jpn_Jpan"},
    "Kannada": {"lang_code": "kn", "meta_code": "kan_Knda"},
    "Kazakh": {"lang_code": "kk", "meta_code": "kaz_Cyrl"},
    "Korean": {"lang_code": "ko", "meta_code": "kor_Hang"},
    "Kurdish": {"lang_code": "ckb", "meta_code": "ckb_Arab"},
    "Kyrgyz": {"lang_code": "ky", "meta_code": "kir_Cyrl"},
    "Lao": {"lang_code": "lo", "meta_code": "lao_Laoo"},
    "Lithuanian": {"lang_code": "lt", "meta_code": "lit_Latn"},
    "Luxembourgish": {"lang_code": "lb", "meta_code": "ltz_Latn"},
    "Macedonian": {"lang_code": "mk", "meta_code": "mkd_Cyrl"},
    # Was "ms_Latn": two-letter prefix; FLORES-200 lists Standard Malay.
    "Malay": {"lang_code": "ms", "meta_code": "zsm_Latn"},
    "Malayalam": {"lang_code": "ml", "meta_code": "mal_Mlym"},
    "Maltese": {"lang_code": "mt", "meta_code": "mlt_Latn"},
    "Maori": {"lang_code": "mi", "meta_code": "mri_Latn"},
    "Marathi": {"lang_code": "mr", "meta_code": "mar_Deva"},
    "Mongolian": {"lang_code": "mn", "meta_code": "khk_Cyrl"},
    "Nepali": {"lang_code": "ne", "meta_code": "npi_Deva"},
    "Norwegian": {"lang_code": "no", "meta_code": "nob_Latn"},
    "Norwegian Nynorsk": {"lang_code": "nn", "meta_code": "nno_Latn"},
    "Pashto": {"lang_code": "ps", "meta_code": "pbt_Arab"},
    "Persian": {"lang_code": "fa", "meta_code": "pes_Arab"},
    "Polish": {"lang_code": "pl", "meta_code": "pol_Latn"},
    "Portuguese": {"lang_code": "pt", "meta_code": "por_Latn"},
    "Punjabi": {"lang_code": "pa", "meta_code": "pan_Guru"},
    "Romanian": {"lang_code": "ro", "meta_code": "ron_Latn"},
    "Russian": {"lang_code": "ru", "meta_code": "rus_Cyrl"},
    "Serbian": {"lang_code": "sr", "meta_code": "srp_Cyrl"},
    "Sinhala": {"lang_code": "si", "meta_code": "sin_Sinh"},
    "Slovak": {"lang_code": "sk", "meta_code": "slk_Latn"},
    "Slovenian": {"lang_code": "sl", "meta_code": "slv_Latn"},
    "Somali": {"lang_code": "so", "meta_code": "som_Latn"},
    "Spanish": {"lang_code": "es", "meta_code": "spa_Latn"},
    "Sundanese": {"lang_code": "su", "meta_code": "sun_Latn"},
    # Was "swa_Latn" (macrolanguage); FLORES-200 lists Swahili as swh_Latn.
    "Swahili": {"lang_code": "sw", "meta_code": "swh_Latn"},
    "Swedish": {"lang_code": "sv", "meta_code": "swe_Latn"},
    "Tamil": {"lang_code": "ta", "meta_code": "tam_Taml"},
    "Telugu": {"lang_code": "te", "meta_code": "tel_Telu"},
    # Was "tha_Latn": Thai is written in Thai script, not Latin.
    "Thai": {"lang_code": "th", "meta_code": "tha_Thai"},
    "Turkish": {"lang_code": "tr", "meta_code": "tur_Latn"},
    "Ukrainian": {"lang_code": "uk", "meta_code": "ukr_Cyrl"},
    "Urdu": {"lang_code": "ur", "meta_code": "urd_Arab"},
    # Was "uzb_Latn" (macrolanguage); FLORES-200 lists Northern Uzbek.
    "Uzbek": {"lang_code": "uz", "meta_code": "uzn_Latn"},
    "Vietnamese": {"lang_code": "vi", "meta_code": "vie_Latn"},
    "Welsh": {"lang_code": "cy", "meta_code": "cym_Latn"},
    # Was "yi_Hebr": two-letter prefix; FLORES-200 lists Eastern Yiddish.
    "Yiddish": {"lang_code": "yi", "meta_code": "ydd_Hebr"},
    # Was "yo_Latn": two-letter prefix broke the 3-letter pattern.
    "Yoruba": {"lang_code": "yo", "meta_code": "yor_Latn"},
    "Zulu": {"lang_code": "zu", "meta_code": "zul_Latn"},
}
|