|
|
|
# Lookup table from three-letter language code to English display name.
# NOTE(review): the codes look like ISO 639-3 identifiers -- confirm before
# relying on that elsewhere.
language_code_to_name = {
    "afr": "Afrikaans",
    "amh": "Amharic",
    "arb": "Modern Standard Arabic",
    "ary": "Moroccan Arabic",
    "arz": "Egyptian Arabic",
    "asm": "Assamese",
    "ast": "Asturian",
    "azj": "North Azerbaijani",
    "bel": "Belarusian",
    "ben": "Bengali",
    "bos": "Bosnian",
    "bul": "Bulgarian",
    "cat": "Catalan",
    "ceb": "Cebuano",
    "ces": "Czech",
    "ckb": "Central Kurdish",
    "cmn": "Mandarin Chinese",
    "cym": "Welsh",
    "dan": "Danish",
    "deu": "German",
    "ell": "Greek",
    "eng": "English",
    "est": "Estonian",
    "eus": "Basque",
    "fin": "Finnish",
    "fra": "French",
    "gaz": "West Central Oromo",
    "gle": "Irish",
    "glg": "Galician",
    "guj": "Gujarati",
    "heb": "Hebrew",
    "hin": "Hindi",
    "hrv": "Croatian",
    "hun": "Hungarian",
    "hye": "Armenian",
    "ibo": "Igbo",
    "ind": "Indonesian",
    "isl": "Icelandic",
    "ita": "Italian",
    "jav": "Javanese",
    "jpn": "Japanese",
    "kam": "Kamba",
    "kan": "Kannada",
    "kat": "Georgian",
    "kaz": "Kazakh",
    "kea": "Kabuverdianu",
    "khk": "Halh Mongolian",
    "khm": "Khmer",
    "kir": "Kyrgyz",
    "kor": "Korean",
    "lao": "Lao",
    "lit": "Lithuanian",
    "ltz": "Luxembourgish",
    "lug": "Ganda",
    "luo": "Luo",
    "lvs": "Standard Latvian",
    "mai": "Maithili",
    "mal": "Malayalam",
    "mar": "Marathi",
    "mkd": "Macedonian",
    "mlt": "Maltese",
    "mni": "Meitei",
    "mya": "Burmese",
    "nld": "Dutch",
    "nno": "Norwegian Nynorsk",
    "nob": "Norwegian Bokm\u00e5l",
    "npi": "Nepali",
    "nya": "Nyanja",
    "oci": "Occitan",
    "ory": "Odia",
    "pan": "Punjabi",
    "pbt": "Southern Pashto",
    "pes": "Western Persian",
    "pol": "Polish",
    "por": "Portuguese",
    "ron": "Romanian",
    "rus": "Russian",
    "slk": "Slovak",
    "slv": "Slovenian",
    "sna": "Shona",
    "snd": "Sindhi",
    "som": "Somali",
    "spa": "Spanish",
    "srp": "Serbian",
    "swe": "Swedish",
    "swh": "Swahili",
    "tam": "Tamil",
    "tel": "Telugu",
    "tgk": "Tajik",
    "tgl": "Tagalog",
    "tha": "Thai",
    "tur": "Turkish",
    "ukr": "Ukrainian",
    "urd": "Urdu",
    "uzn": "Northern Uzbek",
    "vie": "Vietnamese",
    "xho": "Xhosa",
    "yor": "Yoruba",
    "yue": "Cantonese",
    "zlm": "Colloquial Malay",
    "zsm": "Standard Malay",
    "zul": "Zulu",
}

# Reverse lookup: display name -> code. Every display name above is distinct,
# so the inversion keeps all entries; a duplicated name would silently keep
# only the last code that maps to it.
LANGUAGE_NAME_TO_CODE = {name: code for code, name in language_code_to_name.items()}
|
|
|
|
|
|
|
# Language codes offered as text source languages.
# NOTE(review): presumably the source-language choices for text-input tasks
# (T2TT / T2ST) -- confirm against the caller.
text_source_language_codes = [
    "afr",
    "amh",
    "arb",
    "ary",
    "arz",
    "asm",
    "azj",
    "bel",
    "ben",
    "bos",
    "bul",
    "cat",
    "ceb",
    "ces",
    "ckb",
    "cmn",
    "cym",
    "dan",
    "deu",
    "ell",
    "eng",
    "est",
    "eus",
    "fin",
    "fra",
    "gaz",
    "gle",
    "glg",
    "guj",
    "heb",
    "hin",
    "hrv",
    "hun",
    "hye",
    "ibo",
    "ind",
    "isl",
    "ita",
    "jav",
    "jpn",
    "kan",
    "kat",
    "kaz",
    "khk",
    "khm",
    "kir",
    "kor",
    "lao",
    "lit",
    "lug",
    "luo",
    "lvs",
    "mai",
    "mal",
    "mar",
    "mkd",
    "mlt",
    "mni",
    "mya",
    "nld",
    "nno",
    "nob",
    "npi",
    "nya",
    "ory",
    "pan",
    "pbt",
    "pes",
    "pol",
    "por",
    "ron",
    "rus",
    "slk",
    "slv",
    "sna",
    "snd",
    "som",
    "spa",
    "srp",
    "swe",
    "swh",
    "tam",
    "tel",
    "tgk",
    "tgl",
    "tha",
    "tur",
    "ukr",
    "urd",
    "uzn",
    "vie",
    "yor",
    "yue",
    "zsm",
    "zul",
]

# Display names for the codes above, sorted by name rather than by code.
# Raises KeyError at import time if a code is missing from
# language_code_to_name.
TEXT_SOURCE_LANGUAGE_NAMES = sorted(
    [language_code_to_name[code] for code in text_source_language_codes]
)
|
|
|
|
|
|
|
# Language codes offered as S2ST targets.
# NOTE(review): presumably the languages available as speech output --
# confirm against the caller.
s2st_target_language_codes = [
    "eng",
    "arb",
    "ben",
    "cat",
    "ces",
    "cmn",
    "cym",
    "dan",
    "deu",
    "est",
    "fin",
    "fra",
    "hin",
    "ind",
    "ita",
    "jpn",
    "kor",
    "mlt",
    "nld",
    "pes",
    "pol",
    "por",
    "ron",
    "rus",
    "slk",
    "spa",
    "swe",
    "swh",
    "tel",
    "tgl",
    "tha",
    "tur",
    "ukr",
    "urd",
    "uzn",
    "vie",
]

# Display names for the codes above, sorted by name. Raises KeyError at
# import time if a code is missing from language_code_to_name.
S2ST_TARGET_LANGUAGE_NAMES = sorted(
    [language_code_to_name[code] for code in s2st_target_language_codes]
)

# T2ST uses the very same list object as S2ST (an alias, not a copy), so a
# mutation through either name is visible through both.
T2ST_TARGET_LANGUAGE_NAMES = S2ST_TARGET_LANGUAGE_NAMES
|
|
|
|
|
# S2TT, T2TT and ASR targets all reuse the text source name list. These are
# aliases of the same list object, not copies, so an in-place mutation
# through any one name affects all of them.
S2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
T2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
ASR_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
|
|