class NllbLang:
    """One entry of the NLLB language table.

    Attributes:
        code: NLLB language code, e.g. ``'eng_Latn'``.
        name: Human-readable language name, e.g. ``'English'``.
        code_whisper: Matching Whisper language code (e.g. ``'en'``), or
            ``None`` when the entry has no Whisper counterpart.
        name_whisper: Matching Whisper language name (e.g. ``'English'``),
            or ``None`` when the entry has no Whisper counterpart.
    """

    def __init__(self, code, name, code_whisper=None, name_whisper=None):
        self.code = code
        self.name = name
        self.code_whisper = code_whisper
        self.name_whisper = name_whisper

    def __str__(self):
        return "Language(code={}, name={})".format(self.code, self.name)


# Table of known languages. Order matters for the Whisper lookup tables built
# below: several entries share one Whisper code (e.g. 'ar', 'az', 'zh') and the
# LAST entry in this list wins for that code/name.
NLLB_LANGS = [
    NllbLang('ace_Arab', 'Acehnese (Arabic script)'),
    NllbLang('ace_Latn', 'Acehnese (Latin script)'),
    NllbLang('acm_Arab', 'Mesopotamian Arabic', 'ar', 'Arabic'),
    NllbLang('acq_Arab', 'Ta’izzi-Adeni Arabic', 'ar', 'Arabic'),
    NllbLang('aeb_Arab', 'Tunisian Arabic'),
    NllbLang('afr_Latn', 'Afrikaans', 'af', 'Afrikaans'),
    NllbLang('ajp_Arab', 'South Levantine Arabic', 'ar', 'Arabic'),
    NllbLang('aka_Latn', 'Akan'),
    NllbLang('amh_Ethi', 'Amharic', 'am', 'Amharic'),
    NllbLang('apc_Arab', 'North Levantine Arabic', 'ar', 'Arabic'),
    NllbLang('arb_Arab', 'Modern Standard Arabic', 'ar', 'Arabic'),
    NllbLang('arb_Latn', 'Modern Standard Arabic (Romanized)'),
    NllbLang('ars_Arab', 'Najdi Arabic', 'ar', 'Arabic'),
    NllbLang('ary_Arab', 'Moroccan Arabic', 'ar', 'Arabic'),
    NllbLang('arz_Arab', 'Egyptian Arabic', 'ar', 'Arabic'),
    NllbLang('asm_Beng', 'Assamese', 'as', 'Assamese'),
    NllbLang('ast_Latn', 'Asturian'),
    NllbLang('awa_Deva', 'Awadhi'),
    NllbLang('ayr_Latn', 'Central Aymara'),
    NllbLang('azb_Arab', 'South Azerbaijani', 'az', 'Azerbaijani'),
    NllbLang('azj_Latn', 'North Azerbaijani', 'az', 'Azerbaijani'),
    NllbLang('bak_Cyrl', 'Bashkir', 'ba', 'Bashkir'),
    NllbLang('bam_Latn', 'Bambara'),
    NllbLang('ban_Latn', 'Balinese'),
    NllbLang('bel_Cyrl', 'Belarusian', 'be', 'Belarusian'),
    NllbLang('bem_Latn', 'Bemba'),
    NllbLang('ben_Beng', 'Bengali', 'bn', 'Bengali'),
    NllbLang('bho_Deva', 'Bhojpuri'),
    NllbLang('bjn_Arab', 'Banjar (Arabic script)'),
    NllbLang('bjn_Latn', 'Banjar (Latin script)'),
    NllbLang('bod_Tibt', 'Standard Tibetan', 'bo', 'Tibetan'),
    NllbLang('bos_Latn', 'Bosnian', 'bs', 'Bosnian'),
    NllbLang('bug_Latn', 'Buginese'),
    NllbLang('bul_Cyrl', 'Bulgarian', 'bg', 'Bulgarian'),
    NllbLang('cat_Latn', 'Catalan', 'ca', 'Catalan'),
    NllbLang('ceb_Latn', 'Cebuano'),
    NllbLang('ces_Latn', 'Czech', 'cs', 'Czech'),
    NllbLang('cjk_Latn', 'Chokwe'),
    NllbLang('ckb_Arab', 'Central Kurdish'),
    NllbLang('crh_Latn', 'Crimean Tatar'),
    NllbLang('cym_Latn', 'Welsh', 'cy', 'Welsh'),
    NllbLang('dan_Latn', 'Danish', 'da', 'Danish'),
    NllbLang('deu_Latn', 'German', 'de', 'German'),
    NllbLang('dik_Latn', 'Southwestern Dinka'),
    NllbLang('dyu_Latn', 'Dyula'),
    NllbLang('dzo_Tibt', 'Dzongkha'),
    NllbLang('ell_Grek', 'Greek', 'el', 'Greek'),
    NllbLang('eng_Latn', 'English', 'en', 'English'),
    NllbLang('epo_Latn', 'Esperanto'),
    NllbLang('est_Latn', 'Estonian', 'et', 'Estonian'),
    NllbLang('eus_Latn', 'Basque', 'eu', 'Basque'),
    NllbLang('ewe_Latn', 'Ewe'),
    NllbLang('fao_Latn', 'Faroese', 'fo', 'Faroese'),
    NllbLang('fij_Latn', 'Fijian'),
    NllbLang('fin_Latn', 'Finnish', 'fi', 'Finnish'),
    NllbLang('fon_Latn', 'Fon'),
    NllbLang('fra_Latn', 'French', 'fr', 'French'),
    NllbLang('fur_Latn', 'Friulian'),
    NllbLang('fuv_Latn', 'Nigerian Fulfulde'),
    NllbLang('gla_Latn', 'Scottish Gaelic'),
    NllbLang('gle_Latn', 'Irish'),
    NllbLang('glg_Latn', 'Galician', 'gl', 'Galician'),
    NllbLang('grn_Latn', 'Guarani'),
    NllbLang('guj_Gujr', 'Gujarati', 'gu', 'Gujarati'),
    NllbLang('hat_Latn', 'Haitian Creole', 'ht', 'Haitian creole'),
    NllbLang('hau_Latn', 'Hausa', 'ha', 'Hausa'),
    NllbLang('heb_Hebr', 'Hebrew', 'he', 'Hebrew'),
    NllbLang('hin_Deva', 'Hindi', 'hi', 'Hindi'),
    NllbLang('hne_Deva', 'Chhattisgarhi'),
    NllbLang('hrv_Latn', 'Croatian', 'hr', 'Croatian'),
    NllbLang('hun_Latn', 'Hungarian', 'hu', 'Hungarian'),
    NllbLang('hye_Armn', 'Armenian', 'hy', 'Armenian'),
    NllbLang('ibo_Latn', 'Igbo'),
    NllbLang('ilo_Latn', 'Ilocano'),
    NllbLang('ind_Latn', 'Indonesian', 'id', 'Indonesian'),
    NllbLang('isl_Latn', 'Icelandic', 'is', 'Icelandic'),
    NllbLang('ita_Latn', 'Italian', 'it', 'Italian'),
    NllbLang('jav_Latn', 'Javanese', 'jw', 'Javanese'),
    NllbLang('jpn_Jpan', 'Japanese', 'ja', 'Japanese'),
    NllbLang('kab_Latn', 'Kabyle'),
    NllbLang('kac_Latn', 'Jingpho'),
    NllbLang('kam_Latn', 'Kamba'),
    NllbLang('kan_Knda', 'Kannada', 'kn', 'Kannada'),
    NllbLang('kas_Arab', 'Kashmiri (Arabic script)'),
    NllbLang('kas_Deva', 'Kashmiri (Devanagari script)'),
    NllbLang('kat_Geor', 'Georgian', 'ka', 'Georgian'),
    NllbLang('knc_Arab', 'Central Kanuri (Arabic script)'),
    NllbLang('knc_Latn', 'Central Kanuri (Latin script)'),
    NllbLang('kaz_Cyrl', 'Kazakh', 'kk', 'Kazakh'),
    NllbLang('kbp_Latn', 'Kabiyè'),
    NllbLang('kea_Latn', 'Kabuverdianu'),
    NllbLang('khm_Khmr', 'Khmer', 'km', 'Khmer'),
    NllbLang('kik_Latn', 'Kikuyu'),
    NllbLang('kin_Latn', 'Kinyarwanda'),
    NllbLang('kir_Cyrl', 'Kyrgyz'),
    NllbLang('kmb_Latn', 'Kimbundu'),
    NllbLang('kmr_Latn', 'Northern Kurdish'),
    NllbLang('kon_Latn', 'Kikongo'),
    NllbLang('kor_Hang', 'Korean', 'ko', 'Korean'),
    NllbLang('lao_Laoo', 'Lao', 'lo', 'Lao'),
    NllbLang('lij_Latn', 'Ligurian'),
    NllbLang('lim_Latn', 'Limburgish'),
    NllbLang('lin_Latn', 'Lingala', 'ln', 'Lingala'),
    NllbLang('lit_Latn', 'Lithuanian', 'lt', 'Lithuanian'),
    NllbLang('lmo_Latn', 'Lombard'),
    NllbLang('ltg_Latn', 'Latgalian'),
    NllbLang('ltz_Latn', 'Luxembourgish', 'lb', 'Luxembourgish'),
    NllbLang('lua_Latn', 'Luba-Kasai'),
    NllbLang('lug_Latn', 'Ganda'),
    NllbLang('luo_Latn', 'Luo'),
    NllbLang('lus_Latn', 'Mizo'),
    NllbLang('lvs_Latn', 'Standard Latvian', 'lv', 'Latvian'),
    NllbLang('mag_Deva', 'Magahi'),
    NllbLang('mai_Deva', 'Maithili'),
    NllbLang('mal_Mlym', 'Malayalam', 'ml', 'Malayalam'),
    NllbLang('mar_Deva', 'Marathi', 'mr', 'Marathi'),
    NllbLang('min_Arab', 'Minangkabau (Arabic script)'),
    NllbLang('min_Latn', 'Minangkabau (Latin script)'),
    NllbLang('mkd_Cyrl', 'Macedonian', 'mk', 'Macedonian'),
    NllbLang('plt_Latn', 'Plateau Malagasy', 'mg', 'Malagasy'),
    NllbLang('mlt_Latn', 'Maltese', 'mt', 'Maltese'),
    NllbLang('mni_Beng', 'Meitei (Bengali script)'),
    NllbLang('khk_Cyrl', 'Halh Mongolian', 'mn', 'Mongolian'),
    NllbLang('mos_Latn', 'Mossi'),
    NllbLang('mri_Latn', 'Maori', 'mi', 'Maori'),
    NllbLang('mya_Mymr', 'Burmese', 'my', 'Myanmar'),
    NllbLang('nld_Latn', 'Dutch', 'nl', 'Dutch'),
    NllbLang('nno_Latn', 'Norwegian Nynorsk', 'nn', 'Nynorsk'),
    NllbLang('nob_Latn', 'Norwegian Bokmål', 'no', 'Norwegian'),
    NllbLang('npi_Deva', 'Nepali', 'ne', 'Nepali'),
    NllbLang('nso_Latn', 'Northern Sotho'),
    NllbLang('nus_Latn', 'Nuer'),
    NllbLang('nya_Latn', 'Nyanja'),
    NllbLang('oci_Latn', 'Occitan', 'oc', 'Occitan'),
    NllbLang('gaz_Latn', 'West Central Oromo'),
    NllbLang('ory_Orya', 'Odia'),
    NllbLang('pag_Latn', 'Pangasinan'),
    NllbLang('pan_Guru', 'Eastern Panjabi', 'pa', 'Punjabi'),
    NllbLang('pap_Latn', 'Papiamento'),
    NllbLang('pes_Arab', 'Western Persian', 'fa', 'Persian'),
    NllbLang('pol_Latn', 'Polish', 'pl', 'Polish'),
    NllbLang('por_Latn', 'Portuguese', 'pt', 'Portuguese'),
    NllbLang('prs_Arab', 'Dari'),
    NllbLang('pbt_Arab', 'Southern Pashto', 'ps', 'Pashto'),
    NllbLang('quy_Latn', 'Ayacucho Quechua'),
    NllbLang('ron_Latn', 'Romanian', 'ro', 'Romanian'),
    NllbLang('run_Latn', 'Rundi'),
    NllbLang('rus_Cyrl', 'Russian', 'ru', 'Russian'),
    NllbLang('sag_Latn', 'Sango'),
    NllbLang('san_Deva', 'Sanskrit', 'sa', 'Sanskrit'),
    NllbLang('sat_Olck', 'Santali'),
    NllbLang('scn_Latn', 'Sicilian'),
    NllbLang('shn_Mymr', 'Shan'),
    NllbLang('sin_Sinh', 'Sinhala', 'si', 'Sinhala'),
    NllbLang('slk_Latn', 'Slovak', 'sk', 'Slovak'),
    NllbLang('slv_Latn', 'Slovenian', 'sl', 'Slovenian'),
    NllbLang('smo_Latn', 'Samoan'),
    NllbLang('sna_Latn', 'Shona', 'sn', 'Shona'),
    NllbLang('snd_Arab', 'Sindhi', 'sd', 'Sindhi'),
    NllbLang('som_Latn', 'Somali', 'so', 'Somali'),
    NllbLang('sot_Latn', 'Southern Sotho'),
    NllbLang('spa_Latn', 'Spanish', 'es', 'Spanish'),
    NllbLang('als_Latn', 'Tosk Albanian', 'sq', 'Albanian'),
    NllbLang('srd_Latn', 'Sardinian'),
    NllbLang('srp_Cyrl', 'Serbian', 'sr', 'Serbian'),
    NllbLang('ssw_Latn', 'Swati'),
    NllbLang('sun_Latn', 'Sundanese', 'su', 'Sundanese'),
    NllbLang('swe_Latn', 'Swedish', 'sv', 'Swedish'),
    NllbLang('swh_Latn', 'Swahili', 'sw', 'Swahili'),
    NllbLang('szl_Latn', 'Silesian'),
    NllbLang('tam_Taml', 'Tamil', 'ta', 'Tamil'),
    NllbLang('tat_Cyrl', 'Tatar', 'tt', 'Tatar'),
    NllbLang('tel_Telu', 'Telugu', 'te', 'Telugu'),
    NllbLang('tgk_Cyrl', 'Tajik', 'tg', 'Tajik'),
    NllbLang('tgl_Latn', 'Tagalog', 'tl', 'Tagalog'),
    NllbLang('tha_Thai', 'Thai', 'th', 'Thai'),
    NllbLang('tir_Ethi', 'Tigrinya'),
    NllbLang('taq_Latn', 'Tamasheq (Latin script)'),
    NllbLang('taq_Tfng', 'Tamasheq (Tifinagh script)'),
    NllbLang('tpi_Latn', 'Tok Pisin'),
    NllbLang('tsn_Latn', 'Tswana'),
    NllbLang('tso_Latn', 'Tsonga'),
    NllbLang('tuk_Latn', 'Turkmen', 'tk', 'Turkmen'),
    NllbLang('tum_Latn', 'Tumbuka'),
    NllbLang('tur_Latn', 'Turkish', 'tr', 'Turkish'),
    NllbLang('twi_Latn', 'Twi'),
    NllbLang('tzm_Tfng', 'Central Atlas Tamazight'),
    NllbLang('uig_Arab', 'Uyghur'),
    NllbLang('ukr_Cyrl', 'Ukrainian', 'uk', 'Ukrainian'),
    NllbLang('umb_Latn', 'Umbundu'),
    NllbLang('urd_Arab', 'Urdu', 'ur', 'Urdu'),
    NllbLang('uzn_Latn', 'Northern Uzbek', 'uz', 'Uzbek'),
    NllbLang('vec_Latn', 'Venetian'),
    NllbLang('vie_Latn', 'Vietnamese', 'vi', 'Vietnamese'),
    NllbLang('war_Latn', 'Waray'),
    NllbLang('wol_Latn', 'Wolof'),
    NllbLang('xho_Latn', 'Xhosa'),
    NllbLang('ydd_Hebr', 'Eastern Yiddish', 'yi', 'Yiddish'),
    NllbLang('yor_Latn', 'Yoruba', 'yo', 'Yoruba'),
    NllbLang('yue_Hant', 'Yue Chinese', 'zh', 'Chinese'),
    NllbLang('zho_Hans', 'Chinese (Simplified)', 'zh', 'Chinese'),
    NllbLang('zho_Hant', 'Chinese (Traditional)', 'zh', 'Chinese'),
    NllbLang('zsm_Latn', 'Standard Malay', 'ms', 'Malay'),
    NllbLang('zul_Latn', 'Zulu'),
]

# Lookup tables. Every key is lower-cased, so callers must lower-case the
# value they look up (the accessor functions below do this). When two entries
# produce the same key, the later entry in NLLB_LANGS overwrites the earlier.
_TO_NLLB_LANG_CODE = {
    language.code.lower(): language
    for language in NLLB_LANGS
    if language.code is not None
}
_TO_NLLB_LANG_NAME = {
    language.name.lower(): language
    for language in NLLB_LANGS
    if language.name is not None
}
_TO_NLLB_LANG_WHISPER_CODE = {
    language.code_whisper.lower(): language
    for language in NLLB_LANGS
    if language.code_whisper is not None
}
_TO_NLLB_LANG_WHISPER_NAME = {
    language.name_whisper.lower(): language
    for language in NLLB_LANGS
    if language.name_whisper is not None
}


def _normalize_key(value):
    """Lower-case string keys to match the lookup tables; pass others through."""
    return value.lower() if isinstance(value, str) else value


def get_nllb_lang_from_code(lang_code, default=None) -> NllbLang:
    """Return the language for an NLLB code such as ``'eng_Latn'``.

    The match is case-insensitive. ``default`` is returned when ``lang_code``
    is ``None``, empty, or not a known code.
    """
    # The table keys are lower-cased, so the query must be too; otherwise
    # canonical mixed-case codes like 'eng_Latn' would never match.
    return _TO_NLLB_LANG_CODE.get(_normalize_key(lang_code), default)


def get_nllb_lang_from_name(lang_name, default=None) -> NllbLang:
    """Return the language for an NLLB language name such as ``'English'``.

    The match is case-insensitive. ``default`` is returned when ``lang_name``
    is ``None``, empty, or not a known name.
    """
    return _TO_NLLB_LANG_NAME.get(_normalize_key(lang_name), default)


def get_nllb_lang_from_code_whisper(lang_code_whisper, default=None) -> NllbLang:
    """Return the language mapped to a Whisper code such as ``'en'``.

    The match is case-insensitive. When several entries share the Whisper
    code, the one listed last in ``NLLB_LANGS`` is returned. ``default`` is
    returned when the code is ``None``, empty, or unknown.
    """
    return _TO_NLLB_LANG_WHISPER_CODE.get(_normalize_key(lang_code_whisper), default)


def get_nllb_lang_from_name_whisper(lang_name_whisper, default=None) -> NllbLang:
    """Return the language mapped to a Whisper language name such as ``'English'``.

    The match is case-insensitive. When several entries share the Whisper
    name, the one listed last in ``NLLB_LANGS`` is returned. ``default`` is
    returned when the name is ``None``, empty, or unknown.
    """
    return _TO_NLLB_LANG_WHISPER_NAME.get(_normalize_key(lang_name_whisper), default)


def get_nllb_lang_names():
    """Return a list of language names, in ``NLLB_LANGS`` order."""
    return [language.name for language in NLLB_LANGS]


if __name__ == "__main__":
    # Test lookup
    print(get_nllb_lang_from_code('eng_Latn'))
    print(get_nllb_lang_from_name('English'))

    print(get_nllb_lang_names())