# Language lookup tables shared by the speech-recognition and translation
# back ends, plus name <-> code maps derived from them at import time.

# OpenAI Whisper - Supported Languages (lower-case English names)
whisper_languages = [
    'afrikaans', 'arabic', 'armenian', 'azerbaijani', 'belarusian',
    'bosnian', 'bulgarian', 'catalan', 'chinese', 'croatian', 'czech',
    'danish', 'dutch', 'english', 'estonian', 'finnish', 'french',
    'galician', 'german', 'greek', 'hebrew', 'hindi', 'hungarian',
    'icelandic', 'indonesian', 'italian', 'japanese', 'kannada', 'kazakh',
    'korean', 'latvian', 'lithuanian', 'macedonian', 'malay', 'marathi',
    'maori', 'nepali', 'norwegian', 'persian', 'polish', 'portuguese',
    'romanian', 'russian', 'serbian', 'slovak', 'slovenian', 'spanish',
    'swahili', 'swedish', 'tagalog', 'tamil', 'thai', 'turkish',
    'ukrainian', 'urdu', 'vietnamese', 'welsh',
]

# Google Translate - Supported Languages (lower-case name -> language code)
gt_languages = {
    'afrikaans': 'af', 'albanian': 'sq', 'amharic': 'am', 'arabic': 'ar',
    'armenian': 'hy', 'assamese': 'as', 'aymara': 'ay', 'azerbaijani': 'az',
    'bambara': 'bm', 'basque': 'eu', 'belarusian': 'be', 'bengali': 'bn',
    'bhojpuri': 'bho', 'bosnian': 'bs', 'bulgarian': 'bg', 'catalan': 'ca',
    'cebuano': 'ceb', 'chichewa': 'ny',
    'chinese (simplified)': 'zh-CN', 'chinese (traditional)': 'zh-TW',
    'corsican': 'co', 'croatian': 'hr', 'czech': 'cs', 'danish': 'da',
    'dhivehi': 'dv', 'dogri': 'doi', 'dutch': 'nl', 'english': 'en',
    'esperanto': 'eo', 'estonian': 'et', 'ewe': 'ee', 'filipino': 'tl',
    'finnish': 'fi', 'french': 'fr', 'frisian': 'fy', 'galician': 'gl',
    'georgian': 'ka', 'german': 'de', 'greek': 'el', 'guarani': 'gn',
    'gujarati': 'gu', 'haitian creole': 'ht', 'hausa': 'ha',
    'hawaiian': 'haw', 'hebrew': 'iw', 'hindi': 'hi', 'hmong': 'hmn',
    'hungarian': 'hu', 'icelandic': 'is', 'igbo': 'ig', 'ilocano': 'ilo',
    'indonesian': 'id', 'irish': 'ga', 'italian': 'it', 'japanese': 'ja',
    'javanese': 'jw', 'kannada': 'kn', 'kazakh': 'kk', 'khmer': 'km',
    'kinyarwanda': 'rw', 'konkani': 'gom', 'korean': 'ko', 'krio': 'kri',
    'kurdish (kurmanji)': 'ku', 'kurdish (sorani)': 'ckb', 'kyrgyz': 'ky',
    'lao': 'lo', 'latin': 'la', 'latvian': 'lv', 'lingala': 'ln',
    'lithuanian': 'lt', 'luganda': 'lg', 'luxembourgish': 'lb',
    'macedonian': 'mk', 'maithili': 'mai', 'malagasy': 'mg', 'malay': 'ms',
    'malayalam': 'ml', 'maltese': 'mt', 'maori': 'mi', 'marathi': 'mr',
    'meiteilon (manipuri)': 'mni-Mtei', 'mizo': 'lus', 'mongolian': 'mn',
    'myanmar': 'my', 'nepali': 'ne', 'norwegian': 'no',
    'odia (oriya)': 'or', 'oromo': 'om', 'pashto': 'ps', 'persian': 'fa',
    'polish': 'pl', 'portuguese': 'pt', 'punjabi': 'pa', 'quechua': 'qu',
    'romanian': 'ro', 'russian': 'ru', 'samoan': 'sm', 'sanskrit': 'sa',
    'scots gaelic': 'gd', 'sepedi': 'nso', 'serbian': 'sr', 'sesotho': 'st',
    'shona': 'sn', 'sindhi': 'sd', 'sinhala': 'si', 'slovak': 'sk',
    'slovenian': 'sl', 'somali': 'so', 'spanish': 'es', 'sundanese': 'su',
    'swahili': 'sw', 'swedish': 'sv', 'tajik': 'tg', 'tamil': 'ta',
    'tatar': 'tt', 'telugu': 'te', 'thai': 'th', 'tigrinya': 'ti',
    'tsonga': 'ts', 'turkish': 'tr', 'turkmen': 'tk', 'twi': 'ak',
    'ukrainian': 'uk', 'urdu': 'ur', 'uyghur': 'ug', 'uzbek': 'uz',
    'vietnamese': 'vi', 'welsh': 'cy', 'xhosa': 'xh', 'yiddish': 'yi',
    'yoruba': 'yo', 'zulu': 'zu',
}

# NLLB - Supported Languages (display names, title-cased)
nllb_languages = [
    'Acehnese (Arabic script)', 'Acehnese (Latin script)', 'Afrikaans',
    'Akan', 'Amharic', 'Armenian', 'Assamese', 'Asturian', 'Awadhi',
    'Ayacucho Quechua', 'Balinese', 'Bambara', 'Banjar (Arabic script)',
    'Banjar (Latin script)', 'Bashkir', 'Basque', 'Belarusian', 'Bemba',
    'Bengali', 'Bhojpuri', 'Bosnian', 'Buginese', 'Bulgarian', 'Burmese',
    'Catalan', 'Cebuano', 'Central Atlas Tamazight', 'Central Aymara',
    'Central Kanuri (Arabic script)', 'Central Kanuri (Latin script)',
    'Central Kurdish', 'Chhattisgarhi', 'Chinese (Simplified)',
    'Chinese (Traditional)', 'Chokwe', 'Crimean Tatar', 'Croatian', 'Czech',
    'Danish', 'Dari', 'Dutch', 'Dyula', 'Dzongkha', 'Eastern Panjabi',
    'Eastern Yiddish', 'Egyptian Arabic', 'English', 'Esperanto',
    'Estonian', 'Ewe', 'Faroese', 'Fijian', 'Finnish', 'Fon', 'French',
    'Friulian', 'Galician', 'Ganda', 'Georgian', 'German', 'Greek',
    'Guarani', 'Gujarati', 'Haitian Creole', 'Halh Mongolian', 'Hausa',
    'Hebrew', 'Hindi', 'Hungarian', 'Icelandic', 'Igbo', 'Ilocano',
    'Indonesian', 'Irish', 'Italian', 'Japanese', 'Javanese', 'Jingpho',
    'Kabiyè', 'Kabuverdianu', 'Kabyle', 'Kamba', 'Kannada',
    'Kashmiri (Arabic script)', 'Kashmiri (Devanagari script)', 'Kazakh',
    'Khmer', 'Kikongo', 'Kikuyu', 'Kimbundu', 'Kinyarwanda', 'Korean',
    'Kyrgyz', 'Lao', 'Latgalian', 'Ligurian', 'Limburgish', 'Lingala',
    'Lithuanian', 'Lombard', 'Luba-Kasai', 'Luo', 'Luxembourgish',
    'Macedonian', 'Magahi', 'Maithili', 'Malayalam', 'Maltese', 'Maori',
    'Marathi', 'Meitei (Bengali script)', 'Mesopotamian Arabic',
    'Minangkabau (Arabic script)', 'Minangkabau (Latin script)', 'Mizo',
    'Modern Standard Arabic', 'Modern Standard Arabic (Romanized)',
    'Moroccan Arabic', 'Mossi', 'Najdi Arabic', 'Nepali',
    'Nigerian Fulfulde', 'North Azerbaijani', 'North Levantine Arabic',
    'Northern Kurdish', 'Northern Sotho', 'Northern Uzbek',
    'Norwegian Bokmål', 'Norwegian Nynorsk', 'Nuer', 'Nyanja', 'Occitan',
    'Odia', 'Pangasinan', 'Papiamento', 'Plateau Malagasy', 'Polish',
    'Portuguese', 'Romanian', 'Rundi', 'Russian', 'Samoan', 'Sango',
    'Sanskrit', 'Santali', 'Sardinian', 'Scottish Gaelic', 'Serbian',
    'Shan', 'Shona', 'Sicilian', 'Silesian', 'Sindhi', 'Sinhala', 'Slovak',
    'Slovenian', 'Somali', 'South Azerbaijani', 'South Levantine Arabic',
    'Southern Pashto', 'Southern Sotho', 'Southwestern Dinka', 'Spanish',
    'Standard Latvian', 'Standard Malay', 'Standard Tibetan', 'Sundanese',
    'Swahili', 'Swati', 'Swedish', "Ta'izzi-Adeni Arabic", 'Tagalog',
    'Tajik', 'Tamasheq (Latin script)', 'Tamasheq (Tifinagh script)',
    'Tamil', 'Tatar', 'Telugu', 'Thai', 'Tigrinya', 'Tok Pisin',
    'Tosk Albanian', 'Tsonga', 'Tswana', 'Tumbuka', 'Tunisian Arabic',
    'Turkish', 'Turkmen', 'Twi', 'Ukrainian', 'Umbundu', 'Urdu', 'Uyghur',
    'Venetian', 'Vietnamese', 'Waray', 'Welsh', 'West Central Oromo',
    'Western Persian', 'Wolof', 'Xhosa', 'Yoruba', 'Yue Chinese', 'Zulu',
]

# Right to Left Written Languages (language code -> display name)
r2l_languages = {
    'ar': 'Arabic',
    'he': 'Hebrew',
    # gt_languages above maps Hebrew to 'iw', so accept that code as well;
    # otherwise a lookup by Google Translate code would miss Hebrew.
    'iw': 'Hebrew',
    'fa': 'Persian (Farsi)',
    'ur': 'Urdu',
    'yi': 'Yiddish',
    'sd': 'Sindhi',
    'ps': 'Pashto',
    # Add more language codes as needed
}

# Cross-reference NLLB display names with Google Translate codes.
# An NLLB name is mapped only when its lower-cased form is exactly a key of
# gt_languages (e.g. 'Afrikaans' -> 'af'); names that differ between the two
# tables (e.g. 'Modern Standard Arabic' vs 'arabic') are left out of both maps.
CODE2LANG = {}  # Google Translate code -> NLLB display name
LANG2CODE = {}  # NLLB display name -> Google Translate code
for lang in nllb_languages:
    lang_code = gt_languages.get(lang.lower())
    if lang_code:
        CODE2LANG[lang_code] = lang
        LANG2CODE[lang] = lang_code