# datascientist22's picture
# Upload 2 files
# 51d3578 verified
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
# Lookup table: language display name -> Hugging Face model identifier.
# Every identifier has the form "Helsinki-NLP/opus-mt-en-<code>", so only the
# (name, code) pairs are listed and the full identifier is assembled below.
# Pair order is the order in which the dictionary yields its keys.
# NOTE(review): nothing here checks that each identifier actually exists on
# the model hub (e.g. the "English" entry resolves to an en->en identifier);
# confirm before relying on a given entry.
models = {
    language: f"Helsinki-NLP/opus-mt-en-{code}"
    for language, code in (
        ("Afrikaans", "af"),
        ("Amharic", "am"),
        ("Arabic", "ar"),
        ("Asturian", "ast"),
        ("Azerbaijani", "az"),
        ("Bashkir", "ba"),
        ("Belarusian", "be"),
        ("Bulgarian", "bg"),
        ("Bengali", "bn"),
        ("Breton", "br"),
        ("Bosnian", "bs"),
        ("Catalan", "ca"),
        ("Cebuano", "ceb"),
        ("Czech", "cs"),
        ("Welsh", "cy"),
        ("Danish", "da"),
        ("German", "de"),
        ("Greek", "el"),
        ("English", "en"),
        ("Spanish", "es"),
        ("Estonian", "et"),
        ("Persian", "fa"),
        ("Fulah", "ff"),
        ("Finnish", "fi"),
        ("French", "fr"),
        ("Western Frisian", "fy"),
        ("Irish", "ga"),
        ("Scottish Gaelic", "gd"),
        ("Galician", "gl"),
        ("Gujarati", "gu"),
        ("Hausa", "ha"),
        ("Hebrew", "he"),
        ("Hindi", "hi"),
        ("Croatian", "hr"),
        ("Haitian Creole", "ht"),
        ("Hungarian", "hu"),
        ("Armenian", "hy"),
        ("Indonesian", "id"),
        ("Igbo", "ig"),
        ("Iloko", "ilo"),
        ("Icelandic", "is"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Javanese", "jv"),
        ("Georgian", "ka"),
        ("Kazakh", "kk"),
        ("Central Khmer", "km"),
        ("Kannada", "kn"),
        ("Korean", "ko"),
        ("Luxembourgish", "lb"),
        ("Ganda", "lg"),
        ("Lingala", "ln"),
        ("Lao", "lo"),
        ("Lithuanian", "lt"),
        ("Latvian", "lv"),
        ("Malagasy", "mg"),
        ("Macedonian", "mk"),
        ("Malayalam", "ml"),
        ("Mongolian", "mn"),
        ("Marathi", "mr"),
        ("Malay", "ms"),
        ("Burmese", "my"),
        ("Nepali", "ne"),
        ("Dutch", "nl"),
        ("Norwegian", "no"),
        ("Northern Sotho", "ns"),
        ("Occitan", "oc"),
        ("Oriya", "or"),
        ("Panjabi", "pa"),
        ("Polish", "pl"),
        ("Pushto", "ps"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Sindhi", "sd"),
        ("Sinhala", "si"),
        ("Slovak", "sk"),
        ("Slovenian", "sl"),
        ("Somali", "so"),
        ("Albanian", "sq"),
        ("Serbian", "sr"),
        ("Swati", "ss"),
        ("Sundanese", "su"),
        ("Swedish", "sv"),
        ("Swahili", "sw"),
        ("Tamil", "ta"),
        ("Thai", "th"),
        ("Tagalog", "tl"),
        ("Tswana", "tn"),
        ("Turkish", "tr"),
        ("Ukrainian", "uk"),
        ("Urdu", "ur"),
        ("Uzbek", "uz"),
        ("Vietnamese", "vi"),
        ("Wolof", "wo"),
        ("Xhosa", "xh"),
        ("Yiddish", "yi"),
        ("Yoruba", "yo"),
        ("Chinese", "zh"),
        ("Zulu", "zu"),
    )
}
@st.cache_resource(max_entries=3, show_spinner=False)
def _load_pretrained(model_name):
    """Instantiate the Marian model and tokenizer for *model_name*.

    Wrapped in ``st.cache_resource`` so the objects are created once and then
    reused across Streamlit reruns instead of being rebuilt on every button
    click. ``max_entries`` bounds how many distinct models stay in memory.
    Exceptions raised here are not cached, so a failed load is retried on the
    next call.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def load_model(language):
    """Load the model and tokenizer for the specified target language.

    Args:
        language: A key of the module-level ``models`` mapping.

    Returns:
        A ``(model, tokenizer)`` tuple, or ``(None, None)`` when the language
        has no entry in ``models`` or the model could not be loaded. In both
        failure cases an error message is shown through ``st.error``.
    """
    model_name = models.get(language)
    if not model_name:
        st.error(f"Model for {language} not found.")
        return None, None

    try:
        return _load_pretrained(model_name)
    except OSError as exc:
        # from_pretrained reports an unknown repository id or an unreachable
        # hub as OSError; surface it in the UI rather than as a traceback.
        st.error(f"Could not load the model for {language}: {exc}")
        return None, None
def translate_text(text, model, tokenizer):
    """Translate text using the provided model and tokenizer.

    Args:
        text: Source text to translate.
        model: Object exposing ``generate(**inputs)``.
        tokenizer: Callable tokenizer that also exposes ``decode``.

    Returns:
        The decoded translation with special tokens removed, or an empty
        string when *text* is empty or contains only whitespace.

    Note:
        The input is tokenized with ``truncation=True``, so text longer than
        the tokenizer's maximum length is cut off rather than passed to the
        model at full length.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # Calling the tokenizer (instead of ``encode``) also yields the attention
    # mask, which is forwarded to ``generate`` together with the input ids.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
def main():
    """Render the translator page and translate on button press.

    Shows the title and author credit, a target-language selector populated
    from ``models``, and an English text area. When "Translate" is pressed
    with non-blank text, the model for the chosen language is loaded through
    ``load_model`` and the result of ``translate_text`` is displayed. Blank
    input produces a warning instead.
    """
    st.title("🌐 Multilingual Translator")
    st.markdown("Created by: [**Engr. Hamesh Raj**](https://www.linkedin.com/in/datascientisthameshraj/)")

    # Target language selection
    target_language = st.selectbox("Select target language:", list(models))

    # Input text area
    text_to_translate = st.text_area("Enter text in English:")

    if not st.button("Translate"):
        return

    # Treat whitespace-only input the same as empty input.
    if not text_to_translate or not text_to_translate.strip():
        st.warning("Please enter text to translate.")
        return

    # Load the model based on target language
    with st.spinner("Loading translation model..."):
        model, tokenizer = load_model(target_language)

    # load_model signals failure with (None, None); compare against None
    # explicitly rather than relying on the truthiness of the objects.
    if model is None or tokenizer is None:
        return

    with st.spinner("Translating..."):
        translated_text = translate_text(text_to_translate, model, tokenizer)

    st.write(f"**Translation in {target_language}:**")
    st.write(translated_text)


# Run the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()