"""Streamlit app that translates text between languages with MarianMT models.

Every model listed in ``language_pairs`` translates to or from English, so a
pair without a direct model is handled by pivoting through English.
"""

import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Display name shown in the UI -> language code used in ``language_pairs``.
languages = {
    'English': 'en',
    'Urdu': 'ur',
    'French': 'fr',
    'Spanish': 'es',
    'German': 'de',
    'Chinese': 'zh',
    'Italian': 'it',
    'Russian': 'ru',
    'Japanese': 'ja',
    'Arabic': 'ar',
    'Hindi': 'hi',
}

# (source code, target code) -> Hugging Face model id for a direct translation.
language_pairs = {
    ('en', 'ur'): 'Helsinki-NLP/opus-mt-en-ur',
    ('ur', 'en'): 'Helsinki-NLP/opus-mt-ur-en',
    ('en', 'fr'): 'Helsinki-NLP/opus-mt-en-fr',
    ('fr', 'en'): 'Helsinki-NLP/opus-mt-fr-en',
    ('en', 'es'): 'Helsinki-NLP/opus-mt-en-es',
    ('es', 'en'): 'Helsinki-NLP/opus-mt-es-en',
    ('en', 'de'): 'Helsinki-NLP/opus-mt-en-de',
    ('de', 'en'): 'Helsinki-NLP/opus-mt-de-en',
    ('en', 'zh'): 'Helsinki-NLP/opus-mt-en-zh',
    ('zh', 'en'): 'Helsinki-NLP/opus-mt-zh-en',
    ('en', 'it'): 'Helsinki-NLP/opus-mt-en-it',
    ('it', 'en'): 'Helsinki-NLP/opus-mt-it-en',
    ('en', 'ru'): 'Helsinki-NLP/opus-mt-en-ru',
    ('ru', 'en'): 'Helsinki-NLP/opus-mt-ru-en',
    ('en', 'ja'): 'Helsinki-NLP/opus-mt-en-ja',
    ('ja', 'en'): 'Helsinki-NLP/opus-mt-ja-en',
    ('en', 'ar'): 'Helsinki-NLP/opus-mt-en-ar',
    ('ar', 'en'): 'Helsinki-NLP/opus-mt-ar-en',
    ('en', 'hi'): 'Helsinki-NLP/opus-mt-en-hi',
    ('hi', 'en'): 'Helsinki-NLP/opus-mt-hi-en',
    # Add more pairs as available
}

# Language used as the intermediate step when no direct model exists.
_PIVOT_LANG = 'en'


# Streamlit re-runs this script on every interaction; caching keeps loaded
# models alive across re-runs instead of re-instantiating them per click.
# ``max_entries`` bounds how many models are held in memory at once.
@st.cache_resource(max_entries=4)
def load_model(src_lang, tgt_lang):
    """Load the MarianMT model and tokenizer for a direct language pair.

    Args:
        src_lang: Source language code (a value of ``languages``).
        tgt_lang: Target language code (a value of ``languages``).

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If ``language_pairs`` has no entry for the pair.
    """
    model_name = language_pairs.get((src_lang, tgt_lang))
    if not model_name:
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def translate(text, src_lang, tgt_lang):
    """Translate ``text`` with the direct model for ``src_lang`` -> ``tgt_lang``.

    Args:
        text: Text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The decoded translation with special tokens removed.

    Raises:
        ValueError: If there is no direct model for the pair.
    """
    model, tokenizer = load_model(src_lang, tgt_lang)
    # Calling the tokenizer (rather than ``encode``) also yields the attention
    # mask; truncation caps the input at the tokenizer's maximum length so very
    # long text cannot exceed what the model accepts.
    inputs = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    )
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


def translate_chain(text, src_lang, tgt_lang):
    """Translate ``text`` by pivoting through English.

    Args:
        text: Text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The translated text, or ``text`` unchanged when both codes are equal.
    """
    if src_lang == tgt_lang:
        # A round trip through English would only degrade the text.
        return text
    if src_lang != _PIVOT_LANG:
        text = translate(text, src_lang, _PIVOT_LANG)
    if tgt_lang != _PIVOT_LANG:
        text = translate(text, _PIVOT_LANG, tgt_lang)
    return text


def translate_ui(text, source_language, target_language):
    """Translate ``text`` between two languages chosen by display name.

    Uses the direct model when one exists and otherwise pivots through
    English.

    Args:
        text: Text to translate.
        source_language: Display name of the source language (key of
            ``languages``).
        target_language: Display name of the target language (key of
            ``languages``).

    Returns:
        The translated text, or ``text`` unchanged when both languages are
        the same.
    """
    src_lang = languages[source_language]
    tgt_lang = languages[target_language]
    if src_lang == tgt_lang:
        return text
    # Check for a direct model explicitly instead of catching ValueError, so
    # an unrelated ValueError raised while translating is not mistaken for
    # "no direct model" and silently retried through the pivot language.
    if (src_lang, tgt_lang) in language_pairs:
        return translate(text, src_lang, tgt_lang)
    return translate_chain(text, src_lang, tgt_lang)


# Streamlit App UI
st.title("Multilingual Translator")
st.write("Translate text between various languages including Urdu, French, Spanish, and more.")

# Input text
text = st.text_area("Enter text to translate", height=100)

# Source and Target Languages
source_language = st.selectbox("Select Source Language", list(languages.keys()))
target_language = st.selectbox("Select Target Language", list(languages.keys()))

# Translate Button
if st.button("Translate"):
    if text.strip():
        try:
            with st.spinner("Translating..."):
                translation = translate_ui(text, source_language, target_language)
        except OSError as err:
            # ``from_pretrained`` reports a model that cannot be downloaded or
            # loaded as an OSError; show it as a message, not a traceback.
            st.error(f"Could not load the translation model: {err}")
        else:
            st.text_area("Translated Text", translation, height=100)
    else:
        st.warning("Please enter text to translate.")

# About Section
st.sidebar.title("About")
st.sidebar.info(
    """
    This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
    """
)