""" This file contains the functions to translate the text from one language to another. """ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer from deep_translator import GoogleTranslator, MyMemoryTranslator, MicrosoftTranslator, YandexTranslator, ChatGptTranslator from .text_preprocess import decontracting_words, space_punc from dotenv import load_dotenv import os # Load the environment variables from the .env file load_dotenv() # Translators API Keys MICROSOFT_API_KEY = os.getenv("MICROSOFT_TRANSLATOR_KEY") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") YANDEX_API_KEY = os.getenv("YANDEX_API_KEY") # Digit Translation digit_converter = { '০': '0', '১': '1', '২': '2', '৩': '3', '৪': '4', '৫': '5', '৬': '6', '৭': '7', '৮': '8', '৯': '9' } def get_translated_digit(sentence): """ Translate the digits from Bengali to English """ translated_sentence = [] for each_letter in sentence: if each_letter in digit_converter.keys(): translated_sentence.append(digit_converter[each_letter]) # print(digit_converter[each_letter], end="") else: translated_sentence.append(each_letter) # print(each_letter, end="") return "".join(each for each in translated_sentence) # Bangla to English Translation (BUET BanglaNMT) translation_model_bn_en = AutoModelForSeq2SeqLM.from_pretrained("csebuetnlp/banglat5_nmt_bn_en") translation_tokenizer_bn_en = AutoTokenizer.from_pretrained("csebuetnlp/banglat5_nmt_bn_en") def banglanmt_translation(input_text): """ Translate a sentence from Bengali to English using BUET BanglaNMT """ inputs = translation_tokenizer_bn_en(input_text, return_tensors="pt") outputs = translation_model_bn_en.generate(**inputs) translated_text = translation_tokenizer_bn_en.decode(outputs[0], skip_special_tokens=True) return translated_text def google_translation(sentence: str, source="bn", target="en") -> str: """ Translate a sentence from one language to another using Google Translator.\n At first install dependencies \n `!pip install -U deep-translator` """ translator = GoogleTranslator() translated_sentence = translator.translate( sentence, source=source, target=target) return translated_sentence def microsoft_translation(sentence: str, source="bn", target="en") -> str: """ Translate a sentence from one language to another using Microsoft Translator.\n At first install dependencies \n `!pip install -U deep-translator` """ translator = MicrosoftTranslator(api_key=MICROSOFT_API_KEY, target='en') translated_sentence = translator.translate(sentence) return translated_sentence def chatgpt_translation(sentence: str, source="bn", target="en") -> str: """ Translate a sentence from one language to another using ChatGPT Translator.\n At first install dependencies \n `!pip install -U deep-translator` """ translator = ChatGptTranslator(api_key=OPENAI_API_KEY, target=target) translated_sentence = translator.translate(sentence) return translated_sentence def yandex_translation(sentence: str, source="bn", target="en") -> str: """ Translate a sentence from one language to another using Yandex Translator.\n At first install dependencies \n `!pip install -U deep-translator` """ translator = YandexTranslator(api_key=YANDEX_API_KEY) translated_sentence = translator.translate( sentence, source=source, target=target) return translated_sentence def mymemory_translation(sentence: str, source="bn-IN", target="en-US") -> str: """ Translate a sentence from one language to another using MyMemory Translator.\n At first install dependencies \n `!pip install -U deep-translator` """ translator = MyMemoryTranslator(source=source, target=target) translated_sentence = translator.translate(sentence) return translated_sentence def get_better_translation(translator_func, src=""): src_mod = get_translated_digit(src) tgt = translator_func(src_mod) tgt = decontracting_words(tgt) tgt = tgt.replace('rupees', 'takas').replace('Rs', 'takas') return tgt def select_translator(src, translator): """ Select the translator """ tgt = None tgt_base = None if translator == "Google": tgt = get_better_translation(google_translation, src) tgt = space_punc(tgt) tgt_base = google_translation(src) elif translator == "BanglaNMT": tgt = get_better_translation(banglanmt_translation, src) tgt = space_punc(tgt) tgt_base = banglanmt_translation(src) elif translator == "MyMemory": tgt = get_better_translation(mymemory_translation, src) tgt = space_punc(tgt) tgt_base = mymemory_translation(src) return tgt_base, tgt