PDL_translate / app.py
vteam27
updated examples
ef6d6f0
raw
history blame
4.37 kB
import gradio as gr
from lang_list import (
LANGUAGE_NAME_TO_CODE,
T2TT_TARGET_LANGUAGE_NAMES,
TEXT_SOURCE_LANGUAGE_NAMES,
)
from transformers import AutoProcessor, SeamlessM4TForTextToText

# Language pre-selected in the "Target language" dropdown.
DEFAULT_TARGET_LANGUAGE = "English"

# Both the tokenizer/processor and the model weights come from the same
# checkpoint, so the identifier is spelled out once.
_CHECKPOINT = "facebook/hf-seamless-m4t-medium"

# Loaded once at import time and shared by every translation request.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = SeamlessM4TForTextToText.from_pretrained(_CHECKPOINT)
# Minimal standalone usage of the processor/model, kept for reference (not executed):
# text_inputs = processor(text="Hello, my dog is cute", src_lang="eng", return_tensors="pt")
# output_tokens = model.generate(**text_inputs, tgt_lang="pan")
# translated_text_from_text = processor.decode(output_tokens[0].tolist(), skip_special_tokens=True)
# print(translated_text_from_text)
def run_t2tt(input_text: str, source_language: str, target_language: str) -> str:
    """Translate ``input_text`` from ``source_language`` into ``target_language``.

    Args:
        input_text: The text to translate.
        source_language: Display name of the source language; must be a key
            of ``LANGUAGE_NAME_TO_CODE``.
        target_language: Display name of the target language; must be a key
            of ``LANGUAGE_NAME_TO_CODE``.

    Returns:
        The decoded translation with special tokens stripped, or an empty
        string when ``input_text`` is empty or whitespace-only.

    Raises:
        KeyError: If either language name is not in ``LANGUAGE_NAME_TO_CODE``.
    """
    # Nothing to translate: avoid a pointless (and slow) generation pass on an
    # empty prompt, whose output would be meaningless to the user anyway.
    if not input_text or not input_text.strip():
        return ""

    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]

    text_inputs = processor(
        text=input_text,
        src_lang=source_language_code,
        return_tensors="pt",
    )
    output_tokens = model.generate(**text_inputs, tgt_lang=target_language_code)

    # A single text was submitted, so only the first generated sequence is decoded.
    output = processor.decode(output_tokens[0].tolist(), skip_special_tokens=True)
    return str(output)
# Text-to-text translation UI: an input column (text box, language pickers,
# button) next to an output column, followed by clickable examples.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost — confirm the layout against the repository.
with gr.Blocks() as demo_t2tt:
    with gr.Row():
        # Left column: everything the user fills in.
        with gr.Column():
            with gr.Group():
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    source_language = gr.Dropdown(
                        label="Source language",
                        choices=TEXT_SOURCE_LANGUAGE_NAMES,
                        value="English",
                    )
                    target_language = gr.Dropdown(
                        label="Target language",
                        choices=T2TT_TARGET_LANGUAGE_NAMES,
                        value=DEFAULT_TARGET_LANGUAGE,
                    )
            btn = gr.Button("Translate")
        # Right column: the translation result.
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")

    # Each example row is [input text, source language, target language],
    # matching the order of `inputs` below.
    # NOTE(review): cache_examples=True presumably makes Gradio run `run_t2tt`
    # on every example up front to store the outputs — confirm the startup cost
    # is acceptable.
    gr.Examples(
        examples=[
            [
                "The sinister destruction of the holy Akal Takht and the ruthless massacre of thousands of innocent pilgrims had unmasked the deep-seated hatred and animosity that the Indian Government had been nurturing against Sikhs ever since inde- pendence",
                "English",
                "Punjabi",
            ],
            [
                "It contains. much useful information about administrative, revenue, judicial and ecclesiastical activities in various areas which, it is hoped, would supplement the information available in official records.",
                "English",
                "Hindi",
            ],
            [
                "दुनिया में बहुत सी अलग-अलग भाषाएं हैं और उनमें अपने वर्ण और शब्दों का भंडार होता है. इसमें में कुछ उनके अपने शब्द होते हैं तो कुछ ऐसे भी हैं, जो दूसरी भाषाओं से लिए जाते हैं.",
                "Hindi",
                "Punjabi",
            ],
            [
                "ਸੂੂਬੇ ਦੇ ਕਈ ਜ਼ਿਲ੍ਹਿਆਂ ’ਚ ਬੁੱਧਵਾਰ ਸਵੇਰੇ ਸੰਘਣੀ ਧੁੰਦ ਛਾਈ ਰਹੀ ਤੇ ਤੇਜ਼ ਹਵਾਵਾਂ ਨੇ ਕਾਂਬਾ ਹੋਰ ਵਧਾ ਦਿੱਤਾ। ਸੱਤ ਸ਼ਹਿਰਾਂ ’ਚ ਦਿਨ ਦਾ ਤਾਪਮਾਨ ਦਸ ਡਿਗਰੀ ਸੈਲਸੀਅਸ ਦੇ ਆਸਪਾਸ ਰਿਹਾ। ਸੂਬੇ ’ਚ ਵੱਧ ਤੋਂ ਵੱਧ ਤਾਪਮਾਨ ’ਚ ਵੀ ਦਸ ਡਿਗਰੀ ਸੈਲਸੀਅਸ ਦੀ ਗਿਰਾਵਟ ਦਰਜ ਕੀਤੀ ਗਈ",
                "Punjabi",
                "English",
            ],
        ],
        inputs=[input_text, source_language, target_language],
        outputs=output_text,
        fn=run_t2tt,
        cache_examples=True,
        api_name=False,
    )

    # Pressing Enter in the text box and clicking the button both run the
    # same translation handler.
    gr.on(
        triggers=[input_text.submit, btn.click],
        fn=run_t2tt,
        inputs=[input_text, source_language, target_language],
        outputs=output_text,
        api_name="t2tt",
    )
# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo_t2tt.launch()