Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,970 Bytes
ac0fe1d bd9805e a4d0b27 bd9805e a4d0b27 bd9805e a4d0b27 bd9805e a4d0b27 1884e2f bd9805e 3e2e722 9634fea bd9805e 1884e2f bd9805e a4d0b27 bd9805e a4d0b27 0e3d21c a4d0b27 bd9805e fe57abb 1e6aa6b ac0fe1d f751f4e 112c6bb b272afc f751f4e ac0fe1d 112c6bb ac0fe1d f751f4e ac0fe1d 1e6aa6b a4d0b27 bd9805e 3e2e722 bd9805e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
from turtle import title
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Fine-tuned M2M100 checkpoint that both the tokenizer and the model load from.
model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"

# Language codes handed to the tokenizer (source) and to generation (target).
src_lang = "ru"
# NOTE(review): "zu" is presumably the language tag this checkpoint reuses for
# its Kabardian output -- confirm against the model card before changing it.
tgt_lang = "zu"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
def translate(text, num_beams=4, num_return_sequences=4):
    """Translate ``text`` and return the candidates as one bulleted string.

    Args:
        text: Sentence to translate. ``None``, empty, or whitespace-only
            input short-circuits to an empty result.
        num_beams: Beam count forwarded to ``model.generate``.
        num_return_sequences: Number of candidates requested; capped at
            ``num_beams``.

    Returns:
        The decoded candidates, one per line, each prefixed with ``"• "``;
        ``""`` when there is nothing to translate.
    """
    # Nothing to translate: skip tokenization and generation entirely.
    if not text or not text.strip():
        return ""

    # NOTE(review): UI sliders may hand these over as floats; coerce so the
    # generation call always receives integers.
    num_beams = int(num_beams)
    # Never request more candidates than there are beams.
    num_return_sequences = min(int(num_return_sequences), num_beams)

    inputs = tokenizer(text, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        # Force the first generated token to be the target-language code.
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    translations = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
    return "\n".join("• " + translation for translation in translations)
# Static text shown around the demo.
title = "Russian-Circassian translator demo"
article = "<p style='text-align: center'>Want to help? Join the <a href='https://discord.gg/cXwv495r' target='_blank'>Discord server</a></p>"

# Each example is a one-element row that fills the text input only.
examples = [
    [sentence]
    for sentence in (
        "Мы идем домой",
        "Сегодня хорошая погода",
        "Дети играют во дворе",
        "We live in a big house",
        "Tu es une bonne personne.",
        "أين تعيش؟",
        "Bir şeyler yapmak istiyorum.",
        "– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко.",
        "Как только старик ушел, Сатаней пошла к Саусырыко.",
        "我永远不会放弃你。",
        "우리는 소치에 살고 있습니다.",
    )
]

# Beam-search controls displayed next to the text box.
# with gr.Accordion("Advanced Options"):
num_beams = gr.inputs.Slider(
    minimum=2,
    maximum=10,
    step=1,
    default=4,
    label="Number of beams",
)
num_return_sequences = gr.inputs.Slider(
    minimum=2,
    maximum=10,
    step=1,
    default=4,
    label="Number of returned sentences",
)
output = gr.outputs.Textbox()

# Wire translate() to the widgets and start serving as soon as the script runs.
gr.Interface(
    fn=translate,
    inputs=["text", num_beams, num_return_sequences],
    outputs=output,
    title=title,
    examples=examples,
    article=article,
).launch()
# import gradio as gr
# title = "Русско-черкесский переводчик"
# description = "Demo of a Russian-Circassian (Kabardian dialect) translator. <br>It is based on Facebook's <a href=\"https://about.fb.com/news/2020/10/first-multilingual-machine-translation-model/\">M2M-100 model</a> machine learning model, and has been trained on 45,000 Russian-Circassian sentence pairs. <br>It can also translate from 100 other languages to Circassian (English, French, Spanish, etc.), but less accurately. <br>The data corpus is constantly being expanded, and we need help in finding sentence sources, OCR, data cleaning, etc. <br>If you are interested in helping out with this project, please contact me at the link below.<br><br>This is only a demo, not a finished product. Translation quality is still low and will improve with time and more data.<br>45,000 sentence pairs is not enough to create an accurate machine translation model, and more data is needed.<br>You can help by finding sentence sources (books, web pages, etc.), scanning books, OCRing documents, data cleaning, and other tasks.<br><br>If you are interested in helping out with this project, contact me at the link below."
# article = """<p style='text-align: center'><a href='https://arxiv.org/abs/1806.00187'>Scaling Neural Machine Translation</a> | <a href='https://github.com/pytorch/fairseq/'>Github Repo</a></p>"""
# examples = [
# ["Мы идем домой"],
# ["Сегодня хорошая погода"],
# ["Дети играют во дворе"],
# ["We live in a big house"],
# ["Tu es une bonne personne."],
# ["أين تعيش؟"],
# ["Bir şeyler yapmak istiyorum."],
# ]
# gr.Interface.load("models/anzorq/m2m100_418M_ft_ru-kbd_44K", title=title, description=description, article=article, examples=examples).launch() |