anzorq committed on
Commit
59eb871
·
1 Parent(s): 72117dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -30
app.py CHANGED
@@ -1,46 +1,61 @@
1
  import gradio as gr
2
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
- from huggingface_hub import hf_hub_download
4
- import fasttext
5
 
6
- # Initialize fastText model
7
- # model_path = 'lid.323.ftz'
8
- # language_model = fasttext.load_model(model_path)
9
 
10
- lid_model = fasttext.load_model(hf_hub_download("facebook/fasttext-language-identification", "model.bin"))
11
- model_path_translation = "anzorq/m2m100_418M_ft_ru-kbd_44K"
12
  tgt_lang="zu"
13
 
14
- tokenizer = AutoTokenizer.from_pretrained(model_path_translation)
15
- model = AutoModelForSeq2SeqLM.from_pretrained(model_path_translation, use_safetensors=True)
 
16
 
17
  def translate(text, num_beams=4, num_return_sequences=4):
18
- # Detect language
19
- languages, _ = lid_model.predict(text, k=1)
20
- detected_language = languages[0].replace("__label__", "")
21
-
22
- inputs = tokenizer(text, return_tensors="pt")
23
- num_return_sequences = min(num_return_sequences, num_beams)
24
- translated_tokens = model.generate(
25
- **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
26
- )
27
- translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]
28
-
29
- return detected_language, text, translations
30
 
31
- title = "Russian-Circassian translator demo"
32
- article = "<p style='text-align: center'>Want to help? Join the <a href='https://discord.gg/cXwv495r' target='_blank'>Discord server</a></p>"
 
 
 
 
 
 
 
33
 
 
 
 
 
 
34
  num_beams = gr.inputs.Slider(2, 10, step=1, label="Number of beams", default=4)
35
  num_return_sequences = gr.inputs.Slider(2, 10, step=1, label="Number of returned sentences", default=4)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  gr.Interface(
38
- fn=translate,
39
- inputs=["text", num_beams, num_return_sequences],
40
- outputs=["text", "text", gr.Textbox()],
41
- titles=["Detected Language", "Input", "Translations"],
42
- title=title,
43
- article=article).launch()
44
 
45
  # import gradio as gr
46
 
 
1
  import gradio as gr
 
 
 
2
 
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
4
 
5
+ model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
6
+ src_lang="ru"
7
  tgt_lang="zu"
8
 
9
+ # tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
10
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
11
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_safetensors=True)#, load_in_4bit=True, device_map="auto")
12
 
13
  def translate(text, num_beams=4, num_return_sequences=4):
14
+ inputs = tokenizer(text, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ num_return_sequences = min(num_return_sequences, num_beams)
17
+
18
+ translated_tokens = model.generate(
19
+ **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
20
+ )
21
+
22
+ translations = []
23
+ for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
24
+ translations.append(translation)
25
 
26
+ # result = {"input":text, "translations":translations}
27
+ return text, translations
28
+
29
+ output = gr.Textbox()
30
+ # with gr.Accordion("Advanced Options"):
31
  num_beams = gr.inputs.Slider(2, 10, step=1, label="Number of beams", default=4)
32
  num_return_sequences = gr.inputs.Slider(2, 10, step=1, label="Number of returned sentences", default=4)
33
 
34
+
35
+ title = "Russian-Circassian translator demo"
36
+ article = "<p style='text-align: center'>Want to help? Join the <a href='https://discord.gg/cXwv495r' target='_blank'>Discord server</a></p>"
37
+
38
+ examples = [
39
+ ["Мы идем домой"],
40
+ ["Сегодня хорошая погода"],
41
+ ["Дети играют во дворе"],
42
+ ["We live in a big house"],
43
+ ["Tu es une bonne personne."],
44
+ ["أين تعيش؟"],
45
+ ["Bir şeyler yapmak istiyorum."],
46
+ ["– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко."],
47
+ ["Как только старик ушел, Сатаней пошла к Саусырыко."],
48
+ ["我永远不会放弃你。"],
49
+ ["우리는 소치에 살고 있습니다."],
50
+ ]
51
+
52
  gr.Interface(
53
+ fn=translate,
54
+ inputs=["text", num_beams, num_return_sequences],
55
+ outputs=["text", output],
56
+ title=title,
57
+ # examples=examples,
58
+ article=article).launch()
59
 
60
  # import gradio as gr
61