Spaces:
Paused
Paused
Add application file
Browse files- README.md +2 -2
- app.py +84 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
---
|
2 |
title: Mt5 Translate Summ
|
3 |
-
emoji:
|
4 |
colorFrom: pink
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.38.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license:
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: Mt5 Translate Summ
|
3 |
+
emoji: 🎓
|
4 |
colorFrom: pink
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.38.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
import random  # NOTE: only used by the commented-out MQAG demo below
import torch
from transformers import MT5Tokenizer, MT5ForConditionalGeneration

# Prefer GPU when available; all tensors/models are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One shared tokenizer for both models (taken from the translation checkpoint —
# presumably both checkpoints share a vocabulary; TODO confirm).
tokenizer = MT5Tokenizer.from_pretrained("potsawee/mt5-english-thai-large-translation")
# English→Thai translation model.
translator = MT5ForConditionalGeneration.from_pretrained("potsawee/mt5-english-thai-large-translation")
# English→Thai summarization model.
summarizer = MT5ForConditionalGeneration.from_pretrained("potsawee/mt5-english-thai-large-summarization")
# Inference only: switch off training-mode layers (e.g. dropout) and move to device.
translator.eval()
summarizer.eval()
translator.to(device)
summarizer.to(device)
|
14 |
+
|
15 |
+
|
16 |
+
# def generate_multiple_choice_question(
|
17 |
+
# context
|
18 |
+
# ):
|
19 |
+
# num_questions = 1
|
20 |
+
# question_item = question_generation_sampling(
|
21 |
+
# g1_model, g1_tokenizer,
|
22 |
+
# g2_model, g2_tokenizer,
|
23 |
+
# context, num_questions, device
|
24 |
+
# )[0]
|
25 |
+
# question = question_item['question']
|
26 |
+
# options = question_item['options']
|
27 |
+
# options[0] = f"{options[0]} [ANSWER]"
|
28 |
+
# random.shuffle(options)
|
29 |
+
# output_string = f"Question: {question}\n[A] {options[0]}\n[B] {options[1]}\n[C] {options[2]}\n[D] {options[3]}"
|
30 |
+
# return output_string
|
31 |
+
#
|
32 |
+
# demo = gr.Interface(
|
33 |
+
# fn=generate_multiple_choice_question,
|
34 |
+
# inputs=gr.Textbox(lines=8, placeholder="Context Here..."),
|
35 |
+
# outputs=gr.Textbox(lines=5, placeholder="Question: \n[A] \n[B] \n[C] \n[D] "),
|
36 |
+
# title="Multiple-choice Question Generator",
|
37 |
+
# description="Provide some context (e.g. news article or any passage) in the context box and click **Submit**. The models currently support English only. This demo is a part of MQAG - https://github.com/potsawee/mqag0.",
|
38 |
+
# allow_flagging='never'
|
39 |
+
# )
|
40 |
+
|
41 |
+
def generate_output(
    task,
    text,
):
    """Translate or summarize English *text* into Thai.

    Parameters
    ----------
    task : str
        Either ``"Translation"`` or ``"Summarization"`` (the entries of
        the module-level ``TASKS`` list).
    text : str
        English input text; inputs longer than 1024 tokens are truncated.

    Returns
    -------
    str
        The generated Thai text (special tokens stripped).

    Raises
    ------
    ValueError
        If *task* is not one of the supported tasks.
    """
    # Validate the task FIRST so an unknown task fails fast, before any
    # tokenization work; also collapses the two duplicated generate() branches.
    if task == 'Translation':
        model = translator
    elif task == 'Summarization':
        model = summarizer
    else:
        raise ValueError("task undefined!")

    inputs = tokenizer(
        [text],
        padding="longest",
        max_length=1024,  # model's supported input length (see UI description)
        truncation=True,
        return_tensors="pt",
    ).to(device)
    # Inference only: no_grad() avoids building the autograd graph,
    # saving memory during generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
        )
    gen_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return gen_text
|
66 |
+
|
67 |
+
# Tasks offered in the UI; the strings must match the branches
# checked inside generate_output().
TASKS = ["Translation", "Summarization"]

# Gradio UI: a task selector plus an English text box in, Thai text out.
demo = gr.Interface(
    fn=generate_output,
    inputs=[
        gr.components.Radio(label="Task", choices=TASKS, value="Translation"),
        gr.components.Textbox(label="Text (in English)", lines=10),
    ],
    outputs=gr.Textbox(label="Text (in Thai)", lines=4),
    # examples=[["Building a translation demo with Gradio is so easy!", "eng_Latn", "spa_Latn"]],
    cache_examples=False,
    title="English🇬🇧 to Thai🇹🇭 | Translation or Summarization",
    description="Provide some text (in English) & select one of the tasks (Translation or Summarization). Note that currently the model only supports text up to 1024 tokens. The base architecture is mt5-large with the embeddings filtered to only English and Thai tokens and fine-tuned to XSum (Eng2Thai) Dataset (https://huggingface.co/datasets/potsawee/xsum_eng2thai).",
    allow_flagging='never'

)

# Start the Gradio server (blocks until shut down).
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
torch>=1.10
|
2 |
+
transformers>=4.11.3
|