huseinzol05
committed on
Commit
•
a4577da
1
Parent(s):
bfe12e3
improve
Browse files
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
|
|
4 |
colorFrom: pink
|
5 |
colorTo: pink
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
4 |
colorFrom: pink
|
5 |
colorTo: pink
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.32.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
CHANGED
@@ -10,6 +10,8 @@ import gradio as gr
|
|
10 |
import logging
|
11 |
import os
|
12 |
|
|
|
|
|
13 |
logging.basicConfig(level=logging.INFO)
|
14 |
|
15 |
TO_LANG = {
|
@@ -42,8 +44,14 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
42 |
use_fast=False,
|
43 |
)
|
44 |
|
45 |
-
|
46 |
def translate(text, to_lang):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
global model
|
48 |
to_lang = TO_LANG[to_lang]
|
49 |
if model is None:
|
@@ -66,7 +74,7 @@ def translate(text, to_lang):
|
|
66 |
[[i for i in o if i not in [0, 1, 2]] for o in results],
|
67 |
spaces_between_special_tokens=False,
|
68 |
)
|
69 |
-
return results[0]
|
70 |
|
71 |
|
72 |
hansard = """
|
@@ -96,13 +104,22 @@ my_array = np.reshape(my_array, (2, 5)) # menjadi array 2D dengan 2 baris dan 5
|
|
96 |
Itulah beberapa operasi dasar numpy. Anda dapat menemukan dokumentasi resmi numpy di https://numpy.org/doc/stable/.
|
97 |
""".strip()
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
demo = gr.Interface(
|
100 |
fn=translate,
|
101 |
inputs=[
|
102 |
gr.components.Textbox(label='Input Text'),
|
103 |
gr.components.Dropdown(label='Output language', choices=TO_LANG_KEYS, value='Malay'),
|
104 |
],
|
105 |
-
outputs=[
|
|
|
|
|
|
|
106 |
examples=[
|
107 |
['Syed Saddiq berkata, mereka seharusnya mengingati bahawa semasa menjadi Perdana Menteri Pakatan Harapan', 'English'],
|
108 |
['SHAH ALAM - Pertubuhan Kebajikan Anak Bersatu Selangor bersetuju pihak kerajaan mewujudkan Suruhanjaya Siasatan Diraja untuk menyiasat isu kartel daging.', 'English'],
|
@@ -116,9 +133,12 @@ demo = gr.Interface(
|
|
116 |
[code, 'Malay']
|
117 |
|
118 |
],
|
|
|
|
|
|
|
119 |
cache_examples=False,
|
120 |
title='Malaysian NMT',
|
121 |
-
description=
|
122 |
)
|
123 |
|
124 |
demo.launch(server_name='0.0.0.0')
|
|
|
10 |
import logging
|
11 |
import os
|
12 |
|
13 |
+
HF_TOKEN = os.getenv('HF_TOKEN')
|
14 |
+
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, 'mesolitica/crowdsourced-malaysian-translation')
|
15 |
logging.basicConfig(level=logging.INFO)
|
16 |
|
17 |
TO_LANG = {
|
|
|
44 |
use_fast=False,
|
45 |
)
|
46 |
|
|
|
47 |
def translate(text, to_lang):
|
48 |
+
|
49 |
+
if len(text) < 2:
|
50 |
+
raise gr.Error('text input must longer than 1 character')
|
51 |
+
|
52 |
+
if to_lang is None or len(to_lang) < 1:
|
53 |
+
raise gr.Error('Please select target language')
|
54 |
+
|
55 |
global model
|
56 |
to_lang = TO_LANG[to_lang]
|
57 |
if model is None:
|
|
|
74 |
[[i for i in o if i not in [0, 1, 2]] for o in results],
|
75 |
spaces_between_special_tokens=False,
|
76 |
)
|
77 |
+
return results[0], results[0]
|
78 |
|
79 |
|
80 |
hansard = """
|
|
|
104 |
Itulah beberapa operasi dasar numpy. Anda dapat menemukan dokumentasi resmi numpy di https://numpy.org/doc/stable/.
|
105 |
""".strip()
|
106 |
|
107 |
+
description = """
|
108 |
+
<a href="https://huggingface.co/mesolitica/translation-t5-small-standard-bahasa-cased-v2">mesolitica/translation-t5-small-standard-bahasa-cased-v2</a> able to translate malay, pasar malay (social media texts or local context), english, manglish, javanese, banjarese and indonesian to target language. It also able to maintain the text structure as it is and only translate necessary texts, eg, programming code.
|
109 |
+
|
110 |
+
If you found out the translation is wrong, by simply fix the second box output and click `Flag as Wrong` button. You can help us to improve the future model, the dataset will be open source at <a href="https://huggingface.co/datasets/mesolitica/crowdsourced-malaysian-translation">mesolitica/crowdsourced-malaysian-translation</a>
|
111 |
+
""".strip()
|
112 |
+
|
113 |
demo = gr.Interface(
|
114 |
fn=translate,
|
115 |
inputs=[
|
116 |
gr.components.Textbox(label='Input Text'),
|
117 |
gr.components.Dropdown(label='Output language', choices=TO_LANG_KEYS, value='Malay'),
|
118 |
],
|
119 |
+
outputs=[
|
120 |
+
gr.components.Textbox(label='Output', interactive = False),
|
121 |
+
gr.components.Textbox(label='Does it correct? fix me!', interactive = True),
|
122 |
+
],
|
123 |
examples=[
|
124 |
['Syed Saddiq berkata, mereka seharusnya mengingati bahawa semasa menjadi Perdana Menteri Pakatan Harapan', 'English'],
|
125 |
['SHAH ALAM - Pertubuhan Kebajikan Anak Bersatu Selangor bersetuju pihak kerajaan mewujudkan Suruhanjaya Siasatan Diraja untuk menyiasat isu kartel daging.', 'English'],
|
|
|
133 |
[code, 'Malay']
|
134 |
|
135 |
],
|
136 |
+
allow_flagging='manual',
|
137 |
+
flagging_options=['Wrong'],
|
138 |
+
flagging_callback=hf_writer,
|
139 |
cache_examples=False,
|
140 |
title='Malaysian NMT',
|
141 |
+
description=description,
|
142 |
)
|
143 |
|
144 |
demo.launch(server_name='0.0.0.0')
|