Update app.py
app.py
CHANGED
@@ -7,7 +7,7 @@ tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 LANG_CODES = {
     "English":"en",
-    "toki pona":"tl"
+    "Zelsik":"tl"
 }
 
 def translate(text, src_lang, tgt_lang, candidates:int):
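The body of `translate()` falls outside this diff's hunks. For orientation, here is a minimal sketch of how a function with this signature typically drives M2M100 through the standard `transformers` API. The fine-tuned checkpoint's repo id is not shown in the diff, so the base `facebook/m2m100_418M` stands in; everything below is an assumption, not the Space's actual code.

```python
# Hedged sketch only -- the real translate() is not shown in this diff.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# The app uses a fine-tuned M2M100 418M; its repo id is not visible here,
# so the base checkpoint stands in.
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/m2m100_418M")

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model.to(device)

LANG_CODES = {"English": "en", "Zelsik": "tl"}

def translate(text, src_lang, tgt_lang, candidates: int):
    # Tell the tokenizer which language the input text is in.
    tokenizer.src_lang = LANG_CODES[src_lang]
    encoded = tokenizer(text, return_tensors="pt").to(device)
    # Force the decoder to open with the target-language token, then
    # beam-search for the requested number of candidate translations.
    generated = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id(LANG_CODES[tgt_lang]),
        num_beams=max(4, candidates),  # num_beams must be >= num_return_sequences
        num_return_sequences=candidates,
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)
```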
@@ -41,13 +41,11 @@ def translate(text, src_lang, tgt_lang, candidates:int):
 
 with gr.Blocks() as app:
     markdown="""
-    # An English / toki pona Neural Machine Translation App!
+    # An English / Zelsik Neural Machine Translation App!
 
-
+    This is an English to Zelsik / Zelsik to English neural machine translation app.
 
-
-
-    Input your text to translate, a source language and target language, and desired number of return sequences!
+    Input your text to translate, a source language and target language, and the desired number of return sequences!
 
     ### Grammar Regularization
     An interesting quirk of training a many-to-many translation model is that pseudo-grammar correction
@@ -55,23 +53,20 @@ with gr.Blocks() as app:
 
     Remember, this can ***approximate*** grammaticality, but it isn't always the best.
 
-    For example, "mi li toki e toki pona" (Source Language: toki pona & Target Language: toki pona) will result in:
-    - ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
-    - (Thus, the ungrammatical "li" is dropped)
 
     ### Model and Data
-    This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
+    This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model and the original app was made by Jayyydyyy for Toki Pona.
 
-    By leveraging the
-    we can jumpstart our transfer learning to accomplish machine translation for toki pona!
+    By leveraging the pre-trained weights of the massively multilingual M2M100 model,
+    we can jumpstart our transfer learning to accomplish machine translation for Zelsik!
 
-    The model was fine-tuned on the English/toki pona bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
+    The model was fine-tuned on the English/Zelsik bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
 
-    ### This app is a work in progress
+    ### This app is a work in progress; obviously, not all translations will be perfect.
     In addition to parameter quantity and the hyper-parameters used while training,
-    the *quality of data* found on Tatoeba directly influences the
+    the *quality of data* found on Tatoeba directly influences the performance of projects like this!
 
-
+    im sorry jayyydyyy, im too lazy and dumb to change any of the descriptions
     """
 
     with gr.Row():
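The markdown above describes same-language "translation" as a pseudo-grammar corrector: the trick is simply passing the same language as both source and target. In terms of the hypothetical `translate()` sketched earlier, that looks like the following (the example sentence and expected outputs come from the app's own description):

```python
# Pseudo-grammar correction: source and target are the same language.
# The original toki pona app's description reports that
#   translate("mi li toki e toki pona", "toki pona", "toki pona", 3)
# returned ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona'],
# i.e. the ungrammatical "li" was dropped.
candidates = translate("mi li toki e Zelsik", "Zelsik", "Zelsik", 3)
print(candidates)
```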
@@ -79,7 +74,7 @@ with gr.Blocks() as app:
         with gr.Column():
             input_text = gr.components.Textbox(label="Input Text", value="Raccoons are fascinating creatures, but I prefer opossums.")
             source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
-            target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
+            target_lang = gr.components.Dropdown(label="Target Language", value="Zelsik", choices=list(LANG_CODES.keys()))
             return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
 
     inputs=[input_text, source_lang, target_lang, return_seqs]
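The handler that connects these inputs to `translate()` also falls outside the diff's hunks. A plausible wiring, inside the same `gr.Blocks()` context, might look like this (the output component and button names here are assumptions):

```python
# Assumed wiring (not shown in this diff): an output box and a button that
# calls translate() with the four inputs collected above.
output_text = gr.components.Textbox(label="Translation")
translate_btn = gr.Button("Translate!")
translate_btn.click(translate, inputs=inputs, outputs=output_text)
```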
@@ -90,9 +85,9 @@ with gr.Blocks() as app:
 
     gr.Examples(
         [
-            ["Hello! How are you?", "English", "toki pona", 3],
-            ["toki a! ilo pi ante toki ni li pona!", "toki pona", "English", 3],
-            ["mi li toki e toki pona", "toki pona", "toki pona", 3],
+            ["Hello! How are you?", "English", "Zelsik", 3],
+            ["toki a! ilo pi ante toki ni li pona!", "Zelsik", "English", 3],
+            ["mi li toki e Zelsik", "Zelsik", "Zelsik", 3],
         ],
         inputs=inputs
     )
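The diff ends inside the `gr.Blocks()` context. A Space like this one normally finishes by launching the app, so the file presumably closes with something like:

```python
# Assumed final line (outside the diff's hunks): start the Gradio server.
app.launch()
```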