Jordan Myers committed
Commit 0101c12 • Parent(s): 5d5e348
more updates

Browse files:
- .gitignore +2 -0
- app.py +34 -73
.gitignore ADDED
@@ -0,0 +1,2 @@
+.venv
+__pycache__
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import torch
 
-# this model was loaded from https://hf.co/models
 model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
 tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
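The next hunk shows only the tail of `translate`; the rest of its body sits outside the diff context. For orientation, this is a minimal sketch of the usual M2M100 pattern that would produce the `ins`, `gen_args`, and `outs.sequences` names visible below. The `LANG_CODES` values and the generation settings are assumptions, not the repo's actual code; `model`, `tokenizer`, and `device` are the objects defined above.

```python
# Sketch only: LANG_CODES mapping and beam settings are assumed, not from the repo.
LANG_CODES = {"English": "en", "toki pona": "tl"}  # hypothetical; fine-tune may reuse any spare code

model.to(device)  # assumed; the diff defines `device` but the move isn't shown

def translate(text, src_lang, tgt_lang, candidates: int):
    """Translate `text`, returning `candidates` hypotheses joined by newlines."""
    tokenizer.src_lang = LANG_CODES[src_lang]  # M2M100 needs the source language set
    ins = tokenizer(text, return_tensors="pt").to(device)
    gen_args = {
        "forced_bos_token_id": tokenizer.get_lang_id(LANG_CODES[tgt_lang]),  # force target language
        "return_dict_in_generate": True,   # makes outs.sequences available
        "num_beams": max(4, candidates),   # beams must cover the return count
        "num_return_sequences": candidates,
    }
    outs = model.generate(**{**ins, **gen_args})
    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
    return '\n'.join(output)
```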
@@ -38,62 +37,11 @@ def translate(text, src_lang, tgt_lang, candidates:int):
     outs = model.generate(**{**ins, **gen_args})
     output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
 
-    return output
-
-# app = gr.Interface(
-#     fn=translate,
-#     inputs=[
-#         gr.components.Textbox(label="Text"),
-#         gr.components.Dropdown(label="Source Language", choices=list(LANG_CODES.keys())),
-#         gr.components.Dropdown(label="Target Language", choices=list(LANG_CODES.keys())),
-#         gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
-#     ],
-#     outputs=["text"],
-#     examples=[
-#         ["Welcome to my translation app.", "English", "toki pona", 3],
-#         ["Its not always perfect, but its pretty okay!", "English", "toki pona", 3],
-#         ["ilo pi ante toki ni li pona a!", "toki pona", "English", 3],
-#         ["kijetesantakalu li pona", "toki pona", "English", 3],
-#         ["mi li toki e toki pona", "toki pona", "toki pona", 3]
-#     ],
-#     cache_examples=False,
-#     article="""
-#     # A simple English / toki pona Neural Machine Translation App!
-
-#     ### toki a! 💬
-
-#     This is a simple english to toki pona / toki pona to english neural machine translation app.
-
-#     Input your text to translate, a source language and target language, and desired number of return sequences!
-
-#     ### Grammaticality / Regularization
-#     English -> English and/or toki pona -> toki pona will result in some form of regularization.
-
-#     This can approximate grammaticality, but it isn't always the best.
-
-#     For example, "mi li toki e toki pona" [src: toki pona, tgt: toki pona] will result in ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
-#     (Thus, the ungrammatical "li" is dropped)
-
-#     ### Model and Data
-#     This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
-
-#     By leveraging the pretrained weights of the massively multilingual M2M100 model,
-#     we can jumpstart our transfer learning to accomplish machine translation for toki pona!
-
-#     The model was fine-tuned on the English/toki pona bitexts found at https://tatoeba.org/
-
-#     ### This app is a work in progress and obviously not all translations will be perfect.
-#     In addition to parameter quantity and the hyper-parameters used while training,
-#     the *quality of data* found on Tatoeba directly influences the perfomance of projects like this!
-
-#     If you wish to contribute, please simply add high quality and diverse translations to Tatoeba!
-#     """,
-#     title="English / toki pona Translation"
-# )
+    return '\n'.join(output)
 
 with gr.Blocks() as app:
-
-    # A simple English / toki pona Neural Machine Translation App!
+    markdown="""
+    # A Simple English / toki pona Neural Machine Translation App!
 
     ### toki a! 💬
 
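The functional change in this hunk swaps the raw list return for `'\n'.join(output)`: `batch_decode` yields one string per generated sequence, and the new UI below routes the result into a single `gr.Textbox`, which renders one string. A quick illustration (values hypothetical):

```python
output = ["mi toki e toki pona.", "mi toki pona."]  # hypothetical batch_decode result
print('\n'.join(output))
# mi toki e toki pona.
# mi toki pona.
```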
@@ -101,13 +49,15 @@ with gr.Blocks() as app:
 
     Input your text to translate, a source language and target language, and desired number of return sequences!
 
-    ### Grammaticality / Regularization
-    English -> English and/or toki pona -> toki pona will result in some form of regularization.
+    ### Grammar Regularization
+    An interesting quirk of training a many-to-many translation model is that pseudo-grammar correction
+    can be achieved by translating *from* **language A** *to* **language A**
 
-    This can approximate grammaticality, but it isn't always the best.
+    Remember, this can ***approximate*** grammaticality, but it isn't always the best.
 
-    For example, "mi li toki e toki pona" [src: toki pona, tgt: toki pona] will result in ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
-    (Thus, the ungrammatical "li" is dropped)
+    For example, "mi li toki e toki pona" (Source Language: toki pona & Target Language: toki pona) will result in:
+    - ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
+    - (Thus, the ungrammatical "li" is dropped)
 
     ### Model and Data
     This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
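Since the rewritten article text explains the A-to-A regularization trick, here is a hedged usage sketch: calling the app's `translate` with the same source and target language should reproduce the example quoted above (actual outputs depend on the model weights).

```python
# Source and target both "toki pona": the model re-generates the sentence,
# approximately normalizing away the ungrammatical "li".
print(translate("mi li toki e toki pona", "toki pona", "toki pona", 3))
# Per the article text, something like:
# mi toki e toki pona.
# mi toki pona.
# mi toki e toki pona
```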
@@ -121,19 +71,30 @@ with gr.Blocks() as app:
     In addition to parameter quantity and the hyper-parameters used while training,
     the *quality of data* found on Tatoeba directly influences the perfomance of projects like this!
 
-    If you wish to contribute, please simply add high quality and diverse translations to Tatoeba!
+    If you wish to contribute, please add high quality and diverse translations to Tatoeba!
     """
-
-
-    gr.
-    gr.
-
-
-
-
-
-
-
-
+
+    with gr.Row():
+        gr.Markdown(markdown)
+        with gr.Column():
+            input_text = gr.components.Textbox(label="Input Text", value="Raccoons are fascinating creatures, but I prefer opossums.")
+            source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
+            target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
+            return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
+
+    inputs=[input_text, source_lang, target_lang, return_seqs]
+    outputs = gr.Textbox()
+
+    translate_btn = gr.Button("Translate! | o ante toki!")
+    translate_btn.click(translate, inputs=inputs, outputs=outputs)
+
+    gr.Examples(
+        [
+            ["Hello! How are you?", "English", "toki pona", 3],
+            ["toki a! ilo pi ante toki ni li pona!", "toki pona", "English", 3],
+            ["mi toki e toki pona", "toki pona", "toki pona", 3],
+        ],
+        inputs=inputs
+    )
 
 app.launch()
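Note that as added, `gr.Examples` receives only `inputs`, so clicking an example populates the fields and the user must still press the Translate button. If click-to-run examples were wanted, recent Gradio versions also accept `fn`, `outputs`, and `run_on_click` on the `Examples` helper; a sketch reusing the names from this diff:

```python
# Sketch, assuming the same `inputs`, `outputs`, and `translate` defined above.
gr.Examples(
    [
        ["Hello! How are you?", "English", "toki pona", 3],
        ["toki a! ilo pi ante toki ni li pona!", "toki pona", "English", 3],
        ["mi toki e toki pona", "toki pona", "toki pona", 3],
    ],
    inputs=inputs,
    outputs=outputs,       # send results to the output textbox
    fn=translate,          # function to run for a clicked example
    cache_examples=False,  # don't precompute outputs at startup
    run_on_click=True,     # run translate immediately on click
)
```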