Tazmin committed

Commit f33d258
1 Parent(s): 6547184

Update app.py

Files changed (1)
  1. app.py +15 -20
app.py CHANGED
@@ -7,7 +7,7 @@ tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 LANG_CODES = {
     "English":"en",
-    "toki pona":"tl"
+    "Zelsik":"tl"
 }
 
 def translate(text, src_lang, tgt_lang, candidates:int):
@@ -41,13 +41,11 @@ def translate(text, src_lang, tgt_lang, candidates:int):
 
 with gr.Blocks() as app:
     markdown="""
-    # An English / toki pona Neural Machine Translation App!
+    # An English / Zelsik Neural Machine Translation App!
 
-    ### toki a! 💬
+    This is an English to Zelsik / Zelsik to English neural machine translation app.
 
-    This is an english to toki pona / toki pona to english neural machine translation app.
-
-    Input your text to translate, a source language and target language, and desired number of return sequences!
+    Input your text to translate, a source language and target language, and the desired number of return sequences!
 
     ### Grammar Regularization
     An interesting quirk of training a many-to-many translation model is that pseudo-grammar correction
@@ -55,23 +53,20 @@ with gr.Blocks() as app:
 
     Remember, this can ***approximate*** grammaticality, but it isn't always the best.
 
-    For example, "mi li toki e toki pona" (Source Language: toki pona & Target Language: toki pona) will result in:
-    - ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
-    - (Thus, the ungrammatical "li" is dropped)
 
     ### Model and Data
-    This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
+    This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model; the original app was made by Jayyydyyy for toki pona.
 
-    By leveraging the pretrained weights of the massively multilingual M2M100 model,
-    we can jumpstart our transfer learning to accomplish machine translation for toki pona!
+    By leveraging the pre-trained weights of the massively multilingual M2M100 model,
+    we can jumpstart our transfer learning to accomplish machine translation for Zelsik!
 
-    The model was fine-tuned on the English/toki pona bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
+    The model was fine-tuned on the English/Zelsik bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
 
-    ### This app is a work in progress and obviously not all translations will be perfect.
+    ### This app is a work in progress; obviously, not all translations will be perfect.
     In addition to parameter quantity and the hyper-parameters used while training,
-    the *quality of data* found on Tatoeba directly influences the perfomance of projects like this!
+    the *quality of data* found on Tatoeba directly influences the performance of projects like this!
 
-    If you wish to contribute, please add high quality and diverse translations to Tatoeba!
+    I'm sorry, Jayyydyyy; I was too lazy to change the rest of the descriptions.
     """
 
     with gr.Row():
@@ -79,7 +74,7 @@ with gr.Blocks() as app:
         with gr.Column():
             input_text = gr.components.Textbox(label="Input Text", value="Raccoons are fascinating creatures, but I prefer opossums.")
             source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
-            target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
+            target_lang = gr.components.Dropdown(label="Target Language", value="Zelsik", choices=list(LANG_CODES.keys()))
             return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=12, step=1)
 
     inputs=[input_text, source_lang, target_lang, return_seqs]
@@ -90,9 +85,9 @@ with gr.Blocks() as app:
 
     gr.Examples(
         [
-            ["Hello! How are you?", "English", "toki pona", 3],
-            ["toki a! ilo pi ante toki ni li pona!", "toki pona", "English", 3],
-            ["mi li toki e toki pona", "toki pona", "toki pona", 3],
+            ["Hello! How are you?", "English", "Zelsik", 3],
+            ["toki a! ilo pi ante toki ni li pona!", "Zelsik", "English", 3],
+            ["mi li toki e Zelsik", "Zelsik", "Zelsik", 3],
         ],
         inputs=inputs
    )
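The diff shows only the signature of `translate` and the base checkpoint name, not the function body. Below is a minimal sketch of how such a function typically looks with the Hugging Face transformers API for M2M100; the body, the beam settings, and the guess that the fine-tune repurposes M2M100's `tl` (Tagalog) language slot for the conlang are assumptions, not the app's confirmed implementation. The app presumably loads its own fine-tuned weights rather than the stock checkpoint used here as a stand-in.

```python
# Hypothetical reconstruction of translate(); everything here is inferred
# from the diff's signature and the standard M2M100 generate() workflow.
import torch
from transformers import AutoTokenizer, M2M100ForConditionalGeneration

tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)

LANG_CODES = {
    "English": "en",
    # Neither toki pona nor Zelsik is in stock M2M100, so the fine-tune
    # appears to repurpose the "tl" (Tagalog) slot for the conlang.
    "Zelsik": "tl",
}

def translate(text, src_lang, tgt_lang, candidates: int):
    """Translate text, returning up to `candidates` beam hypotheses."""
    src, tgt = LANG_CODES[src_lang], LANG_CODES[tgt_lang]
    tokenizer.src_lang = src  # sets the source-language tag the tokenizer prepends
    inputs = tokenizer(text, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        # Force the first generated token to be the target-language tag.
        forced_bos_token_id=tokenizer.get_lang_id(tgt),
        num_beams=max(candidates, 4),  # num_beams must be >= num_return_sequences
        num_return_sequences=candidates,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
```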
 
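The "Grammar Regularization" section keeps its explanation but, after this commit, drops the worked example that used to follow it. Reconstructed from the deleted lines, a usage sketch of the hypothetical `translate` above:

```python
# Same-language "translation" as approximate grammar correction, per the
# app's Grammar Regularization note. Uses the hypothetical translate()
# sketched earlier; the outputs are quoted from the deleted description,
# not freshly generated results.
candidates = translate("mi li toki e toki pona", "Zelsik", "Zelsik", 3)
# The deleted example reported:
#   ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
# i.e. the ungrammatical "li" is dropped by the round trip.
```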