lebakasable committed on
Commit
9380c36
·
1 Parent(s): c8b60bd

feat: update

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. LICENSE +21 -0
  3. README.md +9 -12
  4. app.py +79 -0
  5. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__
2
+ venv
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Le Baka Sable
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,10 @@
1
- ---
2
- title: Ante Toki
3
- emoji: 📈
4
- colorFrom: pink
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 4.7.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
1
+ # ante toki
 
 
 
 
 
 
 
 
 
 
2
 
3
+ An English to Toki Pona / Toki Pona to English translator.
4
+
5
+ ## Quick Start
6
+
7
+ ```console
8
+ $ pip install -r requirements.txt
9
+ $ python app.py
10
+ ```
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import torch
4
+
5
# Prefer the first CUDA device when one is available; otherwise run on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Fine-tuned translation weights. The model must live on the same device that
# translate() moves the tokenized inputs to, otherwise generate() fails with a
# device-mismatch error on CUDA hosts.
model = AutoModelForSeq2SeqLM.from_pretrained(
    "Jayyydyyy/m2m100_418m_tokipona"
).to(device)

# The tokenizer comes from the base M2M100 checkpoint.
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")

# UI language name -> tokenizer language code.
# NOTE(review): toki pona is mapped to the "tl" code; presumably the fine-tuned
# checkpoint reuses that language slot -- confirm against the model card.
LANG_CODES = {"English": "en", "toki pona": "tl"}
9
+
10
+
11
def translate(text, src_lang, tgt_lang, candidates: int):
    """Translate *text* and return the candidate translations.

    Args:
        text: The text to translate.
        src_lang: Source language name; must be a key of ``LANG_CODES``.
        tgt_lang: Target language name; must be a key of ``LANG_CODES``.
        candidates: Number of translations to return. It is also used as the
            beam width, because beam search cannot return more sequences
            than it has beams.

    Returns:
        The candidate translations joined with newlines, or an empty string
        when *text* is empty or whitespace-only.

    Raises:
        gr.Error: If *src_lang* or *tgt_lang* is not a known language name.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    src = LANG_CODES.get(src_lang)
    tgt = LANG_CODES.get(tgt_lang)
    if src is None or tgt is None:
        unknown = src_lang if src is None else tgt_lang
        # Fail with a readable message instead of an opaque KeyError later.
        raise gr.Error(f"Unsupported language: {unknown!r}")

    # UI widgets may deliver the count as a float; generate() needs an int >= 1.
    candidates = max(1, int(candidates))

    # The tokenizer is shared module state; set both languages before encoding.
    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt

    ins = tokenizer(text, return_tensors="pt").to(device)

    # Scores and hidden states are not requested: only the decoded sequences
    # are used, and collecting the extras costs time and memory.
    outs = model.generate(
        **ins,
        return_dict_in_generate=True,
        length_penalty=0.0,
        num_return_sequences=candidates,
        num_beams=candidates,
        # Force the decoder to start with the target-language token.
        forced_bos_token_id=tokenizer.get_lang_id(tgt),
    )
    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)

    return "\n".join(output)
34
+
35
+
36
# Build the Gradio UI: intro text beside a column holding the inputs, the
# output box and the translate button.
# NOTE(review): the nesting of Row/Column below is reconstructed from a
# rendering that lost indentation -- confirm against the intended layout.
with gr.Blocks(theme="gstaff/xkcd") as app:
    # Intro text, built line by line so the Markdown does not depend on the
    # indentation of this block.
    markdown = (
        "\n"
        "# ante toki\n"
        "\n"
        "## toki a! 💬\n"
        "\n"
        "This is an English to Toki Pona / Toki Pona to English translator.\n"
        "\n"
        "Input your text to translate and desired number of generated sentences !\n"
    )

    with gr.Row():
        gr.Markdown(markdown)
        with gr.Column():
            input_text = gr.components.Textbox(
                label="Input Text",
                value="Dogs are cute, but I prefer cats.",
            )
            source_lang = gr.components.Dropdown(
                label="Source Language",
                value="English",
                choices=list(LANG_CODES.keys()),
            )
            target_lang = gr.components.Dropdown(
                label="Target Language",
                # The default must be one of the choices (a LANG_CODES key);
                # the key is spelled "toki pona", not "Toki Pona".
                value="toki pona",
                choices=list(LANG_CODES.keys()),
            )
            return_seqs = gr.Slider(
                label="Number of generated sentences",
                value=3,
                minimum=1,
                maximum=12,
                step=1,
            )

            # Order must match translate(text, src_lang, tgt_lang, candidates).
            inputs = [input_text, source_lang, target_lang, return_seqs]
            outputs = gr.Textbox()

            translate_btn = gr.Button("o ante toki!")
            translate_btn.click(translate, inputs=inputs, outputs=outputs)

app.launch()
79
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio
4
+ sentencepiece