santhosh committed on
Commit
3cdca2d
1 Parent(s): 37b3cc8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctranslate2
2
+ import gradio as gr
3
+ from huggingface_hub import snapshot_download
4
+ from sentencepiece import SentencePieceProcessor
5
+
6
# Heading and HTML blurb displayed by the Gradio interface.
title = "Mesolitica t5-base-standard-bahasa Translation Demo"
description = """
<p>
Translator using <a href='https://huggingface.co/spaces/mesolitica/malaysian-translation/' target='_blank'>Mesolitica Malaysian Translation model</a>. This demo application uses
CTranslate2 optimized version of it: <a href="https://huggingface.co/santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2">santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2</a>,
</p>
"""

# Fetch the converted model repository from the Hugging Face Hub;
# snapshot_download returns the local directory holding its files.
model_name = "santhosh/mesolitica-t5-base-standard-bahasa-cased-ct2"
model_path = snapshot_download(model_name)

# The SentencePiece model is loaded from the same directory as the
# CTranslate2 weights.
tokenizer = SentencePieceProcessor()
tokenizer.load(model_path + "/sentencepiece.model")
translator = ctranslate2.Translator(model_path)

# Language code chosen in the UI -> language name inserted into the prompt.
map_lang = {
    "en": "Inggeris",
    "jv": "Jawa",
    "bjn": "Banjarese",
    "ms": "Melayu",
    "id": "Indonesia",
}
23
+
24
+
25
def translate(input_text, target_language):
    """Translate ``input_text`` into the language named by ``target_language``.

    Args:
        input_text: The text to translate.
        target_language: A key of the module-level ``map_lang`` dict
            (e.g. ``"en"`` or ``"ms"``); it is mapped to the language name
            that is placed in the prompt.

    Returns:
        The decoded first hypothesis produced by the translator, as a string.

    Raises:
        KeyError: If ``target_language`` is not a key of ``map_lang``.
    """
    # Build the instruction prompt "terjemah ke <language>: <text>".
    # The previous version had a stray "f'" inside the f-string, which put
    # the literal characters f' in front of every prompt sent to the model.
    prompt = f"terjemah ke {map_lang[target_language]}: {input_text}"

    # out_type=str yields SentencePiece pieces (strings) rather than ids,
    # which is the token form handed to translate_batch below.
    input_tokens = tokenizer.encode(prompt, out_type=str)

    results = translator.translate_batch(
        [input_tokens],
        batch_type="tokens",
        max_input_length=6144,
        max_decoding_length=6144,
        max_batch_size=1024,
        beam_size=1,
    )

    # One input was submitted, so only the first result is relevant; take
    # its first hypothesis and turn the pieces back into text.
    return tokenizer.decode(results[0].hypotheses[0])
39
+
40
+
41
def translate_interface(input_text, target_language):
    """Gradio callback: forward both UI values to ``translate`` and return its result."""
    return translate(input_text, target_language)
44
+
45
+
46
# Dropdown entries as (displayed label, value passed to the callback) pairs.
languages = [
    ("English", "en"),
    ("Bahasa Melayu", "ms"),
    ("Indonesian", "id"),
    ("Banjarese", "bjn"),
    ("Jawa", "jv"),
]

# Input widgets: the text to translate (pre-filled with a sample sentence)
# and the target language selector.
input_text = gr.Textbox(
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
    label="Input Text",
)
target_language = gr.Dropdown(choices=languages, label="Target Language", value="en")

# Output widget that receives the translated text.
output_text = gr.Textbox(label="Translated Text")

# Assemble the interface around translate_interface and start serving it.
gr.Interface(
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
    title=title,
    description=description,
).launch()