emon1521 committed on
Commit
7f3ad0d
1 Parent(s): 0d399d8

Add application file

Browse files
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import ctranslate2
3
+ import transformers
4
+ from translator import translate
5
# Streamlit front end: collect the text, pick a target language, and show the
# translation returned by translate().
st.title("On Device Translator")
user_input = st.text_area("Enter text to be translated in English here.")

# Display name shown in the select box -> language code passed to translate()
# as tgt_lang.
languages = {
    "English": "eng_Latn",
    "Japanese": "jpn_Jpan",
    "Hindi": "hin_Deva",
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
    "Chinese": "zho_Hans",
    "Italian": "ita_Latn",
    "German": "deu_Latn",
    "Russian": "rus_Cyrl",
}
selected_language = st.selectbox("Select Target Language", list(languages))

if st.button("Translate"):
    if user_input.strip():
        translated_text = translate(user_input, tgt_lang=languages[selected_language])
        st.write(f"Translated text: {translated_text}")
    else:
        # Nothing to translate: skip the model call instead of sending it an
        # empty source sentence.
        st.warning("Please enter some text to translate.")
22
+
lid.176.ftz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
3
+ size 938013
nllb-200-distilled-600M/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "</s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
nllb-200-distilled-600M/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3aef73bb382ccae3b25a2fecd7cfdc0146625e8ecd51c92f585984eaf654bc1
3
+ size 622596037
nllb-200-distilled-600M/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gooey
2
+ ctranslate2
3
+ transformers
translator.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctranslate2
2
+ import transformers
3
import functools


@functools.lru_cache(maxsize=1)
def _load_translator():
    """Return the CTranslate2 translator, loading the model only on first use."""
    return ctranslate2.Translator("nllb-200-distilled-600M")


@functools.lru_cache(maxsize=8)
def _load_tokenizer(src_lang):
    """Return the NLLB tokenizer configured for *src_lang*, memoised per language."""
    return transformers.AutoTokenizer.from_pretrained(
        "facebook/nllb-200-distilled-600M", src_lang=src_lang
    )


def translate(text, tgt_lang, src_lang="eng_Latn"):
    """Translate *text* into the language identified by *tgt_lang*.

    Args:
        text: The text to translate.
        tgt_lang: Target language code (for example ``"fra_Latn"``); it is
            passed to the translator as the forced target prefix.
        src_lang: Source language code handed to the tokenizer.
            NOTE(review): ``"eng_Latn"`` is believed to match the tokenizer's
            own default, so existing two-argument callers should see no
            change -- confirm against the transformers NLLB documentation.

    Returns:
        The decoded translation, or ``""`` when *text* is empty or contains
        only whitespace.
    """
    # Blank input has nothing to translate; return early so the model and
    # tokenizer are not loaded or run for it.
    if not text or not text.strip():
        return ""

    # Both loaders are cached, so repeated calls reuse the same objects
    # instead of re-reading the model from disk each time.
    translator = _load_translator()
    tokenizer = _load_tokenizer(src_lang)

    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
    results = translator.translate_batch(
        [source_tokens], target_prefix=[[tgt_lang]]
    )
    # The best hypothesis starts with the target-language prefix token we
    # supplied; drop it before decoding.
    target_tokens = results[0].hypotheses[0][1:]
    return tokenizer.decode(tokenizer.convert_tokens_to_ids(target_tokens))
12
+
13
+ #tgt_lang = "eng_Latn"
14
+ #translated_text = translate("জাপান একটি সুন্দর দেশ। আমি জাপান যেতে আগ্রহী।", tgt_lang)
15
+ #print(translated_text)