Spaces:
Sleeping
Sleeping
Add application file
Browse files
- app.py +22 -0
- lid.176.ftz +3 -0
- nllb-200-distilled-600M/config.json +9 -0
- nllb-200-distilled-600M/model.bin +3 -0
- nllb-200-distilled-600M/shared_vocabulary.json +0 -0
- requirments.txt +3 -0
- translator.py +15 -0
app.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import ctranslate2
|
3 |
+
import transformers
|
4 |
+
from translator import translate
|
5 |
+
# Streamlit page: collect text, pick a target language, show the translation.
st.title("On Device Translator")
user_input = st.text_area("Enter text to be translated in English here.")

# Display name shown in the select box -> language code passed to translate()
# as ``tgt_lang``.
languages = {
    "English": "eng_Latn",
    "Japanese": "jpn_Jpan",
    "Hindi": "hin_Deva",
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
    "Chinese": "zho_Hans",
    "Italian": "ita_Latn",
    "German": "deu_Latn",
    "Russian": "rus_Cyrl",
}
selected_language = st.selectbox("Select Target Language", list(languages))

if st.button("Translate"):
    if not user_input or not user_input.strip():
        # Nothing to translate: tell the user instead of running the model
        # on an empty string.
        st.warning("Please enter some text to translate.")
    else:
        translated_text = translate(
            user_input, tgt_lang=languages[selected_language]
        )
        st.write(f"Translated text: {translated_text}")
|
22 |
+
|
lid.176.ftz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
|
3 |
+
size 938013
|
nllb-200-distilled-600M/config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_source_bos": false,
|
3 |
+
"add_source_eos": false,
|
4 |
+
"bos_token": "<s>",
|
5 |
+
"decoder_start_token": "</s>",
|
6 |
+
"eos_token": "</s>",
|
7 |
+
"layer_norm_epsilon": null,
|
8 |
+
"unk_token": "<unk>"
|
9 |
+
}
|
nllb-200-distilled-600M/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3aef73bb382ccae3b25a2fecd7cfdc0146625e8ecd51c92f585984eaf654bc1
|
3 |
+
size 622596037
|
nllb-200-distilled-600M/shared_vocabulary.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirments.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gooey
|
2 |
+
ctranslate2
|
3 |
+
transformers
|
translator.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ctranslate2
|
2 |
+
import transformers
|
3 |
+
_MODEL_DIR = "nllb-200-distilled-600M"
_TOKENIZER_NAME = "facebook/nllb-200-distilled-600M"
_models = None  # (translator, tokenizer) pair, populated on first use


def _load_models():
    """Return the (translator, tokenizer) pair, creating it on the first call only."""
    global _models
    if _models is None:
        translator = ctranslate2.Translator(_MODEL_DIR)
        tokenizer = transformers.AutoTokenizer.from_pretrained(_TOKENIZER_NAME)
        # Assign only after both loads succeed so a failed load is retried
        # on the next call instead of leaving a half-filled cache.
        _models = (translator, tokenizer)
    return _models


def translate(text, tgt_lang):
    """Translate *text* into the language identified by *tgt_lang*.

    Args:
        text: The text to translate.
        tgt_lang: Language code used as the target prefix for decoding
            (for example ``"fra_Latn"``).

    Returns:
        The decoded translation as a string, or ``""`` when *text* is empty
        or contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to translate; skip loading and running the model.
        return ""

    translator, tokenizer = _load_models()

    # NOTE(review): no source language is passed to the tokenizer, so it uses
    # whatever its default is -- confirm this matches the expected input.
    source = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
    target_prefix = [tgt_lang]
    results = translator.translate_batch([source], target_prefix=[target_prefix])

    # Skip the first token of the best hypothesis; presumably this is the
    # language-code prefix supplied above -- confirm against CTranslate2 docs.
    target = results[0].hypotheses[0][1:]
    return tokenizer.decode(tokenizer.convert_tokens_to_ids(target))
|
12 |
+
|
13 |
+
#tgt_lang = "eng_Latn"
|
14 |
+
#translated_text = translate("জাপান একটি সুন্দর দেশ। আমি জাপান যেতে আগ্রহী।", tgt_lang)
|
15 |
+
#print(translated_text)
|