Spaces:
Sleeping
Sleeping
Gabriela Nicole Gonzalez Saez
committed on
Commit
•
33fb482
1
Parent(s):
735ec79
index
Browse files
- .gitattributes +19 -0
- .gitignore +2 -0
- app.py +15 -11
- index/en-ar_input_tokens.index +3 -0
- index/en-ar_input_words.index +3 -0
- index/en-ar_metadata_ref.pkl +3 -0
- index/en-ar_output_tokens.index +3 -0
- index/en-ar_output_words.index +3 -0
- index/en-es_input_tokens.index +0 -0
- index/en-es_input_words.index +0 -0
- index/en-es_metadata_ref.pkl +2 -2
- index/en-es_output_tokens.index +0 -0
- index/en-es_output_words.index +0 -0
- index/en-fr_input_tokens.index +3 -0
- index/en-fr_input_words.index +3 -0
- index/en-fr_metadata_ref.pkl +3 -0
- index/en-fr_output_tokens.index +3 -0
- index/en-fr_output_words.index +3 -0
- index/en-zh_input_tokens.index +3 -0
- index/en-zh_input_words.index +3 -0
- index/en-zh_metadata_ref.pkl +3 -0
- index/en-zh_output_tokens.index +3 -0
- index/en-zh_output_words.index +3 -0
.gitattributes
CHANGED
@@ -34,3 +34,22 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
index/en-es_metadata_ref.pkl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
index/en-es_metadata_ref.pkl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
index/en-es_output_words.index filter=lfs diff=lfs merge=lfs -text
|
38 |
+
index/en-zh_metadata_ref.pkl filter=lfs diff=lfs merge=lfs -text
|
39 |
+
index/en-zh_output_words.index filter=lfs diff=lfs merge=lfs -text
|
40 |
+
index/en-ar_input_tokens.index filter=lfs diff=lfs merge=lfs -text
|
41 |
+
index/en-ar_output_tokens.index filter=lfs diff=lfs merge=lfs -text
|
42 |
+
index/en-fr_metadata_ref.pkl filter=lfs diff=lfs merge=lfs -text
|
43 |
+
index/en-fr_input_tokens.index filter=lfs diff=lfs merge=lfs -text
|
44 |
+
index/en-fr_input_words.index filter=lfs diff=lfs merge=lfs -text
|
45 |
+
index/en-fr_output_tokens.index filter=lfs diff=lfs merge=lfs -text
|
46 |
+
index/en-zh_input_tokens.index filter=lfs diff=lfs merge=lfs -text
|
47 |
+
index/en-ar_input_words.index filter=lfs diff=lfs merge=lfs -text
|
48 |
+
index/en-ar_metadata_ref.pkl filter=lfs diff=lfs merge=lfs -text
|
49 |
+
index/en-ar_output_words.index filter=lfs diff=lfs merge=lfs -text
|
50 |
+
index/en-fr_output_words.index filter=lfs diff=lfs merge=lfs -text
|
51 |
+
index/en-zh_input_words.index filter=lfs diff=lfs merge=lfs -text
|
52 |
+
index/en-zh_output_tokens.index filter=lfs diff=lfs merge=lfs -text
|
53 |
+
index/en-es_input_tokens.index filter=lfs diff=lfs merge=lfs -text
|
54 |
+
index/en-es_input_words.index filter=lfs diff=lfs merge=lfs -text
|
55 |
+
index/en-es_output_tokens.index filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
local_index/*
|
2 |
+
app_local.py
|
app.py
CHANGED
@@ -23,23 +23,24 @@ from transformers import AutoTokenizer, MarianTokenizer, AutoModel, AutoModelFor
|
|
23 |
model_es = "Helsinki-NLP/opus-mt-en-es"
|
24 |
model_fr = "Helsinki-NLP/opus-mt-en-fr"
|
25 |
model_zh = "Helsinki-NLP/opus-mt-en-zh"
|
26 |
-
|
27 |
|
28 |
tokenizer_es = AutoTokenizer.from_pretrained(model_es)
|
29 |
tokenizer_fr = AutoTokenizer.from_pretrained(model_fr)
|
30 |
tokenizer_zh = AutoTokenizer.from_pretrained(model_zh)
|
31 |
-
|
32 |
|
33 |
model_tr_es = MarianMTModel.from_pretrained(model_es)
|
34 |
model_tr_fr = MarianMTModel.from_pretrained(model_fr)
|
35 |
model_tr_zh = MarianMTModel.from_pretrained(model_zh)
|
36 |
-
|
37 |
|
38 |
from faiss import write_index, read_index
|
39 |
import pickle
|
40 |
|
41 |
def load_index(model):
|
42 |
-
with open('index/'+ model + '_metadata_ref.pkl', 'rb') as f:
|
|
|
43 |
loaded_dict = pickle.load(f)
|
44 |
for type in ['tokens','words']:
|
45 |
for kind in ['input', 'output']:
|
@@ -54,25 +55,28 @@ dict_models = {
|
|
54 |
'en-es': model_es,
|
55 |
'en-fr': model_fr,
|
56 |
'en-zh': model_zh,
|
57 |
-
'en-
|
58 |
}
|
59 |
|
60 |
dict_models_tr = {
|
61 |
'en-es': model_tr_es,
|
62 |
'en-fr': model_tr_fr,
|
63 |
'en-zh': model_tr_zh,
|
64 |
-
'en-
|
65 |
}
|
66 |
|
67 |
dict_tokenizer_tr = {
|
68 |
'en-es': tokenizer_es,
|
69 |
'en-fr': tokenizer_fr,
|
70 |
'en-zh': tokenizer_zh,
|
71 |
-
'en-
|
72 |
}
|
73 |
-
|
74 |
dict_reference_faiss = {
|
75 |
'en-es': load_index('en-es'),
|
|
|
|
|
|
|
76 |
}
|
77 |
|
78 |
# print("dict", dict_reference_faiss['en-es']['input']['tokens'][1])
|
@@ -698,11 +702,11 @@ html_embd = """
|
|
698 |
"""
|
699 |
|
700 |
html_tok_target ="""
|
701 |
-
<div id="d3_tok_target"
|
702 |
"""
|
703 |
|
704 |
html_embd_target= """
|
705 |
-
<div id="d3_embd_target"
|
706 |
<div id="d3_graph_output_words" class="d3_graph words"></div>
|
707 |
<div id="d3_graph_output_tokens" class="d3_graph tokens"></div>
|
708 |
<div id="similar_output_words" class=""></div>
|
@@ -823,7 +827,7 @@ with gr.Blocks(js="plotsjs.js") as demo:
|
|
823 |
"""
|
824 |
1. Select the language pair for the translation
|
825 |
""")
|
826 |
-
radio_c = gr.Radio(choices=['en-zh', 'en-es', 'en-fr', 'en-
|
827 |
gr.Markdown(
|
828 |
"""
|
829 |
2. Source text to translate
|
|
|
23 |
model_es = "Helsinki-NLP/opus-mt-en-es"
|
24 |
model_fr = "Helsinki-NLP/opus-mt-en-fr"
|
25 |
model_zh = "Helsinki-NLP/opus-mt-en-zh"
|
26 |
+
model_ar = "Helsinki-NLP/opus-mt-en-ar"
|
27 |
|
28 |
tokenizer_es = AutoTokenizer.from_pretrained(model_es)
|
29 |
tokenizer_fr = AutoTokenizer.from_pretrained(model_fr)
|
30 |
tokenizer_zh = AutoTokenizer.from_pretrained(model_zh)
|
31 |
+
tokenizer_ar = AutoTokenizer.from_pretrained(model_ar)
|
32 |
|
33 |
model_tr_es = MarianMTModel.from_pretrained(model_es)
|
34 |
model_tr_fr = MarianMTModel.from_pretrained(model_fr)
|
35 |
model_tr_zh = MarianMTModel.from_pretrained(model_zh)
|
36 |
+
model_tr_ar = MarianMTModel.from_pretrained(model_ar)
|
37 |
|
38 |
from faiss import write_index, read_index
|
39 |
import pickle
|
40 |
|
41 |
def load_index(model):
|
42 |
+
# with open('index/'+ model + '_metadata_ref.pkl', 'rb') as f:
|
43 |
+
with open('local_index/'+ model + '_metadata_ref.pkl', 'rb') as f:
|
44 |
loaded_dict = pickle.load(f)
|
45 |
for type in ['tokens','words']:
|
46 |
for kind in ['input', 'output']:
|
|
|
55 |
'en-es': model_es,
|
56 |
'en-fr': model_fr,
|
57 |
'en-zh': model_zh,
|
58 |
+
'en-ar': model_ar,
|
59 |
}
|
60 |
|
61 |
dict_models_tr = {
|
62 |
'en-es': model_tr_es,
|
63 |
'en-fr': model_tr_fr,
|
64 |
'en-zh': model_tr_zh,
|
65 |
+
'en-ar': model_tr_ar,
|
66 |
}
|
67 |
|
68 |
dict_tokenizer_tr = {
|
69 |
'en-es': tokenizer_es,
|
70 |
'en-fr': tokenizer_fr,
|
71 |
'en-zh': tokenizer_zh,
|
72 |
+
'en-ar': tokenizer_ar,
|
73 |
}
|
74 |
+
# dict_reference_faiss = {'en-es':[]}
|
75 |
dict_reference_faiss = {
|
76 |
'en-es': load_index('en-es'),
|
77 |
+
'en-ar': load_index('en-ar'),
|
78 |
+
'en-fr': load_index('en-fr'),
|
79 |
+
'en-zh': load_index('en-zh'),
|
80 |
}
|
81 |
|
82 |
# print("dict", dict_reference_faiss['en-es']['input']['tokens'][1])
|
|
|
702 |
"""
|
703 |
|
704 |
html_tok_target ="""
|
705 |
+
<div id="d3_tok_target"> </div>
|
706 |
"""
|
707 |
|
708 |
html_embd_target= """
|
709 |
+
<div id="d3_embd_target"> </div>
|
710 |
<div id="d3_graph_output_words" class="d3_graph words"></div>
|
711 |
<div id="d3_graph_output_tokens" class="d3_graph tokens"></div>
|
712 |
<div id="similar_output_words" class=""></div>
|
|
|
827 |
"""
|
828 |
1. Select the language pair for the translation
|
829 |
""")
|
830 |
+
radio_c = gr.Radio(choices=['en-zh', 'en-es', 'en-fr', 'en-ar'], value="en-es", label= '', container=False)
|
831 |
gr.Markdown(
|
832 |
"""
|
833 |
2. Source text to translate
|
index/en-ar_input_tokens.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:705af4d667addbc721da76b3099be10fcc437eeea0d30445751e1d7edbd2af19
|
3 |
+
size 754691
|
index/en-ar_input_words.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cac0ee3490002a39df24b27a6a7f570a60333d7f431c2f7aae2c4b1694d64948
|
3 |
+
size 756747
|
index/en-ar_metadata_ref.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37e6a98d587055a03daeceac1959cedd5ad793cf0fec6f1b377b1dcb5caa2c5f
|
3 |
+
size 28805589
|
index/en-ar_output_tokens.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd71fb94d70342945b2abdb658d6b04401c8cb2f98238d40919baecbd8f61781
|
3 |
+
size 1019915
|
index/en-ar_output_words.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:798941013c236a3be852512ddc8ff280969fee61f47336244e03da94d2eb8d52
|
3 |
+
size 892443
|
index/en-es_input_tokens.index
CHANGED
Binary files a/index/en-es_input_tokens.index and b/index/en-es_input_tokens.index differ
|
|
index/en-es_input_words.index
CHANGED
Binary files a/index/en-es_input_words.index and b/index/en-es_input_words.index differ
|
|
index/en-es_metadata_ref.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f766d5a212ded9981ff504628d9ecadb567872e5244297582a3bc4ad0e3b774
|
3 |
+
size 25129757
|
index/en-es_output_tokens.index
CHANGED
Binary files a/index/en-es_output_tokens.index and b/index/en-es_output_tokens.index differ
|
|
index/en-es_output_words.index
CHANGED
Binary files a/index/en-es_output_words.index and b/index/en-es_output_words.index differ
|
|
index/en-fr_input_tokens.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e253777db5a5cc1eb76da856178d841f1a2814e11c7e7d4ac24be817e4983638
|
3 |
+
size 764971
|
index/en-fr_input_words.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:961b08022a7cea12c1dfbb8db0a2812e833980b0fcbd8cdf0455ee70f5ae778d
|
3 |
+
size 756747
|
index/en-fr_metadata_ref.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab1301d87b26b6af5d2cd54ad891ae53b817bfda08a0315f60964ae0bd57e6c5
|
3 |
+
size 27041562
|
index/en-fr_output_tokens.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b205bacecb6e4c8791e7c9820de772501ed974a19d365608646a69fe5cde3ff
|
3 |
+
size 910947
|
index/en-fr_output_words.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c90bf7ac638494a23efed842d3128f113f6ed279413b3362747892d7e9913e77
|
3 |
+
size 873939
|
index/en-zh_input_tokens.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:098a6ed7021dd99b7dbeaf6174373081c73fe9e16375b9de79e56275997c035d
|
3 |
+
size 754691
|
index/en-zh_input_words.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:881ef345b1d0c689be2c98539beeacc0516ff35c571ac2f2eb75a8bb67fb5330
|
3 |
+
size 756747
|
index/en-zh_metadata_ref.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:223617175b67c586d07e10577e3e7168875eec66f7a660d1a3ada40bc05f864c
|
3 |
+
size 22426143
|
index/en-zh_output_tokens.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05a9caebc080364fec8685a5dd4f90bdc6f89a6dd2e39d8e649a0f3969e02639
|
3 |
+
size 880107
|
index/en-zh_output_words.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1293142f7b1639bd7ec7c618d12ca100ddb6ef91d5e20920fa62dacc8f5ee20d
|
3 |
+
size 242747
|