|
import gradio as gr |
|
from time import time |
|
|
|
import torch |
|
import os |
|
|
|
import numpy as np
import faiss
from sklearn.manifold import TSNE
|
|
|
from transformers import AutoTokenizer, MarianMTModel
|
|
dir_path = os.path.dirname(os.path.realpath(__file__)) |
|
print(dir_path) |
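
# Global cache filled by first_function(): FAISS indexes and vocabularies for the reference sentences.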
|
|
|
metadata_all = {} |
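
# Helsinki-NLP MarianMT checkpoints for the three supported language pairs.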
|
model_es = "Helsinki-NLP/opus-mt-en-es" |
|
model_fr = "Helsinki-NLP/opus-mt-en-fr" |
|
model_zh = "Helsinki-NLP/opus-mt-en-zh" |
|
|
|
tokenizer_es = AutoTokenizer.from_pretrained(model_es) |
|
tokenizer_fr = AutoTokenizer.from_pretrained(model_fr) |
|
tokenizer_zh = AutoTokenizer.from_pretrained(model_zh) |
|
|
|
model_tr_es = MarianMTModel.from_pretrained(model_es) |
|
model_tr_fr = MarianMTModel.from_pretrained(model_fr) |
|
model_tr_zh = MarianMTModel.from_pretrained(model_zh) |
|
|
|
dict_models = { |
|
'en-es': model_es, |
|
'en-fr': model_fr, |
|
'en-zh': model_zh, |
|
} |
|
|
|
dict_models_tr = { |
|
'en-es': model_tr_es, |
|
'en-fr': model_tr_fr, |
|
'en-zh': model_tr_zh, |
|
} |
|
|
|
dict_tokenizer_tr = { |
|
'en-es': tokenizer_es, |
|
'en-fr': tokenizer_fr, |
|
'en-zh': tokenizer_zh, |
|
} |
|
|
|
from faiss import write_index, read_index |
|
import pickle |
|
|
|
|
|
|
|
def translation_model(w1, model):
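    """Translate one sentence with the selected MarianMT model.

    Returns the decoded text, the full generate() output, the input token ids
    and the encoder / decoder token embeddings.
    """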
|
inputs = dict_tokenizer_tr[model](w1, return_tensors="pt") |
|
|
|
input_embeddings = dict_models_tr[model].get_encoder().embed_tokens(inputs.input_ids) |
|
|
|
print(inputs) |
|
num_ret_seq = 1 |
|
translated = dict_models_tr[model].generate(**inputs, |
|
num_beams=5, |
|
num_return_sequences=num_ret_seq, |
|
return_dict_in_generate=True, |
|
output_attentions =False, |
|
output_hidden_states = True, |
|
output_scores=True,) |
|
|
|
tgt_text = dict_tokenizer_tr[model].decode(translated.sequences[0], skip_special_tokens=True) |
|
|
|
target_embeddings = dict_models_tr[model].get_decoder().embed_tokens(translated.sequences) |
|
|
|
return tgt_text, translated, inputs.input_ids, input_embeddings, target_embeddings |
|
|
|
def create_vocab_multiple(embeddings_list, model):
    """Build a token-level vocabulary from sentence embeddings.

    Args:
        embeddings_list (list): dicts with per-sentence 'tokens' and 'embeddings'.
        model (str): language-pair key (e.g. 'en-es') used to pick the tokenizer for decoding.

    Returns:
        dict: vocabulary of token embeddings keyed by token id.
        list: the token ids of each sentence, in order.
    """
|
print("START VOCAB CREATION MULTIPLE \n \n ") |
|
vocab = {} |
|
sentence_tokens_text_list = [] |
|
for embeddings in embeddings_list: |
|
tokens_id = embeddings['tokens'] |
|
for sent_i, sentence in enumerate(tokens_id): |
|
sentence_tokens = [] |
|
for tok_i, token in enumerate(sentence): |
|
sentence_tokens.append(token) |
|
if not (token in vocab): |
|
vocab[token] = { |
|
'token' : token, |
|
'count': 1, |
|
|
|
'text': dict_tokenizer_tr[model].decode([token]), |
|
|
|
'embed': embeddings['embeddings'][sent_i][tok_i]} |
|
else: |
|
vocab[token]['count'] = vocab[token]['count'] + 1 |
|
|
|
sentence_tokens_text_list.append(sentence_tokens) |
|
print("END VOCAB CREATION MULTIPLE \n \n ") |
|
return vocab, sentence_tokens_text_list |
|
|
|
def vocab_words_all_prefix(token_embeddings, model, suffix="@@", prefix='▁'):
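    """Regroup SentencePiece tokens into words (the '▁' prefix marks a word start),
    averaging each word's token embeddings.

    Returns the word vocabulary and the per-sentence word lists.
    """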
|
vocab = {} |
|
|
|
sentence_words_text_list = [] |
|
if prefix : |
|
n_prefix = len(prefix) |
|
for input_sentences in token_embeddings: |
|
|
|
for sent_i, sentence in enumerate(input_sentences['tokens']): |
|
words_text_list = [] |
|
|
|
word = '' |
|
tokens_ids = [] |
|
embeddings = [] |
|
ids_to_tokens = dict_tokenizer_tr[model].convert_ids_to_tokens(sentence) |
|
|
|
|
|
to_save= False |
|
for tok_i, token_text in enumerate(ids_to_tokens): |
|
token_id = sentence[tok_i] |
|
if token_text[:n_prefix] == prefix : |
|
|
|
if to_save: |
|
vocab[word] = { |
|
'word' : word, |
|
'text': word, |
|
'count': 1, |
|
'tokens_ids' : tokens_ids, |
|
'embed': np.mean(np.array(embeddings), 0).tolist() |
|
} |
|
words_text_list.append(word) |
|
|
|
tokens_ids = [token_id] |
|
embeddings = [input_sentences['embeddings'][sent_i][tok_i]] |
|
word = token_text[n_prefix:] |
|
|
|
to_save = True |
|
|
|
else : |
|
if (token_text in dict_tokenizer_tr[model].special_tokens_map.values()): |
|
|
|
if to_save: |
|
|
|
vocab[word] = { |
|
'word' : word, |
|
'text': word, |
|
'count': 1, |
|
'tokens_ids' : tokens_ids, |
|
'embed': np.mean(np.array(embeddings), 0).tolist() |
|
} |
|
words_text_list.append(word) |
|
|
|
|
|
tokens_ids = [token_id] |
|
embeddings = [input_sentences['embeddings'][sent_i][tok_i]] |
|
vocab[token_text] = { |
|
'word' : token_text, |
|
'count': 1, |
|
                            'text': token_text,
|
'tokens_ids' : tokens_ids, |
|
'embed': np.mean(np.array(embeddings), 0).tolist() |
|
} |
|
words_text_list.append(token_text) |
|
to_save = False |
|
else: |
|
|
|
to_save = True |
|
word += token_text |
|
tokens_ids.append(token_id) |
|
embeddings.append(input_sentences['embeddings'][sent_i][tok_i]) |
|
if to_save: |
|
|
|
|
if not (word in vocab): |
|
vocab[word] = { |
|
'word' : word, |
|
'count': 1, |
|
'text': word, |
|
'tokens_ids' : tokens_ids, |
|
'embed': np.mean(np.array(embeddings), 0).tolist() |
|
} |
|
words_text_list.append(word) |
|
else: |
|
vocab[word]['count'] = vocab[word]['count'] + 1 |
|
sentence_words_text_list.append(words_text_list) |
|
|
|
return vocab, sentence_words_text_list |
|
|
|
|
|
|
|
|
|
def create_index_voronoi(vocab):
    """Build a FAISS IVF ("Voronoi") index over the token vocabulary.

    Returns the index and a metadata dict mapping FAISS row ids back to tokens.
    """
|
d = 1024 |
|
nb_embds = [] |
|
metadata = {} |
|
i_pos = 0 |
|
for key_token, token_values in vocab.items(): |
|
nb_embds.append(token_values['embed']) |
|
metadata[i_pos] = {'token': token_values['token'], 'text': token_values['text']} |
|
i_pos += 1 |
|
|
|
|
|
|
|
xb = np.array(nb_embds).astype('float32') |
|
|
|
d = len(xb[0]) |
|
|
|
nlist = 5 |
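    # IndexIVFFlat partitions the vectors into nlist Voronoi cells around an exact L2 quantizer.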
|
quantizer = faiss.IndexFlatL2(d) |
|
index = faiss.IndexIVFFlat(quantizer, d, nlist) |
|
index.train(xb) |
|
index.add(xb) |
|
|
|
|
|
return index, metadata |
|
|
|
def create_index_voronoi_words(vocab):
    """Build a FAISS IVF ("Voronoi") index over the word vocabulary.

    Returns the index and a metadata dict mapping FAISS row ids back to words.
    """
|
d = 1024 |
|
nb_embds = [] |
|
metadata = {} |
|
i_pos = 0 |
|
for key_token, token_values in vocab.items(): |
|
nb_embds.append(token_values['embed']) |
|
metadata[i_pos] = {'word': token_values['word'], 'tokens': token_values['tokens_ids'],'text': token_values['text']} |
|
i_pos += 1 |
|
|
|
|
|
|
|
xb = np.array(nb_embds).astype('float32') |
|
|
|
d = len(xb[0]) |
|
|
|
nlist = 5 |
|
quantizer = faiss.IndexFlatL2(d) |
|
index = faiss.IndexIVFFlat(quantizer, d, nlist) |
|
index.train(xb) |
|
index.add(xb) |
|
|
|
|
|
return index, metadata |
|
|
|
def search_query_vocab(index, vocab_queries, topk=10, limited_search=[]):
    """Search the FAISS index with every entry of a word vocabulary.

    Args:
        index: FAISS index built over the reference word vocabulary.
        vocab_queries (dict): word vocabulary; each entry holds 'word', 'count',
            'text', 'tokens_ids' and 'embed'.
        topk (int, optional): number of nearest neighbours to return. Defaults to 10.

    Returns:
        Distance matrix D, index matrix I, and a metadata dict mapping query rows to words.
    """
|
|
|
nb_q_embds = [] |
|
metadata = {} |
|
qi_pos = 0 |
|
for key , token_values in vocab_queries.items(): |
|
|
|
metadata[qi_pos] = {'word': token_values['word'], 'tokens': token_values['tokens_ids'], 'text': token_values['text']} |
|
qi_pos += 1 |
|
nb_q_embds.append(token_values['embed']) |
|
|
|
xq = np.array(nb_q_embds).astype('float32') |
|
|
|
D,I = index.search(xq, topk) |
|
|
|
return D,I, metadata |
|
|
|
def search_query_vocab_token(index, vocab_queries, topk=10, limited_search=[]):
    """Search the FAISS index with every entry of a token vocabulary.

    Returns:
        Distance matrix D, index matrix I, and a metadata dict mapping query rows to tokens.
    """
|
|
|
nb_q_embds = [] |
|
metadata = {} |
|
qi_pos = 0 |
|
for key , token_values in vocab_queries.items(): |
|
|
|
metadata[qi_pos] = {'token': token_values['token'], 'text': token_values['text']} |
|
qi_pos += 1 |
|
nb_q_embds.append(token_values['embed']) |
|
|
|
xq = np.array(nb_q_embds).astype('float32') |
|
|
|
D,I = index.search(xq, topk) |
|
|
|
return D,I, metadata |
|
|
|
def build_search(query_embeddings, model, type="input"):
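    """Build token- and word-level vocabularies for the query sentences and search them
    against the reference indexes stored in metadata_all[type]."""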
|
global metadata_all |
|
|
|
|
|
vocab_queries, sentence_tokens_list = create_vocab_multiple(query_embeddings, model) |
|
    words_vocab_queries, sentence_words_list = vocab_words_all_prefix(query_embeddings, model, suffix="@@", prefix="▁")
|
|
|
index_vor_tokens = metadata_all[type]['tokens'][1] |
|
md_tokens = metadata_all[type]['tokens'][2] |
|
D, I, meta = search_query_vocab_token(index_vor_tokens, vocab_queries) |
|
|
|
qi_pos = 0 |
|
similar_tokens = {} |
|
|
|
for dist, ind in zip(D,I): |
|
try: |
|
|
|
similar_tokens[str(meta[qi_pos]['token'])] = { |
|
'token': meta[qi_pos]['token'], |
|
'text': meta[qi_pos]['text'], |
|
|
|
|
|
"similar_topk": [md_tokens[i_index]['token'] for i_index in ind if (i_index != -1) ], |
|
"distance": [dist[i] for (i, i_index) in enumerate(ind) if (i_index != -1)], |
|
} |
|
|
|
        except Exception:
|
print("\n ERROR ", qi_pos, dist, ind) |
|
qi_pos += 1 |
|
|
|
|
|
index_vor_words = metadata_all[type]['words'][1] |
|
md_words = metadata_all[type]['words'][2] |
|
|
|
Dw, Iw, metaw = search_query_vocab(index_vor_words, words_vocab_queries) |
|
|
|
qi_pos = 0 |
|
|
|
similar_words = {} |
|
for dist, ind in zip(Dw,Iw): |
|
try: |
|
|
|
similar_words[str(metaw[qi_pos]['word']) ] = { |
|
'word': metaw[qi_pos]['word'], |
|
'text': metaw[qi_pos]['word'], |
|
"similar_topk": [md_words[i_index]['word'] for i_index in ind if (i_index != -1) ], |
|
"distance": [dist[i] for (i, i_index) in enumerate(ind) if (i_index != -1)], |
|
} |
|
|
|
        except Exception:
|
print("\n ERROR ", qi_pos, dist, ind) |
|
qi_pos += 1 |
|
|
|
|
|
return {'tokens': {'D': D, 'I': I, 'meta': meta, 'vocab_queries': vocab_queries, 'similar':similar_tokens, 'sentence_key_list': sentence_tokens_list}, |
|
'words': {'D':Dw,'I': Iw, 'meta': metaw, 'vocab_queries':words_vocab_queries, 'sentence_key_list': sentence_words_list, 'similar': similar_words} |
|
} |
|
|
|
def build_reference(all_embeddings, model): |
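    """Build token- and word-level vocabularies and FAISS indexes for the reference sentences."""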
|
|
|
|
|
vocab, sentence_tokens = create_vocab_multiple(all_embeddings,model) |
|
    words_vocab, sentences = vocab_words_all_prefix(all_embeddings, model, suffix="@@", prefix="▁")
|
|
|
index_tokens, meta_tokens = create_index_voronoi(vocab) |
|
index_words, meta_words = create_index_voronoi_words(words_vocab) |
|
|
|
|
|
|
|
return {'tokens': [vocab, index_tokens, meta_tokens], |
|
'words': [words_vocab, index_words, meta_words] |
|
} |
|
|
|
|
|
def embds_input_projection_vocab(vocab, key="token"): |
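    """Project the vocabulary embeddings to 2D with t-SNE (zeros are used as a fallback if t-SNE fails)."""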
|
t0 = time() |
|
|
|
nb_ids = [] |
|
nb_embds = [] |
|
nb_text = [] |
|
    tsne_error = []
|
for _ , token_values in vocab.items(): |
|
tnse_error.append([0,0]) |
|
nb_ids.append(token_values[key]) |
|
nb_text.append(token_values['text']) |
|
nb_embds.append(token_values['embed']) |
|
|
|
X = np.array(nb_embds).astype('float32') |
|
try: |
|
tsne = TSNE(random_state=0, n_iter=1000) |
|
tsne_results = tsne.fit_transform(X) |
|
|
|
tsne_results = np.c_[tsne_results, nb_ids, nb_text, range(len(nb_ids))] |
|
    except Exception:
|
        tsne_results = np.c_[tsne_error, nb_ids, nb_text, range(len(nb_ids))]
|
|
|
t1 = time() |
|
print("t-SNE: %.2g sec" % (t1 - t0)) |
|
print(tsne_results) |
|
|
|
return tsne_results.tolist() |
|
|
|
def filtered_projection(similar_key, vocab, type="input", key="word"): |
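    """Project the query vocabulary together with the reference entries that appear in its top-k similar results."""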
|
global metadata_all |
|
vocab_proj = vocab.copy() |
|
|
|
source_words_voc_similar = set() |
|
|
|
for key_i in similar_key: |
|
words_set = similar_key[key_i] |
|
source_words_voc_similar.update(words_set['similar_topk']) |
|
|
|
print(len(source_words_voc_similar)) |
|
|
|
source_embeddings_filtered = {key_value: metadata_all[type][key][0][key_value] for key_value in source_words_voc_similar} |
|
vocab_proj.update(source_embeddings_filtered) |
|
|
|
try: |
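        # key[:-1]: 'words' -> 'word', 'tokens' -> 'token' (the singular field name inside each vocab entry)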
|
result_TSNE = embds_input_projection_vocab(vocab_proj, key=key[:-1]) |
|
dict_projected_embds_all = {str(embds[2]): [embds[0], embds[1], embds[2], embds[3], embds[4]] for embds in result_TSNE} |
|
    except Exception:
|
print('TSNE error', type, key) |
|
dict_projected_embds_all = {} |
|
|
|
|
|
|
|
|
|
return dict_projected_embds_all |
|
|
|
def first_function(w1, model): |
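    """Reference step: translate the reference sentences and rebuild the FAISS indexes
    kept in the global metadata_all."""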
|
global metadata_all |
|
|
|
|
|
sentences = w1.split("\n") |
|
all_sentences = [] |
|
translated_text = '' |
|
input_embeddings = [] |
|
output_embeddings = [] |
|
for sentence in sentences : |
|
|
|
params = translation_model(sentence, model) |
|
all_sentences.append(params) |
|
|
|
translated_text += params[0] + ' \n' |
|
input_embeddings.append({ |
|
'embeddings': params[3].detach(), |
|
'tokens': params[2].tolist(), |
|
|
|
|
|
}) |
|
output_embeddings.append({ |
|
'embeddings' : params[4].detach(), |
|
'tokens': params[1].sequences.tolist(), |
|
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
result_input = build_reference(input_embeddings,model) |
|
result_output = build_reference(output_embeddings,model) |
|
|
|
|
|
metadata_all = {'input': result_input, 'output': result_output} |
|
|
|
|
|
|
|
return [translated_text, params] |
|
|
|
def first_function_tr(w1, model, var2={}): |
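    """Analysis step: translate the query sentences, search them against the reference
    indexes and return the similarity / t-SNE data used by the JavaScript front end (plotsjs.js)."""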
|
global metadata_all |
|
|
|
print("SEARCH -- ") |
|
sentences = w1.split("\n") |
|
all_sentences = [] |
|
translated_text = '' |
|
input_embeddings = [] |
|
output_embeddings = [] |
|
for sentence in sentences : |
|
|
|
params = translation_model(sentence, model) |
|
all_sentences.append(params) |
|
|
|
translated_text += params[0] + ' \n' |
|
input_embeddings.append({ |
|
'embeddings': params[3].detach(), |
|
'tokens': params[2].tolist(), |
|
|
|
}) |
|
output_embeddings.append({ |
|
'embeddings' : params[4].detach(), |
|
'tokens': params[1].sequences.tolist(), |
|
|
|
}) |
|
|
|
|
|
|
|
result_search = {} |
|
result_search['input'] = build_search(input_embeddings, model, type='input') |
|
result_search['output'] = build_search(output_embeddings, model, type='output') |
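
    # Assemble the payload consumed by plotsjs.js: for each side (input/output) and each
    # granularity (tokens/words), keep the similar items, their t-SNE projection and the
    # per-sentence key lists.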
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json_out = {'input': {'tokens': {}, 'words': {}}, 'output': {'tokens': {}, 'words': {}}} |
|
dict_projected = {} |
|
for type in ['input', 'output']: |
|
dict_projected[type] = {} |
|
for key in ['tokens', 'words']: |
|
similar_key = result_search[type][key]['similar'] |
|
vocab = result_search[type][key]['vocab_queries'] |
|
dict_projected[type][key] = filtered_projection(similar_key, vocab, type=type, key=key) |
|
json_out[type][key]['similar_queries'] = similar_key |
|
json_out[type][key]['tnse'] = dict_projected[type][key] |
|
json_out[type][key]['key_text_list'] = result_search[type][key]['sentence_key_list'] |
|
|
|
return [translated_text, [ json_out, json_out['output']['words'], json_out['output']['tokens']] ] |
|
|
|
|
|
from pathlib import Path |
|
|
|
html = """ |
|
<html> |
|
<script async src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script> |
|
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script>
|
<script async data-require="d3@3.5.3" data-semver="3.5.3" |
|
src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.3/d3.js"></script> |
|
<body> |
|
<div id="select_div"> |
|
<select id="select_type" class="form-select" aria-label="select example" hidden> |
|
<option selected value="words">Words</option> |
|
<option value="tokens">Tokens</option> |
|
</select> |
|
</div> |
|
<div id="d3_embed_div"> |
|
<div class="row"> |
|
<div class="col-6"> |
|
<div id="d3_embeds_input_words" class="d3_embed words"></div> |
|
</div> |
|
<div class="col-6"> |
|
<div id="d3_embeds_output_words" class="d3_embed words"></div> |
|
|
|
</div> |
|
<div class="col-6"> |
|
<div id="d3_embeds_input_tokens" class="d3_embed tokens"></div> |
|
</div> |
|
<div class="col-6"> |
|
<div id="d3_embeds_output_tokens" class="d3_embed tokens"></div> |
|
</div> |
|
</div> |
|
</div> |
|
<div id="d3_graph_div"> |
|
<div class="row"> |
|
<div class="col-4"> |
|
<div id="d3_graph_input_words" class="d3_graph words"></div> |
|
|
|
</div> |
|
<div class="col-4"> |
|
<div id="similar_input_words" class=""></div> |
|
</div> |
|
<div class="col-4"> |
|
<div id="d3_graph_output_words" class="d3_graph words"></div> |
|
<div id="similar_output_words" class="d3_graph words"></div> |
|
</div> |
|
</div> |
|
<div class="row"> |
|
<div class="col-6"> |
|
<div id="d3_graph_input_tokens" class="d3_graph tokens"></div> |
|
<div id="similar_input_tokens" class="d3_graph tokens"></div> |
|
</div> |
|
<div class="col-6"> |
|
<div id="d3_graph_output_tokens" class="d3_graph tokens"></div> |
|
<div id="similar_output_tokens" class="d3_graph tokens"></div> |
|
</div> |
|
</div> |
|
</div> |
|
</body> |
|
|
|
</html> |
|
""" |
|
html0 = """ |
|
<html> |
|
<script async src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script> |
|
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script>
|
<script async data-require="d3@3.5.3" data-semver="3.5.3" |
|
src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.3/d3.js"></script> |
|
<body> |
|
<div id="select_div"> |
|
<select id="select_type" class="form-select" aria-label="select example" hidden> |
|
<option selected value="words">Words</option> |
|
<option value="tokens">Tokens</option> |
|
</select> |
|
</div> |
|
</body> |
|
|
|
</html> |
|
""" |
|
|
|
html_col1 = """ |
|
<div id="d3_graph_input_words" class="d3_graph words"></div> |
|
<div id="d3_graph_input_tokens" class="d3_graph tokens"></div> |
|
""" |
|
|
|
html_col2 = """ |
|
<div id="similar_input_words" class=""></div> |
|
<div id="similar_output_words" class=""></div> |
|
<div id="similar_input_tokens" class=" "></div> |
|
<div id="similar_output_tokens" class=" "></div> |
|
|
|
""" |
|
|
|
|
|
html_col3 = """ |
|
<div id="d3_graph_output_words" class="d3_graph words"></div> |
|
<div id="d3_graph_output_tokens" class="d3_graph tokens"></div> |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
def second_function(w1, j2):
|
|
|
|
|
print("second_function -- after the js", w1,j2) |
|
return "transition to second js function finished." |
|
|
|
paths = [] |
|
def save_index(model):
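    """Pickle metadata_all and write the four FAISS indexes (input/output × tokens/words) to disk for download."""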
|
names = [] |
|
with open(model + '_metadata_ref.pkl', 'wb') as f: |
|
pickle.dump(metadata_all, f) |
|
names.append(model + '_metadata_ref.pkl') |
|
for type in ['tokens','words']: |
|
for kind in ['input', 'output']: |
|
|
|
name = model + "_" + kind + "_"+ type + ".index" |
|
write_index(metadata_all[kind][type][1], name) |
|
names.append(name) |
|
print("in save index done") |
|
return gr.File(names) |
|
|
|
|
|
with gr.Blocks(js="plotsjs.js") as demo: |
|
gr.Markdown( |
|
""" |
|
# MAKE NMT Workshop \t `Embeddings representation` |
|
""") |
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
model_radio_c = gr.Radio(choices=['en-es', 'en-zh', 'en-fr'], value="en-es", label= '', container=False) |
|
|
|
with gr.Column(scale=2): |
|
gr.Markdown( |
|
""" |
|
### Reference Translation Sentences |
|
Enter at least 50 sentences to be used as comparison. |
|
This is submitted just once. |
|
""") |
|
in_text = gr.Textbox(lines=2, label="reference source text") |
|
out_text = gr.Textbox(label="reference target text", interactive=False) |
|
out_text2 = gr.Textbox(visible=False) |
|
var2 = gr.JSON(visible=False) |
|
btn = gr.Button("Reference Translation") |
|
|
|
|
|
|
|
save_index_btn = gr.Button("Generate index files to download ",) |
|
tab2_outputs = gr.File() |
|
input = tab2_outputs |
|
|
|
|
|
|
|
|
|
with gr.Column(scale=3): |
|
|
|
gr.Markdown( |
|
""" |
|
### Translation Sentences |
|
Sentences to be analysed. |
|
""") |
|
in_text_tr = gr.Textbox(lines=2, label="source text") |
|
out_text_tr = gr.Textbox(label="target text", interactive=False) |
|
out_text2_tr = gr.Textbox(visible=False) |
|
var2_tr = gr.JSON(visible=False) |
|
btn_faiss= gr.Button("Translation ") |
|
gr.Button("Download", link="/file=en-es_input_tokens.index") |
|
|
|
with gr.Row(): |
|
|
|
with gr.Column(scale=1): |
|
input_mic = gr.HTML(html0) |
|
input_html2 = gr.HTML(html_col2) |
|
|
|
with gr.Column(scale=2): |
|
input_html1 = gr.HTML(html_col1) |
|
|
|
|
|
with gr.Column(scale=2): |
|
input_html3 = gr.HTML(html_col3) |
|
|
|
|
|
btn.click(first_function, [in_text, model_radio_c], [out_text,var2], js="(in_text,model_radio_c) => testFn_out(in_text,model_radio_c)") |
|
btn_faiss.click(first_function_tr, [in_text_tr, model_radio_c], [out_text_tr,var2_tr], js="(in_text_tr,model_radio_c) => testFn_out(in_text_tr,model_radio_c)") |
|
|
|
|
|
out_text.change(second_function, [out_text, var2], out_text2, js="(out_text,var2) => testFn_out_json(var2)") |
|
out_text_tr.change(second_function, [out_text_tr, var2_tr], out_text2_tr, js="(out_text_tr,var2_tr) => testFn_out_json_tr(var2_tr)") |
|
save_index_btn.click(save_index, [model_radio_c], [tab2_outputs]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
demo.launch(allowed_paths=["./", ".", "/"]) |