Gabriela Nicole Gonzalez Saez committed
Commit 1c44501
2 Parent(s): 24a8fba 37413a8

Merge branch 'main' of https://huggingface.co/spaces/gabrielanicole/literacy

Files changed (3):
  1. app.py +29 -21
  2. pre-requirements.txt +1 -0
  3. requirements.txt +4 -4
app.py CHANGED
@@ -71,9 +71,11 @@ dict_tokenizer_tr = {
     'en-sw': tokenizer_sw,
 }
 
-dict_reference_faiss = {
-    'en-es': load_index('en-es'),
-}
+# dict_reference_faiss = {
+#     'en-es': load_index('en-es'),
+# }
+
+# print("dict", dict_reference_faiss['en-es']['input']['tokens'][1])
 
 saliency_examples = [
     "Peace of Mind: Protection for consumers.",
@@ -753,7 +755,7 @@ def first_function(w1, model):
         # 'texts' : dict_tokenizer_tr[model].decode(params[1].sequences.tolist())
         })
 
-    ## load_reference;
+    ## load_reference; ERROR
     ## Build FAISS index
     # ---> preload faiss using the respective model with a initial dataset.
     ## dict_reference_faiss[model] = metadata_all [per language]
@@ -763,22 +765,25 @@ def first_function(w1, model):
 
     ## Build FAISS index
     # ---> preload faiss using the respective model with a initial dataset.
-    result_search = {}
-    result_search['input'] = build_search(input_embeddings, model, type='input')
-    result_search['output'] = build_search(output_embeddings, model, type='output')
-
-    json_out = {'input': {'tokens': {}, 'words': {}}, 'output': {'tokens': {}, 'words': {}}}
-    dict_projected = {}
-    for type in ['input', 'output']:
-        dict_projected[type] = {}
-        for key in ['tokens', 'words']:
-            similar_key = result_search[type][key]['similar']
-            vocab = result_search[type][key]['vocab_queries']
-            dict_projected[type][key] = filtered_projection(similar_key, vocab, model, type=type, key=key)
-            json_out[type][key]['similar_queries'] = similar_key
-            json_out[type][key]['tnse'] = dict_projected[type][key]
-            json_out[type][key]['key_text_list'] = result_search[type][key]['sentence_key_list']
 
+    ### to uncomment gg1 ###
+    # result_search = {}
+    # result_search['input'] = build_search(input_embeddings, model, type='input')
+    # result_search['output'] = build_search(output_embeddings, model, type='output')
+
+    # json_out = {'input': {'tokens': {}, 'words': {}}, 'output': {'tokens': {}, 'words': {}}}
+    # dict_projected = {}
+    # for type in ['input', 'output']:
+    #     dict_projected[type] = {}
+    #     for key in ['tokens', 'words']:
+    #         similar_key = result_search[type][key]['similar']
+    #         vocab = result_search[type][key]['vocab_queries']
+    #         dict_projected[type][key] = filtered_projection(similar_key, vocab, model, type=type, key=key)
+    #         json_out[type][key]['similar_queries'] = similar_key
+    #         json_out[type][key]['tnse'] = dict_projected[type][key]
+    #         json_out[type][key]['key_text_list'] = result_search[type][key]['sentence_key_list']
+    ### to uncomment gg1 ###
+
     ## bertviz
     # paramsbv, tgtbv = get_bertvis_data(w1, model)
 
@@ -787,8 +792,11 @@ def first_function(w1, model):
     html_att_dec = params[3][1]
    html_att_cross = params[4][1]
 
-
-    params = [params[0], params[1], json_out, params[2][0], params[3][0], params[4][0]]
+    ### to uncomment gg1 ###
+    # params = [params[0], params[1], json_out, params[2][0], params[3][0], params[4][0]]
+    ### to uncomment gg1 ###
+
+    params = [params[0], params[1], [], params[2][0], params[3][0], params[4][0]]
     # params.append([tgt, params['params'], params['html2'].data]
 
     return [translated_text, params, html_att_enc, html_att_dec, html_att_cross]
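The block commented out above builds a FAISS nearest-neighbour search over the input/output embeddings. Since load_index, build_search and filtered_projection are defined elsewhere in app.py and are not shown in this diff, the following is only a minimal sketch of the underlying FAISS pattern (an exact L2 index over reference embeddings, then a k-nearest-neighbour lookup); the function names, array shapes and k value are illustrative assumptions, not the Space's actual implementation.

# Minimal FAISS sketch; names and shapes are illustrative assumptions only.
import numpy as np
import faiss  # provided by the faiss-cpu pin added in this commit

def build_reference_index(reference_vectors: np.ndarray) -> faiss.IndexFlatL2:
    # Exact L2 index over reference embeddings of shape (n_vectors, dim).
    index = faiss.IndexFlatL2(reference_vectors.shape[1])
    index.add(reference_vectors.astype(np.float32))
    return index

def query_similar(index: faiss.IndexFlatL2, query_vectors: np.ndarray, k: int = 5):
    # Return (distances, ids) of the k nearest reference rows for each query row.
    return index.search(query_vectors.astype(np.float32), k)

# Toy usage with random embeddings, just to show the call pattern:
rng = np.random.default_rng(0)
reference = rng.normal(size=(1000, 512)).astype(np.float32)
queries = rng.normal(size=(3, 512)).astype(np.float32)
distances, ids = query_similar(build_reference_index(reference), queries, k=5)
print(ids.shape)  # (3, 5): five nearest reference rows per query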
pre-requirements.txt ADDED
@@ -0,0 +1 @@
+faiss-cpu==1.8.0
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-bertviz
-jupyter
-scikit-learn
-faiss-cpu
+bertviz
+jupyter
+scikit-learn
+faiss-cpu==1.8.0.post1
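pre-requirements.txt (installed before requirements.txt on Hugging Face Spaces) pins faiss-cpu==1.8.0, while requirements.txt pins faiss-cpu==1.8.0.post1. A quick way to confirm which build the running Space actually resolves, assuming faiss-cpu imports under its standard name faiss:

# Sanity check for the pinned faiss build at runtime.
import faiss  # import name of the faiss-cpu package
print(faiss.__version__)  # expected to report a 1.8.0 build matching the pins above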