Walid Aissa committed on
Commit
77eba15
·
1 Parent(s): 288a5de
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -78,16 +78,21 @@ def answer_question(question):
78
 
79
  # ======== Tokenize ========
80
  # Apply the tokenizer to the input text, treating them as a text-pair.
 
 
81
  input_ids = tokenizer.encode(question, context)
 
82
 
83
  # Report how long the input sequence is. If it is longer than 512 tokens, divide it into multiple sequences.
84
 
85
- print(f"Query has {len(input_ids)} tokens, divided in {len(input_ids)//513 + 1}.\n")
 
 
86
 
87
  input_ids_split = []
88
- for group in range(len(input_ids)//513):
89
- input_ids_split.append(input_ids[512*group:512*(group+1)-1])
90
- input_ids_split.append(input_ids[512*(len(input_ids)//513):len(input_ids)-1])
91
 
92
  scores = []
93
  for input in input_ids_split:
@@ -154,6 +159,8 @@ def answer_question(question):
154
 
155
  final_answer = max(scores, key=lambda x: x[0] + x[1])[2]
156
 
 
 
157
  # =====[ DEFINE INTERFACE ]===== #'
158
  title = "Azza Conversational Agent"
159
  examples = [
 
78
 
79
  # ======== Tokenize ========
80
  # Apply the tokenizer to the input text, treating them as a text-pair.
81
+
82
+
83
  input_ids = tokenizer.encode(question, context)
84
+ question_ids = input_ids[:input_ids.index(tokenizer.sep_token_id)+1]
85
 
86
  # Report how long the input sequence is. If it is longer than 512 tokens, divide it into multiple sequences.
87
 
88
+ length_of_group = 512 - len(question_ids)
89
+ input_ids_without_question = input_ids[input_ids.index(tokenizer.sep_token_id)+1:]
90
+ print(f"Query has {len(input_ids)} tokens, divided in {len(input_ids_without_question)//length_of_group + 1}.\n")
91
 
92
  input_ids_split = []
93
+ for group in range(len(input_ids_without_question)//length_of_group + 1):
94
+ input_ids_split.append(question_ids + input_ids_without_question[length_of_group*group:length_of_group*(group+1)-1])
95
+ input_ids_split.append(question_ids + input_ids_without_question[length_of_group*(len(input_ids_without_question)//length_of_group + 1):len(input_ids_without_question)-1])
96
 
97
  scores = []
98
  for input in input_ids_split:
 
159
 
160
  final_answer = max(scores, key=lambda x: x[0] + x[1])[2]
161
 
162
+ return final_answer
163
+
164
  # =====[ DEFINE INTERFACE ]===== #'
165
  title = "Azza Conversational Agent"
166
  examples = [