Allen Park committed on
Commit
e34f0a0
·
1 Parent(s): 6283f19

feat(check token size of context)

Browse files

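* Add a function that returns a boolean indicating whether the approximate token count of the text (characters / 4) is under 8000
* Raise gr.Error if the extracted text of the uploaded file exceeds that token limit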

Files changed (2) hide show
  1. .DS_Store +0 -0
  2. app.py +11 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
@@ -189,6 +189,12 @@ def model_call(question, document, answer, client_base_url):
189
  combined_reasoning = " ".join(reasoning)[1:-1]
190
  return combined_reasoning, score
191
 
 
 
 
 
 
 
192
  def get_filetype(filename):
193
  return filename.split(".")[-1]
194
 
@@ -218,6 +224,11 @@ def upload_file(filepath):
218
  extracted_file_text = extract_text_pymupdf(filepath)
219
  elif filetype == "docx":
220
  extracted_file_text = extract_text_python_docx(filepath)
 
 
 
 
 
221
  return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), extracted_file_text]
222
  else:
223
  return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), extracted_file_text]
 
189
  combined_reasoning = " ".join(reasoning)[1:-1]
190
  return combined_reasoning, score
191
 
192
+ def return_approximate_token_size(text):
193
+ MAX_TOKEN_LENGTH = 8000
194
+ number_of_total_characters = len(text)
195
+ number_of_tokens = number_of_total_characters / 4
196
+ return number_of_tokens < MAX_TOKEN_LENGTH
197
+
198
  def get_filetype(filename):
199
  return filename.split(".")[-1]
200
 
 
224
  extracted_file_text = extract_text_pymupdf(filepath)
225
  elif filetype == "docx":
226
  extracted_file_text = extract_text_python_docx(filepath)
227
+
228
+ # raise an error if the extracted file text is too large
229
+ if not return_approximate_token_size(extracted_file_text):
230
+ raise gr.Error("File is too large to process. Please upload a smaller file.")
231
+
232
  return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), extracted_file_text]
233
  else:
234
  return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), extracted_file_text]