Mazin Karjikar committed on
Commit 2c0daa4 · 1 Parent(s): 090e2ae

added token limits and error handling for long prompts

Files changed (2)
  1. src/perfguru.py +29 -0
  2. src/token_limits.json +9 -0
src/perfguru.py CHANGED
@@ -3,6 +3,7 @@ import json
 import os
 import datetime
 from itertools import zip_longest
+import tiktoken
 
 from models import select_random_model
 from rag import select_random_formatter
@@ -19,11 +20,23 @@ def code_upload(code_file_select):
     return gr.Button(interactive=True)
 
 
+def token_limit_getter(model: str) -> int:
+    with open("token_limits.json", "r") as f:
+        token_limits = json.load(f)
+    if model in token_limits:
+        return token_limits[model]
+    return int(2e6)
+
+
 def chat_with_llms(prompt, code_files, profile_file, profile_type):
     model1 = select_random_model()
     model2 = select_random_model()
     formatter1 = select_random_formatter()
     formatter2 = select_random_formatter()
+    encoder1 = None
+    encoder2 = None
+    num_tokens_1 = 0
+    num_tokens_2 = 0
 
     print(f"Selected models: {model1.name} and {model2.name}")
 
@@ -33,6 +46,22 @@ def chat_with_llms(prompt, code_files, profile_file, profile_type):
     if formatted1 is None or formatted2 is None:
         error_helper("Failed to format prompt. Please try again.")
 
+    if model1.name[:3] == "gpt":
+        encoder1 = tiktoken.encoding_for_model(model1.name)
+    if model2.name[:3] == "gpt":
+        encoder2 = tiktoken.encoding_for_model(model2.name)
+
+    if encoder1:
+        num_tokens_1 = len(encoder1.encode(formatted1))
+    if encoder2:
+        num_tokens_2 = len(encoder2.encode(formatted2))
+
+    token_limit_1 = token_limit_getter(model1.name)
+    token_limit_2 = token_limit_getter(model2.name)
+
+    if num_tokens_1 >= token_limit_1 or num_tokens_2 >= token_limit_2:
+        error_helper("Prompt is too long. Please try again.")
+
     response1 = model1.get_response(formatted1)
     response2 = model2.get_response(formatted2)
 
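For reviewers: the check added above boils down to counting prompt tokens with tiktoken and rejecting prompts that reach the model's cap. Below is a minimal standalone sketch of that logic; the helper name prompt_fits, the cl100k_base fallback encoding, and the 8192 limit are illustrative assumptions, not values taken from this commit.

import tiktoken

def prompt_fits(prompt: str, model: str, limit: int) -> bool:
    # encoding_for_model raises KeyError for model names tiktoken does not know
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")  # assumed fallback encoding
    # encode() returns a list of token ids; its length is the token count
    return len(encoding.encode(prompt)) < limit

print(prompt_fits("Why is this loop slow?", "gpt-4", 8192))  # True for a short prompt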
src/token_limits.json ADDED
@@ -0,0 +1,9 @@
+{
+    "gpt-4o": 128000,
+    "gpt-4o-mini": 128000,
+    "gpt-4-turbo": 128000,
+    "gpt-4": 8192,
+    "gpt-3.5-turbo": 16385,
+    "gemini-1.5-flash": 1048576,
+    "gemini-1.5-pro": 2097152
+}
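As a usage note, token_limit_getter simply indexes this table by model name and falls back to int(2e6) tokens for anything not listed. A quick sketch of that lookup, assuming the JSON file is read from the working directory:

import json

with open("token_limits.json", "r") as f:
    token_limits = json.load(f)

print(token_limits.get("gpt-4o", int(2e6)))          # 128000, listed model
print(token_limits.get("unlisted-model", int(2e6)))  # 2000000, default cap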