Mazin Karjikar committed
Commit 2c0daa4 · 1 Parent(s): 090e2ae
added token limits and error handling for long prompts
Files changed:
- src/perfguru.py +29 -0
- src/token_limits.json +9 -0
src/perfguru.py
CHANGED
@@ -3,6 +3,7 @@ import json
 import os
 import datetime
 from itertools import zip_longest
+import tiktoken
 
 from models import select_random_model
 from rag import select_random_formatter
@@ -19,11 +20,23 @@ def code_upload(code_file_select):
     return gr.Button(interactive=True)
 
 
+def token_limit_getter(model: str) -> int:
+    with open("token_limits.json", "r") as f:
+        token_limits = json.load(f)
+    if model in token_limits:
+        return token_limits[model]
+    return int(2e6)
+
+
 def chat_with_llms(prompt, code_files, profile_file, profile_type):
     model1 = select_random_model()
     model2 = select_random_model()
     formatter1 = select_random_formatter()
     formatter2 = select_random_formatter()
+    encoder1 = None
+    encoder2 = None
+    num_tokens_1 = 0
+    num_tokens_2 = 0
 
     print(f"Selected models: {model1.name} and {model2.name}")
 
@@ -33,6 +46,22 @@ def chat_with_llms(prompt, code_files, profile_file, profile_type):
     if formatted1 is None or formatted2 is None:
         error_helper("Failed to format prompt. Please try again.")
 
+    if model1.name[:3] == "gpt":
+        encoder1 = tiktoken.encoding_for_model(model1.name)
+    if model2.name[:3] == "gpt":
+        encoder2 = tiktoken.encoding_for_model(model2.name)
+
+    if encoder1:
+        num_tokens_1 = encoder1.encode(formatted1)
+    if encoder2:
+        num_tokens_2 = encoder2.encode(formatted2)
+
+    token_limit_1 = token_limit_getter(model1.name)
+    token_limit_2 = token_limit_getter(model2.name)
+
+    if num_tokens_1 >= token_limit_1 or num_tokens_2 >= token_limit_2:
+        error_helper("Prompt is too long. Please try again.")
+
     response1 = model1.get_response(formatted1)
     response2 = model2.get_response(formatted2)
 
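The length check above relies on tiktoken, which maps an OpenAI model name to its tokenizer; encode() returns a list of token ids, so the number of tokens is the length of that list. A minimal standalone sketch of the same check, using a hypothetical prompt and model name and a fallback encoding for models tiktoken does not recognize (an assumption for this sketch, not part of the commit):

import tiktoken

def count_tokens(text: str, model_name: str) -> int:
    # encoding_for_model() only knows OpenAI model names; fall back to a
    # generic encoding for anything else (sketch assumption).
    try:
        encoder = tiktoken.encoding_for_model(model_name)
    except KeyError:
        encoder = tiktoken.get_encoding("cl100k_base")
    # encode() returns a list of token ids; the token count is its length.
    return len(encoder.encode(text))

if count_tokens("Explain why this loop is slow.", "gpt-4o") >= 128000:
    print("Prompt is too long. Please try again.")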
src/token_limits.json
ADDED
@@ -0,0 +1,9 @@
+{
+    "gpt-4o": 128000,
+    "gpt-4o-mini": 128000,
+    "gpt-4-turbo": 128000,
+    "gpt-4": 8192,
+    "gpt-3.5-turbo": 16385,
+    "gemini-1.5-flash": 1048576,
+    "gemini-1.5-pro": 2097152
+}
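The new config maps each supported model name to its context window in tokens. A minimal sketch of how it could be consumed, mirroring token_limit_getter from the diff above (assuming the file is resolved relative to the app's working directory):

import json

# Known models get their configured limit; anything else falls back to a
# large default, as token_limit_getter does in the diff above.
with open("token_limits.json", "r") as f:
    token_limits = json.load(f)

for model in ("gpt-4", "gemini-1.5-pro", "some-unknown-model"):
    limit = token_limits.get(model, int(2e6))
    print(f"{model}: limit of {limit} tokens")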