Daniel Nichols committed
Commit: a36b415
Parent(s): 17b14ea

refactor error handling code

Files changed: src/perfguru.py (+15, -20)
src/perfguru.py
CHANGED
@@ -27,16 +27,24 @@ def token_limit_getter(model: str) -> int:
         return token_limits[model]
     return int(5e6)
 
+def check_length(text, model):
+    if model.name.startswith("gpt"):
+        encoder = lambda s: len(tiktoken.encoding_for_model(model.name).encode(text))
+    else:
+        encoder = lambda s: len(s)/4 # 4 char per token heuristic
+
+    token_length = encoder(text)
+    token_limit = token_limit_getter(model.name)
+
+    if token_length >= token_limit:
+        error_helper(f"Prompt is too long. Please try reducing the size of the prompt or code uploaded.")
+
 
 def chat_with_llms(prompt, code_files, profile_file, profile_type):
     model1 = select_random_model()
     model2 = select_random_model()
     formatter1 = select_random_formatter()
     formatter2 = select_random_formatter()
-    encoder1 = None
-    encoder2 = None
-    num_tokens_1 = 0
-    num_tokens_2 = 0
 
     print(f"Selected models: {model1.name} and {model2.name}")
 
@@ -45,22 +53,9 @@ def chat_with_llms(prompt, code_files, profile_file, profile_type):
 
     if formatted1 is None or formatted2 is None:
         error_helper("Failed to format prompt. Please try again.")
-
-    if model1.name[:3] == "gpt":
-        encoder1 = tiktoken.encoding_for_model(model1.name)
-    if model2.name[:3] == "gpt":
-        encoder2 = tiktoken.encoding_for_model(model2.name)
-
-    if encoder1:
-        num_tokens_1 = len(encoder1.encode(formatted1))
-    if encoder2:
-        num_tokens_2 = len(encoder2.encode(formatted2))
-
-    token_limit_1 = token_limit_getter(model1.name)
-    token_limit_2 = token_limit_getter(model2.name)
 
-
-
+    check_length(formatted1, model1)
+    check_length(formatted2, model2)
 
     response1 = model1.get_response(formatted1)
     response2 = model2.get_response(formatted2)
@@ -190,4 +185,4 @@ with gr.Blocks(css=".not-voted p { color: black; } .voted p { color: green; } .r
 
 # Launch the Gradio interface
 if __name__ == '__main__':
-    interface.launch(share=
+    interface.launch(share=False)
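
For readers who want to try the new length check outside the app, the sketch below is a minimal, standalone approximation of the check_length helper added in this commit. The Model dataclass, MODEL_TOKEN_LIMITS table, and PromptTooLongError exception are illustrative stand-ins rather than perfguru's real objects, and error_helper is replaced by a plain exception; only the token-counting logic mirrors the diff (tiktoken for gpt-* model names, a 4-characters-per-token heuristic otherwise).

# Standalone sketch (assumptions noted above); not the actual perfguru module.
from dataclasses import dataclass

import tiktoken


@dataclass
class Model:
    name: str  # stand-in for perfguru's model objects, which expose .name


MODEL_TOKEN_LIMITS = {"gpt-4": 8192}  # example values only


class PromptTooLongError(Exception):
    pass


def token_limit_getter(model_name: str) -> int:
    # Mirrors the file's fallback of int(5e6) for models without a known limit.
    return MODEL_TOKEN_LIMITS.get(model_name, int(5e6))


def check_length(text: str, model: Model) -> None:
    if model.name.startswith("gpt"):
        # Exact count via tiktoken for OpenAI-style model names.
        token_length = len(tiktoken.encoding_for_model(model.name).encode(text))
    else:
        # Rough fallback: roughly 4 characters per token.
        token_length = len(text) / 4
    if token_length >= token_limit_getter(model.name):
        raise PromptTooLongError(
            "Prompt is too long. Please try reducing the size of the prompt or code uploaded."
        )


if __name__ == "__main__":
    check_length("How do I speed up this loop?", Model(name="gpt-4"))
    print("prompt fits within the model's token limit")

Folding the duplicated encoder setup into a single helper, as the diff above does, keeps chat_with_llms free of tokenizer details and applies the same length check to both randomly selected models before any request is sent.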