Mazin Karjikar committed • Commit 6f00050 • 1 Parent(s): f6de56e

Quickstarting llama.cpp (#2)

* added functionality to use local models in .gguf file format
* made HPCToolkit work and fixed truncated output by setting a context limit
* new log file per interaction, named by the current timestamp down to the millisecond
- code_samples/p1.py +7 -0
- local_models/README.md +3 -0
- requirements.txt +2 -1
- src/models.py +37 -3
- src/perf_guru_logs/README.md +4 -0
- src/perfguru.py +6 -3
- src/profiles.py +7 -1
- src/rag.py +46 -2
- token_limits.json +2 -1
code_samples/p1.py ADDED
@@ -0,0 +1,7 @@
+
+def sort(arr):
+    for i in range(len(arr)):
+        for j in range(1,len(arr)):
+            if arr[j] < arr[j-1]:
+                arr[j],arr[j-1] = arr[j-1],arr[j]
+    return arr
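The new code_samples/p1.py is a deliberately quadratic bubble-style sort (every outer iteration re-sweeps the whole array), which makes it a handy slow sample for PerfGuru. As a rough sketch, not part of this commit, here is one way to collect a cProfile run for it; the helper script is assumed to sit next to p1.py, the output filename is illustrative, and whether hatchet's CProfile reader in src/profiles.py accepts this exact pstats dump is not verified here.

```python
# Hypothetical helper script (assumed to live in code_samples/ next to p1.py).
import cProfile
import random

from p1 import sort  # the O(n^2) sample sort added above

data = [random.randint(0, 10_000) for _ in range(2_000)]
cProfile.run("sort(data)", "p1.prof")  # writes binary pstats output to p1.prof
```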
local_models/README.md ADDED
@@ -0,0 +1,3 @@
+# Local Models
+
+### This folder stores the local models used by PerfGuru. On GitHub, this folder will be empty due to the size of the models. When PerfGuru is run on a machine, local models such as Meta-Llama-3 should be placed here and used with llama.cpp.
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio==4.39.0
 hatchet==1.4.0
 google-generativeai==0.7.2
 openai==1.37.0
-tiktoken==0.7.0
+tiktoken==0.7.0
+llama-cpp-python==0.2.90
src/models.py CHANGED
@@ -6,6 +6,7 @@ import os
 import random
 import openai
 import google.generativeai as genai
+from llama_cpp import Llama
 
 class ChatModel(ABC):
     def __init__(self, name):
@@ -78,9 +79,43 @@ class GeminiModel(ChatModel):
         yield response
 
 
-
 
-
+class LocalModel(ChatModel):
+
+    def __init__(self, model: str, model_path: str):
+        super().__init__(model)
+        self.llm = Llama(
+            model_path=model_path,
+            n_ctx=8000,
+        )
+
+    def get_response(self, prompt) -> Generator[str, None, None]:
+
+        output = self.llm.create_chat_completion(
+            messages = [
+                {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."},
+                {
+                    "role": "user",
+                    "content": prompt,
+                }
+            ],
+            max_tokens=4000,
+        )
+
+        result = output["choices"][0]["message"]["content"]
+        for idx in range(len(result)):
+            yield result[:idx+1]
+
+
+LOCAL_MODELS = [
+    "Meta-Llama-3-8B-Instruct.Q4_K_S",
+]
+
+AVAILABLE_MODELS = [
+    LocalModel(model_name, f"../local_models/{model_name}.gguf")
+    for model_name in LOCAL_MODELS
+]
+
+# AVAILABLE_MODELS.append( DummyModel() )
 
 if os.environ.get("OPENAI_API_KEY"):
     openai_client = openai.OpenAI()
@@ -91,7 +126,6 @@ if os.environ.get("GOOGLE_API_KEY"):
     AVAILABLE_MODELS.append( GeminiModel("gemini-1.5-flash") )
     AVAILABLE_MODELS.append( GeminiModel("gemini-1.5-pro") )
 
-
 if not AVAILABLE_MODELS:
     raise ValueError("No models available. Please set OPENAI_API_KEY or GOOGLE_API_KEY environment variables.")
 
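A note on the new LocalModel: create_chat_completion returns the full completion up front, and get_response then yields ever-longer prefixes of it, so callers see a stream-like interface even though generation itself is not incremental. A minimal consumption sketch, not part of the commit; the prompt string and model path below are illustrative:

```python
# Minimal sketch of consuming LocalModel.get_response; each yielded value is
# the response prefix so far, so the last item is the complete answer.
model = LocalModel(
    "Meta-Llama-3-8B-Instruct.Q4_K_S",
    "../local_models/Meta-Llama-3-8B-Instruct.Q4_K_S.gguf",
)

final = ""
for partial in model.get_response("Why is this nested loop slow?"):
    final = partial  # re-render `final` in the UI on each iteration
print(final)
```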
src/perf_guru_logs/README.md ADDED
@@ -0,0 +1,4 @@
+# Logging Interactions
+
+### This folder stores a JSON log of each interaction with PerfGuru.
+
src/perfguru.py CHANGED
@@ -21,7 +21,7 @@ def code_upload(code_file_select):
 
 
 def token_limit_getter(model: str) -> int:
-    with open("token_limits.json", "r") as f:
+    with open("../token_limits.json", "r") as f:
         token_limits = json.load(f)
     if model in token_limits:
         return token_limits[model]
@@ -37,7 +37,7 @@ def check_length(text, model):
     token_limit = token_limit_getter(model.name)
 
     if token_length >= token_limit:
-        error_helper(
+        error_helper("Prompt is too long. Please try reducing the size of the prompt or code uploaded.")
 
 
 def chat_with_llms(prompt, code_files, profile_file, profile_type):
@@ -93,7 +93,9 @@ def log_interaction(prompt, vote, response1, model1, formatter1, full_prompt1, r
         "timestamp": datetime.datetime.now().isoformat()
     }
 
-
+    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
+
+    log_file_path = f"perf_guru_logs/log_{timestamp}.json"
     if os.path.exists(log_file_path):
         with open(log_file_path, "r") as log_file:
             logs = json.load(log_file)
@@ -105,6 +107,7 @@ def log_interaction(prompt, vote, response1, model1, formatter1, full_prompt1, r
     # Write updated logs to file
     with open(log_file_path, "w") as log_file:
         json.dump(logs, log_file, indent=4)
+
 
 def handle_vote(prompt, vote, response1, source1, full_prompt1, response2, source2, full_prompt2):
     model1, formatter1 = source1.split(" + ")
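For reference, the strftime pattern used in log_interaction keeps milliseconds by formatting the microsecond field (%f, six digits) and trimming the last three characters. A standalone illustration; the example output is made up:

```python
import datetime

# Same pattern as log_interaction: date, time, and milliseconds.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
print(f"perf_guru_logs/log_{timestamp}.json")
# e.g. perf_guru_logs/log_2024-08-15_13-42-07-123.json
```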
src/profiles.py CHANGED
@@ -2,6 +2,8 @@
 """
 import json
 import os
+import tempfile as tf
+import zipfile as zf
 from typing import Literal, Optional
 
 import hatchet as ht
@@ -14,7 +16,11 @@ class Profile:
 
     def _load(self, profile_path: os.PathLike, profile_type: Literal["HPCToolkit", "CProfile", "Caliper"]) -> ht.GraphFrame:
         if profile_type == "HPCToolkit":
-
+            toolkit_dir = profile_path[profile_path.rfind("/")+1:-4]  # last dir in path, without ".zip" [:-4]
+            with tf.TemporaryDirectory() as temp_dir:
+                with zf.ZipFile(profile_path, 'r') as zip_ref:
+                    zip_ref.extractall(temp_dir)
+                return ht.GraphFrame.from_hpctoolkit(os.path.join(temp_dir, toolkit_dir))
         elif profile_type == "CProfile":
            return ht.GraphFrame.from_cprofile(profile_path)
         elif profile_type == "Caliper":
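The HPCToolkit branch assumes the uploaded profile is a .zip whose top-level directory shares the archive's name: the slice profile_path[profile_path.rfind("/")+1:-4] strips the directory prefix and the ".zip" suffix. A small illustration of that slicing, with a hypothetical upload path:

```python
# Hypothetical upload path; the archive is assumed to contain a directory
# named "hpctoolkit-database" at its top level.
profile_path = "/tmp/uploads/hpctoolkit-database.zip"
toolkit_dir = profile_path[profile_path.rfind("/") + 1:-4]
print(toolkit_dir)  # -> "hpctoolkit-database"
# After extraction, hatchet is pointed at os.path.join(temp_dir, toolkit_dir).
```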
src/rag.py CHANGED
@@ -61,9 +61,53 @@ class BasicPromptFormatter(PerfGuruPromptFormatter):
 
         return f"Code:\n{concatenated_code}\n\n{profile_type} Profile:\n{profile_content}\n\n{prompt}"
 
-
-
+class SlowestFunctionPromptFormatter(PerfGuruPromptFormatter):
+
+    def __init__(self):
+        super().__init__("slowest_function")
+
+    def format_prompt(self, prompt: str, code_paths: List[PathLike], profile_path: Optional[PathLike] = None, profile_type: Optional[str] = None, error_fn: Optional[callable] = None) -> str:
+        if not code_paths:
+            if error_fn:
+                error_fn("No code files provided. At least one code file must be provided.")
+            return None
+
+        concatenated_code = ""
+        code_file_contents = self._read_code_files(code_paths)
+        for code_path, content in code_file_contents.items():
+            fname = basename(code_path)
+            concatenated_code += f"{fname}:\n{content}\n\n"
+
+        if profile_path:
+            if not profile_type:
+                if error_fn:
+                    error_fn("Profile type must be provided if a profile file is provided.")
+                return None
+            k = 1
+            profile = self._read_profile(profile_path, profile_type)
+            slowest = profile.gf.dataframe.nlargest(k, 'time')
+            function_names = [slowest['name'].values[i] for i in range(k) if i < len(slowest['name'].values)]
+            execution_times = [slowest['time'].values[i] for i in range(k) if i < len(slowest['name'].values)]
+            # print(profile_content)
+            hot_path = profile.gf.hot_path()
+            hot_path_functions = []
+
+            for node in hot_path:
+                if "name" in node.frame.attrs:
+                    hot_path_functions.append(node.frame["name"])
+            hot_path_functions = hot_path_functions[:k]
+
+            profile_content = (f"The slowest functions are {function_names} and they took {execution_times} seconds, respectively." +
+                               f" Also, these functions were in the hot path: {hot_path_functions}.")
+            print(profile_content)
+
+        else:
+            profile_content = ""
+
+        return f"Code:\n{concatenated_code}\n\n{profile_type} Profile:\n{profile_content}\n\n{prompt}"
+
+AVAILABLE_FORMATTERS = [SlowestFunctionPromptFormatter()]
+# AVAILABLE_FORMATTERS.append(BasicPromptFormatter())
 
 
 def select_random_formatter() -> PerfGuruPromptFormatter:
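To make the new formatter's effect concrete, here is an illustrative rendering of the profile_content string it injects into the prompt for k = 1. The function name and timing below are made up; only the string shape matches the formatter above.

```python
# Made-up values, only to show the shape of the injected profile summary.
function_names = ["sort"]
execution_times = [12.3]
hot_path_functions = ["sort"]

profile_content = (f"The slowest functions are {function_names} and they took {execution_times} seconds, respectively." +
                   f" Also, these functions were in the hot path: {hot_path_functions}.")
print(profile_content)
# The slowest functions are ['sort'] and they took [12.3] seconds, respectively. Also, these functions were in the hot path: ['sort'].
```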
token_limits.json CHANGED
@@ -5,5 +5,6 @@
     "gpt-4": 8192,
     "gpt-3.5-turbo": 16385,
     "gemini-1.5-flash": 1048576,
-    "gemini-1.5-pro": 2097152
+    "gemini-1.5-pro": 2097152,
+    "Meta-Llama-3-8B-Instruct.Q4_K_S": 8000
 }