Mazin Karjikar committed on
Commit 6f00050
Parent: f6de56e

Quickstarting llama.cpp (#2)


* added functionality to use local models in .gguf file format

* made HPCToolkit profile loading work and fixed truncated output by setting a context limit

* new log file per interaction, named by the current timestamp down to the millisecond

code_samples/p1.py ADDED
@@ -0,0 +1,7 @@
+
+ def sort(arr):
+     for i in range(len(arr)):
+         for j in range(1,len(arr)):
+             if arr[j] < arr[j-1]:
+                 arr[j],arr[j-1] = arr[j-1],arr[j]
+     return arr
local_models/README.md ADDED
@@ -0,0 +1,3 @@
+ # Local Models
+
+ ### This folder stores the local models used by PerfGuru. On GitHub, this folder will be empty due to the size of the models. When PerfGuru is run on a machine, local models such as Meta-Llama-3 should be placed here and used with llama.cpp.
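
Note: a minimal sketch of how a model placed in this folder is consumed, mirroring the `LocalModel` class added to `src/models.py` below. The filename is simply the one listed in `LOCAL_MODELS` and may differ on your machine.

```python
from llama_cpp import Llama

# Sketch: load a quantized .gguf model stored under local_models/.
# The filename mirrors the LOCAL_MODELS entry in src/models.py; adjust as needed.
llm = Llama(
    model_path="local_models/Meta-Llama-3-8B-Instruct.Q4_K_S.gguf",
    n_ctx=8000,  # context window size, matching the entry added to token_limits.json
)

output = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello, PerfGuru."}],
    max_tokens=4000,
)
print(output["choices"][0]["message"]["content"])
```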
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio==4.39.0
  hatchet==1.4.0
  google-generativeai==0.7.2
  openai==1.37.0
- tiktoken==0.7.0
+ tiktoken==0.7.0
+ llama-cpp-python==0.2.90
src/models.py CHANGED
@@ -6,6 +6,7 @@ import os
  import random
  import openai
  import google.generativeai as genai
+ from llama_cpp import Llama

  class ChatModel(ABC):
      def __init__(self, name):
@@ -78,9 +79,43 @@ class GeminiModel(ChatModel):
          yield response


- AVAILABLE_MODELS = []
+ class LocalModel(ChatModel):

- #AVAILABLE_MODELS.append( DummyModel() )
+     def __init__(self, model: str, model_path: str):
+         super().__init__(model)
+         self.llm = Llama(
+             model_path=model_path,
+             n_ctx=8000,
+         )
+
+     def get_response(self, prompt) -> Generator[str, None, None]:
+
+         output = self.llm.create_chat_completion(
+             messages = [
+                 {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."},
+                 {
+                     "role": "user",
+                     "content": prompt,
+                 }
+             ],
+             max_tokens=4000,
+         )
+
+         result = output["choices"][0]["message"]["content"]
+         for idx in range(len(result)):
+             yield result[:idx+1]
+
+
+ LOCAL_MODELS = [
+     "Meta-Llama-3-8B-Instruct.Q4_K_S",
+ ]
+
+ AVAILABLE_MODELS = [
+     LocalModel(model_name, f"../local_models/{model_name}.gguf")
+     for model_name in LOCAL_MODELS
+ ]
+
+ # AVAILABLE_MODELS.append( DummyModel() )

  if os.environ.get("OPENAI_API_KEY"):
      openai_client = openai.OpenAI()
@@ -91,7 +126,6 @@ if os.environ.get("GOOGLE_API_KEY"):
      AVAILABLE_MODELS.append( GeminiModel("gemini-1.5-flash") )
      AVAILABLE_MODELS.append( GeminiModel("gemini-1.5-pro") )

-
  if not AVAILABLE_MODELS:
      raise ValueError("No models available. Please set OPENAI_API_KEY or GOOGLE_API_KEY environment variables.")
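
Aside: `LocalModel.get_response` above streams by yielding progressively longer prefixes of the finished completion rather than token deltas. A sketch of consuming it outside the Gradio UI (assumes `src/models.py` is importable and the `.gguf` file exists at the configured path):

```python
# Sketch only; model path and prompt are illustrative.
from models import LocalModel

model = LocalModel(
    "Meta-Llama-3-8B-Instruct.Q4_K_S",
    "../local_models/Meta-Llama-3-8B-Instruct.Q4_K_S.gguf",
)

final = ""
for partial in model.get_response("Why is this nested loop slow?"):
    final = partial  # each yield is the full response text so far
print(final)
```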
 
src/perf_guru_logs/README.md ADDED
@@ -0,0 +1,4 @@
+ # Logging Interactions
+
+ ### This folder stores a JSON log of each interaction with PerfGuru.
+
src/perfguru.py CHANGED
@@ -21,7 +21,7 @@ def code_upload(code_file_select):


  def token_limit_getter(model: str) -> int:
-     with open("token_limits.json", "r") as f:
+     with open("../token_limits.json", "r") as f:
          token_limits = json.load(f)
      if model in token_limits:
          return token_limits[model]
@@ -37,7 +37,7 @@ def check_length(text, model):
      token_limit = token_limit_getter(model.name)

      if token_length >= token_limit:
-         error_helper(f"Prompt is too long. Please try reducing the size of the prompt or code uploaded.")
+         error_helper("Prompt is too long. Please try reducing the size of the prompt or code uploaded.")


  def chat_with_llms(prompt, code_files, profile_file, profile_type):
@@ -93,7 +93,9 @@ def log_interaction(prompt, vote, response1, model1, formatter1, full_prompt1, r
          "timestamp": datetime.datetime.now().isoformat()
      }

-     log_file_path = "perf_guru_log.json"
+     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
+
+     log_file_path = f"perf_guru_logs/log_{timestamp}.json"
      if os.path.exists(log_file_path):
          with open(log_file_path, "r") as log_file:
              logs = json.load(log_file)
@@ -105,6 +107,7 @@ def log_interaction(prompt, vote, response1, model1, formatter1, full_prompt1, r
      # Write updated logs to file
      with open(log_file_path, "w") as log_file:
          json.dump(logs, log_file, indent=4)
+

  def handle_vote(prompt, vote, response1, source1, full_prompt1, response2, source2, full_prompt2):
      model1, formatter1 = source1.split(" + ")
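
Aside: the `%f` directive in the new timestamp yields microseconds (six digits), so the `[:-3]` slice trims the log filename to millisecond precision, per the commit message. For example:

```python
import datetime

# Same format string as log_interaction; [:-3] drops microseconds down to milliseconds.
ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
print(f"perf_guru_logs/log_{ts}.json")
# e.g. perf_guru_logs/log_2024-08-30_14-05-12-347.json
```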
src/profiles.py CHANGED
@@ -2,6 +2,8 @@
  """
  import json
  import os
+ import tempfile as tf
+ import zipfile as zf
  from typing import Literal, Optional

  import hatchet as ht
@@ -14,7 +16,11 @@ class Profile:

      def _load(self, profile_path: os.PathLike, profile_type: Literal["HPCToolkit", "CProfile", "Caliper"]) -> ht.GraphFrame:
          if profile_type == "HPCToolkit":
-             return ht.GraphFrame.from_hpctoolkit(profile_path)
+             toolkit_dir = profile_path[profile_path.rfind("/")+1:-4] # last dir in path, without ".zip" [:-4]
+             with tf.TemporaryDirectory() as temp_dir:
+                 with zf.ZipFile(profile_path, 'r') as zip_ref:
+                     zip_ref.extractall(temp_dir)
+                 return ht.GraphFrame.from_hpctoolkit(os.path.join(temp_dir, toolkit_dir))
          elif profile_type == "CProfile":
              return ht.GraphFrame.from_cprofile(profile_path)
          elif profile_type == "Caliper":
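
Aside: the hunk above implies the uploaded HPCToolkit profile is a `.zip` whose basename matches the database directory inside it. A sketch of producing such an archive, where `hpctoolkit-database` is a placeholder directory name:

```python
import shutil

# Create hpctoolkit-database.zip containing a top-level hpctoolkit-database/ directory,
# so _load can strip ".zip" from the uploaded path and find that directory after extraction.
shutil.make_archive("hpctoolkit-database", "zip",
                    root_dir=".", base_dir="hpctoolkit-database")
```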
src/rag.py CHANGED
@@ -61,9 +61,53 @@ class BasicPromptFormatter(PerfGuruPromptFormatter):

          return f"Code:\n{concatenated_code}\n\n{profile_type} Profile:\n{profile_content}\n\n{prompt}"

+ class SlowestFunctionPromptFormatter(PerfGuruPromptFormatter):

- AVAILABLE_FORMATTERS = []
- AVAILABLE_FORMATTERS.append(BasicPromptFormatter())
+     def __init__(self):
+         super().__init__("slowest_function")
+
+     def format_prompt(self, prompt: str, code_paths: List[PathLike], profile_path: Optional[PathLike] = None, profile_type: Optional[str] = None, error_fn: Optional[callable] = None) -> str:
+         if not code_paths:
+             if error_fn:
+                 error_fn("No code files provided. At least one code file must be provided.")
+             return None
+
+         concatenated_code = ""
+         code_file_contents = self._read_code_files(code_paths)
+         for code_path, content in code_file_contents.items():
+             fname = basename(code_path)
+             concatenated_code += f"{fname}:\n{content}\n\n"
+
+         if profile_path:
+             if not profile_type:
+                 if error_fn:
+                     error_fn("Profile type must be provided if a profile file is provided.")
+                 return None
+             k = 1
+             profile = self._read_profile(profile_path, profile_type)
+             slowest = profile.gf.dataframe.nlargest(k, 'time')
+             function_names = [slowest['name'].values[i] for i in range(k) if i < len(slowest['name'].values)]
+             execution_times = [slowest['time'].values[i] for i in range(k) if i < len(slowest['name'].values)]
+             # print(profile_content)
+             hot_path = profile.gf.hot_path()
+             hot_path_functions = []
+
+             for node in hot_path:
+                 if "name" in node.frame.attrs:
+                     hot_path_functions.append(node.frame["name"])
+             hot_path_functions = hot_path_functions[:k]
+
+             profile_content = (f"The slowest functions are {function_names} and they took {execution_times} seconds, respectively." +
+                                f" Also, these functions were in the hot path: {hot_path_functions}.")
+             print(profile_content)
+
+         else:
+             profile_content = ""
+
+         return f"Code:\n{concatenated_code}\n\n{profile_type} Profile:\n{profile_content}\n\n{prompt}"
+
+ AVAILABLE_FORMATTERS = [SlowestFunctionPromptFormatter()]
+ # AVAILABLE_FORMATTERS.append(BasicPromptFormatter())


  def select_random_formatter() -> PerfGuruPromptFormatter:
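
Aside: with `k = 1`, the `profile_content` string injected into the prompt by the new formatter looks roughly like the output below (the function name and time are made-up placeholders, not real measurements):

```python
# Illustration only; "sort" and 4.21 are placeholder values.
function_names = ["sort"]
execution_times = [4.21]
hot_path_functions = ["sort"]
print(f"The slowest functions are {function_names} and they took {execution_times} seconds, respectively."
      f" Also, these functions were in the hot path: {hot_path_functions}.")
# The slowest functions are ['sort'] and they took [4.21] seconds, respectively. Also, these functions were in the hot path: ['sort'].
```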
token_limits.json CHANGED
@@ -5,5 +5,6 @@
      "gpt-4": 8192,
      "gpt-3.5-turbo": 16385,
      "gemini-1.5-flash": 1048576,
-     "gemini-1.5-pro": 2097152
+     "gemini-1.5-pro": 2097152,
+     "Meta-Llama-3-8B-Instruct.Q4_K_S": 8000
  }