Spaces:
Sleeping
Sleeping
File size: 7,304 Bytes
8ab167c 8c0b7ca 8ab167c 6f00050 8ab167c 8c0b7ca 6f00050 8c0b7ca 6f00050 8c0b7ca 6f00050 8c0b7ca 6f00050 8c0b7ca 8ab167c 8c0b7ca 8ab167c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
""" Techniques for formatting the prompts that are passed to the LLMs.
These need to handle 2 major tasks:
1. Taking a directory of source code and embedding it in the prompt meaningfully (and possibly concatenating it).
2. Embedding a performance profile in the prompt if available.
"""
from abc import ABC, abstractmethod
from typing import Optional, List, Mapping
from os import PathLike
from os.path import basename
import random
from function_grabber import get_function_at_line
from profiles import Profile
class PerfGuruPromptFormatter(ABC):
    """Abstract base for strategies that build the final prompt text.

    Concrete formatters implement :meth:`format_prompt`. The helper methods
    here load the inputs a prompt may embed: the contents of the source files
    and, optionally, a performance profile.
    """

    def __init__(self, name: str):
        """Store ``name``, the label identifying this formatting strategy."""
        self.name = name

    def _read_code_files(self, code_paths: List[PathLike]) -> Mapping[PathLike, str]:
        """Read every path in ``code_paths`` and return ``{path: file text}``.

        Keys are the paths exactly as passed in, in iteration order; a path
        listed more than once yields a single entry.

        Raises:
            OSError: if a file cannot be opened.
            UnicodeDecodeError: if a file is not valid UTF-8.
        """
        code_files = {}
        for code_path in code_paths:
            # Decode explicitly as UTF-8 so the text does not depend on the
            # platform's locale default encoding.
            with open(code_path, "r", encoding="utf-8") as file:
                code_files[code_path] = file.read()
        return code_files

    def _read_profile(self, profile_path: PathLike, profile_type: str) -> "Profile":
        """Construct a ``Profile`` from ``profile_path`` and ``profile_type``."""
        return Profile(profile_path, profile_type)

    @abstractmethod
    def format_prompt(
        self,
        prompt: str,
        code_paths: List[PathLike],
        profile_path: Optional[PathLike] = None,
        profile_type: Optional[str] = None,
        error_fn: Optional[callable] = None,
    ) -> Optional[str]:
        """Build the prompt text; implemented by each concrete formatter.

        Args:
            prompt: The user's request.
            code_paths: Source files to embed.
            profile_path: Optional path to a performance profile.
            profile_type: Profile format; required when ``profile_path`` is set.
            error_fn: Optional callback that receives a message describing
                invalid input.

        Returns:
            The formatted prompt, or ``None`` when the input was rejected.
        """
class BasicPromptFormatter(PerfGuruPromptFormatter):
    """Formatter that embeds every source file in full plus the whole profile."""

    def __init__(self):
        super().__init__("basic")

    def format_prompt(
        self,
        prompt: str,
        code_paths: List[PathLike],
        profile_path: Optional[PathLike] = None,
        profile_type: Optional[str] = None,
        error_fn: Optional[callable] = None,
    ) -> Optional[str]:
        """Return a prompt containing all code, the profile (if any) and ``prompt``.

        Layout: a ``Code:`` section in which each file appears under its base
        name, then -- only when ``profile_path`` is given -- a
        ``<profile_type> Profile:`` section holding the string returned by
        ``Profile.profile_to_tree_str()``, then ``prompt`` itself.

        Args:
            prompt: The user's request; placed last.
            code_paths: Source files to embed; must be non-empty.
            profile_path: Optional path to a performance profile.
            profile_type: Profile format; required when ``profile_path`` is set.
            error_fn: Optional callback invoked with a message when the input
                is rejected.

        Returns:
            The prompt string, or ``None`` if ``code_paths`` is empty or a
            profile was supplied without a type.
        """
        if not code_paths:
            if error_fn:
                error_fn("No code files provided. At least one code file must be provided.")
            return None

        code_file_contents = self._read_code_files(code_paths)
        concatenated_code = "".join(
            f"{basename(code_path)}:\n{content}\n\n"
            for code_path, content in code_file_contents.items()
        )

        if not profile_path:
            # No profile: leave the profile section out entirely instead of
            # emitting a "None Profile:" header with an empty body.
            return f"Code:\n{concatenated_code}\n\n{prompt}"

        if not profile_type:
            if error_fn:
                error_fn("Profile type must be provided if a profile file is provided.")
            return None

        profile = self._read_profile(profile_path, profile_type)
        profile_content = profile.profile_to_tree_str()
        return f"Code:\n{concatenated_code}\n\n{profile_type} Profile:\n{profile_content}\n\n{prompt}"
class SlowestFunctionPromptFormatter(PerfGuruPromptFormatter):
    """Formatter that embeds all code plus a summary of the ``k`` slowest functions."""

    def __init__(self, k):
        """Store ``k``, the maximum number of functions listed in the summary."""
        super().__init__("slowest_function")
        self.k = k

    def format_prompt(
        self,
        prompt: str,
        code_paths: List[PathLike],
        profile_path: Optional[PathLike] = None,
        profile_type: Optional[str] = None,
        error_fn: Optional[callable] = None,
    ) -> Optional[str]:
        """Return a prompt with all code and a short textual profile summary.

        The summary names the ``k`` rows of the profile dataframe with the
        largest ``time`` value, their times, and up to ``k`` function names
        taken from the profile's hot path. The summary is also printed to
        stdout. Without ``profile_path`` the profile section is omitted.

        Args:
            prompt: The user's request; placed last.
            code_paths: Source files to embed; must be non-empty.
            profile_path: Optional path to a performance profile.
            profile_type: Profile format; required when ``profile_path`` is set.
            error_fn: Optional callback invoked with a message when the input
                is rejected.

        Returns:
            The prompt string, or ``None`` if ``code_paths`` is empty or a
            profile was supplied without a type.
        """
        if not code_paths:
            if error_fn:
                error_fn("No code files provided. At least one code file must be provided.")
            return None

        code_file_contents = self._read_code_files(code_paths)
        concatenated_code = "".join(
            f"{basename(code_path)}:\n{content}\n\n"
            for code_path, content in code_file_contents.items()
        )

        if not profile_path:
            # No profile: leave the profile section out entirely instead of
            # emitting a "None Profile:" header with an empty body.
            return f"Code:\n{concatenated_code}\n\n{prompt}"

        if not profile_type:
            if error_fn:
                error_fn("Profile type must be provided if a profile file is provided.")
            return None

        profile = self._read_profile(profile_path, profile_type)

        # nlargest already caps the result at k rows. tolist() converts the
        # values to built-in Python objects, so the lists interpolated below
        # render as plain numbers/strings rather than NumPy scalar reprs such
        # as "np.float64(1.5)".
        slowest = profile.gf.dataframe.nlargest(self.k, 'time')
        function_names = slowest['name'].tolist()
        execution_times = slowest['time'].tolist()

        # Keep only hot-path nodes whose frame carries a name, at most k of them.
        hot_path_functions = [
            node.frame["name"]
            for node in profile.gf.hot_path()
            if "name" in node.frame.attrs
        ][:self.k]

        profile_content = (f"The slowest functions are {function_names} and they took {execution_times} seconds, respectively." +
                           f" Also, these functions were in the hot path: {hot_path_functions}.")
        print(profile_content)

        return f"Code:\n{concatenated_code}\n\n{profile_type} Profile:\n{profile_content}\n\n{prompt}"
class SlowestFunctionParsedPromptFormatter(PerfGuruPromptFormatter):
    """Formatter that tries to embed only the source of the slowest function.

    When the function's source cannot be extracted (or no profile is given),
    it falls back to embedding every code file in full.
    """

    def __init__(self):
        super().__init__("slowest_function_parsed")

    def format_prompt(
        self,
        prompt: str,
        code_paths: List[PathLike],
        profile_path: Optional[PathLike] = None,
        profile_type: Optional[str] = None,
        error_fn: Optional[callable] = None,
    ) -> Optional[str]:
        """Return a prompt focused on the single slowest function of the profile.

        The profile row with the largest ``time`` supplies a function name and
        a line number. Each code file is offered to ``get_function_at_line``
        until one yields code; that snippet then replaces the full listing.
        The extracted snippet and the profile sentence are printed to stdout.
        Without ``profile_path`` the profile section is omitted.

        Args:
            prompt: The user's request; placed last.
            code_paths: Source files to search/embed; must be non-empty.
            profile_path: Optional path to a performance profile.
            profile_type: Profile format; required when ``profile_path`` is set.
            error_fn: Optional callback invoked with a message when the input
                is rejected.

        Returns:
            The prompt string, or ``None`` if ``code_paths`` is empty or a
            profile was supplied without a type.
        """
        if not code_paths:
            if error_fn:
                error_fn("No code files provided. At least one code file must be provided.")
            return None

        code_file_contents = None  # loaded lazily, at most once
        concatenated_code = ""
        profile_section = ""

        if profile_path:
            if not profile_type:
                if error_fn:
                    error_fn("Profile type must be provided if a profile file is provided.")
                return None

            profile = self._read_profile(profile_path, profile_type)
            slowest = profile.gf.dataframe.nlargest(1, 'time')
            names = slowest['name'].values
            lines = slowest['line'].values
            function_name = names[0] if len(names) > 0 else None
            line_number = lines[0] if len(lines) > 0 else None

            if line_number:
                code_file_contents = self._read_code_files(code_paths)
                for code_path in code_file_contents:
                    filename = basename(code_path)
                    # NOTE(review): the helper receives the base name, not the
                    # full path -- confirm that is what get_function_at_line
                    # expects. Its first return value is treated as the
                    # extracted source (falsy when nothing was found).
                    code, _ = get_function_at_line(filename, str(line_number))
                    if code:
                        # Label the snippet with the file it was found in.
                        # (This used an unbound name and raised NameError.)
                        concatenated_code = f"{filename}:\n{code}\n\n"
                        print("Only function code:", concatenated_code)
                        break

            profile_content = f"The slowest function is {function_name}."
            print(profile_content)
            profile_section = f"{profile_type} Profile:\n{profile_content}\n\n"

        if not concatenated_code:
            # Nothing was extracted: fall back to embedding every file in full.
            if code_file_contents is None:
                code_file_contents = self._read_code_files(code_paths)
            concatenated_code = "".join(
                f"{basename(code_path)}:\n{content}\n\n"
                for code_path, content in code_file_contents.items()
            )

        # profile_section is empty when no profile was supplied, so no
        # "None Profile:" header is emitted in that case.
        return f"Code:\n{concatenated_code}\n\n{profile_section}{prompt}"
# Pool of formatter instances that select_random_formatter() draws from.
# BasicPromptFormatter is currently not part of the pool.
AVAILABLE_FORMATTERS = [
    SlowestFunctionPromptFormatter(k=1),
    SlowestFunctionPromptFormatter(k=5),
    SlowestFunctionPromptFormatter(k=10),
    SlowestFunctionParsedPromptFormatter(),
]
def select_random_formatter() -> PerfGuruPromptFormatter:
    """Return one entry of ``AVAILABLE_FORMATTERS`` picked with ``random.choice``."""
    chosen = random.choice(AVAILABLE_FORMATTERS)
    return chosen