"""
Techniques for formatting the prompts that are passed to the LLMs.

These need to handle 2 major tasks:
1. Taking a directory of source code and embedding it in the prompt
   meaningfully (and possibly concatenating it).
2. Embedding a performance profile in the prompt if available.
"""
from abc import ABC, abstractmethod
import logging
from typing import Callable, Optional, List, Mapping
from os import PathLike
from os.path import basename
import random

from profiles import Profile

_logger = logging.getLogger(__name__)

# Messages handed to the caller-supplied ``error_fn`` on validation failure.
_NO_CODE_FILES_MSG = "No code files provided. At least one code file must be provided."
_MISSING_PROFILE_TYPE_MSG = "Profile type must be provided if a profile file is provided."


class PerfGuruPromptFormatter(ABC):
    """Base class for strategies that build an LLM prompt from code and a profile.

    Subclasses implement :meth:`format_prompt`. The formatters in this module
    do so by delegating to :meth:`_build_prompt` and overriding
    :meth:`_describe_profile` to decide how the profile is rendered as text.

    Attributes:
        name: Short identifier of the formatting strategy.
    """

    def __init__(self, name: str):
        self.name = name

    def _read_code_files(self, code_paths: List[PathLike]) -> Mapping[PathLike, str]:
        """Return a mapping from each path in *code_paths* to that file's text.

        Raises:
            OSError: If a file cannot be opened.
            UnicodeDecodeError: If a file is not valid UTF-8.
        """
        code_files = {}
        for code_path in code_paths:
            # Decode explicitly as UTF-8 so the text embedded in the prompt
            # does not depend on the platform's locale encoding.
            with open(code_path, "r", encoding="utf-8") as file:
                code_files[code_path] = file.read()
        return code_files

    def _read_profile(self, profile_path: PathLike, profile_type: str) -> Profile:
        """Construct a ``Profile`` from *profile_path* and *profile_type*."""
        return Profile(profile_path, profile_type)

    def _concatenate_code(self, code_paths: List[PathLike]) -> str:
        """Read every code file and join them into one text blob.

        Each file contributes ``"<basename>:\\n<content>\\n\\n"``, in the
        order the paths were given.
        """
        code_files = self._read_code_files(code_paths)
        return "".join(
            f"{basename(code_path)}:\n{content}\n\n"
            for code_path, content in code_files.items()
        )

    def _describe_profile(self, profile: Profile) -> str:
        """Return the text that represents *profile* inside the prompt.

        Hook used by :meth:`_build_prompt`. It is deliberately not abstract so
        that subclasses which implement :meth:`format_prompt` entirely on
        their own are not forced to provide it.
        """
        raise NotImplementedError

    def _build_prompt(self,
                      prompt: str,
                      code_paths: List[PathLike],
                      profile_path: Optional[PathLike] = None,
                      profile_type: Optional[str] = None,
                      error_fn: Optional[Callable[[str], object]] = None) -> Optional[str]:
        """Validate the inputs and assemble the final prompt text.

        Args:
            prompt: The instruction text appended at the end of the prompt.
            code_paths: Paths of the source files to embed.
            profile_path: Path of a performance profile, if any.
            profile_type: Type of the profile; required when *profile_path*
                is given.
            error_fn: Optional callback invoked with a message when the
                inputs are invalid.

        Returns:
            The assembled prompt, or ``None`` if validation failed (after
            ``error_fn`` has been called, when one was supplied).
        """
        if not code_paths:
            if error_fn:
                error_fn(_NO_CODE_FILES_MSG)
            return None

        concatenated_code = self._concatenate_code(code_paths)

        if not profile_path:
            # No profile: leave the profile section out entirely instead of
            # emitting an empty section headed by the literal "None Profile:".
            return f"Code:\n{concatenated_code}\n\n{prompt}"

        if not profile_type:
            if error_fn:
                error_fn(_MISSING_PROFILE_TYPE_MSG)
            return None

        profile = self._read_profile(profile_path, profile_type)
        profile_content = self._describe_profile(profile)
        return (f"Code:\n{concatenated_code}\n\n"
                f"{profile_type} Profile:\n{profile_content}\n\n{prompt}")

    @abstractmethod
    def format_prompt(self,
                      prompt: str,
                      code_paths: List[PathLike],
                      profile_path: Optional[PathLike] = None,
                      profile_type: Optional[str] = None,
                      error_fn: Optional[Callable[[str], object]] = None) -> Optional[str]:
        """Build the prompt text from the code files and optional profile.

        Args:
            prompt: The instruction text for the LLM.
            code_paths: Paths of the source files to embed.
            profile_path: Path of a performance profile, if any.
            profile_type: Type of the profile at *profile_path*.
            error_fn: Optional callback invoked with an error message.

        Returns:
            The formatted prompt, or ``None`` when the inputs are invalid.
        """
        pass


class BasicPromptFormatter(PerfGuruPromptFormatter):
    """Formatter that embeds the profile via ``Profile.profile_to_tree_str()``."""

    def __init__(self):
        super().__init__("basic")

    def _describe_profile(self, profile: Profile) -> str:
        """Return the profile rendered by ``profile_to_tree_str()``."""
        return profile.profile_to_tree_str()

    def format_prompt(self,
                      prompt: str,
                      code_paths: List[PathLike],
                      profile_path: Optional[PathLike] = None,
                      profile_type: Optional[str] = None,
                      error_fn: Optional[Callable[[str], object]] = None) -> Optional[str]:
        """Return the code files followed by the profile text and *prompt*.

        Returns ``None`` (after calling ``error_fn``, if given) when no code
        files are supplied, or when a profile path is given without a type.
        """
        return self._build_prompt(prompt, code_paths, profile_path, profile_type, error_fn)


class SlowestFunctionPromptFormatter(PerfGuruPromptFormatter):
    """Formatter that summarizes the profile as its slowest functions.

    Args:
        top_k: How many of the slowest functions, and how many leading
            hot-path entries, to report. Defaults to 1.

    Raises:
        ValueError: If *top_k* is smaller than 1.
    """

    def __init__(self, top_k: int = 1):
        super().__init__("slowest_function")
        if top_k < 1:
            raise ValueError("top_k must be at least 1")
        self.top_k = top_k

    def _describe_profile(self, profile: Profile) -> str:
        """Return a sentence naming the slowest functions and the hot path."""
        # NOTE(review): assumes profile.gf.dataframe is a pandas DataFrame
        # with "name" and "time" columns -- confirm against profiles.Profile.
        slowest = profile.gf.dataframe.nlargest(self.top_k, "time")

        # tolist() converts NumPy scalars to native Python values, so the
        # lists print as e.g. [1.5] rather than [np.float64(1.5)].
        function_names = slowest["name"].tolist()
        execution_times = slowest["time"].tolist()

        hot_path_functions = [
            node.frame["name"]
            for node in profile.gf.hot_path()
            if "name" in node.frame.attrs
        ][:self.top_k]

        profile_content = (
            f"The slowest functions are {function_names} and they took {execution_times} seconds, respectively."
            + f" Also, these functions were in the hot path: {hot_path_functions}."
        )
        # Diagnostic output goes through logging, not stdout.
        _logger.debug("Slowest-function profile summary: %s", profile_content)
        return profile_content

    def format_prompt(self,
                      prompt: str,
                      code_paths: List[PathLike],
                      profile_path: Optional[PathLike] = None,
                      profile_type: Optional[str] = None,
                      error_fn: Optional[Callable[[str], object]] = None) -> Optional[str]:
        """Return the code files followed by a slowest-function summary and *prompt*.

        Returns ``None`` (after calling ``error_fn``, if given) when no code
        files are supplied, or when a profile path is given without a type.
        """
        return self._build_prompt(prompt, code_paths, profile_path, profile_type, error_fn)


# Formatters that select_random_formatter() chooses from. The basic formatter
# is currently left out of the pool:
# AVAILABLE_FORMATTERS.append(BasicPromptFormatter())
AVAILABLE_FORMATTERS = [SlowestFunctionPromptFormatter()]


def select_random_formatter() -> PerfGuruPromptFormatter:
    """Return one formatter chosen at random from ``AVAILABLE_FORMATTERS``."""
    return random.choice(AVAILABLE_FORMATTERS)