""" A light wrapper around a bunch of chat LLMs. The class should define a method that takes text input and returns a response from the model. """ from abc import ABC, abstractmethod from typing import Generator, Optional, AsyncGenerator import os import random import glob import openai import google.generativeai as genai from llama_cpp import Llama from huggingface_hub import InferenceClient class ChatModel(ABC): def __init__(self, name): self.name = name def __str__(self): return self.name def __repr__(self): return self.name @abstractmethod def get_response(self, prompt) -> Generator[str, None, None]: pass class DummyModel(ChatModel): def __init__(self): super().__init__("dummy") def get_response(self, prompt: str) -> Generator[str, None, None]: response = f"Dummy response to: {prompt}" for idx in range(len(response)): yield response[:idx+1] class OpenAIModel(ChatModel): def __init__(self, model: str, client: openai.OpenAI): super().__init__(model) self.model = model self.client = client def get_response(self, prompt: str) -> Generator[str, None, None]: stream = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."}, {"role": "user", "content": prompt} ], stream=True, max_tokens=4096, ) response = "" for chunk in stream: response += chunk.choices[0].delta.content or "" yield response class GeminiModel(ChatModel): def __init__(self, model: str, api_key: Optional[str] = None): super().__init__(model) if api_key: genai.configure(api_key=api_key) self.model = genai.GenerativeModel(model) self.config = genai.types.GenerationConfig( candidate_count=1, max_output_tokens=4096, ) def get_response(self, prompt: str) -> Generator[str, None, None]: stream = self.model.generate_content(prompt, stream=True, generation_config=self.config) response = "" for chunk in stream: response += chunk.text or "" yield response class LocalModel(ChatModel): def __init__(self, model: str, model_path: str): super().__init__(model) self.llm = Llama( model_path=model_path, n_ctx=4096, ) def get_response(self, prompt) -> Generator[str, None, None]: outputs = self.llm.create_chat_completion( messages = [ {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."}, { "role": "user", "content": prompt, } ], max_tokens=4000, stream=True, ) response = "" for chunk in outputs: response += chunk['choices'][0]['delta'].get('content', '') yield response class InferenceHubModel(ChatModel): def __init__(self, model: str, client: InferenceClient, supports_system_messages: bool = True): super().__init__(model) self.model = model self.client = client self.supports_system_messages = supports_system_messages def get_response(self, prompt: str) -> Generator[str, None, None]: messages = [] if self.supports_system_messages: messages.append({"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."}) messages.append({"role": "user", "content": prompt}) stream = self.client.chat.completions.create( model=self.model, messages=messages, stream=True, max_tokens=2048, ) response = "" for chunk in stream: response += chunk.choices[0].delta.content or "" yield response AVAILABLE_MODELS = [] if os.environ.get("USE_LOCAL_MODELS") == "1": HF_HOME = 
os.environ.get("HF_HOME", "/home/user/.cache/huggingface") GGUF_WILDCARD = os.path.join(HF_HOME, "hub", "models-*", "**", "*.gguf") GGUF_PATHS = [(os.path.basename(p), p) for p in glob.glob(GGUF_WILDCARD, recursive=True)] LOCAL_MODEL_PATHS = [(os.path.basename(p), p) for p in glob.glob(os.path.join("local_models", "*.gguf"))] ALL_LOCAL_MODELS = GGUF_PATHS + LOCAL_MODEL_PATHS AVAILABLE_MODELS.extend([ LocalModel(model_name, model_path) for model_name, model_path in ALL_LOCAL_MODELS if os.path.exists(model_path) ]) # AVAILABLE_MODELS.append( DummyModel() ) if os.environ.get("OPENAI_API_KEY"): openai_client = openai.OpenAI() AVAILABLE_MODELS.append( OpenAIModel("gpt-4o-mini", openai_client) ) AVAILABLE_MODELS.append( OpenAIModel("gpt-3.5-turbo", openai_client) ) if os.environ.get("GOOGLE_API_KEY"): AVAILABLE_MODELS.append( GeminiModel("gemini-1.5-flash") ) AVAILABLE_MODELS.append( GeminiModel("gemini-1.5-pro") ) if os.environ.get("HF_API_KEY"): hf_inference_client = InferenceClient(api_key=os.environ.get("HF_API_KEY"), timeout=60) #AVAILABLE_MODELS.append( InferenceHubModel("google/gemma-2-2b-it", hf_inference_client, supports_system_messages=False) ) #AVAILABLE_MODELS.append( InferenceHubModel("Qwen/Qwen2.5-7B-Instruct", hf_inference_client) ) AVAILABLE_MODELS.append( InferenceHubModel("microsoft/Phi-3-mini-4k-instruct", hf_inference_client) ) AVAILABLE_MODELS.append( InferenceHubModel("meta-llama/Llama-3.2-1B-Instruct", hf_inference_client) ) AVAILABLE_MODELS.append( InferenceHubModel("meta-llama/Llama-3.2-3B-Instruct", hf_inference_client) ) AVAILABLE_MODELS.append( InferenceHubModel("meta-llama/Meta-Llama-3.1-8B-Instruct", hf_inference_client) ) if not AVAILABLE_MODELS: raise ValueError("No models available. Please set OPENAI_API_KEY or GOOGLE_API_KEY environment variables.") def select_random_model() -> ChatModel: return random.choice(AVAILABLE_MODELS)