File size: 4,118 Bytes
8ab167c
 
 
 
 
 
 
 
6f00050
8ab167c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f00050
8ab167c
6f00050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2233c73
6f00050
ae7d0e1
6f00050
 
 
8ab167c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
""" A light wrapper around a bunch of chat LLMs. The class should define a method that takes text input and returns a response from the model.
"""
from abc import ABC, abstractmethod
from typing import Generator, Optional, AsyncGenerator
import os
import random
import openai
import google.generativeai as genai
from llama_cpp import Llama

class ChatModel(ABC):
    """Abstract base for chat models, identified by a display name."""

    def __init__(self, name):
        """Store ``name``; it is what ``str()`` and ``repr()`` return."""
        self.name = name

    def __str__(self):
        return self.name

    # repr() deliberately shows the same text as str(): just the model name.
    __repr__ = __str__

    @abstractmethod
    def get_response(self, prompt) -> Generator[str, None, None]:
        """Return a generator of response strings for ``prompt``.

        Concrete subclasses must override this method.
        """


class DummyModel(ChatModel):
    """Stand-in model named "dummy" that echoes the prompt; makes no external calls."""

    def __init__(self):
        super().__init__("dummy")

    def get_response(self, prompt: str) -> Generator[str, None, None]:
        """Yield ever-longer prefixes of a canned reply, one character at a time.

        The final yielded value is the complete reply string.
        """
        reply = f"Dummy response to: {prompt}"
        for end in range(1, len(reply) + 1):
            yield reply[:end]


class OpenAIModel(ChatModel):
    """Chat model that streams completions through an OpenAI client."""

    def __init__(self, model: str, client: openai.OpenAI):
        """Remember the model identifier and the client used for requests.

        Args:
            model: Model identifier passed to the chat-completions call; also
                used as this wrapper's display name.
            client: An ``openai.OpenAI`` client instance.
        """
        super().__init__(model)
        self.model = model
        self.client = client

    def get_response(self, prompt: str) -> Generator[str, None, None]:
        """Stream a completion for ``prompt``, yielding the text accumulated so far.

        Args:
            prompt: The user message sent alongside the fixed system prompt.

        Yields:
            The response text received so far; each value extends the
            previous one as new chunks arrive.
        """
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."},
                {"role": "user", "content": prompt}
            ],
            stream=True,
            max_tokens=4096,
        )
        response = ""
        for chunk in stream:
            # A stream chunk may carry an empty ``choices`` list (e.g. a
            # usage-only chunk); indexing it would raise IndexError, so skip.
            if not chunk.choices:
                continue
            # ``delta.content`` can be None (e.g. role-only or final chunk).
            response += chunk.choices[0].delta.content or ""
            yield response



class GeminiModel(ChatModel):
    """Chat model that streams responses from ``google.generativeai``."""

    def __init__(self, model: str, api_key: Optional[str] = None):
        """Create the generative model and its generation settings.

        Args:
            model: Model name handed to ``genai.GenerativeModel``; also used
                as this wrapper's display name.
            api_key: When truthy, passed to ``genai.configure``. When omitted,
                no configuration call is made here.
                NOTE(review): presumably the library then picks up its
                credentials on its own — confirm.
        """
        super().__init__(model)
        if api_key:
            genai.configure(api_key=api_key)

        # Note: ``self.model`` holds the GenerativeModel object, not the name
        # string (the name is kept on ``self.name`` by the base class).
        self.model = genai.GenerativeModel(model)
        self.config = genai.types.GenerationConfig(
            candidate_count=1,
            max_output_tokens=4096,
        )

    def get_response(self, prompt: str) -> Generator[str, None, None]:
        """Stream a reply to ``prompt``, yielding the text accumulated so far."""
        chunks = self.model.generate_content(
            prompt,
            stream=True,
            generation_config=self.config,
        )
        accumulated = ""
        for piece in chunks:
            accumulated = accumulated + (piece.text or "")
            yield accumulated


class LocalModel(ChatModel):
    """Chat model served from a local model file through ``llama_cpp.Llama``."""

    def __init__(self, model: str, model_path: str):
        """Load the model file at ``model_path`` with ``n_ctx=8000``.

        Args:
            model: Display name for this wrapper.
            model_path: Path handed to ``Llama`` as ``model_path``.
        """
        super().__init__(model)
        self.llm = Llama(
            model_path=model_path,
            n_ctx=8000,
        )

    def get_response(self, prompt) -> Generator[str, None, None]:
        """Generate a full reply, then yield it back one character at a time.

        The completion is requested in a single non-streaming call; the
        generator then yields ever-longer prefixes of the finished text, so
        the last value is the complete reply.
        """
        conversation = [
            {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."},
            {"role": "user", "content": prompt},
        ]
        completion = self.llm.create_chat_completion(
            messages=conversation,
            max_tokens=4000,
        )

        text = completion["choices"][0]["message"]["content"]
        for end in range(1, len(text) + 1):
            yield text[:end]
    

# Base file names (without the ".gguf" suffix) of local models, looked up
# under the "local_models/" directory.
LOCAL_MODELS = [
    "Meta-Llama-3-8B-Instruct.Q4_K_S",
]

# Registry of usable models. A local model is only instantiated when its
# file exists on disk; the inner generator pairs each name with its path so
# the path is built once per model.
AVAILABLE_MODELS = [
    LocalModel(local_name, local_path)
    for local_name, local_path in (
        (candidate, f"local_models/{candidate}.gguf") for candidate in LOCAL_MODELS
    )
    if os.path.exists(local_path)
]

# Uncomment to register the dummy model as well:
# AVAILABLE_MODELS.append( DummyModel() )

# OpenAI-backed models are registered only when an API key is present in the
# environment; both share a single client.
if os.environ.get("OPENAI_API_KEY"):
    openai_client = openai.OpenAI()
    AVAILABLE_MODELS.extend(
        OpenAIModel(openai_name, openai_client)
        for openai_name in ("gpt-4o-mini", "gpt-3.5-turbo")
    )

# Gemini-backed models are registered only when GOOGLE_API_KEY is set.
if os.environ.get("GOOGLE_API_KEY"):
    AVAILABLE_MODELS.extend(
        GeminiModel(gemini_name)
        for gemini_name in ("gemini-1.5-flash", "gemini-1.5-pro")
    )

# Fail at import time when nothing could be registered.
if not AVAILABLE_MODELS:
    raise ValueError("No models available. Please set OPENAI_API_KEY or GOOGLE_API_KEY environment variables.")

def select_random_model() -> ChatModel:
    """Return one entry of ``AVAILABLE_MODELS`` picked with ``random.choice``."""
    chosen = random.choice(AVAILABLE_MODELS)
    return chosen