""" A light wrapper around a bunch of chat LLMs. The class should define a method that takes text input and returns a response from the model.
"""
from abc import ABC, abstractmethod
from typing import Generator, Optional
import os
import random

import openai
import google.generativeai as genai
from llama_cpp import Llama


class ChatModel(ABC):
    """Abstract base class for all chat model wrappers."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return self.name

    def __repr__(self):
        return self.name

    @abstractmethod
    def get_response(self, prompt) -> Generator[str, None, None]:
        pass


class DummyModel(ChatModel):
    """A fake model that echoes the prompt; useful for testing without API calls."""

    def __init__(self):
        super().__init__("dummy")

    def get_response(self, prompt: str) -> Generator[str, None, None]:
        response = f"Dummy response to: {prompt}"
        # Simulate streaming by yielding progressively longer prefixes.
        for idx in range(len(response)):
            yield response[:idx + 1]


class OpenAIModel(ChatModel):
    """Wrapper around the OpenAI chat completions API."""

    def __init__(self, model: str, client: openai.OpenAI):
        super().__init__(model)
        self.model = model
        self.client = client

    def get_response(self, prompt: str) -> Generator[str, None, None]:
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."},
                {"role": "user", "content": prompt},
            ],
            stream=True,
            max_tokens=4096,
        )
        # Yield the accumulated response after each streamed chunk.
        response = ""
        for chunk in stream:
            response += chunk.choices[0].delta.content or ""
            yield response


class GeminiModel(ChatModel):
    """Wrapper around the Google Gemini API via google.generativeai."""

    def __init__(self, model: str, api_key: Optional[str] = None):
        super().__init__(model)
        if api_key:
            genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model)
        self.config = genai.types.GenerationConfig(
            candidate_count=1,
            max_output_tokens=4096,
        )

    def get_response(self, prompt: str) -> Generator[str, None, None]:
        stream = self.model.generate_content(prompt, stream=True, generation_config=self.config)
        # Yield the accumulated response after each streamed chunk.
        response = ""
        for chunk in stream:
            response += chunk.text or ""
            yield response


class LocalModel(ChatModel):
    """Wrapper around a local GGUF model served by llama-cpp-python."""

    def __init__(self, model: str, model_path: str):
        super().__init__(model)
        self.llm = Llama(
            model_path=model_path,
            n_ctx=8000,
        )

    def get_response(self, prompt) -> Generator[str, None, None]:
        output = self.llm.create_chat_completion(
            messages=[
                {"role": "system", "content": "You are PerfGuru, a helpful assistant for assisting developers in identifying performance bottlenecks in their code and optimizing them."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=4000,
        )
        result = output["choices"][0]["message"]["content"]
        # Local generation is not streamed, so simulate streaming character by character.
        for idx in range(len(result)):
            yield result[:idx + 1]


LOCAL_MODELS = [
    "Meta-Llama-3-8B-Instruct.Q4_K_S",
]

AVAILABLE_MODELS = [
    LocalModel(model_name, f"../local_models/{model_name}.gguf")
    for model_name in LOCAL_MODELS
]
# AVAILABLE_MODELS.append(DummyModel())

if os.environ.get("OPENAI_API_KEY"):
    openai_client = openai.OpenAI()
    AVAILABLE_MODELS.append(OpenAIModel("gpt-4o-mini", openai_client))
    AVAILABLE_MODELS.append(OpenAIModel("gpt-3.5-turbo", openai_client))

if os.environ.get("GOOGLE_API_KEY"):
    AVAILABLE_MODELS.append(GeminiModel("gemini-1.5-flash"))
    AVAILABLE_MODELS.append(GeminiModel("gemini-1.5-pro"))

if not AVAILABLE_MODELS:
    raise ValueError(
        "No models available. Provide a local GGUF model or set the "
        "OPENAI_API_KEY or GOOGLE_API_KEY environment variables."
    )


def select_random_model() -> ChatModel:
    return random.choice(AVAILABLE_MODELS)
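

# Minimal usage sketch (illustrative only, not part of the app): running this
# module directly picks one configured model at random and prints a single
# response. It assumes at least one backend above is available; the prompt
# below is a hypothetical example.
if __name__ == "__main__":
    model = select_random_model()
    print(f"Using model: {model}")
    # get_response yields progressively longer partial responses; keep only
    # the final (complete) one.
    final = ""
    for partial in model.get_response("How can I speed up a Python loop that sums a list?"):
        final = partial
    print(final)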