Spaces:
Running
Running
File size: 5,296 Bytes
1a6d961 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# aiclient.py
import os
import time
import json
from typing import List, Dict, Optional, Union, AsyncGenerator
from openai import AsyncOpenAI
from starlette.responses import StreamingResponse
from observability import log_execution ,LLMObservabilityManager
import psycopg2
import requests
from functools import lru_cache
import logging
import pandas as pd
logger = logging.getLogger(__name__)
@lru_cache(maxsize=1)
def get_model_info():
    """Return OpenRouter model metadata (incl. pricing) as a pandas DataFrame.

    Fetches https://openrouter.ai/api/v1/models and caches the payload to
    ``model_info.json`` for offline fallback. On any fetch/parse failure,
    loads the cached file instead. Returns ``None`` only if both the network
    fetch and the cache file are unavailable. The result is memoized for the
    process lifetime via ``lru_cache``.
    """
    try:
        resp = requests.get(
            'https://openrouter.ai/api/v1/models',
            headers={'accept': 'application/json'},
            timeout=10,  # bug fix: no timeout means the call can hang forever
        )
        # Bug fix: surface HTTP errors instead of trying to parse an error page.
        resp.raise_for_status()
        model_info_dict = resp.json()["data"]
        # Persist for offline fallback on future failures.
        with open('model_info.json', 'w') as json_file:
            json.dump(model_info_dict, json_file, indent=4)
    except Exception as e:
        logger.error(f"Failed to fetch model info: {e}. Loading from file.")
        if not os.path.exists('model_info.json'):
            logger.error("No model info file found")
            return None
        with open('model_info.json', 'r') as json_file:
            model_info_dict = json.load(json_file)
    # Single construction point for both the fresh and the cached payload.
    return pd.DataFrame(model_info_dict)
class AIClient:
    """Streams chat completions from OpenRouter and records each call
    (tokens, cost, latency, status) via LLMObservabilityManager."""

    def __init__(self):
        # AsyncOpenAI client pointed at the OpenRouter-compatible endpoint.
        self.client = AsyncOpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=os.environ['OPENROUTER_API_KEY']
        )
        self.observability_manager = LLMObservabilityManager()
        # Model/pricing table; may be None if both fetch and cache file failed.
        self.model_info = get_model_info()

    #@log_execution
    async def generate_response(
        self,
        messages: List[Dict[str, str]],
        model: str = "openai/gpt-4o-mini",
        max_tokens: int = 32000,
        conversation_id: Optional[str] = None,
        user: str = "anonymous"
    ) -> AsyncGenerator[str, None]:
        """Yield response text chunks for `messages`; on error, the error text
        becomes the logged response. An observation row is always recorded in
        `finally`, best-effort (logging failures never propagate).

        Args:
            messages: Chat messages ({"role": ..., "content": ...}).
            model: OpenRouter model id; replaced by the actual routed model
                reported in the usage chunk when available.
            max_tokens: Completion token cap.
            conversation_id: Grouping key for observability ("default" if None).
            user: User label for observability.
        """
        if not messages:
            return
        start_time = time.time()
        full_response = ""
        usage = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}
        status = "success"
        latency = 0.0  # defensively initialized for the finally block
        try:
            response = await self.client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                stream=True,
                stream_options={"include_usage": True}
            )
            # NOTE: measured right after stream creation (time to stream
            # start), not total streaming duration — preserved from original.
            latency = time.time() - start_time
            async for chunk in response:
                # Bug fix: with include_usage, the final usage chunk has an
                # EMPTY choices list — indexing chunk.choices[0] unguarded
                # raised IndexError and aborted the stream.
                if chunk.choices and chunk.choices[0].delta.content:
                    content = chunk.choices[0].delta.content
                    yield content
                    full_response += content
                if chunk.usage:
                    model = chunk.model  # actual model OpenRouter routed to
                    usage["completion_tokens"] = chunk.usage.completion_tokens
                    usage["prompt_tokens"] = chunk.usage.prompt_tokens
                    usage["total_tokens"] = chunk.usage.total_tokens
                    logger.debug("usage=%s model=%s", usage, model)
        except Exception as e:
            status = "error"
            full_response = str(e)
            latency = time.time() - start_time
            logger.error(f"Error in generate_response: {e}")
        finally:
            # Best-effort observability logging; never raise from here.
            try:
                # Bug fix: a missing pricing table or unknown model id used to
                # raise inside this block and skip the observation entirely;
                # now we fall back to cost=0.0 and still record the call.
                cost = 0.0
                if self.model_info is not None:
                    pricing_rows = self.model_info[self.model_info.id == model]["pricing"].values
                    if len(pricing_rows):
                        pricing_data = pricing_rows[0]
                        cost = (float(pricing_data["completion"]) * float(usage["completion_tokens"])
                                + float(pricing_data["prompt"]) * float(usage["prompt_tokens"]))
                self.observability_manager.insert_observation(
                    response=full_response,
                    model=model,
                    completion_tokens=usage["completion_tokens"],
                    prompt_tokens=usage["prompt_tokens"],
                    total_tokens=usage["total_tokens"],
                    cost=cost,
                    conversation_id=conversation_id or "default",
                    status=status,
                    # System prompts are excluded from the stored request.
                    request=json.dumps([msg for msg in messages if msg.get('role') != 'system']),
                    latency=latency,
                    user=user
                )
            except Exception as obs_error:
                logger.error(f"Error logging observation: {obs_error}")
class DatabaseManager:
    """Manages database operations against the Supabase Postgres instance."""

    def __init__(self):
        # Connection parameters; credentials are read from the environment.
        self.db_params = {
            "dbname": "postgres",
            "user": os.environ['SUPABASE_USER'],
            "password": os.environ['SUPABASE_PASSWORD'],
            "host": "aws-0-us-west-1.pooler.supabase.com",
            "port": "5432"
        }

    @log_execution
    def update_database(self, user_id: str, user_query: str, response: str) -> None:
        """Insert one interaction row into ai_document_generator.

        Uses a parameterized query (safe against SQL injection). Bug fix:
        psycopg2's `with connection` block commits/rolls back the transaction
        but does NOT close the connection, so the original leaked one
        connection per call — we now close it explicitly.
        """
        conn = psycopg2.connect(**self.db_params)
        try:
            with conn:  # commits on success, rolls back on exception
                with conn.cursor() as cur:
                    insert_query = """
                        INSERT INTO ai_document_generator (user_id, user_query, response)
                        VALUES (%s, %s, %s);
                    """
                    cur.execute(insert_query, (user_id, user_query, response))
        finally:
            conn.close()