Update document_generator.py
document_generator.py  CHANGED  (+216 -23)
@@ -81,6 +81,156 @@ FORMAT YOUR OUTPUT AS A TEMPLATE ENCLOSED IN <response></response> tags
 DOCUMENT_TEMPLATE_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
 
 
+# File: llm_observability.py
+
+import sqlite3
+import json
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+
+class LLMObservabilityManager:
+    def __init__(self, db_path: str = "llm_observability.db"):
+        self.db_path = db_path
+        self.create_table()
+
+    def create_table(self):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS llm_observations (
+                    id TEXT PRIMARY KEY,
+                    conversation_id TEXT,
+                    created_at DATETIME,
+                    status TEXT,
+                    request TEXT,
+                    response TEXT,
+                    model TEXT,
+                    total_tokens INTEGER,
+                    prompt_tokens INTEGER,
+                    completion_tokens INTEGER,
+                    latency FLOAT,
+                    user TEXT
+                )
+            ''')
+
+    def insert_observation(self, response: Dict[str, Any], conversation_id: str, status: str, request: str, latency: float, user: str):
+        created_at = datetime.fromtimestamp(response['created'])
+
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT INTO llm_observations
+                (id, conversation_id, created_at, status, request, response, model, total_tokens, prompt_tokens, completion_tokens, latency, user)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ''', (
+                response['id'],
+                conversation_id,
+                created_at,
+                status,
+                request,
+                json.dumps(response['choices'][0]['message']),
+                response['model'],
+                response['usage']['total_tokens'],
+                response['usage']['prompt_tokens'],
+                response['usage']['completion_tokens'],
+                latency,
+                user
+            ))
+
+    def get_observations(self, conversation_id: Optional[str] = None) -> List[Dict[str, Any]]:
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            if conversation_id:
+                cursor.execute('SELECT * FROM llm_observations WHERE conversation_id = ? ORDER BY created_at', (conversation_id,))
+            else:
+                cursor.execute('SELECT * FROM llm_observations ORDER BY created_at')
+            rows = cursor.fetchall()
+
+            column_names = [description[0] for description in cursor.description]
+            return [dict(zip(column_names, row)) for row in rows]
+
+    def get_all_observations(self) -> List[Dict[str, Any]]:
+        return self.get_observations()
+
+
+# aiclient.py
+
+class AIClient:
+    def __init__(self):
+        self.client = OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key="sk-or-v1-" + os.environ['OPENROUTER_API_KEY']
+        )
+        self.observability_manager = LLMObservabilityManager()
+
+    @log_execution
+    def generate_response(
+        self,
+        messages: List[Dict[str, str]],
+        model: str = "openai/gpt-4o-mini",
+        max_tokens: int = 32000,
+        conversation_id: str = None,
+        user: str = "anonymous"
+    ) -> Optional[str]:
+        if not messages:
+            return None
+
+        start_time = time.time()
+        response = self.client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            stream=False
+        )
+        end_time = time.time()
+        latency = end_time - start_time
+
+        # Log the observation
+        self.observability_manager.insert_observation(
+            response=response.dict(),
+            conversation_id=conversation_id or "default",
+            status="success",
+            request=json.dumps(messages),
+            latency=latency,
+            user=user
+        )
+
+        return response.choices[0].message.content
+
+    @log_execution
+    def generate_vision_response(
+        self,
+        messages: List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]],
+        model: str = "google/gemini-flash-1.5-8b",
+        max_tokens: int = 32000,
+        conversation_id: str = None,
+        user: str = "anonymous"
+    ) -> Optional[str]:
+        if not messages:
+            return None
+
+        start_time = time.time()
+        response = self.client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            stream=False
+        )
+        end_time = time.time()
+        latency = end_time - start_time
+
+        # Log the observation
+        self.observability_manager.insert_observation(
+            response=response.dict(),
+            conversation_id=conversation_id or "default",
+            status="success",
+            request=json.dumps(messages),
+            latency=latency,
+            user=user
+        )
+
+        return response.choices[0].message.content
+
 
 # File: app.py
 import os
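Below is a minimal usage sketch of the two classes added in this hunk (not part of the diff itself; it assumes OPENROUTER_API_KEY is set and that os, time, json, OpenAI, and log_execution are available elsewhere in document_generator.py, and the conversation id and user values are made up for illustration):

    # Each generate_response call is timed and logged as a row in llm_observability.db.
    client = AIClient()
    answer = client.generate_response(
        messages=[{"role": "user", "content": "Summarize the project."}],
        conversation_id="demo-conversation",  # hypothetical id, for illustration only
        user="demo-user",
    )

    # Read the logged observations back for that conversation.
    manager = LLMObservabilityManager()
    for obs in manager.get_observations("demo-conversation"):
        print(obs["model"], obs["total_tokens"], obs["latency"])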
@@ -138,30 +288,17 @@ class DatabaseManager:
         """
         cur.execute(insert_query, (user_id, user_query, response))
 
-
-
-
-
-
-        )
-
-
-
-        self,
-        messages: List[Dict[str, str]],
-        model: str = "openai/gpt-4o-mini",
-        max_tokens: int = 32000
-    ) -> Optional[str]:
-        if not messages:
-            return None
-        response = self.client.chat.completions.create(
-            model=model,
-            messages=messages,
-            max_tokens=max_tokens,
-            stream=False
-        )
-        return response.choices[0].message.content
+def log_execution(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        print(f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
+        return result
+    return wrapper
 
+
 class DocumentGenerator:
     def __init__(self, ai_client: AIClient):
        self.ai_client = ai_client
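A small standalone check of the log_execution decorator added in this hunk (a sketch, not part of the diff; it assumes time and functools.wraps are imported, which the decorator needs):

    @log_execution
    def slow_add(a, b):
        time.sleep(0.1)  # simulate work
        return a + b

    slow_add(1, 2)  # prints roughly: "slow_add executed in 0.10 seconds"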
@@ -395,6 +532,62 @@ async def generate_markdown_document_stream_endpoint(request: MarkdownDocumentRequest):
 
     return StreamingResponse(stream_generator(), media_type="application/octet-stream")
 
+
+## OBSERVABILITY
+from uuid import uuid4
+import csv
+from io import StringIO
+
+def create_csv_response(observations: List[Dict]) -> StreamingResponse:
+    def iter_csv(data):
+        output = StringIO()
+        writer = csv.DictWriter(output, fieldnames=data[0].keys() if data else [])
+        writer.writeheader()
+        for row in data:
+            writer.writerow(row)
+        output.seek(0)
+        yield output.read()
+
+    headers = {
+        'Content-Disposition': 'attachment; filename="observations.csv"'
+    }
+    return StreamingResponse(iter_csv(observations), media_type="text/csv", headers=headers)
+
+
+@router.get("/last-observations/{limit}")
+async def get_last_observations(limit: int = 10, format: str = "json"):
+    observability_manager = LLMObservabilityManager()
+
+    try:
+        # Get all observations, sorted by created_at in descending order
+        all_observations = observability_manager.get_observations()
+        all_observations.sort(key=lambda x: x['created_at'], reverse=True)
+
+        # Get the last conversation_id
+        if all_observations:
+            last_conversation_id = all_observations[0]['conversation_id']
+
+            # Filter observations for the last conversation
+            last_conversation_observations = [
+                obs for obs in all_observations
+                if obs['conversation_id'] == last_conversation_id
+            ][:limit]
+
+            if format.lower() == "csv":
+                return create_csv_response(last_conversation_observations)
+            else:
+                return ObservationResponse(observations=last_conversation_observations)
+        else:
+            if format.lower() == "csv":
+                return create_csv_response([])
+            else:
+                return ObservationResponse(observations=[])
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to retrieve observations: {str(e)}")
+
+
+
+
 ###########################################
 class MarkdownDocumentResponse(BaseModel):
     markdown_document: str
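A quick way to exercise the new endpoint once the app is running (a sketch, not part of the diff; the host, port, and any prefix on router depend on how the app is mounted, and ObservationResponse and HTTPException are assumed to be defined or imported elsewhere in the file):

    import requests

    # JSON: up to 5 observations from the most recent conversation
    r = requests.get("http://localhost:8000/last-observations/5")
    print(r.json())

    # The same data as a CSV download
    r = requests.get("http://localhost:8000/last-observations/5", params={"format": "csv"})
    with open("observations.csv", "wb") as f:
        f.write(r.content)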