pvanand committed on
Commit
c2dccd1
1 Parent(s): 902db61

Update document_generator.py

Files changed (1)
  1. document_generator.py +216 -23
document_generator.py CHANGED
@@ -81,6 +81,156 @@ FORMAT YOUR OUTPUT AS A TEMPLATE ENCLOSED IN <response></response> tags
 DOCUMENT_TEMPLATE_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
 
 
+# File: llm_observability.py
+
+import sqlite3
+import json
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+
+class LLMObservabilityManager:
+    def __init__(self, db_path: str = "llm_observability.db"):
+        self.db_path = db_path
+        self.create_table()
+
+    def create_table(self):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS llm_observations (
+                    id TEXT PRIMARY KEY,
+                    conversation_id TEXT,
+                    created_at DATETIME,
+                    status TEXT,
+                    request TEXT,
+                    response TEXT,
+                    model TEXT,
+                    total_tokens INTEGER,
+                    prompt_tokens INTEGER,
+                    completion_tokens INTEGER,
+                    latency FLOAT,
+                    user TEXT
+                )
+            ''')
+
+    def insert_observation(self, response: Dict[str, Any], conversation_id: str, status: str, request: str, latency: float, user: str):
+        created_at = datetime.fromtimestamp(response['created'])
+
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT INTO llm_observations
+                (id, conversation_id, created_at, status, request, response, model, total_tokens, prompt_tokens, completion_tokens, latency, user)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ''', (
+                response['id'],
+                conversation_id,
+                created_at,
+                status,
+                request,
+                json.dumps(response['choices'][0]['message']),
+                response['model'],
+                response['usage']['total_tokens'],
+                response['usage']['prompt_tokens'],
+                response['usage']['completion_tokens'],
+                latency,
+                user
+            ))
+
+    def get_observations(self, conversation_id: Optional[str] = None) -> List[Dict[str, Any]]:
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            if conversation_id:
+                cursor.execute('SELECT * FROM llm_observations WHERE conversation_id = ? ORDER BY created_at', (conversation_id,))
+            else:
+                cursor.execute('SELECT * FROM llm_observations ORDER BY created_at')
+            rows = cursor.fetchall()
+
+            column_names = [description[0] for description in cursor.description]
+            return [dict(zip(column_names, row)) for row in rows]
+
+    def get_all_observations(self) -> List[Dict[str, Any]]:
+        return self.get_observations()
+
+
+# aiclient.py
+
+class AIClient:
+    def __init__(self):
+        self.client = OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key="sk-or-v1-" + os.environ['OPENROUTER_API_KEY']
+        )
+        self.observability_manager = LLMObservabilityManager()
+
+    @log_execution
+    def generate_response(
+        self,
+        messages: List[Dict[str, str]],
+        model: str = "openai/gpt-4o-mini",
+        max_tokens: int = 32000,
+        conversation_id: Optional[str] = None,
+        user: str = "anonymous"
+    ) -> Optional[str]:
+        if not messages:
+            return None
+
+        start_time = time.time()
+        response = self.client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            stream=False
+        )
+        end_time = time.time()
+        latency = end_time - start_time
+
+        # Log the observation
+        self.observability_manager.insert_observation(
+            response=response.dict(),
+            conversation_id=conversation_id or "default",
+            status="success",
+            request=json.dumps(messages),
+            latency=latency,
+            user=user
+        )
+
+        return response.choices[0].message.content
+
+    @log_execution
+    def generate_vision_response(
+        self,
+        messages: List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]],
+        model: str = "google/gemini-flash-1.5-8b",
+        max_tokens: int = 32000,
+        conversation_id: Optional[str] = None,
+        user: str = "anonymous"
+    ) -> Optional[str]:
+        if not messages:
+            return None
+
+        start_time = time.time()
+        response = self.client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            stream=False
+        )
+        end_time = time.time()
+        latency = end_time - start_time
+
+        # Log the observation
+        self.observability_manager.insert_observation(
+            response=response.dict(),
+            conversation_id=conversation_id or "default",
+            status="success",
+            request=json.dumps(messages),
+            latency=latency,
+            user=user
+        )
+
+        return response.choices[0].message.content
+
 
 # File: app.py
 import os
@@ -138,30 +288,17 @@ class DatabaseManager:
         """
         cur.execute(insert_query, (user_id, user_query, response))
 
-class AIClient:
-    def __init__(self):
-        self.client = OpenAI(
-            base_url="https://openrouter.ai/api/v1",
-            api_key="sk-or-v1-"+os.environ['OPENROUTER_API_KEY']
-        )
-
-    @log_execution
-    def generate_response(
-        self,
-        messages: List[Dict[str, str]],
-        model: str = "openai/gpt-4o-mini",
-        max_tokens: int = 32000
-    ) -> Optional[str]:
-        if not messages:
-            return None
-        response = self.client.chat.completions.create(
-            model=model,
-            messages=messages,
-            max_tokens=max_tokens,
-            stream=False
-        )
-        return response.choices[0].message.content
+def log_execution(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        print(f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
+        return result
+    return wrapper
 
+
 class DocumentGenerator:
     def __init__(self, ai_client: AIClient):
         self.ai_client = ai_client
@@ -395,6 +532,62 @@ async def generate_markdown_document_stream_endpoint(request: MarkdownDocumentRe
 
     return StreamingResponse(stream_generator(), media_type="application/octet-stream")
 
+
+## OBSERVABILITY
+from uuid import uuid4
+import csv
+from io import StringIO
+
+def create_csv_response(observations: List[Dict]) -> StreamingResponse:
+    def iter_csv(data):
+        output = StringIO()
+        writer = csv.DictWriter(output, fieldnames=data[0].keys() if data else [])
+        writer.writeheader()
+        for row in data:
+            writer.writerow(row)
+        output.seek(0)
+        yield output.read()
+
+    headers = {
+        'Content-Disposition': 'attachment; filename="observations.csv"'
+    }
+    return StreamingResponse(iter_csv(observations), media_type="text/csv", headers=headers)
+
+
+@router.get("/last-observations/{limit}")
+async def get_last_observations(limit: int = 10, format: str = "json"):
+    observability_manager = LLMObservabilityManager()
+
+    try:
+        # Get all observations, sorted by created_at in descending order
+        all_observations = observability_manager.get_observations()
+        all_observations.sort(key=lambda x: x['created_at'], reverse=True)
+
+        # Get the last conversation_id
+        if all_observations:
+            last_conversation_id = all_observations[0]['conversation_id']
+
+            # Filter observations for the last conversation
+            last_conversation_observations = [
+                obs for obs in all_observations
+                if obs['conversation_id'] == last_conversation_id
+            ][:limit]
+
+            if format.lower() == "csv":
+                return create_csv_response(last_conversation_observations)
+            else:
+                return ObservationResponse(observations=last_conversation_observations)
+        else:
+            if format.lower() == "csv":
+                return create_csv_response([])
+            else:
+                return ObservationResponse(observations=[])
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to retrieve observations: {str(e)}")
+
+
+
+
 ###########################################
 class MarkdownDocumentResponse(BaseModel):
     markdown_document: str
 
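A minimal usage sketch for the new LLMObservabilityManager, assuming an OpenAI-style chat-completion dict; the sample payload below is illustrative and only carries the keys insert_observation actually reads ('id', 'created', 'model', 'choices', 'usage'):

# Sketch: logging one observation and reading it back (all sample values are hypothetical).
manager = LLMObservabilityManager(db_path="llm_observability.db")
manager.insert_observation(
    response={
        "id": "chatcmpl-demo",
        "created": 1727000000,
        "model": "openai/gpt-4o-mini",
        "choices": [{"message": {"role": "assistant", "content": "Hello!"}}],
        "usage": {"total_tokens": 12, "prompt_tokens": 8, "completion_tokens": 4},
    },
    conversation_id="conv-1",
    status="success",
    request='[{"role": "user", "content": "Hi"}]',
    latency=0.42,
    user="anonymous",
)
print(manager.get_observations("conv-1"))  # rows come back as dicts keyed by column name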
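Calling the updated AIClient now times each completion and writes an observation row as a side effect; a sketch, assuming OPENROUTER_API_KEY is set and the file's existing imports (OpenAI, time, json) are in scope:

# Sketch: one logged chat completion (makes a real network call).
client = AIClient()
answer = client.generate_response(
    messages=[{"role": "user", "content": "One-line summary of LLM observability?"}],
    conversation_id="conv-1",  # groups related calls in llm_observations
    user="demo-user",          # illustrative user label
)
print(answer)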
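The log_execution decorator added to app.py only measures wall-clock time and prints it; it does not touch the SQLite log. A quick sketch of its behavior on a hypothetical function:

import time
from functools import wraps  # required by log_execution itself

@log_execution
def slow_add(a: int, b: int) -> int:
    time.sleep(0.1)  # simulate work
    return a + b

slow_add(1, 2)  # prints something like: slow_add executed in 0.10 seconds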
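create_csv_response builds the entire CSV in memory with csv.DictWriter and streams it in one chunk; a sketch with illustrative rows:

rows = [
    {"id": "obs-1", "status": "success", "latency": 0.42},
    {"id": "obs-2", "status": "success", "latency": 0.37},
]
resp = create_csv_response(rows)
# resp streams text/csv with Content-Disposition: attachment; filename="observations.csv"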
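A client-side sketch of the new /last-observations/{limit} endpoint, assuming the router is mounted on an app served at http://localhost:8000 (host and port are assumptions) and that ObservationResponse is a Pydantic model defined elsewhere in app.py:

import requests

# JSON (default): up to `limit` observations from the most recent conversation
r = requests.get("http://localhost:8000/last-observations/10")
print(r.json())

# The same data as a CSV attachment
r = requests.get("http://localhost:8000/last-observations/10", params={"format": "csv"})
with open("observations.csv", "wb") as f:
    f.write(r.content)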