pvanand committed · verified
Commit 5645c36 · 1 Parent(s): 96594d9

update agent

Files changed (1): main.py +119 -397
main.py CHANGED
@@ -1,23 +1,21 @@
  from fastapi import FastAPI, HTTPException, Depends, Security, BackgroundTasks
  from fastapi.security import APIKeyHeader
  from fastapi.responses import StreamingResponse
  from pydantic import BaseModel, Field
- from typing import Literal, List, Dict
- import os
- from functools import lru_cache
  from openai import OpenAI
- from uuid import uuid4
- import tiktoken
- import sqlite3
- import time
- from datetime import datetime, timedelta
- import asyncio
- import requests
- from prompts import CODING_ASSISTANT_PROMPT, NEWS_ASSISTANT_PROMPT, generate_news_prompt, SEARCH_ASSISTANT_PROMPT, generate_search_prompt
- from fastapi_cache import FastAPICache
- from fastapi_cache.backends.inmemory import InMemoryBackend
- from fastapi_cache.decorator import cache
- import logging
 
  # Configure logging
  logging.basicConfig(
@@ -30,12 +28,15 @@ logging.basicConfig(
  )
  logger = logging.getLogger(__name__)

  app = FastAPI()

  API_KEY_NAME = "X-API-Key"
  API_KEY = os.environ.get("CHAT_AUTH_KEY", "default_secret_key")
  api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

  ModelID = Literal[
      "openai/gpt-4o-mini",
      "meta-llama/llama-3-70b-instruct",
@@ -47,45 +48,34 @@ ModelID = Literal[
      "google/gemma-2-27b-it"
  ]

- class QueryModel(BaseModel):
-     user_query: str = Field(..., description="User's coding query")
      model_id: ModelID = Field(
-         default="meta-llama/llama-3-70b-instruct",
          description="ID of the model to use for response generation"
      )
-     conversation_id: str = Field(default_factory=lambda: str(uuid4()), description="Unique identifier for the conversation")
      user_id: str = Field(..., description="Unique identifier for the user")

      class Config:
          schema_extra = {
              "example": {
-                 "user_query": "How do I implement a binary search in Python?",
                  "model_id": "meta-llama/llama-3-70b-instruct",
                  "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
                  "user_id": "user123"
              }
          }

- class NewsQueryModel(BaseModel):
-     query: str = Field(..., description="News topic to search for")
-     model_id: ModelID = Field(
-         default="openai/gpt-4o-mini",
-         description="ID of the model to use for response generation"
-     )
-     class Config:
-         schema_extra = {
-             "example": {
-                 "query": "Latest developments in AI",
-                 "model_id": "openai/gpt-4o-mini"
-             }
-         }
-
  @lru_cache()
  def get_api_keys():
      logger.info("Loading API keys")
      return {
          "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}",
-         "BRAVE_API_KEY": os.environ['BRAVE_API_KEY']
      }

  api_keys = get_api_keys()
@@ -98,53 +88,10 @@ last_activity: Dict[str, float] = {}
  # Token encoding
  encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

- def limit_tokens(input_string, token_limit=6000):
-     return encoding.decode(encoding.encode(input_string)[:token_limit])
-
- def calculate_tokens(msgs):
-     return sum(len(encoding.encode(str(m))) for m in msgs)
-
- def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
-     logger.info(f"Starting chat with model: {model}")
-     while calculate_tokens(messages) > (8000 - max_output_tokens):
-         if len(messages) > max_llm_history:
-             messages = [messages[0]] + messages[-max_llm_history:]
-         else:
-             max_llm_history -= 1
-             if max_llm_history < 2:
-                 error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
-                 logger.error(error_message)
-                 raise HTTPException(status_code=400, detail=error_message)
-
-     try:
-         response = or_client.chat.completions.create(
-             model=model,
-             messages=messages,
-             max_tokens=max_output_tokens,
-             stream=True
-         )
-
-         full_response = ""
-         for chunk in response:
-             if chunk.choices[0].delta.content is not None:
-                 content = chunk.choices[0].delta.content
-                 full_response += content
-                 yield content
-
-         # After streaming, add the full response to the conversation history
-         messages.append({"role": "assistant", "content": full_response})
-         logger.info("Chat completed successfully")
-     except Exception as e:
-         logger.error(f"Error in model response: {str(e)}")
-         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
-
- async def verify_api_key(api_key: str = Security(api_key_header)):
-     if api_key != API_KEY:
-         logger.warning("Invalid API key used")
-         raise HTTPException(status_code=403, detail="Could not validate credentials")
-     return api_key

- # SQLite setup
  DB_PATH = '/app/data/conversations.db'

  def init_db():
@@ -163,8 +110,6 @@ def init_db():
      conn.close()
      logger.info("Database initialized successfully")

- init_db()
-
  def update_db(user_id, conversation_id, message, response):
      logger.info(f"Updating database for conversation: {conversation_id}")
      conn = sqlite3.connect(DB_PATH)
@@ -175,6 +120,65 @@ def update_db(user_id, conversation_id, message, response):
      conn.close()
      logger.info("Database updated successfully")

  async def clear_inactive_conversations():
      while True:
          logger.info("Clearing inactive conversations")
@@ -189,320 +193,45 @@ async def clear_inactive_conversations():
          logger.info(f"Cleared {len(inactive_convos)} inactive conversations")
          await asyncio.sleep(60) # Check every minute

  @app.on_event("startup")
  async def startup_event():
      logger.info("Starting up the application")
-     FastAPICache.init(InMemoryBackend(), prefix="fastapi-cache")
      asyncio.create_task(clear_inactive_conversations())

- @app.post("/coding-assistant")
- async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
-     """
-     Coding assistant endpoint that provides programming help based on user queries.
-     Available models:
-     - meta-llama/llama-3-70b-instruct (default)
-     - anthropic/claude-3.5-sonnet
-     - deepseek/deepseek-coder
-     - anthropic/claude-3-haiku
-     - openai/gpt-3.5-turbo-instruct
-     - qwen/qwen-72b-chat
-     - google/gemma-2-27b-it
-     - openai/gpt-4o-mini
-     Requires API Key authentication via X-API-Key header.
-     """
-     logger.info(f"Received coding assistant query: {query.user_query}")
-     if query.conversation_id not in conversations:
-         conversations[query.conversation_id] = [
-             {"role": "system", "content": "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."}
-         ]
-
-     conversations[query.conversation_id].append({"role": "user", "content": query.user_query})
-     last_activity[query.conversation_id] = time.time()
-
-     # Limit tokens in the conversation history
-     limited_conversation = conversations[query.conversation_id]
-
-     def process_response():
-         full_response = ""
-         for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
-             full_response += content
-             yield content
-         background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.user_query, full_response)
-         logger.info(f"Completed coding assistant response for query: {query.user_query}")
-
-     return StreamingResponse(process_response(), media_type="text/event-stream")
-
- # New functions for news assistant
-
- def internet_search(query, type = "web", num_results=20):
-     logger.info(f"Performing internet search for query: {query}, type: {type}")
-     if type == "web":
-         url = "https://api.search.brave.com/res/v1/web/search"
-     else:
-         url = "https://api.search.brave.com/res/v1/news/search"
-
-     headers = {
-         "Accept": "application/json",
-         "Accept-Encoding": "gzip",
-         "X-Subscription-Token": api_keys["BRAVE_API_KEY"]
-     }
-     params = {"q": query}
-
-     response = requests.get(url, headers=headers, params=params)
-
-     if response.status_code != 200:
-         logger.error(f"Failed to fetch search results. Status code: {response.status_code}")
-         return []
-
-     if type == "web":
-         search_data = response.json()["web"]["results"]
-     else:
-         search_data = response.json()["results"]
-     processed_results = []
-
-     for item in search_data:
-         if not item.get("extra_snippets"):
-             continue
-
-         result = {
-             "title": item["title"],
-             "snippet": item["extra_snippets"][0],
-             "last_updated": item.get("age", "")
-         }
-         processed_results.append(result)
-
-     logger.info(f"Retrieved {len(processed_results)} search results")
-     return processed_results[:num_results]
-
- @lru_cache(maxsize=100)
- def cached_internet_search(query: str):
-     logger.info(f"Performing cached internet search for query: {query}")
-     return internet_search(query, type = "news")
-
-
- def analyze_news(query):
-     logger.info(f"Analyzing news for query: {query}")
-     news_data = cached_internet_search(query)
-
-     if not news_data:
-         logger.error("Failed to fetch news data")
-         return "Failed to fetch news data.", []
-
-     # Prepare the prompt for the AI
-     # Use the imported function to generate the prompt (now includes today's date)
-     prompt = generate_news_prompt(query, news_data)
-
-     messages = [
-         {"role": "system", "content": NEWS_ASSISTANT_PROMPT},
-         {"role": "user", "content": prompt}
-     ]
-
-     logger.info("News analysis completed")
-     return messages
-
- @app.post("/news-assistant")
- async def news_assistant(query: NewsQueryModel, api_key: str = Depends(verify_api_key)):
      """
-     News assistant endpoint that provides summaries and analysis of recent news based on user queries.
      Requires API Key authentication via X-API-Key header.
      """
-     logger.info(f"Received news assistant query: {query.query}")
-     messages = analyze_news(query.query)
-
-     if not messages:
-         logger.error("Failed to fetch news data")
-         raise HTTPException(status_code=500, detail="Failed to fetch news data")
-
-     def process_response():
-         for content in chat_with_llama_stream(messages, model=query.model_id):
-             yield content
-         logger.info(f"Completed news assistant response for query: {query.query}")

-     return StreamingResponse(process_response(), media_type="text/event-stream")
-
- class SearchQueryModel(BaseModel):
-     query: str = Field(..., description="Search query")
-     model_id: ModelID = Field(
-         default="openai/gpt-4o-mini",
-         description="ID of the model to use for response generation"
-     )
-     class Config:
-         schema_extra = {
-             "example": {
-                 "query": "What are the latest advancements in quantum computing?",
-                 "model_id": "meta-llama/llama-3-70b-instruct"
-             }
-         }
-
- def analyze_search_results(query):
-     logger.info(f"Analyzing search results for query: {query}")
-     search_data = internet_search(query, type="web")
-
-     if not search_data:
-         logger.error("Failed to fetch search data")
-         return "Failed to fetch search data.", []
-
-     # Prepare the prompt for the AI
-     prompt = generate_search_prompt(query, search_data)
-
-     messages = [
-         {"role": "system", "content": SEARCH_ASSISTANT_PROMPT},
-         {"role": "user", "content": prompt}
-     ]
-
-     logger.info("Search results analysis completed")
-     return messages
-
- @app.post("/search-assistant")
- async def search_assistant(query: SearchQueryModel, api_key: str = Depends(verify_api_key)):
-     """
-     Search assistant endpoint that provides summaries and analysis of web search results based on user queries.
-     Requires API Key authentication via X-API-Key header.
-     """
-     logger.info(f"Received search assistant query: {query.query}")
-     messages = analyze_search_results(query.query)
-
-     if not messages:
-         logger.error("Failed to fetch search data")
-         raise HTTPException(status_code=500, detail="Failed to fetch search data")
-
-     def process_response():
-         logger.info(f"Generating response using LLM: {messages}")
-         full_response = ""
-         for content in chat_with_llama_stream(messages, model=query.model_id):
-             full_response += content
-             yield content
-         logger.info(f"Completed search assistant response for query: {query.query}")
-         logger.info(f"LLM Response: {full_response}")
-
-     return StreamingResponse(process_response(), media_type="text/event-stream")
-
-
- from pydantic import BaseModel, Field
- import yaml
- import json
- from yaml.loader import SafeLoader
-
- class FollowupQueryModel(BaseModel):
-     query: str = Field(..., description="User's query for the followup agent")
-     model_id: ModelID = Field(
-         default="openai/gpt-4o-mini",
-         description="ID of the model to use for response generation"
-     )
-     conversation_id: str = Field(default_factory=lambda: str(uuid4()), description="Unique identifier for the conversation")
-     user_id: str = Field(..., description="Unique identifier for the user")
-
-     class Config:
-         schema_extra = {
-             "example": {
-                 "query": "How can I improve my productivity?",
-                 "model_id": "openai/gpt-4o-mini",
-                 "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                 "user_id": "user123"
-             }
-         }
-
- FOLLOWUP_AGENT_PROMPT = """
- You are a helpful, engaging assistant with the following skills; use them as necessary. Use the <response> tag to provide responses, well formatted in markdown.
-
- If the user request needs further clarification, answer to the best of your ability in a <response>, then further analyze the user request and generate clarifying questions with options in a <clarification>. Else respond with a helpful answer. After providing a response, you can also ask followup questions using the same <clarification> tags in yaml format.
-
- <response>response to user request in markdown</response>
- <clarification>
- questions:
-   - text: [First clarifying question]
-     options:
-       - [Option 1]
-       - [Option 2]
-       - [Option 3]
-       - [Option 4 (if needed)]
-   - text: [Second clarifying question]
-     options:
-       - [Option 1]
-       - [Option 2]
-       - [Option 3]
-   # Add more questions as needed
-   # make sure this section is in valid YAML format
- </clarification>
-
- """
-
- import re
-
- def parse_followup_response(input_text):
-     # Define patterns for response and clarification
-     response_pattern = re.compile(r'<response>(.*?)<\/response>', re.DOTALL)
-     clarification_pattern = re.compile(r'<clarification>(.*?)<\/clarification>', re.DOTALL)
-
-     # Find all matches for response and clarification
-     response_matches = response_pattern.finditer(input_text)
-     clarification_matches = clarification_pattern.finditer(input_text)
-
-     # Initialize variables to keep track of the position
-     last_end = 0
-     combined_response = ""
-     parsed_clarifications = []
-
-     # Combine responses and capture everything in between
-     for response_match in response_matches:
-         # Capture text before the current response tag
-         combined_response += input_text[last_end:response_match.start()].strip() + "\n"
-         # Add the response content
-         combined_response += response_match.group(1).strip() + "\n"
-         # Update the last end position
-         last_end = response_match.end()
-
-     # Check for clarifications and parse them
-     for clarification_match in clarification_matches:
-         # Capture text before the current clarification tag
-         combined_response += input_text[last_end:clarification_match.start()].strip() + "\n"
-         # Process the clarification block
-         clarification_text = clarification_match.group(1).strip()
-         if clarification_text:
-             # Split by "text:" to separate each question block
-             question_blocks = clarification_text.split("- text:")
-
-             # Loop through each block and extract the question and its options
-             for block in question_blocks[1:]:
-                 # Extract the question using regex (up to the "options:" part)
-                 question_match = re.search(r'^(.*?)\s*options:', block, re.DOTALL)
-                 if question_match:
-                     question = question_match.group(1).strip()
-
-                     # Extract the options using regex
-                     options_match = re.search(r'options:\s*(.*?)$', block, re.DOTALL)
-                     if options_match:
-                         options = [option.strip() for option in options_match.group(1).split('-') if option.strip()]
-
-                         # Add the parsed question and options to the list
-                         parsed_clarifications.append({'question': question, 'options': options})
-         # Update the last end position
-         last_end = clarification_match.end()
-
-     # Capture any remaining text after the last tag
-     combined_response += input_text[last_end:].strip()
-
-     return combined_response.strip(), parsed_clarifications
-
-
- @app.post("/followup-agent")
- async def followup_agent(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
-     """
-     Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
-     Requires API Key authentication via X-API-Key header.
-     """
-     logger.info(f"Received followup agent query: {query.query}")
      if query.conversation_id not in conversations:
          conversations[query.conversation_id] = [
-             {"role": "system", "content": FOLLOWUP_AGENT_PROMPT}
          ]
-
-     conversations[query.conversation_id].append({"role": "user", "content": query.query})
      last_activity[query.conversation_id] = time.time()
-
      # Limit tokens in the conversation history
-     limited_conversation = conversations[query.conversation_id]

      def process_response():
          full_response = ""
@@ -510,25 +239,18 @@ async def followup_agent(query: FollowupQueryModel, background_tasks: Background
              full_response += content
              yield content

-         logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
-         response_content, clarification = parse_followup_response(full_response)
-
-         result = {
-             "response": response_content,
-             "clarification": clarification
-         }
-
-         yield "\n\n" + json.dumps(result)
-
          # Add the assistant's response to the conversation history
          conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
-
-         background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
-         logger.info(f"Completed followup agent response for query: {query.query}")

      return StreamingResponse(process_response(), media_type="text/event-stream")

  if __name__ == "__main__":
      import uvicorn
-     logger.info("Starting the application")
-     uvicorn.run(app, host="0.0.0.0", port=7860)
 
+ import os
+ import time
+ import asyncio
+ import logging
+ import sqlite3
+ import tiktoken
+ from uuid import uuid4
+ from functools import lru_cache
+ from typing import Optional, List, Dict, Literal
  from fastapi import FastAPI, HTTPException, Depends, Security, BackgroundTasks
  from fastapi.security import APIKeyHeader
  from fastapi.responses import StreamingResponse
  from pydantic import BaseModel, Field
  from openai import OpenAI
+
+ # ============================================================================
+ # Configuration and Setup
+ # ============================================================================

  # Configure logging
  logging.basicConfig(
      ...
  )
  logger = logging.getLogger(__name__)

+ # FastAPI app setup
  app = FastAPI()

+ # API key configuration
  API_KEY_NAME = "X-API-Key"
  API_KEY = os.environ.get("CHAT_AUTH_KEY", "default_secret_key")
  api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

+ # Model definitions
  ModelID = Literal[
      "openai/gpt-4o-mini",
      "meta-llama/llama-3-70b-instruct",
      ...
      "google/gemma-2-27b-it"
  ]

+ # Pydantic models
+ class LLMAgentQueryModel(BaseModel):
+     prompt: str = Field(..., description="User's query or prompt")
+     system_message: Optional[str] = Field(None, description="Custom system message for the conversation")
      model_id: ModelID = Field(
+         default="openai/gpt-4o-mini",
          description="ID of the model to use for response generation"
      )
+     conversation_id: Optional[str] = Field(None, description="Unique identifier for the conversation")
      user_id: str = Field(..., description="Unique identifier for the user")

      class Config:
          schema_extra = {
              "example": {
+                 "prompt": "How do I implement a binary search in Python?",
+                 "system_message": "You are a helpful coding assistant.",
                  "model_id": "meta-llama/llama-3-70b-instruct",
                  "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
                  "user_id": "user123"
              }
          }

+ # API key and client setup
  @lru_cache()
  def get_api_keys():
      logger.info("Loading API keys")
      return {
          "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}",
      }

  api_keys = get_api_keys()
  ...

  # Token encoding
  encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

+ # ============================================================================
+ # Database Functions
+ # ============================================================================

  DB_PATH = '/app/data/conversations.db'

  def init_db():
      ...
      conn.close()
      logger.info("Database initialized successfully")

  def update_db(user_id, conversation_id, message, response):
      logger.info(f"Updating database for conversation: {conversation_id}")
      conn = sqlite3.connect(DB_PATH)
      ...
      conn.close()
      logger.info("Database updated successfully")

+ # ============================================================================
+ # Utility Functions
+ # ============================================================================
+
+ def calculate_tokens(msgs):
+     return sum(len(encoding.encode(str(m))) for m in msgs)
+
+ def limit_conversation_history(conversation: List[Dict[str, str]], max_tokens: int = 4000) -> List[Dict[str, str]]:
+     """Limit the conversation history to a maximum number of tokens."""
+     limited_conversation = []
+     current_tokens = 0
+
+     for message in reversed(conversation):
+         message_tokens = calculate_tokens([message])
+         if current_tokens + message_tokens > max_tokens:
+             break
+         limited_conversation.insert(0, message)
+         current_tokens += message_tokens
+
+     return limited_conversation
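The new trimming strategy walks the history from newest to oldest and keeps whole messages until the token budget is spent, so the oldest messages (including the system message) are the first to fall out. A minimal standalone sketch of the same logic, using a crude whitespace token count as a stand-in for tiktoken, purely for illustration:

# Standalone sketch of limit_conversation_history; the whitespace-based
# token count below is an assumption standing in for tiktoken.
from typing import Dict, List

def calculate_tokens(msgs: List[Dict[str, str]]) -> int:
    # crude stand-in: count whitespace-separated words in each dict's repr
    return sum(len(str(m).split()) for m in msgs)

def limit_conversation_history(conversation: List[Dict[str, str]], max_tokens: int = 12) -> List[Dict[str, str]]:
    limited, current = [], 0
    for message in reversed(conversation):     # newest first
        tokens = calculate_tokens([message])
        if current + tokens > max_tokens:
            break                              # stop at the first message that overflows
        limited.insert(0, message)             # restore chronological order
        current += tokens
    return limited

history = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "first question"},
    {"role": "assistant", "content": "a fairly long first answer with many words"},
    {"role": "user", "content": "second question"},
]
print(limit_conversation_history(history))
# -> only the most recent message fits the tiny budget; the system message
#    is dropped once the tail of the conversation exhausts it.

Unlike the old while-loop in chat_with_llama_stream, which always kept messages[0], this version retains the system message only if it still fits within the budget.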
+
+ async def verify_api_key(api_key: str = Security(api_key_header)):
+     if api_key != API_KEY:
+         logger.warning("Invalid API key used")
+         raise HTTPException(status_code=403, detail="Could not validate credentials")
+     return api_key
+
+ # ============================================================================
+ # LLM Interaction Functions
+ # ============================================================================
+
+ def chat_with_llama_stream(messages, model="meta-llama/llama-3-70b-instruct", max_output_tokens=2500):
+     logger.info(f"Starting chat with model: {model}")
+     try:
+         response = or_client.chat.completions.create(
+             model=model,
+             messages=messages,
+             max_tokens=max_output_tokens,
+             stream=True
+         )
+
+         full_response = ""
+         for chunk in response:
+             if chunk.choices[0].delta.content is not None:
+                 content = chunk.choices[0].delta.content
+                 full_response += content
+                 yield content
+
+         # After streaming, add the full response to the conversation history
+         messages.append({"role": "assistant", "content": full_response})
+         logger.info("Chat completed successfully")
+     except Exception as e:
+         logger.error(f"Error in model response: {str(e)}")
+         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
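One subtlety in this streaming helper: it is a generator, so nothing executes until it is consumed, and the assistant's full reply is appended to the caller's messages list only after the stream is exhausted. A small sketch of that pattern, with a made-up stream_reply standing in for the OpenRouter call:

# Illustration of the generator-with-side-effect pattern used by
# chat_with_llama_stream; stream_reply is a hypothetical stand-in for
# the or_client streaming call.
def stream_reply(messages, chunks=("Hello", ", ", "world")):
    full_response = ""
    for content in chunks:          # stands in for iterating API chunks
        full_response += content
        yield content
    # side effect: history is updated only once streaming finishes
    messages.append({"role": "assistant", "content": full_response})

history = [{"role": "user", "content": "Say hello"}]
gen = stream_reply(history)
print(len(history))    # 1 -- nothing has run yet
print("".join(gen))    # Hello, world
print(history[-1])     # {'role': 'assistant', 'content': 'Hello, world'}

In the endpoint below, the reply therefore ends up appended both to the limited_conversation list passed into the helper and, via process_response, to the stored conversation history.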
+
+ # ============================================================================
+ # Background Tasks
+ # ============================================================================
+
  async def clear_inactive_conversations():
      while True:
          logger.info("Clearing inactive conversations")
          ...
          logger.info(f"Cleared {len(inactive_convos)} inactive conversations")
          await asyncio.sleep(60) # Check every minute

+ # ============================================================================
+ # FastAPI Events and Endpoints
+ # ============================================================================
+
  @app.on_event("startup")
  async def startup_event():
      logger.info("Starting up the application")
+     init_db()
      asyncio.create_task(clear_inactive_conversations())

+ @app.post("/llm-agent")
+ async def llm_agent(query: LLMAgentQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
      """
+     LLM agent endpoint that provides responses based on user queries, maintaining conversation history.
+     Accepts custom system messages and allows selection of different models.
      Requires API Key authentication via X-API-Key header.
      """
+     logger.info(f"Received LLM agent query: {query.prompt}")

+     # Generate a new conversation ID if not provided
+     if not query.conversation_id:
+         query.conversation_id = str(uuid4())

+     # Initialize or retrieve conversation history
      if query.conversation_id not in conversations:
+         system_message = query.system_message or "You are a helpful assistant. Provide concise and accurate responses."
          conversations[query.conversation_id] = [
+             {"role": "system", "content": system_message}
          ]
+     elif query.system_message:
+         # Update system message if provided
+         conversations[query.conversation_id][0] = {"role": "system", "content": query.system_message}
+
+     # Add user's prompt to conversation history
+     conversations[query.conversation_id].append({"role": "user", "content": query.prompt})
      last_activity[query.conversation_id] = time.time()
+
      # Limit tokens in the conversation history
+     limited_conversation = limit_conversation_history(conversations[query.conversation_id])

      def process_response():
          full_response = ""
          ...
              full_response += content
              yield content

          # Add the assistant's response to the conversation history
          conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
+
+         background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.prompt, full_response)
+         logger.info(f"Completed LLM agent response for query: {query.prompt}")

      return StreamingResponse(process_response(), media_type="text/event-stream")

+ # ============================================================================
+ # Main Execution
+ # ============================================================================
+
  if __name__ == "__main__":
      import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
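For reference, a call against the new consolidated endpoint might look like the following sketch; the host, port, and key are deployment-specific assumptions (the port matches the uvicorn.run call above, and the key matches the CHAT_AUTH_KEY fallback):

# Hypothetical client for the new /llm-agent endpoint; URL and API key
# are placeholders for your own deployment.
import requests

resp = requests.post(
    "http://localhost:8000/llm-agent",
    headers={"X-API-Key": "default_secret_key"},   # must match CHAT_AUTH_KEY
    json={
        "prompt": "How do I implement a binary search in Python?",
        "system_message": "You are a helpful coding assistant.",
        "model_id": "openai/gpt-4o-mini",
        "user_id": "user123",
    },
    stream=True,
)
resp.raise_for_status()
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    print(chunk, end="")   # tokens arrive incrementally from the event stream

Omitting conversation_id starts a new conversation (the server generates a uuid4); sending the same conversation_id again continues it, until the conversation is cleared for inactivity.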