Spaces:
Running
Running
add openai/gpt-4o-mini
Browse files
main.py
CHANGED
@@ -37,6 +37,7 @@ API_KEY = os.environ.get("CHAT_AUTH_KEY", "default_secret_key")
|
|
37 |
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
38 |
|
39 |
ModelID = Literal[
|
|
|
40 |
"meta-llama/llama-3-70b-instruct",
|
41 |
"anthropic/claude-3.5-sonnet",
|
42 |
"deepseek/deepseek-coder",
|
@@ -68,14 +69,14 @@ class QueryModel(BaseModel):
|
|
68 |
class NewsQueryModel(BaseModel):
|
69 |
query: str = Field(..., description="News topic to search for")
|
70 |
model_id: ModelID = Field(
|
71 |
-
default="
|
72 |
description="ID of the model to use for response generation"
|
73 |
)
|
74 |
class Config:
|
75 |
schema_extra = {
|
76 |
"example": {
|
77 |
"query": "Latest developments in AI",
|
78 |
-
"model_id": "
|
79 |
}
|
80 |
}
|
81 |
|
@@ -103,7 +104,7 @@ def limit_tokens(input_string, token_limit=6000):
|
|
103 |
def calculate_tokens(msgs):
|
104 |
return sum(len(encoding.encode(str(m))) for m in msgs)
|
105 |
|
106 |
-
def chat_with_llama_stream(messages, model="gpt-
|
107 |
logger.info(f"Starting chat with model: {model}")
|
108 |
while calculate_tokens(messages) > (8000 - max_output_tokens):
|
109 |
if len(messages) > max_llm_history:
|
@@ -206,6 +207,7 @@ async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
|
|
206 |
- openai/gpt-3.5-turbo-instruct
|
207 |
- qwen/qwen-72b-chat
|
208 |
- google/gemma-2-27b-it
|
|
|
209 |
Requires API Key authentication via X-API-Key header.
|
210 |
"""
|
211 |
logger.info(f"Received coding assistant query: {query.user_query}")
|
@@ -321,7 +323,7 @@ async def news_assistant(query: NewsQueryModel, api_key: str = Depends(verify_ap
|
|
321 |
class SearchQueryModel(BaseModel):
|
322 |
query: str = Field(..., description="Search query")
|
323 |
model_id: ModelID = Field(
|
324 |
-
default="
|
325 |
description="ID of the model to use for response generation"
|
326 |
)
|
327 |
class Config:
|
|
|
37 |
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
|
38 |
|
39 |
ModelID = Literal[
|
40 |
+
"openai/gpt-4o-mini",
|
41 |
"meta-llama/llama-3-70b-instruct",
|
42 |
"anthropic/claude-3.5-sonnet",
|
43 |
"deepseek/deepseek-coder",
|
|
|
69 |
class NewsQueryModel(BaseModel):
|
70 |
query: str = Field(..., description="News topic to search for")
|
71 |
model_id: ModelID = Field(
|
72 |
+
default="openai/gpt-4o-mini",
|
73 |
description="ID of the model to use for response generation"
|
74 |
)
|
75 |
class Config:
|
76 |
schema_extra = {
|
77 |
"example": {
|
78 |
"query": "Latest developments in AI",
|
79 |
+
"model_id": "openai/gpt-4o-mini"
|
80 |
}
|
81 |
}
|
82 |
|
|
|
104 |
def calculate_tokens(msgs):
|
105 |
return sum(len(encoding.encode(str(m))) for m in msgs)
|
106 |
|
107 |
+
def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
|
108 |
logger.info(f"Starting chat with model: {model}")
|
109 |
while calculate_tokens(messages) > (8000 - max_output_tokens):
|
110 |
if len(messages) > max_llm_history:
|
|
|
207 |
- openai/gpt-3.5-turbo-instruct
|
208 |
- qwen/qwen-72b-chat
|
209 |
- google/gemma-2-27b-it
|
210 |
+
- openai/gpt-4o-mini
|
211 |
Requires API Key authentication via X-API-Key header.
|
212 |
"""
|
213 |
logger.info(f"Received coding assistant query: {query.user_query}")
|
|
|
323 |
class SearchQueryModel(BaseModel):
|
324 |
query: str = Field(..., description="Search query")
|
325 |
model_id: ModelID = Field(
|
326 |
+
default="openai/gpt-4o-mini",
|
327 |
description="ID of the model to use for response generation"
|
328 |
)
|
329 |
class Config:
|