Update app.py
app.py CHANGED

@@ -6,6 +6,7 @@ from fastapi.responses import HTMLResponse
 from llama_cpp import Llama
 from pydantic import BaseModel
 import uvicorn
+import json

 # Configuration
 MODEL_URL = "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"

@@ -132,17 +133,25 @@ class ChatCompletionResponse(BaseModel):
     choices: list[dict]
     usage: dict

-@app.
-async def chat_completion(
+@app.get("/v1/chat/completions")
+async def chat_completion(
+    messages: str,
+    max_tokens: int = 128,
+    temperature: float = 0.7,
+    top_p: float = 0.9,
+    stream: bool = False
+):
     try:
-
+        messages_list = json.loads(messages)
+
+        prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages_list])
         prompt += "\nassistant:"

         response = llm(
             prompt=prompt,
-            max_tokens=
-            temperature=
-            top_p=
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
             stop=["</s>"]
         )
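
For reference, a minimal sketch of a client call against the reworked endpoint. It assumes the app is reachable at http://localhost:7860 (the actual host and port depend on how uvicorn is launched in app.py, which this diff does not show) and that the third-party `requests` library is installed; the chat history travels as a JSON-encoded string in the `messages` query parameter, matching the json.loads() call added above.

    import json
    import requests  # third-party HTTP client; assumed installed

    # Assumed base URL; adjust to wherever the Space's uvicorn server listens.
    BASE_URL = "http://localhost:7860"

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]

    # The new GET route reads the history from a JSON-encoded `messages`
    # query parameter and decodes it server-side with json.loads().
    resp = requests.get(
        f"{BASE_URL}/v1/chat/completions",
        params={
            "messages": json.dumps(messages),
            "max_tokens": 128,
            "temperature": 0.7,
            "top_p": 0.9,
        },
    )
    resp.raise_for_status()
    print(resp.json())  # shape follows ChatCompletionResponse in app.py

With that example history, the prompt the new code assembles and passes to llm() is the plain text:

    system: You are a helpful assistant.
    user: Hello!
    assistant: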