Pamudu13 committed
Commit 9b54a18 (verified) · Parent(s): f9e8a03

Update app.py

Files changed (1)
  1. app.py +15 -6
app.py CHANGED
@@ -6,6 +6,7 @@ from fastapi.responses import HTMLResponse
 from llama_cpp import Llama
 from pydantic import BaseModel
 import uvicorn
+import json
 
 # Configuration
 MODEL_URL = "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
@@ -132,17 +133,25 @@ class ChatCompletionResponse(BaseModel):
     choices: list[dict]
     usage: dict
 
-@app.post("/v1/chat/completions")
-async def chat_completion(request: ChatCompletionRequest):
+@app.get("/v1/chat/completions")
+async def chat_completion(
+    messages: str,
+    max_tokens: int = 128,
+    temperature: float = 0.7,
+    top_p: float = 0.9,
+    stream: bool = False
+):
     try:
-        prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in request.messages])
+        messages_list = json.loads(messages)
+
+        prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages_list])
         prompt += "\nassistant:"
 
         response = llm(
             prompt=prompt,
-            max_tokens=request.max_tokens,
-            temperature=request.temperature,
-            top_p=request.top_p,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
             stop=["</s>"]
         )
 
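With this change the route is a GET endpoint that takes `messages` as a JSON-encoded query parameter instead of a POST body, so a client has to serialize the message list before sending it. Below is a minimal sketch of calling the updated endpoint with the `requests` library; the base URL, host, and port are assumptions, since the commit does not show where the app is served.

```python
# Minimal sketch of calling the updated GET endpoint.
# BASE_URL is a hypothetical placeholder; substitute the actual server address.
import json
import requests

BASE_URL = "http://localhost:7860"  # assumption, not from the commit

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# The route reads `messages` as a JSON string from the query string,
# so the list is serialized with json.dumps before sending.
resp = requests.get(
    f"{BASE_URL}/v1/chat/completions",
    params={
        "messages": json.dumps(messages),
        "max_tokens": 128,
        "temperature": 0.7,
        "top_p": 0.9,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```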