feat(endpoint): add prefix /api on each endpoint
- README.md +5 -0
- openai_compatible_api_server.py +4 -4
README.md
CHANGED
@@ -20,6 +20,11 @@ poetry export -f requirements.txt --output requirements.txt --without-hashes
 
 > References: https://huggingface.co/spaces/sofianhw/ai/tree/c6527a750644a849b6705bb6fe2fcea4e54a8196
 
+Fixes:
+
+* [x] Change every route in api_server.py that starts with "/v1/xxx" to "/api/v1/xxx",
+  and just run `python api_server.py` with arguments. https://discuss.huggingface.co/t/run-vllm-docker-on-space/70228/5?u=yusufs
+
 This `api_server.py` file is an exact copy of https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/entrypoints/openai/api_server.py
 
 * The `HUGGING_FACE_HUB_TOKEN` must exist during runtime.
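With the prefix in place, clients call the same OpenAI-compatible routes under `/api`. A minimal client-side sketch; the host, port, and prompt below are placeholders, and it assumes the server was started with something like `python api_server.py --model <model-id> --port 8000`:

```python
# Sketch: calling the prefixed endpoints on a running server.
# BASE_URL is a placeholder for wherever the Space exposes the API.
import requests

BASE_URL = "http://localhost:8000/api/v1"  # previously http://localhost:8000/v1

# List served models (was GET /v1/models, now GET /api/v1/models).
models = requests.get(f"{BASE_URL}/models").json()
print(models)

# Chat completion against the prefixed route (was POST /v1/chat/completions).
resp = requests.post(
    f"{BASE_URL}/chat/completions",
    json={
        "model": models["data"][0]["id"],
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```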
openai_compatible_api_server.py
CHANGED
@@ -322,7 +322,7 @@ async def detokenize(request: DetokenizeRequest, raw_request: Request):
     assert_never(generator)
 
 
-@router.get("/v1/models")
+@router.get("/api/v1/models")
 async def show_available_models(raw_request: Request):
     handler = base(raw_request)
 
@@ -336,7 +336,7 @@ async def show_version():
     return JSONResponse(content=ver)
 
 
-@router.post("/v1/chat/completions")
+@router.post("/api/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
     handler = chat(raw_request)
@@ -356,7 +356,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
     return StreamingResponse(content=generator, media_type="text/event-stream")
 
 
-@router.post("/v1/completions")
+@router.post("/api/v1/completions")
 async def create_completion(request: CompletionRequest, raw_request: Request):
     handler = completion(raw_request)
     if handler is None:
@@ -373,7 +373,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
     return StreamingResponse(content=generator, media_type="text/event-stream")
 
 
-@router.post("/v1/embeddings")
+@router.post("/api/v1/embeddings")
 async def create_embedding(request: EmbeddingRequest, raw_request: Request):
     handler = embedding(raw_request)
     if handler is None:
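The prefix here is applied by editing each decorator individually. For comparison, a sketch of an alternative that keeps the original `/v1/...` paths and adds the prefix once where the router is attached to the app; the wiring and handler body below are simplified placeholders, not the actual `api_server.py` setup:

```python
# Sketch: apply the /api prefix once via include_router instead of per route.
from fastapi import APIRouter, FastAPI

router = APIRouter()

@router.get("/v1/models")
async def show_available_models():
    # Placeholder body; the real handler delegates to the serving engine.
    return {"object": "list", "data": []}

app = FastAPI()
# include_router's prefix is prepended to every route on the router,
# so this handler is served at GET /api/v1/models.
app.include_router(router, prefix="/api")
```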