yusufs committed
Commit 5f3bf21 · 1 Parent(s): 7935381

feat(endpoint): add prefix /api on each endpoint

Files changed (2):
  1. README.md +5 -0
  2. openai_compatible_api_server.py +4 -4
README.md CHANGED

@@ -20,6 +20,11 @@ poetry export -f requirements.txt --output requirements.txt --without-hashes
 
 > References: https://huggingface.co/spaces/sofianhw/ai/tree/c6527a750644a849b6705bb6fe2fcea4e54a8196
 
+Fixes:
+
+* [x] change every route in api_server.py that starts with "/v1/xxx" to "/api/v1/xxx",
+  then just run `python api_server.py` with arguments. https://discuss.huggingface.co/t/run-vllm-docker-on-space/70228/5?u=yusufs
+
 This `api_server.py` file is an exact copy of https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/entrypoints/openai/api_server.py
 
 * The `HUGGING_FACE_HUB_TOKEN` must exist during runtime.
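Aside: the commit applies the prefix by editing each decorator in the copied file. For comparison, FastAPI can apply the same prefix in one place when the router is mounted. This is a minimal sketch of that alternative, not the actual wiring of `openai_compatible_api_server.py`; the toy route and response payload are placeholders:

```python
from fastapi import APIRouter, FastAPI

router = APIRouter()

@router.get("/v1/models")  # decorator keeps the upstream /v1 path
async def show_available_models():
    # Placeholder payload; the real handler delegates to vLLM's serving layer.
    return {"object": "list", "data": []}

app = FastAPI()
# `prefix` prepends /api to every route on this router,
# so the endpoint above is served at /api/v1/models.
app.include_router(router, prefix="/api")
```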
openai_compatible_api_server.py CHANGED

@@ -322,7 +322,7 @@ async def detokenize(request: DetokenizeRequest, raw_request: Request):
     assert_never(generator)
 
 
-@router.get("/v1/models")
+@router.get("/api/v1/models")
 async def show_available_models(raw_request: Request):
     handler = base(raw_request)
 
@@ -336,7 +336,7 @@ async def show_version():
     return JSONResponse(content=ver)
 
 
-@router.post("/v1/chat/completions")
+@router.post("/api/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
     handler = chat(raw_request)
@@ -356,7 +356,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
     return StreamingResponse(content=generator, media_type="text/event-stream")
 
 
-@router.post("/v1/completions")
+@router.post("/api/v1/completions")
 async def create_completion(request: CompletionRequest, raw_request: Request):
     handler = completion(raw_request)
     if handler is None:
@@ -373,7 +373,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
     return StreamingResponse(content=generator, media_type="text/event-stream")
 
 
-@router.post("/v1/embeddings")
+@router.post("/api/v1/embeddings")
 async def create_embedding(request: EmbeddingRequest, raw_request: Request):
     handler = embedding(raw_request)
     if handler is None:
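After this change, clients must include the /api prefix in their base URL. A usage sketch with the official `openai` Python client follows; the host, port, and model name are placeholders, not values taken from this repo:

```python
from openai import OpenAI

# Placeholder host/port; point this at wherever the server is running.
client = OpenAI(
    base_url="http://localhost:8000/api/v1",  # note the new /api prefix
    api_key="unused",  # ignored unless the server was started with --api-key
)

resp = client.chat.completions.create(
    model="placeholder-model",
    messages=[{"role": "user", "content": "Hello"}],
)
print(resp.choices[0].message.content)
```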