ykl45 commited on
Commit
b483f64
1 Parent(s): 9ef0114

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +68 -0
main.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, Response, HTTPException
2
+ from starlette.responses import StreamingResponse
3
+ import requests
4
+ import json
5
+
6
+ app = FastAPI()
7
+
8
+
9
# FastAPI uses Pydantic to parse the request body into python object
@app.post("/v1/chat/completions")
@app.options("/v1/chat/completions")  # CORS preflight hits the same handler
async def main(request: Request):
    """Proxy an OpenAI-style chat-completion request to the ai-pro.org backend.

    POST: forwards the JSON body upstream, then re-emits the (non-streamed)
    answer to the client as a text/event-stream via ``event_stream``.
    OPTIONS: answers the browser's CORS preflight with an empty 204.

    Returns a 502 Response when the backend is unreachable or non-200.
    """
    if request.method != 'POST':
        # CORS preflight: no body, just the permissive CORS headers.
        return Response(None, status_code=204, headers={
            'Access-Control-Allow-Origin': '*',
            "Access-Control-Allow-Headers": '*',
            # Advertise the allowed methods so browsers accept the preflight.
            'Access-Control-Allow-Methods': 'POST, OPTIONS',
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive'
        })

    url = 'https://multillm.ai-pro.org/api/openai-completion'  # target API address
    # Fixed upstream headers. (The original built a headers dict from the
    # incoming request and then immediately overwrote it — dead code removed.)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Content-Type": "application/json",
    }
    jsonData = await request.json()
    jsonData["stream"] = True
    # NOTE(review): requests is synchronous and blocks the event loop while
    # the backend responds — acceptable for a single-user demo, but consider
    # httpx.AsyncClient under concurrent load.
    try:
        response = requests.post(url, headers=headers, json=jsonData, timeout=120)
    except requests.RequestException:
        # Connection errors previously escaped as an unhandled 500.
        return Response(content='Unable to reach the backend API', status_code=502)

    if response.status_code != 200:
        return Response(content='Unable to reach the backend API', status_code=502)

    response_data = response.json()
    return StreamingResponse(
        event_stream(response_data),
        headers={
            'Access-Control-Allow-Origin': '*',
            "Access-Control-Allow-Headers": '*',
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive'
        }
    )
46
+
47
+
48
+
49
def event_stream(data):
    """Yield a full chat-completion response as one OpenAI-style SSE chunk.

    ``data`` is the parsed JSON of a non-streamed /chat/completions response.
    The whole message content is emitted in a single ``chat.completion.chunk``
    event, followed by the standard ``data: [DONE]`` terminator so streaming
    clients know the stream has ended (without it many clients hang).
    """
    first_choice = data['choices'][0]
    chunk = {
        "id": data['id'],
        "object": 'chat.completion.chunk',
        "created": data['created'],
        "model": data['model'],
        # Tolerate backends that omit system_fingerprint (was a KeyError).
        "system_fingerprint": data.get('system_fingerprint'),
        "choices": [{
            "index": 0,
            "delta": {"role": 'assistant', "content": first_choice['message']['content']},
            "logprobs": None,
            "finish_reason": first_choice['finish_reason'],
        }],
    }
    yield f'data: {json.dumps(chunk)}\n\n'  # StreamingResponse expects an iterable
    # OpenAI streaming protocol terminator.
    yield 'data: [DONE]\n\n'
65
+
66
+ if __name__ == "__main__":
67
+ import uvicorn
68
+ uvicorn.run(app, host="0.0.0.0", port=7860)