Create main.py
main.py
ADDED
@@ -0,0 +1,68 @@
from fastapi import FastAPI, Request, Response
from starlette.responses import StreamingResponse
import requests
import json

app = FastAPI()


# The POST route handles completion requests; the OPTIONS route answers
# CORS preflights that browsers send before the actual POST.
@app.post("/v1/chat/completions")
@app.options("/v1/chat/completions")
async def main(request: Request):
    if request.method != 'POST':
        # CORS preflight: reply with permissive headers and no body
        return Response(None, status_code=204, headers={
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': '*',
            'Access-Control-Allow-Headers': '*'
        })

    url = 'https://multillm.ai-pro.org/api/openai-completion'  # target API address
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Content-Type": "application/json"
    }
    json_data = await request.json()
    # Ask the backend for a complete (non-streamed) response; event_stream()
    # below re-emits it as a single SSE chunk. response.json() would fail on
    # a real SSE body, so streaming must not be requested upstream.
    json_data["stream"] = False
    # Note: requests is synchronous and blocks the event loop for the
    # duration of the upstream call.
    response = requests.post(url, headers=headers, json=json_data)

    if response.status_code != 200:
        return Response(content='Unable to reach the backend API', status_code=502)

    response_data = response.json()
    return StreamingResponse(
        event_stream(response_data),
        headers={
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Headers': '*',
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive'
        }
    )


def event_stream(data):
    # Simplified event stream: the whole completion is emitted as a single
    # chat.completion.chunk instead of being split into per-token deltas.
    output = json.dumps({
        "id": data['id'],
        "object": 'chat.completion.chunk',
        "created": data['created'],
        "model": data['model'],
        "system_fingerprint": data.get('system_fingerprint'),  # may be absent
        "choices": [{
            "index": 0,
            "delta": {"role": 'assistant', "content": data['choices'][0]['message']['content']},
            "logprobs": None,
            "finish_reason": data['choices'][0]['finish_reason']
        }]
    })
    yield f'data: {output}\n\n'  # StreamingResponse expects an iterable
    yield 'data: [DONE]\n\n'  # terminator expected by OpenAI-style clients


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
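
For reference, a minimal client sketch for exercising the proxy once it is running. This assumes the server is reachable at http://localhost:7860; the model name and prompt are placeholders, not values prescribed by the backend.

import requests

# Hypothetical smoke test for the proxy above; model and prompt are placeholders.
resp = requests.post(
    "http://localhost:7860/v1/chat/completions",
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello"}]},
    stream=True,
)
for line in resp.iter_lines(decode_unicode=True):
    # Each SSE event arrives as a "data: {...}" line; "data: [DONE]" ends the stream.
    if line:
        print(line)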