ykl45 commited on
Commit
b483f64
1 Parent(s): 9ef0114

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +68 -0
main.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, Response, HTTPException
2
+ from starlette.responses import StreamingResponse
3
+ import requests
4
+ import json
5
+
6
+ app = FastAPI()
7
+
8
+
9
# FastAPI uses Pydantic to parse the request body into python object
@app.post("/v1/chat/completions")
@app.options("/v1/chat/completions")  # CORS preflight hits the same handler
async def main(request: Request):
    """Proxy an OpenAI-style chat-completion request to the ai-pro.org backend.

    POST: forwards the JSON body upstream, then re-emits the (non-streamed)
    answer to the client as a text/event-stream via ``event_stream``.
    OPTIONS: answers the browser's CORS preflight with an empty 204.

    Returns a 502 Response when the backend is unreachable or non-200.
    """
    if request.method != 'POST':
        # CORS preflight: no body, just the permissive CORS headers.
        return Response(None, status_code=204, headers={
            'Access-Control-Allow-Origin': '*',
            "Access-Control-Allow-Headers": '*',
            # Advertise the allowed methods so browsers accept the preflight.
            'Access-Control-Allow-Methods': 'POST, OPTIONS',
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive'
        })

    url = 'https://multillm.ai-pro.org/api/openai-completion'  # target API address
    # Fixed upstream headers. (The original built a headers dict from the
    # incoming request and then immediately overwrote it — dead code removed.)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Content-Type": "application/json",
    }
    jsonData = await request.json()
    jsonData["stream"] = True
    # NOTE(review): requests is synchronous and blocks the event loop while
    # the backend responds — acceptable for a single-user demo, but consider
    # httpx.AsyncClient under concurrent load.
    try:
        response = requests.post(url, headers=headers, json=jsonData, timeout=120)
    except requests.RequestException:
        # Connection errors previously escaped as an unhandled 500.
        return Response(content='Unable to reach the backend API', status_code=502)

    if response.status_code != 200:
        return Response(content='Unable to reach the backend API', status_code=502)

    response_data = response.json()
    return StreamingResponse(
        event_stream(response_data),
        headers={
            'Access-Control-Allow-Origin': '*',
            "Access-Control-Allow-Headers": '*',
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive'
        }
    )
46
+
47
+
48
+
49
def event_stream(data):
    """Yield a full chat-completion response as one OpenAI-style SSE chunk.

    ``data`` is the parsed JSON of a non-streamed /chat/completions response.
    The whole message content is emitted in a single ``chat.completion.chunk``
    event, followed by the standard ``data: [DONE]`` terminator so streaming
    clients know the stream has ended (without it many clients hang).
    """
    first_choice = data['choices'][0]
    chunk = {
        "id": data['id'],
        "object": 'chat.completion.chunk',
        "created": data['created'],
        "model": data['model'],
        # Tolerate backends that omit system_fingerprint (was a KeyError).
        "system_fingerprint": data.get('system_fingerprint'),
        "choices": [{
            "index": 0,
            "delta": {"role": 'assistant', "content": first_choice['message']['content']},
            "logprobs": None,
            "finish_reason": first_choice['finish_reason'],
        }],
    }
    yield f'data: {json.dumps(chunk)}\n\n'  # StreamingResponse expects an iterable
    # OpenAI streaming protocol terminator.
    yield 'data: [DONE]\n\n'
65
+
66
+ if __name__ == "__main__":
67
+ import uvicorn
68
+ uvicorn.run(app, host="0.0.0.0", port=7860)