Spaces:
Running
Running
File size: 4,325 Bytes
883ac62 0287610 883ac62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import json
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from threading import Thread
from util import build_parameters, try_start_cloudflared
from gpt4 import ask_gpt
# from modules import shared
# from modules.chat import generate_chat_reply
# from modules.text_generation import encode, generate_reply, stop_everything_event
class Handler(BaseHTTPRequestHandler):
    """HTTP handler exposing a small, non-streaming JSON API.

    Routes:
        GET  /api/v1/model        -> name of the backing model
        POST /api/v1/generate     -> completion of ``prompt`` via ``ask_gpt``
        POST /api/v1/chat         -> placeholder (chat backend is disabled)
        POST /api/v1/stop-stream  -> placeholder (nothing to stop)
        POST /api/v1/token-count  -> placeholder (tokenizer is disabled)

    Any other path is answered with 404. Every POST must carry a JSON body
    and a valid ``Content-Length`` header; otherwise the reply is 400.
    """

    class _BadRequest(Exception):
        """Raised by a route when the request body lacks a required field."""

    def do_GET(self):
        """Answer the model-name query; every other GET path is a 404."""
        if self.path == '/api/v1/model':
            payload = {'result': 'GPT4 mindsdb OpenAI original'}
            self._send_json(json.dumps(payload).encode('utf-8'))
        else:
            self.send_error(404)

    def do_POST(self):
        """Parse the JSON body, run the matching route and send its result.

        The response body is fully built *before* the status line is sent,
        so a failure can still be reported with a proper 4xx/5xx status
        instead of a 200 header followed by nothing.
        """
        body, ok = self._read_json_body()
        if not ok:
            return  # an error response has already been sent

        routes = {
            '/api/v1/generate': self._generate,
            '/api/v1/chat': self._chat,
            '/api/v1/stop-stream': self._stop_stream,
            '/api/v1/token-count': self._token_count,
        }
        route = routes.get(self.path)
        if route is None:
            self.send_error(404)
            return

        try:
            data = json.dumps(route(body)).encode('utf-8')
        except self._BadRequest as exc:
            self.send_error(400, explain=str(exc))
            return
        except Exception as exc:
            # Top-level boundary: log the detail, answer with a generic 500.
            self.log_error('Error while handling %s: %r', self.path, exc)
            self.send_error(500)
            return
        self._send_json(data)

    # ------------------------------------------------------------------
    # Routes: each takes the decoded request body and returns the payload.
    # ------------------------------------------------------------------

    def _generate(self, body):
        """Return ``{'results': [{'text': ...}]}`` for the given prompt."""
        prompt = self._require(body, 'prompt')
        # The result is not used by ask_gpt; the call is kept so that whatever
        # build_parameters does with the body still happens.
        # NOTE(review): confirm whether this call is still needed.
        build_parameters(body)
        return {'results': [{'text': ask_gpt(prompt)}]}

    def _chat(self, body):
        """Return the chat placeholder after checking the required fields."""
        self._require(body, 'user_input')
        self._require(body, 'history')
        # NOTE(review): result unused, kept for parity with `_generate`.
        build_parameters(body, chat=True)
        # The chat backend (generate_chat_reply) is disabled. Return the
        # placeholder as a whole string; looping over it would yield single
        # characters and leave only the last one ('r') in the response.
        return {'results': [{'history': 'error'}]}

    def _stop_stream(self, body):
        """Return the stop-stream placeholder; the body is ignored."""
        return {'results': 'error'}

    def _token_count(self, body):
        """Return the token-count placeholder; the body is ignored."""
        return {'results': [{'tokens': 'error'}]}

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _require(self, body, field):
        """Return ``body[field]`` or raise ``_BadRequest`` if it is absent."""
        if not isinstance(body, dict) or field not in body:
            raise self._BadRequest(f"Missing required field '{field}'")
        return body[field]

    def _read_json_body(self):
        """Read and decode the request body.

        Returns:
            ``(body, True)`` on success, or ``(None, False)`` after a 400
            response has been sent. A flag is used because ``null`` is a
            valid JSON body and decodes to ``None``.
        """
        try:
            length = int(self.headers.get('Content-Length', ''))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            # A negative length would make read() block until EOF.
            self.send_error(
                400, explain='A valid Content-Length header is required')
            return None, False

        try:
            # JSONDecodeError and UnicodeDecodeError both derive ValueError.
            body = json.loads(self.rfile.read(length).decode('utf-8'))
        except ValueError:
            self.send_error(
                400, explain='Request body must be UTF-8 encoded JSON')
            return None, False
        return body, True

    def _send_json(self, data):
        """Send ``data`` (already-encoded JSON bytes) as a 200 response."""
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(data)))
        self.end_headers()
        self.wfile.write(data)
def _run_server(port: int, share: bool = False):
    """Serve the API on ``port`` and block until the server stops.

    Args:
        port: TCP port to bind on the loopback interface (127.0.0.1).
        share: When true, additionally ask ``try_start_cloudflared`` to
            expose the port publicly. A failure to do so is reported but
            does not prevent the local server from running.
    """
    address = '127.0.0.1'
    server = ThreadingHTTPServer((address, port), Handler)

    def on_start(public_url: str):
        """Publish the public URL: patch it into ``main.md`` and print it."""
        try:
            with open('main.md', 'r', encoding='utf-8') as f:
                text = f.read()
            text = text.replace(
                "[located in the logs of this container]",
                f"{public_url}/api")
            with open('main.md', 'w', encoding='utf-8') as f:
                f.write(text)
        except OSError as exc:
            # The file is only a convenience; the URL must still be
            # announced below even when it cannot be updated.
            print(f'Could not update main.md with the public url: {exc}')
        print(f'Starting non-streaming server at public url {public_url}/api')

    if share:
        try:
            try_start_cloudflared(port, max_attempts=3, on_start=on_start)
        except Exception as exc:
            # Best effort: keep serving locally, but say what went wrong
            # instead of failing silently.
            print(f'Could not start cloudflared tunnel: {exc!r}')
    else:
        print(
            f'Starting API at http://{address}:{port}/api')

    try:
        server.serve_forever()
    finally:
        # Release the listening socket if the loop ever exits.
        server.server_close()
def start_server(port: int, share: bool = False):
    """Launch ``_run_server(port, share)`` on a background daemon thread.

    Returns immediately; being a daemon, the thread does not keep the
    interpreter alive on exit.
    """
    worker = Thread(
        target=_run_server,
        args=(port, share),
        daemon=True,
    )
    worker.start()
|