PawinChan committed · Commit 88be9f7
1 Parent(s): 5d6983f

Several bugfixes, as well as added Vicuna.

Files changed:
- client.py +5 -3
- main.py +52 -8
- resources/{ggml-model-q4_0.bin → alpaca-ggml-model-q4_0.bin} +0 -0
client.py
CHANGED
@@ -1,14 +1,16 @@
 import time, requests
 
+ENDPOINT = "https://pawinc-chadalpaca-flask.hf.space/chat"
+
 serverIsPreparing = True
 print("Checking server status")
 while serverIsPreparing:
     try:
-        with requests.get(
+        with requests.get(ENDPOINT) as r:
             print("Status Check: " + r.text)
             if r.text == 'Ready':
                 break
-            else:
+            else:
                 time.sleep(5)
     except requests.exceptions.ConnectionError:
         print("Connection Refused. Retrying in 5 seconds.")
@@ -24,7 +26,7 @@ messages = [
 
 print("Sending Request...")
 try:
-    with requests.post(
+    with requests.post(ENDPOINT, headers=headers, json=messages) as r:
         print(r.json()["content"])
 except requests.exceptions.JSONDecodeError:
     print(f"Something went wrong: {r.status_code}- {r.text}")
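The updated post call relies on a headers dict and a messages list defined in the unchanged part of client.py (the second hunk header shows messages = [ as surrounding context). Below is a minimal end-to-end sketch of how the revised client might be driven; the header name and the message schema are assumptions for illustration, not taken from the repository.

    # Hedged sketch of the full client flow against the new ENDPOINT constant.
    # The "Authorization" header name and the chat message schema are assumed;
    # only ENDPOINT, the polling loop, and the post call come from the diff above.
    import time, requests

    ENDPOINT = "https://pawinc-chadalpaca-flask.hf.space/chat"

    headers = {"Authorization": "replace-with-your-token"}   # assumed header name
    messages = [                                             # assumed message schema
        {"role": "user", "content": "Hello, who are you?"},
    ]

    # Poll until the server reports that the model has finished loading.
    serverIsPreparing = True
    print("Checking server status")
    while serverIsPreparing:
        try:
            with requests.get(ENDPOINT) as r:
                print("Status Check: " + r.text)
                if r.text == 'Ready':
                    break
                else:
                    time.sleep(5)
        except requests.exceptions.ConnectionError:
            print("Connection Refused. Retrying in 5 seconds.")
            time.sleep(5)

    print("Sending Request...")
    try:
        with requests.post(ENDPOINT, headers=headers, json=messages) as r:
            print(r.json()["content"])
    except requests.exceptions.JSONDecodeError:
        print(f"Something went wrong: {r.status_code} - {r.text}")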
main.py
CHANGED
@@ -1,35 +1,62 @@
-from flask import Flask,
+from flask import Flask, request, abort, Response
 from werkzeug.security import generate_password_hash, check_password_hash
 from werkzeug.exceptions import HTTPException
 
-import os, threading, json, waitress, datetime
+import os, threading, json, waitress, datetime, traceback
 from llama_cpp import Llama
 from dotenv import load_dotenv
 load_dotenv()
 
+import sentry_sdk
+from flask import Flask
+from sentry_sdk.integrations.flask import FlaskIntegration
+
+sentry_sdk.init(
+    dsn="https://5dcf8a99012c4c86b9b1f0293f6b4c2e@o4505516024004608.ingest.sentry.io/4505541971935232",
+    integrations=[
+        FlaskIntegration(),
+    ],
+
+    # Set traces_sample_rate to 1.0 to capture 100%
+    # of transactions for performance monitoring.
+    # We recommend adjusting this value in production.
+    traces_sample_rate=1.0
+)
+
 #Variables
 DEBUGMODEENABLED = (os.getenv('debugModeEnabled', 'False') == 'True')
+modelName = "vicuna"
 llm = None
 AlpacaLoaded = False
 
 #Chat Functions
 def load_alpaca():
-    global llm, AlpacaLoaded
+    global llm, AlpacaLoaded, modelName
     if not AlpacaLoaded:
         print("Loading Alpaca...")
         try:
-            llm = Llama(model_path="./resources/ggml-model-q4_0.bin", use_mmap=False, n_threads=2, verbose=False) #use_mlock=True
+            llm = Llama(model_path=f"./resources/{modelName}ggml-model-q4_0.bin", use_mmap=False, n_threads=2, verbose=False, n_ctx=2048) #use_mlock=True
             AlpacaLoaded = True
             print("Done loading Alpaca.")
+            return "Done"
         except AttributeError:
             print("Error loading Alpaca. Please make sure you have the model file in the resources folder.")
+            return "Error"
     else:
         print("Alpaca already loaded.")
+        return "Already Loaded"
 
 def getChatResponse(modelOutput):
     return str(modelOutput["choices"][0]['message']['content'])
 
-
+def reload_alpaca():
+    global llm, AlpacaLoaded, modelName
+    if AlpacaLoaded:
+        llm = None
+        input("Pleease confirm that the memory is cleared!")
+        AlpacaLoaded = False
+        load_alpaca()
+    return "Done"
 #Authentication Functions
 def loadHashes():
     global hashesDict
@@ -98,7 +125,7 @@ def chat():
     print("Got Message" + str(messages))
 
     if AlpacaLoaded:
-        modelOutput = llm.create_chat_completion(messages=messages, max_tokens=
+        modelOutput = llm.create_chat_completion(messages=messages, max_tokens=1024)
         responseMessage = modelOutput["choices"][0]['message']
         print(f"\n\nResponseMessage: {responseMessage}\n\n")
         return Response(json.dumps(responseMessage, indent=2), content_type='application/json')
@@ -107,12 +134,29 @@ def chat():
     else:
         return "Ready" if AlpacaLoaded else "Not Ready", 200 if AlpacaLoaded else 503
 
+@app.route('/sentry_check')
+def trigger_error():
+    division_by_zero = 1 / 0
+
 @app.errorhandler(HTTPException)
 def handle_exception(e):
-
+    errorInfo = json.dumps({"error": f"{e.code} - {e.name}", "message": e.description}, indent=2)
+    return Response(errorInfo, content_type='application/json'), e.code
+
+@app.errorhandler(Exception)
+def handle_errors(e):
+    print(f"INTERNAL SERVER ERROR 500 @ {request.path}")
+    exceptionInfo = f"{type(e).__name__}: {str(e)}"
+    errorTraceback = traceback.format_exc()
+    print(errorTraceback)
+    sentry_sdk.capture_exception(e)
+    errorInfo = json.dumps({"error": f"500 - Internal Server Error", "message": exceptionInfo}, indent=2)
+    return Response(errorInfo, content_type='application/json'), 500
+
+
 
 if __name__ == '__main__':
-
+    threading.Thread(target=load_alpaca, daemon=True).start()
 
     port = int(os.getenv("port", "8080"))
     print("Server successfully started.")
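With the new /sentry_check route and the two error handlers, unhandled errors now come back as JSON bodies instead of Flask's default HTML error pages. A quick hedged check from the client side, assuming the Space is reachable at the same base URL that client.py already targets:

    # Hedged sketch: exercise the error handling added in this commit.
    # BASE_URL is assumed to be the same Space that serves /chat.
    import requests

    BASE_URL = "https://pawinc-chadalpaca-flask.hf.space"

    # /sentry_check deliberately divides by zero, so handle_errors() should
    # answer with a 500 and a JSON body like:
    #   {"error": "500 - Internal Server Error",
    #    "message": "ZeroDivisionError: division by zero"}
    r = requests.get(BASE_URL + "/sentry_check")
    print(r.status_code)        # expected: 500
    print(r.json()["error"])    # expected: "500 - Internal Server Error"
    print(r.json()["message"])  # expected: "ZeroDivisionError: division by zero"

    # An unknown path goes through handle_exception() instead, e.g.
    #   {"error": "404 - Not Found", "message": "..."}
    r = requests.get(BASE_URL + "/does-not-exist")
    print(r.status_code, r.json()["error"])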
resources/{ggml-model-q4_0.bin → alpaca-ggml-model-q4_0.bin}
RENAMED
File without changes
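After this rename the checked-in weights file is alpaca-ggml-model-q4_0.bin, while load_alpaca() in main.py now builds its path as f"./resources/{modelName}ggml-model-q4_0.bin" with modelName = "vicuna". A small sketch for confirming which filename the loader will actually look for before starting the server (check_model_file is a hypothetical helper, not part of this commit):

    # Hedged sketch: print and verify the model path that load_alpaca() would
    # try to open. check_model_file() is illustrative only.
    import os

    def check_model_file(modelName: str) -> str:
        # Same f-string as main.py: note there is no separator between
        # modelName and "ggml-model-q4_0.bin".
        path = f"./resources/{modelName}ggml-model-q4_0.bin"
        exists = os.path.isfile(path)
        print(f"{path} -> {'found' if exists else 'missing'}")
        return path

    check_model_file("vicuna")    # resolves to ./resources/vicunaggml-model-q4_0.bin
    check_model_file("alpaca-")   # matches the renamed alpaca-ggml-model-q4_0.bin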