PawinChan committed
Commit 88be9f7 · 1 Parent(s): 5d6983f

Several bugfixes, as well as added Vicuna.

client.py CHANGED
@@ -1,14 +1,16 @@
 import time, requests
 
+ENDPOINT = "https://pawinc-chadalpaca-flask.hf.space/chat"
+
 serverIsPreparing = True
 print("Checking server status")
 while serverIsPreparing:
     try:
-        with requests.get("http://localhost:8080/chat") as r:
+        with requests.get(ENDPOINT) as r:
             print("Status Check: " + r.text)
             if r.text == 'Ready':
                 break
-            else:
+            else:
                 time.sleep(5)
     except requests.exceptions.ConnectionError:
         print("Connection Refused. Retrying in 5 seconds.")
@@ -24,7 +26,7 @@ messages = [
 
 print("Sending Request...")
 try:
-    with requests.post("http://localhost:8080/chat", headers=headers, json=messages) as r:
+    with requests.post(ENDPOINT, headers=headers, json=messages) as r:
         print(r.json()["content"])
 except requests.exceptions.JSONDecodeError:
     print(f"Something went wrong: {r.status_code}- {r.text}")
main.py CHANGED
@@ -1,35 +1,62 @@
-from flask import Flask, render_template, request, abort, redirect, url_for, Response
+from flask import Flask, request, abort, Response
 from werkzeug.security import generate_password_hash, check_password_hash
 from werkzeug.exceptions import HTTPException
 
-import os, threading, json, waitress, datetime
+import os, threading, json, waitress, datetime, traceback
 from llama_cpp import Llama
 from dotenv import load_dotenv
 load_dotenv()
 
+import sentry_sdk
+from flask import Flask
+from sentry_sdk.integrations.flask import FlaskIntegration
+
+sentry_sdk.init(
+    dsn="https://5dcf8a99012c4c86b9b1f0293f6b4c2e@o4505516024004608.ingest.sentry.io/4505541971935232",
+    integrations=[
+        FlaskIntegration(),
+    ],
+
+    # Set traces_sample_rate to 1.0 to capture 100%
+    # of transactions for performance monitoring.
+    # We recommend adjusting this value in production.
+    traces_sample_rate=1.0
+)
+
 #Variables
 DEBUGMODEENABLED = (os.getenv('debugModeEnabled', 'False') == 'True')
+modelName = "vicuna"
 llm = None
 AlpacaLoaded = False
 
 #Chat Functions
 def load_alpaca():
-    global llm, AlpacaLoaded
+    global llm, AlpacaLoaded, modelName
     if not AlpacaLoaded:
         print("Loading Alpaca...")
         try:
-            llm = Llama(model_path="./resources/ggml-model-q4_0.bin", use_mmap=False, n_threads=2, verbose=False) #use_mlock=True
+            llm = Llama(model_path=f"./resources/{modelName}ggml-model-q4_0.bin", use_mmap=False, n_threads=2, verbose=False, n_ctx=2048) #use_mlock=True
             AlpacaLoaded = True
             print("Done loading Alpaca.")
+            return "Done"
         except AttributeError:
             print("Error loading Alpaca. Please make sure you have the model file in the resources folder.")
+            return "Error"
     else:
         print("Alpaca already loaded.")
+        return "Already Loaded"
 
 def getChatResponse(modelOutput):
     return str(modelOutput["choices"][0]['message']['content'])
 
-
+def reload_alpaca():
+    global llm, AlpacaLoaded, modelName
+    if AlpacaLoaded:
+        llm = None
+        input("Please confirm that the memory is cleared!")
+        AlpacaLoaded = False
+    load_alpaca()
+    return "Done"
 #Authentication Functions
 def loadHashes():
     global hashesDict
@@ -98,7 +125,7 @@ def chat():
     print("Got Message" + str(messages))
 
     if AlpacaLoaded:
-        modelOutput = llm.create_chat_completion(messages=messages, max_tokens=512)
+        modelOutput = llm.create_chat_completion(messages=messages, max_tokens=1024)
         responseMessage = modelOutput["choices"][0]['message']
         print(f"\n\nResponseMessage: {responseMessage}\n\n")
         return Response(json.dumps(responseMessage, indent=2), content_type='application/json')
@@ -107,12 +134,29 @@ def chat():
     else:
         return "Ready" if AlpacaLoaded else "Not Ready", 200 if AlpacaLoaded else 503
 
+@app.route('/sentry_check')
+def trigger_error():
+    division_by_zero = 1 / 0
+
 @app.errorhandler(HTTPException)
 def handle_exception(e):
-    return Response({"error": f"{e.code} - {e.name}", "message": e.description}, content_type='application/json'), e.code
+    errorInfo = json.dumps({"error": f"{e.code} - {e.name}", "message": e.description}, indent=2)
+    return Response(errorInfo, content_type='application/json'), e.code
+
+@app.errorhandler(Exception)
+def handle_errors(e):
+    print(f"INTERNAL SERVER ERROR 500 @ {request.path}")
+    exceptionInfo = f"{type(e).__name__}: {str(e)}"
+    errorTraceback = traceback.format_exc()
+    print(errorTraceback)
+    sentry_sdk.capture_exception(e)
+    errorInfo = json.dumps({"error": f"500 - Internal Server Error", "message": exceptionInfo}, indent=2)
+    return Response(errorInfo, content_type='application/json'), 500
+
+
 
 if __name__ == '__main__':
-    t = threading.Thread(target=load_alpaca, daemon=True).start()
+    threading.Thread(target=load_alpaca, daemon=True).start()
 
     port = int(os.getenv("port", "8080"))
     print("Server successfully started.")
resources/{ggml-model-q4_0.bin → alpaca-ggml-model-q4_0.bin} RENAMED
File without changes
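
For completeness, a rough sketch of how the new /sentry_check route could be exercised from a client, assuming main.py is reachable at the same host client.py now targets; the URL is taken from the ENDPOINT constant above and the response shape is read off handle_errors, neither is verified here:

import requests

BASE = "https://pawinc-chadalpaca-flask.hf.space"  # assumed host, per ENDPOINT in client.py

# /sentry_check deliberately raises ZeroDivisionError; handle_errors should convert it
# into a JSON 500 response and report the exception to Sentry.
r = requests.get(BASE + "/sentry_check")
print(r.status_code)  # expected: 500
print(r.json())       # expected: {"error": "500 - Internal Server Error",
                      #            "message": "ZeroDivisionError: division by zero"}
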