PawinChan committed · Commit 88be9f7
1 Parent(s): 5d6983f

Several bugfixes, as well as added Vicuna.

Files changed:
- client.py +5 -3
- main.py +52 -8
- resources/{ggml-model-q4_0.bin → alpaca-ggml-model-q4_0.bin} +0 -0
client.py
CHANGED
@@ -1,14 +1,16 @@
 import time, requests
 
+ENDPOINT = "https://pawinc-chadalpaca-flask.hf.space/chat"
+
 serverIsPreparing = True
 print("Checking server status")
 while serverIsPreparing:
     try:
-        with requests.get(
+        with requests.get(ENDPOINT) as r:
             print("Status Check: " + r.text)
             if r.text == 'Ready':
                 break
-            else:
+            else:
                 time.sleep(5)
     except requests.exceptions.ConnectionError:
         print("Connection Refused. Retrying in 5 seconds.")
@@ -24,7 +26,7 @@ messages = [
 
 print("Sending Request...")
 try:
-    with requests.post(
+    with requests.post(ENDPOINT, headers=headers, json=messages) as r:
         print(r.json()["content"])
 except requests.exceptions.JSONDecodeError:
     print(f"Something went wrong: {r.status_code}- {r.text}")
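The updated post call relies on a headers dict and a messages list defined in the unchanged part of client.py (the second hunk header shows messages = [ as surrounding context). Below is a minimal end-to-end sketch of how the revised client might be driven; the header name and the message schema are assumptions for illustration, not taken from the repository.

    # Hedged sketch of the full client flow against the new ENDPOINT constant.
    # The "Authorization" header name and the chat message schema are assumed;
    # only ENDPOINT, the polling loop, and the post call come from the diff above.
    import time, requests

    ENDPOINT = "https://pawinc-chadalpaca-flask.hf.space/chat"

    headers = {"Authorization": "replace-with-your-token"}   # assumed header name
    messages = [                                             # assumed message schema
        {"role": "user", "content": "Hello, who are you?"},
    ]

    # Poll until the server reports that the model has finished loading.
    serverIsPreparing = True
    print("Checking server status")
    while serverIsPreparing:
        try:
            with requests.get(ENDPOINT) as r:
                print("Status Check: " + r.text)
                if r.text == 'Ready':
                    break
                else:
                    time.sleep(5)
        except requests.exceptions.ConnectionError:
            print("Connection Refused. Retrying in 5 seconds.")
            time.sleep(5)

    print("Sending Request...")
    try:
        with requests.post(ENDPOINT, headers=headers, json=messages) as r:
            print(r.json()["content"])
    except requests.exceptions.JSONDecodeError:
        print(f"Something went wrong: {r.status_code} - {r.text}")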
main.py
CHANGED
@@ -1,35 +1,62 @@
-from flask import Flask,
+from flask import Flask, request, abort, Response
 from werkzeug.security import generate_password_hash, check_password_hash
 from werkzeug.exceptions import HTTPException
 
-import os, threading, json, waitress, datetime
+import os, threading, json, waitress, datetime, traceback
 from llama_cpp import Llama
 from dotenv import load_dotenv
 load_dotenv()
 
+import sentry_sdk
+from flask import Flask
+from sentry_sdk.integrations.flask import FlaskIntegration
+
+sentry_sdk.init(
+    dsn="https://5dcf8a99012c4c86b9b1f0293f6b4c2e@o4505516024004608.ingest.sentry.io/4505541971935232",
+    integrations=[
+        FlaskIntegration(),
+    ],
+
+    # Set traces_sample_rate to 1.0 to capture 100%
+    # of transactions for performance monitoring.
+    # We recommend adjusting this value in production.
+    traces_sample_rate=1.0
+)
+
 #Variables
 DEBUGMODEENABLED = (os.getenv('debugModeEnabled', 'False') == 'True')
+modelName = "vicuna"
 llm = None
 AlpacaLoaded = False
 
 #Chat Functions
 def load_alpaca():
-    global llm, AlpacaLoaded
+    global llm, AlpacaLoaded, modelName
     if not AlpacaLoaded:
         print("Loading Alpaca...")
         try:
-            llm = Llama(model_path="./resources/ggml-model-q4_0.bin", use_mmap=False, n_threads=2, verbose=False) #use_mlock=True
+            llm = Llama(model_path=f"./resources/{modelName}ggml-model-q4_0.bin", use_mmap=False, n_threads=2, verbose=False, n_ctx=2048) #use_mlock=True
             AlpacaLoaded = True
             print("Done loading Alpaca.")
+            return "Done"
         except AttributeError:
             print("Error loading Alpaca. Please make sure you have the model file in the resources folder.")
+            return "Error"
     else:
         print("Alpaca already loaded.")
+        return "Already Loaded"
 
 def getChatResponse(modelOutput):
     return str(modelOutput["choices"][0]['message']['content'])
 
-
+def reload_alpaca():
+    global llm, AlpacaLoaded, modelName
+    if AlpacaLoaded:
+        llm = None
+        input("Pleease confirm that the memory is cleared!")
+        AlpacaLoaded = False
+        load_alpaca()
+    return "Done"
 #Authentication Functions
 def loadHashes():
     global hashesDict
@@ -98,7 +125,7 @@ def chat():
     print("Got Message" + str(messages))
 
     if AlpacaLoaded:
-        modelOutput = llm.create_chat_completion(messages=messages, max_tokens=
+        modelOutput = llm.create_chat_completion(messages=messages, max_tokens=1024)
         responseMessage = modelOutput["choices"][0]['message']
         print(f"\n\nResponseMessage: {responseMessage}\n\n")
         return Response(json.dumps(responseMessage, indent=2), content_type='application/json')
@@ -107,12 +134,29 @@ def chat():
     else:
         return "Ready" if AlpacaLoaded else "Not Ready", 200 if AlpacaLoaded else 503
 
+@app.route('/sentry_check')
+def trigger_error():
+    division_by_zero = 1 / 0
+
 @app.errorhandler(HTTPException)
 def handle_exception(e):
-
+    errorInfo = json.dumps({"error": f"{e.code} - {e.name}", "message": e.description}, indent=2)
+    return Response(errorInfo, content_type='application/json'), e.code
+
+@app.errorhandler(Exception)
+def handle_errors(e):
+    print(f"INTERNAL SERVER ERROR 500 @ {request.path}")
+    exceptionInfo = f"{type(e).__name__}: {str(e)}"
+    errorTraceback = traceback.format_exc()
+    print(errorTraceback)
+    sentry_sdk.capture_exception(e)
+    errorInfo = json.dumps({"error": f"500 - Internal Server Error", "message": exceptionInfo}, indent=2)
+    return Response(errorInfo, content_type='application/json'), 500
+
+
 
 if __name__ == '__main__':
-
+    threading.Thread(target=load_alpaca, daemon=True).start()
 
     port = int(os.getenv("port", "8080"))
     print("Server successfully started.")
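With the new /sentry_check route and the two error handlers, unhandled errors now come back as JSON bodies instead of Flask's default HTML error pages. A quick hedged check from the client side, assuming the Space is reachable at the same base URL that client.py already targets:

    # Hedged sketch: exercise the error handling added in this commit.
    # BASE_URL is assumed to be the same Space that serves /chat.
    import requests

    BASE_URL = "https://pawinc-chadalpaca-flask.hf.space"

    # /sentry_check deliberately divides by zero, so handle_errors() should
    # answer with a 500 and a JSON body like:
    #   {"error": "500 - Internal Server Error",
    #    "message": "ZeroDivisionError: division by zero"}
    r = requests.get(BASE_URL + "/sentry_check")
    print(r.status_code)        # expected: 500
    print(r.json()["error"])    # expected: "500 - Internal Server Error"
    print(r.json()["message"])  # expected: "ZeroDivisionError: division by zero"

    # An unknown path goes through handle_exception() instead, e.g.
    #   {"error": "404 - Not Found", "message": "..."}
    r = requests.get(BASE_URL + "/does-not-exist")
    print(r.status_code, r.json()["error"])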
resources/{ggml-model-q4_0.bin → alpaca-ggml-model-q4_0.bin}
RENAMED
File without changes
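After this rename the checked-in weights file is alpaca-ggml-model-q4_0.bin, while load_alpaca() in main.py now builds its path as f"./resources/{modelName}ggml-model-q4_0.bin" with modelName = "vicuna". A small sketch for confirming which filename the loader will actually look for before starting the server (check_model_file is a hypothetical helper, not part of this commit):

    # Hedged sketch: print and verify the model path that load_alpaca() would
    # try to open. check_model_file() is illustrative only.
    import os

    def check_model_file(modelName: str) -> str:
        # Same f-string as main.py: note there is no separator between
        # modelName and "ggml-model-q4_0.bin".
        path = f"./resources/{modelName}ggml-model-q4_0.bin"
        exists = os.path.isfile(path)
        print(f"{path} -> {'found' if exists else 'missing'}")
        return path

    check_model_file("vicuna")    # resolves to ./resources/vicunaggml-model-q4_0.bin
    check_model_file("alpaca-")   # matches the renamed alpaca-ggml-model-q4_0.bin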