Daniel Marques committed
Commit b21e4ba
1 Parent(s): 5073361

feat: add websocket

Files changed (3):
  1. ingest.py +4 -1
  2. load_models.py +2 -2
  3. main.py +1 -3
ingest.py CHANGED

@@ -124,7 +124,7 @@ def main(device_type):
     text_documents, python_documents = split_documents(documents)
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     python_splitter = RecursiveCharacterTextSplitter.from_language(
-        language=Language.PYTHON, chunk_size=880, chunk_overlap=200
+        language=Language.PYTHON, chunk_size=1000, chunk_overlap=200
     )
     texts = text_splitter.split_documents(text_documents)
     texts.extend(python_splitter.split_documents(python_documents))
@@ -151,6 +151,9 @@
 
     )
 
+    db.persist()
+    db = None
+
 if __name__ == "__main__":
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO
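
The first hunk brings the Python splitter's chunk_size in line with the text splitter's 1000. The second hunk's db.persist() / db = None pair flushes the Chroma index to disk and then drops the reference so the client releases it. As a minimal sketch of the consuming side, assuming the usual langchain setup for this kind of repo (the PERSIST_DIRECTORY value and embedding model below are illustrative, not the project's actual constants):

from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma

PERSIST_DIRECTORY = "DB"  # assumed; the repo defines its own constant

embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")

# Reopen the index that ingest.py persisted, instead of re-embedding everything
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
retriever = db.as_retriever()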
load_models.py CHANGED

@@ -58,13 +58,13 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
         "model_path": model_path,
         "n_ctx": CONTEXT_WINDOW_SIZE,
         "max_tokens": MAX_NEW_TOKENS,
-        "n_batch": N_BATCH,
         # set this based on your GPU & CPU RAM
     }
     if device_type.lower() == "mps":
         kwargs["n_gpu_layers"] = 1
     if device_type.lower() == "cuda":
-        kwargs["n_gpu_layers"] = N_GPU_LAYERS  # set this based on your GPU
+        kwargs["n_gpu_layers"] = N_GPU_LAYERS
+        kwargs["n_batch"] = MAX_NEW_TOKENS  # set this based on your GPU
 
     kwargs["stream"] = stream
 
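The change moves n_batch out of the shared kwargs and sets it only on the CUDA path, reusing MAX_NEW_TOKENS as the batch size. For context, kwargs like these typically feed straight into langchain's LlamaCpp wrapper; a minimal sketch with illustrative values (the model path and constants below are assumptions, not the repo's settings):

from langchain.llms import LlamaCpp

CONTEXT_WINDOW_SIZE = 4096  # assumed value
MAX_NEW_TOKENS = 2048       # assumed value
N_GPU_LAYERS = 100          # assumed value

kwargs = {
    "model_path": "models/llama-2-7b-chat.Q4_K_M.gguf",  # hypothetical local path
    "n_ctx": CONTEXT_WINDOW_SIZE,
    "max_tokens": MAX_NEW_TOKENS,
}
# CUDA branch from the diff: offload layers and size the prompt batch per GPU RAM
kwargs["n_gpu_layers"] = N_GPU_LAYERS
kwargs["n_batch"] = MAX_NEW_TOKENS

llm = LlamaCpp(**kwargs)

Tying n_batch to MAX_NEW_TOKENS is a convenience; llama.cpp only requires that the batch fit in GPU and CPU RAM, so it can also be tuned independently.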
main.py CHANGED

@@ -56,8 +56,6 @@ QA = RetrievalQA.from_chain_type(
 )
 
 def sendPromptChain(QA, user_prompt):
-    print(QA)
-
     res = QA(user_prompt)
 
     answer, docs = res["result"], res["source_documents"]
@@ -290,7 +288,7 @@ async def websocket_endpoint_room(websocket: WebSocket, room_id: str, user_id: s
     while True:
         data = await websocket.receive_text()
 
-        prompt_response_dict = sendPromptChain(QA, data, True)
+        prompt_response_dict = sendPromptChain(QA, data)
 
         await socket_manager.broadcast_to_room(room_id, json.dumps(prompt_response_dict))
 
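
The first hunk drops a debug print; the second drops a stray third argument so the call matches the two-parameter signature of sendPromptChain(QA, user_prompt). For context, a minimal sketch of how such a room endpoint typically fits together in FastAPI, assuming the module's existing QA, sendPromptChain, and socket_manager (the route path and the add/remove methods are assumptions; only broadcast_to_room appears in the diff):

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
import json

app = FastAPI()

@app.websocket("/ws/{room_id}/{user_id}")  # hypothetical route; the real path may differ
async def websocket_endpoint_room(websocket: WebSocket, room_id: str, user_id: str):
    await socket_manager.add_user_to_room(room_id, websocket)  # assumed manager API
    try:
        while True:
            data = await websocket.receive_text()
            # Run the RetrievalQA chain on the incoming prompt
            prompt_response_dict = sendPromptChain(QA, data)
            # Fan the answer out to every client in the room
            await socket_manager.broadcast_to_room(room_id, json.dumps(prompt_response_dict))
    except WebSocketDisconnect:
        await socket_manager.remove_user_from_room(room_id, websocket)  # assumed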