Commit b21e4ba
Parent(s): 5073361
Daniel Marques committed

feat: add websocket

Files changed:
- ingest.py +4 -1
- load_models.py +2 -2
- main.py +1 -3
ingest.py
CHANGED
@@ -124,7 +124,7 @@ def main(device_type):
     text_documents, python_documents = split_documents(documents)
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     python_splitter = RecursiveCharacterTextSplitter.from_language(
-        language=Language.PYTHON, chunk_size=
+        language=Language.PYTHON, chunk_size=1000, chunk_overlap=200
     )
     texts = text_splitter.split_documents(text_documents)
     texts.extend(python_splitter.split_documents(python_documents))
@@ -151,6 +151,9 @@ def main(device_type):
 
     )
 
+    db.persist()
+    db = None
+
 if __name__ == "__main__":
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO
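Two things change in ingest.py: the Python-aware splitter call (previously cut off mid-argument in the old code) now passes chunk_size=1000 and chunk_overlap=200, matching the plain-text splitter, and the vector store is flushed to disk and released before the script exits. Below is a minimal sketch of the resulting flow. It assumes, as the full file does but this hunk does not show, that db is a LangChain Chroma store built from the split documents; build_index, embeddings, and persist_directory are illustrative names, not the file's own.

from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

def build_index(text_documents, python_documents, embeddings, persist_directory):
    # Generic splitter for prose; the language-aware splitter prefers Python
    # boundaries (defs, classes) before falling back to character splits.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    python_splitter = RecursiveCharacterTextSplitter.from_language(
        language=Language.PYTHON, chunk_size=1000, chunk_overlap=200
    )
    texts = text_splitter.split_documents(text_documents)
    texts.extend(python_splitter.split_documents(python_documents))

    db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
    db.persist()  # flush buffered writes so the index survives the process
    db = None     # drop the reference, mirroring the line added in this commit

The persist() call matters with older Chroma clients, which buffer writes in memory; without it a freshly built index can vanish when the process ends.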
load_models.py
CHANGED
@@ -58,13 +58,13 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
         "model_path": model_path,
         "n_ctx": CONTEXT_WINDOW_SIZE,
         "max_tokens": MAX_NEW_TOKENS,
-        "n_batch": N_BATCH,
         # set this based on your GPU & CPU RAM
     }
     if device_type.lower() == "mps":
         kwargs["n_gpu_layers"] = 1
     if device_type.lower() == "cuda":
-        kwargs["n_gpu_layers"] = N_GPU_LAYERS
+        kwargs["n_gpu_layers"] = N_GPU_LAYERS
+        kwargs["n_batch"] = MAX_NEW_TOKENS  # set this based on your GPU
 
     kwargs["stream"] = stream
 
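The net effect of this hunk: n_batch was previously set for every device from an N_BATCH constant; now it is set only on the CUDA path and sized to MAX_NEW_TOKENS. A minimal sketch of the resulting kwargs logic follows, with placeholder values standing in for the constants the file actually imports.

CONTEXT_WINDOW_SIZE = 4096  # placeholder values; the real constants live in
MAX_NEW_TOKENS = 2048       # the project's own constants module
N_GPU_LAYERS = 100

def build_llama_kwargs(model_path: str, device_type: str, stream: bool = False) -> dict:
    kwargs = {
        "model_path": model_path,
        "n_ctx": CONTEXT_WINDOW_SIZE,
        "max_tokens": MAX_NEW_TOKENS,
        # n_batch is no longer set unconditionally here ...
    }
    if device_type.lower() == "mps":
        kwargs["n_gpu_layers"] = 1  # Metal: a single flag enables GPU offload
    if device_type.lower() == "cuda":
        kwargs["n_gpu_layers"] = N_GPU_LAYERS
        # ... it is applied only on CUDA, sized to the generation budget.
        kwargs["n_batch"] = MAX_NEW_TOKENS  # set this based on your GPU & CPU RAM
    kwargs["stream"] = stream
    return kwargs

A larger n_batch speeds up prompt evaluation in llama.cpp-backed models at the cost of more memory, which is presumably why it is now applied only when a CUDA GPU is present.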
main.py
CHANGED
@@ -56,8 +56,6 @@ QA = RetrievalQA.from_chain_type(
 )
 
 def sendPromptChain(QA, user_prompt):
-    print(QA)
-
     res = QA(user_prompt)
 
     answer, docs = res["result"], res["source_documents"]
@@ -290,7 +288,7 @@ async def websocket_endpoint_room(websocket: WebSocket, room_id: str, user_id: s
     while True:
         data = await websocket.receive_text()
 
-        prompt_response_dict = sendPromptChain(QA, data
+        prompt_response_dict = sendPromptChain(QA, data)
 
         await socket_manager.broadcast_to_room(room_id, json.dumps(prompt_response_dict))
 
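The main.py change removes a leftover print(QA) debug line and restores the closing parenthesis on the sendPromptChain call inside the websocket loop, which is what wires the QA chain to the new websocket rooms. Here is a self-contained sketch of that loop under FastAPI; the in-memory rooms dict and the echo-style QA below are stand-ins for the app's own socket_manager and its module-level RetrievalQA chain, not the actual implementation.

import json

from fastapi import FastAPI, WebSocket, WebSocketDisconnect

app = FastAPI()
rooms: dict[str, list[WebSocket]] = {}  # room_id -> connected sockets

def QA(user_prompt: str) -> dict:
    # Stand-in for the module-level RetrievalQA chain in main.py.
    return {"result": f"(echo) {user_prompt}", "source_documents": []}

def sendPromptChain(qa, user_prompt: str) -> dict:
    res = qa(user_prompt)
    return {"Prompt": user_prompt, "Answer": res["result"]}

@app.websocket("/ws/{room_id}/{user_id}")
async def websocket_endpoint_room(websocket: WebSocket, room_id: str, user_id: str):
    await websocket.accept()
    peers = rooms.setdefault(room_id, [])
    peers.append(websocket)
    try:
        while True:
            data = await websocket.receive_text()
            prompt_response_dict = sendPromptChain(QA, data)
            # Broadcast the answer to every socket in the room, sender included.
            for peer in list(peers):
                await peer.send_text(json.dumps(prompt_response_dict))
    except WebSocketDisconnect:
        peers.remove(websocket)

One caveat with this shape: a real RetrievalQA call blocks, so running it directly in the coroutine stalls the event loop for every connected room; in practice one would offload it, e.g. with await asyncio.get_running_loop().run_in_executor(None, sendPromptChain, QA, data).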