Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -46,7 +46,7 @@ def get_retriever(url):
|
|
46 |
docs = text_splitter.split_documents(documents)
|
47 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
48 |
db = DocArrayInMemorySearch.from_documents(docs, embeddings)
|
49 |
-
print("at least we ar ehere?")
|
50 |
retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
|
51 |
return retriever
|
52 |
|
@@ -63,12 +63,18 @@ def create_chain(_retriever):
|
|
63 |
# stream handler to make it appear as if the LLM is typing the
|
64 |
# responses in real time.
|
65 |
# callback_manager = CallbackManager([stream_handler])
|
|
|
|
|
66 |
|
|
|
|
|
|
|
|
|
67 |
n_gpu_layers = 1 # Change this value based on your model and your GPU VRAM pool.
|
68 |
n_batch = 1024 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
|
69 |
|
70 |
llm = LlamaCpp(
|
71 |
-
model_path=
|
72 |
n_batch=n_batch,
|
73 |
n_ctx=2048,
|
74 |
max_tokens=2048,
|
|
|
46 |
docs = text_splitter.split_documents(documents)
|
47 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
48 |
db = DocArrayInMemorySearch.from_documents(docs, embeddings)
|
49 |
+
#print("at least we ar ehere?")
|
50 |
retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
|
51 |
return retriever
|
52 |
|
|
|
63 |
# stream handler to make it appear as if the LLM is typing the
|
64 |
# responses in real time.
|
65 |
# callback_manager = CallbackManager([stream_handler])
|
66 |
+
(repo_id, model_file_name) = ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
|
67 |
+
"mistral-7b-instruct-v0.1.Q5_0.gguf")
|
68 |
|
69 |
+
model_path = hf_hub_download(repo_id=repo_id,
|
70 |
+
filename=model_file_name,
|
71 |
+
repo_type="model")
|
72 |
+
|
73 |
n_gpu_layers = 1 # Change this value based on your model and your GPU VRAM pool.
|
74 |
n_batch = 1024 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
|
75 |
|
76 |
llm = LlamaCpp(
|
77 |
+
model_path=model_path,
|
78 |
n_batch=n_batch,
|
79 |
n_ctx=2048,
|
80 |
max_tokens=2048,
|