grantjw committed on
Commit
59cc10d
·
verified ·
1 Parent(s): 3f1b4eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -46,7 +46,7 @@ def get_retriever(url):
46
  docs = text_splitter.split_documents(documents)
47
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
48
  db = DocArrayInMemorySearch.from_documents(docs, embeddings)
49
- print("at least we ar ehere?")
50
  retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
51
  return retriever
52
 
@@ -63,12 +63,18 @@ def create_chain(_retriever):
63
  # stream handler to make it appear as if the LLM is typing the
64
  # responses in real time.
65
  # callback_manager = CallbackManager([stream_handler])
 
 
66
 
 
 
 
 
67
  n_gpu_layers = 1 # Change this value based on your model and your GPU VRAM pool.
68
  n_batch = 1024 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
69
 
70
  llm = LlamaCpp(
71
- model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
72
  n_batch=n_batch,
73
  n_ctx=2048,
74
  max_tokens=2048,
 
46
  docs = text_splitter.split_documents(documents)
47
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
48
  db = DocArrayInMemorySearch.from_documents(docs, embeddings)
49
+ #print("at least we ar ehere?")
50
  retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
51
  return retriever
52
 
 
63
  # stream handler to make it appear as if the LLM is typing the
64
  # responses in real time.
65
  # callback_manager = CallbackManager([stream_handler])
66
+ (repo_id, model_file_name) = ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
67
+ "mistral-7b-instruct-v0.1.Q5_0.gguf")
68
 
69
+ model_path = hf_hub_download(repo_id=repo_id,
70
+ filename=model_file_name,
71
+ repo_type="model")
72
+
73
  n_gpu_layers = 1 # Change this value based on your model and your GPU VRAM pool.
74
  n_batch = 1024 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
75
 
76
  llm = LlamaCpp(
77
+ model_path=model_path,
78
  n_batch=n_batch,
79
  n_ctx=2048,
80
  max_tokens=2048,