ppsingh committed on
Commit
5930e3b
1 Parent(s): 1c586ef

Update auditqa/doc_process.py

Browse files
Files changed (1) hide show
  1. auditqa/doc_process.py +13 -0
auditqa/doc_process.py CHANGED
@@ -6,6 +6,7 @@ from torch import cuda
6
  from langchain_community.document_loaders import PyMuPDFLoader
7
  from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
8
  from langchain_community.vectorstores import Qdrant
 
9
  from auditqa.reports import files, report_list
10
  device = 'cuda' if cuda.is_available() else 'cpu'
11
 
@@ -91,4 +92,16 @@ def process_pdf():
91
 
92
  print("done")
93
  return qdrant_collections
 
 
 
 
 
 
 
 
 
 
 
 
94
 
 
6
  from langchain_community.document_loaders import PyMuPDFLoader
7
  from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
8
  from langchain_community.vectorstores import Qdrant
9
+ from qdrant_client import QdrantClient
10
  from auditqa.reports import files, report_list
11
  device = 'cuda' if cuda.is_available() else 'cpu'
12
 
 
92
 
93
  print("done")
94
  return qdrant_collections
95
+
96
def get_local_qdrant(name):
    """Return a LangChain ``Qdrant`` vector store for the local collection ``name``.

    A ``QdrantClient`` is opened on the path ``./data/<name>`` and the
    collection with the same name is wrapped in a ``Qdrant`` store. Text is
    embedded with the ``BAAI/bge-small-en-v1.5`` model, with embedding
    normalisation enabled, on the module-level ``device``.

    Args:
        name: Used both as the sub-directory under ``./data`` and as the
            Qdrant collection name.

    Returns:
        The ``Qdrant`` vector store bound to that client and collection.
    """
    # Client first, then the embedding model, matching the original order.
    local_client = QdrantClient(path=f"./data/{name}")
    embedder = HuggingFaceEmbeddings(
        model_name="BAAI/bge-small-en-v1.5",
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )
    return Qdrant(
        client=local_client,
        collection_name=name,
        embeddings=embedder,
    )
105
+
106
+
107