ppsingh committed on
Commit
6240195
1 Parent(s): 86a6762

Update auditqa/doc_process.py

Browse files
Files changed (1) hide show
  1. auditqa/doc_process.py +4 -1
auditqa/doc_process.py CHANGED
@@ -41,13 +41,16 @@ def process_pdf():
41
  doc_processed = text_splitter.split_documents(value)
42
  for doc in doc_processed:
43
  doc.metadata["source"] = file
 
44
  all_documents[file] = doc_processed
45
 
46
  print(all_documents.keys())
47
 
48
 
49
  embeddings = HuggingFaceEmbeddings(
50
- model_name="BAAI/bge-small-en-v1.5"
 
 
51
  )
52
 
53
  qdrant_collections = {}
 
41
  doc_processed = text_splitter.split_documents(value)
42
  for doc in doc_processed:
43
  doc.metadata["source"] = file
44
+ doc.metadata["year"] = file[-4:]
45
  all_documents[file] = doc_processed
46
 
47
  print(all_documents.keys())
48
 
49
 
50
  embeddings = HuggingFaceEmbeddings(
51
+ model_kwargs = {'device': 'cpu'},
52
+ encode_kwargs = {'normalize_embeddings': True},
53
+ model_name="BAAI/bge-small-en-v1.5"
54
  )
55
 
56
  qdrant_collections = {}