mitulagr2 committed on
Commit
76b90b9
1 Parent(s): 9f0a9ca

fix logger

Files changed (1)
app/rag.py +15 -14
app/rag.py CHANGED
@@ -26,20 +26,21 @@ class ChatPDF:
     doc_ids = []
     nodes = []
     hyde_query_engine = None
+    logger = None
 
     def __init__(self):
         logging.basicConfig(level=logging.INFO)
-        logger = logging.getLogger(__name__)
+        self.logger = logging.getLogger(__name__)
 
         text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=100)
 
-        logger.info("initializing the vector store related objects")
+        self.logger.info("initializing the vector store related objects")
         client = QdrantClient(url=QDRANT_API_URL, api_key=QDRANT_API_KEY)
         vector_store = QdrantVectorStore(client=client, collection_name="rag_documents")
 
-        logger.info("initializing the OllamaEmbedding")
+        self.logger.info("initializing the OllamaEmbedding")
         embed_model = OllamaEmbedding(model_name='mxbai-embed-large')
-        logger.info("initializing the global settings")
+        self.logger.info("initializing the global settings")
         Settings.embed_model = embed_model
         Settings.llm = Ollama(model="qwen:1.8b", request_timeout=1000000)
         Settings.transformations = [text_parser]
@@ -47,44 +48,44 @@ class ChatPDF:
     def ingest(self, dir_path: str):
         docs = SimpleDirectoryReader(input_dir=dir_path).load_data()
 
-        logger.info("enumerating docs")
+        self.logger.info("enumerating docs")
         for doc_idx, doc in enumerate(docs):
             curr_text_chunks = text_parser.split_text(doc.text)
             text_chunks.extend(curr_text_chunks)
             doc_ids.extend([doc_idx] * len(curr_text_chunks))
 
-        logger.info("enumerating text_chunks")
+        self.logger.info("enumerating text_chunks")
         for idx, text_chunk in enumerate(text_chunks):
             node = TextNode(text=text_chunk)
             src_doc = docs[doc_ids[idx]]
             node.metadata = src_doc.metadata
             nodes.append(node)
 
-        logger.info("enumerating nodes")
+        self.logger.info("enumerating nodes")
         for node in nodes:
             node_embedding = embed_model.get_text_embedding(
                 node.get_content(metadata_mode=MetadataMode.ALL)
             )
             node.embedding = node_embedding
 
-        logger.info("initializing the storage context")
+        self.logger.info("initializing the storage context")
         storage_context = StorageContext.from_defaults(vector_store=vector_store)
-        logger.info("indexing the nodes in VectorStoreIndex")
+        self.logger.info("indexing the nodes in VectorStoreIndex")
         index = VectorStoreIndex(
             nodes=nodes,
             storage_context=storage_context,
             transformations=Settings.transformations,
         )
 
-        logger.info("initializing the VectorIndexRetriever with top_k as 5")
+        self.logger.info("initializing the VectorIndexRetriever with top_k as 5")
         vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
         response_synthesizer = get_response_synthesizer()
-        logger.info("creating the RetrieverQueryEngine instance")
+        self.logger.info("creating the RetrieverQueryEngine instance")
         vector_query_engine = RetrieverQueryEngine(
             retriever=vector_retriever,
             response_synthesizer=response_synthesizer,
         )
-        logger.info("creating the HyDEQueryTransform instance")
+        self.logger.info("creating the HyDEQueryTransform instance")
         hyde = HyDEQueryTransform(include_original=True)
         self.hyde_query_engine = TransformQueryEngine(vector_query_engine, hyde)
 
@@ -92,9 +93,9 @@ class ChatPDF:
         if not self.hyde_query_engine:
             return "Please, add a PDF document first."
 
-        logger.info("retrieving the response to the query")
+        self.logger.info("retrieving the response to the query")
         response = self.hyde_query_engine.query(str_or_query_bundle=query)
-        print(response)
+        self.logger.info(response)
         return response
 
     def clear(self):
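
The change swaps a constructor-local logger for an instance attribute (logger = None at class level, self.logger = logging.getLogger(__name__) in __init__), so the logging calls in ingest() and the query method resolve against the instance, and the former print(response) now goes through the logger as well. A module-level logger is an equally common way to get the same effect; the sketch below shows that pattern for comparison only. It is an illustration, not code from this repository: the stub class body, the elided setup, and the log messages are assumptions.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)  # one logger per module, visible to every method in the file


class ChatPDF:
    def __init__(self):
        logger.info("initializing the vector store related objects")
        self.hyde_query_engine = None  # real Qdrant/Ollama/Settings setup elided in this sketch

    def ingest(self, dir_path: str):
        logger.info("enumerating docs in %s", dir_path)
        # chunking, embedding, and indexing are elided in this sketch

Either approach gives every method a usable logger; the instance attribute chosen in this commit additionally allows a different logger to be injected per ChatPDF instance if that is ever needed.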