sabazo committed on
Commit
86bc433
2 Parent(s): f2ccb97 f790226

Merge pull request #15 from almutareb/save_conversations

Browse files
app.py CHANGED
@@ -6,10 +6,38 @@ from innovation_pathfinder_ai.source_container.container import (
6
  from innovation_pathfinder_ai.utils.utils import extract_urls
7
  from innovation_pathfinder_ai.utils import logger
8
 
 
 
 
 
 
 
 
 
 
 
 
9
  logger = logger.get_console_logger("app")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if __name__ == "__main__":
12
 
 
 
 
 
13
def add_text(history, text):
    """Append the user's message (with no bot reply yet) to the chat
    history and return an empty string to clear the input textbox."""
    updated_history = history + [(text, None)]
    return updated_history, ""
@@ -30,6 +58,18 @@ if __name__ == "__main__":
30
  "chat_history": history
31
  }
32
  )
 
 
 
 
 
 
 
 
 
 
 
 
33
  return result
34
 
35
  def vote(data: gr.LikeData):
 
6
  from innovation_pathfinder_ai.utils.utils import extract_urls
7
  from innovation_pathfinder_ai.utils import logger
8
 
9
+ from innovation_pathfinder_ai.utils.utils import (
10
+ generate_uuid
11
+ )
12
+ from langchain_community.vectorstores import Chroma
13
+
14
+ import chromadb
15
+ import dotenv
16
+ import os
17
+
18
+ dotenv.load_dotenv()
19
+
20
  logger = logger.get_console_logger("app")
21
 
22
def initialize_chroma_db() -> "chromadb.Collection":
    """Create (or fetch) the persistent Chroma collection used to store
    conversation records.

    Returns:
        chromadb.Collection: the collection named by the
        ``CONVERSATION_COLLECTION_NAME`` environment variable.

    Raises:
        ValueError: if ``CONVERSATION_COLLECTION_NAME`` is not set — failing
        fast here beats a cryptic error from inside chromadb.
    """
    collection_name = os.getenv("CONVERSATION_COLLECTION_NAME")
    if not collection_name:
        raise ValueError(
            "CONVERSATION_COLLECTION_NAME environment variable is not set"
        )

    client = chromadb.PersistentClient()

    # NOTE: this returns a raw chromadb Collection (get_or_create_collection),
    # not a langchain `Chroma` wrapper — the original `-> Chroma` annotation
    # was incorrect and misleading to callers.
    collection = client.get_or_create_collection(
        name=collection_name,
    )

    return collection
32
+
33
+
34
+
35
  if __name__ == "__main__":
36
 
37
+ current_id = generate_uuid()
38
+
39
+ db = initialize_chroma_db()
40
+
41
  def add_text(history, text):
42
  history = history + [(text, None)]
43
  return history, ""
 
58
  "chat_history": history
59
  }
60
  )
61
+
62
+ db.add(
63
+ ids=[current_id],
64
+ documents=[result['output']],
65
+ metadatas=[
66
+ {
67
+ "query":query,
68
+ "intermediate_steps":result['intermediate_steps'].__str__()
69
+ }
70
+ ]
71
+ )
72
+
73
  return result
74
 
75
  def vote(data: gr.LikeData):
example.env CHANGED
@@ -8,4 +8,6 @@ OLLMA_BASE_URL=
8
  SERPAPI_API_KEY=
9
 
10
  # for chromadb
11
- VECTOR_DATABASE_LOCATION=
 
 
 
8
  SERPAPI_API_KEY=
9
 
10
  # for chromadb
11
+ VECTOR_DATABASE_LOCATION=
12
+
13
+ CONVERSATION_COLLECTION_NAME="ConversationMemory"
innovation_pathfinder_ai/structured_tools/structured_tools.py CHANGED
@@ -94,6 +94,7 @@ def wikipedia_search(query: str) -> str:
94
  @tool
95
  def chroma_search(query:str) -> str:
96
  """Search the Arxiv vector store for docmunets and relevent chunks"""
 
97
  client = chromadb.PersistentClient(
98
  # path=persist_directory,
99
  )
@@ -155,4 +156,29 @@ def embed_arvix_paper(paper_id:str) -> None:
155
  collection_name=collection_name,
156
  pdf_file_location=full_path,
157
  )
158
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  @tool
95
  def chroma_search(query:str) -> str:
96
  """Search the Arxiv vector store for docmunets and relevent chunks"""
97
+ # Since we have more than one collections we should change the name of this tool
98
  client = chromadb.PersistentClient(
99
  # path=persist_directory,
100
  )
 
156
  collection_name=collection_name,
157
  pdf_file_location=full_path,
158
  )
159
+
160
@tool
def conversational_search(query: str) -> str:
    """Search past conversations for documents and relevant chunks."""
    # The docstring above is the tool description shown to the agent; the
    # original had typos ("docmunets", "relevent") which degrade tool choice.
    # TODO(review): with multiple collections in use, consider renaming this
    # tool so its purpose is unambiguous.
    client = chromadb.PersistentClient(
        # path=persist_directory,
    )

    # Collection name is configured via the environment (see example.env).
    collection_name = os.getenv("CONVERSATION_COLLECTION_NAME")

    embedding_function = SentenceTransformerEmbeddings(
        model_name="all-MiniLM-L6-v2",
    )

    vector_db = Chroma(
        client=client,  # client for Chroma
        collection_name=collection_name,
        embedding_function=embedding_function,
    )

    retriever = vector_db.as_retriever()
    docs = retriever.get_relevant_documents(query)

    # str() is the idiomatic spelling of docs.__str__()
    return str(docs)
innovation_pathfinder_ai/utils/utils.py CHANGED
@@ -1,6 +1,7 @@
1
  import hashlib
2
  import datetime
3
  import os
 
4
 
5
  from innovation_pathfinder_ai.utils import logger
6
 
@@ -182,4 +183,13 @@ def create_folder_if_not_exists(folder_path: str) -> None:
182
  os.makedirs(folder_path)
183
  print(f"Folder '{folder_path}' created.")
184
  else:
185
- print(f"Folder '{folder_path}' already exists.")
 
 
 
 
 
 
 
 
 
 
1
  import hashlib
2
  import datetime
3
  import os
4
+ import uuid
5
 
6
  from innovation_pathfinder_ai.utils import logger
7
 
 
183
  os.makedirs(folder_path)
184
  print(f"Folder '{folder_path}' created.")
185
  else:
186
+ print(f"Folder '{folder_path}' already exists.")
187
+
188
def generate_uuid() -> str:
    """Create a new random (version 4) UUID.

    Returns:
        str: the UUID in its canonical 36-character string form.
    """
    fresh_id = uuid.uuid4()
    return str(fresh_id)
innovation_pathfinder_ai/vector_store/chroma_vector_store.py CHANGED
@@ -21,8 +21,10 @@ from langchain_community.vectorstores import Chroma
21
  from langchain_community.embeddings.sentence_transformer import (
22
  SentenceTransformerEmbeddings,
23
  )
 
 
 
24
 
25
- import uuid
26
  import dotenv
27
  import os
28
 
@@ -31,14 +33,7 @@ dotenv.load_dotenv()
31
 
32
  VECTOR_DATABASE_LOCATION = os.getenv("VECTOR_DATABASE_LOCATION")
33
 
34
- def generate_uuid() -> str:
35
- """
36
- Generate a UUID (Universally Unique Identifier) and return it as a string.
37
 
38
- Returns:
39
- str: A UUID string.
40
- """
41
- return str(uuid.uuid4())
42
 
43
  def read_markdown_file(file_path: str) -> str:
44
  """
 
21
  from langchain_community.embeddings.sentence_transformer import (
22
  SentenceTransformerEmbeddings,
23
  )
24
+ from innovation_pathfinder_ai.utils.utils import (
25
+ generate_uuid
26
+ )
27
 
 
28
  import dotenv
29
  import os
30
 
 
33
 
34
  VECTOR_DATABASE_LOCATION = os.getenv("VECTOR_DATABASE_LOCATION")
35
 
 
 
 
36
 
 
 
 
 
37
 
38
  def read_markdown_file(file_path: str) -> str:
39
  """