sabazo committed · unverified
Commit f616346 · 2 Parent(s): c47b75b 793cb2b

Merge pull request #4 from almutareb/reference_parser

innovation_pathfinder_ai/source_container/container.py ADDED
@@ -0,0 +1 @@
+ all_sources = []
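Note: `all_sources` is deliberately a module-level list that other modules mutate in place (see the `# hacky` import in structured_tools.py below). The sharing works only because `+=`/`append` mutate the list object itself; rebinding the name would silently break it. A minimal sketch of the pattern, with a hypothetical `collect` helper that is not part of this commit:

    # sketch only -- demonstrates why in-place mutation is required
    from innovation_pathfinder_ai.source_container.container import all_sources

    def collect(entry: str) -> None:  # hypothetical helper, not in the repo
        all_sources.append(entry)     # in-place: every importer sees the update
        # all_sources = [entry]       # rebinding would only shadow the name locally

    collect("Title: example, link: https://example.org, snippet: ...")
    print(all_sources)  # ['Title: example, link: https://example.org, snippet: ...']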
innovation_pathfinder_ai/structured_tools/structured_tools.py ADDED
@@ -0,0 +1,62 @@
+ from langchain.tools import BaseTool, StructuredTool, tool
+ from langchain.retrievers import ArxivRetriever
+ from langchain_community.utilities import SerpAPIWrapper
+ import arxiv
+
+ # hacky and should be replaced with a database
+ from innovation_pathfinder_ai.source_container.container import (
+     all_sources
+ )
+
+ @tool
+ def arxiv_search(query: str) -> str:
+     """Search arxiv and collect the metadata of the retrieved papers."""
+     global all_sources
+     arxiv_retriever = ArxivRetriever(load_max_docs=2)
+     data = arxiv_retriever.invoke(query)
+     meta_data = [i.metadata for i in data]
+     all_sources += meta_data
+     return str(meta_data)
+
+ @tool
+ def get_arxiv_paper(paper_id: str) -> None:
+     """Download a paper from arxiv. The input must be the bare arxiv id,
+     such as "1605.08386v1" or "2312.02813"; a full URL like
+     "http://arxiv.org/abs/2312.02813" will break the code. Download one
+     paper at a time and keep the input free of any additional text.
+     """
+     # code from https://lukasschwab.me/arxiv.py/arxiv.html
+     paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
+
+     number_without_period = paper_id.replace('.', '')
+
+     # Download the PDF to a specified directory with a custom filename.
+     paper.download_pdf(dirpath="./mydir", filename=f"{number_without_period}.pdf")
+
+
+ @tool
+ def google_search(query: str) -> str:
+     """Run a Google search via SerpAPI and collect the result metadata."""
+     global all_sources
+
+     search = SerpAPIWrapper()
+     search_results: dict = search.results(query)
+
+     organic_source = search_results['organic_results']
+     cleaner_sources = ["Title: {title}, link: {link}, snippet: {snippet}".format(**i) for i in organic_source]
+
+     all_sources += cleaner_sources
+
+     return str(cleaner_sources)
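Note: functions decorated with `@tool` become LangChain tool objects, so they can be smoke-tested outside the agent. A sketch, assuming the package is installed, `SERPAPI_API_KEY` is set for `google_search`, and the `./mydir` download directory already exists (the query strings are illustrative):

    # sketch only -- exercising the new tools directly
    from innovation_pathfinder_ai.structured_tools.structured_tools import (
        arxiv_search, get_arxiv_paper
    )
    from innovation_pathfinder_ai.source_container.container import all_sources

    print(arxiv_search.invoke("video generation from images"))  # stringified metadata
    print(all_sources)                    # the call above also appended to the shared list
    get_arxiv_paper.invoke("2312.02813")  # writes ./mydir/231202813.pdf, assuming ./mydir exists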
mixtral_agent.py CHANGED
@@ -1,31 +1,29 @@
  # LangChain supports many other chat models. Here, we're using Ollama
  from langchain_community.chat_models import ChatOllama
- from langchain_core.output_parsers import StrOutputParser
  from langchain_core.prompts import ChatPromptTemplate
- from langchain.tools.retriever import create_retriever_tool
- from langchain_community.utilities import SerpAPIWrapper
- from langchain.retrievers import ArxivRetriever
- from langchain_core.tools import Tool
  from langchain import hub
- from langchain.agents import AgentExecutor, load_tools
+ from langchain.agents import AgentExecutor
  from langchain.agents.format_scratchpad import format_log_to_str
  from langchain.agents.output_parsers import (
      ReActJsonSingleInputOutputParser,
  )
  # Import things that are needed generically
- from langchain.pydantic_v1 import BaseModel, Field
- from langchain.tools import BaseTool, StructuredTool, tool
  from typing import List, Dict
- from datetime import datetime
  from langchain.tools.render import render_text_description
  import os
  import dotenv
+ from innovation_pathfinder_ai.structured_tools.structured_tools import (
+     arxiv_search, get_arxiv_paper, google_search
+ )
+
+ # hacky and should be replaced with a database
+ from innovation_pathfinder_ai.source_container.container import (
+     all_sources
+ )

  dotenv.load_dotenv()

  OLLMA_BASE_URL = os.getenv("OLLMA_BASE_URL")
@@ -35,89 +33,13 @@ llm = ChatOllama(
      model="mistral:instruct",
      base_url= OLLMA_BASE_URL
  )
- prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

- arxiv_retriever = ArxivRetriever(load_max_docs=2)

- def format_info_list(info_list: List[Dict[str, str]]) -> str:
-     """
-     Format a list of dictionaries containing information into a single string.
-
-     Args:
-         info_list (List[Dict[str, str]]): A list of dictionaries containing information.
-
-     Returns:
-         str: A formatted string containing the information from the list.
-     """
-     formatted_strings = []
-     for info_dict in info_list:
-         formatted_string = "|"
-         for key, value in info_dict.items():
-             if isinstance(value, datetime.date):
-                 value = value.strftime('%Y-%m-%d')
-             formatted_string += f"'{key}': '{value}', "
-         formatted_string = formatted_string.rstrip(', ') + "|"
-         formatted_strings.append(formatted_string)
-     return '\n'.join(formatted_strings)
-
- @tool
- def arxiv_search(query: str) -> str:
-     """Using the arxiv search and collects metadata."""
-     global all_sources
-     data = arxiv_retriever.invoke(query)
-     meta_data = [i.metadata for i in data]
-     all_sources += meta_data
-     return meta_data.__str__()
-
- @tool
- def google_search(query: str) -> str:
-     """Using the google search and collects metadata."""
-     global all_sources
-
-     x = SerpAPIWrapper()
-     search_results:dict = x.results(query)
-
-     organic_source = search_results['organic_results']
-     cleaner_sources = ["Title: {title}, link: {link}, snippet: {snippet}".format(**i) for i in organic_source]
-
-     all_sources += cleaner_sources
-
-     return cleaner_sources.__str__()
-
- tools = [arxiv_search,google_search]
-
- # tools = [
- #     create_retriever_tool(
- #         retriever,
- #         "search arxiv's database for",
- #         "Use this to recommend the user a paper to read. Unless stated, please choose the most recent models",
- #     ),
- #     Tool(
- #         name="SerpAPI",
- #         description="A low-cost Google Search API. Useful for when you need to answer questions about current events. Input should be a search query.",
- #         func=SerpAPIWrapper().run,
- #     )
- # ]
+ tools = [
+     arxiv_search,
+     google_search,
+     get_arxiv_paper,
+ ]


  prompt = hub.pull("hwchase17/react-json")
@@ -126,9 +48,9 @@ prompt = prompt.partial(
      tool_names=", ".join([t.name for t in tools]),
  )

- chat_model = llm
+
  # define the agent
- chat_model_with_stop = chat_model.bind(stop=["\nObservation"])
+ chat_model_with_stop = llm.bind(stop=["\nObservation"])
  agent = (
      {
          "input": lambda x: x["input"],
@@ -148,16 +70,14 @@ agent_executor = AgentExecutor(
  )


  if __name__ == "__main__":

-     # global variable for collecting sources
-     all_sources = []
-
      input = agent_executor.invoke(
          {
              "input": "How to generate videos from images using state of the art machine learning models; Using the arxiv retriever " +
-             "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' "
+             "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' " +
+             "please use the `get_arxiv_paper` tool to download any arxiv paper you find. " +
+             "Please only use the tools provided to you"
              # f"Please prioritize the newest papers this is the current date {get_current_date()}"
          }
      )
@@ -170,7 +90,7 @@ if __name__ == "__main__":
      # }
      # )

-     # input_1 = agent_executor.invoke(
+     # input_2 = agent_executor.invoke(
      # {
      #     "input": "I am looking for a text to 3d model; Using the google search tool " +
      #     "add the urls in the final answer using the metadata from the retriever, also provide a summary of the searches"
@@ -178,5 +98,4 @@ if __name__ == "__main__":
      # }
      # )

-     x = 0
-
+     x = 0  # for debugging purposes
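Note: after the merge, mixtral_agent.py is reduced to wiring: the tools come from structured_tools, the shared `all_sources` list comes from the container module, and the `__main__` block drives a single agent run. A sketch of the end-to-end flow, with the AgentExecutor keyword arguments assumed (they sit in an unchanged part of the file not shown in this diff):

    # sketch only -- end-to-end run; verbose/handle_parsing_errors are assumptions
    agent_executor = AgentExecutor(
        agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
    )
    result = agent_executor.invoke({"input": "Find recent image-to-video papers on arxiv"})
    print(result["output"])
    print(all_sources)  # sources accumulated by the tools during the run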