sabazo committed · unverified
Commit f616346 · 2 Parent(s): c47b75b 793cb2b

Merge pull request #4 from almutareb/reference_parser

innovation_pathfinder_ai/source_container/container.py ADDED
@@ -0,0 +1 @@
+ all_sources = []
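Note: `all_sources` is deliberately a module-level list that other modules mutate in place (see the `# hacky` import in structured_tools.py below). The sharing works only because `+=`/`append` mutate the list object itself; rebinding the name would silently break it. A minimal sketch of the pattern, with a hypothetical `collect` helper that is not part of this commit:

    # sketch only -- demonstrates why in-place mutation is required
    from innovation_pathfinder_ai.source_container.container import all_sources

    def collect(entry: str) -> None:  # hypothetical helper, not in the repo
        all_sources.append(entry)     # in-place: every importer sees the update
        # all_sources = [entry]       # rebinding would only shadow the name locally

    collect("Title: example, link: https://example.org, snippet: ...")
    print(all_sources)  # ['Title: example, link: https://example.org, snippet: ...']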
innovation_pathfinder_ai/structured_tools/structured_tools.py ADDED
@@ -0,0 +1,62 @@
+ from langchain.tools import BaseTool, StructuredTool, tool
+ from langchain.retrievers import ArxivRetriever
+ from langchain_community.utilities import SerpAPIWrapper
+ import arxiv
+
+ # hacky and should be replaced with a database
+ from innovation_pathfinder_ai.source_container.container import (
+     all_sources
+ )
+
+ @tool
+ def arxiv_search(query: str) -> str:
+     """Search arxiv and collect the metadata of the retrieved papers."""
+     global all_sources
+     arxiv_retriever = ArxivRetriever(load_max_docs=2)
+     data = arxiv_retriever.invoke(query)
+     meta_data = [i.metadata for i in data]
+     all_sources += meta_data
+     return str(meta_data)
+
+ @tool
+ def get_arxiv_paper(paper_id: str) -> None:
+     """Download a paper from arxiv. The input must be the bare arxiv id,
+     such as "1605.08386v1" or "2312.02813"; a full URL like
+     "http://arxiv.org/abs/2312.02813" will break the code. Download one
+     paper at a time and keep the input free of any additional text.
+     """
+     # code from https://lukasschwab.me/arxiv.py/arxiv.html
+     paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
+
+     number_without_period = paper_id.replace('.', '')
+
+     # Download the PDF to a specified directory with a custom filename.
+     paper.download_pdf(dirpath="./mydir", filename=f"{number_without_period}.pdf")
+
+
+ @tool
+ def google_search(query: str) -> str:
+     """Run a Google search via SerpAPI and collect the result metadata."""
+     global all_sources
+
+     search = SerpAPIWrapper()
+     search_results: dict = search.results(query)
+
+     organic_source = search_results['organic_results']
+     cleaner_sources = ["Title: {title}, link: {link}, snippet: {snippet}".format(**i) for i in organic_source]
+
+     all_sources += cleaner_sources
+
+     return str(cleaner_sources)
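Note: functions decorated with `@tool` become LangChain tool objects, so they can be smoke-tested outside the agent. A sketch, assuming the package is installed, `SERPAPI_API_KEY` is set for `google_search`, and the `./mydir` download directory already exists (the query strings are illustrative):

    # sketch only -- exercising the new tools directly
    from innovation_pathfinder_ai.structured_tools.structured_tools import (
        arxiv_search, get_arxiv_paper
    )
    from innovation_pathfinder_ai.source_container.container import all_sources

    print(arxiv_search.invoke("video generation from images"))  # stringified metadata
    print(all_sources)                    # the call above also appended to the shared list
    get_arxiv_paper.invoke("2312.02813")  # writes ./mydir/231202813.pdf, assuming ./mydir exists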
mixtral_agent.py CHANGED
@@ -1,31 +1,29 @@
  # LangChain supports many other chat models. Here, we're using Ollama
  from langchain_community.chat_models import ChatOllama
- from langchain_core.output_parsers import StrOutputParser
  from langchain_core.prompts import ChatPromptTemplate
- from langchain.tools.retriever import create_retriever_tool
- from langchain_community.utilities import SerpAPIWrapper
- from langchain.retrievers import ArxivRetriever
- from langchain_core.tools import Tool
  from langchain import hub
- from langchain.agents import AgentExecutor, load_tools
+ from langchain.agents import AgentExecutor
  from langchain.agents.format_scratchpad import format_log_to_str
  from langchain.agents.output_parsers import (
      ReActJsonSingleInputOutputParser,
  )
  # Import things that are needed generically
- from langchain.pydantic_v1 import BaseModel, Field
- from langchain.tools import BaseTool, StructuredTool, tool
  from typing import List, Dict
- from datetime import datetime
  from langchain.tools.render import render_text_description
  import os
  import dotenv
+ from innovation_pathfinder_ai.structured_tools.structured_tools import (
+     arxiv_search, get_arxiv_paper, google_search
+ )
+
+ # hacky and should be replaced with a database
+ from innovation_pathfinder_ai.source_container.container import (
+     all_sources
+ )

  dotenv.load_dotenv()

  OLLMA_BASE_URL = os.getenv("OLLMA_BASE_URL")
@@ -35,89 +33,13 @@ llm = ChatOllama(
      model="mistral:instruct",
      base_url= OLLMA_BASE_URL
  )
- prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

- arxiv_retriever = ArxivRetriever(load_max_docs=2)

- def format_info_list(info_list: List[Dict[str, str]]) -> str:
-     """
-     Format a list of dictionaries containing information into a single string.
-
-     Args:
-         info_list (List[Dict[str, str]]): A list of dictionaries containing information.
-
-     Returns:
-         str: A formatted string containing the information from the list.
-     """
-     formatted_strings = []
-     for info_dict in info_list:
-         formatted_string = "|"
-         for key, value in info_dict.items():
-             if isinstance(value, datetime.date):
-                 value = value.strftime('%Y-%m-%d')
-             formatted_string += f"'{key}': '{value}', "
-         formatted_string = formatted_string.rstrip(', ') + "|"
-         formatted_strings.append(formatted_string)
-     return '\n'.join(formatted_strings)
-
- @tool
- def arxiv_search(query: str) -> str:
-     """Using the arxiv search and collects metadata."""
-     global all_sources
-     data = arxiv_retriever.invoke(query)
-     meta_data = [i.metadata for i in data]
-     all_sources += meta_data
-     return meta_data.__str__()
-
- @tool
- def google_search(query: str) -> str:
-     """Using the google search and collects metadata."""
-     global all_sources
-
-     x = SerpAPIWrapper()
-     search_results:dict = x.results(query)
-
-     organic_source = search_results['organic_results']
-     cleaner_sources = ["Title: {title}, link: {link}, snippet: {snippet}".format(**i) for i in organic_source]
-
-     all_sources += cleaner_sources
-
-     return cleaner_sources.__str__()
-
- tools = [arxiv_search,google_search]
-
- # tools = [
- #     create_retriever_tool(
- #         retriever,
- #         "search arxiv's database for",
- #         "Use this to recommend the user a paper to read. Unless stated, please choose the most recent models",
- #     ),
- #     Tool(
- #         name="SerpAPI",
- #         description="A low-cost Google Search API. Useful for when you need to answer questions about current events. Input should be a search query.",
- #         func=SerpAPIWrapper().run,
- #     )
- # ]
+ tools = [
+     arxiv_search,
+     google_search,
+     get_arxiv_paper,
+ ]


  prompt = hub.pull("hwchase17/react-json")
@@ -126,9 +48,9 @@ prompt = prompt.partial(
      tool_names=", ".join([t.name for t in tools]),
  )

- chat_model = llm
+
  # define the agent
- chat_model_with_stop = chat_model.bind(stop=["\nObservation"])
+ chat_model_with_stop = llm.bind(stop=["\nObservation"])
  agent = (
      {
          "input": lambda x: x["input"],
@@ -148,16 +70,14 @@ agent_executor = AgentExecutor(
  )


  if __name__ == "__main__":

-     # global variable for collecting sources
-     all_sources = []
-
      input = agent_executor.invoke(
          {
              "input": "How to generate videos from images using state of the art machine learning models; Using the arxiv retriever " +
-             "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' "
+             "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' " +
+             "please use the `get_arxiv_paper` tool to download any arxiv paper you find. " +
+             "Please only use the tools provided to you"
              # f"Please prioritize the newest papers this is the current date {get_current_date()}"
          }
      )
@@ -170,7 +90,7 @@ if __name__ == "__main__":
      # }
      # )

-     # input_1 = agent_executor.invoke(
+     # input_2 = agent_executor.invoke(
      # {
      #     "input": "I am looking for a text to 3d model; Using the google search tool " +
      #     "add the urls in the final answer using the metadata from the retriever, also provide a summary of the searches"
@@ -178,5 +98,4 @@ if __name__ == "__main__":
      # }
      # )

-     x = 0
-
+     x = 0  # for debugging purposes
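Note: after the merge, mixtral_agent.py is reduced to wiring: the tools come from structured_tools, the shared `all_sources` list comes from the container module, and the `__main__` block drives a single agent run. A sketch of the end-to-end flow, with the AgentExecutor keyword arguments assumed (they sit in an unchanged part of the file not shown in this diff):

    # sketch only -- end-to-end run; verbose/handle_parsing_errors are assumptions
    agent_executor = AgentExecutor(
        agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
    )
    result = agent_executor.invoke({"input": "Find recent image-to-video papers on arxiv"})
    print(result["output"])
    print(all_sources)  # sources accumulated by the tools during the run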