Asaad Almutareb committed on
Commit c30ce87
1 Parent(s): 2e6490e

cleaned code, updated requirements

hf_mixtral_agent.py CHANGED
@@ -1,15 +1,9 @@
 # HF libraries
 from langchain_community.llms import HuggingFaceEndpoint
-from langchain_core.prompts import ChatPromptTemplate
-from langchain import hub
-import gradio as gr
 from langchain.agents import AgentExecutor
 from langchain.agents.format_scratchpad import format_log_to_str
-from langchain.agents.output_parsers import (
-    ReActJsonSingleInputOutputParser,
-)
+from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
 # Import things that are needed generically
-from typing import List, Dict
 from langchain.tools.render import render_text_description
 import os
 from dotenv import load_dotenv
@@ -17,12 +11,11 @@ from innovation_pathfinder_ai.structured_tools.structured_tools import (
     arxiv_search, get_arxiv_paper, google_search, wikipedia_search
 )
 
-# hacky and should be replaced with a database
-from innovation_pathfinder_ai.source_container.container import (
-    all_sources
-)
 from langchain import PromptTemplate
 from innovation_pathfinder_ai.templates.react_json_with_memory import template_system
+from innovation_pathfinder_ai.utils import logger
+
+logger = logger.get_console_logger("hf_mixtral_agent")
 
 config = load_dotenv(".env")
 HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
@@ -49,13 +42,6 @@ tools = [
     # get_arxiv_paper,
 ]
 
-tools_papers = [
-    arxiv_search,
-    get_arxiv_paper,
-
-]
-
-
 prompt = PromptTemplate.from_template(
     template=template_system
 )
@@ -87,15 +73,4 @@ agent_executor = AgentExecutor(
     #max_execution_time=60, # timout at 60 sec
     return_intermediate_steps=True,
     handle_parsing_errors=True,
-)
-
-# instantiate AgentExecutor
-agent_executor_noweb = AgentExecutor(
-    agent=agent,
-    tools=tools_papers,
-    verbose=True,
-    max_iterations=6, # cap number of iterations
-    #max_execution_time=60, # timout at 60 sec
-    return_intermediate_steps=True,
-    handle_parsing_errors=True,
 )
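The new import from innovation_pathfinder_ai.utils and the call logger.get_console_logger("hf_mixtral_agent") point at a utils package whose contents are not shown in this commit. A minimal sketch of what such a helper could look like, assuming it wraps the standard logging module with the newly added rich dependency (only the function name comes from the diff; everything else is illustrative):

# Hypothetical sketch of a get_console_logger helper; not part of this commit.
import logging
from rich.logging import RichHandler  # rich is added to requirements.txt in this commit

def get_console_logger(name: str = "default") -> logging.Logger:
    logger = logging.getLogger(name)
    if not logger.handlers:  # avoid attaching duplicate handlers on repeated imports
        logger.setLevel(logging.INFO)
        handler = RichHandler(rich_tracebacks=True)
        handler.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

With a helper like this, the agent module's call would return a named logger that prints rich-formatted records to the console.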
innovation_pathfinder_ai/utils.py DELETED
@@ -1,42 +0,0 @@
-def create_wikipedia_urls_from_text(text):
-    """
-    Extracts page titles from a given text and constructs Wikipedia URLs for each title.
-
-    Args:
-    - text (str): A string containing multiple sections, each starting with "Page:" followed by the title.
-
-    Returns:
-    - list: A list of Wikipedia URLs constructed from the extracted titles.
-    """
-    # Split the text into sections based on "Page:" prefix
-    sections = text.split("Page: ")
-    # Remove the first item if it's empty (in case the text starts with "Page:")
-    if sections[0].strip() == "":
-        sections = sections[1:]
-
-    urls = []  # Initialize an empty list to store the URLs
-    for section in sections:
-        # Extract the title, which is the string up to the first newline
-        title = section.split("\n", 1)[0]
-        # Replace spaces with underscores for the URL
-        url_title = title.replace(" ", "_")
-        # Construct the URL and add it to the list
-        url = f"https://en.wikipedia.org/wiki/{url_title}"
-        urls.append(url)
-
-    return urls
-
-def collect_urls(data_list):
-    urls = []
-    for item in data_list:
-        # Check if item is a string and contains 'link:'
-        if isinstance(item, str) and 'link:' in item:
-            start = item.find('link:') + len('link: ')
-            end = item.find(',', start)
-            url = item[start:end if end != -1 else None].strip()
-            urls.append(url)
-        # Check if item is a dictionary and has 'Entry ID'
-        elif isinstance(item, dict) and 'Entry ID' in item:
-            urls.append(item['Entry ID'])
-    last_sources = urls[-3:]
-    return last_sources
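For context on what was dropped: create_wikipedia_urls_from_text rebuilt Wikipedia links from the "Page: <title>" sections described in its docstring. A small usage example (the input string below is made up to match that format):

# Illustrative input shaped like the "Page: <title>" sections the docstring describes.
sample = "Page: Graph neural network\nSummary: ...\n\nPage: Knowledge graph\nSummary: ..."
print(create_wikipedia_urls_from_text(sample))
# ['https://en.wikipedia.org/wiki/Graph_neural_network',
#  'https://en.wikipedia.org/wiki/Knowledge_graph']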
requirements.txt CHANGED
@@ -8,4 +8,6 @@ wikipedia
 gradio==3.48.0
 chromadb
 google_api_python_client
-pypdf2
+pypdf2
+sqlmodel
+rich
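The removed import in hf_mixtral_agent.py was flagged in the old code as "hacky and should be replaced with a database", and this commit both drops that in-memory all_sources container and adds sqlmodel to the requirements. The replacement schema is not shown here, but a minimal sqlmodel sketch of a source-tracking table could look like the following (the class, field names, and SQLite URL are illustrative assumptions, not taken from this commit):

# Hypothetical sketch only; the actual schema is not part of this commit.
from typing import Optional
from sqlmodel import Field, Session, SQLModel, create_engine

class Source(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    url: str
    title: str = ""

engine = create_engine("sqlite:///sources.db")
SQLModel.metadata.create_all(engine)

# Record a retrieved source instead of appending to a global list.
with Session(engine) as session:
    session.add(Source(url="https://example.org/some-paper", title="Example source"))
    session.commit()

A table like this would let the agent persist and query retrieved sources across runs, which the old module-level list could not do.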