Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,17 @@ from langchain_cohere import ChatCohere
|
|
9 |
from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
|
10 |
from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# Define the DuckDuckGoSearch tool using the decorator for tool registration
|
13 |
# @tool('DuckDuckGoSearch')
|
14 |
# def search(search_query: str):
|
@@ -17,7 +28,7 @@ from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
|
|
17 |
|
18 |
# Define the DuckDuckGoSearch tool
|
19 |
@tool('DuckDuckGoResults')
|
20 |
-
def search_results(search_query: str):
|
21 |
"""
|
22 |
Performs a web search using the DuckDuckGo search engine to gather and return a collection of search results.
|
23 |
This tool automates the retrieval of web-based information related to a specified query.
|
@@ -33,14 +44,10 @@ def search_results(search_query: str):
|
|
33 |
- Output: [snippet: The telecommunications and media industry is at the forefront of integrating generative AI into their operations, viewing it as a catalyst for growth and innovation. Industry leaders are enthusiastic about its ability to not only enhance the current processes but also spearhead new innovations, create new opportunities, unlock new sources of ..., title: Generative AI in the telecom industry | Google Cloud Blog, link: https://cloud.google.com/blog/topics/telecommunications/generative-ai-in-the-telecom-industry], ...]
|
34 |
|
35 |
"""
|
36 |
-
return DuckDuckGoSearchResults().run(search_query)
|
37 |
-
|
38 |
-
# Retrieve the Cohere API key from environment variables
|
39 |
-
cohere_api_key = os.getenv('COHERE_API_KEY')
|
40 |
-
co = cohere.Client(cohere_api_key)
|
41 |
|
42 |
@tool('WebScrapper')
|
43 |
-
def web_scrapper(url: str, topic: str):
|
44 |
"""
|
45 |
A tool designed to extract and read the content of a specified link and generate a summary on a specific topic.
|
46 |
It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content.
|
@@ -52,7 +59,7 @@ def web_scrapper(url: str, topic: str):
|
|
52 |
- topic (str): The specific topic on which to generate a summary.
|
53 |
|
54 |
Returns:
|
55 |
-
-
|
56 |
"""
|
57 |
# Scrape content from the specified URL
|
58 |
scraper = ScrapeWebsiteTool(website_url=url)
|
@@ -66,6 +73,7 @@ def web_scrapper(url: str, topic: str):
|
|
66 |
model='command-r-plus',
|
67 |
message=prompt,
|
68 |
temperature=0.2,
|
|
|
69 |
chat_history=[],
|
70 |
prompt_truncation='AUTO'
|
71 |
)
|
@@ -93,35 +101,28 @@ def kickoff_crew(topic: str) -> dict:
|
|
93 |
# Define Agents with Groq LLM
|
94 |
researcher = Agent(
|
95 |
role='Researcher',
|
96 |
-
goal='Collect detailed information on {topic}',
|
97 |
tools=[search_results, web_scrapper],
|
98 |
-
|
99 |
backstory=(
|
100 |
-
"
|
101 |
-
"
|
102 |
-
"With a
|
103 |
-
"and piece of data gathered. Your research is thorough, ensuring that no stone is left unturned. "
|
104 |
-
"This dedication not only enhances the quality of the information but also ensures "
|
105 |
-
"reliability and trustworthiness in your findings."
|
106 |
-
"To achieve your objectives, think carefully about the {topic}, develop strategies to expand "
|
107 |
-
"and follows a step-by-step strategy to build conclusions."
|
108 |
),
|
109 |
allow_delegation=False,
|
110 |
max_iter=5,
|
111 |
verbose=True, # Optional
|
112 |
)
|
|
|
113 |
|
114 |
editor = Agent(
|
115 |
role='Editor',
|
116 |
-
goal='Compile and refine the information into a comprehensive report on {topic}',
|
117 |
-
|
118 |
backstory=(
|
119 |
-
"
|
120 |
-
"
|
121 |
-
"
|
122 |
-
"clearly communicates the key findings in a manner that is accessible to all readers. "
|
123 |
-
"As an editor, your role is crucial in shaping the final presentation of data, making "
|
124 |
-
"complex information easy to understand and appealing to the audience."
|
125 |
),
|
126 |
allow_delegation=False,
|
127 |
max_iter=3,
|
@@ -131,39 +132,34 @@ def kickoff_crew(topic: str) -> dict:
|
|
131 |
# Define Tasks
|
132 |
research_task = Task(
|
133 |
description=(
|
134 |
-
"
|
135 |
-
"If
|
136 |
-
"
|
137 |
-
"Compile
|
138 |
-
"
|
139 |
),
|
140 |
expected_output=(
|
141 |
-
"A draft report
|
142 |
-
"
|
143 |
-
"Each section should cite sources accurately and provide a comprehensive overview of the findings."
|
144 |
),
|
145 |
agent=researcher
|
146 |
)
|
|
|
147 |
|
148 |
edit_task = Task(
|
149 |
description=(
|
150 |
-
"Review and refine the draft report
|
151 |
-
"
|
152 |
-
"
|
153 |
-
"
|
154 |
-
"Include a dedicated section that lists all sources used in the research_task. "
|
155 |
-
"Each source used in the analysis should be presented as a bullet point in the following format: "
|
156 |
-
"- title: link "
|
157 |
-
"Ensure that all sources you include in the final report exist by scraping them if necessary. "
|
158 |
-
"This section should be comprehensive, clearly formatted, and easy to navigate, providing full transparency on the references used."
|
159 |
),
|
160 |
expected_output=(
|
161 |
-
"A
|
162 |
-
"
|
163 |
-
"
|
164 |
),
|
165 |
agent=editor,
|
166 |
-
context=[research_task]
|
167 |
)
|
168 |
|
169 |
# Forming the Crew
|
|
|
9 |
from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
|
10 |
from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
|
11 |
|
12 |
+
# Ensure essential environment variables are set
|
13 |
+
cohere_api_key = os.getenv('COHERE_API_KEY')
|
14 |
+
if not cohere_api_key:
|
15 |
+
raise EnvironmentError("COHERE_API_KEY is not set in environment variables")
|
16 |
+
groq_api_key = os.getenv("GROQ_API_KEY")
|
17 |
+
if not groq_api_key:
|
18 |
+
raise EnvironmentError("GROQ_API_KEY is not set in environment variables")
|
19 |
+
|
20 |
+
# Initialize API clients
|
21 |
+
co = cohere.Client(cohere_api_key)
|
22 |
+
|
23 |
# Define the DuckDuckGoSearch tool using the decorator for tool registration
|
24 |
# @tool('DuckDuckGoSearch')
|
25 |
# def search(search_query: str):
|
|
|
28 |
|
29 |
# Define the DuckDuckGoSearch tool
|
30 |
@tool('DuckDuckGoResults')
|
31 |
+
def search_results(search_query: str) -> dict:
|
32 |
"""
|
33 |
Performs a web search using the DuckDuckGo search engine to gather and return a collection of search results.
|
34 |
This tool automates the retrieval of web-based information related to a specified query.
|
|
|
44 |
- Output: [snippet: The telecommunications and media industry is at the forefront of integrating generative AI into their operations, viewing it as a catalyst for growth and innovation. Industry leaders are enthusiastic about its ability to not only enhance the current processes but also spearhead new innovations, create new opportunities, unlock new sources of ..., title: Generative AI in the telecom industry | Google Cloud Blog, link: https://cloud.google.com/blog/topics/telecommunications/generative-ai-in-the-telecom-industry], ...]
|
45 |
|
46 |
"""
|
47 |
+
return DuckDuckGoSearchResults(max_results=3).run(search_query)
|
|
|
|
|
|
|
|
|
48 |
|
49 |
@tool('WebScrapper')
|
50 |
+
def web_scrapper(url: str, topic: str) -> str:
|
51 |
"""
|
52 |
A tool designed to extract and read the content of a specified link and generate a summary on a specific topic.
|
53 |
It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content.
|
|
|
59 |
- topic (str): The specific topic on which to generate a summary.
|
60 |
|
61 |
Returns:
|
62 |
+
- summary (str): summary of the url on the topic
|
63 |
"""
|
64 |
# Scrape content from the specified URL
|
65 |
scraper = ScrapeWebsiteTool(website_url=url)
|
|
|
73 |
model='command-r-plus',
|
74 |
message=prompt,
|
75 |
temperature=0.2,
|
76 |
+
max_tokens=300,
|
77 |
chat_history=[],
|
78 |
prompt_truncation='AUTO'
|
79 |
)
|
|
|
101 |
# Define Agents with Groq LLM
|
102 |
researcher = Agent(
|
103 |
role='Researcher',
|
104 |
+
goal='Collect detailed information on topic ## {topic} ##',
|
105 |
tools=[search_results, web_scrapper],
|
106 |
+
llm=selected_llm, # Assigning the Groq LLM here
|
107 |
backstory=(
|
108 |
+
"You are a meticulous researcher, skilled at navigating vast amounts of information to extract essential insights on any given topic. "
|
109 |
+
"Your dedication to detail ensures the reliability and thoroughness of your findings. "
|
110 |
+
"With a strategic approach, you carefully analyze and document data, aiming to provide accurate and trustworthy results."
|
|
|
|
|
|
|
|
|
|
|
111 |
),
|
112 |
allow_delegation=False,
|
113 |
max_iter=5,
|
114 |
verbose=True, # Optional
|
115 |
)
|
116 |
+
|
117 |
|
118 |
editor = Agent(
|
119 |
role='Editor',
|
120 |
+
goal='Compile and refine the information into a comprehensive report on topic ## {topic} ##',
|
121 |
+
llm=selected_llm, # Assigning the Groq LLM here
|
122 |
backstory=(
|
123 |
+
"As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
|
124 |
+
"Your strong command of language and attention to detail ensure that each report not only conveys essential insights "
|
125 |
+
"but is also easily understandable and appealing to diverse audiences. "
|
|
|
|
|
|
|
126 |
),
|
127 |
allow_delegation=False,
|
128 |
max_iter=3,
|
|
|
132 |
# Define Tasks
|
133 |
research_task = Task(
|
134 |
description=(
|
135 |
+
"Use the DuckDuckGoResults tool to collect initial search snippets on ##{topic}##. "
|
136 |
+
"If more detailed searches are required, generate and execute new queries related to ##{topic}##. "
|
137 |
+
"Subsequently, employ the WebScrapper tool to delve deeper into significant URLs identified from the snippets, extracting further information and insights. "
|
138 |
+
"Compile these findings into a preliminary draft, documenting all relevant sources, titles, and links associated with the topic. "
|
139 |
+
"Ensure high accuracy throughout the process and avoid any fabrication or misrepresentation of information."
|
140 |
),
|
141 |
expected_output=(
|
142 |
+
"A structured draft report about the topic, featuring an introduction, a detailed main body organized by different aspects of the topic, and a conclusion. "
|
143 |
+
"Each section should properly cite sources, providing a thorough overview of the information gathered."
|
|
|
144 |
),
|
145 |
agent=researcher
|
146 |
)
|
147 |
+
|
148 |
|
149 |
edit_task = Task(
|
150 |
description=(
|
151 |
+
"Review and refine the initial draft report from the research task. Organize the content logically to enhance information flow. "
|
152 |
+
"Verify the accuracy of all data, correct discrepancies, and update information to ensure it reflects current knowledge and is well-supported by sources. "
|
153 |
+
"Improve the report’s readability by enhancing language clarity, adjusting sentence structures, and maintaining a consistent tone. "
|
154 |
+
"Include a section listing all sources used, formatted as bullet points like so: '- title: link'."
|
|
|
|
|
|
|
|
|
|
|
155 |
),
|
156 |
expected_output=(
|
157 |
+
"A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative that accurately reflects the research findings. "
|
158 |
+
"The report should include an introduction, an extensive discussion section, a concise conclusion, and a well-organized source list. "
|
159 |
+
"Ensure the document is grammatically correct and ready for publication or presentation."
|
160 |
),
|
161 |
agent=editor,
|
162 |
+
context=[research_task]  # required: edit_task's description reviews the draft produced by research_task
|
163 |
)
|
164 |
|
165 |
# Forming the Crew
|