# CrewAI Research Tool — Gradio app (Hugging Face Space).
import os

import cohere
import gradio as gr
from crewai import Agent, Task, Crew, Process
from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
from langchain_cohere import ChatCohere
from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
from langchain_groq import ChatGroq
# --- Environment validation and API client setup ---
def _require_env(var_name: str) -> str:
    """Return the value of *var_name* or raise if it is unset/empty."""
    value = os.getenv(var_name)
    if not value:
        raise EnvironmentError(f"{var_name} is not set in environment variables")
    return value


# Fail fast at import time if the required API keys are missing.
cohere_api_key = _require_env('COHERE_API_KEY')
groq_api_key = _require_env("GROQ_API_KEY")

# Initialize API clients
co = cohere.Client(cohere_api_key)
# Define the DuckDuckGoSearch tool using the decorator for tool registration
# @tool('DuckDuckGoSearch')
# def search(search_query: str):
#     """Search the web for information on a given topic."""
#     return DuckDuckGoSearchRun().run(search_query)

# Define the DuckDuckGoSearch tool
# Register as a CrewAI tool so it can be passed in an Agent's `tools` list
# (mirrors the commented-out @tool('DuckDuckGoSearch') prototype above).
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> str:
    """
    Perform a DuckDuckGo web search and return the raw results.

    Args:
        search_query (str): A clear, concise query describing the
            information to look up on the web.

    Returns:
        str: The results payload (titles, links, snippets) exactly as
            produced by ``DuckDuckGoSearchResults.run()`` — a formatted
            string, not the ``dict`` the original annotation claimed.
    """
    # Cap at 10 results to keep the context handed to the LLM small.
    return DuckDuckGoSearchResults(max_results=10).run(search_query)
# Registered as 'WebScrapper' — the research task's description refers to
# the tool by exactly that name.
@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    Scrape the content of a URL and summarize it on a specific topic.

    Fetches the page with ScrapeWebsiteTool, then asks Cohere's
    command-r-plus model for a topic-focused summary of the content.

    Args:
        url (str): The URL from which to scrape content.
        topic (str): The specific topic on which to generate a summary.

    Returns:
        str: A ###-delimited block containing the summary and source URL.
    """
    # Scrape content from the specified URL.
    scraper = ScrapeWebsiteTool(website_url=url)
    content = scraper.run()

    # Prepare the prompt and generate the summary using Cohere.
    prompt = f"Generate a summary of the following content on the topic ## {topic} ### \n\nCONTENT:\n\n" + content
    response = co.chat(
        model='command-r-plus',
        message=prompt,
        temperature=0.2,
        max_tokens=500,
        chat_history=[],
        # Let Cohere truncate the prompt if the scraped page is too long.
        prompt_truncation='AUTO'
    )

    # Keep the ###-delimited shape so downstream agents can spot sources.
    summary_response = f"""###
Summary: {response.text}
URL: {url}
###
"""
    return summary_response
def kickoff_crew(topic: str):
    """
    Kick off the research process for a given topic using CrewAI components.

    Builds a researcher agent (search + scrape tools) and an editor agent,
    wires them into a sequential two-task crew, and runs it.

    Args:
        topic (str): Research topic, interpolated into agent goals and
            task descriptions as ## {topic} ##.

    Returns:
        The crew's final output on success, or an "Error: ..." string if
        any step raises (the original ``-> dict`` annotation was wrong on
        both paths, so it has been dropped).
    """
    try:
        # Re-read the key here so a missing value surfaces as a friendly
        # "Error: ..." return instead of an unhandled exception.
        groq_api_key = os.environ.get("GROQ_API_KEY")
        if not groq_api_key:
            raise ValueError("API Key for Groq is not set in environment variables")

        # Initialize the large language models.
        groq_llm_70b = ChatGroq(temperature=0, groq_api_key=groq_api_key, model_name="llama3-70b-8192")
        cohere_llm = ChatCohere(
            temperature=0,
            cohere_api_key=cohere_api_key,
            # NOTE(review): langchain_cohere.ChatCohere documents this
            # parameter as `model`; confirm `model_name` is accepted by
            # the pinned version.
            model_name="command-r-plus"
        )

        # Swap this assignment to `cohere_llm` to use Cohere instead.
        selected_llm = groq_llm_70b

        # --- Agents ---
        researcher = Agent(
            role='Researcher',
            goal='Search and Collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=selected_llm,
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract essential insights on any given topic. "
                "Your dedication to detail ensures the reliability and thoroughness of your findings. "
                "With a strategic approach, you carefully analyze and document data, aiming to provide accurate and trustworthy results."
            ),
            allow_delegation=False,
            max_iter=15,
            max_rpm=20,
            memory=True,
            verbose=True
        )

        editor = Agent(
            role='Editor',
            goal='Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=selected_llm,
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys essential insights "
                "but is also easily understandable and appealing to diverse audiences. "
            ),
            allow_delegation=False,
            max_iter=5,
            max_rpm=15,
            memory=True,
            verbose=True
        )

        # --- Tasks ---
        research_task = Task(
            description=(
                "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new queries related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to delve deeper into significant URLs identified from the snippets, extracting further information and insights. "
                "Compile these findings into a preliminary draft, documenting all relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication or misrepresentation of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body organized by different aspects of the topic, and a conclusion. "
                "Each section should properly cite sources, providing a thorough overview of the information gathered."
            ),
            agent=researcher
        )

        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically to enhance information flow. "
                "Verify the accuracy of all data, correct discrepancies, and update information to ensure it reflects current knowledge and is well-supported by sources. "
                "Improve the report’s readability by enhancing language clarity, adjusting sentence structures, and maintaining a consistent tone. "
                "Include a section listing all sources used, formatted as bullet points following this template: "
                "- title: url'."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative that accurately reflects the research findings. "
                "The report should include an introduction, an extensive discussion section, a concise conclusion, and a well-organized source list. "
                "Ensure the document is grammatically correct and ready for publication or presentation."
            ),
            agent=editor,
            # Editor consumes the researcher's draft.
            context=[research_task]
        )

        # Form the crew and run the two tasks in order.
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential,
        )
        result = crew.kickoff(inputs={'topic': topic})
        return result
    except Exception as e:
        # Surface any failure to the Gradio UI as a readable string.
        return f"Error: {str(e)}"
def main():
    """Build and launch the Gradio front-end for the CrewAI Research Tool."""
    with gr.Blocks() as app:
        gr.Markdown("## CrewAI Research Tool")
        topic_box = gr.Textbox(label="Enter Topic", placeholder="Type here...")
        start_btn = gr.Button("Start Research")
        result_view = gr.Markdown(label="Result")
        start_btn.click(fn=kickoff_crew, inputs=topic_box, outputs=result_view)
    # Queue requests (max 3 pending) with the API page disabled, then serve.
    app.queue(api_open=False, max_size=3).launch()


if __name__ == "__main__":
    main()