import os
import gradio as gr
import cohere
from crewai import Agent, Task, Crew, Process

from langchain_groq import ChatGroq
from langchain_cohere import ChatCohere

from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
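
# Likely runtime dependencies for the imports above (an assumption based on the import
# names, not a pinned version list): crewai, crewai-tools, langchain-groq,
# langchain-cohere, langchain-community, duckduckgo-search, cohere, gradio.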

# Ensure essential environment variables are set
cohere_api_key = os.getenv('COHERE_API_KEY')
if not cohere_api_key:
    raise EnvironmentError("COHERE_API_KEY is not set in environment variables")
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise EnvironmentError("GROQ_API_KEY is not set in environment variables")

# Initialize API clients
co = cohere.Client(cohere_api_key)
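# The Cohere client is used by the WebScrapper tool below to summarize scraped page content.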

# Define the DuckDuckGoSearch tool using the decorator for tool registration
# @tool('DuckDuckGoSearch')
# def search(search_query: str):
#     """Search the web for information on a given topic."""
#     return DuckDuckGoSearchRun().run(search_query)

# Define the DuckDuckGoResults tool
@tool('DuckDuckGoResults')
def search_results(search_query: str) -> str:
    """
    Performs a web search using the DuckDuckGo search engine to gather and return a collection of search results.
    This tool automates the retrieval of web-based information related to a specified query.

    Args:
    - search_query (str): The query string that specifies the information to be searched on the web. This should be a clear and concise expression of the user's information needs.

    Returns:
    - str: A formatted string of search results, where each result includes a snippet from the page, its title, and the link to it, as produced by the DuckDuckGo search wrapper.

    Example:
    - Input: search_results(search_query = "Generative AI in Telecom and Media")
    - Output: [snippet: The telecommunications and media industry is at the forefront of integrating generative AI into their operations, viewing it as a catalyst for growth and innovation. Industry leaders are enthusiastic about its ability to not only enhance the current processes but also spearhead new innovations, create new opportunities, unlock new sources of ..., title: Generative AI in the telecom industry | Google Cloud Blog, link: https://cloud.google.com/blog/topics/telecommunications/generative-ai-in-the-telecom-industry], ...]

    """
    return DuckDuckGoSearchResults(max_results=3).run(search_query)

@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    A tool designed to extract and read the content of a specified link and generate a summary on a specific topic.
    It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content.
    This tool uses Cohere's API to generate summaries, which can be particularly useful for web scraping tasks, data collection,
    or extracting specific information from websites.
    
    Args:
    - url (str): The URL from which to scrape content.
    - topic (str): The specific topic on which to generate a summary.

    Returns:
    - str: A summary of the page content at the given URL, focused on the specified topic.
    """
    # Scrape content from the specified URL
    scraper = ScrapeWebsiteTool(website_url=url)
    content = scraper.run()
    
    # Prepare the prompt for generating the summary
    prompt = f"Generate a summary of the following content on the topic ## {topic} ### \n\nCONTENT:\n\n" + content
    
    # Generate the summary using Cohere
    response = co.chat(
        model='command-r-plus',
        message=prompt,
        temperature=0.2,
        max_tokens=300,
        chat_history=[],
        prompt_truncation='AUTO'
    )
    
    return response.text
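
# Note: once decorated with @tool, search_results and web_scrapper are exposed to the agents
# as the 'DuckDuckGoResults' and 'WebScrapper' tools; the agents decide when to invoke them
# during task execution, rather than this script calling them directly.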

def kickoff_crew(topic: str) -> str:
    """Kick off the research process for a given topic using CrewAI components."""
    try:
        # Retrieve the Groq API key from the environment variables
        groq_api_key = os.environ.get("GROQ_API_KEY")
        if not groq_api_key:
            raise ValueError("API Key for Groq is not set in environment variables")
    
        # Initialize the Groq large language model
        groq_llm_70b = ChatGroq(temperature=0, groq_api_key=groq_api_key, model_name="llama3-70b-8192")
        cohere_llm = ChatCohere(
            temperature=0.2,
            cohere_api_key=cohere_api_key,
            model="command-r-plus"
        )

        selected_llm = groq_llm_70b
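        # Groq's Llama 3 70B is the default here; swapping in cohere_llm above would run the
        # same crew on Cohere's command-r-plus instead.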
    
        # Define Agents with Groq LLM
        researcher = Agent(
            role='Researcher',
            goal='Search and Collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=selected_llm,  # Assigning the LLM here
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract essential insights on any given topic. "
                "Your dedication to detail ensures the reliability and thoroughness of your findings. "
                "With a strategic approach, you carefully analyze and document data, aiming to provide accurate and trustworthy results."
            ),
            allow_delegation=False,
            max_iter=5,
            verbose=True
        )

        
        editor = Agent(
            role='Editor',
            goal='Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=selected_llm,  # Assigning the LLM here
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys essential insights "
                "but is also easily understandable and appealing to diverse audiences. "
            ),
            allow_delegation=False,
            max_iter=2,
            verbose=True
        )
        
        # Define Tasks
        research_task = Task(
            description=(
                "Use the DuckDuckGoResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new queries related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to delve deeper into significant URLs identified from the snippets, extracting further information and insights. "
                "Compile these findings into a preliminary draft, documenting all relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication or misrepresentation of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body organized by different aspects of the topic, and a conclusion. "
                "Each section should properly cite sources, providing a thorough overview of the information gathered."
            ),
            agent=researcher
        )

        
        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically to enhance information flow. "
                "Verify the accuracy of all data, correct discrepancies, and update information to ensure it reflects current knowledge and is well-supported by sources. "
                "Improve the report’s readability by enhancing language clarity, adjusting sentence structures, and maintaining a consistent tone. "
                "Include a section listing all sources used, formatted as bullet points like so: '- title: link'."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative that accurately reflects the research findings. "
                "The report should include an introduction, an extensive discussion section, a concise conclusion, and a well-organized source list. "
                "Ensure the document is grammatically correct and ready for publication or presentation."
            ),
            agent=editor,
            context=[research_task]
        )
    
        # Forming the Crew
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential,
        )
    
        # Kick-off the research process
        result = crew.kickoff(inputs={'topic': topic})
        return result
    except Exception as e:
        return f"Error: {str(e)}"
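
# Quick manual check (a sketch; assumes COHERE_API_KEY and GROQ_API_KEY are exported):
#   print(kickoff_crew("Generative AI in Telecom and Media"))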

def main():
    """Set up the Gradio interface for the CrewAI Research Tool."""
    with gr.Blocks() as demo:
        gr.Markdown("## CrewAI Research Tool")
        topic_input = gr.Textbox(label="Enter Topic", placeholder="Type here...")
        submit_button = gr.Button("Start Research")
        output = gr.Markdown(label="Result")

        submit_button.click(
            fn=kickoff_crew,
            inputs=topic_input,
            outputs=output
        )

    # demo.launch(debug=True)
    demo.queue(api_open=False, max_size=3).launch()

if __name__ == "__main__":
    main()