DocuSmart / app.py
SurabhiT's picture
Upload app.py
e2f3005 verified
raw
history blame
No virus
10.2 kB
# from groq import Groq
# client = Groq()
# completion = client.chat.completions.create(
# model="llama3-70b-8192",
# messages=[
# {
# "role": "user",
# "content": "Write a fairy tale"
# }
# ],
# temperature=1,
# max_tokens=1024,
# top_p=1,
# stream=True,
# stop=None,
# )
# for chunk in completion:
# print(chunk.choices[0].delta.content or "", end="")
import os
import tkinter as tk
from tkinter import filedialog
from crewai import Agent, Task, Crew
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_openai import ChatOpenAI, OpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from crewai.process import Process
import gradio as gr
import numpy as np
# Placeholder value only; no OpenAI model is constructed in this file.
# NOTE(review): presumably present so OpenAI-backed defaults inside the agent
# libraries do not fail on a missing key - confirm before removing.
os.environ["OPENAI_API_KEY"] = "NA"

# SECURITY: real credentials must never be committed to source control.
# GOOGLE_API_KEY and GROQ_API_KEY are expected to be provided by the process
# environment (for example through the hosting platform's secret store).
# Any key that was previously hard-coded here must be treated as leaked and
# revoked with the provider.
for _required_key in ("GOOGLE_API_KEY", "GROQ_API_KEY"):
    if not os.environ.get(_required_key):
        # Warn instead of raising so the failure surfaces where the key is
        # actually consumed, with the provider's own error message.
        print(f"WARNING: environment variable {_required_key} is not set")
from crewai_tools import PDFSearchTool
from crewai_tools import FileReadTool
from crewai_tools import DOCXSearchTool
from crewai_tools import TXTSearchTool
from crewai_tools import CSVSearchTool
# Groq-hosted chat model; passed as `llm=` to the crewai Agents built in this
# file. The API key is read from the environment at import time, so a missing
# GROQ_API_KEY raises KeyError here.
_groq_settings = {
    "model": "llama3-70b-8192",
    "verbose": True,
    "temperature": 0.8,
    "api_key": os.environ["GROQ_API_KEY"],
}
llm = ChatGroq(**_groq_settings)
#--------------------------------------------Class for choosing agent---------------------------------------#
class agentCollection:
    """Factory methods that build the agent used for one kind of uploaded file.

    Every factory is a ``@staticmethod``: none of them uses instance state, and
    the decorator makes them callable both on the class and on an instance.
    """

    @staticmethod
    def _build_reader_agent(role, goal, backstory, tool):
        """Create a crewai ``Agent`` that owns a single document tool.

        The document-reading agents differ only in persona text and tool; the
        module-level ``llm``, verbose logging and the 10-iteration cap are
        shared by all of them.
        """
        return Agent(
            role=role,
            goal=goal,
            backstory=backstory,
            verbose=True,
            tools=[tool],
            llm=llm,
            max_iter=10,
        )

    @staticmethod
    def agentPDF(filepath):
        """Return an ``Agent`` equipped with the PDF tool for *filepath*."""
        return agentCollection._build_reader_agent(
            role="PDF Content Searcher and Writer",
            goal="Retrieve and summarize relevant content from a PDF provided by the user",
            backstory=(
                "You are an expert in navigating and extracting information from "
                "PDF documents. Your task is to find the most relevant and "
                "accurate content within the PDF and provide a detailed and "
                "concise summary that addresses the user's query."
            ),
            tool=toolsCollection.toolPDF(filepath),
        )

    @staticmethod
    def agentFile(filepath):
        """Return an ``Agent`` equipped with the generic file tool for *filepath*."""
        return agentCollection._build_reader_agent(
            role="General File Content Searcher and Writer",
            goal="Retrieve and summarize relevant content from various file formats provided by the user",
            backstory=(
                "You have extensive experience in handling different types of "
                "files, including PDFs, DOCX, TXT, and CSV. Your role is to "
                "expertly extract and summarize the most pertinent information "
                "from any file format to meet the user's needs."
            ),
            tool=toolsCollection.toolFile(filepath),
        )

    @staticmethod
    def agentTXT(filepath):
        """Return an ``Agent`` equipped with the text-file tool for *filepath*."""
        return agentCollection._build_reader_agent(
            role="Text File Content Searcher and Writer",
            goal="Retrieve and summarize relevant content from text files provided by the user",
            backstory=(
                "You specialize in working with plain text files. Your job is to "
                "sift through the text and identify the most relevant "
                "information, providing a clear and accurate summary that "
                "fulfills the user's query."
            ),
            tool=toolsCollection.toolTXT(filepath),
        )

    @staticmethod
    def agentDOCX(filepath):
        """Return an ``Agent`` equipped with the DOCX tool for *filepath*."""
        return agentCollection._build_reader_agent(
            role="DOCX Content Searcher and Writer",
            goal="Retrieve and summarize relevant content from DOCX files provided by the user",
            backstory=(
                "You are proficient in reading and extracting information from "
                "DOCX documents. Your expertise allows you to locate and "
                "summarize the most relevant content within a DOCX file, "
                "ensuring the user's query is answered thoroughly and accurately."
            ),
            tool=toolsCollection.toolDOCX(filepath),
        )

    @staticmethod
    def agentCSV(filepath):
        """Return the object produced by ``create_csv_agent`` for *filepath*.

        Unlike the other factories this does not build a crewai ``Agent`` and
        does not use the shared ``llm``: it creates its own ``ChatGroq`` model
        with temperature 0.
        """
        return create_csv_agent(
            ChatGroq(temperature=0, model="llama3-70b-8192"),
            filepath,
            verbose=True,
            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        )

    @staticmethod
    def agentContentWriter():
        """Return the tool-less ``Agent`` that writes the final report.

        It uses the shared ``llm`` and is capped at 7 iterations.
        """
        return Agent(
            role="Content Writer",
            goal="Write a comprehensive report or blog based on the data received from other agents",
            backstory=(
                "You are a skilled content writer with expertise in synthesizing "
                "information from various sources. Your task is to use the "
                "summaries and insights provided by other agents to create a "
                "well-structured and coherent report or blog that addresses the "
                "user's query in detail."
            ),
            verbose=True,
            llm=llm,
            max_iter=7,
        )
#--------------------------------------------Class for choosing tool---------------------------------------#
class toolsCollection:
    """Factory methods that build the crewai_tools tool for one file type.

    Every factory is a ``@staticmethod``: none of them uses instance state, and
    the decorator makes them callable both on the class and on an instance.
    """

    @staticmethod
    def _rag_config():
        """Return a fresh copy of the config shared by every tool in this class.

        A new dict is built on each call so that no two tools share mutable
        state.
        """
        # NOTE(review): the llm provider is "ollama" but the model id is the
        # same one this file passes to ChatGroq - confirm the intended provider.
        return dict(
            llm=dict(
                provider="ollama",
                config=dict(
                    model="llama3-70b-8192",
                ),
            ),
            embedder=dict(
                provider="huggingface",
                config=dict(
                    model="sentence-transformers/msmarco-distilbert-base-v4",
                ),
            ),
        )

    @staticmethod
    def toolPDF(filepath):
        """Return a ``PDFSearchTool`` for *filepath*.

        Prints "FILE NOT FOUND" and returns ``None`` when *filepath* is the
        empty string.
        """
        if filepath == "":
            print("FILE NOT FOUND")
            return None
        return PDFSearchTool(config=toolsCollection._rag_config(), pdf=filepath)

    @staticmethod
    def toolFile(filepath):
        """Return a ``FileReadTool`` pointed at *filepath*."""
        return FileReadTool(
            config=toolsCollection._rag_config(),
            file_path=filepath,
        )

    @staticmethod
    def toolTXT(filepath):
        """Return a ``TXTSearchTool`` for *filepath*."""
        return TXTSearchTool(config=toolsCollection._rag_config(), txt=filepath)

    @staticmethod
    def toolDOCX(filepath):
        """Return a ``DOCXSearchTool`` for *filepath*.

        Prints "FILE NOT FOUND" and returns ``None`` when *filepath* is the
        empty string.
        """
        if filepath == "":
            print("FILE NOT FOUND")
            return None
        return DOCXSearchTool(config=toolsCollection._rag_config(), docx=filepath)

    @staticmethod
    def toolCSV(filepath):
        """Return a ``CSVSearchTool`` for *filepath*."""
        return CSVSearchTool(config=toolsCollection._rag_config(), csv=filepath)
def run_ai(file, query, required_ans_format):
    """Answer *query* about an uploaded file.

    Args:
        file: The upload from the Gradio file input. Either a path string or
            an object whose ``name`` attribute holds the path.
        query: The user's question or topic; interpolated into the task
            descriptions.
        required_ans_format: Desired answer format; interpolated into each
            task's ``expected_output``.

    Returns:
        For ``.csv`` files, whatever the CSV agent's ``run(query)`` returns.
        For every other supported type, the result of ``crew.kickoff()`` on a
        two-step sequential crew (document agent, then content writer).

    Raises:
        ValueError: If no file was uploaded or its extension is not one of
            .pdf, .json, .docx, .txt, .csv.
    """
    if file is None:
        raise ValueError("No file was uploaded.")

    # Accept both a plain path string and a file-like object exposing `.name`.
    filepath = file if isinstance(file, str) else file.name

    # Match extensions case-insensitively so e.g. "REPORT.PDF" is accepted.
    lowered = filepath.lower()

    if lowered.endswith(".csv"):
        # The CSV agent is queried directly and bypasses the crew entirely.
        return agentCollection.agentCSV(filepath).run(query)

    if lowered.endswith(".pdf"):
        myagent = agentCollection.agentPDF(filepath)
    elif lowered.endswith(".json"):
        myagent = agentCollection.agentFile(filepath)
    elif lowered.endswith(".docx"):
        myagent = agentCollection.agentDOCX(filepath)
    elif lowered.endswith(".txt"):
        myagent = agentCollection.agentTXT(filepath)
    else:
        # Without this branch `myagent` would be unbound further down.
        raise ValueError(
            f"Unsupported file type: {filepath!r}. "
            "Supported extensions: .pdf, .json, .docx, .txt, .csv"
        )

    task = Task(
        description=f"Summarize the detailed description of the {query}",
        expected_output=f'{required_ans_format} : {query}',
        agent=myagent,
    )

    content_writer_agent = agentCollection.agentContentWriter()
    content_writer_task = Task(
        description=f"Write a comprehensive report on the topic '{query}' using the summaries and insights from the other agents",
        expected_output=f'{required_ans_format} : {query}',
        agent=content_writer_agent,
    )

    # Sequential process: the document task runs first, then the writer task.
    crew = Crew(
        agents=[myagent, content_writer_agent],
        tasks=[task, content_writer_task],
        process=Process.sequential,
        verbose=2,
    )
    return crew.kickoff()
# Gradio UI: one file upload plus two free-text inputs, passed positionally to
# run_ai as (file, query, required_ans_format); the return value is shown as
# text.
iface = gr.Interface(
    fn=run_ai,
    inputs=[gr.File(label="Upload File"), "text", "text"],
    outputs="text",
    title="Document and Data Analyzer",
    description="Upload a file, enter your query, and specify the format of the expected answer",
)

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse run_ai) does not launch a server.
if __name__ == "__main__":
    iface.launch()