import os

import pandas as pd

from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub
from langchain.agents import (AgentType, load_tools, initialize_agent,
                              create_pandas_dataframe_agent)
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.tools import DuckDuckGoSearchRun
from langchain.vectorstores import Chroma
|
|
DB_DIR = "./db"

OPENAI_LLMS = [
    'text-davinci-003',
    'text-babbage-001',
    'text-curie-001',
    'text-ada-001'
]

OPENAI_CHAT_LLMS = [
    'gpt-3.5-turbo',
    'gpt-4',
]

HUGGINGFACE_LLMS = [
    'google/flan-t5-xl',
    'databricks/dolly-v2-3b',
    'bigscience/bloom-1b7'
]

HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
|
def createLLM(model_name="text-davinci-003", temperature=0): |
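    '''
    Build an LLM wrapper for the given model name: an OpenAI completion model,
    an OpenAI chat model, or a Hugging Face Hub model. Returns None if the
    name is not recognized.
    '''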
|
    llm = None
    if model_name in OPENAI_LLMS:
        llm = OpenAI(model_name=model_name, temperature=temperature)
    elif model_name in OPENAI_CHAT_LLMS:
        llm = ChatOpenAI(model_name=model_name, temperature=temperature)
    elif model_name in HUGGINGFACE_LLMS:
        # The Hugging Face Hub endpoint rejects a temperature of exactly 0,
        # so a tiny positive value is used instead of the caller-supplied one.
        llm = HuggingFaceHub(repo_id=model_name,
                             model_kwargs={"temperature": 1e-10})
    return llm
|
|
def load_chat_agent(verbose=True): |
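    '''
    Return a chat LLM (the first model in OPENAI_CHAT_LLMS) with a moderately
    creative temperature for general conversation.
    '''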
|
    return createLLM(OPENAI_CHAT_LLMS[0], temperature=0.5)
|
|
def load_earnings_agent(verbose=True): |
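    '''
    RetrievalQA chain over the text documents under ./data/, backed by a
    persisted Chroma index, for answering questions about earnings data.
    '''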
|
    retriever = None
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

    if not os.path.exists(DB_DIR):
        # First run: build the Chroma index from the raw text files and
        # persist it to DB_DIR.
        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
        docs = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents=docs)
        docsearch = Chroma.from_documents(text_chunks, embeddings,
                                          persist_directory=DB_DIR)
        retriever = docsearch.as_retriever()
    else:
        # Subsequent runs: reuse the persisted index.
        vectordb = Chroma(persist_directory=DB_DIR,
                          embedding_function=embeddings)
        retriever = vectordb.as_retriever()

    qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.0),
                                     chain_type="stuff",
                                     retriever=retriever,
                                     return_source_documents=True)
    return qa
|
|
def load_book_agent(verbose=True): |
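    '''
    RetrievalQA chain over the text documents under ./data/ (the same Chroma
    index as the earnings agent), with a higher temperature for more
    open-ended answers about the book content.
    '''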
|
    retriever = None
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

    if not os.path.exists(DB_DIR):
        # First run: build the Chroma index from the raw text files and
        # persist it to DB_DIR.
        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
        docs = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents=docs)
        docsearch = Chroma.from_documents(text_chunks, embeddings,
                                          persist_directory=DB_DIR)
        retriever = docsearch.as_retriever()
    else:
        # Subsequent runs: reuse the persisted index.
        vectordb = Chroma(persist_directory=DB_DIR,
                          embedding_function=embeddings)
        retriever = vectordb.as_retriever()

    qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.7),
                                     chain_type="stuff",
                                     retriever=retriever,
                                     return_source_documents=True)
    return qa
|
|
def load_sales_agent(verbose=True):
    '''
    Hard-coded agent that gates an internal sales CSV file for the demo
    '''
    llm = createLLM(model_name='text-davinci-003')
    df = pd.read_csv("data/sales_data.csv")
    agent = create_pandas_dataframe_agent(llm, df, verbose=verbose)
    return agent
|
|
def load_sqlite_agent(model_name="text-davinci-003"):
    '''
    Hard-coded agent that gates a sqlite DB of digital media for the demo
    '''
    llm = createLLM(model_name)
    sqlite_db_path = "./data/Chinook_Sqlite.sqlite"
    db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
    db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)
    return db_chain
|
|
def load_chained_agent(verbose=True, model_name="text-davinci-003"): |
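    '''
    Zero-shot ReAct agent wired to search, weather, news, Python REPL, and
    Wolfram Alpha tools, plus DuckDuckGo search.
    '''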
|
    llm = createLLM(model_name)
    toolkit = load_tools(["serpapi", "open-meteo-api", "news-api",
                          "python_repl", "wolfram-alpha"],
                         llm=llm,
                         serpapi_api_key=os.getenv('SERPAPI_API_KEY'),
                         news_api_key=os.getenv('NEWS_API_KEY'),
                         tmdb_bearer_token=os.getenv('TMDB_BEARER_TOKEN'))
    toolkit += [DuckDuckGoSearchRun()]

    agent = initialize_agent(toolkit,
                             llm,
                             agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                             verbose=verbose,
                             return_intermediate_steps=True)
    return agent
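

if __name__ == "__main__":
    # Minimal usage sketch. It assumes OPENAI_API_KEY and the tool API keys
    # (SerpAPI, News API, Wolfram Alpha) are set and that the demo data files
    # exist under ./data/; the example questions are placeholders.
    earnings_qa = load_earnings_agent()
    answer = earnings_qa({"query": "Summarize the latest earnings call."})
    print(answer["result"])

    react_agent = load_chained_agent(verbose=True)
    response = react_agent({"input": "What is the weather in Paris today?"})
    print(response["output"])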