##############################################################################
# Utility methods for building LLMs and agent models
#
# @philmui
# Mon May 1 18:34:45 PDT 2023
##############################################################################
import os
import pandas as pd
from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub
from langchain.agents import AgentType, load_tools, initialize_agent, \
    create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.tools import DuckDuckGoSearchRun
from langchain.vectorstores import Chroma
import chromadb
from chromadb.config import Settings

DB_DIR = "./db"

OPENAI_LLMS = [
    'text-davinci-003',
    'text-babbage-001',
    'text-curie-001',
    'text-ada-001'
]
OPENAI_CHAT_LLMS = [
    'gpt-3.5-turbo',
    'gpt-4',
]
HUGGINGFACE_LLMS = [
    'google/flan-t5-xl',
    'databricks/dolly-v2-3b',
    'bigscience/bloom-1b7'
]

HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")


def createLLM(model_name="text-davinci-003", temperature=0):
    """Instantiate an LLM by name: OpenAI completion, OpenAI chat, or HuggingFace Hub."""
    llm = None
    if model_name in OPENAI_LLMS:
        llm = OpenAI(model_name=model_name, temperature=temperature)
    elif model_name in OPENAI_CHAT_LLMS:
        llm = ChatOpenAI(model_name=model_name, temperature=temperature)
    elif model_name in HUGGINGFACE_LLMS:
        # HuggingFace Hub models are pinned to a near-zero temperature here.
        llm = HuggingFaceHub(repo_id=model_name,
                             model_kwargs={"temperature": 1e-10})
    return llm
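
# Example usage (a sketch; assumes OPENAI_API_KEY is set in the environment):
#   llm = createLLM("text-davinci-003", temperature=0)
#   print(llm("Say hello in one word."))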


def load_chat_agent(verbose=True):
    # Returns a chat LLM (gpt-3.5-turbo) rather than a full agent.
    return createLLM(OPENAI_CHAT_LLMS[0], temperature=0.5)


def load_earnings_agent(verbose=True):
    """Retrieval QA chain over the text files in ./data (earnings demo), backed by a persisted Chroma store."""
    retriever = None
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    if not os.path.exists(DB_DIR):
        # First run: load the text files, chunk them, and build a persistent Chroma index.
        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
        docs = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents=docs)
        docsearch = Chroma.from_documents(text_chunks, embeddings,
                                          persist_directory=DB_DIR)
        retriever = docsearch.as_retriever()
    else:
        # Reuse the existing on-disk index.
        vectordb = Chroma(persist_directory=DB_DIR,
                          embedding_function=embeddings)
        retriever = vectordb.as_retriever()
    qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.0),
                                     chain_type="stuff",
                                     retriever=retriever,
                                     return_source_documents=True)
    return qa
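
# Example usage (a sketch with a hypothetical query; the chain is called with a
# dict because return_source_documents=True adds a second output key):
#   qa = load_earnings_agent()
#   result = qa({"query": "What was revenue growth last quarter?"})
#   print(result["result"], result["source_documents"])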


def load_book_agent(verbose=True):
    """Same retrieval QA setup as load_earnings_agent, but with a more creative LLM (temperature=0.7)."""
    retriever = None
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    if not os.path.exists(DB_DIR):
        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
        docs = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents=docs)
        docsearch = Chroma.from_documents(text_chunks, embeddings,
                                          persist_directory=DB_DIR)
        retriever = docsearch.as_retriever()
    else:
        vectordb = Chroma(persist_directory=DB_DIR,
                          embedding_function=embeddings)
        retriever = vectordb.as_retriever()
    qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.7),
                                     chain_type="stuff",
                                     retriever=retriever,
                                     return_source_documents=True)
    return qa


def load_sales_agent(verbose=True):
    '''
    Hard-coded agent that answers questions over an internal sales CSV file for demo
    '''
    llm = createLLM(model_name='text-davinci-003')
    df = pd.read_csv("data/sales_data.csv")
    agent = create_pandas_dataframe_agent(llm, df, verbose=verbose)
    return agent
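
# Example usage (a sketch; "total_sales" is a hypothetical column in sales_data.csv):
#   agent = load_sales_agent(verbose=False)
#   print(agent.run("What is the sum of the total_sales column?"))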


def load_sqlite_agent(model_name="text-davinci-003"):
    '''
    Hard-coded chain that answers questions over a sqlite DB of digital media for demo
    '''
    llm = createLLM(model_name)
    sqlite_db_path = "./data/Chinook_Sqlite.sqlite"
    db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
    db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)
    return db_chain
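
# Example usage (a sketch; the Chinook sample database ships with an Employee table):
#   chain = load_sqlite_agent()
#   print(chain.run("How many employees are there?"))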


def load_chained_agent(verbose=True, model_name="text-davinci-003"):
    # Zero-shot ReAct agent with a toolkit of search, weather, news, Python, and math tools.
    llm = createLLM(model_name)
    toolkit = load_tools(["serpapi", "open-meteo-api", "news-api",
                          "python_repl", "wolfram-alpha"],
                         llm=llm,
                         serpapi_api_key=os.getenv('SERPAPI_API_KEY'),
                         news_api_key=os.getenv('NEWS_API_KEY'),
                         tmdb_bearer_token=os.getenv('TMDB_BEARER_TOKEN')
                         )
    toolkit += [DuckDuckGoSearchRun()]
    agent = initialize_agent(toolkit,
                             llm,
                             agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                             verbose=verbose,
                             return_intermediate_steps=True)
    return agent
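

if __name__ == "__main__":
    # Minimal smoke test (a sketch; assumes OPENAI_API_KEY and the tool API keys
    # above are set). Because return_intermediate_steps=True, the agent is called
    # with a dict rather than .run(), and the answer is read from the "output" key.
    agent = load_chained_agent(verbose=True)
    response = agent({"input": "Who is the current president of the United States?"})
    print(response["output"])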