File size: 5,488 Bytes
ca7e9c6
 
 
 
 
 
 
 
 
 
e42e9ae
ca7e9c6
 
 
f57b8d4
 
 
e42e9ae
f57b8d4
e42e9ae
 
 
 
 
 
ca7e9c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e42e9ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f57b8d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e42e9ae
f57b8d4
 
 
 
 
 
ca7e9c6
 
 
 
e0753bf
ca7e9c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1584363
4536af0
ca7e9c6
 
 
 
 
1584363
ca7e9c6
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
##############################################################################
# Utility methods for building LLMs and agent models
#
# @philmui
# Mon May 1 18:34:45 PDT 2023
##############################################################################

import os
import pandas as pd

from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub
from langchain.agents import AgentType, load_tools, initialize_agent,\
                            create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.tools import DuckDuckGoSearchRun
from langchain.vectorstores import Chroma

import chromadb
from chromadb.config import Settings
# Directory where the Chroma vector store is persisted between runs.
DB_DIR = "./db"

# OpenAI completion models usable with the `OpenAI` LLM wrapper.
OPENAI_LLMS = [ 
    'text-davinci-003', 
    'text-babbage-001', 
    'text-curie-001', 
    'text-ada-001'
]

# OpenAI chat models usable with the `ChatOpenAI` wrapper.
OPENAI_CHAT_LLMS = [
    'gpt-3.5-turbo',     
    'gpt-4',
]

# Hugging Face Hub repo ids usable with the `HuggingFaceHub` wrapper.
HUGGINGFACE_LLMS = [
    'google/flan-t5-xl',
    'databricks/dolly-v2-3b',
    'bigscience/bloom-1b7'
]

# Read from the environment; None if unset (HuggingFaceHub reads it itself).
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

def createLLM(model_name="text-davinci-003", temperature=0):
    """Instantiate the appropriate LLM wrapper for `model_name`.

    Dispatches on which model list the name belongs to:
    OPENAI_LLMS -> OpenAI, OPENAI_CHAT_LLMS -> ChatOpenAI,
    HUGGINGFACE_LLMS -> HuggingFaceHub.

    Args:
        model_name: one of the names in OPENAI_LLMS, OPENAI_CHAT_LLMS,
            or HUGGINGFACE_LLMS.
        temperature: sampling temperature forwarded to the model.

    Returns:
        An LLM instance, or None when `model_name` is not recognized.
    """
    llm = None
    if model_name in OPENAI_LLMS:
        llm = OpenAI(model_name=model_name, temperature=temperature)
    elif model_name in OPENAI_CHAT_LLMS:
        llm = ChatOpenAI(model_name=model_name, temperature=temperature)
    elif model_name in HUGGINGFACE_LLMS:
        # Bug fix: the caller's temperature used to be ignored here and
        # hard-coded to 1e-10. HuggingFaceHub rejects temperature == 0,
        # so fall back to a tiny positive value only in that case.
        llm = HuggingFaceHub(repo_id=model_name,
                             model_kwargs={"temperature": temperature or 1e-10})
    return llm

def load_chat_agent(verbose=True):
    """Return a chat LLM (first entry of OPENAI_CHAT_LLMS) at temperature 0.5.

    NOTE(review): `verbose` is currently unused — kept for signature
    compatibility with the other load_* helpers.
    """
    chat_model_name = OPENAI_CHAT_LLMS[0]
    return createLLM(chat_model_name, temperature=0.5)

def load_earnings_agent(verbose=True):
    """Build a RetrievalQA chain (temperature 0.0) over the local text corpus.

    On first run (no ./db directory) the ./data/**/*.txt files are loaded,
    chunked, embedded with OpenAI embeddings, and persisted to a Chroma
    store; subsequent runs reuse the persisted store.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

    if os.path.exists(DB_DIR):
        # Reuse the previously persisted Chroma index.
        store = Chroma(persist_directory=DB_DIR,
                       embedding_function=embeddings)
    else:
        # First run: ingest, chunk, embed, and persist the corpus.
        documents = DirectoryLoader(path="./data/", glob="**/*.txt").load()
        splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        chunks = splitter.split_documents(documents=documents)
        store = Chroma.from_documents(chunks, embeddings,
                                      persist_directory="./db")

    return RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.0),
                                       chain_type="stuff",
                                       retriever=store.as_retriever(),
                                       return_source_documents=True)

def load_book_agent(verbose=True):
    """Build a RetrievalQA chain (temperature 0.7) over the local text corpus.

    Identical ingestion path to load_earnings_agent: if ./db does not yet
    exist, load ./data/**/*.txt, split into 1000-char chunks, embed, and
    persist to Chroma; otherwise open the persisted store.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

    if os.path.exists(DB_DIR):
        # Reuse the previously persisted Chroma index.
        store = Chroma(persist_directory=DB_DIR,
                       embedding_function=embeddings)
    else:
        # First run: ingest, chunk, embed, and persist the corpus.
        documents = DirectoryLoader(path="./data/", glob="**/*.txt").load()
        splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        chunks = splitter.split_documents(documents=documents)
        store = Chroma.from_documents(chunks, embeddings,
                                      persist_directory="./db")

    return RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.7),
                                       chain_type="stuff",
                                       retriever=store.as_retriever(),
                                       return_source_documents=True)

def load_sales_agent(verbose=True, csv_path="data/sales_data.csv",
                     model_name="text-davinci-003"):
    '''
    Build a pandas-dataframe agent over an internal sales CSV file for demo.

    Args:
        verbose: forwarded to the agent for step-by-step logging.
        csv_path: path of the CSV to load (generalized from the previous
            hard-coded "data/sales_data.csv"; default unchanged).
        model_name: LLM to drive the agent (default unchanged).

    Returns:
        A langchain pandas dataframe agent bound to the loaded dataframe.
    '''
    # Renamed `chat` -> `llm`: text-davinci-003 is a completion model,
    # not a chat model, so the old name was misleading.
    llm = createLLM(model_name=model_name)
    df = pd.read_csv(csv_path)
    agent = create_pandas_dataframe_agent(llm, df, verbose=verbose)
    return agent

def load_sqlite_agent(model_name="text-davinci-003",
                      db_path="./data/Chinook_Sqlite.sqlite"):
    '''
    Build a SQL chain over a sqlite DB of digital media for demo.

    Args:
        model_name: LLM to drive the chain. Bug fix: this parameter was
            previously accepted but ignored — the chain always used
            OPENAI_LLMS[0]. It is now honored (default unchanged, so
            existing callers see identical behavior).
        db_path: path to the sqlite file (generalized from the previous
            hard-coded value; default unchanged).

    Returns:
        A SQLDatabaseChain wired to the sqlite database.
    '''
    llm = createLLM(model_name)
    db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
    db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)
    return db_chain

def load_chained_agent(verbose=True, model_name="text-davinci-003"):
    """Build a zero-shot ReAct agent with a standard toolbelt.

    Tools: SerpAPI search, Open-Meteo weather, News API, a Python REPL,
    Wolfram Alpha, plus DuckDuckGo search. API keys are read from the
    environment (SERPAPI_API_KEY, NEWS_API_KEY, TMDB_BEARER_TOKEN).
    """
    llm = createLLM(model_name)

    tools = load_tools(
        ["serpapi", "open-meteo-api", "news-api",
         "python_repl", "wolfram-alpha"],
        llm=llm,
        serpapi_api_key=os.getenv('SERPAPI_API_KEY'),
        news_api_key=os.getenv('NEWS_API_KEY'),
        tmdb_bearer_token=os.getenv('TMDB_BEARER_TOKEN'),
    )
    tools.append(DuckDuckGoSearchRun())

    return initialize_agent(tools,
                            llm,
                            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                            verbose=verbose,
                            return_intermediate_steps=True)