##############################################################################
# Utility methods for building LLMs and agent models
#
# @philmui
# Mon May 1 18:34:45 PDT 2023
##############################################################################

import os
import pandas as pd

from langchain.agents import AgentType, load_tools, initialize_agent,\
                            create_pandas_dataframe_agent
from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter

OPENAI_LLMS = [ 
    'text-davinci-003', 
    'text-babbage-001', 
    'text-curie-001', 
    'text-ada-001'
]

OPENAI_CHAT_LLMS = [
    'gpt-3.5-turbo',     
    'gpt-4',
]

HUGGINGFACE_LLMS = [
    'google/flan-t5-xl',
    'databricks/dolly-v2-3b',
    'bigscience/bloom-1b7'
]

HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

def createLLM(model_name="text-davinci-003", temperature=0):
    '''
    Instantiate an LLM wrapper for the given model name, dispatching to the
    OpenAI completion API, the OpenAI chat API, or the Hugging Face Hub.
    Returns None if the model name is not recognized.
    '''
    llm = None
    if model_name in OPENAI_LLMS:
        llm = OpenAI(model_name=model_name, temperature=temperature)
    elif model_name in OPENAI_CHAT_LLMS:
        llm = ChatOpenAI(model_name=model_name, temperature=temperature)
    elif model_name in HUGGINGFACE_LLMS:
        # Hugging Face Hub rejects temperature == 0, so use a near-zero
        # value for effectively deterministic output.
        llm = HuggingFaceHub(repo_id=model_name,
                             model_kwargs={"temperature": 1e-10})
    return llm
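
# Minimal usage sketch (assumes OPENAI_API_KEY is set in the environment;
# the prompt is illustrative):
#
#   llm = createLLM("text-davinci-003", temperature=0.7)
#   print(llm("Say hello in French."))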

def load_chat_agent(verbose=True):
    '''
    Return a chat LLM (gpt-3.5-turbo) at a moderate temperature.
    Note: despite the name, this returns a bare chat model, not an agent;
    the verbose flag is currently unused.
    '''
    return createLLM(OPENAI_CHAT_LLMS[0], temperature=0.5)
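
# Usage sketch (the greeting is illustrative); chat models in this
# langchain version are called with a list of messages:
#
#   from langchain.schema import HumanMessage
#   chat = load_chat_agent()
#   print(chat([HumanMessage(content="Hello!")]).content)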

DB_DIR = "./db"   # where the Chroma vector store is persisted

def load_book_agent(verbose=True):
    '''
    Build a RetrievalQA chain over the text files in ./data, backed by a
    Chroma vector store persisted in DB_DIR.
    '''
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

    if not os.path.exists(DB_DIR):
        # First run: load, chunk, embed, and persist the documents.
        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
        docs = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents=docs)
        docsearch = Chroma.from_documents(text_chunks, embeddings,
                                          persist_directory=DB_DIR)
        docsearch.persist()
        retriever = docsearch.as_retriever()
    else:
        # Subsequent runs: reopen the persisted vector store.
        vectordb = Chroma(persist_directory=DB_DIR,
                          embedding_function=embeddings)
        retriever = vectordb.as_retriever()

    qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.9),
                                     chain_type="stuff",
                                     retriever=retriever,
                                     return_source_documents=True)
    return qa
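
# Usage sketch (assumes OPENAI_API_KEY is set and ./data contains .txt files;
# the query string is illustrative):
#
#   qa = load_book_agent()
#   result = qa({"query": "Who narrates the story?"})
#   print(result["result"])
#   for doc in result["source_documents"]:
#       print(doc.metadata)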

def load_sales_agent(verbose=True):
    '''
    Hard-coded agent that fronts an internal sales CSV file for demo purposes.
    '''
    llm = createLLM(model_name='text-davinci-003')
    df = pd.read_csv("data/sales_data.csv")
    agent = create_pandas_dataframe_agent(llm, df, verbose=verbose)
    return agent
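
# Usage sketch (assumes data/sales_data.csv exists; the question and any
# column names it implies are illustrative):
#
#   agent = load_sales_agent(verbose=False)
#   print(agent.run("Which month had the highest total sales?"))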

def load_sqlite_agent(model_name="text-davinci-003"):
    '''
    Hard-coded agent that fronts a sqlite DB of digital media for demo purposes.
    '''
    llm = createLLM(model_name)  # honor the caller's model choice
    sqlite_db_path = "./data/Chinook_Sqlite.sqlite"
    db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
    db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)
    return db_chain
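
# Usage sketch against the Chinook sample database bundled under ./data:
#
#   chain = load_sqlite_agent()
#   print(chain.run("How many artists are in the database?"))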

from langchain.tools import DuckDuckGoSearchRun
def load_chained_agent(verbose=True, model_name="text-davinci-003"):
    '''
    Build a zero-shot ReAct agent wired to search, weather, news, Python,
    and Wolfram Alpha tools.
    '''
    llm = createLLM(model_name)
    toolkit = load_tools(["serpapi", "open-meteo-api", "news-api",
                          "python_repl", "wolfram-alpha"],
                         llm=llm,
                         serpapi_api_key=os.getenv('SERPAPI_API_KEY'),
                         news_api_key=os.getenv('NEWS_API_KEY'),
                         # passed explicitly like the other tool keys
                         wolfram_alpha_appid=os.getenv('WOLFRAM_ALPHA_APPID'),
                         # unused unless a TMDB tool is added to the list above
                         tmdb_bearer_token=os.getenv('TMDB_BEARER_TOKEN'))
    toolkit += [DuckDuckGoSearchRun()]

    agent = initialize_agent(toolkit, 
                             llm, 
                             agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
                             verbose=verbose, 
                             return_intermediate_steps=True)
    return agent
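
# Hedged smoke test for the chained agent. Assumes OPENAI_API_KEY,
# SERPAPI_API_KEY, and the other tool keys above are set; the question is
# illustrative. Because the agent is built with return_intermediate_steps=True,
# it must be called with a dict rather than .run(), and the answer read from
# the "output" key.
if __name__ == "__main__":
    agent = load_chained_agent(verbose=True)
    response = agent({"input": "What year was the Eiffel Tower completed?"})
    print(response["output"])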