Spaces:
Sleeping
Sleeping
File size: 2,290 Bytes
98997fa b0da961 98997fa b0da961 98997fa b0da961 98997fa 220a7fc 98997fa 220a7fc 98997fa 220a7fc 98997fa d40a83a 98997fa 9667972 220a7fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import functools
import json
import os

import chainlit as cl
import openai
from langchain import hub
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import Tool
from langchain.vectorstores import Chroma
# OpenAI API Key Setup
# Read the key from the OPENAI_API_KEY environment variable and assign it to
# the openai module. The subscript lookup raises KeyError when the variable is
# not set, so a missing key fails as soon as this module is executed.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Define our RAG tool function
_PDF_PATH = "data/The Goal - A Process of Ongoing Improvement (Third Revised Edition).pdf"


def _build_retriever():
    """Load the PDF, split it into overlapping chunks and index them in Chroma.

    Returns:
        A retriever over the Chroma vector store built from the PDF chunks.
    """
    # Load The Goal PDF
    docs = UnstructuredPDFLoader(_PDF_PATH).load()
    # Split Text Chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    # Embed Chunks into Chroma Vector Store
    vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
    return vectorstore.as_retriever()


def _format_docs(docs):
    """Join the page contents of the retrieved documents with blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


@functools.lru_cache(maxsize=1)
def _get_rag_chain():
    """Build the retrieval-augmented chain once and reuse it for later calls.

    Parsing and embedding the PDF and pulling the prompt are the expensive
    steps; caching the chain avoids repeating them on every query. A failed
    build is not cached, so the next call tries again.
    """
    retriever = _build_retriever()
    # Use RAG Prompt Template
    prompt = hub.pull("rlm/rag-prompt")
    llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0)  # or gpt-3.5-turbo
    return (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )


def _extract_prompt(query):
    """Return the question text carried by *query*.

    The tool is advertised as taking a JSON string of the form
    ``{"prompt": "..."}``. When *query* parses as a JSON object with a string
    ``"prompt"`` value, that value is returned; anything else is returned
    unchanged so plain-text questions keep working.
    """
    try:
        payload = json.loads(query)
    except (TypeError, ValueError):
        # Not JSON (or not a string at all): treat the input as the question.
        return query
    if isinstance(payload, dict) and isinstance(payload.get("prompt"), str):
        return payload["prompt"]
    return query


def rag(query):
    """Answer *query* from the PDF using retrieval-augmented generation.

    Args:
        query: The user question, either as plain text or as a JSON string
            of the form ``{"prompt": "..."}``.

    Returns:
        The generated answer as a single string.
    """
    question = _extract_prompt(query)  # e.g. "What is a Bottleneck Constraint?"
    # The streamed chunks are consecutive fragments of one answer, so they are
    # concatenated without any separator.
    return "".join(_get_rag_chain().stream(question))
# Tool wrapper that exposes `rag` to the agent.
# The `description` text matters most here: it is what the LLM reads when
# deciding which tool to use for a given input.
# `rag_format` is a plain (non-f) string, so its doubled braces appear
# literally in the final description.
rag_format = '{{"prompt": "prompt"}}'
rag_tool = Tool.from_function(
    name="RAG",
    func=rag,
    return_direct=True,
    description=(
        "Useful for retrieving contextual information about the PDF to answer "
        "user questions. Input should be a single string strictly in the "
        f"following JSON format: {rag_format}"
    ),
)
|