suneeln-duke's picture
f
6c57304
raw
history blame
No virus
3.03 kB
import os
import json
import re
import openai
import langchain
import langchain.document_loaders
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
import os
import shutil
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
# read from config.ini file
import PyPDF2
def read_pages(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Passed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        list: One entry per page, in document order, holding whatever
        ``page.extract_text()`` returned for that page.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Iterate the pages directly instead of indexing by position.
    return [page.extract_text() for page in reader.pages]
def get_chunks(file_path, chunk_size=300, chunk_overlap=100):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        file_path: Path handed to ``PyPDFLoader``.
        chunk_size: Maximum chunk length, measured with ``len``.
            Defaults to 300, the value previously hard-coded here.
        chunk_overlap: Overlap between consecutive chunks, measured with
            ``len``. Defaults to 100, the value previously hard-coded here.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents``
        applied to the loaded documents.
    """
    documents = PyPDFLoader(file_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        # Ask the splitter to track where each chunk starts in its source.
        add_start_index=True,
    )
    return text_splitter.split_documents(documents)
def get_vectordb(chunks, CHROMA_PATH, base_dir="../../data/chroma"):
    """Open the Chroma store for ``CHROMA_PATH``, building it if absent.

    Args:
        chunks: Documents to index. Only used when the store does not exist
            yet; ignored when an existing directory is reopened.
        CHROMA_PATH: Name of the store's sub-directory under ``base_dir``.
        base_dir: Directory that holds all stores. Defaults to
            ``"../../data/chroma"``, which is resolved against the current
            working directory.

    Returns:
        The ``Chroma`` instance, either reopened from disk or newly created.
    """
    # Use a separate local so the caller-supplied name is not overwritten.
    persist_dir = f"{base_dir}/{CHROMA_PATH}"
    embeddings = OpenAIEmbeddings()

    if os.path.exists(persist_dir):
        return Chroma(persist_directory=persist_dir, embedding_function=embeddings)

    # Persist and report only for a freshly built store.
    db = Chroma.from_documents(chunks, embeddings, persist_directory=persist_dir)
    db.persist()
    print(f"Saved {len(chunks)} chunks to {persist_dir}.")
    return db
def classify_dec(text, db):
    """Ask the chat model whether ``text`` contains a story-affecting decision.

    The classification instructions and ``text`` are combined into one
    question. That question is used twice: as the similarity-search query
    against ``db`` (top 5 hits) and as the question in the final prompt.
    The page contents of the hits are joined to form the prompt's context.

    Args:
        text: The chunk to classify.
        db: Vector store exposing ``similarity_search_with_relevance_scores``.

    Returns:
        The model's raw reply; the prompt asks for the label "yes" or "no".
    """
    question = f"""
Classify whether the given chunk involves a decision that will effect the story or not.
A decision is defined as when the character goes about making a choice between two or more options.
The decision should be significant enough to affect the story in a major way.
It doesn't really involve emotions, feelings or thoughts, but what the character does, or what happens to them.
This involes interactions between characters, or the character and the environment.
What isn't a decision is chunks describing the setting, or the character's thoughts or feelings.
Return the answer as the corresponding decision label "yes" or "no"
{text}
"""
    template = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""

    # Relevance scores are returned with each hit but are not used.
    hits = db.similarity_search_with_relevance_scores(question, k=5)
    context = "\n\n---\n\n".join(document.page_content for document, _ in hits)

    prompt = ChatPromptTemplate.from_template(template).format(
        context=context, question=question
    )
    return ChatOpenAI().predict(prompt)