Spaces:

suneeln-duke
/

nexusai-v3

Runtime error

App Files Files Community

nexusai-v3 / scripts /path_gen /paths_gen.py

suneeln-duke

6c57304 3 months ago

raw

history blame

No virus

3.05 kB

	import langchain.document_loaders

	from langchain.document_loaders import DirectoryLoader, PyPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.schema import Document
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.vectorstores.chroma import Chroma
	import os
	import shutil

	from langchain.vectorstores.chroma import Chroma
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.chat_models import ChatOpenAI
	from langchain.prompts import ChatPromptTemplate


	def get_chunks(file_path):
	    """Load a PDF and split its contents into overlapping text chunks.

	    Args:
	        file_path: Path of the PDF file handed to ``PyPDFLoader``.

	    Returns:
	        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``
	        for the loaded documents.
	    """
	    pages = PyPDFLoader(file_path).load()

	    # Sizes are measured with ``len`` (i.e. in characters); consecutive chunks
	    # share 100 characters, and each chunk records its start index.
	    splitter_settings = {
	        "chunk_size": 300,
	        "chunk_overlap": 100,
	        "length_function": len,
	        "add_start_index": True,
	    }
	    splitter = RecursiveCharacterTextSplitter(**splitter_settings)

	    return splitter.split_documents(pages)

	def get_vectordb(chunks):
	    """Build a Chroma vector store from document chunks using OpenAI embeddings.

	    Args:
	        chunks: Documents to embed and index, e.g. the output of ``get_chunks``.

	    Returns:
	        The ``Chroma`` store returned by ``Chroma.from_documents``.

	    Note:
	        No ``persist_directory`` is passed, so the store is not saved to an
	        explicit on-disk location. An earlier draft persisted the store under
	        a ``CHROMA_PATH`` directory; that logic is currently disabled.
	    """
	    # ``Chroma.from_documents`` names its embedding parameter ``embedding``.
	    # Passing ``embedding_function=`` is forwarded via **kwargs to the Chroma
	    # constructor, which already receives ``embedding_function`` internally,
	    # and therefore raises "got multiple values for keyword argument".
	    return Chroma.from_documents(chunks, embedding=OpenAIEmbeddings())


	def _parse_model_response(response_text):
	    """Turn the model's textual reply into a Python object without executing it."""
	    import ast
	    import json

	    cleaned = response_text.strip()

	    # Chat models frequently wrap JSON in a Markdown code fence (```json ... ```);
	    # remove the fence and the optional language tag before parsing.
	    if cleaned.startswith("```"):
	        cleaned = cleaned.strip("`").strip()
	        if cleaned[:4].lower() == "json":
	            cleaned = cleaned[4:].lstrip()

	    try:
	        return json.loads(cleaned)
	    except json.JSONDecodeError:
	        # Fall back to Python-literal syntax (e.g. single-quoted strings), which
	        # the previous eval()-based implementation also accepted.
	        return ast.literal_eval(cleaned)


	def gen_sample(text, db):
	    """Ask the chat model for decision paths for one passage of a story.

	    The full instruction prompt (including ``text``) is used as the similarity
	    query against ``db``; the five retrieved chunks are joined into a context
	    block, and the model is asked to answer the instruction from that context.

	    Args:
	        text: The story passage to generate decision paths for.
	        db: A vector store exposing ``similarity_search_with_relevance_scores``.

	    Returns:
	        The parsed model reply. The prompt requests a JSON object with the keys
	        ``text`` and ``paths``, but the shape is not validated here.

	    Raises:
	        SyntaxError, ValueError: If the reply is neither valid JSON nor a valid
	            Python literal.
	    """
	    PROMPT_TEMPLATE = """
	Answer the question based only on the following context:

	{context}

	---

	Answer the question based on the above context: {question}
	"""

	    query_text = f"""

	Act as the author of a Choose Your Own Adventure Book. This book is special as it is based on existing material.
	Now, as with any choose your own adventure book, you'll have to generate decision paths at certain points in the story.
	Your job is to generate 4 decision paths for the given point in the story, if applicable to that point in the story.
	If the given part of the story doesn't contain any decisions from which to generate decision paths, don't
	generate any. If the given part of the story contains a decision, generate 4 decision paths for that decision.
	One among the 4 decision paths should be the original path, the other 3 should deviate from the original path in a sensible manner.
	The decision paths should be generated in a way that they are coherent with the existing story.
	The result should be a JSON object with the following keys: [text, paths]

	text: The given text
	paths: The generated decision paths as strings in a list

	```{text}```

	"""

	    # NOTE(review): the whole instruction prompt, not just ``text``, is used as
	    # the retrieval query — confirm this is intended.
	    results = db.similarity_search_with_relevance_scores(query_text, k=5)

	    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
	    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
	    prompt = prompt_template.format(context=context_text, question=query_text)

	    model = ChatOpenAI()
	    response_text = model.predict(prompt)

	    # SECURITY: this used to be ``eval(response_text)``, which executes whatever
	    # the model returns as Python code and also rejects valid JSON containing
	    # true/false/null. Parse the reply as data instead.
	    return _parse_model_response(response_text)