# -*- coding: utf-8 -*-
"""RAG
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/18JQq2-GCmrrwAk9UuvqqeVrrnB4raKZt
"""
# !pip install -q pypdf
# !pip install torch
# !pip install -q transformers
# !pip -q install sentence-transformers
# !pip install -q llama-index
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
# !pip install llama-index-embeddings-huggingface
# !pip install llama-index-llms-llama-cpp
# (GPU support comes from the CUBLAS build of llama-cpp-python above; no separate "pip install cuda" is needed)
import torch
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
messages_to_prompt,
completion_to_prompt,
)
llm = LlamaCPP(
    # you can pass the URL of a GGUF model to download it automatically
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Mistral-7B-Instruct-v0.2 supports a much larger context window, but 4096 keeps memory usage modest
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__(); n_gpu_layers=-1 offloads all layers to the GPU
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into the llama2/Mistral instruct prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
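# Optional sanity check (not part of the original notebook): run a one-off completion to confirm
# the GGUF model downloaded and loads with GPU offload before building the index.
# The prompt below is just a placeholder and this block can be removed.
sanity_response = llm.complete("In one sentence, what is retrieval-augmented generation?")
print(sanity_response.text)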
from llama_index.core import Document
documents = SimpleDirectoryReader(
    input_files=["/content/Mindcase Data.pdf"]
).load_data()
# merge all pages into a single Document so the sentence-window parser sees continuous text
documents = Document(text="\n\n".join([doc.text for doc in documents]))
import os
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext, load_index_from_storage
def get_build_index(documents, llm, embed_model="local:BAAI/bge-small-en-v1.5",
                    sentence_window_size=3, save_dir="./vector_store/index"):
    # parse each sentence into its own node and store the surrounding sentences as "window" metadata
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )
    if not os.path.exists(save_dir):
        # build the index and persist it to disk
        index = VectorStoreIndex.from_documents(
            [documents], service_context=sentence_context
        )
        index.storage_context.persist(persist_dir=save_dir)
    else:
        # load the existing index from disk
        index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )
    return index
# get the vector index
vector_index = get_build_index(documents=documents, llm=llm, embed_model="local:BAAI/bge-small-en-v1.5", sentence_window_size=3, save_dir="./vector_store/index")
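# Optional check (assumes the PDF above yielded at least one node): retrieve a single node and
# print its "window" metadata to see the surrounding sentences that the SentenceWindowNodeParser
# stored alongside each sentence. The query string here is only a placeholder.
sample_nodes = vector_index.as_retriever(similarity_top_k=1).retrieve("What is this document about?")
if sample_nodes:
    print(sample_nodes[0].node.metadata["window"])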
from llama_index.core.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank

def get_query_engine(sentence_index, similarity_top_k=6, rerank_top_n=2):
    # replace each retrieved sentence with its full window of surrounding sentences
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    # rerank the expanded windows with a cross-encoder and keep the best rerank_top_n
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )
    engine = sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
    )
    return engine

query_engine = get_query_engine(sentence_index=vector_index, similarity_top_k=6, rerank_top_n=2)
def query(question):
    # run the question through the sentence-window query engine and return its response
    return query_engine.query(question)
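# Example usage (hypothetical question; adjust it to the contents of Mindcase Data.pdf).
# query_engine.query() returns a Response object: printing it shows the synthesized answer,
# and response.source_nodes holds the reranked sentence windows used as context.
if __name__ == "__main__":
    answer = query("Summarize the main points of the document.")
    print(answer)
    for node in answer.source_nodes:
        print(node.score, node.node.metadata.get("original_text", ""))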