Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""RAG

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/18JQq2-GCmrrwAk9UuvqqeVrrnB4raKZt
"""
# !pip install -q pypdf
# !pip install torch
# !pip install -q transformers
# !pip -q install sentence-transformers
# !pip install -q llama-index
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
# !pip install llama-index-embeddings-huggingface
# !pip install llama-index-llms-llama-cpp
# !pip install cuda
import torch | |
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex | |
from llama_index.llms.llama_cpp import LlamaCPP | |
from llama_index.llms.llama_cpp.llama_utils import ( | |
messages_to_prompt, | |
completion_to_prompt, | |
) | |
# Local LLM used by the RAG pipeline, served through the llama.cpp wrapper.
llm = LlamaCPP(
    # --- model source ---
    # No local file is supplied (model_path is None); the weights are
    # referenced by URL instead. Earlier alternative kept for reference:
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_path=None,
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # --- generation settings ---
    temperature=0.1,
    max_new_tokens=256,
    # Context window handed to the wrapper, in tokens.
    context_window=4096,
    # --- prompt formatting ---
    # Formatters imported from llama_utils (described upstream as producing
    # the Llama2 prompt layout).
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    # --- pass-through kwargs ---
    # Extra arguments forwarded to __init__() of the underlying model.
    # NOTE(review): n_gpu_layers=-1 is presumably "offload every layer to
    # the GPU" in llama.cpp -- confirm against the llama-cpp-python docs.
    model_kwargs={"n_gpu_layers": -1},
    # Extra arguments forwarded to __call__(); none are needed here.
    generate_kwargs={},
    verbose=True,
)
from llama_index.core import SimpleDirectoryReader | |
from llama_index.core import Document | |
# Read the source PDF and merge the text of every loaded piece into a single
# Document, separating the pieces with a blank line.
documents = Document(
    text="\n\n".join(
        piece.text
        for piece in SimpleDirectoryReader(
            input_files=["/content/Mindcase Data.pdf"],
        ).load_data()
    )
)
import os | |
from llama_index.core.node_parser import SentenceWindowNodeParser | |
from llama_index.core import VectorStoreIndex, ServiceContext, load_index_from_storage | |
def get_build_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="./vector_store/index",
):
    """Build a sentence-window vector index, or reload it from ``save_dir``.

    When ``save_dir`` does not exist, the documents are indexed with a
    ``SentenceWindowNodeParser`` and the result is persisted to ``save_dir``.
    When it already exists, the index is loaded from that directory instead
    and ``documents`` is not used.

    Args:
        documents: A single document object, or a list/tuple of them.
        llm: Language model handed to the service context.
        embed_model: Embedding model specifier handed to the service context.
        sentence_window_size: ``window_size`` given to the node parser.
        save_dir: Directory the index is persisted to / loaded from.

    Returns:
        The newly built or reloaded index.
    """
    # StorageContext is not among the file's module-level imports; without
    # this import the reload branch below failed with NameError.
    from llama_index.core import StorageContext

    node_parser = SentenceWindowNodeParser(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )

    if not os.path.exists(save_dir):
        # from_documents expects a sequence: wrap a lone document, but pass
        # an existing list/tuple through instead of nesting it.
        if isinstance(documents, (list, tuple)):
            doc_list = list(documents)
        else:
            doc_list = [documents]
        index = VectorStoreIndex.from_documents(
            doc_list, service_context=sentence_context
        )
        index.storage_context.persist(persist_dir=save_dir)
    else:
        # Reuse the index persisted by an earlier run.
        index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )
    return index
# Build the sentence-window index for the loaded document, or reload it if
# the save directory already exists.
vector_index = get_build_index(
    documents=documents,
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="./vector_store/index",
)
from llama_index.core.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank | |
def get_query_engine(sentence_index, similarity_top_k=6, rerank_top_n=2):
    """Create a query engine on ``sentence_index`` with two node postprocessors.

    The engine is configured with ``similarity_top_k`` and a postprocessor
    list holding, in order, a ``MetadataReplacementPostProcessor`` targeting
    the "window" metadata key and a ``SentenceTransformerRerank`` using
    "BAAI/bge-reranker-base" with ``top_n=rerank_top_n``.
    """
    window_swapper = MetadataReplacementPostProcessor(target_metadata_key="window")
    reranker = SentenceTransformerRerank(
        model="BAAI/bge-reranker-base",
        top_n=rerank_top_n,
    )
    return sentence_index.as_query_engine(
        node_postprocessors=[window_swapper, reranker],
        similarity_top_k=similarity_top_k,
    )
# Module-level query engine built on the vector index.
query_engine = get_query_engine(
    sentence_index=vector_index,
    similarity_top_k=6,
    rerank_top_n=2,
)
def query(input):
    """Send ``input`` to the module-level ``query_engine`` and return its response.

    Args:
        input: The question to ask. The name shadows the ``input`` builtin
            but is kept so existing keyword callers continue to work.

    Returns:
        Whatever ``query_engine.query`` returns for the question.
    """
    # The engine object itself is not callable; queries must go through its
    # .query() method.
    return query_engine.query(input)