from typing import List, Optional

from llama_index.core import (
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq


class RAG:
    """Build a vector index over a set of files and expose a streaming query engine.

    Attributes are set in ``__init__``:

    * ``index`` -- the ``VectorStoreIndex`` built from the loaded documents.
    * ``query_engine`` -- the engine returned by ``index.as_query_engine`` with
      ``tree_summarize`` response mode, streaming enabled and
      ``similarity_top_k=10``.
    """

    def __init__(
        self,
        filepaths: List[str],
        rerank: Optional[SentenceTransformerRerank] = None,
    ) -> None:
        """Load ``filepaths``, index them, and create the query engine.

        Args:
            filepaths: Paths handed to ``SimpleDirectoryReader(input_files=...)``.
            rerank: Optional reranker; when truthy it is installed as the only
                node postprocessor of the query engine.
        """
        documents = SimpleDirectoryReader(input_files=filepaths).load_data()

        response_synthesizer = get_response_synthesizer(
            response_mode="tree_summarize",
            use_async=True,
        )
        # NOTE(review): the synthesizer is handed to ``from_documents`` only; the
        # query engine below is configured separately via ``response_mode`` /
        # ``use_async`` / ``streaming``. Presumably the index does not use this
        # argument -- confirm against the installed llama_index version before
        # removing it.
        self.index = VectorStoreIndex.from_documents(
            documents=documents,
            response_synthesizer=response_synthesizer,
        )

        # Build the shared engine options once; the reranker is the only thing
        # that differs between the two configurations.
        engine_kwargs = {
            "response_mode": "tree_summarize",
            "use_async": True,
            "streaming": True,
            "similarity_top_k": 10,
        }
        if rerank:
            engine_kwargs["node_postprocessors"] = [rerank]

        self.query_engine = self.index.as_query_engine(**engine_kwargs)

    def run_query_engine(self, prompt: str) -> str:
        """Run ``prompt`` through the query engine, echo the stream, return the text.

        The response is printed via ``print_response_stream()`` and then
        converted with ``str(...)`` for the return value.

        Args:
            prompt: The query passed to ``self.query_engine.query``.

        Returns:
            ``str(response)`` for the response object produced by the engine.
        """
        response = self.query_engine.query(prompt)
        response.print_response_stream()
        # NOTE(review): relies on the response object still being able to
        # produce its text after the stream was printed -- confirm for the
        # llama_index version in use.
        return str(response)


class ServiceContextModule:
    """Bundle a Groq LLM and a HuggingFace embedding model into a ServiceContext.

    The resulting object is exposed as ``service_context``. Nothing in this
    file passes it to ``RAG``; callers are presumably expected to wire it in
    themselves -- verify against the call sites.
    """

    def __init__(self, api_key: str, model_name: str) -> None:
        """Create the LLM, the embedding model, and the combined service context.

        Args:
            api_key: API key forwarded to ``Groq``.
            model_name: Model identifier forwarded to ``Groq`` as ``model``.
        """
        self._llm = Groq(model=model_name, api_key=api_key)
        # ``trust_remote_code=True`` is passed through to the embedding
        # wrapper together with the fixed model name.
        self._embedding_model = HuggingFaceEmbedding(
            "Snowflake/snowflake-arctic-embed-m-long",
            trust_remote_code=True,
        )
        self.service_context = ServiceContext.from_defaults(
            llm=self._llm,
            embed_model=self._embedding_model,
        )