|
|
|
import os |
|
from dotenv import load_dotenv, find_dotenv |
|
from llama_index.llms.azure_openai import AzureOpenAI |
|
from llama_index.readers.file import PyMuPDFReader |
|
from llama_index.core.chat_engine import ContextChatEngine |
|
from llama_index.core import KnowledgeGraphIndex |
|
from llama_index.core.node_parser import SentenceSplitter |
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding |
|
|
|
def initialize_openai_creds():
    """Load environment variables and set API keys.

    NOTE(review): this definition is shadowed by a later function of the
    same name in this file, so it is unreachable after import — rename or
    remove one of the two definitions.

    :return: A tuple ``(general_creds, gpt4o_creds)`` of credential dicts
        read from environment variables (entries are ``None`` when the
        corresponding variable is unset).
    """
    # Locate the .env file; find_dotenv() returns "" when none is found.
    dotenv_path = find_dotenv()
    if dotenv_path == "":
        print("No .env file found. Make sure the .env file is in the correct directory.")
    else:
        print(f".env file found at: {dotenv_path}")

    load_dotenv(dotenv_path)

    # Shared Azure OpenAI resource: one key/endpoint, two deployment names.
    general_creds = {
        "api_key": os.getenv('AZURE_OPENAI_API_KEY'),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
        "temperature": 0,
        "gpt35_deployment_name": os.getenv("AZURE_DEPLOYMENT_NAME"),
        "gpt4o_mini_deployment_name": os.getenv("GPT4O_MINI_DEPLOYMENT_NAME")
    }

    # GPT-4o lives on its own resource (separate key/endpoint/version).
    # NOTE(review): "temperature" is a *string* whenever GPT4O_TEMPERATURE is
    # set (os.getenv returns str) but an int 0 otherwise — confirm callers cope.
    gpt4o_creds = {
        "api_key": os.getenv('GPT4O_API_KEY'),
        "api_version": os.getenv("GPT4O_API_VERSION"),
        "endpoint": os.getenv("GPT4O_AZURE_ENDPOINT"),
        "deployment_name": os.getenv("GPT4O_DEPLOYMENT_NAME"),
        "temperature": os.getenv("GPT4O_TEMPERATURE", 0)
    }

    return general_creds, gpt4o_creds
|
|
|
|
|
|
|
def initialize_openai_creds():
    """Load environment variables and return per-model Azure OpenAI credentials.

    Locates a .env file (if any), loads it into the process environment,
    then reads the credentials for each supported model.

    :return: A tuple ``(gpt35_creds, gpt4o_mini_creds, gpt4o_creds)`` of
        credential dicts (entries are ``None`` when the corresponding
        environment variable is unset).
    """
    # Locate the .env file; find_dotenv() returns "" when none is found.
    dotenv_path = find_dotenv()
    if dotenv_path == "":
        print("No .env file found. Make sure the .env file is in the correct directory.")
    else:
        print(f".env file found at: {dotenv_path}")

    load_dotenv(dotenv_path)

    gpt35_creds = {
        "api_key": os.getenv('AZURE_OPENAI_API_KEY_GPT35'),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "endpoint": os.getenv("AZURE_OPENAI_ENDPOINT_GPT35"),
        "temperature": 0,
        "deployment_name": os.getenv("AZURE_DEPLOYMENT_NAME_GPT35")
    }

    gpt4o_mini_creds = {
        "api_key": os.getenv('AZURE_OPENAI_API_KEY_GPT4O_MINI'),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "endpoint": os.getenv("AZURE_OPENAI_ENDPOINT_GPT4O_MINI"),
        "temperature": 0,
        "deployment_name": os.getenv("GPT4O_MINI_DEPLOYMENT_NAME")
    }

    # GPT-4o lives on its own resource (separate key/endpoint/version).
    # Cast temperature to float so it is numeric whether it comes from the
    # environment (os.getenv always returns a str) or from the default 0.
    gpt4o_creds = {
        "api_key": os.getenv('GPT4O_API_KEY'),
        "api_version": os.getenv("GPT4O_API_VERSION"),
        "endpoint": os.getenv("GPT4O_AZURE_ENDPOINT"),
        "deployment_name": os.getenv("GPT4O_DEPLOYMENT_NAME"),
        "temperature": float(os.getenv("GPT4O_TEMPERATURE", 0))
    }

    return gpt35_creds, gpt4o_mini_creds, gpt4o_creds
|
|
|
|
|
|
|
def create_llm(model: str, gpt35_creds: dict, gpt4o_mini_creds: dict, gpt4o_creds: dict):
    """
    Initialize and return the Azure OpenAI LLM based on the selected model.

    The three original branches were identical except for which credentials
    dict they read, so selection is done via a lookup table.

    :param model: The model to initialize ("gpt35", "gpt4o", or "gpt-4o-mini").
    :param gpt35_creds: Credentials for gpt35.
    :param gpt4o_mini_creds: Credentials for gpt-4o-mini.
    :param gpt4o_creds: Credentials for gpt4o.
    :return: A configured ``AzureOpenAI`` client.
    :raises ValueError: If ``model`` is not one of the supported names.
    """
    creds_by_model = {
        "gpt35": gpt35_creds,
        "gpt-4o-mini": gpt4o_mini_creds,
        "gpt4o": gpt4o_creds,
    }
    if model not in creds_by_model:
        raise ValueError(f"Invalid model: {model}. Choose from 'gpt35', 'gpt4o', or 'gpt-4o-mini'.")

    creds = creds_by_model[model]
    return AzureOpenAI(
        deployment_name=creds["deployment_name"],
        temperature=creds["temperature"],
        api_key=creds["api_key"],
        azure_endpoint=creds["endpoint"],
        api_version=creds["api_version"]
    )
|
|
|
|
|
|
|
def create_chat_engine(retriever, memory, llm):
    """Build and return a ContextChatEngine wired to the given retriever, memory, and LLM."""
    return ContextChatEngine.from_defaults(
        retriever=retriever,
        memory=memory,
        llm=llm,
    )
|
|
|
|
|
def load_documents(filepaths):
    """
    Load and return documents from specified file paths.

    :param filepaths: A single path (``str`` or ``os.PathLike``, e.g.
        ``pathlib.Path``) or a list of such paths.
    :return: A list of loaded documents.
    """
    loader = PyMuPDFReader()

    # Normalize a single path to a one-element list. Checking os.PathLike
    # too keeps a bare pathlib.Path from being iterated character-by-character.
    if isinstance(filepaths, (str, os.PathLike)):
        filepaths = [filepaths]

    all_documents = []
    for filepath in filepaths:
        # loader.load returns a list of documents per file; accumulate them all.
        all_documents.extend(loader.load(file_path=filepath))

    return all_documents
|
|
|
|
|
def create_kg_index(
    documents,
    storage_context,
    llm,
    max_triplets_per_chunk=10,
    embed_model=None,
    include_embeddings=True,
    chunk_size=512
):
    """
    Build and return a KnowledgeGraphIndex from the given documents.

    :param documents: Documents to index.
    :param storage_context: Storage context holding the graph store.
    :param llm: LLM used for triplet extraction.
    :param max_triplets_per_chunk: Maximum triplets extracted per chunk.
    :param embed_model: Embedding model; when ``None`` (the default) a
        ``HuggingFaceEmbedding("BAAI/bge-large-en-v1.5")`` is created.
    :param include_embeddings: Whether to include embeddings in the index.
    :param chunk_size: Chunk size for the sentence splitter.
    :return: The constructed KnowledgeGraphIndex.
    """
    # Build the default embedder lazily: an instance in the signature is
    # evaluated at module import time, loading model weights even when this
    # function is never called.
    if embed_model is None:
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

    splitter = SentenceSplitter(chunk_size=chunk_size)
    graph_index = KnowledgeGraphIndex.from_documents(
        documents,
        storage_context=storage_context,
        max_triplets_per_chunk=max_triplets_per_chunk,
        llm=llm,
        embed_model=embed_model,
        include_embeddings=include_embeddings,
        transformations=[splitter]
    )
    return graph_index
|
|
|
|
|
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor |
|
from llama_index.core.indices.property_graph import DynamicLLMPathExtractor |
|
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore |
|
from llama_index.core import PropertyGraphIndex |
|
|
|
|
|
def create_pg_index(
    llm,
    documents,
    graph_store,
    max_triplets_per_chunk=10,
    num_workers=4,
    embed_kg_nodes=True,
    embed_model=None,
    chunk_size=512
):
    """
    Build and return a PropertyGraphIndex from the given documents.

    :param llm: LLM used by the path extractor.
    :param documents: Documents to index.
    :param graph_store: Property graph store backing the index.
    :param max_triplets_per_chunk: Maximum triplets extracted per chunk.
    :param num_workers: Worker count for the path extractor.
    :param embed_kg_nodes: Whether to embed knowledge-graph nodes.
    :param embed_model: Embedding model; when ``None`` (the default) a
        ``HuggingFaceEmbedding("BAAI/bge-large-en-v1.5")`` is created.
    :param chunk_size: Chunk size for the sentence splitter (was a
        hard-coded 512; now a backward-compatible parameter).
    :return: The constructed PropertyGraphIndex.
    """
    # Build the default embedder lazily: an instance in the signature is
    # evaluated at module import time, loading model weights even when this
    # function is never called.
    if embed_model is None:
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

    splitter = SentenceSplitter(chunk_size=chunk_size)

    kg_extractor = DynamicLLMPathExtractor(
        llm=llm,
        max_triplets_per_chunk=max_triplets_per_chunk,
        num_workers=num_workers
    )

    graph_index = PropertyGraphIndex.from_documents(
        documents,
        property_graph_store=graph_store,
        embed_model=embed_model,
        embed_kg_nodes=embed_kg_nodes,
        kg_extractors=[kg_extractor],
        transformations=[splitter]
    )

    return graph_index