|
import os |
|
import numpy as np |
|
from trulens_eval import ( |
|
Feedback, |
|
TruLlama, |
|
OpenAI |
|
) |
|
from trulens_eval.feedback import Groundedness |
|
|
|
from llama_index import ServiceContext, VectorStoreIndex, StorageContext |
|
from llama_index import load_index_from_storage |
|
from llama_index.node_parser import HierarchicalNodeParser |
|
from llama_index.node_parser import get_leaf_nodes |
|
from llama_index import StorageContext |
|
|
|
import nest_asyncio

# Patch asyncio so event loops can be re-entered — presumably needed because
# this module is used from notebooks where a loop is already running
# (TruLens/LlamaIndex run async work internally). NOTE(review): confirm the
# target environment actually requires this.
nest_asyncio.apply()
|
|
|
# Shared OpenAI feedback provider used by all feedback functions below.
openai = OpenAI()

# Answer Relevance: scores how relevant the app's final answer is to the
# user's question, evaluated on the (input, output) pair.
qa_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

# Context Relevance: scores how relevant each retrieved source node is to
# the question; per-node scores are averaged with np.mean.
qs_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

# Groundedness: scores whether the answer is supported by the retrieved
# source-node text, aggregated over grounded statements.
grounded = Groundedness(groundedness_provider=openai)

groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# The RAG triad used by the recorder helpers below.
feedbacks = [qa_relevance, qs_relevance, groundedness]
|
|
|
def get_openai_api_key():
    """Return the OpenAI API key from the OPENAI_API_KEY environment variable.

    Returns:
        The key string, or None when the variable is not set.
    """
    return os.environ.get("OPENAI_API_KEY")
|
|
|
def get_trulens_recorder(query_engine, feedbacks, app_id):
    """Wrap *query_engine* in a TruLlama recorder.

    Args:
        query_engine: LlamaIndex query engine to instrument.
        feedbacks: Feedback functions to evaluate on each record.
        app_id: Identifier under which records are logged.

    Returns:
        The configured TruLlama recorder.
    """
    return TruLlama(query_engine, app_id=app_id, feedbacks=feedbacks)
|
|
|
def get_prebuilt_trulens_recorder(query_engine, app_id):
    """Wrap *query_engine* in a TruLlama recorder using the module-level
    RAG-triad ``feedbacks`` (answer relevance, context relevance,
    groundedness).

    Args:
        query_engine: LlamaIndex query engine to instrument.
        app_id: Identifier under which records are logged.

    Returns:
        The configured TruLlama recorder.
    """
    return TruLlama(query_engine, app_id=app_id, feedbacks=feedbacks)
|
|
|
def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build — or load from ``save_dir`` if already persisted — an
    auto-merging VectorStoreIndex.

    On a fresh build, documents are parsed into a hierarchy of nodes and
    only the leaf nodes are embedded into the index, while ALL nodes are
    placed in the docstore so retrieval can merge up to parent chunks.

    Args:
        documents: LlamaIndex documents to index.
        llm: LLM for the index's service context.
        embed_model: Embedding model spec (default: local BGE-small).
        save_dir: Directory to persist to / load from.
        chunk_sizes: Chunk-size hierarchy, largest first; defaults to
            [2048, 512, 128].

    Returns:
        The built or loaded VectorStoreIndex.
    """
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    if not os.path.exists(save_dir):
        # Fresh build. Parsing is done only on this path — the original
        # code parsed every document even when the index was subsequently
        # loaded from disk, wasting the whole parse on the cached path.
        node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
        nodes = node_parser.get_nodes_from_documents(documents)
        leaf_nodes = get_leaf_nodes(nodes)
        # Docstore must hold ALL nodes (not just leaves) so auto-merging
        # retrieval can climb to parents.
        storage_context = StorageContext.from_defaults()
        storage_context.docstore.add_documents(nodes)
        automerging_index = VectorStoreIndex(
            leaf_nodes, storage_context=storage_context, service_context=merging_context
        )
        automerging_index.storage_context.persist(persist_dir=save_dir)
    else:
        # Cached build: reload the previously persisted index.
        automerging_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )
    return automerging_index