lingyit1108 committed on
Commit
b580d80
1 Parent(s): ac8a60b

Create RAGAs results with the triad of metrics

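For context, the "triad of metrics" this commit records is the RAG triad from trulens_eval: answer relevance, context relevance, and groundedness. The sketch below mirrors the feedback definitions added in utils.py in this commit; it assumes trulens-eval==0.22.2 (as pinned in requirements.txt) and an OPENAI_API_KEY in the environment.

```python
import numpy as np
from trulens_eval import Feedback, TruLlama, OpenAI
from trulens_eval.feedback import Groundedness

provider = OpenAI()  # LLM-based feedback provider; needs OPENAI_API_KEY

# 1) Answer Relevance: is the final answer relevant to the question?
qa_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

# 2) Context Relevance: is each retrieved chunk relevant? Averaged over chunks.
qs_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

# 3) Groundedness: is the answer supported by the retrieved context?
grounded = Groundedness(groundedness_provider=provider)
groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

feedbacks = [qa_relevance, qs_relevance, groundedness]
```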
.gitignore CHANGED
@@ -3,10 +3,10 @@
 .streamlit/
 results/
 
-*.sqlite
 data/
 
 notebooks/test_model
 screenshot_questions/
 
-# ux/
+# ux/
+# *.sqlite
archive/{requirements.txt → dependencies/requirements_backup.txt} RENAMED
File without changes
archive/dependencies/requirements_llama-index==0.9.24.txt ADDED
@@ -0,0 +1,259 @@
+aiohttp==3.9.1
+aiosignal==1.3.1
+alembic==1.13.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+appnope==0.1.3
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asgiref==3.7.2
+asttokens==2.4.1
+async-lru==2.0.4
+async-timeout==4.0.3
+attrs==23.2.0
+Babel==2.14.0
+backoff==2.2.1
+bcrypt==4.1.2
+beautifulsoup4==4.12.3
+bleach==6.1.0
+blinker==1.7.0
+bs4==0.0.2
+build==1.0.3
+cachetools==5.3.2
+certifi==2023.11.17
+cffi==1.16.0
+charset-normalizer==3.3.2
+chroma-hnswlib==0.7.3
+chromadb==0.4.22
+click==8.1.7
+coloredlogs==15.0.1
+comm==0.2.0
+contourpy==1.2.0
+cycler==0.12.1
+dataclasses-json==0.6.3
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+Deprecated==1.2.14
+dill==0.3.7
+dirtyjson==1.0.8
+distro==1.9.0
+entrypoints==0.4
+exceptiongroup==1.2.0
+executing==2.0.1
+Faker==22.0.0
+fastapi==0.109.0
+fastjsonschema==2.19.1
+favicon==0.7.0
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.47.0
+fqdn==1.5.1
+frozendict==2.4.0
+frozenlist==1.4.1
+fsspec==2023.12.2
+gitdb==4.0.11
+GitPython==3.1.40
+google-auth==2.27.0
+googleapis-common-protos==1.62.0
+greenlet==3.0.3
+grpcio==1.60.0
+h11==0.14.0
+htbuilder==0.6.2
+httpcore==1.0.2
+httptools==0.6.1
+httpx==0.26.0
+huggingface-hub==0.20.1
+humanfriendly==10.0
+humanize==4.9.0
+idna==3.6
+importlib-metadata==6.11.0
+importlib-resources==6.1.1
+ipykernel==6.28.0
+ipython==8.18.1
+ipywidgets==8.1.1
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.2
+joblib==1.3.2
+json5==0.9.14
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.20.0
+jsonschema-specifications==2023.12.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.9.0
+jupyter-lsp==2.2.1
+jupyter_client==8.6.0
+jupyter_core==5.6.1
+jupyter_server==2.12.1
+jupyter_server_terminals==0.5.1
+jupyterlab==4.0.10
+jupyterlab-widgets==3.0.9
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.25.2
+kiwisolver==1.4.5
+kubernetes==29.0.0
+langchain==0.0.354
+langchain-community==0.0.8
+langchain-core==0.1.23
+langsmith==0.0.87
+llama-index==0.9.24
+lxml==5.1.0
+Mako==1.3.0
+Markdown==3.5.1
+markdown-it-py==3.0.0
+markdownlit==0.0.7
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+matplotlib==3.8.2
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+merkle-json==1.0.0
+millify==0.1.1
+mistune==3.0.2
+mmh3==4.1.0
+monotonic==1.6
+more-itertools==10.1.0
+mpmath==1.3.0
+multidict==6.0.4
+munch==4.0.0
+mypy-extensions==1.0.0
+nbclient==0.9.0
+nbconvert==7.14.0
+nbformat==5.9.2
+nest-asyncio==1.5.8
+networkx==3.2.1
+nltk==3.8.1
+notebook==7.0.6
+notebook_shim==0.2.3
+numpy==1.26.2
+oauthlib==3.2.2
+onnxruntime==1.17.0
+openai==1.6.1
+opentelemetry-api==1.22.0
+opentelemetry-exporter-otlp-proto-common==1.22.0
+opentelemetry-exporter-otlp-proto-grpc==1.22.0
+opentelemetry-instrumentation==0.43b0
+opentelemetry-instrumentation-asgi==0.43b0
+opentelemetry-instrumentation-fastapi==0.43b0
+opentelemetry-proto==1.22.0
+opentelemetry-sdk==1.22.0
+opentelemetry-semantic-conventions==0.43b0
+opentelemetry-util-http==0.43b0
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.4
+pandocfilters==1.5.0
+parso==0.8.3
+pexpect==4.9.0
+pillow==10.2.0
+platformdirs==4.1.0
+posthog==3.3.3
+prometheus-client==0.19.0
+prompt-toolkit==3.0.43
+protobuf==4.25.1
+psutil==5.9.7
+ptyprocess==0.7.0
+pulsar-client==3.4.0
+pure-eval==0.2.2
+pyarrow==14.0.2
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pycparser==2.21
+pydantic==2.5.3
+pydantic_core==2.14.6
+pydeck==0.8.1b0
+Pygments==2.17.2
+pymdown-extensions==10.7
+PyMuPDF==1.23.22
+PyMuPDFb==1.23.22
+pyparsing==3.1.1
+pypdf==4.0.1
+PyPika==0.48.9
+pyproject_hooks==1.0.0
+python-dateutil==2.8.2
+python-decouple==3.8
+python-dotenv==1.0.0
+python-json-logger==2.0.7
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==25.1.2
+qtconsole==5.5.1
+QtPy==2.4.1
+referencing==0.32.0
+regex==2023.12.25
+requests==2.31.0
+requests-oauthlib==1.3.1
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.0
+rpds-py==0.16.2
+rsa==4.9
+safetensors==0.4.1
+scikit-learn==1.4.0
+scipy==1.12.0
+Send2Trash==1.8.2
+sentence-transformers==2.3.0
+sentencepiece==0.1.99
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.24
+st-annotated-text==4.0.1
+stack-data==0.6.3
+starlette==0.35.1
+streamlit==1.31.1
+streamlit-aggrid==0.3.4.post3
+streamlit-camera-input-live==0.2.0
+streamlit-card==1.0.0
+streamlit-embedcode==0.1.2
+streamlit-extras==0.3.6
+streamlit-faker==0.0.3
+streamlit-feedback==0.1.3
+streamlit-image-coordinates==0.1.6
+streamlit-keyup==0.2.2
+streamlit-toggle-switch==1.0.2
+streamlit-vertical-slider==2.5.5
+sympy==1.12
+tenacity==8.2.3
+terminado==0.18.0
+threadpoolctl==3.2.0
+tiktoken==0.5.2
+tinycss2==1.2.1
+tokenizers==0.15.2
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.0
+torch==2.1.2
+tornado==6.4
+tqdm==4.66.1
+traitlets==5.14.0
+transformers==4.37.2
+trulens==0.13.4
+trulens-eval==0.20.0
+typer==0.9.0
+types-python-dateutil==2.8.19.14
+typing-inspect==0.9.0
+typing_extensions==4.9.0
+tzdata==2023.4
+tzlocal==5.2
+uri-template==1.3.0
+urllib3==2.1.0
+uvicorn==0.27.0
+uvloop==0.19.0
+validators==0.22.0
+watchfiles==0.21.0
+wcwidth==0.2.12
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+websockets==12.0
+widgetsnbextension==4.0.9
+wrapt==1.16.0
+yarl==1.9.4
+zipp==3.17.0
archive/{test.py → experiments/test.py} RENAMED
File without changes
archive/{init_setup.py → model_evaluation/init_setup.py} RENAMED
File without changes
archive/{main.py → model_evaluation/main.py} RENAMED
File without changes
archive/model_evaluation/main_new.py ADDED
@@ -0,0 +1,180 @@
+import utils
+import os
+
+import numpy as np
+import nest_asyncio
+import openai
+import chromadb
+
+from llama_index.legacy import (
+    VectorStoreIndex,
+    SimpleDirectoryReader
+)
+from llama_index.core import (
+    StorageContext,
+    Document,
+    Settings
+)
+from llama_index.vector_stores.chroma.base import ChromaVectorStore
+from llama_index.llms.openai import OpenAI
+from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding
+from trulens_eval import Tru
+
+from utils import get_prebuilt_trulens_recorder
+import time
+
+nest_asyncio.apply()
+openai.api_key = utils.get_openai_api_key()
+
+def main():
+
+    if not os.path.exists("./default.sqlite"):
+
+        start_time = time.time()
+
+        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
+        fine_tuned_path = "local:./models/fine-tuned-embeddings"
+
+        Settings.llm = llm
+        Settings.embed_model = fine_tuned_path
+
+        db = chromadb.PersistentClient(path="./models/chroma_db")
+        chroma_collection = db.get_or_create_collection("quickstart")
+
+        # assign chroma as the vector_store to the context
+        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+
+        # create your index
+        index = VectorStoreIndex.from_vector_store(
+            vector_store=vector_store,
+            storage_context=storage_context
+        )
+        query_engine = index.as_query_engine()
+
+        separator = "\n\n"
+        eval_questions = []
+        with open('./raw_documents/eval_questions.txt', 'r') as file:
+            content = file.read()
+
+        for question in content.split(separator):
+            print(question)
+            print(separator)
+            eval_questions.append(question.strip())
+
+        response = query_engine.query(eval_questions[0])
+        print(str(response))
+
+        tru = Tru(database_file="./models/trulens_eval.sqlite")
+        tru_recorder = get_prebuilt_trulens_recorder(query_engine,
+                                                     app_id="Direct Query Engine")
+
+        print("Sending each question to llm ..")
+        with tru_recorder as recording:
+            for question in eval_questions:
+                response = query_engine.query(question)
+
+        records, feedback = tru.get_records_and_feedback(app_ids=[])
+
+        os.makedirs("./results", exist_ok=True)
+        records.to_csv("./results/records.csv", index=False)
+
+        print(tru.db.engine.url.render_as_string(hide_password=False))
+
+        end_time = time.time()
+        time_spent_mins = (end_time - start_time) / 60
+        with open("./results/time_cost.txt", "w") as fp:
+            fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")
+
+if __name__ == "__main__":
+
+    # main()
+    if False:
+        start_time = time.time()
+
+        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
+        fine_tuned_path = "local:./models/fine-tuned-embeddings"
+
+        Settings.llm = llm
+        Settings.embed_model = fine_tuned_path
+
+        db = chromadb.PersistentClient(path="./models/chroma_db")
+        chroma_collection = db.get_or_create_collection("quickstart")
+
+        # assign chroma as the vector_store to the context
+        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+
+        # create your index
+        index = VectorStoreIndex.from_vector_store(
+            vector_store=vector_store,
+            storage_context=storage_context
+        )
+        query_engine = index.as_query_engine()
+
+        separator = "\n\n"
+        eval_questions = []
+        with open('./raw_documents/eval_questions.txt', 'r') as file:
+            content = file.read()
+
+        for question in content.split(separator):
+            print(question)
+            print(separator)
+            eval_questions.append(question.strip())
+
+        response = query_engine.query(eval_questions[0])
+        print(str(response))
+
+
+
+        from trulens_eval import Tru
+        tru = Tru()
+
+        documents = SimpleDirectoryReader(
+            input_files=["./raw_documents/qna.txt"]
+        ).load_data()
+        index = VectorStoreIndex.from_documents(documents)
+
+        query_engine = index.as_query_engine()
+        response = query_engine.query("Which is not a government healthcare philosophy?")
+        print(response)
+
+        from trulens_eval.feedback.provider.openai import OpenAI
+        openai = OpenAI()
+
+        # select context to be used in feedback. the location of context is app specific.
+        from trulens_eval.app import App
+        context = App.select_context(query_engine)
+
+        from trulens_eval import Feedback
+
+        # Define a groundedness feedback function
+        from trulens_eval.feedback import Groundedness
+        grounded = Groundedness(groundedness_provider=OpenAI())
+        f_groundedness = (
+            Feedback(grounded.groundedness_measure_with_cot_reasons)
+            .on(context.collect())  # collect context chunks into a list
+            .on_output()
+            .aggregate(grounded.grounded_statements_aggregator)
+        )
+
+        # Question/answer relevance between overall question and answer.
+        f_qa_relevance = Feedback(openai.relevance).on_input_output()
+
+        # Question/statement relevance between question and each context chunk.
+        f_qs_relevance = (
+            Feedback(openai.qs_relevance)
+            .on_input()
+            .on(context)
+            .aggregate(np.mean)
+        )
+
+        from trulens_eval import TruLlama
+        tru_query_engine_recorder = TruLlama(query_engine,
+                                             app_id='LlamaIndex_App1',
+                                             feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])
+
+    if False:
+        # or as context manager
+        with tru_query_engine_recorder as recording:
+            query_engine.query("Which of the following is TRUE on the similarity of Means Testing and Casemix?")
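Once main() has run, the per-question triad scores land in ./results/records.csv. A quick way to inspect them; the column names are assumptions based on the Feedback names defined in archive/model_evaluation/utils.py:

```python
import pandas as pd

records = pd.read_csv("./results/records.csv")
# Column names assumed from the Feedback names in utils.py:
triad = ["Answer Relevance", "Context Relevance", "Groundedness"]
print(records[triad].describe())  # score distributions across the eval questions
```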
archive/model_evaluation/utils.py ADDED
@@ -0,0 +1,160 @@
+import os
+import numpy as np
+from trulens_eval import (
+    Feedback,
+    TruLlama,
+    OpenAI
+)
+
+from trulens_eval.feedback import Groundedness
+import nest_asyncio
+
+from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
+from llama_index.core import load_index_from_storage
+from llama_index.core.node_parser import HierarchicalNodeParser
+from llama_index.core.node_parser import get_leaf_nodes
+
+# AutoMergingRetriever is used in get_automerging_query_engine below
+from llama_index.core.retrievers import AutoMergingRetriever
+
+from llama_index.core.node_parser import SentenceWindowNodeParser
+from llama_index.core.postprocessor import MetadataReplacementPostProcessor
+from llama_index.core.postprocessor import SentenceTransformerRerank
+from llama_index.core.query_engine import RetrieverQueryEngine
+
+
+nest_asyncio.apply()
+openai = OpenAI()
+
+qa_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
+    .on_input_output()
+)
+
+qs_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name="Context Relevance")
+    .on_input()
+    .on(TruLlama.select_source_nodes().node.text)
+    .aggregate(np.mean)
+)
+
+# grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
+grounded = Groundedness(groundedness_provider=openai)
+
+groundedness = (
+    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
+    .on(TruLlama.select_source_nodes().node.text)
+    .on_output()
+    .aggregate(grounded.grounded_statements_aggregator)
+)
+
+feedbacks = [qa_relevance, qs_relevance, groundedness]
+
+def get_openai_api_key():
+    return os.getenv("OPENAI_API_KEY")
+
+def get_trulens_recorder(query_engine, feedbacks, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def get_prebuilt_trulens_recorder(query_engine, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def build_sentence_window_index(
+    document, llm, embed_model="local:BAAI/bge-small-en-v1.5", save_dir="sentence_index"
+):
+    # create the sentence window node parser w/ default settings
+    node_parser = SentenceWindowNodeParser.from_defaults(
+        window_size=3,
+        window_metadata_key="window",
+        original_text_metadata_key="original_text",
+    )
+    sentence_context = ServiceContext.from_defaults(
+        llm=llm,
+        embed_model=embed_model,
+        node_parser=node_parser,
+    )
+    if not os.path.exists(save_dir):
+        sentence_index = VectorStoreIndex.from_documents(
+            [document], service_context=sentence_context
+        )
+        sentence_index.storage_context.persist(persist_dir=save_dir)
+    else:
+        sentence_index = load_index_from_storage(
+            StorageContext.from_defaults(persist_dir=save_dir),
+            service_context=sentence_context,
+        )
+
+    return sentence_index
+
+def get_sentence_window_query_engine(
+    sentence_index,
+    similarity_top_k=6,
+    rerank_top_n=2,
+):
+    # define postprocessors
+    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
+    rerank = SentenceTransformerRerank(
+        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
+    )
+
+    sentence_window_engine = sentence_index.as_query_engine(
+        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
+    )
+    return sentence_window_engine
+
+def build_automerging_index(
+    documents,
+    llm,
+    embed_model="local:BAAI/bge-small-en-v1.5",
+    save_dir="merging_index",
+    chunk_sizes=None,
+):
+    chunk_sizes = chunk_sizes or [2048, 512, 128]
+    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
+    nodes = node_parser.get_nodes_from_documents(documents)
+    leaf_nodes = get_leaf_nodes(nodes)
+    merging_context = ServiceContext.from_defaults(
+        llm=llm,
+        embed_model=embed_model,
+    )
+    storage_context = StorageContext.from_defaults()
+    storage_context.docstore.add_documents(nodes)
+
+    if not os.path.exists(save_dir):
+        automerging_index = VectorStoreIndex(
+            leaf_nodes, storage_context=storage_context, service_context=merging_context
+        )
+        automerging_index.storage_context.persist(persist_dir=save_dir)
+    else:
+        automerging_index = load_index_from_storage(
+            StorageContext.from_defaults(persist_dir=save_dir),
+            service_context=merging_context,
+        )
+    return automerging_index
+
+def get_automerging_query_engine(
+    automerging_index,
+    similarity_top_k=12,
+    rerank_top_n=2,
+):
+    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
+    retriever = AutoMergingRetriever(
+        base_retriever, automerging_index.storage_context, verbose=True
+    )
+    rerank = SentenceTransformerRerank(
+        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
+    )
+    auto_merging_engine = RetrieverQueryEngine.from_args(
+        retriever, node_postprocessors=[rerank]
+    )
+    return auto_merging_engine
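A minimal usage sketch for the auto-merging helpers above, assuming OPENAI_API_KEY is set and reusing a document path that appears elsewhere in this commit (./raw_documents/qna.txt); the OpenAI alias avoids clashing with the trulens_eval provider imported in this module:

```python
from llama_index.core import SimpleDirectoryReader
from llama_index.llms.openai import OpenAI as OpenAILLM

# Load one source document and build the hierarchical (auto-merging) index.
documents = SimpleDirectoryReader(input_files=["./raw_documents/qna.txt"]).load_data()
llm = OpenAILLM(model="gpt-3.5-turbo-1106", temperature=0.0)
index = build_automerging_index(documents, llm, save_dir="merging_index")

# Retrieve with leaf-node merging, then rerank before answering.
engine = get_automerging_query_engine(index, similarity_top_k=12, rerank_top_n=2)
print(engine.query("Which is not a government healthcare philosophy?"))
```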
archive/model_evaluation/utils_new.py ADDED
@@ -0,0 +1,95 @@
+import os
+import numpy as np
+from trulens_eval import (
+    Feedback,
+    TruLlama,
+    OpenAI
+)
+
+from trulens_eval.feedback import Groundedness
+import nest_asyncio
+
+from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
+from llama_index.core import load_index_from_storage
+from llama_index.core.node_parser import HierarchicalNodeParser
+from llama_index.core.node_parser import get_leaf_nodes
+from llama_index.core.query_engine import RetrieverQueryEngine
+
+from llama_index.packs.auto_merging_retriever.base import AutoMergingRetrieverPack
+
+
+nest_asyncio.apply()
+openai = OpenAI()
+
+qa_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
+    .on_input_output()
+)
+
+qs_relevance = (
+    Feedback(openai.relevance_with_cot_reasons, name="Context Relevance")
+    .on_input()
+    .on(TruLlama.select_source_nodes().node.text)
+    .aggregate(np.mean)
+)
+
+# grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
+grounded = Groundedness(groundedness_provider=openai)
+
+groundedness = (
+    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
+    .on(TruLlama.select_source_nodes().node.text)
+    .on_output()
+    .aggregate(grounded.grounded_statements_aggregator)
+)
+
+feedbacks = [qa_relevance, qs_relevance, groundedness]
+
+def get_openai_api_key():
+    return os.getenv("OPENAI_API_KEY")
+
+def get_trulens_recorder(query_engine, feedbacks, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def get_prebuilt_trulens_recorder(query_engine, app_id):
+    tru_recorder = TruLlama(
+        query_engine,
+        app_id=app_id,
+        feedbacks=feedbacks
+    )
+    return tru_recorder
+
+def build_automerging_index(
+    documents,
+    llm,
+    embed_model="local:BAAI/bge-small-en-v1.5",
+    save_dir="merging_index",
+    chunk_sizes=None,
+):
+    chunk_sizes = chunk_sizes or [2048, 512, 128]
+    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
+    nodes = node_parser.get_nodes_from_documents(documents)
+    leaf_nodes = get_leaf_nodes(nodes)
+    merging_context = ServiceContext.from_defaults(
+        llm=llm,
+        embed_model=embed_model,
+    )
+    storage_context = StorageContext.from_defaults()
+    storage_context.docstore.add_documents(nodes)
+
+    if not os.path.exists(save_dir):
+        automerging_index = VectorStoreIndex(
+            leaf_nodes, storage_context=storage_context, service_context=merging_context
+        )
+        automerging_index.storage_context.persist(persist_dir=save_dir)
+    else:
+        automerging_index = load_index_from_storage(
+            StorageContext.from_defaults(persist_dir=save_dir),
+            service_context=merging_context,
+        )
+    return automerging_index
database/mock_qna_source.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b604288137e94da640f1e5a88900390084eba746508cd7257dbcdba8cbe67f32
-size 2701
+oid sha256:c80d88333c3b9fb2a700d49113d2ba3fef7cc671c11b640168c389bef411bc05
+size 7624
evaluate_model.py ADDED
@@ -0,0 +1,83 @@
+import os, time
+import pandas as pd
+from tqdm import tqdm
+
+import chromadb
+import openai
+from llama_index.core import (
+    SimpleDirectoryReader,
+    StorageContext,
+    Document,
+    VectorStoreIndex,
+    ServiceContext
+)
+
+from llama_index.vector_stores.chroma.base import ChromaVectorStore
+from llama_index.llms.openai import OpenAI
+from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding
+from trulens_eval import Tru
+
+import utils
+from utils import get_prebuilt_trulens_recorder
+
+openai.api_key = utils.get_openai_api_key()
+
+def main():
+
+    start_time = time.time()
+
+    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
+    fine_tuned_path = "local:./models/fine-tuned-embeddings"
+
+    db = chromadb.PersistentClient(path="./models/chroma_db")
+    chroma_collection = db.get_or_create_collection("quickstart")
+
+    # assign chroma as the vector_store to the context
+    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    service_context = ServiceContext.from_defaults(llm=llm, embed_model=fine_tuned_path)
+
+    print("Loading embeddings from vector store..")
+    index = VectorStoreIndex.from_vector_store(
+        vector_store=vector_store,
+        storage_context=storage_context,
+        service_context=service_context
+    )
+    query_engine = index.as_query_engine()
+
+    mock_qna_source = pd.read_csv("./database/mock_qna_source.csv")
+    mock_qna_source = mock_qna_source[mock_qna_source["question"].notnull()]
+    print("mock_qna_source.shape", mock_qna_source.shape)
+
+    with open("./raw_documents/eval_questions.txt", "r") as fp:
+        questions_content = fp.read()
+    questions_content_ls = questions_content.split("\n\n")
+
+    eval_questions = mock_qna_source["question"].tolist() + questions_content_ls
+    response = query_engine.query(eval_questions[0])
+    print(str(response))
+
+    tru = Tru(database_file="./models/trulens_eval.sqlite")
+    tru_recorder = get_prebuilt_trulens_recorder(query_engine,
+                                                 app_id="Direct Query Engine")
+
+    print("Sending each question to llm..")
+    with tru_recorder as recording:
+        for question in tqdm(eval_questions):
+            response = query_engine.query(question)
+
+    records, feedback = tru.get_records_and_feedback(app_ids=[])
+
+    os.makedirs("./results", exist_ok=True)
+    records.to_csv("./results/records.csv", index=False)
+
+    print(tru.db.engine.url.render_as_string(hide_password=False))
+
+    end_time = time.time()
+    time_spent_mins = (end_time - start_time) / 60
+    with open("./results/time_cost.txt", "w") as fp:
+        fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")
+
+if __name__ == "__main__":
+
+    main()
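After evaluate_model.py has populated ./models/trulens_eval.sqlite, the aggregate scores can be reviewed from the same database; get_leaderboard and run_dashboard are standard trulens_eval APIs, shown here as a usage sketch:

```python
from trulens_eval import Tru

tru = Tru(database_file="./models/trulens_eval.sqlite")
print(tru.get_leaderboard(app_ids=[]))  # mean feedback scores per app_id
# tru.run_dashboard()                   # optionally serve the TruLens UI locally
```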
models/trulens_eval.sqlite ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6849488edfa526805c51322b217557de99ac01882a9d2a136a351a68c6b305d5
+size 2936832
notebooks/002_persisted-embedding-model.ipynb CHANGED
@@ -236,6 +236,7 @@
     "from llama_index.core import StorageContext\n",
     "from llama_index.core import ServiceContext\n",
     "from llama_index.core import Document\n",
+    "from llama_index.core import Settings\n",
     "\n",
     "from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding\n",
     "from llama_index.llms.openai import OpenAI\n",
pages/1_Leaderboard.py CHANGED
@@ -31,7 +31,7 @@ database_url = None
 
 
 def streamlit_app():
-    tru = Tru(database_url=database_url)
+    tru = Tru(database_file="./models/trulens_eval.sqlite")
     lms = tru.db
 
     # Set the title and subtitle of the app
pages/2_Evaluations.py CHANGED
@@ -48,7 +48,7 @@ st.runtime.legacy_caching.clear_cache()
 
 add_logo_and_style_overrides()
 
-tru = Tru()
+tru = Tru(database_file="./models/trulens_eval.sqlite")
 lms = tru.db
 
 df_results, feedback_cols = lms.get_records_and_feedback([])
@@ -143,7 +143,7 @@ else:
     else:
         app = apps
 
-    st.experimental_set_query_params(app=app)
+    st.query_params["app"] = app
 
     options = st.multiselect("Filter Applications", apps, default=app)
 
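st.experimental_set_query_params was deprecated in favor of the dict-like st.query_params, which is why requirements.txt below also bumps streamlit from 1.29.0 to 1.31.1 (st.query_params needs Streamlit >= 1.30). A before/after sketch, with a hypothetical value for illustration:

```python
import streamlit as st

app = "LlamaIndex_App1"               # hypothetical value for illustration
st.query_params["app"] = app          # new dict-like API (Streamlit >= 1.30)
# st.experimental_set_query_params(app=app)   # old, deprecated setter
current = st.query_params.get("app")  # reads work the same dict-like way
```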
pages/3_app.py CHANGED
@@ -4,13 +4,11 @@ import os
 try:
     raw_docs_files = ", ".join(os.listdir("./raw_documents"))
     curr_directory_files = ", ".join(os.listdir("."))
-
-    with open("./results/time_cost.txt", "r") as fp:
-        time_cost_str = fp.read()
-
-    system_update = raw_docs_files + "\n\n" + curr_directory_files + "\n\n" + time_cost_str
-
+    with open("./raw_documents/eval_answers.txt", "r") as fp:
+        eval_answers = fp.read()
+
+    system_update = raw_docs_files + "\n\n" + curr_directory_files + "\n\n" + eval_answers
 except:
     system_update = "NA"
 
-st.write(f"Hello World! File list: {system_update}")
+st.write(f"Hello World! Info about the app: {system_update}")
qna_prompting.py CHANGED
@@ -22,7 +22,11 @@ class QnA_Model(BaseModel):
         description=(
             "which chapter to extract, the format of this function argument"
             "is with `Chapter_` as prefix concatenated with chapter number"
-            "in integer. For example, `Chapter_2`, `Chapter_10`.")
+            "in integer. For example, `Chapter_2`, `Chapter_10`."
+            "if no chapter number specified or user requested for random question"
+            "or user has no preference over which chapter of textbook to be tested"
+            "return `Chapter_0`"
+        )
     )
 
 def get_qna_question(chapter_n: str) -> str:
@@ -37,11 +41,12 @@ def get_qna_question(chapter_n: str) -> str:
     """
     con = sqlite3.connect(db_path)
    cur = con.cursor()
-
-    sql_string = f"""SELECT id, question, option_1, option_2, option_3, option_4, correct_answer
-                     FROM qna_tbl
-                     WHERE chapter='{chapter_n}'
-                  """
+
+    filter_clause = "" if chapter_n == "Chapter_0" else f"WHERE chapter='{chapter_n}'"
+    sql_string = """SELECT id, question, option_1, option_2, option_3, option_4, correct_answer
+                    FROM qna_tbl
+                 """ + filter_clause
+
     res = cur.execute(sql_string)
     result = res.fetchone()
 
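The new filter_clause still interpolates chapter_n into the SQL with an f-string. Since the value arrives from an LLM tool call, a bound parameter is the safer equivalent; fetch_question below is a hypothetical rewrite, not the committed code:

```python
import sqlite3

def fetch_question(db_path: str, chapter_n: str):
    # Same lookup as get_qna_question above, but the chapter value is bound
    # as a parameter, so a malformed chapter_n cannot alter the SQL.
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    sql = ("SELECT id, question, option_1, option_2, option_3, option_4, correct_answer "
           "FROM qna_tbl")
    if chapter_n == "Chapter_0":      # Chapter_0 means "any chapter"
        res = cur.execute(sql)
    else:
        res = cur.execute(sql + " WHERE chapter = ?", (chapter_n,))
    return res.fetchone()
```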
raw_documents/eval_answers.txt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d6f42a68ccf96496a6dcd89016e53ebb1add84c42ecef1fffe08e211037c4df
-size 332
+oid sha256:0ac533f41fb123fe9281d27f2a3166e997f09c37178d12f5cbbea1fedeb5026b
+size 1458
raw_documents/eval_questions.txt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:931b5c74d5696e5efb242c7d968765734a621d881642a1b16dbd1d004fd2900e
-size 1473
+oid sha256:7bae3f2ac0cf2fdb2f58de8ecaa8d63014a4f84aa8a839dc7ff0d4ae8eb0eb22
+size 1126
raw_documents/qna.txt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96f148c23c11fe6df506f5286d2c90143b274ce2705501deaeac47fa63863825
-size 2134
+oid sha256:59cc1b620ccad1393fc4311e91e538774ac76149a42bd2391af7c855895d80bc
+size 56746
requirements.txt CHANGED
@@ -99,8 +99,8 @@ kiwisolver==1.4.5
 kubernetes==29.0.0
 langchain==0.0.354
 langchain-community==0.0.8
-langchain-core==0.1.5
-langsmith==0.0.77
+langchain-core==0.1.23
+langsmith==0.0.87
 llama-index==0.10.1
 llama-index-agent-openai==0.1.1
 llama-index-core==0.10.1
@@ -109,6 +109,7 @@ llama-index-embeddings-openai==0.1.1
 llama-index-legacy==0.9.48
 llama-index-llms-openai==0.1.1
 llama-index-multi-modal-llms-openai==0.1.1
+llama-index-packs-auto-merging-retriever==0.1.2
 llama-index-program-openai==0.1.1
 llama-index-question-gen-openai==0.1.1
 llama-index-readers-file==0.1.2
@@ -218,7 +219,7 @@ SQLAlchemy==2.0.24
 st-annotated-text==4.0.1
 stack-data==0.6.3
 starlette==0.35.1
-streamlit==1.29.0
+streamlit==1.31.1
 streamlit-aggrid==0.3.4.post3
 streamlit-camera-input-live==0.2.0
 streamlit-card==1.0.0
@@ -246,7 +247,7 @@ tqdm==4.66.1
 traitlets==5.14.0
 transformers==4.37.2
 trulens==0.13.4
-trulens-eval==0.20.0
+trulens-eval==0.22.2
 typer==0.9.0
 types-python-dateutil==2.8.19.14
 typing-inspect==0.9.0
streamlit_app.py CHANGED
@@ -71,7 +71,7 @@ with st.sidebar:
 
     st.subheader("Models and parameters")
    selected_model = st.sidebar.selectbox("Choose an OpenAI model",
-                                          ["gpt-3.5-turbo-1106", "gpt-4-1106-preview"],
+                                          ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"],
                                           key="selected_model")
     temperature = st.sidebar.slider("temperature", min_value=0.0, max_value=2.0,
                                     value=0.0, step=0.01)
utils.py CHANGED
@@ -5,27 +5,18 @@ from trulens_eval import (
     TruLlama,
     OpenAI
 )
-
 from trulens_eval.feedback import Groundedness
-import nest_asyncio
 
 from llama_index import ServiceContext, VectorStoreIndex, StorageContext
-from llama_index.node_parser import SentenceWindowNodeParser
-from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
-from llama_index.indices.postprocessor import SentenceTransformerRerank
 from llama_index import load_index_from_storage
-
 from llama_index.node_parser import HierarchicalNodeParser
 from llama_index.node_parser import get_leaf_nodes
 from llama_index import StorageContext
-from llama_index.retrievers import AutoMergingRetriever
-from llama_index.indices.postprocessor import SentenceTransformerRerank
-from llama_index.query_engine import RetrieverQueryEngine
-
 
+import nest_asyncio
 nest_asyncio.apply()
-openai = OpenAI()
 
+openai = OpenAI()
 qa_relevance = (
     Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
     .on_input_output()
@@ -69,49 +60,6 @@ def get_prebuilt_trulens_recorder(query_engine, app_id):
     )
     return tru_recorder
 
-def build_sentence_window_index(
-    document, llm, embed_model="local:BAAI/bge-small-en-v1.5", save_dir="sentence_index"
-):
-    # create the sentence window node parser w/ default settings
-    node_parser = SentenceWindowNodeParser.from_defaults(
-        window_size=3,
-        window_metadata_key="window",
-        original_text_metadata_key="original_text",
-    )
-    sentence_context = ServiceContext.from_defaults(
-        llm=llm,
-        embed_model=embed_model,
-        node_parser=node_parser,
-    )
-    if not os.path.exists(save_dir):
-        sentence_index = VectorStoreIndex.from_documents(
-            [document], service_context=sentence_context
-        )
-        sentence_index.storage_context.persist(persist_dir=save_dir)
-    else:
-        sentence_index = load_index_from_storage(
-            StorageContext.from_defaults(persist_dir=save_dir),
-            service_context=sentence_context,
-        )
-
-    return sentence_index
-
-def get_sentence_window_query_engine(
-    sentence_index,
-    similarity_top_k=6,
-    rerank_top_n=2,
-):
-    # define postprocessors
-    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
-    rerank = SentenceTransformerRerank(
-        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
-    )
-
-    sentence_window_engine = sentence_index.as_query_engine(
-        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
-    )
-    return sentence_window_engine
-
 def build_automerging_index(
     documents,
     llm,
@@ -140,21 +88,4 @@ def build_automerging_index(
         StorageContext.from_defaults(persist_dir=save_dir),
         service_context=merging_context,
     )
-    return automerging_index
-
-def get_automerging_query_engine(
-    automerging_index,
-    similarity_top_k=12,
-    rerank_top_n=2,
-):
-    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
-    retriever = AutoMergingRetriever(
-        base_retriever, automerging_index.storage_context, verbose=True
-    )
-    rerank = SentenceTransformerRerank(
-        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
-    )
-    auto_merging_engine = RetrieverQueryEngine.from_args(
-        retriever, node_postprocessors=[rerank]
-    )
-    return auto_merging_engine
+    return automerging_index