lingyit1108 committed
Commit b2b3b83
1 Parent(s): 187a37b

added trulens implementation for evaluation

.gitignore CHANGED
@@ -1,3 +1,8 @@
 .DS_Store
 
-.streamlit/
+.streamlit/
+results/
+
+*.sqlite
+ux/
+pages/
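Note: the new ignore entries all map to artifacts this commit generates at runtime — results/ and *.sqlite come from the evaluation run in main.py (TruLens writes default.sqlite and results/records.csv), while pages/ and ux/ are populated by streamlit_app.py from the trulens_eval package.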
main.py CHANGED
@@ -9,32 +9,62 @@ from llama_index import ServiceContext
 from llama_index.llms import OpenAI
 
 from llama_index.embeddings import HuggingFaceEmbedding
+from trulens_eval import Tru
 
+from utils import get_prebuilt_trulens_recorder
 
 openai.api_key = utils.get_openai_api_key()
 
-if __name__ == "__main__":
-
-    documents = SimpleDirectoryReader(
-        input_files=["./raw_documents/HI_knowledge_base.pdf"]
-    ).load_data()
-
-    document = Document(text="\n\n".join([doc.text for doc in documents]))
-
-    ### gpt-4-1106-preview
-    ### gpt-3.5-turbo-1106 / gpt-3.5-turbo
-    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)
-    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
-
-    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
-    index = VectorStoreIndex.from_documents([document], service_context=service_context)
-
-    query_engine = index.as_query_engine()
-
-    response = query_engine.query(
-        ("Intermediate and Long Term Care (ILTC) services are for those who need further care and"
-         "treatment after discharge from the hospital, who may need assistance with their activities of"
-         "daily living. This can be through"
-        )
-    )
-    print(str(response))
+def main():
+
+    if not os.path.exists("./default.sqlite"):
+
+        documents = SimpleDirectoryReader(
+            input_files=["./raw_documents/HI_knowledge_base.pdf"]
+        ).load_data()
+
+        document = Document(text="\n\n".join([doc.text for doc in documents]))
+
+        ### gpt-4-1106-preview
+        ### gpt-3.5-turbo-1106 / gpt-3.5-turbo
+        llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)
+        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
+        index = VectorStoreIndex.from_documents([document], service_context=service_context)
+
+        query_engine = index.as_query_engine()
+
+        separator = "\n\n"
+        eval_questions = []
+        with open('raw_documents/eval_questions.txt', 'r') as file:
+            content = file.read()
+
+        for question in content.split(separator):
+            print(question)
+            print(separator)
+            eval_questions.append(question.strip())
+
+        response = query_engine.query(eval_questions[0])
+        print(str(response))
+
+        tru = Tru()
+        # tru.reset_database()
+
+        tru_recorder = get_prebuilt_trulens_recorder(query_engine,
+                                                     app_id="Direct Query Engine")
+        with tru_recorder as recording:
+            for question in eval_questions:
+                response = query_engine.query(question)
+
+        records, feedback = tru.get_records_and_feedback(app_ids=[])
+
+        os.makedirs("results", exist_ok=True)
+        records.to_csv("results/records.csv", index=False)
+
+        print(tru.db.engine.url.render_as_string(hide_password=False))
+        # tru.run_dashboard()
+
+if __name__ == "__main__":
+
+    main()
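Once the run above completes, results/records.csv holds one row per recorded query. As a minimal sketch (not part of the commit) of how those results might be summarised — the feedback column names are an assumption, taken from the Feedback(name=...) values registered in utils.py:

import pandas as pd

# Load the records exported by main.py
records = pd.read_csv("results/records.csv")

# Assumed column names: trulens_eval labels feedback columns after the
# names given in utils.py ("Answer Relevance", "Context Relevance", "Groundedness")
for col in ["Answer Relevance", "Context Relevance", "Groundedness"]:
    if col in records.columns:
        print(f"{col}: mean score = {records[col].mean():.3f}")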
raw_documents/eval_answers.txt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d6f42a68ccf96496a6dcd89016e53ebb1add84c42ecef1fffe08e211037c4df
+size 332
raw_documents/eval_questions.txt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:931b5c74d5696e5efb242c7d968765734a621d881642a1b16dbd1d004fd2900e
+size 1473
requirements.txt CHANGED
@@ -1,2 +1,4 @@
 openai==1.6.1
-streamlit==1.29.0
+streamlit==1.29.0
+trulens==0.13.4
+trulens-eval==0.20.0
streamlit_app.py CHANGED
@@ -1,13 +1,44 @@
1
  import streamlit as st
2
  import os
 
3
 
4
  import openai
5
  from openai import OpenAI
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # App title
8
  st.set_page_config(page_title="💬 Open AI Chatbot")
9
  openai_api = os.getenv("OPENAI_API_KEY")
10
 
 
 
 
 
 
 
 
11
  # Replicate Credentials
12
  with st.sidebar:
13
  st.title("💬 Open AI Chatbot")
@@ -33,6 +64,20 @@ with st.sidebar:
33
  key="selected_model")
34
  temperature = st.sidebar.slider("temperature", min_value=0.01, max_value=2.0,
35
  value=0.1, step=0.01)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  st.markdown("📖 Reach out to SakiMilo to learn how to create this app!")
37
 
38
  # Store LLM generated responses
 
1
  import streamlit as st
2
  import os
3
+ import pandas as pd
4
 
5
  import openai
6
  from openai import OpenAI
7
 
8
+ import pkg_resources
9
+ import shutil
10
+ import main
11
+
12
+ ### To trigger trulens evaluation
13
+ main.main()
14
+
15
+ ### Finally, start streamlit app
16
+ leaderboard_path = pkg_resources.resource_filename(
17
+ "trulens_eval", "Leaderboard.py"
18
+ )
19
+ evaluation_path = pkg_resources.resource_filename(
20
+ "trulens_eval", "pages/Evaluations.py"
21
+ )
22
+ ux_path = pkg_resources.resource_filename(
23
+ "trulens_eval", "ux"
24
+ )
25
+ shutil.copyfile(leaderboard_path, os.path.join("pages", "1_Leaderboard.py"))
26
+ shutil.copyfile(evaluation_path, os.path.join("pages", "2_Evaluations.py"))
27
+ if os.path.exists("./ux"):
28
+ shutil.rmtree("./ux")
29
+ shutil.copytree(ux_path, "./ux")
30
+
31
  # App title
32
  st.set_page_config(page_title="💬 Open AI Chatbot")
33
  openai_api = os.getenv("OPENAI_API_KEY")
34
 
35
+ data_df = pd.DataFrame(
36
+ {
37
+ "Completion": [30, 40, 100, 10],
38
+ }
39
+ )
40
+ data_df.index = ["Chapter 1", "Chapter 2", "Chapter 3", "Chapter 4"]
41
+
42
  # Replicate Credentials
43
  with st.sidebar:
44
  st.title("💬 Open AI Chatbot")
 
64
  key="selected_model")
65
  temperature = st.sidebar.slider("temperature", min_value=0.01, max_value=2.0,
66
  value=0.1, step=0.01)
67
+ st.data_editor(
68
+ data_df,
69
+ column_config={
70
+ "Completion": st.column_config.ProgressColumn(
71
+ "Completion %",
72
+ help="Percentage of content covered",
73
+ format="%.1f%%",
74
+ min_value=0,
75
+ max_value=100,
76
+ ),
77
+ },
78
+ hide_index=False,
79
+ )
80
+
81
  st.markdown("📖 Reach out to SakiMilo to learn how to create this app!")
82
 
83
  # Store LLM generated responses
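One caveat with the copy step above: pages/ is newly gitignored, so it may not exist on a fresh checkout, and shutil.copyfile does not create missing directories. A defensive variant (an assumption, not part of the commit) would create it first:

import os
import shutil
import pkg_resources

# Hypothetical guard: make sure the Streamlit multipage directory exists
# before copying the TruLens dashboard pages into it.
os.makedirs("pages", exist_ok=True)

leaderboard_path = pkg_resources.resource_filename("trulens_eval", "Leaderboard.py")
evaluation_path = pkg_resources.resource_filename("trulens_eval", "pages/Evaluations.py")

shutil.copyfile(leaderboard_path, os.path.join("pages", "1_Leaderboard.py"))
shutil.copyfile(evaluation_path, os.path.join("pages", "2_Evaluations.py"))

Streamlit then picks up pages/1_Leaderboard.py and pages/2_Evaluations.py as extra app pages, ordered by their numeric filename prefix.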
utils.py CHANGED
@@ -1,4 +1,160 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def get_openai_api_key():
4
- return os.getenv("OPENAI_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import numpy as np
3
+ from trulens_eval import (
4
+ Feedback,
5
+ TruLlama,
6
+ OpenAI
7
+ )
8
+
9
+ from trulens_eval.feedback import Groundedness
10
+ import nest_asyncio
11
+
12
+ from llama_index import ServiceContext, VectorStoreIndex, StorageContext
13
+ from llama_index.node_parser import SentenceWindowNodeParser
14
+ from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
15
+ from llama_index.indices.postprocessor import SentenceTransformerRerank
16
+ from llama_index import load_index_from_storage
17
+
18
+ from llama_index.node_parser import HierarchicalNodeParser
19
+ from llama_index.node_parser import get_leaf_nodes
20
+ from llama_index import StorageContext
21
+ from llama_index.retrievers import AutoMergingRetriever
22
+ from llama_index.indices.postprocessor import SentenceTransformerRerank
23
+ from llama_index.query_engine import RetrieverQueryEngine
24
+
25
+
26
+ nest_asyncio.apply()
27
+ openai = OpenAI()
28
+
29
+ qa_relevance = (
30
+ Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
31
+ .on_input_output()
32
+ )
33
+
34
+ qs_relevance = (
35
+ Feedback(openai.relevance_with_cot_reasons, name = "Context Relevance")
36
+ .on_input()
37
+ .on(TruLlama.select_source_nodes().node.text)
38
+ .aggregate(np.mean)
39
+ )
40
+
41
+ #grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
42
+ grounded = Groundedness(groundedness_provider=openai)
43
+
44
+ groundedness = (
45
+ Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
46
+ .on(TruLlama.select_source_nodes().node.text)
47
+ .on_output()
48
+ .aggregate(grounded.grounded_statements_aggregator)
49
+ )
50
+
51
+ feedbacks = [qa_relevance, qs_relevance, groundedness]
52
 
53
  def get_openai_api_key():
54
+ return os.getenv("OPENAI_API_KEY")
55
+
56
+ def get_trulens_recorder(query_engine, feedbacks, app_id):
57
+ tru_recorder = TruLlama(
58
+ query_engine,
59
+ app_id=app_id,
60
+ feedbacks=feedbacks
61
+ )
62
+ return tru_recorder
63
+
64
+ def get_prebuilt_trulens_recorder(query_engine, app_id):
65
+ tru_recorder = TruLlama(
66
+ query_engine,
67
+ app_id=app_id,
68
+ feedbacks=feedbacks
69
+ )
70
+ return tru_recorder
71
+
72
+ def build_sentence_window_index(
73
+ document, llm, embed_model="local:BAAI/bge-small-en-v1.5", save_dir="sentence_index"
74
+ ):
75
+ # create the sentence window node parser w/ default settings
76
+ node_parser = SentenceWindowNodeParser.from_defaults(
77
+ window_size=3,
78
+ window_metadata_key="window",
79
+ original_text_metadata_key="original_text",
80
+ )
81
+ sentence_context = ServiceContext.from_defaults(
82
+ llm=llm,
83
+ embed_model=embed_model,
84
+ node_parser=node_parser,
85
+ )
86
+ if not os.path.exists(save_dir):
87
+ sentence_index = VectorStoreIndex.from_documents(
88
+ [document], service_context=sentence_context
89
+ )
90
+ sentence_index.storage_context.persist(persist_dir=save_dir)
91
+ else:
92
+ sentence_index = load_index_from_storage(
93
+ StorageContext.from_defaults(persist_dir=save_dir),
94
+ service_context=sentence_context,
95
+ )
96
+
97
+ return sentence_index
98
+
99
+ def get_sentence_window_query_engine(
100
+ sentence_index,
101
+ similarity_top_k=6,
102
+ rerank_top_n=2,
103
+ ):
104
+ # define postprocessors
105
+ postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
106
+ rerank = SentenceTransformerRerank(
107
+ top_n=rerank_top_n, model="BAAI/bge-reranker-base"
108
+ )
109
+
110
+ sentence_window_engine = sentence_index.as_query_engine(
111
+ similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
112
+ )
113
+ return sentence_window_engine
114
+
115
+ def build_automerging_index(
116
+ documents,
117
+ llm,
118
+ embed_model="local:BAAI/bge-small-en-v1.5",
119
+ save_dir="merging_index",
120
+ chunk_sizes=None,
121
+ ):
122
+ chunk_sizes = chunk_sizes or [2048, 512, 128]
123
+ node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
124
+ nodes = node_parser.get_nodes_from_documents(documents)
125
+ leaf_nodes = get_leaf_nodes(nodes)
126
+ merging_context = ServiceContext.from_defaults(
127
+ llm=llm,
128
+ embed_model=embed_model,
129
+ )
130
+ storage_context = StorageContext.from_defaults()
131
+ storage_context.docstore.add_documents(nodes)
132
+
133
+ if not os.path.exists(save_dir):
134
+ automerging_index = VectorStoreIndex(
135
+ leaf_nodes, storage_context=storage_context, service_context=merging_context
136
+ )
137
+ automerging_index.storage_context.persist(persist_dir=save_dir)
138
+ else:
139
+ automerging_index = load_index_from_storage(
140
+ StorageContext.from_defaults(persist_dir=save_dir),
141
+ service_context=merging_context,
142
+ )
143
+ return automerging_index
144
+
145
+ def get_automerging_query_engine(
146
+ automerging_index,
147
+ similarity_top_k=12,
148
+ rerank_top_n=2,
149
+ ):
150
+ base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
151
+ retriever = AutoMergingRetriever(
152
+ base_retriever, automerging_index.storage_context, verbose=True
153
+ )
154
+ rerank = SentenceTransformerRerank(
155
+ top_n=rerank_top_n, model="BAAI/bge-reranker-base"
156
+ )
157
+ auto_merging_engine = RetrieverQueryEngine.from_args(
158
+ retriever, node_postprocessors=[rerank]
159
+ )
160
+ return auto_merging_engine
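The sentence-window and auto-merging helpers above are defined but not yet wired into main.py. A minimal usage sketch (assumptions: the same PDF and model choices as main.py, and a hypothetical app_id) showing how one of them could be evaluated with the same feedback functions:

from llama_index import SimpleDirectoryReader, Document
from llama_index.llms import OpenAI

from utils import (
    build_sentence_window_index,
    get_sentence_window_query_engine,
    get_trulens_recorder,
    feedbacks,
)

# Same document and LLM setup as main.py
docs = SimpleDirectoryReader(
    input_files=["./raw_documents/HI_knowledge_base.pdf"]
).load_data()
document = Document(text="\n\n".join([doc.text for doc in docs]))
llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)

index = build_sentence_window_index(document, llm, save_dir="sentence_index")
engine = get_sentence_window_query_engine(index)

# "Sentence Window Engine" is a hypothetical app_id for the leaderboard
recorder = get_trulens_recorder(engine, feedbacks=feedbacks, app_id="Sentence Window Engine")
with recorder as recording:
    engine.query("What is Intermediate and Long Term Care (ILTC)?")  # sample question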