Spaces:
Running
Running
lingyit1108
committed on
Commit
•
8c107a7
1
Parent(s):
22585fc
swap to new embedding model and handle user 'i dont know' scenario
Browse files- config/model_config_advanced.yml +1 -1
- database/mock_qna.sqlite +1 -1
- models/chroma_db_advanced/a88943fe-4428-425d-8b9c-7bb8665a0c79/link_lists.bin +0 -0
- models/chroma_db_advanced/af9795b7-8b5f-4493-adbc-40aedf3c96ed/data_level0.bin +3 -0
- models/chroma_db_advanced/{a88943fe-4428-425d-8b9c-7bb8665a0c79 → af9795b7-8b5f-4493-adbc-40aedf3c96ed}/header.bin +1 -1
- models/chroma_db_advanced/{a88943fe-4428-425d-8b9c-7bb8665a0c79/data_level0.bin → af9795b7-8b5f-4493-adbc-40aedf3c96ed/index_metadata.pickle} +2 -2
- models/chroma_db_advanced/{a88943fe-4428-425d-8b9c-7bb8665a0c79 → af9795b7-8b5f-4493-adbc-40aedf3c96ed}/length.bin +2 -2
- models/chroma_db_advanced/af9795b7-8b5f-4493-adbc-40aedf3c96ed/link_lists.bin +3 -0
- models/chroma_db_advanced/chroma.sqlite3 +2 -2
- notebooks/002_persisted-embedding-model-advanced.ipynb +228 -69
- preprocess_raw_documents.py +16 -0
- qna_prompting.py +33 -13
- streamlit_app.py +1 -1
config/model_config_advanced.yml
CHANGED
@@ -14,4 +14,4 @@ vector_store:
|
|
14 |
persisted_path: './models/chroma_db_advanced'
|
15 |
|
16 |
questionaire_data:
|
17 |
-
db_path: './database/
|
|
|
14 |
persisted_path: './models/chroma_db_advanced'
|
15 |
|
16 |
questionaire_data:
|
17 |
+
db_path: './database/mock_qna.sqlite'
|
database/mock_qna.sqlite
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d51005d26f568ee304005ab7cf52cdc58a55f528230ae914a11dc9b75219623
|
3 |
size 40960
|
models/chroma_db_advanced/a88943fe-4428-425d-8b9c-7bb8665a0c79/link_lists.bin
DELETED
File without changes
|
models/chroma_db_advanced/af9795b7-8b5f-4493-adbc-40aedf3c96ed/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:453d35bee81975816ce0a286e796c4884c609c148e52d0605ac221daa46bf3d7
|
3 |
+
size 10056000
|
models/chroma_db_advanced/{a88943fe-4428-425d-8b9c-7bb8665a0c79 → af9795b7-8b5f-4493-adbc-40aedf3c96ed}/header.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89bd0cf182f20a10a0d7faa81bf3304c0565bc9b6f4705056ae63c061b9269ff
|
3 |
size 100
|
models/chroma_db_advanced/{a88943fe-4428-425d-8b9c-7bb8665a0c79/data_level0.bin → af9795b7-8b5f-4493-adbc-40aedf3c96ed/index_metadata.pickle}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5ecccac152d2deee938b41b1533b454bb8d5778a0befcd855529538a1a17bdf
|
3 |
+
size 346049
|
models/chroma_db_advanced/{a88943fe-4428-425d-8b9c-7bb8665a0c79 → af9795b7-8b5f-4493-adbc-40aedf3c96ed}/length.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae1fb78e4b679db0ad051360ddb549f4584c14a8b45f99d8d052f7d67067acb3
|
3 |
+
size 24000
|
models/chroma_db_advanced/af9795b7-8b5f-4493-adbc-40aedf3c96ed/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:535d672bfbbeec1181b50015d78bc1e776088cbbb0738d04bc725a76249eb744
|
3 |
+
size 52152
|
models/chroma_db_advanced/chroma.sqlite3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74c0d3543bf7cab83459feda7fad58a984a7c018fc566f79e937038b3756fcca
|
3 |
+
size 101720064
|
notebooks/002_persisted-embedding-model-advanced.ipynb
CHANGED
@@ -10,11 +10,16 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
-
"execution_count":
|
14 |
"id": "7de9c591-5a77-4bbe-80f1-4897e15f0b97",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
17 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
18 |
"import chromadb\n",
|
19 |
"from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
|
20 |
"from llama_index.vector_stores.chroma.base import ChromaVectorStore\n",
|
@@ -31,27 +36,78 @@
|
|
31 |
"import time"
|
32 |
]
|
33 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
{
|
35 |
"cell_type": "code",
|
36 |
"execution_count": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
"id": "3e65dff6-77b6-4be8-8857-5cecf3a035bb",
|
38 |
"metadata": {},
|
39 |
"outputs": [],
|
40 |
"source": [
|
41 |
"# load some documents\n",
|
42 |
"documents = SimpleDirectoryReader(input_files=[\n",
|
43 |
-
" \"../raw_documents/qna.txt\",\n",
|
44 |
" \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
|
45 |
" \"../raw_documents/conversation_examples.txt\",\n",
|
46 |
" \"../raw_documents/HI_Knowledge_Base.pdf\",\n",
|
47 |
-
"
|
48 |
-
" ]).load_data()\n",
|
49 |
"document = Document(text=\"\\n\\n\".join([doc.text for doc in documents]))"
|
50 |
]
|
51 |
},
|
52 |
{
|
53 |
"cell_type": "code",
|
54 |
-
"execution_count":
|
55 |
"id": "bd86b3f5-1dfc-4257-bd9c-86d34f02398d",
|
56 |
"metadata": {},
|
57 |
"outputs": [],
|
@@ -62,7 +118,7 @@
|
|
62 |
},
|
63 |
{
|
64 |
"cell_type": "code",
|
65 |
-
"execution_count":
|
66 |
"id": "f568ce7b-bcbf-455c-acf1-6c2cae129fed",
|
67 |
"metadata": {},
|
68 |
"outputs": [],
|
@@ -73,7 +129,7 @@
|
|
73 |
},
|
74 |
{
|
75 |
"cell_type": "code",
|
76 |
-
"execution_count":
|
77 |
"id": "ed0b018e-1982-46b2-b1b4-04f5c0ce8672",
|
78 |
"metadata": {},
|
79 |
"outputs": [],
|
@@ -92,19 +148,28 @@
|
|
92 |
},
|
93 |
{
|
94 |
"cell_type": "code",
|
95 |
-
"execution_count":
|
96 |
"id": "0946b6ce-96ab-44de-ad75-e424a8429f67",
|
97 |
"metadata": {},
|
98 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
"source": [
|
100 |
"Settings.llm = None\n",
|
101 |
"Settings.chunk_size = 1024\n",
|
|
|
102 |
"Settings.embed_model = \"local:../models/fine-tuned-embeddings-advanced\""
|
103 |
]
|
104 |
},
|
105 |
{
|
106 |
"cell_type": "code",
|
107 |
-
"execution_count":
|
108 |
"id": "b8c73a2c-1129-406a-8046-085afcaf9cbb",
|
109 |
"metadata": {},
|
110 |
"outputs": [],
|
@@ -114,10 +179,21 @@
|
|
114 |
},
|
115 |
{
|
116 |
"cell_type": "code",
|
117 |
-
"execution_count":
|
118 |
"id": "75f1c76f-d3e5-4b69-818c-98865adb1457",
|
119 |
"metadata": {},
|
120 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
"source": [
|
122 |
"len(nodes)"
|
123 |
]
|
@@ -132,7 +208,7 @@
|
|
132 |
},
|
133 |
{
|
134 |
"cell_type": "code",
|
135 |
-
"execution_count":
|
136 |
"id": "dab4c6f3-ef67-4d90-b3d5-e290c5d1b6f4",
|
137 |
"metadata": {},
|
138 |
"outputs": [],
|
@@ -142,7 +218,7 @@
|
|
142 |
},
|
143 |
{
|
144 |
"cell_type": "code",
|
145 |
-
"execution_count":
|
146 |
"id": "6a764113-ad7e-4674-aa57-ebbf405902a8",
|
147 |
"metadata": {},
|
148 |
"outputs": [],
|
@@ -160,7 +236,7 @@
|
|
160 |
},
|
161 |
{
|
162 |
"cell_type": "code",
|
163 |
-
"execution_count":
|
164 |
"id": "e492ed4a-23a3-47d6-8b50-51fb48b3aa05",
|
165 |
"metadata": {},
|
166 |
"outputs": [],
|
@@ -170,7 +246,7 @@
|
|
170 |
},
|
171 |
{
|
172 |
"cell_type": "code",
|
173 |
-
"execution_count":
|
174 |
"id": "cbd11b89-9b83-4f08-bb30-160f750f2ffb",
|
175 |
"metadata": {},
|
176 |
"outputs": [],
|
@@ -180,39 +256,88 @@
|
|
180 |
},
|
181 |
{
|
182 |
"cell_type": "code",
|
183 |
-
"execution_count":
|
184 |
-
"id": "
|
185 |
"metadata": {},
|
186 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
"source": [
|
188 |
-
"
|
|
|
|
|
189 |
]
|
190 |
},
|
191 |
{
|
192 |
"cell_type": "code",
|
193 |
-
"execution_count":
|
194 |
-
"id": "
|
195 |
"metadata": {},
|
196 |
"outputs": [],
|
197 |
"source": [
|
198 |
-
"
|
199 |
-
"indexing_cost = indexing_cost / 60\n",
|
200 |
-
"print(f\"Indexing time: {indexing_cost:.1f} mins\")"
|
201 |
]
|
202 |
},
|
203 |
{
|
204 |
"cell_type": "code",
|
205 |
-
"execution_count":
|
206 |
"id": "3290e870-41d7-49c4-9c4f-cb16bd1f469e",
|
207 |
"metadata": {
|
208 |
"scrolled": true
|
209 |
},
|
210 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
"source": [
|
212 |
"response = vector_query_engine.query(\"Healthcare System in Singapore consists of?\")\n",
|
213 |
"response"
|
214 |
]
|
215 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
{
|
217 |
"cell_type": "code",
|
218 |
"execution_count": null,
|
@@ -239,7 +364,7 @@
|
|
239 |
},
|
240 |
{
|
241 |
"cell_type": "code",
|
242 |
-
"execution_count":
|
243 |
"id": "c1a42c35-5f57-423c-8fb7-7d18b3b466b5",
|
244 |
"metadata": {},
|
245 |
"outputs": [],
|
@@ -269,7 +394,7 @@
|
|
269 |
},
|
270 |
{
|
271 |
"cell_type": "code",
|
272 |
-
"execution_count":
|
273 |
"id": "d38dc953-b923-4128-86a1-c8c6f69af0ed",
|
274 |
"metadata": {},
|
275 |
"outputs": [],
|
@@ -279,7 +404,7 @@
|
|
279 |
},
|
280 |
{
|
281 |
"cell_type": "code",
|
282 |
-
"execution_count":
|
283 |
"id": "4c83c613-2cfc-4871-9d07-c82f77a3bd5e",
|
284 |
"metadata": {},
|
285 |
"outputs": [],
|
@@ -289,7 +414,7 @@
|
|
289 |
},
|
290 |
{
|
291 |
"cell_type": "code",
|
292 |
-
"execution_count":
|
293 |
"id": "0583e9b0-d977-488c-8331-46dfa749924c",
|
294 |
"metadata": {},
|
295 |
"outputs": [],
|
@@ -308,7 +433,7 @@
|
|
308 |
},
|
309 |
{
|
310 |
"cell_type": "code",
|
311 |
-
"execution_count":
|
312 |
"id": "2159a2b6-494b-41b9-ac54-dd342bfb74ba",
|
313 |
"metadata": {},
|
314 |
"outputs": [],
|
@@ -318,7 +443,7 @@
|
|
318 |
},
|
319 |
{
|
320 |
"cell_type": "code",
|
321 |
-
"execution_count":
|
322 |
"id": "1b385644-b46e-4d13-88fa-9f4af39db405",
|
323 |
"metadata": {},
|
324 |
"outputs": [],
|
@@ -328,7 +453,7 @@
|
|
328 |
},
|
329 |
{
|
330 |
"cell_type": "code",
|
331 |
-
"execution_count":
|
332 |
"id": "93cb53d1-6b8c-4b2d-a839-53501c0d54b2",
|
333 |
"metadata": {},
|
334 |
"outputs": [],
|
@@ -340,7 +465,7 @@
|
|
340 |
},
|
341 |
{
|
342 |
"cell_type": "code",
|
343 |
-
"execution_count":
|
344 |
"id": "c40d59e1-6d42-41f0-8c9b-70aa026093ae",
|
345 |
"metadata": {},
|
346 |
"outputs": [],
|
@@ -362,7 +487,7 @@
|
|
362 |
},
|
363 |
{
|
364 |
"cell_type": "code",
|
365 |
-
"execution_count":
|
366 |
"id": "1a506940-c2b4-4d14-ad93-fd451331c582",
|
367 |
"metadata": {},
|
368 |
"outputs": [],
|
@@ -375,7 +500,7 @@
|
|
375 |
},
|
376 |
{
|
377 |
"cell_type": "code",
|
378 |
-
"execution_count":
|
379 |
"id": "3f592848-8536-4b4d-b34a-adc32d043432",
|
380 |
"metadata": {},
|
381 |
"outputs": [],
|
@@ -385,7 +510,7 @@
|
|
385 |
},
|
386 |
{
|
387 |
"cell_type": "code",
|
388 |
-
"execution_count":
|
389 |
"id": "6c7df81a-fd2f-42bf-b09c-46d7750f7252",
|
390 |
"metadata": {},
|
391 |
"outputs": [],
|
@@ -399,58 +524,66 @@
|
|
399 |
},
|
400 |
{
|
401 |
"cell_type": "code",
|
402 |
-
"execution_count":
|
403 |
-
"id": "
|
404 |
"metadata": {},
|
405 |
"outputs": [],
|
406 |
"source": [
|
407 |
-
"
|
408 |
-
"
|
409 |
-
"
|
410 |
-
"
|
411 |
-
"
|
412 |
-
"
|
413 |
-
"\"\"\""
|
414 |
]
|
415 |
},
|
416 |
{
|
417 |
"cell_type": "code",
|
418 |
"execution_count": null,
|
419 |
-
"id": "
|
420 |
"metadata": {},
|
421 |
"outputs": [],
|
422 |
-
"source": [
|
423 |
-
"res = chat_engine.chat(prompt)\n",
|
424 |
-
"print(res.response)"
|
425 |
-
]
|
426 |
},
|
427 |
{
|
428 |
"cell_type": "code",
|
429 |
-
"execution_count":
|
430 |
-
"id": "
|
431 |
"metadata": {},
|
432 |
"outputs": [],
|
433 |
-
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
},
|
435 |
{
|
436 |
"cell_type": "code",
|
437 |
-
"execution_count":
|
438 |
-
"id": "
|
439 |
"metadata": {},
|
440 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
"source": [
|
442 |
-
"
|
443 |
-
"
|
444 |
-
" system_prompt=system_content,\n",
|
445 |
-
" similarity_top_k=3,\n",
|
446 |
-
" streaming=True\n",
|
447 |
-
")"
|
448 |
]
|
449 |
},
|
450 |
{
|
451 |
"cell_type": "code",
|
452 |
"execution_count": null,
|
453 |
-
"id": "
|
454 |
"metadata": {},
|
455 |
"outputs": [],
|
456 |
"source": []
|
@@ -458,18 +591,44 @@
|
|
458 |
{
|
459 |
"cell_type": "code",
|
460 |
"execution_count": null,
|
461 |
-
"id": "
|
462 |
"metadata": {},
|
463 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
464 |
"source": [
|
465 |
-
"res =
|
466 |
-
"print(res)"
|
467 |
]
|
468 |
},
|
469 |
{
|
470 |
"cell_type": "code",
|
471 |
"execution_count": null,
|
472 |
-
"id": "
|
473 |
"metadata": {},
|
474 |
"outputs": [],
|
475 |
"source": []
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
+
"execution_count": 1,
|
14 |
"id": "7de9c591-5a77-4bbe-80f1-4897e15f0b97",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
17 |
"source": [
|
18 |
+
"import sys, os, shutil\n",
|
19 |
+
"sys.path.insert(0, \"../\")\n",
|
20 |
+
"\n",
|
21 |
+
"from preprocess_raw_documents import split_content\n",
|
22 |
+
"\n",
|
23 |
"import chromadb\n",
|
24 |
"from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
|
25 |
"from llama_index.vector_stores.chroma.base import ChromaVectorStore\n",
|
|
|
36 |
"import time"
|
37 |
]
|
38 |
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 2,
|
42 |
+
"id": "978152ce-4d87-44b5-b521-dbaff60b32b0",
|
43 |
+
"metadata": {},
|
44 |
+
"outputs": [
|
45 |
+
{
|
46 |
+
"name": "stderr",
|
47 |
+
"output_type": "stream",
|
48 |
+
"text": [
|
49 |
+
"199it [00:00, 8821.71it/s]\n",
|
50 |
+
"200it [00:00, 12584.17it/s]\n"
|
51 |
+
]
|
52 |
+
}
|
53 |
+
],
|
54 |
+
"source": [
|
55 |
+
"split_content(filepath=\"../raw_documents/answers.txt\", \n",
|
56 |
+
" separator=\"\\n\\n\", \n",
|
57 |
+
" tmp_folder=\"../raw_documents/answers_temp\")\n",
|
58 |
+
"\n",
|
59 |
+
"split_content(filepath=\"../raw_documents/qna.txt\", \n",
|
60 |
+
" separator=\"\\n\\n\\n\", \n",
|
61 |
+
" tmp_folder=\"../raw_documents/qna_temp\")"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cell_type": "code",
|
66 |
+
"execution_count": 5,
|
67 |
+
"id": "d925371b-8777-4f5b-a7f2-ec3f228ef266",
|
68 |
+
"metadata": {},
|
69 |
+
"outputs": [],
|
70 |
+
"source": [
|
71 |
+
"answers_temp_files = []\n",
|
72 |
+
"folder_path = \"../raw_documents/answers_temp\"\n",
|
73 |
+
"for f in os.listdir(folder_path):\n",
|
74 |
+
" fpath = os.path.join(folder_path, f)\n",
|
75 |
+
" answers_temp_files.append(fpath)\n",
|
76 |
+
" \n",
|
77 |
+
"qna_temp_files = []\n",
|
78 |
+
"folder_path = \"../raw_documents/qna_temp\"\n",
|
79 |
+
"for f in os.listdir(folder_path):\n",
|
80 |
+
" fpath = os.path.join(folder_path, f)\n",
|
81 |
+
" qna_temp_files.append(fpath)"
|
82 |
+
]
|
83 |
+
},
|
84 |
{
|
85 |
"cell_type": "code",
|
86 |
"execution_count": null,
|
87 |
+
"id": "e876a26b-822d-44d6-a3dd-ccdcc04933cf",
|
88 |
+
"metadata": {},
|
89 |
+
"outputs": [],
|
90 |
+
"source": []
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"cell_type": "code",
|
94 |
+
"execution_count": 7,
|
95 |
"id": "3e65dff6-77b6-4be8-8857-5cecf3a035bb",
|
96 |
"metadata": {},
|
97 |
"outputs": [],
|
98 |
"source": [
|
99 |
"# load some documents\n",
|
100 |
"documents = SimpleDirectoryReader(input_files=[\n",
|
|
|
101 |
" \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
|
102 |
" \"../raw_documents/conversation_examples.txt\",\n",
|
103 |
" \"../raw_documents/HI_Knowledge_Base.pdf\",\n",
|
104 |
+
" ] + answers_temp_files + qna_temp_files ).load_data()\n",
|
|
|
105 |
"document = Document(text=\"\\n\\n\".join([doc.text for doc in documents]))"
|
106 |
]
|
107 |
},
|
108 |
{
|
109 |
"cell_type": "code",
|
110 |
+
"execution_count": 8,
|
111 |
"id": "bd86b3f5-1dfc-4257-bd9c-86d34f02398d",
|
112 |
"metadata": {},
|
113 |
"outputs": [],
|
|
|
118 |
},
|
119 |
{
|
120 |
"cell_type": "code",
|
121 |
+
"execution_count": 9,
|
122 |
"id": "f568ce7b-bcbf-455c-acf1-6c2cae129fed",
|
123 |
"metadata": {},
|
124 |
"outputs": [],
|
|
|
129 |
},
|
130 |
{
|
131 |
"cell_type": "code",
|
132 |
+
"execution_count": 10,
|
133 |
"id": "ed0b018e-1982-46b2-b1b4-04f5c0ce8672",
|
134 |
"metadata": {},
|
135 |
"outputs": [],
|
|
|
148 |
},
|
149 |
{
|
150 |
"cell_type": "code",
|
151 |
+
"execution_count": 11,
|
152 |
"id": "0946b6ce-96ab-44de-ad75-e424a8429f67",
|
153 |
"metadata": {},
|
154 |
+
"outputs": [
|
155 |
+
{
|
156 |
+
"name": "stdout",
|
157 |
+
"output_type": "stream",
|
158 |
+
"text": [
|
159 |
+
"LLM is explicitly disabled. Using MockLLM.\n"
|
160 |
+
]
|
161 |
+
}
|
162 |
+
],
|
163 |
"source": [
|
164 |
"Settings.llm = None\n",
|
165 |
"Settings.chunk_size = 1024\n",
|
166 |
+
"Settings.chunk_overlap = 50\n",
|
167 |
"Settings.embed_model = \"local:../models/fine-tuned-embeddings-advanced\""
|
168 |
]
|
169 |
},
|
170 |
{
|
171 |
"cell_type": "code",
|
172 |
+
"execution_count": 12,
|
173 |
"id": "b8c73a2c-1129-406a-8046-085afcaf9cbb",
|
174 |
"metadata": {},
|
175 |
"outputs": [],
|
|
|
179 |
},
|
180 |
{
|
181 |
"cell_type": "code",
|
182 |
+
"execution_count": 13,
|
183 |
"id": "75f1c76f-d3e5-4b69-818c-98865adb1457",
|
184 |
"metadata": {},
|
185 |
+
"outputs": [
|
186 |
+
{
|
187 |
+
"data": {
|
188 |
+
"text/plain": [
|
189 |
+
"6814"
|
190 |
+
]
|
191 |
+
},
|
192 |
+
"execution_count": 13,
|
193 |
+
"metadata": {},
|
194 |
+
"output_type": "execute_result"
|
195 |
+
}
|
196 |
+
],
|
197 |
"source": [
|
198 |
"len(nodes)"
|
199 |
]
|
|
|
208 |
},
|
209 |
{
|
210 |
"cell_type": "code",
|
211 |
+
"execution_count": 14,
|
212 |
"id": "dab4c6f3-ef67-4d90-b3d5-e290c5d1b6f4",
|
213 |
"metadata": {},
|
214 |
"outputs": [],
|
|
|
218 |
},
|
219 |
{
|
220 |
"cell_type": "code",
|
221 |
+
"execution_count": 15,
|
222 |
"id": "6a764113-ad7e-4674-aa57-ebbf405902a8",
|
223 |
"metadata": {},
|
224 |
"outputs": [],
|
|
|
236 |
},
|
237 |
{
|
238 |
"cell_type": "code",
|
239 |
+
"execution_count": 16,
|
240 |
"id": "e492ed4a-23a3-47d6-8b50-51fb48b3aa05",
|
241 |
"metadata": {},
|
242 |
"outputs": [],
|
|
|
246 |
},
|
247 |
{
|
248 |
"cell_type": "code",
|
249 |
+
"execution_count": 17,
|
250 |
"id": "cbd11b89-9b83-4f08-bb30-160f750f2ffb",
|
251 |
"metadata": {},
|
252 |
"outputs": [],
|
|
|
256 |
},
|
257 |
{
|
258 |
"cell_type": "code",
|
259 |
+
"execution_count": 18,
|
260 |
+
"id": "d3bd848d-9985-4a3d-bdc4-ec340cc69ef3",
|
261 |
"metadata": {},
|
262 |
+
"outputs": [
|
263 |
+
{
|
264 |
+
"name": "stdout",
|
265 |
+
"output_type": "stream",
|
266 |
+
"text": [
|
267 |
+
"Indexing time: 2.3 mins\n"
|
268 |
+
]
|
269 |
+
}
|
270 |
+
],
|
271 |
"source": [
|
272 |
+
"indexing_cost = time.time() - start_time\n",
|
273 |
+
"indexing_cost = indexing_cost / 60\n",
|
274 |
+
"print(f\"Indexing time: {indexing_cost:.1f} mins\")"
|
275 |
]
|
276 |
},
|
277 |
{
|
278 |
"cell_type": "code",
|
279 |
+
"execution_count": 19,
|
280 |
+
"id": "f16cca33-71fb-437d-a033-671b9fd44054",
|
281 |
"metadata": {},
|
282 |
"outputs": [],
|
283 |
"source": [
|
284 |
+
"vector_query_engine = vector_index.as_query_engine()"
|
|
|
|
|
285 |
]
|
286 |
},
|
287 |
{
|
288 |
"cell_type": "code",
|
289 |
+
"execution_count": 20,
|
290 |
"id": "3290e870-41d7-49c4-9c4f-cb16bd1f469e",
|
291 |
"metadata": {
|
292 |
"scrolled": true
|
293 |
},
|
294 |
+
"outputs": [
|
295 |
+
{
|
296 |
+
"data": {
|
297 |
+
"text/plain": [
|
298 |
+
"Response(response='Context information is below.\\n---------------------\\nfile_path: ../raw_documents/answers_temp/answers_050.txt\\n\\nQuestion: The fundamental principle of Singapore healthcare financing is ____________.\\nAnswer: The answer is \"Individual Savings\".\\n\\nfile_path: ../raw_documents/qna_temp/qna_050.txt\\n\\nC1/5\\nQuestion: The fundamental principle of Singapore healthcare financing is ____________.\\nA. The 3βs M. That is Medisave, Medishield, Medifund.\\nB. Means Testing and Casemix.\\nC. Individual Savings.\\nD. Tax based subsidies and government subvention.\\nAnswer: C. The answer is \"Individual Savings\".\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: Healthcare System in Singapore consists of?\\nAnswer: ', source_nodes=[NodeWithScore(node=TextNode(id_='536fef67-6a3f-4054-a94a-cc9143599510', embedding=None, metadata={'file_path': '../raw_documents/answers_temp/answers_050.txt', 'file_name': 'answers_050.txt', 'file_type': 'text/plain', 'file_size': 130, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='2b0f7dad-c532-4abd-8c42-f53383a4fc76', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '../raw_documents/answers_temp/answers_050.txt', 'file_name': 'answers_050.txt', 'file_type': 'text/plain', 'file_size': 130, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='5b1d1dc729a663e4ccfacc0f18adf0f6644a2a7d2991490fd962d1550c83f2ff'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='6d93c092-b4cc-4b5b-b379-080d777d3908', 
node_type=<ObjectType.TEXT: '1'>, metadata={'file_path': '../raw_documents/answers_temp/answers_044.txt', 'file_name': 'answers_044.txt', 'file_type': 'text/plain', 'file_size': 164, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='caeb59043b8daa56ed472941882947570abff951f64aa0498672aba5921fac1d'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='859a9958-6f5d-4581-95d0-39edfc950ef5', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='8416454b2fbad3e6122c5151d2b3d1eadf0afde3514ba09374c71e96baf712bc')}, text='Question: The fundamental principle of Singapore healthcare financing is ____________.\\nAnswer: The answer is \"Individual Savings\".', start_char_idx=0, end_char_idx=130, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.4159636550867191), NodeWithScore(node=TextNode(id_='472000ae-a0aa-4464-a200-72fe67a3fbde', embedding=None, metadata={'file_path': '../raw_documents/qna_temp/qna_050.txt', 'file_name': 'qna_050.txt', 'file_type': 'text/plain', 'file_size': 297, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='506fb715-d3b0-4ca7-b7ca-011a1e1a1f0d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '../raw_documents/qna_temp/qna_050.txt', 'file_name': 'qna_050.txt', 'file_type': 'text/plain', 'file_size': 297, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='7461ffa12ff6729003131976b82995b7254ab10f8dc7d79c65988ec9e3b7b631'), <NodeRelationship.PREVIOUS: '2'>: 
RelatedNodeInfo(node_id='d8232b90-d641-4966-b98f-4ca0821db773', node_type=<ObjectType.TEXT: '1'>, metadata={'file_path': '../raw_documents/qna_temp/qna_044.txt', 'file_name': 'qna_044.txt', 'file_type': 'text/plain', 'file_size': 383, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='cbeb00c29c6130548466697a862fee43ab2be92d84158cc0b69c2f5c7bbe68b1'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='e772e623-cf91-41cd-a516-50acb894eb54', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='a7583b0fd46f98d0118c712632277d81f417b779f8bcc100ab2558dae6317cde')}, text='C1/5\\nQuestion: The fundamental principle of Singapore healthcare financing is ____________.\\nA. The 3βs M. That is Medisave, Medishield, Medifund.\\nB. Means Testing and Casemix.\\nC. Individual Savings.\\nD. Tax based subsidies and government subvention.\\nAnswer: C. The answer is \"Individual Savings\".', start_char_idx=0, end_char_idx=295, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.4126648577998099)], metadata={'536fef67-6a3f-4054-a94a-cc9143599510': {'file_path': '../raw_documents/answers_temp/answers_050.txt', 'file_name': 'answers_050.txt', 'file_type': 'text/plain', 'file_size': 130, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, '472000ae-a0aa-4464-a200-72fe67a3fbde': {'file_path': '../raw_documents/qna_temp/qna_050.txt', 'file_name': 'qna_050.txt', 'file_type': 'text/plain', 'file_size': 297, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}})"
|
299 |
+
]
|
300 |
+
},
|
301 |
+
"execution_count": 20,
|
302 |
+
"metadata": {},
|
303 |
+
"output_type": "execute_result"
|
304 |
+
}
|
305 |
+
],
|
306 |
"source": [
|
307 |
"response = vector_query_engine.query(\"Healthcare System in Singapore consists of?\")\n",
|
308 |
"response"
|
309 |
]
|
310 |
},
|
311 |
+
{
|
312 |
+
"cell_type": "code",
|
313 |
+
"execution_count": null,
|
314 |
+
"id": "aa4b9906-5f75-4003-9f4c-5cfcc7ab1eaf",
|
315 |
+
"metadata": {},
|
316 |
+
"outputs": [],
|
317 |
+
"source": []
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"cell_type": "code",
|
321 |
+
"execution_count": 21,
|
322 |
+
"id": "1bb75b04-6a62-43a4-8728-d2e52e49f1c0",
|
323 |
+
"metadata": {},
|
324 |
+
"outputs": [],
|
325 |
+
"source": [
|
326 |
+
"if os.path.exists(\"../raw_documents/answers_temp\"):\n",
|
327 |
+
" shutil.rmtree(\"../raw_documents/answers_temp\")"
|
328 |
+
]
|
329 |
+
},
|
330 |
+
{
|
331 |
+
"cell_type": "code",
|
332 |
+
"execution_count": 22,
|
333 |
+
"id": "0ed920fb-6456-49ac-8b63-08bd86b5b39c",
|
334 |
+
"metadata": {},
|
335 |
+
"outputs": [],
|
336 |
+
"source": [
|
337 |
+
"if os.path.exists(\"../raw_documents/qna_temp\"):\n",
|
338 |
+
" shutil.rmtree(\"../raw_documents/qna_temp\")"
|
339 |
+
]
|
340 |
+
},
|
341 |
{
|
342 |
"cell_type": "code",
|
343 |
"execution_count": null,
|
|
|
364 |
},
|
365 |
{
|
366 |
"cell_type": "code",
|
367 |
+
"execution_count": 1,
|
368 |
"id": "c1a42c35-5f57-423c-8fb7-7d18b3b466b5",
|
369 |
"metadata": {},
|
370 |
"outputs": [],
|
|
|
394 |
},
|
395 |
{
|
396 |
"cell_type": "code",
|
397 |
+
"execution_count": 2,
|
398 |
"id": "d38dc953-b923-4128-86a1-c8c6f69af0ed",
|
399 |
"metadata": {},
|
400 |
"outputs": [],
|
|
|
404 |
},
|
405 |
{
|
406 |
"cell_type": "code",
|
407 |
+
"execution_count": 3,
|
408 |
"id": "4c83c613-2cfc-4871-9d07-c82f77a3bd5e",
|
409 |
"metadata": {},
|
410 |
"outputs": [],
|
|
|
414 |
},
|
415 |
{
|
416 |
"cell_type": "code",
|
417 |
+
"execution_count": 4,
|
418 |
"id": "0583e9b0-d977-488c-8331-46dfa749924c",
|
419 |
"metadata": {},
|
420 |
"outputs": [],
|
|
|
433 |
},
|
434 |
{
|
435 |
"cell_type": "code",
|
436 |
+
"execution_count": 5,
|
437 |
"id": "2159a2b6-494b-41b9-ac54-dd342bfb74ba",
|
438 |
"metadata": {},
|
439 |
"outputs": [],
|
|
|
443 |
},
|
444 |
{
|
445 |
"cell_type": "code",
|
446 |
+
"execution_count": 6,
|
447 |
"id": "1b385644-b46e-4d13-88fa-9f4af39db405",
|
448 |
"metadata": {},
|
449 |
"outputs": [],
|
|
|
453 |
},
|
454 |
{
|
455 |
"cell_type": "code",
|
456 |
+
"execution_count": 7,
|
457 |
"id": "93cb53d1-6b8c-4b2d-a839-53501c0d54b2",
|
458 |
"metadata": {},
|
459 |
"outputs": [],
|
|
|
465 |
},
|
466 |
{
|
467 |
"cell_type": "code",
|
468 |
+
"execution_count": 8,
|
469 |
"id": "c40d59e1-6d42-41f0-8c9b-70aa026093ae",
|
470 |
"metadata": {},
|
471 |
"outputs": [],
|
|
|
487 |
},
|
488 |
{
|
489 |
"cell_type": "code",
|
490 |
+
"execution_count": 9,
|
491 |
"id": "1a506940-c2b4-4d14-ad93-fd451331c582",
|
492 |
"metadata": {},
|
493 |
"outputs": [],
|
|
|
500 |
},
|
501 |
{
|
502 |
"cell_type": "code",
|
503 |
+
"execution_count": 10,
|
504 |
"id": "3f592848-8536-4b4d-b34a-adc32d043432",
|
505 |
"metadata": {},
|
506 |
"outputs": [],
|
|
|
510 |
},
|
511 |
{
|
512 |
"cell_type": "code",
|
513 |
+
"execution_count": 11,
|
514 |
"id": "6c7df81a-fd2f-42bf-b09c-46d7750f7252",
|
515 |
"metadata": {},
|
516 |
"outputs": [],
|
|
|
524 |
},
|
525 |
{
|
526 |
"cell_type": "code",
|
527 |
+
"execution_count": 12,
|
528 |
+
"id": "c3106dff-dd6f-47a9-9454-1e61775e7539",
|
529 |
"metadata": {},
|
530 |
"outputs": [],
|
531 |
"source": [
|
532 |
+
"hi_engine = index.as_query_engine(\n",
|
533 |
+
" memory=memory,\n",
|
534 |
+
" system_prompt=system_content,\n",
|
535 |
+
" similarity_top_k=10,\n",
|
536 |
+
" streaming=True\n",
|
537 |
+
")"
|
|
|
538 |
]
|
539 |
},
|
540 |
{
|
541 |
"cell_type": "code",
|
542 |
"execution_count": null,
|
543 |
+
"id": "53a38081-4a79-44bc-bfa3-5d8653804328",
|
544 |
"metadata": {},
|
545 |
"outputs": [],
|
546 |
+
"source": []
|
|
|
|
|
|
|
547 |
},
|
548 |
{
|
549 |
"cell_type": "code",
|
550 |
+
"execution_count": 24,
|
551 |
+
"id": "434f0caf-8b1f-40c6-b9ec-b039cd1ca612",
|
552 |
"metadata": {},
|
553 |
"outputs": [],
|
554 |
+
"source": [
|
555 |
+
"prompt = \"\"\"\n",
|
556 |
+
"Question: Which is not a government healthcare philosophy? \n",
|
557 |
+
"A. To nurture a healthy nation by promoting good health.\n",
|
558 |
+
"B. To rely on competition to improve service and raise efficiency\n",
|
559 |
+
"C. To intervene directly whenever necessary\n",
|
560 |
+
"D. To provide for the care of employees\n",
|
561 |
+
"\"\"\""
|
562 |
+
]
|
563 |
},
|
564 |
{
|
565 |
"cell_type": "code",
|
566 |
+
"execution_count": 26,
|
567 |
+
"id": "a1c83dff-50d1-47b1-b7e9-4fc5cd08e1e8",
|
568 |
"metadata": {},
|
569 |
+
"outputs": [
|
570 |
+
{
|
571 |
+
"name": "stdout",
|
572 |
+
"output_type": "stream",
|
573 |
+
"text": [
|
574 |
+
"D. To provide for the care of employees\n"
|
575 |
+
]
|
576 |
+
}
|
577 |
+
],
|
578 |
"source": [
|
579 |
+
"res = hi_engine.query(prompt)\n",
|
580 |
+
"print(res)"
|
|
|
|
|
|
|
|
|
581 |
]
|
582 |
},
|
583 |
{
|
584 |
"cell_type": "code",
|
585 |
"execution_count": null,
|
586 |
+
"id": "cedd3512-548d-4455-80fd-c6a8b2c0cd00",
|
587 |
"metadata": {},
|
588 |
"outputs": [],
|
589 |
"source": []
|
|
|
591 |
{
|
592 |
"cell_type": "code",
|
593 |
"execution_count": null,
|
594 |
+
"id": "ec53dfcf-d4c0-4d10-a24e-be2004a83656",
|
595 |
"metadata": {},
|
596 |
"outputs": [],
|
597 |
+
"source": []
|
598 |
+
},
|
599 |
+
{
|
600 |
+
"cell_type": "code",
|
601 |
+
"execution_count": 14,
|
602 |
+
"id": "78abaf95-e52d-445c-9d8e-bc51efb20f06",
|
603 |
+
"metadata": {},
|
604 |
+
"outputs": [
|
605 |
+
{
|
606 |
+
"name": "stderr",
|
607 |
+
"output_type": "stream",
|
608 |
+
"text": [
|
609 |
+
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
610 |
+
"To disable this warning, you can either:\n",
|
611 |
+
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
612 |
+
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
613 |
+
]
|
614 |
+
},
|
615 |
+
{
|
616 |
+
"name": "stdout",
|
617 |
+
"output_type": "stream",
|
618 |
+
"text": [
|
619 |
+
"The correct answer is \"Deductibles apply for all treatments\".\n"
|
620 |
+
]
|
621 |
+
}
|
622 |
+
],
|
623 |
"source": [
|
624 |
+
"res = chat_engine.chat(prompt)\n",
|
625 |
+
"print(res.response)"
|
626 |
]
|
627 |
},
|
628 |
{
|
629 |
"cell_type": "code",
|
630 |
"execution_count": null,
|
631 |
+
"id": "1e62303c-3a00-448f-ad93-15cb6cee1f24",
|
632 |
"metadata": {},
|
633 |
"outputs": [],
|
634 |
"source": []
|
preprocess_raw_documents.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
from tqdm import tqdm
|
4 |
+
|
5 |
+
|
6 |
+
def split_content(filepath, separator, tmp_folder, encoding=None):
    """Split a text file on ``separator`` and write each piece to its own file.

    The file at ``filepath`` is read in full, split with ``str.split``, and
    every resulting chunk is written into ``tmp_folder`` as
    ``<name>_<index>.<ext>``, where ``<index>`` is the zero-padded (3 digit)
    position of the chunk and ``<name>``/``<ext>`` come from the source file's
    basename. Existing files with the same name are overwritten.

    Args:
        filepath: Path of the text file to split.
        separator: Non-empty string marking the chunk boundaries.
        tmp_folder: Directory receiving the chunk files; created if missing.
        encoding: Text encoding used for both reading and writing. ``None``
            (the default) keeps ``open``'s platform-dependent default.

    Raises:
        ValueError: If ``separator`` is an empty string (from ``str.split``).
        OSError: If the source cannot be read or a chunk cannot be written.
    """
    os.makedirs(tmp_folder, exist_ok=True)

    base_file_name = os.path.basename(filepath)
    # Split on the LAST dot only. A plain ``split(".")`` unpacked into two
    # names raises ValueError for basenames such as "book.v2.txt" (two dots)
    # or "README" (no dot at all).
    fname, dot, fextn = base_file_name.rpartition(".")
    if not dot:
        # No extension: rpartition puts the whole name in the last slot.
        fname, fextn = base_file_name, ""
    suffix = f".{fextn}" if dot else ""

    with open(filepath, "r", encoding=encoding) as fp:
        content = fp.read()
    content_chunk = content.split(separator)

    # Wrap the list rather than the enumerate object so tqdm can read its
    # length and display a total / ETA instead of a bare counter.
    for index, chunk in enumerate(tqdm(content_chunk)):
        new_fpath = os.path.join(tmp_folder, f"{fname}_{index:03d}{suffix}")
        with open(new_fpath, "w", encoding=encoding) as fp:
            fp.write(chunk)
|
qna_prompting.py
CHANGED
@@ -25,10 +25,11 @@ qna_question_data_format = """
|
|
25 |
Example 3: `Chapter_5` for fifth chapter
|
26 |
"""
|
27 |
qna_answer_description = """
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
32 |
|
33 |
If user's answer is not a single alphabet letter, but is contextually
|
34 |
closer to a particular answer choice, return the corresponding
|
@@ -122,7 +123,6 @@ def evaluate_qna_answer(user_selected_answer: str) -> str:
|
|
122 |
|
123 |
### convert to numeric type
|
124 |
qna_answer = int(qna_answer)
|
125 |
-
|
126 |
qna_answer_alphabet = num_mapping.get(qna_answer, "ERROR")
|
127 |
|
128 |
con = sqlite3.connect(db_path)
|
@@ -138,13 +138,34 @@ def evaluate_qna_answer(user_selected_answer: str) -> str:
|
|
138 |
con.commit()
|
139 |
con.close()
|
140 |
|
141 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
st.toast("π― yummy yummy, hooray!", icon="π")
|
143 |
time.sleep(2)
|
144 |
st.toast("π»ππ― You got it right!", icon="π")
|
145 |
time.sleep(2)
|
146 |
st.toast("π₯ You are amazing! π―π―", icon="πͺ")
|
147 |
st.balloons()
|
|
|
148 |
else:
|
149 |
st.toast("πΌ Something doesn't seem right.. π₯π π₯", icon="π")
|
150 |
time.sleep(2)
|
@@ -152,17 +173,16 @@ def evaluate_qna_answer(user_selected_answer: str) -> str:
|
|
152 |
time.sleep(2)
|
153 |
st.toast("π€π€ Nevertheless, it was a good try!! ποΈββοΈποΈββοΈ", icon="π")
|
154 |
st.snow()
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
-
reasoning = "" if "textbook" in reasons else "Rationale is that: " + reasons
|
157 |
-
qna_answer_response = (
|
158 |
-
f"Your selected answer is `{user_selected_answer}`, "
|
159 |
-
f"but the actual answer is `{qna_answer_alphabet}`. " + reasoning
|
160 |
-
)
|
161 |
-
|
162 |
except Exception as e:
|
163 |
print(e)
|
164 |
|
165 |
-
return
|
166 |
|
167 |
get_qna_question_tool = FunctionTool.from_defaults(
|
168 |
fn=get_qna_question,
|
|
|
25 |
Example 3: `Chapter_5` for fifth chapter
|
26 |
"""
|
27 |
qna_answer_description = """
|
28 |
+
Not to trigger this when questions being asked, come directly from user.
|
29 |
+
Only use this tool to trigger the evaluation of user's provided input with the
|
30 |
+
correct answer of the Q&A question asked by Assistant. When user provides
|
31 |
+
answer to the question asked, they can reply in natural language or giving
|
32 |
+
the alphabet letter of which selected choice they think it's the right answer.
|
33 |
|
34 |
If user's answer is not a single alphabet letter, but is contextually
|
35 |
closer to a particular answer choice, return the corresponding
|
|
|
123 |
|
124 |
### convert to numeric type
|
125 |
qna_answer = int(qna_answer)
|
|
|
126 |
qna_answer_alphabet = num_mapping.get(qna_answer, "ERROR")
|
127 |
|
128 |
con = sqlite3.connect(db_path)
|
|
|
138 |
con.commit()
|
139 |
con.close()
|
140 |
|
141 |
+
reasoning = "" if "textbook" in reasons else f"Rationale is that: {reasons}. "
|
142 |
+
qna_answer_response = (
|
143 |
+
f"Your selected answer is `{user_selected_answer}`, "
|
144 |
+
f"but the actual answer is `{qna_answer_alphabet}`. "
|
145 |
+
)
|
146 |
+
qna_not_knowing_response = (
|
147 |
+
f"No problem! The answer is `{qna_answer_alphabet}`. "
|
148 |
+
)
|
149 |
+
to_know_more = (
|
150 |
+
"Let me know if you want to know more, "
|
151 |
+
"I can give you an explanation π»π"
|
152 |
+
)
|
153 |
+
|
154 |
+
if user_answer_numeric == 0:
|
155 |
+
st.toast("π―β couldn't find the honey? π no worries!", icon="π« ")
|
156 |
+
time.sleep(2)
|
157 |
+
st.toast("π» Let me bring it to you! π―π", icon="π")
|
158 |
+
time.sleep(2)
|
159 |
+
st.toast("β¨ You will do great next time! π", icon="π")
|
160 |
+
final_response = qna_not_knowing_response + reasoning + to_know_more
|
161 |
+
elif qna_answer == user_answer_numeric:
|
162 |
st.toast("π― yummy yummy, hooray!", icon="π")
|
163 |
time.sleep(2)
|
164 |
st.toast("π»ππ― You got it right!", icon="π")
|
165 |
time.sleep(2)
|
166 |
st.toast("π₯ You are amazing! π―π―", icon="πͺ")
|
167 |
st.balloons()
|
168 |
+
final_response = qna_answer_response + reasoning + to_know_more
|
169 |
else:
|
170 |
st.toast("πΌ Something doesn't seem right.. π₯π π₯", icon="π")
|
171 |
time.sleep(2)
|
|
|
173 |
time.sleep(2)
|
174 |
st.toast("π€π€ Nevertheless, it was a good try!! ποΈββοΈποΈββοΈ", icon="π")
|
175 |
st.snow()
|
176 |
+
final_response = qna_answer_response + reasoning + to_know_more
|
177 |
+
|
178 |
+
st.session_state.question_id = None
|
179 |
+
st.session_state.qna_answer = None
|
180 |
+
st.session_state.reasons = None
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
except Exception as e:
|
183 |
print(e)
|
184 |
|
185 |
+
return final_response
|
186 |
|
187 |
get_qna_question_tool = FunctionTool.from_defaults(
|
188 |
fn=get_qna_question,
|
streamlit_app.py
CHANGED
@@ -40,7 +40,7 @@ nest_asyncio.apply()
|
|
40 |
st.set_page_config(page_title="π»π Study Bear π―")
|
41 |
openai_api = os.getenv("OPENAI_API_KEY")
|
42 |
|
43 |
-
with open("./config/
|
44 |
model_config = yaml.safe_load(file_reader)
|
45 |
|
46 |
input_files = model_config["input_data"]["source"]
|
|
|
40 |
st.set_page_config(page_title="π»π Study Bear π―")
|
41 |
openai_api = os.getenv("OPENAI_API_KEY")
|
42 |
|
43 |
+
with open("./config/model_config_advanced.yml", "r") as file_reader:
|
44 |
model_config = yaml.safe_load(file_reader)
|
45 |
|
46 |
input_files = model_config["input_data"]["source"]
|