Spaces:
Sleeping
Sleeping
latest code/data
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +0 -0
- Llama-2-eval/data/datasets/ms_macro/dataset_info.json +0 -95
- Llama-2-eval/data/datasets/ms_macro/state.json +0 -13
- Llama-2-eval/data/results/results_full-a40.csv +0 -10
- Llama-2-eval/data/results/results_full-l40.csv +0 -10
- Llama-2-eval/notebook/baseline.ipynb +0 -1983
- Llama-2-eval/notebook/metrics.ipynb +0 -1293
- Makefile +25 -0
- README.md +132 -17
- app.py +62 -92
- app_modules/llm_loader.py +8 -3
- app_modules/utils.py +1 -153
- data/datasets/WebQSP.test.wikidata.json +0 -0
- data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.060.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2.json} +2 -2
- data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.120.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2_evaluated.json} +2 -2
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2_evaluated.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2.json +3 -0
- data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2_evaluated.json +3 -0
- Llama-2-eval/data/datasets/ms_macro/data-00000-of-00001.arrow → data/eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2.json +2 -2
- data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.000.txt → eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2_evaluated.json} +2 -2
- data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2.json +3 -0
- data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2_evaluated.json +3 -0
- data/eval/Llama-2-70b-chat-hf_wd_RP_1.040-t2.json +3 -0
.gitattributes
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
Llama-2-eval/data/datasets/ms_macro/dataset_info.json
DELETED
@@ -1,95 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"builder_name": "parquet",
|
3 |
-
"citation": "",
|
4 |
-
"config_name": "default",
|
5 |
-
"dataset_name": "ms-macro-wellformed_only",
|
6 |
-
"dataset_size": 726469485,
|
7 |
-
"description": "",
|
8 |
-
"download_checksums": {
|
9 |
-
"hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00000-of-00002-0a6f58dc7ee03f61.parquet": {
|
10 |
-
"num_bytes": 164629356,
|
11 |
-
"checksum": null
|
12 |
-
},
|
13 |
-
"hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00001-of-00002-5262fd5ec1911156.parquet": {
|
14 |
-
"num_bytes": 164721520,
|
15 |
-
"checksum": null
|
16 |
-
},
|
17 |
-
"hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/test-00000-of-00001-f965dd5a841915d3.parquet": {
|
18 |
-
"num_bytes": 26541566,
|
19 |
-
"checksum": null
|
20 |
-
}
|
21 |
-
},
|
22 |
-
"download_size": 355892442,
|
23 |
-
"features": {
|
24 |
-
"answers": {
|
25 |
-
"feature": {
|
26 |
-
"dtype": "string",
|
27 |
-
"_type": "Value"
|
28 |
-
},
|
29 |
-
"_type": "Sequence"
|
30 |
-
},
|
31 |
-
"passages": {
|
32 |
-
"feature": {
|
33 |
-
"is_selected": {
|
34 |
-
"dtype": "int32",
|
35 |
-
"_type": "Value"
|
36 |
-
},
|
37 |
-
"passage_text": {
|
38 |
-
"dtype": "string",
|
39 |
-
"_type": "Value"
|
40 |
-
},
|
41 |
-
"url": {
|
42 |
-
"dtype": "string",
|
43 |
-
"_type": "Value"
|
44 |
-
}
|
45 |
-
},
|
46 |
-
"_type": "Sequence"
|
47 |
-
},
|
48 |
-
"query": {
|
49 |
-
"dtype": "string",
|
50 |
-
"_type": "Value"
|
51 |
-
},
|
52 |
-
"query_id": {
|
53 |
-
"dtype": "int32",
|
54 |
-
"_type": "Value"
|
55 |
-
},
|
56 |
-
"query_type": {
|
57 |
-
"dtype": "string",
|
58 |
-
"_type": "Value"
|
59 |
-
},
|
60 |
-
"wellFormedAnswers": {
|
61 |
-
"feature": {
|
62 |
-
"dtype": "string",
|
63 |
-
"_type": "Value"
|
64 |
-
},
|
65 |
-
"_type": "Sequence"
|
66 |
-
}
|
67 |
-
},
|
68 |
-
"homepage": "",
|
69 |
-
"license": "",
|
70 |
-
"size_in_bytes": 1082361927,
|
71 |
-
"splits": {
|
72 |
-
"train": {
|
73 |
-
"name": "train",
|
74 |
-
"num_bytes": 674327331,
|
75 |
-
"num_examples": 153725,
|
76 |
-
"shard_lengths": [
|
77 |
-
116863,
|
78 |
-
36862
|
79 |
-
],
|
80 |
-
"dataset_name": "ms-macro-wellformed_only"
|
81 |
-
},
|
82 |
-
"test": {
|
83 |
-
"name": "test",
|
84 |
-
"num_bytes": 52142154,
|
85 |
-
"num_examples": 12467,
|
86 |
-
"dataset_name": "ms-macro-wellformed_only"
|
87 |
-
}
|
88 |
-
},
|
89 |
-
"version": {
|
90 |
-
"version_str": "0.0.0",
|
91 |
-
"major": 0,
|
92 |
-
"minor": 0,
|
93 |
-
"patch": 0
|
94 |
-
}
|
95 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Llama-2-eval/data/datasets/ms_macro/state.json
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_data_files": [
|
3 |
-
{
|
4 |
-
"filename": "data-00000-of-00001.arrow"
|
5 |
-
}
|
6 |
-
],
|
7 |
-
"_fingerprint": "fe2a26ddba75833a",
|
8 |
-
"_format_columns": null,
|
9 |
-
"_format_kwargs": {},
|
10 |
-
"_format_type": null,
|
11 |
-
"_output_all_columns": false,
|
12 |
-
"_split": "test"
|
13 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Llama-2-eval/data/results/results_full-a40.csv
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
|
2 |
-
gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
|
3 |
-
gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
|
4 |
-
Llama-2-13b-chat-hf,1.12,2133.992,1.66,33389,24007,15.646,1.391,0.163,0.3345,0.2031,0.3756,0.1632,0.2962,0.1388,0.3045,0.3423,0.5302,0.1846,0.3694,0.719
|
5 |
-
vicuna-13b-v1.1,1.095,2212.946,1.682,35308,26456,15.955,1.335,0.1285,0.2319,0.1991,0.2812,0.1556,0.2644,0.2009,0.2768,0.3159,0.5761,0.1853,0.3276,0.749
|
6 |
-
Llama-2-7b-chat-hf,1.19,1280.314,1.793,34349,23987,26.829,1.432,0.1274,0.2383,0.1836,0.2621,0.1572,0.2754,0.17,0.2911,0.3631,0.5383,0.1781,0.3209,0.698
|
7 |
-
vicuna-7b-v1.1,1.095,975.73,1.574,25932,18714,26.577,1.386,0.1664,0.2838,0.2227,0.3118,0.166,0.2351,0.259,0.2753,0.4542,0.5838,0.2218,0.3379,0.722
|
8 |
-
wizardLM-7B-HF,1.095,1265.93,1.667,33570,24003,26.518,1.399,0.1367,0.2584,0.2027,0.2882,0.1358,0.2592,0.1985,0.3085,0.4154,0.5794,0.1866,0.3384,0.715
|
9 |
-
mpt-7b-instruct,1.05,2071.066,1.42,12374,9927,5.975,1.246,0.1804,0.285,0.2589,0.2556,0.2383,0.2468,0.2635,0.2571,0.3512,0.4042,0.2509,0.2897,0.802
|
10 |
-
gpt4all-j,1.095,5603.316,1.706,31502,27099,5.622,1.162,0.1236,0.2406,0.1708,0.2511,0.143,0.255,0.194,0.2941,0.3721,0.5337,0.1737,0.3153,0.860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Llama-2-eval/data/results/results_full-l40.csv
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,words_per_token_l40,words_per_second,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
|
2 |
-
gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.867,,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
|
3 |
-
gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.871,,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
|
4 |
-
Llama-2-13b-chat-hf,1.12,1687.637,1.785,32808,23575,19.44,1.392,0.718,13.969,0.1612,0.3305,0.2061,0.3701,0.1675,0.3018,0.141,0.305,0.3394,0.5288,0.1866,0.368,0.719
|
5 |
-
vicuna-13b-v1.1,1.095,1799.165,2.197,35543,26613,19.755,1.336,0.749,14.792,0.1274,0.2321,0.1994,0.2834,0.154,0.2631,0.1984,0.2773,0.3194,0.5759,0.1844,0.3256,0.749
|
6 |
-
Llama-2-7b-chat-hf,1.19,1002.46,6.606,34686,24229,34.601,1.432,0.698,24.170,0.1269,0.2404,0.1824,0.2614,0.157,0.2769,0.1687,0.2896,0.3565,0.5378,0.177,0.3214,0.699
|
7 |
-
vicuna-7b-v1.1,1.095,758.227,1.432,25827,18638,34.062,1.386,0.722,24.581,0.1673,0.2859,0.2221,0.3096,0.1655,0.2327,0.2576,0.2717,0.4564,0.5849,0.2216,0.3387,0.722
|
8 |
-
wizardLM-7B-HF,1.095,998.702,1.683,33674,23996,33.718,1.403,0.713,24.027,0.1372,0.259,0.2046,0.2878,0.1354,0.2588,0.1982,0.3083,0.4154,0.5769,0.187,0.3383,0.713
|
9 |
-
mpt-7b-instruct,1.05,1622.435,1.338,12607,10139,7.77,1.243,0.805,6.249,0.1751,0.2756,0.2569,0.2625,0.2349,0.2456,0.2466,0.2566,0.3522,0.4049,0.2455,0.2889,0.804
|
10 |
-
gpt4all-j,1.095,3794.429,1.611,31719,27286,8.359,1.162,0.861,7.191,0.1262,0.2443,0.1669,0.251,0.1394,0.2505,0.1937,0.2968,0.3693,0.5348,0.1719,0.3151,0.860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Llama-2-eval/notebook/baseline.ipynb
DELETED
@@ -1,1983 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": 5,
|
6 |
-
"id": "a6d96660",
|
7 |
-
"metadata": {},
|
8 |
-
"outputs": [
|
9 |
-
{
|
10 |
-
"data": {
|
11 |
-
"text/plain": [
|
12 |
-
"True"
|
13 |
-
]
|
14 |
-
},
|
15 |
-
"execution_count": 5,
|
16 |
-
"metadata": {},
|
17 |
-
"output_type": "execute_result"
|
18 |
-
}
|
19 |
-
],
|
20 |
-
"source": [
|
21 |
-
"import os\n",
|
22 |
-
"from dotenv import load_dotenv\n",
|
23 |
-
"\n",
|
24 |
-
"load_dotenv()"
|
25 |
-
]
|
26 |
-
},
|
27 |
-
{
|
28 |
-
"cell_type": "code",
|
29 |
-
"execution_count": 11,
|
30 |
-
"id": "7510ab87",
|
31 |
-
"metadata": {},
|
32 |
-
"outputs": [
|
33 |
-
{
|
34 |
-
"data": {
|
35 |
-
"text/plain": [
|
36 |
-
"DatasetDict({\n",
|
37 |
-
" train: Dataset({\n",
|
38 |
-
" features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
|
39 |
-
" num_rows: 153725\n",
|
40 |
-
" })\n",
|
41 |
-
" test: Dataset({\n",
|
42 |
-
" features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
|
43 |
-
" num_rows: 12467\n",
|
44 |
-
" })\n",
|
45 |
-
"})"
|
46 |
-
]
|
47 |
-
},
|
48 |
-
"execution_count": 11,
|
49 |
-
"metadata": {},
|
50 |
-
"output_type": "execute_result"
|
51 |
-
}
|
52 |
-
],
|
53 |
-
"source": [
|
54 |
-
"from datasets import load_dataset\n",
|
55 |
-
"\n",
|
56 |
-
"dataset = load_dataset(\"zhengxuanzenwu/ms-macro-wellformed_only\")\n",
|
57 |
-
"dataset"
|
58 |
-
]
|
59 |
-
},
|
60 |
-
{
|
61 |
-
"cell_type": "code",
|
62 |
-
"execution_count": 12,
|
63 |
-
"id": "1f4f0e76",
|
64 |
-
"metadata": {},
|
65 |
-
"outputs": [
|
66 |
-
{
|
67 |
-
"data": {
|
68 |
-
"text/html": [
|
69 |
-
"<div>\n",
|
70 |
-
"<style scoped>\n",
|
71 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
72 |
-
" vertical-align: middle;\n",
|
73 |
-
" }\n",
|
74 |
-
"\n",
|
75 |
-
" .dataframe tbody tr th {\n",
|
76 |
-
" vertical-align: top;\n",
|
77 |
-
" }\n",
|
78 |
-
"\n",
|
79 |
-
" .dataframe thead th {\n",
|
80 |
-
" text-align: right;\n",
|
81 |
-
" }\n",
|
82 |
-
"</style>\n",
|
83 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
84 |
-
" <thead>\n",
|
85 |
-
" <tr style=\"text-align: right;\">\n",
|
86 |
-
" <th></th>\n",
|
87 |
-
" <th>answers</th>\n",
|
88 |
-
" <th>passages</th>\n",
|
89 |
-
" <th>query</th>\n",
|
90 |
-
" <th>query_id</th>\n",
|
91 |
-
" <th>query_type</th>\n",
|
92 |
-
" <th>wellFormedAnswers</th>\n",
|
93 |
-
" </tr>\n",
|
94 |
-
" </thead>\n",
|
95 |
-
" <tbody>\n",
|
96 |
-
" <tr>\n",
|
97 |
-
" <th>0</th>\n",
|
98 |
-
" <td>[2,662]</td>\n",
|
99 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
|
100 |
-
" <td>albany mn population</td>\n",
|
101 |
-
" <td>15177</td>\n",
|
102 |
-
" <td>NUMERIC</td>\n",
|
103 |
-
" <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
|
104 |
-
" </tr>\n",
|
105 |
-
" <tr>\n",
|
106 |
-
" <th>1</th>\n",
|
107 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
108 |
-
" <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
|
109 |
-
" <td>current weather in volcano, ca</td>\n",
|
110 |
-
" <td>114414</td>\n",
|
111 |
-
" <td>DESCRIPTION</td>\n",
|
112 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
113 |
-
" </tr>\n",
|
114 |
-
" <tr>\n",
|
115 |
-
" <th>2</th>\n",
|
116 |
-
" <td>[Hippocrates]</td>\n",
|
117 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
|
118 |
-
" <td>____________________ is considered the father ...</td>\n",
|
119 |
-
" <td>9083</td>\n",
|
120 |
-
" <td>DESCRIPTION</td>\n",
|
121 |
-
" <td>[Hippocrates is considered the father of moder...</td>\n",
|
122 |
-
" </tr>\n",
|
123 |
-
" <tr>\n",
|
124 |
-
" <th>3</th>\n",
|
125 |
-
" <td>[120 days from the date of the Note.]</td>\n",
|
126 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
127 |
-
" <td>how many days is an appraisal good for a fanni...</td>\n",
|
128 |
-
" <td>281439</td>\n",
|
129 |
-
" <td>NUMERIC</td>\n",
|
130 |
-
" <td>[An appraisal is good for 120 days from the da...</td>\n",
|
131 |
-
" </tr>\n",
|
132 |
-
" <tr>\n",
|
133 |
-
" <th>4</th>\n",
|
134 |
-
" <td>[From $26,000 to $39,000 a year]</td>\n",
|
135 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
136 |
-
" <td>average pharmacy tech salary</td>\n",
|
137 |
-
" <td>40287</td>\n",
|
138 |
-
" <td>NUMERIC</td>\n",
|
139 |
-
" <td>[The average salary for a pharmacy technician ...</td>\n",
|
140 |
-
" </tr>\n",
|
141 |
-
" </tbody>\n",
|
142 |
-
"</table>\n",
|
143 |
-
"</div>"
|
144 |
-
],
|
145 |
-
"text/plain": [
|
146 |
-
" answers \\\n",
|
147 |
-
"0 [2,662] \n",
|
148 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
149 |
-
"2 [Hippocrates] \n",
|
150 |
-
"3 [120 days from the date of the Note.] \n",
|
151 |
-
"4 [From $26,000 to $39,000 a year] \n",
|
152 |
-
"\n",
|
153 |
-
" passages \\\n",
|
154 |
-
"0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
|
155 |
-
"1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
|
156 |
-
"2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
|
157 |
-
"3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
158 |
-
"4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
159 |
-
"\n",
|
160 |
-
" query query_id query_type \\\n",
|
161 |
-
"0 albany mn population 15177 NUMERIC \n",
|
162 |
-
"1 current weather in volcano, ca 114414 DESCRIPTION \n",
|
163 |
-
"2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
|
164 |
-
"3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
|
165 |
-
"4 average pharmacy tech salary 40287 NUMERIC \n",
|
166 |
-
"\n",
|
167 |
-
" wellFormedAnswers \n",
|
168 |
-
"0 [The population of Albany, Minnesota is 2,662. ] \n",
|
169 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
170 |
-
"2 [Hippocrates is considered the father of moder... \n",
|
171 |
-
"3 [An appraisal is good for 120 days from the da... \n",
|
172 |
-
"4 [The average salary for a pharmacy technician ... "
|
173 |
-
]
|
174 |
-
},
|
175 |
-
"execution_count": 12,
|
176 |
-
"metadata": {},
|
177 |
-
"output_type": "execute_result"
|
178 |
-
}
|
179 |
-
],
|
180 |
-
"source": [
|
181 |
-
"df = dataset[\"test\"].to_pandas()\n",
|
182 |
-
"df.head()"
|
183 |
-
]
|
184 |
-
},
|
185 |
-
{
|
186 |
-
"cell_type": "code",
|
187 |
-
"execution_count": 15,
|
188 |
-
"id": "3e9b4cef",
|
189 |
-
"metadata": {},
|
190 |
-
"outputs": [
|
191 |
-
{
|
192 |
-
"data": {
|
193 |
-
"text/plain": [
|
194 |
-
"{'answers': ['2,662'],\n",
|
195 |
-
" 'passages': {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0],\n",
|
196 |
-
" 'passage_text': ['City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.',\n",
|
197 |
-
" 'Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.',\n",
|
198 |
-
" 'For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
|
199 |
-
" 'Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.',\n",
|
200 |
-
" 'Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
|
201 |
-
" 'Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).',\n",
|
202 |
-
" \"For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\",\n",
|
203 |
-
" \"For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"],\n",
|
204 |
-
" 'url': ['http://zipcode.org/city/MN/ALBANY',\n",
|
205 |
-
" 'http://www.city-data.com/zips/56307.html',\n",
|
206 |
-
" 'https://en.wikipedia.org/wiki/Albany,_Minnesota',\n",
|
207 |
-
" 'http://ci.albany.mn.us/index.asp?SEC=A8341FEC-6B8C-47D2-926B-75A89ED4C539&Type=B_BASIC',\n",
|
208 |
-
" 'https://www.mapquest.com/us/mn/albany-282023394',\n",
|
209 |
-
" 'http://www.city-data.com/city/Albany-Minnesota.html',\n",
|
210 |
-
" 'http://www.city-data.com/zips/56307.html',\n",
|
211 |
-
" 'http://www.city-data.com/city/Albany-Minnesota.html']},\n",
|
212 |
-
" 'query': 'albany mn population',\n",
|
213 |
-
" 'query_id': 15177,\n",
|
214 |
-
" 'query_type': 'NUMERIC',\n",
|
215 |
-
" 'wellFormedAnswers': ['The population of Albany, Minnesota is 2,662. ']}"
|
216 |
-
]
|
217 |
-
},
|
218 |
-
"execution_count": 15,
|
219 |
-
"metadata": {},
|
220 |
-
"output_type": "execute_result"
|
221 |
-
}
|
222 |
-
],
|
223 |
-
"source": [
|
224 |
-
"test = dataset[\"test\"]\n",
|
225 |
-
"test[0]"
|
226 |
-
]
|
227 |
-
},
|
228 |
-
{
|
229 |
-
"cell_type": "code",
|
230 |
-
"execution_count": 24,
|
231 |
-
"id": "104dfbea",
|
232 |
-
"metadata": {},
|
233 |
-
"outputs": [
|
234 |
-
{
|
235 |
-
"data": {
|
236 |
-
"text/plain": [
|
237 |
-
"12467"
|
238 |
-
]
|
239 |
-
},
|
240 |
-
"execution_count": 24,
|
241 |
-
"metadata": {},
|
242 |
-
"output_type": "execute_result"
|
243 |
-
}
|
244 |
-
],
|
245 |
-
"source": [
|
246 |
-
"test.num_rows"
|
247 |
-
]
|
248 |
-
},
|
249 |
-
{
|
250 |
-
"cell_type": "code",
|
251 |
-
"execution_count": 35,
|
252 |
-
"id": "01b3a886",
|
253 |
-
"metadata": {},
|
254 |
-
"outputs": [
|
255 |
-
{
|
256 |
-
"data": {
|
257 |
-
"text/plain": [
|
258 |
-
"({'NUMERIC': 3685,\n",
|
259 |
-
" 'DESCRIPTION': 5487,\n",
|
260 |
-
" 'ENTITY': 1077,\n",
|
261 |
-
" 'PERSON': 868,\n",
|
262 |
-
" 'LOCATION': 1350},\n",
|
263 |
-
" {'NUMERIC': 179,\n",
|
264 |
-
" 'DESCRIPTION': 218,\n",
|
265 |
-
" 'ENTITY': 2403,\n",
|
266 |
-
" 'LOCATION': 2559,\n",
|
267 |
-
" 'PERSON': 3966})"
|
268 |
-
]
|
269 |
-
},
|
270 |
-
"execution_count": 35,
|
271 |
-
"metadata": {},
|
272 |
-
"output_type": "execute_result"
|
273 |
-
}
|
274 |
-
],
|
275 |
-
"source": [
|
276 |
-
"counts = {}\n",
|
277 |
-
"indices = {}\n",
|
278 |
-
"size = 100\n",
|
279 |
-
"for i in range(test.num_rows):\n",
|
280 |
-
" row = test[i]\n",
|
281 |
-
" query_type = row[\"query_type\"]\n",
|
282 |
-
" if query_type in counts:\n",
|
283 |
-
" counts[query_type] += 1\n",
|
284 |
-
" else:\n",
|
285 |
-
" counts[query_type] = 1\n",
|
286 |
-
" if counts[query_type] == size:\n",
|
287 |
-
" indices[query_type] = i\n",
|
288 |
-
"counts, indices"
|
289 |
-
]
|
290 |
-
},
|
291 |
-
{
|
292 |
-
"cell_type": "code",
|
293 |
-
"execution_count": 39,
|
294 |
-
"id": "967bc1cd",
|
295 |
-
"metadata": {},
|
296 |
-
"outputs": [],
|
297 |
-
"source": [
|
298 |
-
"# create new dataset exluding those idx\n",
|
299 |
-
"baseline = test.select(\n",
|
300 |
-
" (i for i in range(len(test)) if i <= indices[test[i][\"query_type\"]])\n",
|
301 |
-
")"
|
302 |
-
]
|
303 |
-
},
|
304 |
-
{
|
305 |
-
"cell_type": "code",
|
306 |
-
"execution_count": 40,
|
307 |
-
"id": "9a5fcad5",
|
308 |
-
"metadata": {},
|
309 |
-
"outputs": [
|
310 |
-
{
|
311 |
-
"data": {
|
312 |
-
"text/plain": [
|
313 |
-
"Dataset({\n",
|
314 |
-
" features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
|
315 |
-
" num_rows: 500\n",
|
316 |
-
"})"
|
317 |
-
]
|
318 |
-
},
|
319 |
-
"execution_count": 40,
|
320 |
-
"metadata": {},
|
321 |
-
"output_type": "execute_result"
|
322 |
-
}
|
323 |
-
],
|
324 |
-
"source": [
|
325 |
-
"baseline"
|
326 |
-
]
|
327 |
-
},
|
328 |
-
{
|
329 |
-
"cell_type": "code",
|
330 |
-
"execution_count": 41,
|
331 |
-
"id": "0524a973",
|
332 |
-
"metadata": {},
|
333 |
-
"outputs": [
|
334 |
-
{
|
335 |
-
"data": {
|
336 |
-
"text/html": [
|
337 |
-
"<div>\n",
|
338 |
-
"<style scoped>\n",
|
339 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
340 |
-
" vertical-align: middle;\n",
|
341 |
-
" }\n",
|
342 |
-
"\n",
|
343 |
-
" .dataframe tbody tr th {\n",
|
344 |
-
" vertical-align: top;\n",
|
345 |
-
" }\n",
|
346 |
-
"\n",
|
347 |
-
" .dataframe thead th {\n",
|
348 |
-
" text-align: right;\n",
|
349 |
-
" }\n",
|
350 |
-
"</style>\n",
|
351 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
352 |
-
" <thead>\n",
|
353 |
-
" <tr style=\"text-align: right;\">\n",
|
354 |
-
" <th></th>\n",
|
355 |
-
" <th>answers</th>\n",
|
356 |
-
" <th>passages</th>\n",
|
357 |
-
" <th>query</th>\n",
|
358 |
-
" <th>query_id</th>\n",
|
359 |
-
" <th>query_type</th>\n",
|
360 |
-
" <th>wellFormedAnswers</th>\n",
|
361 |
-
" </tr>\n",
|
362 |
-
" </thead>\n",
|
363 |
-
" <tbody>\n",
|
364 |
-
" <tr>\n",
|
365 |
-
" <th>0</th>\n",
|
366 |
-
" <td>[2,662]</td>\n",
|
367 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
|
368 |
-
" <td>albany mn population</td>\n",
|
369 |
-
" <td>15177</td>\n",
|
370 |
-
" <td>NUMERIC</td>\n",
|
371 |
-
" <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
|
372 |
-
" </tr>\n",
|
373 |
-
" <tr>\n",
|
374 |
-
" <th>1</th>\n",
|
375 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
376 |
-
" <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
|
377 |
-
" <td>current weather in volcano, ca</td>\n",
|
378 |
-
" <td>114414</td>\n",
|
379 |
-
" <td>DESCRIPTION</td>\n",
|
380 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
381 |
-
" </tr>\n",
|
382 |
-
" <tr>\n",
|
383 |
-
" <th>2</th>\n",
|
384 |
-
" <td>[Hippocrates]</td>\n",
|
385 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
|
386 |
-
" <td>____________________ is considered the father ...</td>\n",
|
387 |
-
" <td>9083</td>\n",
|
388 |
-
" <td>DESCRIPTION</td>\n",
|
389 |
-
" <td>[Hippocrates is considered the father of moder...</td>\n",
|
390 |
-
" </tr>\n",
|
391 |
-
" <tr>\n",
|
392 |
-
" <th>3</th>\n",
|
393 |
-
" <td>[120 days from the date of the Note.]</td>\n",
|
394 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
395 |
-
" <td>how many days is an appraisal good for a fanni...</td>\n",
|
396 |
-
" <td>281439</td>\n",
|
397 |
-
" <td>NUMERIC</td>\n",
|
398 |
-
" <td>[An appraisal is good for 120 days from the da...</td>\n",
|
399 |
-
" </tr>\n",
|
400 |
-
" <tr>\n",
|
401 |
-
" <th>4</th>\n",
|
402 |
-
" <td>[From $26,000 to $39,000 a year]</td>\n",
|
403 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
404 |
-
" <td>average pharmacy tech salary</td>\n",
|
405 |
-
" <td>40287</td>\n",
|
406 |
-
" <td>NUMERIC</td>\n",
|
407 |
-
" <td>[The average salary for a pharmacy technician ...</td>\n",
|
408 |
-
" </tr>\n",
|
409 |
-
" <tr>\n",
|
410 |
-
" <th>...</th>\n",
|
411 |
-
" <td>...</td>\n",
|
412 |
-
" <td>...</td>\n",
|
413 |
-
" <td>...</td>\n",
|
414 |
-
" <td>...</td>\n",
|
415 |
-
" <td>...</td>\n",
|
416 |
-
" <td>...</td>\n",
|
417 |
-
" </tr>\n",
|
418 |
-
" <tr>\n",
|
419 |
-
" <th>495</th>\n",
|
420 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
421 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
|
422 |
-
" <td>the pool shower company</td>\n",
|
423 |
-
" <td>518269</td>\n",
|
424 |
-
" <td>PERSON</td>\n",
|
425 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
426 |
-
" </tr>\n",
|
427 |
-
" <tr>\n",
|
428 |
-
" <th>496</th>\n",
|
429 |
-
" <td>[Hanson]</td>\n",
|
430 |
-
" <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
|
431 |
-
" <td>longest tenured american football players</td>\n",
|
432 |
-
" <td>442806</td>\n",
|
433 |
-
" <td>PERSON</td>\n",
|
434 |
-
" <td>[Hanson is the longest tenured American footba...</td>\n",
|
435 |
-
" </tr>\n",
|
436 |
-
" <tr>\n",
|
437 |
-
" <th>497</th>\n",
|
438 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
439 |
-
" <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
|
440 |
-
" <td>mt. view baptist in pendleton sc</td>\n",
|
441 |
-
" <td>460250</td>\n",
|
442 |
-
" <td>PERSON</td>\n",
|
443 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
444 |
-
" </tr>\n",
|
445 |
-
" <tr>\n",
|
446 |
-
" <th>498</th>\n",
|
447 |
-
" <td>[Honeysuckle Weeks]</td>\n",
|
448 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
|
449 |
-
" <td>what actress disappeared for a while</td>\n",
|
450 |
-
" <td>549739</td>\n",
|
451 |
-
" <td>PERSON</td>\n",
|
452 |
-
" <td>[The actress disappeared for a while Honeysuck...</td>\n",
|
453 |
-
" </tr>\n",
|
454 |
-
" <tr>\n",
|
455 |
-
" <th>499</th>\n",
|
456 |
-
" <td>[African-Nguni]</td>\n",
|
457 |
-
" <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
|
458 |
-
" <td>what ethnicity is the surname sabol</td>\n",
|
459 |
-
" <td>658265</td>\n",
|
460 |
-
" <td>PERSON</td>\n",
|
461 |
-
" <td>[The ethnicity of the surname Sabol is African...</td>\n",
|
462 |
-
" </tr>\n",
|
463 |
-
" </tbody>\n",
|
464 |
-
"</table>\n",
|
465 |
-
"<p>500 rows × 6 columns</p>\n",
|
466 |
-
"</div>"
|
467 |
-
],
|
468 |
-
"text/plain": [
|
469 |
-
" answers \\\n",
|
470 |
-
"0 [2,662] \n",
|
471 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
472 |
-
"2 [Hippocrates] \n",
|
473 |
-
"3 [120 days from the date of the Note.] \n",
|
474 |
-
"4 [From $26,000 to $39,000 a year] \n",
|
475 |
-
".. ... \n",
|
476 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
477 |
-
"496 [Hanson] \n",
|
478 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
479 |
-
"498 [Honeysuckle Weeks] \n",
|
480 |
-
"499 [African-Nguni] \n",
|
481 |
-
"\n",
|
482 |
-
" passages \\\n",
|
483 |
-
"0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
|
484 |
-
"1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
|
485 |
-
"2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
|
486 |
-
"3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
487 |
-
"4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
488 |
-
".. ... \n",
|
489 |
-
"495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
|
490 |
-
"496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
|
491 |
-
"497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
|
492 |
-
"498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
|
493 |
-
"499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
|
494 |
-
"\n",
|
495 |
-
" query query_id query_type \\\n",
|
496 |
-
"0 albany mn population 15177 NUMERIC \n",
|
497 |
-
"1 current weather in volcano, ca 114414 DESCRIPTION \n",
|
498 |
-
"2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
|
499 |
-
"3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
|
500 |
-
"4 average pharmacy tech salary 40287 NUMERIC \n",
|
501 |
-
".. ... ... ... \n",
|
502 |
-
"495 the pool shower company 518269 PERSON \n",
|
503 |
-
"496 longest tenured american football players 442806 PERSON \n",
|
504 |
-
"497 mt. view baptist in pendleton sc 460250 PERSON \n",
|
505 |
-
"498 what actress disappeared for a while 549739 PERSON \n",
|
506 |
-
"499 what ethnicity is the surname sabol 658265 PERSON \n",
|
507 |
-
"\n",
|
508 |
-
" wellFormedAnswers \n",
|
509 |
-
"0 [The population of Albany, Minnesota is 2,662. ] \n",
|
510 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
511 |
-
"2 [Hippocrates is considered the father of moder... \n",
|
512 |
-
"3 [An appraisal is good for 120 days from the da... \n",
|
513 |
-
"4 [The average salary for a pharmacy technician ... \n",
|
514 |
-
".. ... \n",
|
515 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
516 |
-
"496 [Hanson is the longest tenured American footba... \n",
|
517 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
518 |
-
"498 [The actress disappeared for a while Honeysuck... \n",
|
519 |
-
"499 [The ethnicity of the surname Sabol is African... \n",
|
520 |
-
"\n",
|
521 |
-
"[500 rows x 6 columns]"
|
522 |
-
]
|
523 |
-
},
|
524 |
-
"execution_count": 41,
|
525 |
-
"metadata": {},
|
526 |
-
"output_type": "execute_result"
|
527 |
-
}
|
528 |
-
],
|
529 |
-
"source": [
|
530 |
-
"baseline.to_pandas()"
|
531 |
-
]
|
532 |
-
},
|
533 |
-
{
|
534 |
-
"cell_type": "code",
|
535 |
-
"execution_count": 42,
|
536 |
-
"id": "57a195e0",
|
537 |
-
"metadata": {},
|
538 |
-
"outputs": [
|
539 |
-
{
|
540 |
-
"data": {
|
541 |
-
"application/vnd.jupyter.widget-view+json": {
|
542 |
-
"model_id": "66abd394cb054cf1b7459e92d4763d02",
|
543 |
-
"version_major": 2,
|
544 |
-
"version_minor": 0
|
545 |
-
},
|
546 |
-
"text/plain": [
|
547 |
-
"Saving the dataset (0/1 shards): 0%| | 0/500 [00:00<?, ? examples/s]"
|
548 |
-
]
|
549 |
-
},
|
550 |
-
"metadata": {},
|
551 |
-
"output_type": "display_data"
|
552 |
-
}
|
553 |
-
],
|
554 |
-
"source": [
|
555 |
-
"baseline.save_to_disk(\"../data/datasets/ms_macro/\")"
|
556 |
-
]
|
557 |
-
},
|
558 |
-
{
|
559 |
-
"cell_type": "code",
|
560 |
-
"execution_count": 44,
|
561 |
-
"id": "b72bf3f9",
|
562 |
-
"metadata": {},
|
563 |
-
"outputs": [
|
564 |
-
{
|
565 |
-
"data": {
|
566 |
-
"text/plain": [
|
567 |
-
"Dataset({\n",
|
568 |
-
" features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
|
569 |
-
" num_rows: 500\n",
|
570 |
-
"})"
|
571 |
-
]
|
572 |
-
},
|
573 |
-
"execution_count": 44,
|
574 |
-
"metadata": {},
|
575 |
-
"output_type": "execute_result"
|
576 |
-
}
|
577 |
-
],
|
578 |
-
"source": [
|
579 |
-
"from datasets import load_from_disk\n",
|
580 |
-
"\n",
|
581 |
-
"new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
|
582 |
-
"new_ds"
|
583 |
-
]
|
584 |
-
},
|
585 |
-
{
|
586 |
-
"cell_type": "code",
|
587 |
-
"execution_count": 45,
|
588 |
-
"id": "051bd771",
|
589 |
-
"metadata": {},
|
590 |
-
"outputs": [
|
591 |
-
{
|
592 |
-
"data": {
|
593 |
-
"text/plain": [
|
594 |
-
"({'NUMERIC': 100,\n",
|
595 |
-
" 'DESCRIPTION': 100,\n",
|
596 |
-
" 'ENTITY': 100,\n",
|
597 |
-
" 'PERSON': 100,\n",
|
598 |
-
" 'LOCATION': 100},\n",
|
599 |
-
" {'NUMERIC': 179,\n",
|
600 |
-
" 'DESCRIPTION': 215,\n",
|
601 |
-
" 'ENTITY': 443,\n",
|
602 |
-
" 'LOCATION': 461,\n",
|
603 |
-
" 'PERSON': 499})"
|
604 |
-
]
|
605 |
-
},
|
606 |
-
"execution_count": 45,
|
607 |
-
"metadata": {},
|
608 |
-
"output_type": "execute_result"
|
609 |
-
}
|
610 |
-
],
|
611 |
-
"source": [
|
612 |
-
"counts = {}\n",
|
613 |
-
"indices = {}\n",
|
614 |
-
"size = 100\n",
|
615 |
-
"for i in range(new_ds.num_rows):\n",
|
616 |
-
" row = new_ds[i]\n",
|
617 |
-
" query_type = row[\"query_type\"]\n",
|
618 |
-
" if query_type in counts:\n",
|
619 |
-
" counts[query_type] += 1\n",
|
620 |
-
" else:\n",
|
621 |
-
" counts[query_type] = 1\n",
|
622 |
-
" if counts[query_type] == size:\n",
|
623 |
-
" indices[query_type] = i\n",
|
624 |
-
"counts, indices"
|
625 |
-
]
|
626 |
-
},
|
627 |
-
{
|
628 |
-
"cell_type": "code",
|
629 |
-
"execution_count": 46,
|
630 |
-
"id": "db48dcc4",
|
631 |
-
"metadata": {},
|
632 |
-
"outputs": [
|
633 |
-
{
|
634 |
-
"data": {
|
635 |
-
"text/html": [
|
636 |
-
"<div>\n",
|
637 |
-
"<style scoped>\n",
|
638 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
639 |
-
" vertical-align: middle;\n",
|
640 |
-
" }\n",
|
641 |
-
"\n",
|
642 |
-
" .dataframe tbody tr th {\n",
|
643 |
-
" vertical-align: top;\n",
|
644 |
-
" }\n",
|
645 |
-
"\n",
|
646 |
-
" .dataframe thead th {\n",
|
647 |
-
" text-align: right;\n",
|
648 |
-
" }\n",
|
649 |
-
"</style>\n",
|
650 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
651 |
-
" <thead>\n",
|
652 |
-
" <tr style=\"text-align: right;\">\n",
|
653 |
-
" <th></th>\n",
|
654 |
-
" <th>answers</th>\n",
|
655 |
-
" <th>passages</th>\n",
|
656 |
-
" <th>query</th>\n",
|
657 |
-
" <th>query_id</th>\n",
|
658 |
-
" <th>query_type</th>\n",
|
659 |
-
" <th>wellFormedAnswers</th>\n",
|
660 |
-
" </tr>\n",
|
661 |
-
" </thead>\n",
|
662 |
-
" <tbody>\n",
|
663 |
-
" <tr>\n",
|
664 |
-
" <th>0</th>\n",
|
665 |
-
" <td>[2,662]</td>\n",
|
666 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
|
667 |
-
" <td>albany mn population</td>\n",
|
668 |
-
" <td>15177</td>\n",
|
669 |
-
" <td>NUMERIC</td>\n",
|
670 |
-
" <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
|
671 |
-
" </tr>\n",
|
672 |
-
" <tr>\n",
|
673 |
-
" <th>1</th>\n",
|
674 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
675 |
-
" <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
|
676 |
-
" <td>current weather in volcano, ca</td>\n",
|
677 |
-
" <td>114414</td>\n",
|
678 |
-
" <td>DESCRIPTION</td>\n",
|
679 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
680 |
-
" </tr>\n",
|
681 |
-
" <tr>\n",
|
682 |
-
" <th>2</th>\n",
|
683 |
-
" <td>[Hippocrates]</td>\n",
|
684 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
|
685 |
-
" <td>____________________ is considered the father ...</td>\n",
|
686 |
-
" <td>9083</td>\n",
|
687 |
-
" <td>DESCRIPTION</td>\n",
|
688 |
-
" <td>[Hippocrates is considered the father of moder...</td>\n",
|
689 |
-
" </tr>\n",
|
690 |
-
" <tr>\n",
|
691 |
-
" <th>3</th>\n",
|
692 |
-
" <td>[120 days from the date of the Note.]</td>\n",
|
693 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
694 |
-
" <td>how many days is an appraisal good for a fanni...</td>\n",
|
695 |
-
" <td>281439</td>\n",
|
696 |
-
" <td>NUMERIC</td>\n",
|
697 |
-
" <td>[An appraisal is good for 120 days from the da...</td>\n",
|
698 |
-
" </tr>\n",
|
699 |
-
" <tr>\n",
|
700 |
-
" <th>4</th>\n",
|
701 |
-
" <td>[From $26,000 to $39,000 a year]</td>\n",
|
702 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
703 |
-
" <td>average pharmacy tech salary</td>\n",
|
704 |
-
" <td>40287</td>\n",
|
705 |
-
" <td>NUMERIC</td>\n",
|
706 |
-
" <td>[The average salary for a pharmacy technician ...</td>\n",
|
707 |
-
" </tr>\n",
|
708 |
-
" <tr>\n",
|
709 |
-
" <th>...</th>\n",
|
710 |
-
" <td>...</td>\n",
|
711 |
-
" <td>...</td>\n",
|
712 |
-
" <td>...</td>\n",
|
713 |
-
" <td>...</td>\n",
|
714 |
-
" <td>...</td>\n",
|
715 |
-
" <td>...</td>\n",
|
716 |
-
" </tr>\n",
|
717 |
-
" <tr>\n",
|
718 |
-
" <th>495</th>\n",
|
719 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
720 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
|
721 |
-
" <td>the pool shower company</td>\n",
|
722 |
-
" <td>518269</td>\n",
|
723 |
-
" <td>PERSON</td>\n",
|
724 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
725 |
-
" </tr>\n",
|
726 |
-
" <tr>\n",
|
727 |
-
" <th>496</th>\n",
|
728 |
-
" <td>[Hanson]</td>\n",
|
729 |
-
" <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
|
730 |
-
" <td>longest tenured american football players</td>\n",
|
731 |
-
" <td>442806</td>\n",
|
732 |
-
" <td>PERSON</td>\n",
|
733 |
-
" <td>[Hanson is the longest tenured American footba...</td>\n",
|
734 |
-
" </tr>\n",
|
735 |
-
" <tr>\n",
|
736 |
-
" <th>497</th>\n",
|
737 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
738 |
-
" <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
|
739 |
-
" <td>mt. view baptist in pendleton sc</td>\n",
|
740 |
-
" <td>460250</td>\n",
|
741 |
-
" <td>PERSON</td>\n",
|
742 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
743 |
-
" </tr>\n",
|
744 |
-
" <tr>\n",
|
745 |
-
" <th>498</th>\n",
|
746 |
-
" <td>[Honeysuckle Weeks]</td>\n",
|
747 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
|
748 |
-
" <td>what actress disappeared for a while</td>\n",
|
749 |
-
" <td>549739</td>\n",
|
750 |
-
" <td>PERSON</td>\n",
|
751 |
-
" <td>[The actress disappeared for a while Honeysuck...</td>\n",
|
752 |
-
" </tr>\n",
|
753 |
-
" <tr>\n",
|
754 |
-
" <th>499</th>\n",
|
755 |
-
" <td>[African-Nguni]</td>\n",
|
756 |
-
" <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
|
757 |
-
" <td>what ethnicity is the surname sabol</td>\n",
|
758 |
-
" <td>658265</td>\n",
|
759 |
-
" <td>PERSON</td>\n",
|
760 |
-
" <td>[The ethnicity of the surname Sabol is African...</td>\n",
|
761 |
-
" </tr>\n",
|
762 |
-
" </tbody>\n",
|
763 |
-
"</table>\n",
|
764 |
-
"<p>500 rows × 6 columns</p>\n",
|
765 |
-
"</div>"
|
766 |
-
],
|
767 |
-
"text/plain": [
|
768 |
-
" answers \\\n",
|
769 |
-
"0 [2,662] \n",
|
770 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
771 |
-
"2 [Hippocrates] \n",
|
772 |
-
"3 [120 days from the date of the Note.] \n",
|
773 |
-
"4 [From $26,000 to $39,000 a year] \n",
|
774 |
-
".. ... \n",
|
775 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
776 |
-
"496 [Hanson] \n",
|
777 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
778 |
-
"498 [Honeysuckle Weeks] \n",
|
779 |
-
"499 [African-Nguni] \n",
|
780 |
-
"\n",
|
781 |
-
" passages \\\n",
|
782 |
-
"0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
|
783 |
-
"1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
|
784 |
-
"2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
|
785 |
-
"3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
786 |
-
"4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
787 |
-
".. ... \n",
|
788 |
-
"495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
|
789 |
-
"496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
|
790 |
-
"497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
|
791 |
-
"498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
|
792 |
-
"499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
|
793 |
-
"\n",
|
794 |
-
" query query_id query_type \\\n",
|
795 |
-
"0 albany mn population 15177 NUMERIC \n",
|
796 |
-
"1 current weather in volcano, ca 114414 DESCRIPTION \n",
|
797 |
-
"2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
|
798 |
-
"3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
|
799 |
-
"4 average pharmacy tech salary 40287 NUMERIC \n",
|
800 |
-
".. ... ... ... \n",
|
801 |
-
"495 the pool shower company 518269 PERSON \n",
|
802 |
-
"496 longest tenured american football players 442806 PERSON \n",
|
803 |
-
"497 mt. view baptist in pendleton sc 460250 PERSON \n",
|
804 |
-
"498 what actress disappeared for a while 549739 PERSON \n",
|
805 |
-
"499 what ethnicity is the surname sabol 658265 PERSON \n",
|
806 |
-
"\n",
|
807 |
-
" wellFormedAnswers \n",
|
808 |
-
"0 [The population of Albany, Minnesota is 2,662. ] \n",
|
809 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
810 |
-
"2 [Hippocrates is considered the father of moder... \n",
|
811 |
-
"3 [An appraisal is good for 120 days from the da... \n",
|
812 |
-
"4 [The average salary for a pharmacy technician ... \n",
|
813 |
-
".. ... \n",
|
814 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
815 |
-
"496 [Hanson is the longest tenured American footba... \n",
|
816 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
817 |
-
"498 [The actress disappeared for a while Honeysuck... \n",
|
818 |
-
"499 [The ethnicity of the surname Sabol is African... \n",
|
819 |
-
"\n",
|
820 |
-
"[500 rows x 6 columns]"
|
821 |
-
]
|
822 |
-
},
|
823 |
-
"execution_count": 46,
|
824 |
-
"metadata": {},
|
825 |
-
"output_type": "execute_result"
|
826 |
-
}
|
827 |
-
],
|
828 |
-
"source": [
|
829 |
-
"new_ds.to_pandas()"
|
830 |
-
]
|
831 |
-
},
|
832 |
-
{
|
833 |
-
"cell_type": "code",
|
834 |
-
"execution_count": 47,
|
835 |
-
"id": "7ed0c22d",
|
836 |
-
"metadata": {},
|
837 |
-
"outputs": [],
|
838 |
-
"source": [
|
839 |
-
"\"\"\"\n",
|
840 |
-
"Official evaluation script for QAConv, modified from SQuAD 2.0.\n",
|
841 |
-
"\n",
|
842 |
-
" * Copyright (c) 2021, salesforce.com, inc.\n",
|
843 |
-
" * All rights reserved.\n",
|
844 |
-
" * SPDX-License-Identifier: BSD-3-Clause\n",
|
845 |
-
" * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause\n",
|
846 |
-
"\n",
|
847 |
-
"\"\"\"\n",
|
848 |
-
"\n",
|
849 |
-
"import collections\n",
|
850 |
-
"import re\n",
|
851 |
-
"import string\n",
|
852 |
-
"\n",
|
853 |
-
"\n",
|
854 |
-
"def normalize_answer(s):\n",
|
855 |
-
" \"\"\"Lower text and remove punctuation, articles and extra whitespace.\"\"\"\n",
|
856 |
-
"\n",
|
857 |
-
" def remove_articles(text):\n",
|
858 |
-
" regex = re.compile(r\"\\b(a|an|the)\\b\", re.UNICODE)\n",
|
859 |
-
" return re.sub(regex, \" \", text)\n",
|
860 |
-
"\n",
|
861 |
-
" def white_space_fix(text):\n",
|
862 |
-
" return \" \".join(text.split())\n",
|
863 |
-
"\n",
|
864 |
-
" def remove_punc(text):\n",
|
865 |
-
" exclude = set(string.punctuation)\n",
|
866 |
-
" return \"\".join(ch for ch in text if ch not in exclude)\n",
|
867 |
-
"\n",
|
868 |
-
" def lower(text):\n",
|
869 |
-
" return text.lower()\n",
|
870 |
-
"\n",
|
871 |
-
" return white_space_fix(remove_articles(remove_punc(lower(s))))\n",
|
872 |
-
"\n",
|
873 |
-
"\n",
|
874 |
-
"def get_tokens(s):\n",
|
875 |
-
" if not s:\n",
|
876 |
-
" return []\n",
|
877 |
-
" return normalize_answer(s).split()\n",
|
878 |
-
"\n",
|
879 |
-
"\n",
|
880 |
-
"def compute_exact(a_gold, a_pred):\n",
|
881 |
-
" return int(normalize_answer(a_gold) == normalize_answer(a_pred))\n",
|
882 |
-
"\n",
|
883 |
-
"\n",
|
884 |
-
"def compute_f1(a_gold, a_pred):\n",
|
885 |
-
" gold_toks = get_tokens(a_gold)\n",
|
886 |
-
" pred_toks = get_tokens(a_pred)\n",
|
887 |
-
" common = collections.Counter(gold_toks) & collections.Counter(pred_toks)\n",
|
888 |
-
" num_same = sum(common.values())\n",
|
889 |
-
" if len(gold_toks) == 0 or len(pred_toks) == 0:\n",
|
890 |
-
" # If either is no-answer, then F1 is 1 if they agree, 0 otherwise\n",
|
891 |
-
" return int(gold_toks == pred_toks)\n",
|
892 |
-
" if num_same == 0:\n",
|
893 |
-
" return 0\n",
|
894 |
-
" precision = 1.0 * num_same / len(pred_toks)\n",
|
895 |
-
" recall = 1.0 * num_same / len(gold_toks)\n",
|
896 |
-
" f1 = (2 * precision * recall) / (precision + recall)\n",
|
897 |
-
" return f1"
|
898 |
-
]
|
899 |
-
},
|
900 |
-
{
|
901 |
-
"cell_type": "code",
|
902 |
-
"execution_count": 49,
|
903 |
-
"id": "d9ff4756",
|
904 |
-
"metadata": {},
|
905 |
-
"outputs": [
|
906 |
-
{
|
907 |
-
"data": {
|
908 |
-
"application/vnd.jupyter.widget-view+json": {
|
909 |
-
"model_id": "d8a8d425f60a467eb56f6a13a50ed94b",
|
910 |
-
"version_major": 2,
|
911 |
-
"version_minor": 0
|
912 |
-
},
|
913 |
-
"text/plain": [
|
914 |
-
"Map: 0%| | 0/500 [00:00<?, ? examples/s]"
|
915 |
-
]
|
916 |
-
},
|
917 |
-
"metadata": {},
|
918 |
-
"output_type": "display_data"
|
919 |
-
},
|
920 |
-
{
|
921 |
-
"data": {
|
922 |
-
"text/plain": [
|
923 |
-
"Dataset({\n",
|
924 |
-
" features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'EM', 'F1'],\n",
|
925 |
-
" num_rows: 500\n",
|
926 |
-
"})"
|
927 |
-
]
|
928 |
-
},
|
929 |
-
"execution_count": 49,
|
930 |
-
"metadata": {},
|
931 |
-
"output_type": "execute_result"
|
932 |
-
}
|
933 |
-
],
|
934 |
-
"source": [
|
935 |
-
"result_all = new_ds.map(\n",
|
936 |
-
" lambda record, idx: {\n",
|
937 |
-
" \"EM\": compute_exact(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
|
938 |
-
" \"F1\": compute_f1(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
|
939 |
-
" },\n",
|
940 |
-
" batched=False,\n",
|
941 |
-
" with_indices=True,\n",
|
942 |
-
")\n",
|
943 |
-
"result_all"
|
944 |
-
]
|
945 |
-
},
|
946 |
-
{
|
947 |
-
"cell_type": "code",
|
948 |
-
"execution_count": 50,
|
949 |
-
"id": "31402fb2",
|
950 |
-
"metadata": {},
|
951 |
-
"outputs": [
|
952 |
-
{
|
953 |
-
"data": {
|
954 |
-
"text/html": [
|
955 |
-
"<div>\n",
|
956 |
-
"<style scoped>\n",
|
957 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
958 |
-
" vertical-align: middle;\n",
|
959 |
-
" }\n",
|
960 |
-
"\n",
|
961 |
-
" .dataframe tbody tr th {\n",
|
962 |
-
" vertical-align: top;\n",
|
963 |
-
" }\n",
|
964 |
-
"\n",
|
965 |
-
" .dataframe thead th {\n",
|
966 |
-
" text-align: right;\n",
|
967 |
-
" }\n",
|
968 |
-
"</style>\n",
|
969 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
970 |
-
" <thead>\n",
|
971 |
-
" <tr style=\"text-align: right;\">\n",
|
972 |
-
" <th></th>\n",
|
973 |
-
" <th>answers</th>\n",
|
974 |
-
" <th>passages</th>\n",
|
975 |
-
" <th>query</th>\n",
|
976 |
-
" <th>query_id</th>\n",
|
977 |
-
" <th>query_type</th>\n",
|
978 |
-
" <th>wellFormedAnswers</th>\n",
|
979 |
-
" <th>EM</th>\n",
|
980 |
-
" <th>F1</th>\n",
|
981 |
-
" </tr>\n",
|
982 |
-
" </thead>\n",
|
983 |
-
" <tbody>\n",
|
984 |
-
" <tr>\n",
|
985 |
-
" <th>0</th>\n",
|
986 |
-
" <td>[2,662]</td>\n",
|
987 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
|
988 |
-
" <td>albany mn population</td>\n",
|
989 |
-
" <td>15177</td>\n",
|
990 |
-
" <td>NUMERIC</td>\n",
|
991 |
-
" <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
|
992 |
-
" <td>0</td>\n",
|
993 |
-
" <td>0.285714</td>\n",
|
994 |
-
" </tr>\n",
|
995 |
-
" <tr>\n",
|
996 |
-
" <th>1</th>\n",
|
997 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
998 |
-
" <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
|
999 |
-
" <td>current weather in volcano, ca</td>\n",
|
1000 |
-
" <td>114414</td>\n",
|
1001 |
-
" <td>DESCRIPTION</td>\n",
|
1002 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
1003 |
-
" <td>1</td>\n",
|
1004 |
-
" <td>1.000000</td>\n",
|
1005 |
-
" </tr>\n",
|
1006 |
-
" <tr>\n",
|
1007 |
-
" <th>2</th>\n",
|
1008 |
-
" <td>[Hippocrates]</td>\n",
|
1009 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
|
1010 |
-
" <td>____________________ is considered the father ...</td>\n",
|
1011 |
-
" <td>9083</td>\n",
|
1012 |
-
" <td>DESCRIPTION</td>\n",
|
1013 |
-
" <td>[Hippocrates is considered the father of moder...</td>\n",
|
1014 |
-
" <td>0</td>\n",
|
1015 |
-
" <td>0.250000</td>\n",
|
1016 |
-
" </tr>\n",
|
1017 |
-
" <tr>\n",
|
1018 |
-
" <th>3</th>\n",
|
1019 |
-
" <td>[120 days from the date of the Note.]</td>\n",
|
1020 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1021 |
-
" <td>how many days is an appraisal good for a fanni...</td>\n",
|
1022 |
-
" <td>281439</td>\n",
|
1023 |
-
" <td>NUMERIC</td>\n",
|
1024 |
-
" <td>[An appraisal is good for 120 days from the da...</td>\n",
|
1025 |
-
" <td>0</td>\n",
|
1026 |
-
" <td>0.631579</td>\n",
|
1027 |
-
" </tr>\n",
|
1028 |
-
" <tr>\n",
|
1029 |
-
" <th>4</th>\n",
|
1030 |
-
" <td>[From $26,000 to $39,000 a year]</td>\n",
|
1031 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1032 |
-
" <td>average pharmacy tech salary</td>\n",
|
1033 |
-
" <td>40287</td>\n",
|
1034 |
-
" <td>NUMERIC</td>\n",
|
1035 |
-
" <td>[The average salary for a pharmacy technician ...</td>\n",
|
1036 |
-
" <td>0</td>\n",
|
1037 |
-
" <td>0.500000</td>\n",
|
1038 |
-
" </tr>\n",
|
1039 |
-
" <tr>\n",
|
1040 |
-
" <th>...</th>\n",
|
1041 |
-
" <td>...</td>\n",
|
1042 |
-
" <td>...</td>\n",
|
1043 |
-
" <td>...</td>\n",
|
1044 |
-
" <td>...</td>\n",
|
1045 |
-
" <td>...</td>\n",
|
1046 |
-
" <td>...</td>\n",
|
1047 |
-
" <td>...</td>\n",
|
1048 |
-
" <td>...</td>\n",
|
1049 |
-
" </tr>\n",
|
1050 |
-
" <tr>\n",
|
1051 |
-
" <th>495</th>\n",
|
1052 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
1053 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
|
1054 |
-
" <td>the pool shower company</td>\n",
|
1055 |
-
" <td>518269</td>\n",
|
1056 |
-
" <td>PERSON</td>\n",
|
1057 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
1058 |
-
" <td>1</td>\n",
|
1059 |
-
" <td>1.000000</td>\n",
|
1060 |
-
" </tr>\n",
|
1061 |
-
" <tr>\n",
|
1062 |
-
" <th>496</th>\n",
|
1063 |
-
" <td>[Hanson]</td>\n",
|
1064 |
-
" <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
|
1065 |
-
" <td>longest tenured american football players</td>\n",
|
1066 |
-
" <td>442806</td>\n",
|
1067 |
-
" <td>PERSON</td>\n",
|
1068 |
-
" <td>[Hanson is the longest tenured American footba...</td>\n",
|
1069 |
-
" <td>0</td>\n",
|
1070 |
-
" <td>0.250000</td>\n",
|
1071 |
-
" </tr>\n",
|
1072 |
-
" <tr>\n",
|
1073 |
-
" <th>497</th>\n",
|
1074 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
1075 |
-
" <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
|
1076 |
-
" <td>mt. view baptist in pendleton sc</td>\n",
|
1077 |
-
" <td>460250</td>\n",
|
1078 |
-
" <td>PERSON</td>\n",
|
1079 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
1080 |
-
" <td>1</td>\n",
|
1081 |
-
" <td>1.000000</td>\n",
|
1082 |
-
" </tr>\n",
|
1083 |
-
" <tr>\n",
|
1084 |
-
" <th>498</th>\n",
|
1085 |
-
" <td>[Honeysuckle Weeks]</td>\n",
|
1086 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1087 |
-
" <td>what actress disappeared for a while</td>\n",
|
1088 |
-
" <td>549739</td>\n",
|
1089 |
-
" <td>PERSON</td>\n",
|
1090 |
-
" <td>[The actress disappeared for a while Honeysuck...</td>\n",
|
1091 |
-
" <td>0</td>\n",
|
1092 |
-
" <td>0.500000</td>\n",
|
1093 |
-
" </tr>\n",
|
1094 |
-
" <tr>\n",
|
1095 |
-
" <th>499</th>\n",
|
1096 |
-
" <td>[African-Nguni]</td>\n",
|
1097 |
-
" <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
|
1098 |
-
" <td>what ethnicity is the surname sabol</td>\n",
|
1099 |
-
" <td>658265</td>\n",
|
1100 |
-
" <td>PERSON</td>\n",
|
1101 |
-
" <td>[The ethnicity of the surname Sabol is African...</td>\n",
|
1102 |
-
" <td>0</td>\n",
|
1103 |
-
" <td>0.285714</td>\n",
|
1104 |
-
" </tr>\n",
|
1105 |
-
" </tbody>\n",
|
1106 |
-
"</table>\n",
|
1107 |
-
"<p>500 rows × 8 columns</p>\n",
|
1108 |
-
"</div>"
|
1109 |
-
],
|
1110 |
-
"text/plain": [
|
1111 |
-
" answers \\\n",
|
1112 |
-
"0 [2,662] \n",
|
1113 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
1114 |
-
"2 [Hippocrates] \n",
|
1115 |
-
"3 [120 days from the date of the Note.] \n",
|
1116 |
-
"4 [From $26,000 to $39,000 a year] \n",
|
1117 |
-
".. ... \n",
|
1118 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
1119 |
-
"496 [Hanson] \n",
|
1120 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
1121 |
-
"498 [Honeysuckle Weeks] \n",
|
1122 |
-
"499 [African-Nguni] \n",
|
1123 |
-
"\n",
|
1124 |
-
" passages \\\n",
|
1125 |
-
"0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
|
1126 |
-
"1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
|
1127 |
-
"2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
|
1128 |
-
"3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
1129 |
-
"4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
1130 |
-
".. ... \n",
|
1131 |
-
"495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
|
1132 |
-
"496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
|
1133 |
-
"497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
|
1134 |
-
"498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
|
1135 |
-
"499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
|
1136 |
-
"\n",
|
1137 |
-
" query query_id query_type \\\n",
|
1138 |
-
"0 albany mn population 15177 NUMERIC \n",
|
1139 |
-
"1 current weather in volcano, ca 114414 DESCRIPTION \n",
|
1140 |
-
"2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
|
1141 |
-
"3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
|
1142 |
-
"4 average pharmacy tech salary 40287 NUMERIC \n",
|
1143 |
-
".. ... ... ... \n",
|
1144 |
-
"495 the pool shower company 518269 PERSON \n",
|
1145 |
-
"496 longest tenured american football players 442806 PERSON \n",
|
1146 |
-
"497 mt. view baptist in pendleton sc 460250 PERSON \n",
|
1147 |
-
"498 what actress disappeared for a while 549739 PERSON \n",
|
1148 |
-
"499 what ethnicity is the surname sabol 658265 PERSON \n",
|
1149 |
-
"\n",
|
1150 |
-
" wellFormedAnswers EM F1 \n",
|
1151 |
-
"0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
|
1152 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
|
1153 |
-
"2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
|
1154 |
-
"3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
|
1155 |
-
"4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
|
1156 |
-
".. ... .. ... \n",
|
1157 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
|
1158 |
-
"496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
|
1159 |
-
"497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
|
1160 |
-
"498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
|
1161 |
-
"499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
|
1162 |
-
"\n",
|
1163 |
-
"[500 rows x 8 columns]"
|
1164 |
-
]
|
1165 |
-
},
|
1166 |
-
"execution_count": 50,
|
1167 |
-
"metadata": {},
|
1168 |
-
"output_type": "execute_result"
|
1169 |
-
}
|
1170 |
-
],
|
1171 |
-
"source": [
|
1172 |
-
"result_all.to_pandas()"
|
1173 |
-
]
|
1174 |
-
},
|
1175 |
-
{
|
1176 |
-
"cell_type": "code",
|
1177 |
-
"execution_count": 53,
|
1178 |
-
"id": "af2d4577",
|
1179 |
-
"metadata": {},
|
1180 |
-
"outputs": [
|
1181 |
-
{
|
1182 |
-
"name": "stdout",
|
1183 |
-
"output_type": "stream",
|
1184 |
-
"text": [
|
1185 |
-
"Note: you may need to restart the kernel to use updated packages.\n"
|
1186 |
-
]
|
1187 |
-
}
|
1188 |
-
],
|
1189 |
-
"source": [
|
1190 |
-
"%pip install -q evaluate rouge_score"
|
1191 |
-
]
|
1192 |
-
},
|
1193 |
-
{
|
1194 |
-
"cell_type": "code",
|
1195 |
-
"execution_count": 54,
|
1196 |
-
"id": "89494c3d",
|
1197 |
-
"metadata": {},
|
1198 |
-
"outputs": [],
|
1199 |
-
"source": [
|
1200 |
-
"import evaluate\n",
|
1201 |
-
"\n",
|
1202 |
-
"bleu = evaluate.load(\"bleu\")\n",
|
1203 |
-
"rouge = evaluate.load(\"rouge\")"
|
1204 |
-
]
|
1205 |
-
},
|
1206 |
-
{
|
1207 |
-
"cell_type": "code",
|
1208 |
-
"execution_count": 56,
|
1209 |
-
"id": "e447aa08",
|
1210 |
-
"metadata": {},
|
1211 |
-
"outputs": [
|
1212 |
-
{
|
1213 |
-
"data": {
|
1214 |
-
"application/vnd.jupyter.widget-view+json": {
|
1215 |
-
"model_id": "c87066449ebc44d39a66b1630977f2ac",
|
1216 |
-
"version_major": 2,
|
1217 |
-
"version_minor": 0
|
1218 |
-
},
|
1219 |
-
"text/plain": [
|
1220 |
-
"Map: 0%| | 0/500 [00:00<?, ? examples/s]"
|
1221 |
-
]
|
1222 |
-
},
|
1223 |
-
"metadata": {},
|
1224 |
-
"output_type": "display_data"
|
1225 |
-
},
|
1226 |
-
{
|
1227 |
-
"data": {
|
1228 |
-
"text/html": [
|
1229 |
-
"<div>\n",
|
1230 |
-
"<style scoped>\n",
|
1231 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
1232 |
-
" vertical-align: middle;\n",
|
1233 |
-
" }\n",
|
1234 |
-
"\n",
|
1235 |
-
" .dataframe tbody tr th {\n",
|
1236 |
-
" vertical-align: top;\n",
|
1237 |
-
" }\n",
|
1238 |
-
"\n",
|
1239 |
-
" .dataframe thead th {\n",
|
1240 |
-
" text-align: right;\n",
|
1241 |
-
" }\n",
|
1242 |
-
"</style>\n",
|
1243 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
1244 |
-
" <thead>\n",
|
1245 |
-
" <tr style=\"text-align: right;\">\n",
|
1246 |
-
" <th></th>\n",
|
1247 |
-
" <th>answers</th>\n",
|
1248 |
-
" <th>passages</th>\n",
|
1249 |
-
" <th>query</th>\n",
|
1250 |
-
" <th>query_id</th>\n",
|
1251 |
-
" <th>query_type</th>\n",
|
1252 |
-
" <th>wellFormedAnswers</th>\n",
|
1253 |
-
" <th>EM</th>\n",
|
1254 |
-
" <th>F1</th>\n",
|
1255 |
-
" <th>bleu</th>\n",
|
1256 |
-
" <th>precisions</th>\n",
|
1257 |
-
" <th>brevity_penalty</th>\n",
|
1258 |
-
" <th>length_ratio</th>\n",
|
1259 |
-
" <th>translation_length</th>\n",
|
1260 |
-
" <th>reference_length</th>\n",
|
1261 |
-
" </tr>\n",
|
1262 |
-
" </thead>\n",
|
1263 |
-
" <tbody>\n",
|
1264 |
-
" <tr>\n",
|
1265 |
-
" <th>0</th>\n",
|
1266 |
-
" <td>[2,662]</td>\n",
|
1267 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
|
1268 |
-
" <td>albany mn population</td>\n",
|
1269 |
-
" <td>15177</td>\n",
|
1270 |
-
" <td>NUMERIC</td>\n",
|
1271 |
-
" <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
|
1272 |
-
" <td>0</td>\n",
|
1273 |
-
" <td>0.285714</td>\n",
|
1274 |
-
" <td>0.000000</td>\n",
|
1275 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1276 |
-
" <td>0.000335</td>\n",
|
1277 |
-
" <td>0.111111</td>\n",
|
1278 |
-
" <td>1</td>\n",
|
1279 |
-
" <td>9</td>\n",
|
1280 |
-
" </tr>\n",
|
1281 |
-
" <tr>\n",
|
1282 |
-
" <th>1</th>\n",
|
1283 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
1284 |
-
" <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
|
1285 |
-
" <td>current weather in volcano, ca</td>\n",
|
1286 |
-
" <td>114414</td>\n",
|
1287 |
-
" <td>DESCRIPTION</td>\n",
|
1288 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
1289 |
-
" <td>1</td>\n",
|
1290 |
-
" <td>1.000000</td>\n",
|
1291 |
-
" <td>1.000000</td>\n",
|
1292 |
-
" <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
|
1293 |
-
" <td>1.000000</td>\n",
|
1294 |
-
" <td>1.000000</td>\n",
|
1295 |
-
" <td>14</td>\n",
|
1296 |
-
" <td>14</td>\n",
|
1297 |
-
" </tr>\n",
|
1298 |
-
" <tr>\n",
|
1299 |
-
" <th>2</th>\n",
|
1300 |
-
" <td>[Hippocrates]</td>\n",
|
1301 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
|
1302 |
-
" <td>____________________ is considered the father ...</td>\n",
|
1303 |
-
" <td>9083</td>\n",
|
1304 |
-
" <td>DESCRIPTION</td>\n",
|
1305 |
-
" <td>[Hippocrates is considered the father of moder...</td>\n",
|
1306 |
-
" <td>0</td>\n",
|
1307 |
-
" <td>0.250000</td>\n",
|
1308 |
-
" <td>0.000000</td>\n",
|
1309 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1310 |
-
" <td>0.000335</td>\n",
|
1311 |
-
" <td>0.111111</td>\n",
|
1312 |
-
" <td>1</td>\n",
|
1313 |
-
" <td>9</td>\n",
|
1314 |
-
" </tr>\n",
|
1315 |
-
" <tr>\n",
|
1316 |
-
" <th>3</th>\n",
|
1317 |
-
" <td>[120 days from the date of the Note.]</td>\n",
|
1318 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1319 |
-
" <td>how many days is an appraisal good for a fanni...</td>\n",
|
1320 |
-
" <td>281439</td>\n",
|
1321 |
-
" <td>NUMERIC</td>\n",
|
1322 |
-
" <td>[An appraisal is good for 120 days from the da...</td>\n",
|
1323 |
-
" <td>0</td>\n",
|
1324 |
-
" <td>0.631579</td>\n",
|
1325 |
-
" <td>0.327096</td>\n",
|
1326 |
-
" <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
|
1327 |
-
" <td>0.367879</td>\n",
|
1328 |
-
" <td>0.500000</td>\n",
|
1329 |
-
" <td>9</td>\n",
|
1330 |
-
" <td>18</td>\n",
|
1331 |
-
" </tr>\n",
|
1332 |
-
" <tr>\n",
|
1333 |
-
" <th>4</th>\n",
|
1334 |
-
" <td>[From $26,000 to $39,000 a year]</td>\n",
|
1335 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1336 |
-
" <td>average pharmacy tech salary</td>\n",
|
1337 |
-
" <td>40287</td>\n",
|
1338 |
-
" <td>NUMERIC</td>\n",
|
1339 |
-
" <td>[The average salary for a pharmacy technician ...</td>\n",
|
1340 |
-
" <td>0</td>\n",
|
1341 |
-
" <td>0.500000</td>\n",
|
1342 |
-
" <td>0.193040</td>\n",
|
1343 |
-
" <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
|
1344 |
-
" <td>0.324652</td>\n",
|
1345 |
-
" <td>0.470588</td>\n",
|
1346 |
-
" <td>8</td>\n",
|
1347 |
-
" <td>17</td>\n",
|
1348 |
-
" </tr>\n",
|
1349 |
-
" <tr>\n",
|
1350 |
-
" <th>...</th>\n",
|
1351 |
-
" <td>...</td>\n",
|
1352 |
-
" <td>...</td>\n",
|
1353 |
-
" <td>...</td>\n",
|
1354 |
-
" <td>...</td>\n",
|
1355 |
-
" <td>...</td>\n",
|
1356 |
-
" <td>...</td>\n",
|
1357 |
-
" <td>...</td>\n",
|
1358 |
-
" <td>...</td>\n",
|
1359 |
-
" <td>...</td>\n",
|
1360 |
-
" <td>...</td>\n",
|
1361 |
-
" <td>...</td>\n",
|
1362 |
-
" <td>...</td>\n",
|
1363 |
-
" <td>...</td>\n",
|
1364 |
-
" <td>...</td>\n",
|
1365 |
-
" </tr>\n",
|
1366 |
-
" <tr>\n",
|
1367 |
-
" <th>495</th>\n",
|
1368 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
1369 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
|
1370 |
-
" <td>the pool shower company</td>\n",
|
1371 |
-
" <td>518269</td>\n",
|
1372 |
-
" <td>PERSON</td>\n",
|
1373 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
1374 |
-
" <td>1</td>\n",
|
1375 |
-
" <td>1.000000</td>\n",
|
1376 |
-
" <td>1.000000</td>\n",
|
1377 |
-
" <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
|
1378 |
-
" <td>1.000000</td>\n",
|
1379 |
-
" <td>1.000000</td>\n",
|
1380 |
-
" <td>19</td>\n",
|
1381 |
-
" <td>19</td>\n",
|
1382 |
-
" </tr>\n",
|
1383 |
-
" <tr>\n",
|
1384 |
-
" <th>496</th>\n",
|
1385 |
-
" <td>[Hanson]</td>\n",
|
1386 |
-
" <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
|
1387 |
-
" <td>longest tenured american football players</td>\n",
|
1388 |
-
" <td>442806</td>\n",
|
1389 |
-
" <td>PERSON</td>\n",
|
1390 |
-
" <td>[Hanson is the longest tenured American footba...</td>\n",
|
1391 |
-
" <td>0</td>\n",
|
1392 |
-
" <td>0.250000</td>\n",
|
1393 |
-
" <td>0.000000</td>\n",
|
1394 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1395 |
-
" <td>0.000335</td>\n",
|
1396 |
-
" <td>0.111111</td>\n",
|
1397 |
-
" <td>1</td>\n",
|
1398 |
-
" <td>9</td>\n",
|
1399 |
-
" </tr>\n",
|
1400 |
-
" <tr>\n",
|
1401 |
-
" <th>497</th>\n",
|
1402 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
1403 |
-
" <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
|
1404 |
-
" <td>mt. view baptist in pendleton sc</td>\n",
|
1405 |
-
" <td>460250</td>\n",
|
1406 |
-
" <td>PERSON</td>\n",
|
1407 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
1408 |
-
" <td>1</td>\n",
|
1409 |
-
" <td>1.000000</td>\n",
|
1410 |
-
" <td>1.000000</td>\n",
|
1411 |
-
" <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
|
1412 |
-
" <td>1.000000</td>\n",
|
1413 |
-
" <td>1.000000</td>\n",
|
1414 |
-
" <td>21</td>\n",
|
1415 |
-
" <td>21</td>\n",
|
1416 |
-
" </tr>\n",
|
1417 |
-
" <tr>\n",
|
1418 |
-
" <th>498</th>\n",
|
1419 |
-
" <td>[Honeysuckle Weeks]</td>\n",
|
1420 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1421 |
-
" <td>what actress disappeared for a while</td>\n",
|
1422 |
-
" <td>549739</td>\n",
|
1423 |
-
" <td>PERSON</td>\n",
|
1424 |
-
" <td>[The actress disappeared for a while Honeysuck...</td>\n",
|
1425 |
-
" <td>0</td>\n",
|
1426 |
-
" <td>0.500000</td>\n",
|
1427 |
-
" <td>0.000000</td>\n",
|
1428 |
-
" <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
|
1429 |
-
" <td>0.030197</td>\n",
|
1430 |
-
" <td>0.222222</td>\n",
|
1431 |
-
" <td>2</td>\n",
|
1432 |
-
" <td>9</td>\n",
|
1433 |
-
" </tr>\n",
|
1434 |
-
" <tr>\n",
|
1435 |
-
" <th>499</th>\n",
|
1436 |
-
" <td>[African-Nguni]</td>\n",
|
1437 |
-
" <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
|
1438 |
-
" <td>what ethnicity is the surname sabol</td>\n",
|
1439 |
-
" <td>658265</td>\n",
|
1440 |
-
" <td>PERSON</td>\n",
|
1441 |
-
" <td>[The ethnicity of the surname Sabol is African...</td>\n",
|
1442 |
-
" <td>0</td>\n",
|
1443 |
-
" <td>0.285714</td>\n",
|
1444 |
-
" <td>0.000000</td>\n",
|
1445 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1446 |
-
" <td>0.000335</td>\n",
|
1447 |
-
" <td>0.111111</td>\n",
|
1448 |
-
" <td>1</td>\n",
|
1449 |
-
" <td>9</td>\n",
|
1450 |
-
" </tr>\n",
|
1451 |
-
" </tbody>\n",
|
1452 |
-
"</table>\n",
|
1453 |
-
"<p>500 rows × 14 columns</p>\n",
|
1454 |
-
"</div>"
|
1455 |
-
],
|
1456 |
-
"text/plain": [
|
1457 |
-
" answers \\\n",
|
1458 |
-
"0 [2,662] \n",
|
1459 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
1460 |
-
"2 [Hippocrates] \n",
|
1461 |
-
"3 [120 days from the date of the Note.] \n",
|
1462 |
-
"4 [From $26,000 to $39,000 a year] \n",
|
1463 |
-
".. ... \n",
|
1464 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
1465 |
-
"496 [Hanson] \n",
|
1466 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
1467 |
-
"498 [Honeysuckle Weeks] \n",
|
1468 |
-
"499 [African-Nguni] \n",
|
1469 |
-
"\n",
|
1470 |
-
" passages \\\n",
|
1471 |
-
"0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
|
1472 |
-
"1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
|
1473 |
-
"2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
|
1474 |
-
"3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
1475 |
-
"4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
1476 |
-
".. ... \n",
|
1477 |
-
"495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
|
1478 |
-
"496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
|
1479 |
-
"497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
|
1480 |
-
"498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
|
1481 |
-
"499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
|
1482 |
-
"\n",
|
1483 |
-
" query query_id query_type \\\n",
|
1484 |
-
"0 albany mn population 15177 NUMERIC \n",
|
1485 |
-
"1 current weather in volcano, ca 114414 DESCRIPTION \n",
|
1486 |
-
"2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
|
1487 |
-
"3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
|
1488 |
-
"4 average pharmacy tech salary 40287 NUMERIC \n",
|
1489 |
-
".. ... ... ... \n",
|
1490 |
-
"495 the pool shower company 518269 PERSON \n",
|
1491 |
-
"496 longest tenured american football players 442806 PERSON \n",
|
1492 |
-
"497 mt. view baptist in pendleton sc 460250 PERSON \n",
|
1493 |
-
"498 what actress disappeared for a while 549739 PERSON \n",
|
1494 |
-
"499 what ethnicity is the surname sabol 658265 PERSON \n",
|
1495 |
-
"\n",
|
1496 |
-
" wellFormedAnswers EM F1 \\\n",
|
1497 |
-
"0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
|
1498 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
|
1499 |
-
"2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
|
1500 |
-
"3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
|
1501 |
-
"4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
|
1502 |
-
".. ... .. ... \n",
|
1503 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
|
1504 |
-
"496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
|
1505 |
-
"497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
|
1506 |
-
"498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
|
1507 |
-
"499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
|
1508 |
-
"\n",
|
1509 |
-
" bleu precisions \\\n",
|
1510 |
-
"0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1511 |
-
"1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
|
1512 |
-
"2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1513 |
-
"3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
|
1514 |
-
"4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
|
1515 |
-
".. ... ... \n",
|
1516 |
-
"495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
|
1517 |
-
"496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1518 |
-
"497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
|
1519 |
-
"498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
|
1520 |
-
"499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1521 |
-
"\n",
|
1522 |
-
" brevity_penalty length_ratio translation_length reference_length \n",
|
1523 |
-
"0 0.000335 0.111111 1 9 \n",
|
1524 |
-
"1 1.000000 1.000000 14 14 \n",
|
1525 |
-
"2 0.000335 0.111111 1 9 \n",
|
1526 |
-
"3 0.367879 0.500000 9 18 \n",
|
1527 |
-
"4 0.324652 0.470588 8 17 \n",
|
1528 |
-
".. ... ... ... ... \n",
|
1529 |
-
"495 1.000000 1.000000 19 19 \n",
|
1530 |
-
"496 0.000335 0.111111 1 9 \n",
|
1531 |
-
"497 1.000000 1.000000 21 21 \n",
|
1532 |
-
"498 0.030197 0.222222 2 9 \n",
|
1533 |
-
"499 0.000335 0.111111 1 9 \n",
|
1534 |
-
"\n",
|
1535 |
-
"[500 rows x 14 columns]"
|
1536 |
-
]
|
1537 |
-
},
|
1538 |
-
"execution_count": 56,
|
1539 |
-
"metadata": {},
|
1540 |
-
"output_type": "execute_result"
|
1541 |
-
}
|
1542 |
-
],
|
1543 |
-
"source": [
|
1544 |
-
"result_all = result_all.map(\n",
|
1545 |
-
" lambda record: bleu.compute(\n",
|
1546 |
-
" predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
|
1547 |
-
" ),\n",
|
1548 |
-
" batched=False,\n",
|
1549 |
-
")\n",
|
1550 |
-
"result_all.to_pandas()"
|
1551 |
-
]
|
1552 |
-
},
|
1553 |
-
{
|
1554 |
-
"cell_type": "code",
|
1555 |
-
"execution_count": 57,
|
1556 |
-
"id": "fbbe31fd",
|
1557 |
-
"metadata": {},
|
1558 |
-
"outputs": [
|
1559 |
-
{
|
1560 |
-
"data": {
|
1561 |
-
"application/vnd.jupyter.widget-view+json": {
|
1562 |
-
"model_id": "88f839b74aa54fcd8c95215e22e30472",
|
1563 |
-
"version_major": 2,
|
1564 |
-
"version_minor": 0
|
1565 |
-
},
|
1566 |
-
"text/plain": [
|
1567 |
-
"Map: 0%| | 0/500 [00:00<?, ? examples/s]"
|
1568 |
-
]
|
1569 |
-
},
|
1570 |
-
"metadata": {},
|
1571 |
-
"output_type": "display_data"
|
1572 |
-
},
|
1573 |
-
{
|
1574 |
-
"data": {
|
1575 |
-
"text/html": [
|
1576 |
-
"<div>\n",
|
1577 |
-
"<style scoped>\n",
|
1578 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
1579 |
-
" vertical-align: middle;\n",
|
1580 |
-
" }\n",
|
1581 |
-
"\n",
|
1582 |
-
" .dataframe tbody tr th {\n",
|
1583 |
-
" vertical-align: top;\n",
|
1584 |
-
" }\n",
|
1585 |
-
"\n",
|
1586 |
-
" .dataframe thead th {\n",
|
1587 |
-
" text-align: right;\n",
|
1588 |
-
" }\n",
|
1589 |
-
"</style>\n",
|
1590 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
1591 |
-
" <thead>\n",
|
1592 |
-
" <tr style=\"text-align: right;\">\n",
|
1593 |
-
" <th></th>\n",
|
1594 |
-
" <th>answers</th>\n",
|
1595 |
-
" <th>passages</th>\n",
|
1596 |
-
" <th>query</th>\n",
|
1597 |
-
" <th>query_id</th>\n",
|
1598 |
-
" <th>query_type</th>\n",
|
1599 |
-
" <th>wellFormedAnswers</th>\n",
|
1600 |
-
" <th>EM</th>\n",
|
1601 |
-
" <th>F1</th>\n",
|
1602 |
-
" <th>bleu</th>\n",
|
1603 |
-
" <th>precisions</th>\n",
|
1604 |
-
" <th>brevity_penalty</th>\n",
|
1605 |
-
" <th>length_ratio</th>\n",
|
1606 |
-
" <th>translation_length</th>\n",
|
1607 |
-
" <th>reference_length</th>\n",
|
1608 |
-
" <th>rouge1</th>\n",
|
1609 |
-
" <th>rouge2</th>\n",
|
1610 |
-
" <th>rougeL</th>\n",
|
1611 |
-
" <th>rougeLsum</th>\n",
|
1612 |
-
" </tr>\n",
|
1613 |
-
" </thead>\n",
|
1614 |
-
" <tbody>\n",
|
1615 |
-
" <tr>\n",
|
1616 |
-
" <th>0</th>\n",
|
1617 |
-
" <td>[2,662]</td>\n",
|
1618 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
|
1619 |
-
" <td>albany mn population</td>\n",
|
1620 |
-
" <td>15177</td>\n",
|
1621 |
-
" <td>NUMERIC</td>\n",
|
1622 |
-
" <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
|
1623 |
-
" <td>0</td>\n",
|
1624 |
-
" <td>0.285714</td>\n",
|
1625 |
-
" <td>0.000000</td>\n",
|
1626 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1627 |
-
" <td>0.000335</td>\n",
|
1628 |
-
" <td>0.111111</td>\n",
|
1629 |
-
" <td>1</td>\n",
|
1630 |
-
" <td>9</td>\n",
|
1631 |
-
" <td>0.400000</td>\n",
|
1632 |
-
" <td>0.250000</td>\n",
|
1633 |
-
" <td>0.400000</td>\n",
|
1634 |
-
" <td>0.400000</td>\n",
|
1635 |
-
" </tr>\n",
|
1636 |
-
" <tr>\n",
|
1637 |
-
" <th>1</th>\n",
|
1638 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
1639 |
-
" <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
|
1640 |
-
" <td>current weather in volcano, ca</td>\n",
|
1641 |
-
" <td>114414</td>\n",
|
1642 |
-
" <td>DESCRIPTION</td>\n",
|
1643 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
1644 |
-
" <td>1</td>\n",
|
1645 |
-
" <td>1.000000</td>\n",
|
1646 |
-
" <td>1.000000</td>\n",
|
1647 |
-
" <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
|
1648 |
-
" <td>1.000000</td>\n",
|
1649 |
-
" <td>1.000000</td>\n",
|
1650 |
-
" <td>14</td>\n",
|
1651 |
-
" <td>14</td>\n",
|
1652 |
-
" <td>1.000000</td>\n",
|
1653 |
-
" <td>1.000000</td>\n",
|
1654 |
-
" <td>1.000000</td>\n",
|
1655 |
-
" <td>1.000000</td>\n",
|
1656 |
-
" </tr>\n",
|
1657 |
-
" <tr>\n",
|
1658 |
-
" <th>2</th>\n",
|
1659 |
-
" <td>[Hippocrates]</td>\n",
|
1660 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
|
1661 |
-
" <td>____________________ is considered the father ...</td>\n",
|
1662 |
-
" <td>9083</td>\n",
|
1663 |
-
" <td>DESCRIPTION</td>\n",
|
1664 |
-
" <td>[Hippocrates is considered the father of moder...</td>\n",
|
1665 |
-
" <td>0</td>\n",
|
1666 |
-
" <td>0.250000</td>\n",
|
1667 |
-
" <td>0.000000</td>\n",
|
1668 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1669 |
-
" <td>0.000335</td>\n",
|
1670 |
-
" <td>0.111111</td>\n",
|
1671 |
-
" <td>1</td>\n",
|
1672 |
-
" <td>9</td>\n",
|
1673 |
-
" <td>0.222222</td>\n",
|
1674 |
-
" <td>0.000000</td>\n",
|
1675 |
-
" <td>0.222222</td>\n",
|
1676 |
-
" <td>0.222222</td>\n",
|
1677 |
-
" </tr>\n",
|
1678 |
-
" <tr>\n",
|
1679 |
-
" <th>3</th>\n",
|
1680 |
-
" <td>[120 days from the date of the Note.]</td>\n",
|
1681 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1682 |
-
" <td>how many days is an appraisal good for a fanni...</td>\n",
|
1683 |
-
" <td>281439</td>\n",
|
1684 |
-
" <td>NUMERIC</td>\n",
|
1685 |
-
" <td>[An appraisal is good for 120 days from the da...</td>\n",
|
1686 |
-
" <td>0</td>\n",
|
1687 |
-
" <td>0.631579</td>\n",
|
1688 |
-
" <td>0.327096</td>\n",
|
1689 |
-
" <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
|
1690 |
-
" <td>0.367879</td>\n",
|
1691 |
-
" <td>0.500000</td>\n",
|
1692 |
-
" <td>9</td>\n",
|
1693 |
-
" <td>18</td>\n",
|
1694 |
-
" <td>0.640000</td>\n",
|
1695 |
-
" <td>0.608696</td>\n",
|
1696 |
-
" <td>0.640000</td>\n",
|
1697 |
-
" <td>0.640000</td>\n",
|
1698 |
-
" </tr>\n",
|
1699 |
-
" <tr>\n",
|
1700 |
-
" <th>4</th>\n",
|
1701 |
-
" <td>[From $26,000 to $39,000 a year]</td>\n",
|
1702 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1703 |
-
" <td>average pharmacy tech salary</td>\n",
|
1704 |
-
" <td>40287</td>\n",
|
1705 |
-
" <td>NUMERIC</td>\n",
|
1706 |
-
" <td>[The average salary for a pharmacy technician ...</td>\n",
|
1707 |
-
" <td>0</td>\n",
|
1708 |
-
" <td>0.500000</td>\n",
|
1709 |
-
" <td>0.193040</td>\n",
|
1710 |
-
" <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
|
1711 |
-
" <td>0.324652</td>\n",
|
1712 |
-
" <td>0.470588</td>\n",
|
1713 |
-
" <td>8</td>\n",
|
1714 |
-
" <td>17</td>\n",
|
1715 |
-
" <td>0.583333</td>\n",
|
1716 |
-
" <td>0.454545</td>\n",
|
1717 |
-
" <td>0.583333</td>\n",
|
1718 |
-
" <td>0.583333</td>\n",
|
1719 |
-
" </tr>\n",
|
1720 |
-
" <tr>\n",
|
1721 |
-
" <th>...</th>\n",
|
1722 |
-
" <td>...</td>\n",
|
1723 |
-
" <td>...</td>\n",
|
1724 |
-
" <td>...</td>\n",
|
1725 |
-
" <td>...</td>\n",
|
1726 |
-
" <td>...</td>\n",
|
1727 |
-
" <td>...</td>\n",
|
1728 |
-
" <td>...</td>\n",
|
1729 |
-
" <td>...</td>\n",
|
1730 |
-
" <td>...</td>\n",
|
1731 |
-
" <td>...</td>\n",
|
1732 |
-
" <td>...</td>\n",
|
1733 |
-
" <td>...</td>\n",
|
1734 |
-
" <td>...</td>\n",
|
1735 |
-
" <td>...</td>\n",
|
1736 |
-
" <td>...</td>\n",
|
1737 |
-
" <td>...</td>\n",
|
1738 |
-
" <td>...</td>\n",
|
1739 |
-
" <td>...</td>\n",
|
1740 |
-
" </tr>\n",
|
1741 |
-
" <tr>\n",
|
1742 |
-
" <th>495</th>\n",
|
1743 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
1744 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
|
1745 |
-
" <td>the pool shower company</td>\n",
|
1746 |
-
" <td>518269</td>\n",
|
1747 |
-
" <td>PERSON</td>\n",
|
1748 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
1749 |
-
" <td>1</td>\n",
|
1750 |
-
" <td>1.000000</td>\n",
|
1751 |
-
" <td>1.000000</td>\n",
|
1752 |
-
" <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
|
1753 |
-
" <td>1.000000</td>\n",
|
1754 |
-
" <td>1.000000</td>\n",
|
1755 |
-
" <td>19</td>\n",
|
1756 |
-
" <td>19</td>\n",
|
1757 |
-
" <td>1.000000</td>\n",
|
1758 |
-
" <td>1.000000</td>\n",
|
1759 |
-
" <td>1.000000</td>\n",
|
1760 |
-
" <td>1.000000</td>\n",
|
1761 |
-
" </tr>\n",
|
1762 |
-
" <tr>\n",
|
1763 |
-
" <th>496</th>\n",
|
1764 |
-
" <td>[Hanson]</td>\n",
|
1765 |
-
" <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
|
1766 |
-
" <td>longest tenured american football players</td>\n",
|
1767 |
-
" <td>442806</td>\n",
|
1768 |
-
" <td>PERSON</td>\n",
|
1769 |
-
" <td>[Hanson is the longest tenured American footba...</td>\n",
|
1770 |
-
" <td>0</td>\n",
|
1771 |
-
" <td>0.250000</td>\n",
|
1772 |
-
" <td>0.000000</td>\n",
|
1773 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1774 |
-
" <td>0.000335</td>\n",
|
1775 |
-
" <td>0.111111</td>\n",
|
1776 |
-
" <td>1</td>\n",
|
1777 |
-
" <td>9</td>\n",
|
1778 |
-
" <td>0.222222</td>\n",
|
1779 |
-
" <td>0.000000</td>\n",
|
1780 |
-
" <td>0.222222</td>\n",
|
1781 |
-
" <td>0.222222</td>\n",
|
1782 |
-
" </tr>\n",
|
1783 |
-
" <tr>\n",
|
1784 |
-
" <th>497</th>\n",
|
1785 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
1786 |
-
" <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
|
1787 |
-
" <td>mt. view baptist in pendleton sc</td>\n",
|
1788 |
-
" <td>460250</td>\n",
|
1789 |
-
" <td>PERSON</td>\n",
|
1790 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
1791 |
-
" <td>1</td>\n",
|
1792 |
-
" <td>1.000000</td>\n",
|
1793 |
-
" <td>1.000000</td>\n",
|
1794 |
-
" <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
|
1795 |
-
" <td>1.000000</td>\n",
|
1796 |
-
" <td>1.000000</td>\n",
|
1797 |
-
" <td>21</td>\n",
|
1798 |
-
" <td>21</td>\n",
|
1799 |
-
" <td>1.000000</td>\n",
|
1800 |
-
" <td>1.000000</td>\n",
|
1801 |
-
" <td>1.000000</td>\n",
|
1802 |
-
" <td>1.000000</td>\n",
|
1803 |
-
" </tr>\n",
|
1804 |
-
" <tr>\n",
|
1805 |
-
" <th>498</th>\n",
|
1806 |
-
" <td>[Honeysuckle Weeks]</td>\n",
|
1807 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
|
1808 |
-
" <td>what actress disappeared for a while</td>\n",
|
1809 |
-
" <td>549739</td>\n",
|
1810 |
-
" <td>PERSON</td>\n",
|
1811 |
-
" <td>[The actress disappeared for a while Honeysuck...</td>\n",
|
1812 |
-
" <td>0</td>\n",
|
1813 |
-
" <td>0.500000</td>\n",
|
1814 |
-
" <td>0.000000</td>\n",
|
1815 |
-
" <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
|
1816 |
-
" <td>0.030197</td>\n",
|
1817 |
-
" <td>0.222222</td>\n",
|
1818 |
-
" <td>2</td>\n",
|
1819 |
-
" <td>9</td>\n",
|
1820 |
-
" <td>0.400000</td>\n",
|
1821 |
-
" <td>0.250000</td>\n",
|
1822 |
-
" <td>0.400000</td>\n",
|
1823 |
-
" <td>0.400000</td>\n",
|
1824 |
-
" </tr>\n",
|
1825 |
-
" <tr>\n",
|
1826 |
-
" <th>499</th>\n",
|
1827 |
-
" <td>[African-Nguni]</td>\n",
|
1828 |
-
" <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
|
1829 |
-
" <td>what ethnicity is the surname sabol</td>\n",
|
1830 |
-
" <td>658265</td>\n",
|
1831 |
-
" <td>PERSON</td>\n",
|
1832 |
-
" <td>[The ethnicity of the surname Sabol is African...</td>\n",
|
1833 |
-
" <td>0</td>\n",
|
1834 |
-
" <td>0.285714</td>\n",
|
1835 |
-
" <td>0.000000</td>\n",
|
1836 |
-
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
|
1837 |
-
" <td>0.000335</td>\n",
|
1838 |
-
" <td>0.111111</td>\n",
|
1839 |
-
" <td>1</td>\n",
|
1840 |
-
" <td>9</td>\n",
|
1841 |
-
" <td>0.363636</td>\n",
|
1842 |
-
" <td>0.222222</td>\n",
|
1843 |
-
" <td>0.363636</td>\n",
|
1844 |
-
" <td>0.363636</td>\n",
|
1845 |
-
" </tr>\n",
|
1846 |
-
" </tbody>\n",
|
1847 |
-
"</table>\n",
|
1848 |
-
"<p>500 rows × 18 columns</p>\n",
|
1849 |
-
"</div>"
|
1850 |
-
],
|
1851 |
-
"text/plain": [
|
1852 |
-
" answers \\\n",
|
1853 |
-
"0 [2,662] \n",
|
1854 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
1855 |
-
"2 [Hippocrates] \n",
|
1856 |
-
"3 [120 days from the date of the Note.] \n",
|
1857 |
-
"4 [From $26,000 to $39,000 a year] \n",
|
1858 |
-
".. ... \n",
|
1859 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
1860 |
-
"496 [Hanson] \n",
|
1861 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
1862 |
-
"498 [Honeysuckle Weeks] \n",
|
1863 |
-
"499 [African-Nguni] \n",
|
1864 |
-
"\n",
|
1865 |
-
" passages \\\n",
|
1866 |
-
"0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
|
1867 |
-
"1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
|
1868 |
-
"2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
|
1869 |
-
"3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
1870 |
-
"4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
1871 |
-
".. ... \n",
|
1872 |
-
"495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
|
1873 |
-
"496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
|
1874 |
-
"497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
|
1875 |
-
"498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
|
1876 |
-
"499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
|
1877 |
-
"\n",
|
1878 |
-
" query query_id query_type \\\n",
|
1879 |
-
"0 albany mn population 15177 NUMERIC \n",
|
1880 |
-
"1 current weather in volcano, ca 114414 DESCRIPTION \n",
|
1881 |
-
"2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
|
1882 |
-
"3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
|
1883 |
-
"4 average pharmacy tech salary 40287 NUMERIC \n",
|
1884 |
-
".. ... ... ... \n",
|
1885 |
-
"495 the pool shower company 518269 PERSON \n",
|
1886 |
-
"496 longest tenured american football players 442806 PERSON \n",
|
1887 |
-
"497 mt. view baptist in pendleton sc 460250 PERSON \n",
|
1888 |
-
"498 what actress disappeared for a while 549739 PERSON \n",
|
1889 |
-
"499 what ethnicity is the surname sabol 658265 PERSON \n",
|
1890 |
-
"\n",
|
1891 |
-
" wellFormedAnswers EM F1 \\\n",
|
1892 |
-
"0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
|
1893 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
|
1894 |
-
"2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
|
1895 |
-
"3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
|
1896 |
-
"4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
|
1897 |
-
".. ... .. ... \n",
|
1898 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
|
1899 |
-
"496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
|
1900 |
-
"497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
|
1901 |
-
"498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
|
1902 |
-
"499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
|
1903 |
-
"\n",
|
1904 |
-
" bleu precisions \\\n",
|
1905 |
-
"0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1906 |
-
"1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
|
1907 |
-
"2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1908 |
-
"3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
|
1909 |
-
"4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
|
1910 |
-
".. ... ... \n",
|
1911 |
-
"495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
|
1912 |
-
"496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1913 |
-
"497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
|
1914 |
-
"498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
|
1915 |
-
"499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
|
1916 |
-
"\n",
|
1917 |
-
" brevity_penalty length_ratio translation_length reference_length \\\n",
|
1918 |
-
"0 0.000335 0.111111 1 9 \n",
|
1919 |
-
"1 1.000000 1.000000 14 14 \n",
|
1920 |
-
"2 0.000335 0.111111 1 9 \n",
|
1921 |
-
"3 0.367879 0.500000 9 18 \n",
|
1922 |
-
"4 0.324652 0.470588 8 17 \n",
|
1923 |
-
".. ... ... ... ... \n",
|
1924 |
-
"495 1.000000 1.000000 19 19 \n",
|
1925 |
-
"496 0.000335 0.111111 1 9 \n",
|
1926 |
-
"497 1.000000 1.000000 21 21 \n",
|
1927 |
-
"498 0.030197 0.222222 2 9 \n",
|
1928 |
-
"499 0.000335 0.111111 1 9 \n",
|
1929 |
-
"\n",
|
1930 |
-
" rouge1 rouge2 rougeL rougeLsum \n",
|
1931 |
-
"0 0.400000 0.250000 0.400000 0.400000 \n",
|
1932 |
-
"1 1.000000 1.000000 1.000000 1.000000 \n",
|
1933 |
-
"2 0.222222 0.000000 0.222222 0.222222 \n",
|
1934 |
-
"3 0.640000 0.608696 0.640000 0.640000 \n",
|
1935 |
-
"4 0.583333 0.454545 0.583333 0.583333 \n",
|
1936 |
-
".. ... ... ... ... \n",
|
1937 |
-
"495 1.000000 1.000000 1.000000 1.000000 \n",
|
1938 |
-
"496 0.222222 0.000000 0.222222 0.222222 \n",
|
1939 |
-
"497 1.000000 1.000000 1.000000 1.000000 \n",
|
1940 |
-
"498 0.400000 0.250000 0.400000 0.400000 \n",
|
1941 |
-
"499 0.363636 0.222222 0.363636 0.363636 \n",
|
1942 |
-
"\n",
|
1943 |
-
"[500 rows x 18 columns]"
|
1944 |
-
]
|
1945 |
-
},
|
1946 |
-
"execution_count": 57,
|
1947 |
-
"metadata": {},
|
1948 |
-
"output_type": "execute_result"
|
1949 |
-
}
|
1950 |
-
],
|
1951 |
-
"source": [
|
1952 |
-
"result_all = result_all.map(\n",
|
1953 |
-
" lambda record: rouge.compute(\n",
|
1954 |
-
" predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
|
1955 |
-
" ),\n",
|
1956 |
-
" batched=False,\n",
|
1957 |
-
")\n",
|
1958 |
-
"result_all.to_pandas()"
|
1959 |
-
]
|
1960 |
-
}
|
1961 |
-
],
|
1962 |
-
"metadata": {
|
1963 |
-
"kernelspec": {
|
1964 |
-
"display_name": "Python 3 (ipykernel)",
|
1965 |
-
"language": "python",
|
1966 |
-
"name": "python3"
|
1967 |
-
},
|
1968 |
-
"language_info": {
|
1969 |
-
"codemirror_mode": {
|
1970 |
-
"name": "ipython",
|
1971 |
-
"version": 3
|
1972 |
-
},
|
1973 |
-
"file_extension": ".py",
|
1974 |
-
"mimetype": "text/x-python",
|
1975 |
-
"name": "python",
|
1976 |
-
"nbconvert_exporter": "python",
|
1977 |
-
"pygments_lexer": "ipython3",
|
1978 |
-
"version": "3.11.4"
|
1979 |
-
}
|
1980 |
-
},
|
1981 |
-
"nbformat": 4,
|
1982 |
-
"nbformat_minor": 5
|
1983 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Llama-2-eval/notebook/metrics.ipynb
DELETED
@@ -1,1293 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": 1,
|
6 |
-
"id": "af2d4577",
|
7 |
-
"metadata": {},
|
8 |
-
"outputs": [
|
9 |
-
{
|
10 |
-
"name": "stdout",
|
11 |
-
"output_type": "stream",
|
12 |
-
"text": [
|
13 |
-
"Note: you may need to restart the kernel to use updated packages.\n"
|
14 |
-
]
|
15 |
-
}
|
16 |
-
],
|
17 |
-
"source": [
|
18 |
-
"%pip install -q evaluate rouge_score"
|
19 |
-
]
|
20 |
-
},
|
21 |
-
{
|
22 |
-
"cell_type": "code",
|
23 |
-
"execution_count": 2,
|
24 |
-
"id": "a6d96660",
|
25 |
-
"metadata": {},
|
26 |
-
"outputs": [
|
27 |
-
{
|
28 |
-
"data": {
|
29 |
-
"text/plain": [
|
30 |
-
"True"
|
31 |
-
]
|
32 |
-
},
|
33 |
-
"execution_count": 2,
|
34 |
-
"metadata": {},
|
35 |
-
"output_type": "execute_result"
|
36 |
-
}
|
37 |
-
],
|
38 |
-
"source": [
|
39 |
-
"import os\n",
|
40 |
-
"from dotenv import load_dotenv\n",
|
41 |
-
"\n",
|
42 |
-
"load_dotenv()"
|
43 |
-
]
|
44 |
-
},
|
45 |
-
{
|
46 |
-
"cell_type": "code",
|
47 |
-
"execution_count": 3,
|
48 |
-
"id": "b72bf3f9",
|
49 |
-
"metadata": {},
|
50 |
-
"outputs": [
|
51 |
-
{
|
52 |
-
"data": {
|
53 |
-
"text/plain": [
|
54 |
-
"Dataset({\n",
|
55 |
-
" features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
|
56 |
-
" num_rows: 500\n",
|
57 |
-
"})"
|
58 |
-
]
|
59 |
-
},
|
60 |
-
"execution_count": 3,
|
61 |
-
"metadata": {},
|
62 |
-
"output_type": "execute_result"
|
63 |
-
}
|
64 |
-
],
|
65 |
-
"source": [
|
66 |
-
"from datasets import load_from_disk\n",
|
67 |
-
"\n",
|
68 |
-
"new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
|
69 |
-
"new_ds"
|
70 |
-
]
|
71 |
-
},
|
72 |
-
{
|
73 |
-
"cell_type": "code",
|
74 |
-
"execution_count": 4,
|
75 |
-
"id": "051bd771",
|
76 |
-
"metadata": {},
|
77 |
-
"outputs": [
|
78 |
-
{
|
79 |
-
"data": {
|
80 |
-
"text/plain": [
|
81 |
-
"({'NUMERIC': 100,\n",
|
82 |
-
" 'DESCRIPTION': 100,\n",
|
83 |
-
" 'ENTITY': 100,\n",
|
84 |
-
" 'PERSON': 100,\n",
|
85 |
-
" 'LOCATION': 100},\n",
|
86 |
-
" {'NUMERIC': 179,\n",
|
87 |
-
" 'DESCRIPTION': 215,\n",
|
88 |
-
" 'ENTITY': 443,\n",
|
89 |
-
" 'LOCATION': 461,\n",
|
90 |
-
" 'PERSON': 499})"
|
91 |
-
]
|
92 |
-
},
|
93 |
-
"execution_count": 4,
|
94 |
-
"metadata": {},
|
95 |
-
"output_type": "execute_result"
|
96 |
-
}
|
97 |
-
],
|
98 |
-
"source": [
|
99 |
-
"counts = {}\n",
|
100 |
-
"indices = {}\n",
|
101 |
-
"size = 100\n",
|
102 |
-
"for i in range(new_ds.num_rows):\n",
|
103 |
-
" row = new_ds[i]\n",
|
104 |
-
" query_type = row[\"query_type\"]\n",
|
105 |
-
" if query_type in counts:\n",
|
106 |
-
" counts[query_type] += 1\n",
|
107 |
-
" else:\n",
|
108 |
-
" counts[query_type] = 1\n",
|
109 |
-
" if counts[query_type] == size:\n",
|
110 |
-
" indices[query_type] = i\n",
|
111 |
-
"counts, indices"
|
112 |
-
]
|
113 |
-
},
|
114 |
-
{
|
115 |
-
"cell_type": "code",
|
116 |
-
"execution_count": 5,
|
117 |
-
"id": "db48dcc4",
|
118 |
-
"metadata": {},
|
119 |
-
"outputs": [
|
120 |
-
{
|
121 |
-
"data": {
|
122 |
-
"text/html": [
|
123 |
-
"<div>\n",
|
124 |
-
"<style scoped>\n",
|
125 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
126 |
-
" vertical-align: middle;\n",
|
127 |
-
" }\n",
|
128 |
-
"\n",
|
129 |
-
" .dataframe tbody tr th {\n",
|
130 |
-
" vertical-align: top;\n",
|
131 |
-
" }\n",
|
132 |
-
"\n",
|
133 |
-
" .dataframe thead th {\n",
|
134 |
-
" text-align: right;\n",
|
135 |
-
" }\n",
|
136 |
-
"</style>\n",
|
137 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
138 |
-
" <thead>\n",
|
139 |
-
" <tr style=\"text-align: right;\">\n",
|
140 |
-
" <th></th>\n",
|
141 |
-
" <th>answers</th>\n",
|
142 |
-
" <th>passages</th>\n",
|
143 |
-
" <th>query</th>\n",
|
144 |
-
" <th>query_id</th>\n",
|
145 |
-
" <th>query_type</th>\n",
|
146 |
-
" <th>wellFormedAnswers</th>\n",
|
147 |
-
" </tr>\n",
|
148 |
-
" </thead>\n",
|
149 |
-
" <tbody>\n",
|
150 |
-
" <tr>\n",
|
151 |
-
" <th>0</th>\n",
|
152 |
-
" <td>[2,662]</td>\n",
|
153 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
|
154 |
-
" <td>albany mn population</td>\n",
|
155 |
-
" <td>15177</td>\n",
|
156 |
-
" <td>NUMERIC</td>\n",
|
157 |
-
" <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
|
158 |
-
" </tr>\n",
|
159 |
-
" <tr>\n",
|
160 |
-
" <th>1</th>\n",
|
161 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
162 |
-
" <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
|
163 |
-
" <td>current weather in volcano, ca</td>\n",
|
164 |
-
" <td>114414</td>\n",
|
165 |
-
" <td>DESCRIPTION</td>\n",
|
166 |
-
" <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
|
167 |
-
" </tr>\n",
|
168 |
-
" <tr>\n",
|
169 |
-
" <th>2</th>\n",
|
170 |
-
" <td>[Hippocrates]</td>\n",
|
171 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
|
172 |
-
" <td>____________________ is considered the father ...</td>\n",
|
173 |
-
" <td>9083</td>\n",
|
174 |
-
" <td>DESCRIPTION</td>\n",
|
175 |
-
" <td>[Hippocrates is considered the father of moder...</td>\n",
|
176 |
-
" </tr>\n",
|
177 |
-
" <tr>\n",
|
178 |
-
" <th>3</th>\n",
|
179 |
-
" <td>[120 days from the date of the Note.]</td>\n",
|
180 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
181 |
-
" <td>how many days is an appraisal good for a fanni...</td>\n",
|
182 |
-
" <td>281439</td>\n",
|
183 |
-
" <td>NUMERIC</td>\n",
|
184 |
-
" <td>[An appraisal is good for 120 days from the da...</td>\n",
|
185 |
-
" </tr>\n",
|
186 |
-
" <tr>\n",
|
187 |
-
" <th>4</th>\n",
|
188 |
-
" <td>[From $26,000 to $39,000 a year]</td>\n",
|
189 |
-
" <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
|
190 |
-
" <td>average pharmacy tech salary</td>\n",
|
191 |
-
" <td>40287</td>\n",
|
192 |
-
" <td>NUMERIC</td>\n",
|
193 |
-
" <td>[The average salary for a pharmacy technician ...</td>\n",
|
194 |
-
" </tr>\n",
|
195 |
-
" <tr>\n",
|
196 |
-
" <th>...</th>\n",
|
197 |
-
" <td>...</td>\n",
|
198 |
-
" <td>...</td>\n",
|
199 |
-
" <td>...</td>\n",
|
200 |
-
" <td>...</td>\n",
|
201 |
-
" <td>...</td>\n",
|
202 |
-
" <td>...</td>\n",
|
203 |
-
" </tr>\n",
|
204 |
-
" <tr>\n",
|
205 |
-
" <th>495</th>\n",
|
206 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
207 |
-
" <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
|
208 |
-
" <td>the pool shower company</td>\n",
|
209 |
-
" <td>518269</td>\n",
|
210 |
-
" <td>PERSON</td>\n",
|
211 |
-
" <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
|
212 |
-
" </tr>\n",
|
213 |
-
" <tr>\n",
|
214 |
-
" <th>496</th>\n",
|
215 |
-
" <td>[Hanson]</td>\n",
|
216 |
-
" <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
|
217 |
-
" <td>longest tenured american football players</td>\n",
|
218 |
-
" <td>442806</td>\n",
|
219 |
-
" <td>PERSON</td>\n",
|
220 |
-
" <td>[Hanson is the longest tenured American footba...</td>\n",
|
221 |
-
" </tr>\n",
|
222 |
-
" <tr>\n",
|
223 |
-
" <th>497</th>\n",
|
224 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
225 |
-
" <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
|
226 |
-
" <td>mt. view baptist in pendleton sc</td>\n",
|
227 |
-
" <td>460250</td>\n",
|
228 |
-
" <td>PERSON</td>\n",
|
229 |
-
" <td>[Mount Able Baptist Church is located at the a...</td>\n",
|
230 |
-
" </tr>\n",
|
231 |
-
" <tr>\n",
|
232 |
-
" <th>498</th>\n",
|
233 |
-
" <td>[Honeysuckle Weeks]</td>\n",
|
234 |
-
" <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
|
235 |
-
" <td>what actress disappeared for a while</td>\n",
|
236 |
-
" <td>549739</td>\n",
|
237 |
-
" <td>PERSON</td>\n",
|
238 |
-
" <td>[The actress disappeared for a while Honeysuck...</td>\n",
|
239 |
-
" </tr>\n",
|
240 |
-
" <tr>\n",
|
241 |
-
" <th>499</th>\n",
|
242 |
-
" <td>[African-Nguni]</td>\n",
|
243 |
-
" <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
|
244 |
-
" <td>what ethnicity is the surname sabol</td>\n",
|
245 |
-
" <td>658265</td>\n",
|
246 |
-
" <td>PERSON</td>\n",
|
247 |
-
" <td>[The ethnicity of the surname Sabol is African...</td>\n",
|
248 |
-
" </tr>\n",
|
249 |
-
" </tbody>\n",
|
250 |
-
"</table>\n",
|
251 |
-
"<p>500 rows × 6 columns</p>\n",
|
252 |
-
"</div>"
|
253 |
-
],
|
254 |
-
"text/plain": [
|
255 |
-
" answers \\\n",
|
256 |
-
"0 [2,662] \n",
|
257 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
258 |
-
"2 [Hippocrates] \n",
|
259 |
-
"3 [120 days from the date of the Note.] \n",
|
260 |
-
"4 [From $26,000 to $39,000 a year] \n",
|
261 |
-
".. ... \n",
|
262 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
263 |
-
"496 [Hanson] \n",
|
264 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
265 |
-
"498 [Honeysuckle Weeks] \n",
|
266 |
-
"499 [African-Nguni] \n",
|
267 |
-
"\n",
|
268 |
-
" passages \\\n",
|
269 |
-
"0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
|
270 |
-
"1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
|
271 |
-
"2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
|
272 |
-
"3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
273 |
-
"4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
|
274 |
-
".. ... \n",
|
275 |
-
"495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
|
276 |
-
"496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
|
277 |
-
"497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
|
278 |
-
"498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
|
279 |
-
"499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
|
280 |
-
"\n",
|
281 |
-
" query query_id query_type \\\n",
|
282 |
-
"0 albany mn population 15177 NUMERIC \n",
|
283 |
-
"1 current weather in volcano, ca 114414 DESCRIPTION \n",
|
284 |
-
"2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
|
285 |
-
"3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
|
286 |
-
"4 average pharmacy tech salary 40287 NUMERIC \n",
|
287 |
-
".. ... ... ... \n",
|
288 |
-
"495 the pool shower company 518269 PERSON \n",
|
289 |
-
"496 longest tenured american football players 442806 PERSON \n",
|
290 |
-
"497 mt. view baptist in pendleton sc 460250 PERSON \n",
|
291 |
-
"498 what actress disappeared for a while 549739 PERSON \n",
|
292 |
-
"499 what ethnicity is the surname sabol 658265 PERSON \n",
|
293 |
-
"\n",
|
294 |
-
" wellFormedAnswers \n",
|
295 |
-
"0 [The population of Albany, Minnesota is 2,662. ] \n",
|
296 |
-
"1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
|
297 |
-
"2 [Hippocrates is considered the father of moder... \n",
|
298 |
-
"3 [An appraisal is good for 120 days from the da... \n",
|
299 |
-
"4 [The average salary for a pharmacy technician ... \n",
|
300 |
-
".. ... \n",
|
301 |
-
"495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
|
302 |
-
"496 [Hanson is the longest tenured American footba... \n",
|
303 |
-
"497 [Mount Able Baptist Church is located at the a... \n",
|
304 |
-
"498 [The actress disappeared for a while Honeysuck... \n",
|
305 |
-
"499 [The ethnicity of the surname Sabol is African... \n",
|
306 |
-
"\n",
|
307 |
-
"[500 rows x 6 columns]"
|
308 |
-
]
|
309 |
-
},
|
310 |
-
"execution_count": 5,
|
311 |
-
"metadata": {},
|
312 |
-
"output_type": "execute_result"
|
313 |
-
}
|
314 |
-
],
|
315 |
-
"source": [
|
316 |
-
"new_ds.to_pandas()"
|
317 |
-
]
|
318 |
-
},
|
319 |
-
{
|
320 |
-
"cell_type": "code",
|
321 |
-
"execution_count": 5,
|
322 |
-
"id": "89494c3d",
|
323 |
-
"metadata": {},
|
324 |
-
"outputs": [],
|
325 |
-
"source": [
|
326 |
-
"import evaluate\n",
|
327 |
-
"\n",
|
328 |
-
"bleu = evaluate.load(\"bleu\")\n",
|
329 |
-
"rouge = evaluate.load(\"rouge\")"
|
330 |
-
]
|
331 |
-
},
|
332 |
-
{
|
333 |
-
"cell_type": "code",
|
334 |
-
"execution_count": 6,
|
335 |
-
"id": "24a818ba",
|
336 |
-
"metadata": {},
|
337 |
-
"outputs": [],
|
338 |
-
"source": [
|
339 |
-
"def calc_metrics(ds):\n",
|
340 |
-
" predictions = [ds[i][\"answers\"][0] for i in range(ds.num_rows)]\n",
|
341 |
-
" references = [ds[i][\"wellFormedAnswers\"][0] for i in range(ds.num_rows)]\n",
|
342 |
-
" bleu_scores = bleu.compute(predictions=predictions, references=references)\n",
|
343 |
-
" rouge_scores = rouge.compute(predictions=predictions, references=references)\n",
|
344 |
-
" return {\"bleu_scores\": bleu_scores, \"rouge_scores\": rouge_scores}"
|
345 |
-
]
|
346 |
-
},
|
347 |
-
{
|
348 |
-
"cell_type": "code",
|
349 |
-
"execution_count": 8,
|
350 |
-
"id": "e447aa08",
|
351 |
-
"metadata": {},
|
352 |
-
"outputs": [
|
353 |
-
{
|
354 |
-
"data": {
|
355 |
-
"text/plain": [
|
356 |
-
"{'bleu_scores': {'bleu': 0.5842479720128682,\n",
|
357 |
-
" 'precisions': [0.7814257485940113,\n",
|
358 |
-
" 0.7185392334265505,\n",
|
359 |
-
" 0.6801561945331913,\n",
|
360 |
-
" 0.6543700340522134],\n",
|
361 |
-
" 'brevity_penalty': 0.8263321448047812,\n",
|
362 |
-
" 'length_ratio': 0.8398008680112331,\n",
|
363 |
-
" 'translation_length': 6579,\n",
|
364 |
-
" 'reference_length': 7834},\n",
|
365 |
-
" 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
|
366 |
-
" 'rouge2': 0.5266427189500504,\n",
|
367 |
-
" 'rougeL': 0.623467453115133,\n",
|
368 |
-
" 'rougeLsum': 0.6239164817179192}}"
|
369 |
-
]
|
370 |
-
},
|
371 |
-
"execution_count": 8,
|
372 |
-
"metadata": {},
|
373 |
-
"output_type": "execute_result"
|
374 |
-
}
|
375 |
-
],
|
376 |
-
"source": [
|
377 |
-
"calc_metrics(new_ds)"
|
378 |
-
]
|
379 |
-
},
|
380 |
-
{
|
381 |
-
"cell_type": "code",
|
382 |
-
"execution_count": 9,
|
383 |
-
"id": "b29d1f3e",
|
384 |
-
"metadata": {},
|
385 |
-
"outputs": [],
|
386 |
-
"source": [
|
387 |
-
"def calc_all_metrics(ds):\n",
|
388 |
-
" result = {}\n",
|
389 |
-
" result[\"OVERALL\"] = calc_metrics(ds)\n",
|
390 |
-
" for query_type in indices:\n",
|
391 |
-
" result[query_type] = calc_metrics(\n",
|
392 |
-
" ds.filter(lambda example: example[\"query_type\"] == query_type)\n",
|
393 |
-
" )\n",
|
394 |
-
"\n",
|
395 |
-
" return result"
|
396 |
-
]
|
397 |
-
},
|
398 |
-
{
|
399 |
-
"cell_type": "code",
|
400 |
-
"execution_count": 10,
|
401 |
-
"id": "1a4273da",
|
402 |
-
"metadata": {},
|
403 |
-
"outputs": [
|
404 |
-
{
|
405 |
-
"data": {
|
406 |
-
"text/plain": [
|
407 |
-
"{'OVERALL': {'bleu_scores': {'bleu': 0.5842479720128682,\n",
|
408 |
-
" 'precisions': [0.7814257485940113,\n",
|
409 |
-
" 0.7185392334265505,\n",
|
410 |
-
" 0.6801561945331913,\n",
|
411 |
-
" 0.6543700340522134],\n",
|
412 |
-
" 'brevity_penalty': 0.8263321448047812,\n",
|
413 |
-
" 'length_ratio': 0.8398008680112331,\n",
|
414 |
-
" 'translation_length': 6579,\n",
|
415 |
-
" 'reference_length': 7834},\n",
|
416 |
-
" 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
|
417 |
-
" 'rouge2': 0.5266427189500504,\n",
|
418 |
-
" 'rougeL': 0.623467453115133,\n",
|
419 |
-
" 'rougeLsum': 0.6239164817179192}},\n",
|
420 |
-
" 'NUMERIC': {'bleu_scores': {'bleu': 0.3589193328591513,\n",
|
421 |
-
" 'precisions': [0.7536764705882353,\n",
|
422 |
-
" 0.6494413407821229,\n",
|
423 |
-
" 0.5884244372990354,\n",
|
424 |
-
" 0.5657657657657658],\n",
|
425 |
-
" 'brevity_penalty': 0.5649158870633492,\n",
|
426 |
-
" 'length_ratio': 0.6365054602184087,\n",
|
427 |
-
" 'translation_length': 816,\n",
|
428 |
-
" 'reference_length': 1282},\n",
|
429 |
-
" 'rouge_scores': {'rouge1': 0.5569863096088544,\n",
|
430 |
-
" 'rouge2': 0.4262959859853511,\n",
|
431 |
-
" 'rougeL': 0.5495190228731732,\n",
|
432 |
-
" 'rougeLsum': 0.5502805905003136}},\n",
|
433 |
-
" 'DESCRIPTION': {'bleu_scores': {'bleu': 0.7521919521555381,\n",
|
434 |
-
" 'precisions': [0.8093238135237295,\n",
|
435 |
-
" 0.761946514686541,\n",
|
436 |
-
" 0.7335164835164835,\n",
|
437 |
-
" 0.7077144226161955],\n",
|
438 |
-
" 'brevity_penalty': 1.0,\n",
|
439 |
-
" 'length_ratio': 1.0778632865550022,\n",
|
440 |
-
" 'translation_length': 2381,\n",
|
441 |
-
" 'reference_length': 2209},\n",
|
442 |
-
" 'rouge_scores': {'rouge1': 0.8503571429521525,\n",
|
443 |
-
" 'rouge2': 0.8009206345153658,\n",
|
444 |
-
" 'rougeL': 0.8406066569954856,\n",
|
445 |
-
" 'rougeLsum': 0.8405710628479812}},\n",
|
446 |
-
" 'ENTITY': {'bleu_scores': {'bleu': 0.5057439480363012,\n",
|
447 |
-
" 'precisions': [0.7135050741608119,\n",
|
448 |
-
" 0.6375952582557155,\n",
|
449 |
-
" 0.5884509624197983,\n",
|
450 |
-
" 0.5555555555555556],\n",
|
451 |
-
" 'brevity_penalty': 0.8143961563151505,\n",
|
452 |
-
" 'length_ratio': 0.8296632124352331,\n",
|
453 |
-
" 'translation_length': 1281,\n",
|
454 |
-
" 'reference_length': 1544},\n",
|
455 |
-
" 'rouge_scores': {'rouge1': 0.5877667231458372,\n",
|
456 |
-
" 'rouge2': 0.48898551862814277,\n",
|
457 |
-
" 'rougeL': 0.5796676511145928,\n",
|
458 |
-
" 'rougeLsum': 0.5784518864116339}},\n",
|
459 |
-
" 'LOCATION': {'bleu_scores': {'bleu': 0.4167786604147962,\n",
|
460 |
-
" 'precisions': [0.8600583090379009,\n",
|
461 |
-
" 0.7986348122866894,\n",
|
462 |
-
" 0.7573385518590998,\n",
|
463 |
-
" 0.7414529914529915],\n",
|
464 |
-
" 'brevity_penalty': 0.5288627994571649,\n",
|
465 |
-
" 'length_ratio': 0.6108637577916296,\n",
|
466 |
-
" 'translation_length': 686,\n",
|
467 |
-
" 'reference_length': 1123},\n",
|
468 |
-
" 'rouge_scores': {'rouge1': 0.5405464995752973,\n",
|
469 |
-
" 'rouge2': 0.3950940848806123,\n",
|
470 |
-
" 'rougeL': 0.5400724136440879,\n",
|
471 |
-
" 'rougeLsum': 0.5389556394979822}},\n",
|
472 |
-
" 'PERSON': {'bleu_scores': {'bleu': 0.5861084149356606,\n",
|
473 |
-
" 'precisions': [0.773851590106007,\n",
|
474 |
-
" 0.7178707224334601,\n",
|
475 |
-
" 0.6810766721044046,\n",
|
476 |
-
" 0.6522864538395168],\n",
|
477 |
-
" 'brevity_penalty': 0.8315596069910627,\n",
|
478 |
-
" 'length_ratio': 0.844272076372315,\n",
|
479 |
-
" 'translation_length': 1415,\n",
|
480 |
-
" 'reference_length': 1676},\n",
|
481 |
-
" 'rouge_scores': {'rouge1': 0.6119770025611677,\n",
|
482 |
-
" 'rouge2': 0.522853938087197,\n",
|
483 |
-
" 'rougeL': 0.6096713664231095,\n",
|
484 |
-
" 'rougeLsum': 0.6103086543984155}}}"
|
485 |
-
]
|
486 |
-
},
|
487 |
-
"execution_count": 10,
|
488 |
-
"metadata": {},
|
489 |
-
"output_type": "execute_result"
|
490 |
-
}
|
491 |
-
],
|
492 |
-
"source": [
|
493 |
-
"calc_all_metrics(new_ds)"
|
494 |
-
]
|
495 |
-
},
|
496 |
-
{
|
497 |
-
"cell_type": "code",
|
498 |
-
"execution_count": 11,
|
499 |
-
"id": "3698be27",
|
500 |
-
"metadata": {},
|
501 |
-
"outputs": [
|
502 |
-
{
|
503 |
-
"name": "stdout",
|
504 |
-
"output_type": "stream",
|
505 |
-
"text": [
|
506 |
-
"loading env vars from: /Users/inflaton/code/emtech/gpt/Llama-2-eval/.env\n",
|
507 |
-
"App init started at 2023-10-10 12:04:33.775140\n",
|
508 |
-
"Running on: macOS-14.0-arm64-arm-64bit\n",
|
509 |
-
"MPS is available\n",
|
510 |
-
"CUDA is NOT available\n",
|
511 |
-
"hf_embeddings_device_type: mps\n",
|
512 |
-
"hf_pipeline_device_type: mps\n",
|
513 |
-
"initializing LLM: openai\n",
|
514 |
-
" hf_pipeline_device_type: mps\n",
|
515 |
-
" load_quantized_model: None\n",
|
516 |
-
" torch_dtype: torch.float32\n",
|
517 |
-
" n_threds: 24\n",
|
518 |
-
" using model: gpt-3.5-turbo\n",
|
519 |
-
"initialization complete\n",
|
520 |
-
"App init completed in 0.167s\n"
|
521 |
-
]
|
522 |
-
}
|
523 |
-
],
|
524 |
-
"source": [
|
525 |
-
"import json\n",
|
526 |
-
"import sys\n",
|
527 |
-
"import os\n",
|
528 |
-
"\n",
|
529 |
-
"os.environ[\"TEST_FIRST_5\"] = \"true\"\n",
|
530 |
-
"os.environ[\"LANGCHAIN_DEBUG\"] = \"true\"\n",
|
531 |
-
"\n",
|
532 |
-
"from pathlib import Path\n",
|
533 |
-
"\n",
|
534 |
-
"sys.path.append(str(Path.cwd().parent))\n",
|
535 |
-
"\n",
|
536 |
-
"from evaluate_llm_ms_macro import (\n",
|
537 |
-
" QAChainWithMsMacroDataset,\n",
|
538 |
-
" llm_loader,\n",
|
539 |
-
" calc_all_metrics,\n",
|
540 |
-
")"
|
541 |
-
]
|
542 |
-
},
|
543 |
-
{
|
544 |
-
"cell_type": "code",
|
545 |
-
"execution_count": 12,
|
546 |
-
"id": "2395804d",
|
547 |
-
"metadata": {},
|
548 |
-
"outputs": [
|
549 |
-
{
|
550 |
-
"name": "stdout",
|
551 |
-
"output_type": "stream",
|
552 |
-
"text": [
|
553 |
-
"{'question': 'albany mn population', 'chat_history': []}\n",
|
554 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
|
555 |
-
"\u001b[0m{\n",
|
556 |
-
" \"question\": \"albany mn population\",\n",
|
557 |
-
" \"chat_history\": []\n",
|
558 |
-
"}\n",
|
559 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
|
560 |
-
"\u001b[0m[inputs]\n",
|
561 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
|
562 |
-
"\u001b[0m{\n",
|
563 |
-
" \"question\": \"albany mn population\",\n",
|
564 |
-
" \"context\": \"City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. 
Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"\n",
|
565 |
-
"}\n",
|
566 |
-
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
|
567 |
-
"\u001b[0m{\n",
|
568 |
-
" \"prompts\": [\n",
|
569 |
-
" \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
|
570 |
-
" ]\n",
|
571 |
-
"}\n",
|
572 |
-
"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.23s] Exiting LLM run with output:\n",
|
573 |
-
"\u001b[0m{\n",
|
574 |
-
" \"generations\": [\n",
|
575 |
-
" [\n",
|
576 |
-
" {\n",
|
577 |
-
" \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
|
578 |
-
" \"generation_info\": {\n",
|
579 |
-
" \"finish_reason\": \"stop\"\n",
|
580 |
-
" },\n",
|
581 |
-
" \"message\": {\n",
|
582 |
-
" \"lc\": 1,\n",
|
583 |
-
" \"type\": \"constructor\",\n",
|
584 |
-
" \"id\": [\n",
|
585 |
-
" \"langchain\",\n",
|
586 |
-
" \"schema\",\n",
|
587 |
-
" \"messages\",\n",
|
588 |
-
" \"AIMessageChunk\"\n",
|
589 |
-
" ],\n",
|
590 |
-
" \"kwargs\": {\n",
|
591 |
-
" \"example\": false,\n",
|
592 |
-
" \"content\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
|
593 |
-
" \"additional_kwargs\": {}\n",
|
594 |
-
" }\n",
|
595 |
-
" }\n",
|
596 |
-
" }\n",
|
597 |
-
" ]\n",
|
598 |
-
" ],\n",
|
599 |
-
" \"llm_output\": null,\n",
|
600 |
-
" \"run\": null\n",
|
601 |
-
"}\n",
|
602 |
-
"\n",
|
603 |
-
"\n",
|
604 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [3.23s] Exiting Chain run with output:\n",
|
605 |
-
"\u001b[0m{\n",
|
606 |
-
" \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
|
607 |
-
"}\n",
|
608 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [3.23s] Exiting Chain run with output:\n",
|
609 |
-
"\u001b[0m{\n",
|
610 |
-
" \"output_text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
|
611 |
-
"}\n",
|
612 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [3.46s] Exiting Chain run with output:\n",
|
613 |
-
"\u001b[0m[outputs]\n",
|
614 |
-
"{'question': 'current weather in volcano, ca', 'chat_history': []}\n",
|
615 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
|
616 |
-
"\u001b[0m{\n",
|
617 |
-
" \"question\": \"current weather in volcano, ca\",\n",
|
618 |
-
" \"chat_history\": []\n",
|
619 |
-
"}\n",
|
620 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
|
621 |
-
"\u001b[0m[inputs]\n",
|
622 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
|
623 |
-
"\u001b[0m{\n",
|
624 |
-
" \"question\": \"current weather in volcano, ca\",\n",
|
625 |
-
" \"context\": \"Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. 
There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\"\n",
|
626 |
-
"}\n",
|
627 |
-
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
|
628 |
-
"\u001b[0m{\n",
|
629 |
-
" \"prompts\": [\n",
|
630 |
-
" \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nVolcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 
1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\\nHuman: current weather in volcano, ca\"\n",
|
631 |
-
" ]\n",
|
632 |
-
"}\n",
|
633 |
-
"I don't have the current weather information for Volcano, CA.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.04s] Exiting LLM run with output:\n",
|
634 |
-
"\u001b[0m{\n",
|
635 |
-
" \"generations\": [\n",
|
636 |
-
" [\n",
|
637 |
-
" {\n",
|
638 |
-
" \"text\": \"I don't have the current weather information for Volcano, CA.\",\n",
|
639 |
-
" \"generation_info\": {\n",
|
640 |
-
" \"finish_reason\": \"stop\"\n",
|
641 |
-
" },\n",
|
642 |
-
" \"message\": {\n",
|
643 |
-
" \"lc\": 1,\n",
|
644 |
-
" \"type\": \"constructor\",\n",
|
645 |
-
" \"id\": [\n",
|
646 |
-
" \"langchain\",\n",
|
647 |
-
" \"schema\",\n",
|
648 |
-
" \"messages\",\n",
|
649 |
-
" \"AIMessageChunk\"\n",
|
650 |
-
" ],\n",
|
651 |
-
" \"kwargs\": {\n",
|
652 |
-
" \"example\": false,\n",
|
653 |
-
" \"content\": \"I don't have the current weather information for Volcano, CA.\",\n",
|
654 |
-
" \"additional_kwargs\": {}\n",
|
655 |
-
" }\n",
|
656 |
-
" }\n",
|
657 |
-
" }\n",
|
658 |
-
" ]\n",
|
659 |
-
" ],\n",
|
660 |
-
" \"llm_output\": null,\n",
|
661 |
-
" \"run\": null\n",
|
662 |
-
"}\n",
|
663 |
-
"\n",
|
664 |
-
"\n",
|
665 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.04s] Exiting Chain run with output:\n",
|
666 |
-
"\u001b[0m{\n",
|
667 |
-
" \"text\": \"I don't have the current weather information for Volcano, CA.\"\n",
|
668 |
-
"}\n",
|
669 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.04s] Exiting Chain run with output:\n",
|
670 |
-
"\u001b[0m{\n",
|
671 |
-
" \"output_text\": \"I don't have the current weather information for Volcano, CA.\"\n",
|
672 |
-
"}\n",
|
673 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.04s] Exiting Chain run with output:\n",
|
674 |
-
"\u001b[0m[outputs]\n",
|
675 |
-
"{'question': '____________________ is considered the father of modern medicine.', 'chat_history': []}\n",
|
676 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
|
677 |
-
"\u001b[0m{\n",
|
678 |
-
" \"question\": \"____________________ is considered the father of modern medicine.\",\n",
|
679 |
-
" \"chat_history\": []\n",
|
680 |
-
"}\n",
|
681 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
|
682 |
-
"\u001b[0m[inputs]\n",
|
683 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
|
684 |
-
"\u001b[0m{\n",
|
685 |
-
" \"question\": \"____________________ is considered the father of modern medicine.\",\n",
|
686 |
-
" \"context\": \"Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. 
It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\"\n",
|
687 |
-
"}\n",
|
688 |
-
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
|
689 |
-
"\u001b[0m{\n",
|
690 |
-
" \"prompts\": [\n",
|
691 |
-
" \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
|
692 |
-
" ]\n",
|
693 |
-
"}\n",
|
694 |
-
"Hippocrates is considered the father of modern medicine.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [654ms] Exiting LLM run with output:\n",
|
695 |
-
"\u001b[0m{\n",
|
696 |
-
" \"generations\": [\n",
|
697 |
-
" [\n",
|
698 |
-
" {\n",
|
699 |
-
" \"text\": \"Hippocrates is considered the father of modern medicine.\",\n",
|
700 |
-
" \"generation_info\": {\n",
|
701 |
-
" \"finish_reason\": \"stop\"\n",
|
702 |
-
" },\n",
|
703 |
-
" \"message\": {\n",
|
704 |
-
" \"lc\": 1,\n",
|
705 |
-
" \"type\": \"constructor\",\n",
|
706 |
-
" \"id\": [\n",
|
707 |
-
" \"langchain\",\n",
|
708 |
-
" \"schema\",\n",
|
709 |
-
" \"messages\",\n",
|
710 |
-
" \"AIMessageChunk\"\n",
|
711 |
-
" ],\n",
|
712 |
-
" \"kwargs\": {\n",
|
713 |
-
" \"example\": false,\n",
|
714 |
-
" \"content\": \"Hippocrates is considered the father of modern medicine.\",\n",
|
715 |
-
" \"additional_kwargs\": {}\n",
|
716 |
-
" }\n",
|
717 |
-
" }\n",
|
718 |
-
" }\n",
|
719 |
-
" ]\n",
|
720 |
-
" ],\n",
|
721 |
-
" \"llm_output\": null,\n",
|
722 |
-
" \"run\": null\n",
|
723 |
-
"}\n",
|
724 |
-
"\n",
|
725 |
-
"\n",
|
726 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [655ms] Exiting Chain run with output:\n",
|
727 |
-
"\u001b[0m{\n",
|
728 |
-
" \"text\": \"Hippocrates is considered the father of modern medicine.\"\n",
|
729 |
-
"}\n",
|
730 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [655ms] Exiting Chain run with output:\n",
|
731 |
-
"\u001b[0m{\n",
|
732 |
-
" \"output_text\": \"Hippocrates is considered the father of modern medicine.\"\n",
|
733 |
-
"}\n",
|
734 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [657ms] Exiting Chain run with output:\n",
|
735 |
-
"\u001b[0m[outputs]\n",
|
736 |
-
"{'question': 'how many days is an appraisal good for a fannie loan', 'chat_history': []}\n",
|
737 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
|
738 |
-
"\u001b[0m{\n",
|
739 |
-
" \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
|
740 |
-
" \"chat_history\": []\n",
|
741 |
-
"}\n",
|
742 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
|
743 |
-
"\u001b[0m[inputs]\n",
|
744 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
|
745 |
-
"\u001b[0m{\n",
|
746 |
-
" \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
|
747 |
-
" \"context\": \"New and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. acceptable, but supporting comps are with an adequate explanation from the Appraiser. 
Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\"\n",
|
748 |
-
"}\n",
|
749 |
-
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
|
750 |
-
"\u001b[0m{\n",
|
751 |
-
" \"prompts\": [\n",
|
752 |
-
" \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nNew and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. 
acceptable, but supporting comps are with an adequate explanation from the Appraiser. Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\\nHuman: how many days is an appraisal good for a fannie loan\"\n",
|
753 |
-
" ]\n",
|
754 |
-
"}\n",
|
755 |
-
"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.02s] Exiting LLM run with output:\n",
|
756 |
-
"\u001b[0m{\n",
|
757 |
-
" \"generations\": [\n",
|
758 |
-
" [\n",
|
759 |
-
" {\n",
|
760 |
-
" \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
|
761 |
-
" \"generation_info\": {\n",
|
762 |
-
" \"finish_reason\": \"stop\"\n",
|
763 |
-
" },\n",
|
764 |
-
" \"message\": {\n",
|
765 |
-
" \"lc\": 1,\n",
|
766 |
-
" \"type\": \"constructor\",\n",
|
767 |
-
" \"id\": [\n",
|
768 |
-
" \"langchain\",\n",
|
769 |
-
" \"schema\",\n",
|
770 |
-
" \"messages\",\n",
|
771 |
-
" \"AIMessageChunk\"\n",
|
772 |
-
" ],\n",
|
773 |
-
" \"kwargs\": {\n",
|
774 |
-
" \"example\": false,\n",
|
775 |
-
" \"content\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
|
776 |
-
" \"additional_kwargs\": {}\n",
|
777 |
-
" }\n",
|
778 |
-
" }\n",
|
779 |
-
" }\n",
|
780 |
-
" ]\n",
|
781 |
-
" ],\n",
|
782 |
-
" \"llm_output\": null,\n",
|
783 |
-
" \"run\": null\n",
|
784 |
-
"}\n",
|
785 |
-
"\n",
|
786 |
-
"\n",
|
787 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.02s] Exiting Chain run with output:\n",
|
788 |
-
"\u001b[0m{\n",
|
789 |
-
" \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
|
790 |
-
"}\n",
|
791 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.02s] Exiting Chain run with output:\n",
|
792 |
-
"\u001b[0m{\n",
|
793 |
-
" \"output_text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
|
794 |
-
"}\n",
|
795 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.02s] Exiting Chain run with output:\n",
|
796 |
-
"\u001b[0m[outputs]\n",
|
797 |
-
"{'question': 'average pharmacy tech salary', 'chat_history': []}\n",
|
798 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
|
799 |
-
"\u001b[0m{\n",
|
800 |
-
" \"question\": \"average pharmacy tech salary\",\n",
|
801 |
-
" \"chat_history\": []\n",
|
802 |
-
"}\n",
|
803 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
|
804 |
-
"\u001b[0m[inputs]\n",
|
805 |
-
"\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
|
806 |
-
"\u001b[0m{\n",
|
807 |
-
" \"question\": \"average pharmacy tech salary\",\n",
|
808 |
-
" \"context\": \"If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. 
These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\"\n",
|
809 |
-
"}\n",
|
810 |
-
"\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
|
811 |
-
"\u001b[0m{\n",
|
812 |
-
" \"prompts\": [\n",
|
813 |
-
" \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nIf you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. 
$18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\\nHuman: average pharmacy tech salary\"\n",
|
814 |
-
" ]\n",
|
815 |
-
"}\n",
|
816 |
-
"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.45s] Exiting LLM run with output:\n",
|
817 |
-
"\u001b[0m{\n",
|
818 |
-
" \"generations\": [\n",
|
819 |
-
" [\n",
|
820 |
-
" {\n",
|
821 |
-
" \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
|
822 |
-
" \"generation_info\": {\n",
|
823 |
-
" \"finish_reason\": \"stop\"\n",
|
824 |
-
" },\n",
|
825 |
-
" \"message\": {\n",
|
826 |
-
" \"lc\": 1,\n",
|
827 |
-
" \"type\": \"constructor\",\n",
|
828 |
-
" \"id\": [\n",
|
829 |
-
" \"langchain\",\n",
|
830 |
-
" \"schema\",\n",
|
831 |
-
" \"messages\",\n",
|
832 |
-
" \"AIMessageChunk\"\n",
|
833 |
-
" ],\n",
|
834 |
-
" \"kwargs\": {\n",
|
835 |
-
" \"example\": false,\n",
|
836 |
-
" \"content\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
|
837 |
-
" \"additional_kwargs\": {}\n",
|
838 |
-
" }\n",
|
839 |
-
" }\n",
|
840 |
-
" }\n",
|
841 |
-
" ]\n",
|
842 |
-
" ],\n",
|
843 |
-
" \"llm_output\": null,\n",
|
844 |
-
" \"run\": null\n",
|
845 |
-
"}\n",
|
846 |
-
"\n",
|
847 |
-
"\n",
|
848 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.45s] Exiting Chain run with output:\n",
|
849 |
-
"\u001b[0m{\n",
|
850 |
-
" \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
|
851 |
-
"}\n",
|
852 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.46s] Exiting Chain run with output:\n",
|
853 |
-
"\u001b[0m{\n",
|
854 |
-
" \"output_text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
|
855 |
-
"}\n",
|
856 |
-
"\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.46s] Exiting Chain run with output:\n",
|
857 |
-
"\u001b[0m[outputs]\n",
|
858 |
-
"Q-001: albany mn population\n",
|
859 |
-
"A-001: The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\n",
|
860 |
-
"G-001: The population of Albany, Minnesota is 2,662. \n",
|
861 |
-
"\n",
|
862 |
-
"Q-002: current weather in volcano, ca\n",
|
863 |
-
"A-002: I don't have the current weather information for Volcano, CA.\n",
|
864 |
-
"G-002: The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\n",
|
865 |
-
"\n",
|
866 |
-
"Q-003: ____________________ is considered the father of modern medicine.\n",
|
867 |
-
"A-003: Hippocrates is considered the father of modern medicine.\n",
|
868 |
-
"G-003: Hippocrates is considered the father of modern medicine.\n",
|
869 |
-
"\n",
|
870 |
-
"Q-004: how many days is an appraisal good for a fannie loan\n",
|
871 |
-
"A-004: According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\n",
|
872 |
-
"G-004: An appraisal is good for 120 days from the date of the Note for a Fannie loan.\n",
|
873 |
-
"\n",
|
874 |
-
"Q-005: average pharmacy tech salary\n",
|
875 |
-
"A-005: The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\n",
|
876 |
-
"G-005: The average salary for a pharmacy technician is $26,000 to $39,000 in a year.\n",
|
877 |
-
"\n",
|
878 |
-
"\n",
|
879 |
-
"\n",
|
880 |
-
"scores: {\n",
|
881 |
-
" \"OVERALL\": {\n",
|
882 |
-
" \"bleu_scores\": {\n",
|
883 |
-
" \"bleu\": 0.3953488372093023,\n",
|
884 |
-
" \"precisions\": [\n",
|
885 |
-
" 0.3953488372093023\n",
|
886 |
-
" ],\n",
|
887 |
-
" \"brevity_penalty\": 1.0,\n",
|
888 |
-
" \"length_ratio\": 1.9253731343283582,\n",
|
889 |
-
" \"translation_length\": 129,\n",
|
890 |
-
" \"reference_length\": 67\n",
|
891 |
-
" },\n",
|
892 |
-
" \"rouge_scores\": {\n",
|
893 |
-
" \"rouge1\": 0.5737456342107505,\n",
|
894 |
-
" \"rouge2\": 0.4160794941282746,\n",
|
895 |
-
" \"rougeL\": 0.5108953062441435,\n",
|
896 |
-
" \"rougeLsum\": 0.4989862850327967\n",
|
897 |
-
" }\n",
|
898 |
-
" },\n",
|
899 |
-
" \"NUMERIC\": {\n",
|
900 |
-
" \"bleu_scores\": {\n",
|
901 |
-
" \"bleu\": 0.36111111111111116,\n",
|
902 |
-
" \"precisions\": [\n",
|
903 |
-
" 0.3611111111111111\n",
|
904 |
-
" ],\n",
|
905 |
-
" \"brevity_penalty\": 1.0,\n",
|
906 |
-
" \"length_ratio\": 2.4545454545454546,\n",
|
907 |
-
" \"translation_length\": 108,\n",
|
908 |
-
" \"reference_length\": 44\n",
|
909 |
-
" },\n",
|
910 |
-
" \"rouge_scores\": {\n",
|
911 |
-
" \"rouge1\": 0.5395760570179174,\n",
|
912 |
-
" \"rouge2\": 0.3694751662231337,\n",
|
913 |
-
" \"rougeL\": 0.4656557912371866,\n",
|
914 |
-
" \"rougeLsum\": 0.4656557912371866\n",
|
915 |
-
" }\n",
|
916 |
-
" },\n",
|
917 |
-
" \"DESCRIPTION\": {\n",
|
918 |
-
" \"bleu_scores\": {\n",
|
919 |
-
" \"bleu\": 0.5195179673581217,\n",
|
920 |
-
" \"precisions\": [\n",
|
921 |
-
" 0.5714285714285714\n",
|
922 |
-
" ],\n",
|
923 |
-
" \"brevity_penalty\": 0.909156442876713,\n",
|
924 |
-
" \"length_ratio\": 0.9130434782608695,\n",
|
925 |
-
" \"translation_length\": 21,\n",
|
926 |
-
" \"reference_length\": 23\n",
|
927 |
-
" },\n",
|
928 |
-
" \"rouge_scores\": {\n",
|
929 |
-
" \"rouge1\": 0.625,\n",
|
930 |
-
" \"rouge2\": 0.5,\n",
|
931 |
-
" \"rougeL\": 0.5833333333333334,\n",
|
932 |
-
" \"rougeLsum\": 0.5833333333333334\n",
|
933 |
-
" }\n",
|
934 |
-
" }\n",
|
935 |
-
"}\n",
|
936 |
-
"\n",
|
937 |
-
"CPU times: user 512 ms, sys: 63.7 ms, total: 576 ms\n",
|
938 |
-
"Wall time: 7.85 s\n"
|
939 |
-
]
|
940 |
-
}
|
941 |
-
],
|
942 |
-
"source": [
|
943 |
-
"%%time\n",
|
944 |
-
"\n",
|
945 |
-
"eval_ds = new_ds.select(range(5))\n",
|
946 |
-
"qa_chain = QAChainWithMsMacroDataset(eval_ds, llm_loader)\n",
|
947 |
-
"\n",
|
948 |
-
"answers = []\n",
|
949 |
-
"for i in range(eval_ds.num_rows):\n",
|
950 |
-
" inputs = {\"question\": str(eval_ds[i][\"query\"]), \"chat_history\": []}\n",
|
951 |
-
" result = qa_chain.call_chain(\n",
|
952 |
-
" inputs,\n",
|
953 |
-
" None,\n",
|
954 |
-
" None,\n",
|
955 |
-
" True,\n",
|
956 |
-
" )\n",
|
957 |
-
" answers.append(result[\"answer\"])\n",
|
958 |
-
"\n",
|
959 |
-
"result = calc_all_metrics(eval_ds, answers)\n",
|
960 |
-
"\n",
|
961 |
-
"for i in range(eval_ds.num_rows):\n",
|
962 |
-
" n = i + 1\n",
|
963 |
-
" print(f\"Q-{n:03d}: {eval_ds[i]['query']}\")\n",
|
964 |
-
" print(f\"A-{n:03d}: {answers[i]}\")\n",
|
965 |
-
" print(f\"G-{n:03d}: {eval_ds[i]['wellFormedAnswers'][0]}\\n\")\n",
|
966 |
-
"\n",
|
967 |
-
"print(f\"\\n\\nscores: {json.dumps(result, indent=2)}\\n\")"
|
968 |
-
]
|
969 |
-
},
|
970 |
-
{
|
971 |
-
"cell_type": "code",
|
972 |
-
"execution_count": 13,
|
973 |
-
"id": "bae05024",
|
974 |
-
"metadata": {},
|
975 |
-
"outputs": [
|
976 |
-
{
|
977 |
-
"name": "stdout",
|
978 |
-
"output_type": "stream",
|
979 |
-
"text": [
|
980 |
-
"System: Use the following pieces of context to answer the users question. \n",
|
981 |
-
"If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
|
982 |
-
"----------------\n",
|
983 |
-
"City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n",
|
984 |
-
"\n",
|
985 |
-
"Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\n",
|
986 |
-
"\n",
|
987 |
-
"For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
|
988 |
-
"\n",
|
989 |
-
"Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\n",
|
990 |
-
"\n",
|
991 |
-
"Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
|
992 |
-
"\n",
|
993 |
-
"Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\n",
|
994 |
-
"\n",
|
995 |
-
"For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\n",
|
996 |
-
"\n",
|
997 |
-
"For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\n",
|
998 |
-
"Human: albany mn population\n"
|
999 |
-
]
|
1000 |
-
}
|
1001 |
-
],
|
1002 |
-
"source": [
|
1003 |
-
"print(\n",
|
1004 |
-
" \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
|
1005 |
-
")"
|
1006 |
-
]
|
1007 |
-
},
|
1008 |
-
{
|
1009 |
-
"cell_type": "code",
|
1010 |
-
"execution_count": 4,
|
1011 |
-
"id": "593f574a",
|
1012 |
-
"metadata": {},
|
1013 |
-
"outputs": [
|
1014 |
-
{
|
1015 |
-
"name": "stdout",
|
1016 |
-
"output_type": "stream",
|
1017 |
-
"text": [
|
1018 |
-
"System: Use the following pieces of context to answer the users question. \n",
|
1019 |
-
"If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
|
1020 |
-
"----------------\n",
|
1021 |
-
"Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\n",
|
1022 |
-
"\n",
|
1023 |
-
"Many of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\n",
|
1024 |
-
"\n",
|
1025 |
-
"Despite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\n",
|
1026 |
-
"\n",
|
1027 |
-
"At least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\n",
|
1028 |
-
"\n",
|
1029 |
-
"460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\n",
|
1030 |
-
"\n",
|
1031 |
-
"TRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \n",
|
1032 |
-
"\n",
|
1033 |
-
"The two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\n",
|
1034 |
-
"\n",
|
1035 |
-
"Hippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\n",
|
1036 |
-
"\n",
|
1037 |
-
"Reload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\n",
|
1038 |
-
"\n",
|
1039 |
-
"However, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\n",
|
1040 |
-
"Human: ____________________ is considered the father of modern medicine.\n"
|
1041 |
-
]
|
1042 |
-
}
|
1043 |
-
],
|
1044 |
-
"source": [
|
1045 |
-
"print(\n",
|
1046 |
-
" \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
|
1047 |
-
")"
|
1048 |
-
]
|
1049 |
-
},
|
1050 |
-
{
|
1051 |
-
"cell_type": "markdown",
|
1052 |
-
"id": "5b9204e0",
|
1053 |
-
"metadata": {},
|
1054 |
-
"source": [
|
1055 |
-
"```\n",
|
1056 |
-
"Q-003: ____________________ is considered the father of modern medicine.\n",
|
1057 |
-
"A-003: Hippocrates is considered the father of modern medicine.\n",
|
1058 |
-
"G-003: Hippocrates is considered the father of modern medicine.\n",
|
1059 |
-
"```"
|
1060 |
-
]
|
1061 |
-
},
|
1062 |
-
{
|
1063 |
-
"cell_type": "code",
|
1064 |
-
"execution_count": 11,
|
1065 |
-
"id": "5cfc8320",
|
1066 |
-
"metadata": {},
|
1067 |
-
"outputs": [
|
1068 |
-
{
|
1069 |
-
"data": {
|
1070 |
-
"text/plain": [
|
1071 |
-
"{'answers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.'],\n",
|
1072 |
-
" 'passages': {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0],\n",
|
1073 |
-
" 'passage_text': ['Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.',\n",
|
1074 |
-
" 'Current U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.',\n",
|
1075 |
-
" 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.',\n",
|
1076 |
-
" 'Volcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.',\n",
|
1077 |
-
" 'Volcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.',\n",
|
1078 |
-
" 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.',\n",
|
1079 |
-
" 'Volcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.',\n",
|
1080 |
-
" 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.',\n",
|
1081 |
-
" 'Volcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.',\n",
|
1082 |
-
" 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.'],\n",
|
1083 |
-
" 'url': ['http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
|
1084 |
-
" 'http://www.fastweather.com/index.php?city=Volcano_CA&g',\n",
|
1085 |
-
" 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
|
1086 |
-
" 'http://www.homefacts.com/weather/California/Amador-County/Volcano.html',\n",
|
1087 |
-
" 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
|
1088 |
-
" 'http://www.weatherman.com/us/ca/volcano',\n",
|
1089 |
-
" 'http://www.weatherman.com/us/ca/volcano',\n",
|
1090 |
-
" 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
|
1091 |
-
" 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
|
1092 |
-
" 'http://www.weatherman.com/us/ca/volcano']},\n",
|
1093 |
-
" 'query': 'current weather in volcano, ca',\n",
|
1094 |
-
" 'query_id': 114414,\n",
|
1095 |
-
" 'query_type': 'DESCRIPTION',\n",
|
1096 |
-
" 'wellFormedAnswers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.']}"
|
1097 |
-
]
|
1098 |
-
},
|
1099 |
-
"execution_count": 11,
|
1100 |
-
"metadata": {},
|
1101 |
-
"output_type": "execute_result"
|
1102 |
-
}
|
1103 |
-
],
|
1104 |
-
"source": [
|
1105 |
-
"test_ds = new_ds.select([1])\n",
|
1106 |
-
"test_ds[0]"
|
1107 |
-
]
|
1108 |
-
},
|
1109 |
-
{
|
1110 |
-
"cell_type": "code",
|
1111 |
-
"execution_count": 12,
|
1112 |
-
"id": "56b91cae",
|
1113 |
-
"metadata": {},
|
1114 |
-
"outputs": [
|
1115 |
-
{
|
1116 |
-
"data": {
|
1117 |
-
"text/plain": [
|
1118 |
-
"{'bleu_scores': {'bleu': 1.0,\n",
|
1119 |
-
" 'precisions': [1.0, 1.0, 1.0, 1.0],\n",
|
1120 |
-
" 'brevity_penalty': 1.0,\n",
|
1121 |
-
" 'length_ratio': 1.0,\n",
|
1122 |
-
" 'translation_length': 14,\n",
|
1123 |
-
" 'reference_length': 14},\n",
|
1124 |
-
" 'rouge_scores': {'rouge1': 1.0,\n",
|
1125 |
-
" 'rouge2': 1.0,\n",
|
1126 |
-
" 'rougeL': 1.0,\n",
|
1127 |
-
" 'rougeLsum': 1.0}}"
|
1128 |
-
]
|
1129 |
-
},
|
1130 |
-
"execution_count": 12,
|
1131 |
-
"metadata": {},
|
1132 |
-
"output_type": "execute_result"
|
1133 |
-
}
|
1134 |
-
],
|
1135 |
-
"source": [
|
1136 |
-
"calc_metrics(test_ds)"
|
1137 |
-
]
|
1138 |
-
},
|
1139 |
-
{
|
1140 |
-
"cell_type": "code",
|
1141 |
-
"execution_count": 18,
|
1142 |
-
"id": "56c6bf24",
|
1143 |
-
"metadata": {},
|
1144 |
-
"outputs": [
|
1145 |
-
{
|
1146 |
-
"data": {
|
1147 |
-
"text/plain": [
|
1148 |
-
"['The',\n",
|
1149 |
-
" 'Volcano',\n",
|
1150 |
-
" 'forecast',\n",
|
1151 |
-
" 'for',\n",
|
1152 |
-
" 'Apr',\n",
|
1153 |
-
" '12',\n",
|
1154 |
-
" 'is',\n",
|
1155 |
-
" '52',\n",
|
1156 |
-
" 'degrees',\n",
|
1157 |
-
" 'and',\n",
|
1158 |
-
" 'Patchy',\n",
|
1159 |
-
" 'light',\n",
|
1160 |
-
" 'rain.']"
|
1161 |
-
]
|
1162 |
-
},
|
1163 |
-
"execution_count": 18,
|
1164 |
-
"metadata": {},
|
1165 |
-
"output_type": "execute_result"
|
1166 |
-
}
|
1167 |
-
],
|
1168 |
-
"source": [
|
1169 |
-
"test_ds[0][\"answers\"][0].split()"
|
1170 |
-
]
|
1171 |
-
},
|
1172 |
-
{
|
1173 |
-
"cell_type": "code",
|
1174 |
-
"execution_count": 19,
|
1175 |
-
"id": "77d08267",
|
1176 |
-
"metadata": {},
|
1177 |
-
"outputs": [
|
1178 |
-
{
|
1179 |
-
"data": {
|
1180 |
-
"text/plain": [
|
1181 |
-
"13"
|
1182 |
-
]
|
1183 |
-
},
|
1184 |
-
"execution_count": 19,
|
1185 |
-
"metadata": {},
|
1186 |
-
"output_type": "execute_result"
|
1187 |
-
}
|
1188 |
-
],
|
1189 |
-
"source": [
|
1190 |
-
"len(test_ds[0][\"answers\"][0].split())"
|
1191 |
-
]
|
1192 |
-
},
|
1193 |
-
{
|
1194 |
-
"cell_type": "code",
|
1195 |
-
"execution_count": 22,
|
1196 |
-
"id": "8c19694b",
|
1197 |
-
"metadata": {},
|
1198 |
-
"outputs": [
|
1199 |
-
{
|
1200 |
-
"data": {
|
1201 |
-
"text/plain": [
|
1202 |
-
"{'answers': ['From $26,000 to $39,000 a year'],\n",
|
1203 |
-
" 'passages': {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
1204 |
-
" 'passage_text': ['If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.',\n",
|
1205 |
-
" 'What can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.',\n",
|
1206 |
-
" 'The median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.',\n",
|
1207 |
-
" 'The majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.',\n",
|
1208 |
-
" 'The pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.',\n",
|
1209 |
-
" \"Pharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\",\n",
|
1210 |
-
" 'Popular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.',\n",
|
1211 |
-
" 'Pharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.',\n",
|
1212 |
-
" 'It also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.',\n",
|
1213 |
-
" 'Occupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.'],\n",
|
1214 |
-
" 'url': ['http://www.pharmacytechschools.com/salary/',\n",
|
1215 |
-
" 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
|
1216 |
-
" 'https://www.bls.gov/ooh/healthcare/pharmacy-technicians.htm',\n",
|
1217 |
-
" 'http://www.pharmacytechschools.com/salary/',\n",
|
1218 |
-
" 'http://www.pharmacytechschools.com/salary/',\n",
|
1219 |
-
" 'http://www.payscale.com/research/US/Job=Pharmacy_Technician/Hourly_Rate',\n",
|
1220 |
-
" 'http://www.simplyhired.com/salaries-k-certified-pharmacy-technician-jobs.html',\n",
|
1221 |
-
" 'https://www.salarylist.com/jobs/Pharmacy-Technician-Salary.htm',\n",
|
1222 |
-
" 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
|
1223 |
-
" 'https://www.bls.gov/oes/current/oes292052.htm']},\n",
|
1224 |
-
" 'query': 'average pharmacy tech salary',\n",
|
1225 |
-
" 'query_id': 40287,\n",
|
1226 |
-
" 'query_type': 'NUMERIC',\n",
|
1227 |
-
" 'wellFormedAnswers': ['The average salary for a pharmacy technician is $26,000 to $39,000 in a year.',\n",
|
1228 |
-
" 'The average salary for a pharmacy technician is from $26,000 to $39,000 a year.']}"
|
1229 |
-
]
|
1230 |
-
},
|
1231 |
-
"execution_count": 22,
|
1232 |
-
"metadata": {},
|
1233 |
-
"output_type": "execute_result"
|
1234 |
-
}
|
1235 |
-
],
|
1236 |
-
"source": [
|
1237 |
-
"test_ds = new_ds.select([4])\n",
|
1238 |
-
"test_ds[0]"
|
1239 |
-
]
|
1240 |
-
},
|
1241 |
-
{
|
1242 |
-
"cell_type": "code",
|
1243 |
-
"execution_count": 23,
|
1244 |
-
"id": "34209164",
|
1245 |
-
"metadata": {},
|
1246 |
-
"outputs": [
|
1247 |
-
{
|
1248 |
-
"data": {
|
1249 |
-
"text/plain": [
|
1250 |
-
"{'bleu_scores': {'bleu': 0.19303951204286907,\n",
|
1251 |
-
" 'precisions': [0.875, 0.7142857142857143, 0.5, 0.4],\n",
|
1252 |
-
" 'brevity_penalty': 0.32465246735834974,\n",
|
1253 |
-
" 'length_ratio': 0.47058823529411764,\n",
|
1254 |
-
" 'translation_length': 8,\n",
|
1255 |
-
" 'reference_length': 17},\n",
|
1256 |
-
" 'rouge_scores': {'rouge1': 0.5833333333333334,\n",
|
1257 |
-
" 'rouge2': 0.4545454545454545,\n",
|
1258 |
-
" 'rougeL': 0.5833333333333334,\n",
|
1259 |
-
" 'rougeLsum': 0.5833333333333334}}"
|
1260 |
-
]
|
1261 |
-
},
|
1262 |
-
"execution_count": 23,
|
1263 |
-
"metadata": {},
|
1264 |
-
"output_type": "execute_result"
|
1265 |
-
}
|
1266 |
-
],
|
1267 |
-
"source": [
|
1268 |
-
"calc_metrics(test_ds)"
|
1269 |
-
]
|
1270 |
-
}
|
1271 |
-
],
|
1272 |
-
"metadata": {
|
1273 |
-
"kernelspec": {
|
1274 |
-
"display_name": "Python 3 (ipykernel)",
|
1275 |
-
"language": "python",
|
1276 |
-
"name": "python3"
|
1277 |
-
},
|
1278 |
-
"language_info": {
|
1279 |
-
"codemirror_mode": {
|
1280 |
-
"name": "ipython",
|
1281 |
-
"version": 3
|
1282 |
-
},
|
1283 |
-
"file_extension": ".py",
|
1284 |
-
"mimetype": "text/x-python",
|
1285 |
-
"name": "python",
|
1286 |
-
"nbconvert_exporter": "python",
|
1287 |
-
"pygments_lexer": "ipython3",
|
1288 |
-
"version": "3.10.9"
|
1289 |
-
}
|
1290 |
-
},
|
1291 |
-
"nbformat": 4,
|
1292 |
-
"nbformat_minor": 5
|
1293 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Makefile
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.PHONY: start
|
2 |
+
start:
|
3 |
+
python app.py
|
4 |
+
|
5 |
+
test:
|
6 |
+
python eval_modules/qa_chain_test.py
|
7 |
+
|
8 |
+
tune:
|
9 |
+
./tune_rp.sh
|
10 |
+
|
11 |
+
chat:
|
12 |
+
python eval_modules/qa_chain_test.py chat
|
13 |
+
|
14 |
+
.PHONY: format
|
15 |
+
format:
|
16 |
+
black .
|
17 |
+
|
18 |
+
install:
|
19 |
+
pip install -r requirements.txt
|
20 |
+
|
21 |
+
install-torch:
|
22 |
+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
23 |
+
|
24 |
+
install-torch-cuda:
|
25 |
+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
README.md
CHANGED
@@ -1,53 +1,168 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
-
|
11 |
---
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
## Running Locally
|
14 |
|
15 |
1. Check pre-conditions:
|
16 |
|
17 |
- [Git Large File Storage (LFS)](https://git-lfs.com/) must have been installed.
|
18 |
- Run `python --version` to make sure you're running Python version 3.10 or above.
|
19 |
-
-
|
20 |
|
21 |
```
|
22 |
-
|
23 |
```
|
24 |
|
25 |
2. Clone the repo
|
26 |
|
27 |
```
|
28 |
-
git lfs install
|
29 |
-
git clone https://
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
```
|
31 |
|
32 |
-
|
33 |
|
34 |
```
|
35 |
pip install -r requirements.txt
|
36 |
```
|
37 |
|
38 |
-
|
39 |
|
40 |
- By default, environment variables are loaded from `.env.example` file
|
41 |
- If you don't want to use the default settings, copy `.env.example` into `.env`. You can then update it for your local runs.
|
42 |
|
43 |
-
|
44 |
|
45 |
```
|
46 |
-
|
47 |
```
|
48 |
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
```
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
```
|
|
|
1 |
---
|
2 |
+
title: LLM QA Eval
|
3 |
+
emoji: 💬
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: purple
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.36.1
|
8 |
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
+
# Evaluate and Optimize Open-Source LLMs' Performance for Question Answering with RAG and Non-RAG
|
14 |
+
|
15 |
+
This project contains the source code, datasets and results for the titled paper.
|
16 |
+
|
17 |
+
## Results for [WebQSP Dataset](./data/datasets/WebQSP.test.wikidata.json)
|
18 |
+
|
19 |
+
| Model Name | RAG | RAG with Chat Template | Non-RAG | Note |
|
20 |
+
| -------------------------------- | ----------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | ----------------- |
|
21 |
+
| Phi-3-mini-128k-instruct (batch) | [Phi-3-mini-128k-instruct_wd_rag_batch_4](./data/results/Phi-3-mini-128k-instruct_wd_rag_batch_4.csv) | [Phi-3-mini-128k-instruct_wd_true](./data/results/Phi-3-mini-128k-instruct_wd_true.csv) | [Phi-3-mini-128k-instruct_wd_non_rag_batch_16](./data/results/Phi-3-mini-128k-instruct_wd_non_rag_batch_16.csv) | Evaluated 3 types |
|
22 |
+
| gemma-1.1-2b-it | [gemma-1.1-2b-it_wd](./data/results/gemma-1.1-2b-it_wd.csv) | [gemma-1.1-2b-it_wd_true](./data/results/gemma-1.1-2b-it_wd_true.csv) | [gemma-1.1-2b-it_wd_non_rag](./data/results/gemma-1.1-2b-it_wd_non_rag.csv) | Evaluated 3 types |
|
23 |
+
| gemma-1.1-7b-it | [gemma-1.1-7b-it_wd](./data/results/gemma-1.1-7b-it_wd.csv) | [gemma-1.1-7b-it_wd_true](./data/results/gemma-1.1-7b-it_wd_true.csv) | [gemma-1.1-7b-it_wd_non_rag](./data/results/gemma-1.1-27b-it_wd_non_rag.csv) | Evaluated 3 types |
|
24 |
+
| Mistral-7B-Instruct-v0.2 | [Tune_2024-03-29_11-28-20](./data/results/Tune_2024-03-29_11-28-20.csv) | [Mistral-7B-Instruct-v0.2_wd_true](./data/results/Mistral-7B-Instruct-v0.2_wd_true.csv) | [Tune_2024-04-16_12-24-27](./data/results/Tune_2024-04-16_12-24-27.csv.csv) | Evaluated 3 types |
|
25 |
+
| Llama-2-7b-chat-hf | [Tune_2024-03-20_15-35-37](./data/results/Tune_2024-03-20_15-35-37.csv) | [Llama-2-7b-chat-hf_wd_true](./data/results/Llama-2-7b-chat-hf_wd_true.csv) | [Tune_2024-04-09_09-19-22](./data/results/Tune_2024-04-09_09-19-22.csv) | Evaluated 3 types |
|
26 |
+
| Meta-Llama-3-8B-Instruct | [Meta-Llama-3-8B-Instruct_wd](./data/results/Meta-Llama-3-8B-Instruct_wd.csv) | [Meta-Llama-3-8B-Instruct_wd_true](./data/results/Meta-Llama-3-8B-Instruct_wd_true.csv) | [Meta-Llama-3-8B-Instruct_wd_non_rag](./data/results/Meta-Llama-3-8B-Instruct_wd_non_rag.csv) (generic prompt) | Evaluated 3 types |
|
27 |
+
| | | | [Meta-Llama-3-8B-Instruct_wd_1_non_rag](./data/results/Meta-Llama-3-8B-Instruct_wd_1_non_rag.csv) | Evaluated Non-RAG |
|
28 |
+
| Llama-2-13b-chat-hf | [Tune_2024-03-25_23-32-57](./data/results/Tune_2024-03-25_23-32-57.csv) | [Llama-2-13b-chat-hf_wd_true](./data/results/Llama-2-13b-chat-hf_wd_true.csv) | [Tune_2024-04-10_16-53-38](./data/results/Tune_2024-04-10_16-53-38.csv) | Evaluated 3 types |
|
29 |
+
| Llama-2-70b-chat-hf | [Llama-2-70b-chat-hf_wd](./data/results/Llama-2-70b-chat-hf_wd.csv) | [Llama-2-70b-chat-hf_wd_true](./data/results/Llama-2-70b-chat-hf_wd_true.csv) | [Llama-2-70b-chat-hf_wd_non_rag](./data/results/Llama-2-70b-chat-hf_wd_non_rag.csv) | Evaluated 3 types |
|
30 |
+
| Meta-Llama-3-70B-Instruct | [Meta-Llama-3-70B-Instruct_wd](./data/results/Meta-Llama-3-70B-Instruct_wd.csv) | [Meta-Llama-3-70B-Instruct_wd_true](./data/results/Meta-Llama-3-70B-Instruct_wd_true.csv) | [Meta-Llama-3-70B-Instruct_wd_non_rag](./data/results/Meta-Llama-3-70B-Instruct_wd_non_rag.csv) | Evaluated 3 types |
|
31 |
+
| gpt-3.5-turbo | [gpt-3.5-turbo_rag](./data/results/gpt-3.5-turbo_rag.csv) | | [gpt-3.5-turbo_non_rag](./data/results/gpt-3.5-turbo_non_rag.csv) | Evaluated both |
|
32 |
+
|
33 |
+
## Results for [MS MARCO Dataset](./data/datasets/ms_macro.json)
|
34 |
+
|
35 |
+
| Model Name | RAG | RAG with Chat Template | Non-RAG | Note |
|
36 |
+
| ------------------------- | ---------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | ---- |
|
37 |
+
| gemma-1.1-2b-it | [gemma-1.1-2b-it_mm_false](data/results/gemma-1.1-2b-it_mm_true_false.csv) | [gemma-1.1-2b-it_mm_true](data/results/gemma-1.1-2b-it_mm_true.csv) | [gemma-1.1-2b-it_mm_non_rag.csv](data/results/gemma-1.1-2b-it_mm_true_false_non_rag.csv) | |
|
38 |
+
| Phi-3-mini-128k-instruct | [Phi-3-mini-128k-instruct_mm_false](data/results/Phi-3-mini-128k-instruct_mm_false.csv) | [Phi-3-mini-128k-instruct_mm_true](data/results/Phi-3-mini-128k-instruct_mm_true.csv) | [Phi-3-mini-128k-instruct_mm_non_rag.csv](data/results/Phi-3-mini-128k-instruct_mm_non_rag.csv) | |
|
39 |
+
| gemma-1.1-7b-it | [gemma-1.1-7b-it_mm_false](data/results/gemma-1.1-7b-it_mm_false.csv) | [gemma-1.1-7b-it_mm_true](data/results/gemma-1.1-7b-it_mm_true.csv) | [gemma-1.1-7b-it_mm_non_rag.csv](data/results/gemma-1.1-7b-it_mm_non_rag.csv) | |
|
40 |
+
| Mistral-7B-Instruct-v0.2  | [Mistral-7B-Instruct-v0.2_mm_false](data/results/Mistral-7B-Instruct-v0.2_mm_false.csv)        | [Mistral-7B-Instruct-v0.2_mm_true](data/results/Mistral-7B-Instruct-v0.2_mm_true.csv)   | [Mistral-7B-Instruct-v0.2_mm_non_rag.csv](data/results/Mistral-7B-Instruct-v0.2_mm_non_rag.csv)              |      |
|
41 |
+
| Llama-2-7b-chat-hf | [Llama-2-7b-chat-hf_mm_false](data/results/Llama-2-7b-chat-hf_mm_true_false.csv) | [Llama-2-7b-chat-hf_mm_true](data/results/Llama-2-7b-chat-hf_mm_true.csv) | [Llama-2-7b-chat-hf_mm_non_rag.csv](data/results/Llama-2-7b-chat-hf_mm_true_false_non_rag.csv) | |
|
42 |
+
| Meta-Llama-3-8B-Instruct | [Meta-Llama-3-8B-Instruct_mm_false](data/results/Meta-Llama-3-8B-Instruct_mm_true_false.csv) | [Meta-Llama-3-8B-Instruct_mm_true](data/results/Meta-Llama-3-8B-Instruct_mm_true.csv) | [Meta-Llama-3-8B-Instruct_mm_non_rag.csv](data/results/Meta-Llama-3-8B-Instruct_mm_true_false_non_rag.csv) | |
|
43 |
+
| Llama-2-13b-chat-hf | [Llama-2-13b-chat-hf_mm_false](data/results/Llama-2-13b-chat-hf_mm_false.csv) | [Llama-2-13b-chat-hf_mm_true](data/results/Llama-2-13b-chat-hf_mm_true.csv) | [Llama-2-13b-chat-hf_mm_non_rag.csv](data/results/Llama-2-13b-chat-hf_mm_non_rag.csv) | |
|
44 |
+
| Llama-2-70b-chat-hf | [Llama-2-70b-chat-hf_mm_false](data/results/Llama-2-70b-chat-hf_mm_false.csv) | [Llama-2-70b-chat-hf_mm_true](data/results/Llama-2-70b-chat-hf_mm_true.csv) | [Llama-2-70b-chat-hf_mm_non_rag.csv](data/results/Llama-2-70b-chat-hf_mm_non_rag.csv) | |
|
45 |
+
| Meta-Llama-3-70B-Instruct | [Meta-Llama-3-70B-Instruct_mm_false](data/results/Meta-Llama-3-70B-Instruct_mm_true_false.csv) | [Meta-Llama-3-70B-Instruct_mm_true](data/results/Meta-Llama-3-70B-Instruct_mm_true.csv) | [Meta-Llama-3-70B-Instruct_mm_non_rag.csv](data/results/Meta-Llama-3-70B-Instruct_mm_true_false_non_rag.csv) | |
|
46 |
+
| gpt-3.5-turbo | [gpt-3.5-turbo_rag](./data/results/gpt-3.5-turbo_mm_RP_1.300.csv) | | [gpt-3.5-turbo_non_rag](./data/results/gpt-3.5-turbo_mm_non_rag_RP_1.300.csv) | |
|
47 |
+
|
48 |
+
## How it works
|
49 |
+
|
50 |
+
We're using an AI methodology, namely Conversational Retrieval Augmentation (CRAG), which uses LLMs off the shelf (i.e., without any fine-tuning), then controls their behavior through clever prompting and conditioning on private “contextual” data, e.g., texts extracted from your PDF files.
|
51 |
+
|
52 |
+
At a very high level, the workflow can be divided into three stages:
|
53 |
+
|
54 |
+
1. Data preprocessing / embedding: This stage involves storing private data (your PDF files) to be retrieved later. Typically, the documents are broken into chunks and passed through an embedding model, and the resulting embeddings are then stored in a vectorstore.
|
55 |
+
|
56 |
+
2. Prompt construction / retrieval: When a user submits a query, the application constructs a series of prompts to submit to the language model. A compiled prompt typically combines a prompt template and a set of relevant documents retrieved from the vectorstore.
|
57 |
+
|
58 |
+
3. Prompt execution / inference: Once the prompts have been compiled, they are submitted to a pre-trained LLM for inference—including both proprietary model APIs and open-source or self-trained models.
|
59 |
+
|
60 |
+
Tech stack used includes LangChain, Gradio, Chroma and FAISS.
|
61 |
+
|
62 |
+
- LangChain is an open-source framework that makes it easier to build scalable AI/LLM apps and chatbots.
|
63 |
+
- Gradio is an open-source Python library that is used to build machine learning and data science demos and web applications.
|
64 |
+
- Chroma and FAISS are open-source vectorstores for storing embeddings for your files.
|
65 |
+
|
66 |
## Running Locally
|
67 |
|
68 |
1. Check pre-conditions:
|
69 |
|
70 |
- [Git Large File Storage (LFS)](https://git-lfs.com/) must have been installed.
|
71 |
- Run `python --version` to make sure you're running Python version 3.10 or above.
|
72 |
+
- [CMake](https://cmake.org/) must have been installed. Here is a sample command to install `CMake` on `ubuntu`:
|
73 |
|
74 |
```
|
75 |
+
sudo apt install cmake
|
76 |
```
|
77 |
|
78 |
2. Clone the repo
|
79 |
|
80 |
```
|
81 |
+
git lfs install
|
82 |
+
git clone --recursive https://github.com/smu-ai/Evaluation-of-Orca-2-Models-for-Conversational-RAG.git
|
83 |
+
```
|
84 |
+
|
85 |
+
3. Ensure the latest PyTorch has been installed.
|
86 |
+
|
87 |
+
```
|
88 |
+
# using CUDA with Nvidia GPU
|
89 |
+
make install-torch-cuda
|
90 |
+
|
91 |
+
# using Apple Silicon or other CPU
|
92 |
+
make install-torch
|
93 |
```
|
94 |
|
95 |
+
4. Install packages
|
96 |
|
97 |
```
|
98 |
pip install -r requirements.txt
|
99 |
```
|
100 |
|
101 |
+
5. Set up your environment variables
|
102 |
|
103 |
- By default, environment variables are loaded from `.env.example` file
|
104 |
- If you don't want to use the default settings, copy `.env.example` into `.env`. You can then update it for your local runs.
|
105 |
|
106 |
+
6. Run automated test:
|
107 |
|
108 |
```
|
109 |
+
make test
|
110 |
```
|
111 |
|
112 |
+
7. Start the local server at `http://localhost:7860`:
|
113 |
+
|
114 |
+
```
|
115 |
+
make start
|
116 |
+
|
117 |
+
```
|
118 |
+
|
119 |
+
8. Tune repetition penalty parameters:
|
120 |
+
|
121 |
+
```
|
122 |
+
make tune
|
123 |
+
```
|
124 |
+
|
125 |
+
## Talk to Your Own PDF Files
|
126 |
+
|
127 |
+
- The sample PDF files are downloaded from [PCI DSS official website](https://www.pcisecuritystandards.org/document_library/?category=pcidss) and the corresponding embeddings are stored in folders `data/chromadb_1024_512` and `data/faiss_1024_512` with Chroma & FAISS formats respectively, which allows you to run locally without any additional effort.
|
128 |
+
|
129 |
+
- You can also put your own PDF files into any folder specified in `SOURCE_PDFS_PATH` and run the command below to generate embeddings which will be stored in folder `FAISS_INDEX_PATH` or `CHROMADB_INDEX_PATH`. If both `*_INDEX_PATH` env vars are set, `FAISS_INDEX_PATH` takes precedence. Make sure the folder specified by `*_INDEX_PATH` doesn't exist; otherwise the command will simply try to load the index from the folder and do a simple similarity search, as a way to verify that embeddings are generated and stored properly. Please note the HuggingFace Embedding model specified by `HF_EMBEDDINGS_MODEL_NAME` will be used to generate the embeddings.
|
130 |
+
|
131 |
+
```
|
132 |
+
python ingest.py
|
133 |
+
```
|
134 |
+
|
135 |
+
- Once embeddings are generated, you can test them out locally, or check them into your duplicated space. Please note HF Spaces git server does not allow PDF files to be checked in.
|
136 |
+
|
137 |
+
## Play with Different Large Language Models
|
138 |
+
|
139 |
+
The source code supports different LLM types - as shown at the top of `.env.example`
|
140 |
+
|
141 |
+
```
|
142 |
+
# LLM_MODEL_TYPE=openai
|
143 |
+
# LLM_MODEL_TYPE=gpt4all-j
|
144 |
+
# LLM_MODEL_TYPE=gpt4all
|
145 |
+
# LLM_MODEL_TYPE=llamacpp
|
146 |
+
# LLM_MODEL_TYPE=huggingface
|
147 |
+
# LLM_MODEL_TYPE=mosaicml
|
148 |
+
# LLM_MODEL_TYPE=stablelm
|
149 |
+
# LLM_MODEL_TYPE=openllm
|
150 |
+
LLM_MODEL_TYPE=hftgi
|
151 |
+
```
|
152 |
+
|
153 |
+
- By default, the app runs the `microsoft/orca-2-13b` model with HF Text Generation Inference (TGI), which runs on a research server and might be down from time to time.
|
154 |
+
|
155 |
+
- Uncomment/comment the above to play with different LLM types. You may also want to update other related env vars. E.g., here's the list of HF models which have been tested with the code:
|
156 |
|
157 |
```
|
158 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/orca-2-7b"
|
159 |
+
HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/orca-2-13b"
|
160 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/wizardLM-7B-HF"
|
161 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/vicuna-7B-1.1-HF"
|
162 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-j"
|
163 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-falcon"
|
164 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="lmsys/fastchat-t5-3b-v1.0"
|
165 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-7b-chat-hf"
|
166 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-13b-chat-hf"
|
167 |
+
# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-70b-chat-hf"
|
168 |
```
|
app.py
CHANGED
@@ -1,38 +1,12 @@
|
|
1 |
import json
|
2 |
-
import gradio as gr
|
3 |
-
import torch
|
4 |
-
from transformers import (
|
5 |
-
AutoModelForCausalLM,
|
6 |
-
AutoTokenizer,
|
7 |
-
TextIteratorStreamer,
|
8 |
-
)
|
9 |
import os
|
10 |
-
|
11 |
-
import
|
12 |
from app_modules.utils import calc_bleu_rouge_scores, detect_repetitions
|
13 |
-
|
14 |
from dotenv import find_dotenv, load_dotenv
|
15 |
|
16 |
found_dotenv = find_dotenv(".env")
|
17 |
|
18 |
-
if len(found_dotenv) == 0:
|
19 |
-
found_dotenv = find_dotenv(".env.example")
|
20 |
-
print(f"loading env vars from: {found_dotenv}")
|
21 |
-
load_dotenv(found_dotenv, override=False)
|
22 |
-
|
23 |
-
subprocess.run(
|
24 |
-
"pip install flash-attn --no-build-isolation",
|
25 |
-
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
26 |
-
shell=True,
|
27 |
-
)
|
28 |
-
|
29 |
-
token = os.getenv("HUGGINGFACE_AUTH_TOKEN")
|
30 |
-
|
31 |
-
model_name = os.getenv(
|
32 |
-
"HUGGINGFACE_MODEL_NAME_OR_PATH", "google/gemma-1.1-2b-it"
|
33 |
-
) # "microsoft/Phi-3-mini-128k-instruct"
|
34 |
-
print(f" model_name: {model_name}")
|
35 |
-
|
36 |
HF_RP = os.getenv("HF_RP", "1.2")
|
37 |
repetition_penalty = float(HF_RP)
|
38 |
print(f" repetition_penalty: {repetition_penalty}")
|
@@ -47,52 +21,21 @@ print(f"Loaded {len(examples)} examples")
|
|
47 |
|
48 |
qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
)
|
55 |
-
tok = AutoTokenizer.from_pretrained(model_name, token=token)
|
56 |
-
terminators = [
|
57 |
-
tok.eos_token_id,
|
58 |
-
]
|
59 |
-
|
60 |
-
# Check that MPS is available
|
61 |
-
if not torch.backends.mps.is_available():
|
62 |
-
if not torch.backends.mps.is_built():
|
63 |
-
print(
|
64 |
-
"MPS not available because the current PyTorch install was not "
|
65 |
-
"built with MPS enabled."
|
66 |
-
)
|
67 |
-
else:
|
68 |
-
print(
|
69 |
-
"MPS not available because the current MacOS version is not 12.3+ "
|
70 |
-
"and/or you do not have an MPS-enabled device on this machine."
|
71 |
-
)
|
72 |
-
mps_device = None
|
73 |
-
else:
|
74 |
-
mps_device = torch.device("mps")
|
75 |
-
|
76 |
-
if mps_device is not None:
|
77 |
-
device = mps_device
|
78 |
-
print("Using MPS")
|
79 |
-
elif torch.cuda.is_available():
|
80 |
-
device = torch.device("cuda")
|
81 |
-
print(f"Using GPU: {torch.cuda.get_device_name(device)}")
|
82 |
-
else:
|
83 |
-
device = torch.device("cpu")
|
84 |
-
print("Using CPU")
|
85 |
-
|
86 |
-
model = model.to(device)
|
87 |
|
88 |
|
89 |
def chat(
|
90 |
message,
|
91 |
-
history,
|
|
|
92 |
temperature=0,
|
93 |
repetition_penalty=1.1,
|
94 |
do_sample=True,
|
95 |
max_tokens=1024,
|
|
|
96 |
):
|
97 |
print("repetition_penalty:", repetition_penalty)
|
98 |
chat = []
|
@@ -109,36 +52,58 @@ def chat(
|
|
109 |
|
110 |
chat.append({"role": "user", "content": message})
|
111 |
|
112 |
-
messages =
|
113 |
-
|
114 |
-
streamer = TextIteratorStreamer(
|
115 |
-
tok, timeout=200.0, skip_prompt=True, skip_special_tokens=True
|
116 |
-
)
|
117 |
|
118 |
-
|
119 |
-
temperature = 0.01
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
temperature=temperature,
|
127 |
-
|
128 |
-
)
|
129 |
-
|
130 |
-
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
131 |
-
t.start()
|
132 |
-
|
133 |
-
partial_text = ""
|
134 |
-
for new_text in streamer:
|
135 |
partial_text += new_text
|
136 |
yield partial_text
|
137 |
|
138 |
answer = partial_text
|
139 |
-
(
|
140 |
partial_text += "\n\nRepetition Metrics:\n"
|
141 |
-
partial_text += f"1.
|
142 |
partial_text += f"1. Repetition Score: {repetition_score:.3f}\n"
|
143 |
partial_text += f"1. Total Repetitions: {total_repetitions:.3f}\n"
|
144 |
|
@@ -151,7 +116,7 @@ def chat(
|
|
151 |
scores = calc_bleu_rouge_scores([answer], [questions[index][key]], debug=True)
|
152 |
|
153 |
partial_text += "\n\n Performance Metrics:\n"
|
154 |
-
partial_text += f'1. BLEU: {scores["bleu_scores"]["bleu"]:.3f}\n'
|
155 |
partial_text += f'1. RougeL: {scores["rouge_scores"]["rougeL"]:.3f}\n'
|
156 |
|
157 |
yield partial_text
|
@@ -165,6 +130,7 @@ demo = gr.ChatInterface(
|
|
165 |
label="⚙️ Parameters", open=False, render=False
|
166 |
),
|
167 |
additional_inputs=[
|
|
|
168 |
gr.Slider(
|
169 |
minimum=0, maximum=1, step=0.1, value=0, label="Temperature", render=False
|
170 |
),
|
@@ -185,9 +151,13 @@ demo = gr.ChatInterface(
|
|
185 |
label="Max new tokens",
|
186 |
render=False,
|
187 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
],
|
189 |
-
stop_btn="Stop Generation",
|
190 |
-
title="Chat With LLMs",
|
191 |
-
description=f"Now Running [{model_name}](https://huggingface.co/{model_name})",
|
192 |
)
|
193 |
demo.launch()
|
|
|
1 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
+
import gradio as gr
|
4 |
+
from huggingface_hub import InferenceClient
|
5 |
from app_modules.utils import calc_bleu_rouge_scores, detect_repetitions
|
|
|
6 |
from dotenv import find_dotenv, load_dotenv
|
7 |
|
8 |
found_dotenv = find_dotenv(".env")
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
HF_RP = os.getenv("HF_RP", "1.2")
|
11 |
repetition_penalty = float(HF_RP)
|
12 |
print(f" repetition_penalty: {repetition_penalty}")
|
|
|
21 |
|
22 |
qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
|
23 |
|
24 |
+
"""
|
25 |
+
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
26 |
+
"""
|
27 |
+
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
|
30 |
def chat(
|
31 |
message,
|
32 |
+
history: list[tuple[str, str]],
|
33 |
+
system_message,
|
34 |
temperature=0,
|
35 |
repetition_penalty=1.1,
|
36 |
do_sample=True,
|
37 |
max_tokens=1024,
|
38 |
+
top_p=0.95,
|
39 |
):
|
40 |
print("repetition_penalty:", repetition_penalty)
|
41 |
chat = []
|
|
|
52 |
|
53 |
chat.append({"role": "user", "content": message})
|
54 |
|
55 |
+
messages = [{"role": "system", "content": system_message}]
|
56 |
+
messages.append({"role": "user", "content": message})
|
|
|
|
|
|
|
57 |
|
58 |
+
partial_text = ""
|
|
|
59 |
|
60 |
+
# huggingface_hub.utils._errors.HfHubHTTPError: 422 Client Error: Unprocessable Entity for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (Request ID: NZamtWmdoSg3flfgRKT0e)
|
61 |
+
# Make sure 'text-generation' task is supported by the model.
|
62 |
+
# for message in client.text_generation(
|
63 |
+
# messages,
|
64 |
+
# stream=True,
|
65 |
+
# temperature=temperature,
|
66 |
+
# top_p=top_p,
|
67 |
+
# repetition_penalty=repetition_penalty,
|
68 |
+
# ):
|
69 |
+
|
70 |
+
# https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta
|
71 |
+
# {
|
72 |
+
# "id": "HuggingFaceH4/zephyr-7b-beta",
|
73 |
+
# "sha": "b70e0c9a2d9e14bd1e812d3c398e5f313e93b473",
|
74 |
+
# "pipeline_tag": "text-generation",
|
75 |
+
# "library_name": "transformers",
|
76 |
+
# "private": false,
|
77 |
+
# "gated": false,
|
78 |
+
# "siblings": [],
|
79 |
+
# "safetensors": {
|
80 |
+
# "parameters": {
|
81 |
+
# "BF16": 7241732096
|
82 |
+
# }
|
83 |
+
# },
|
84 |
+
# "cardData": {
|
85 |
+
# "tags": [
|
86 |
+
# "generated_from_trainer"
|
87 |
+
# ],
|
88 |
+
# "base_model": "mistralai/Mistral-7B-v0.1"
|
89 |
+
# }
|
90 |
+
# }
|
91 |
+
|
92 |
+
for message in client.chat_completion(
|
93 |
+
messages,
|
94 |
+
max_tokens=max_tokens,
|
95 |
+
stream=True,
|
96 |
temperature=temperature,
|
97 |
+
top_p=top_p,
|
98 |
+
):
|
99 |
+
new_text = message.choices[0].delta.content
|
|
|
|
|
|
|
|
|
|
|
100 |
partial_text += new_text
|
101 |
yield partial_text
|
102 |
|
103 |
answer = partial_text
|
104 |
+
(whitespace_score, repetition_score, total_repetitions) = detect_repetitions(answer)
|
105 |
partial_text += "\n\nRepetition Metrics:\n"
|
106 |
+
partial_text += f"1. Whitespace Score: {whitespace_score:.3f}\n"
|
107 |
partial_text += f"1. Repetition Score: {repetition_score:.3f}\n"
|
108 |
partial_text += f"1. Total Repetitions: {total_repetitions:.3f}\n"
|
109 |
|
|
|
116 |
scores = calc_bleu_rouge_scores([answer], [questions[index][key]], debug=True)
|
117 |
|
118 |
partial_text += "\n\n Performance Metrics:\n"
|
119 |
+
partial_text += f'1. BLEU-1: {scores["bleu_scores"]["bleu"]:.3f}\n'
|
120 |
partial_text += f'1. RougeL: {scores["rouge_scores"]["rougeL"]:.3f}\n'
|
121 |
|
122 |
yield partial_text
|
|
|
130 |
label="⚙️ Parameters", open=False, render=False
|
131 |
),
|
132 |
additional_inputs=[
|
133 |
+
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
134 |
gr.Slider(
|
135 |
minimum=0, maximum=1, step=0.1, value=0, label="Temperature", render=False
|
136 |
),
|
|
|
151 |
label="Max new tokens",
|
152 |
render=False,
|
153 |
),
|
154 |
+
gr.Slider(
|
155 |
+
minimum=0.1,
|
156 |
+
maximum=1.0,
|
157 |
+
value=0.95,
|
158 |
+
step=0.05,
|
159 |
+
label="Top-p (nucleus sampling)",
|
160 |
+
),
|
161 |
],
|
|
|
|
|
|
|
162 |
)
|
163 |
demo.launch()
|
app_modules/llm_loader.py
CHANGED
@@ -3,7 +3,7 @@ import sys
|
|
3 |
import threading
|
4 |
from queue import Queue
|
5 |
from typing import Any, Dict, List, Optional
|
6 |
-
|
7 |
import torch
|
8 |
from langchain.callbacks.base import BaseCallbackHandler
|
9 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
@@ -176,7 +176,6 @@ class LLMLoader:
|
|
176 |
load_in_4bit=load_quantized_model == "4bit",
|
177 |
bnb_4bit_use_double_quant=load_quantized_model == "4bit",
|
178 |
load_in_8bit=load_quantized_model == "8bit",
|
179 |
-
bnb_8bit_use_double_quant=load_quantized_model == "8bit",
|
180 |
)
|
181 |
|
182 |
callbacks = []
|
@@ -212,13 +211,19 @@ class LLMLoader:
|
|
212 |
print(f" using model: {MODEL_NAME}")
|
213 |
self.llm = ChatGoogleGenerativeAI(
|
214 |
model=MODEL_NAME,
|
215 |
-
convert_system_message_to_human=True,
|
216 |
callbacks=callbacks,
|
217 |
streaming=True,
|
218 |
safety_settings={
|
|
|
|
|
|
|
219 |
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
220 |
},
|
221 |
)
|
|
|
|
|
|
|
|
|
222 |
elif self.llm_model_type.startswith("gpt4all"):
|
223 |
MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
|
224 |
self.llm = GPT4All(
|
|
|
3 |
import threading
|
4 |
from queue import Queue
|
5 |
from typing import Any, Dict, List, Optional
|
6 |
+
import google.generativeai as genai
|
7 |
import torch
|
8 |
from langchain.callbacks.base import BaseCallbackHandler
|
9 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
|
|
176 |
load_in_4bit=load_quantized_model == "4bit",
|
177 |
bnb_4bit_use_double_quant=load_quantized_model == "4bit",
|
178 |
load_in_8bit=load_quantized_model == "8bit",
|
|
|
179 |
)
|
180 |
|
181 |
callbacks = []
|
|
|
211 |
print(f" using model: {MODEL_NAME}")
|
212 |
self.llm = ChatGoogleGenerativeAI(
|
213 |
model=MODEL_NAME,
|
|
|
214 |
callbacks=callbacks,
|
215 |
streaming=True,
|
216 |
safety_settings={
|
217 |
+
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
|
218 |
+
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
|
219 |
+
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
220 |
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
221 |
},
|
222 |
)
|
223 |
+
# for m in genai.list_models():
|
224 |
+
# if "generateContent" in m.supported_generation_methods:
|
225 |
+
# print(m.name)
|
226 |
+
# exit()
|
227 |
elif self.llm_model_type.startswith("gpt4all"):
|
228 |
MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
|
229 |
self.llm = GPT4All(
|
app_modules/utils.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
# -*- coding:utf-8 -*-
|
2 |
from __future__ import annotations
|
3 |
|
|
|
4 |
import logging
|
5 |
import os
|
6 |
-
import math
|
7 |
import platform
|
8 |
import re
|
9 |
from pathlib import Path
|
@@ -13,8 +13,6 @@ import requests
|
|
13 |
import torch
|
14 |
from tqdm import tqdm
|
15 |
from langchain.memory import ConversationSummaryBufferMemory
|
16 |
-
import matplotlib.pyplot as plt
|
17 |
-
import matplotlib.ticker as mtick
|
18 |
|
19 |
|
20 |
class LogRecord(logging.LogRecord):
|
@@ -278,153 +276,3 @@ def detect_repetition_scores(text, debug=False):
|
|
278 |
text, debug=debug
|
279 |
)
|
280 |
return pd.Series([newline_score, repetition_score, total_repetitions])
|
281 |
-
|
282 |
-
|
283 |
-
def detect_scores(text, debug=False):
|
284 |
-
newline_score, repetition_score, total_repetitions = detect_repetitions(
|
285 |
-
text, debug=debug
|
286 |
-
)
|
287 |
-
return pd.Series([newline_score, repetition_score, total_repetitions])
|
288 |
-
|
289 |
-
|
290 |
-
def load_with_newline_and_repetition_scores(result_file, force_recalculate=False):
|
291 |
-
print(f"loading result file: {result_file}")
|
292 |
-
df = pd.read_csv(result_file, comment="#", on_bad_lines="warn")
|
293 |
-
|
294 |
-
if (
|
295 |
-
force_recalculate
|
296 |
-
or "newline_score" not in df.columns
|
297 |
-
or "repetition_score" not in df.columns
|
298 |
-
or "total_repetitions" not in df.columns
|
299 |
-
):
|
300 |
-
df[["newline_score", "repetition_score", "total_repetitions"]] = df[
|
301 |
-
"answer"
|
302 |
-
].apply(detect_scores)
|
303 |
-
df.to_csv(result_file, index=False)
|
304 |
-
|
305 |
-
return df
|
306 |
-
|
307 |
-
|
308 |
-
def replace_last(source_string, old_string, new_string):
|
309 |
-
head, _sep, tail = source_string.rpartition(old_string)
|
310 |
-
return head + new_string + tail
|
311 |
-
|
312 |
-
|
313 |
-
df_ms_macro = pd.read_json("./data/datasets/ms_macro.json")
|
314 |
-
|
315 |
-
|
316 |
-
def load_for_repetition_penalty_ms_macro(
|
317 |
-
csv_result_file, repetition_penalty, force_recalculate=False
|
318 |
-
):
|
319 |
-
result_file = replace_last(
|
320 |
-
csv_result_file, ".csv", f"_RP_{repetition_penalty:.3f}.csv"
|
321 |
-
)
|
322 |
-
df = load_with_newline_and_repetition_scores(
|
323 |
-
result_file, force_recalculate=force_recalculate
|
324 |
-
)
|
325 |
-
|
326 |
-
if df["ground_truth"][0] != df_ms_macro["wellFormedAnswers"][0]:
|
327 |
-
df["ground_truth"] = df_ms_macro["wellFormedAnswers"]
|
328 |
-
print("ground_truth updated for:", result_file)
|
329 |
-
df.to_csv(result_file, index=False)
|
330 |
-
return df
|
331 |
-
|
332 |
-
|
333 |
-
def adjust_perf_scores_with_repetition_penalty(result, precision, recall):
|
334 |
-
newline_score = [
|
335 |
-
df["newline_score"].mean() for df in result["df_list_repetition_penalty"]
|
336 |
-
]
|
337 |
-
print(f"newline_score: {newline_score}")
|
338 |
-
|
339 |
-
repetition_score = [
|
340 |
-
df["repetition_score"].mean() for df in result["df_list_repetition_penalty"]
|
341 |
-
]
|
342 |
-
print(f"repetition_score: {repetition_score}")
|
343 |
-
|
344 |
-
precision = [
|
345 |
-
f / math.log10(10 + n + r)
|
346 |
-
for f, n, r in zip(precision, newline_score, repetition_score)
|
347 |
-
]
|
348 |
-
recall = [
|
349 |
-
f / math.log10(10 + n + r)
|
350 |
-
for f, n, r in zip(recall, newline_score, repetition_score)
|
351 |
-
]
|
352 |
-
|
353 |
-
return precision, recall
|
354 |
-
|
355 |
-
|
356 |
-
# MS MACRO
|
357 |
-
def plot_performance_scores_ms_macro(
|
358 |
-
result,
|
359 |
-
models=None,
|
360 |
-
title="Performance",
|
361 |
-
):
|
362 |
-
|
363 |
-
if models is None:
|
364 |
-
models = result.keys()
|
365 |
-
for model in models:
|
366 |
-
print(f"model: {model}")
|
367 |
-
df = result[model]["df_overall"]
|
368 |
-
# print(result[model]["df_list_repetition_penalty"][0].describe())
|
369 |
-
|
370 |
-
# Calculate the statistics
|
371 |
-
bleu1 = list(df["bleu1"])
|
372 |
-
rougeL = list(df["rougeL"])
|
373 |
-
f1 = [2 * (p * r) / (p + r) for p, r in zip(bleu1, rougeL)]
|
374 |
-
best_f1 = max(f1)
|
375 |
-
best_f1_index = f1.index(best_f1)
|
376 |
-
|
377 |
-
bleu1, rougeL = adjust_perf_scores_with_repetition_penalty(
|
378 |
-
result[model], bleu1, rougeL
|
379 |
-
)
|
380 |
-
afrp = [2 * (p * r) / (p + r) for p, r in zip(bleu1, rougeL)]
|
381 |
-
|
382 |
-
# f1 = [df["f1"].mean() for df in result[model]["df_list_repetition_penalty"]]
|
383 |
-
best_afrp = max(afrp)
|
384 |
-
best_afrp_index = afrp.index(best_afrp)
|
385 |
-
|
386 |
-
repetition_penalties = list(df["repetition_penalty"])
|
387 |
-
|
388 |
-
# line plot for precision, recall, f1
|
389 |
-
plt.figure(figsize=(10, 6))
|
390 |
-
|
391 |
-
plt.axvspan(
|
392 |
-
repetition_penalties[best_f1_index] - 0.01,
|
393 |
-
repetition_penalties[best_f1_index] + 0.01,
|
394 |
-
alpha=0.5,
|
395 |
-
edgecolor="none",
|
396 |
-
facecolor="blue",
|
397 |
-
)
|
398 |
-
|
399 |
-
plt.axvspan(
|
400 |
-
repetition_penalties[best_afrp_index] - 0.01,
|
401 |
-
repetition_penalties[best_afrp_index] + 0.01,
|
402 |
-
alpha=0.5,
|
403 |
-
edgecolor="none",
|
404 |
-
facecolor="orange",
|
405 |
-
)
|
406 |
-
|
407 |
-
plt.plot(
|
408 |
-
repetition_penalties,
|
409 |
-
f1,
|
410 |
-
label="Overall Perf Score",
|
411 |
-
marker="D",
|
412 |
-
color="blue",
|
413 |
-
)
|
414 |
-
plt.plot(
|
415 |
-
repetition_penalties,
|
416 |
-
afrp,
|
417 |
-
label="RF Adjusted Perf Score",
|
418 |
-
marker="o",
|
419 |
-
color="orange",
|
420 |
-
)
|
421 |
-
|
422 |
-
plt.xlabel("Repetition Penalties")
|
423 |
-
plt.ylabel("Score")
|
424 |
-
plt.xlim(0.99, 1.31)
|
425 |
-
# y in percentage
|
426 |
-
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
|
427 |
-
plt.title(f"{model} {title}")
|
428 |
-
plt.legend(bbox_to_anchor=(1.0, 0.5), loc="center left")
|
429 |
-
|
430 |
-
plt.show()
|
|
|
1 |
# -*- coding:utf-8 -*-
|
2 |
from __future__ import annotations
|
3 |
|
4 |
+
import json
|
5 |
import logging
|
6 |
import os
|
|
|
7 |
import platform
|
8 |
import re
|
9 |
from pathlib import Path
|
|
|
13 |
import torch
|
14 |
from tqdm import tqdm
|
15 |
from langchain.memory import ConversationSummaryBufferMemory
|
|
|
|
|
16 |
|
17 |
|
18 |
class LogRecord(logging.LogRecord):
|
|
|
276 |
text, debug=debug
|
277 |
)
|
278 |
return pd.Series([newline_score, repetition_score, total_repetitions])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/datasets/WebQSP.test.wikidata.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.060.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2.json}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1277996feced5bd3ef1a12f0bbfd612219ee32825ced7a5f67ba154b2d61b7a7
|
3 |
+
size 655310
|
data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.120.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2_evaluated.json}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb905b34dc2c46254530376a9609e2f6b9011b412c67994220549fb94b9ee72b
|
3 |
+
size 716045
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4ee64cfc0130b4b323a159ead1a9104989c800578856b1805cc056ab5257e45
|
3 |
+
size 653448
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:689bc4f321e11fc4d42cd6a3ef1d15f1f87c4228d8248eb7a4e28d635746db73
|
3 |
+
size 714002
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3b8f1059469d94a2f9f6a0d2216e5d1cf66d7b48cdf39b267a4cc67623e1f47
|
3 |
+
size 650480
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c01183de7769798e48ac681749f6cb2722ce0c4763e01f03e17569d7b0e8ca5
|
3 |
+
size 710922
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a12beb32ed61761899bba3a73d6959691f8785328d70c13c9037ed71116f5e2
|
3 |
+
size 654870
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f30bf65ae09a25f1ac818528d352d3cff9eced98fab349c47b55672a53aa457a
|
3 |
+
size 715516
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a6404ba60e543621c5be62e9b4d52639671c42f61ed9249afb52428fd70015c
|
3 |
+
size 644399
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:484ba047b61462ee92bbfa33b335bee05b13c2b667abae414417bfe33da46274
|
3 |
+
size 705020
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51cb665ebc45ca714f9e03415937c152923f75ca9ccacca2bdf2f2698a009948
|
3 |
+
size 642376
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96e0d343cded4a11df5bdc1dffa7c2ed9631afce46f706d0c0f8f35d4683df9d
|
3 |
+
size 702987
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fe968825c4b87aa7b2a09bf9a0ddcef58cbf58707b8620c0e2c4344b6cbded8
|
3 |
+
size 647286
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc525d15dc082962e317fb8b3fccf28cc9a6de4d6d765d743016dc34f13ec506
|
3 |
+
size 707966
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f354e4dd9a889af49831dd797199143be856040b51d152d935216ebcb0d3e0f0
|
3 |
+
size 644443
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25de5711464fa1015e193c0f452902742fbab711c464a4f87d9774914f3edf2d
|
3 |
+
size 705131
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:539b143d92be17b1f397e020ff3518ea397fea3f985a36e2db4cd6efdc9b0755
|
3 |
+
size 643126
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b92bbb2bc9dffa027274dc593cbc61a838bde4a8e54a1b4210d4f921e280b436
|
3 |
+
size 703815
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e56aa85968670bfe81b541bec3c57cf5763457c76847266411844abc544fc26
|
3 |
+
size 652949
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2c1e8311f7b644fb2ce62b05db31cd81236ab8bfa94107ac8d33e7e03444871
|
3 |
+
size 713794
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c4136642c87159fba5861af7b4be13a2f099943041703f4d5e4c86b85c49c9a
|
3 |
+
size 647659
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:784ad453019ef8ac12c24032d16cbba3d1b11b25439903ebbee7c51382b13531
|
3 |
+
size 708220
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a467439fdb97136ddfbda188a4e216d8aec1305837d535c2c5c3fe0546c490bb
|
3 |
+
size 647989
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a1d85371b24da315049092b1adf3229c0a4663e64e453843dfadb596d9f0ef1
|
3 |
+
size 708567
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39c1241171262c6fb1712ed8089bef7e790b71bf2f327c95052d4087037005f2
|
3 |
+
size 642213
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4735ae18bf2c0f576f116ec178afb817a8973d9e84fd8fa51e8a3cc05c5b3a6e
|
3 |
+
size 702922
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79e64c9b8ba5c47da0be79d0e9f2551c2efe31352bd21b94362cf549acd7f3f6
|
3 |
+
size 642810
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24792de358440e5ac6f44e4711f067de8842a7ef0ee0dca1fda8effa307959b4
|
3 |
+
size 703496
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a98e4e6f6f07902ba7d098f02b186f2150d1c355769a5576b0645cddee195062
|
3 |
+
size 653962
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b99d3c8f0c0a3b35c9b707f82bd1657765392da680cf89de1ae71b10a8971b4
|
3 |
+
size 714741
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:579e3e57776f6bfd53f312f565335a71338e392e2b39ffc7b588a2df5e001e9f
|
3 |
+
size 644777
|
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be28d56ac8257ca7489d51265d2d71a74abd54330abcd5e4f22b077ba344c651
|
3 |
+
size 705446
|
Llama-2-eval/data/datasets/ms_macro/data-00000-of-00001.arrow → data/eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2.json
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:256520aa638029eb70674b3a93acbd7d21277f2626d756e1cbd55d54dc40b55e
|
3 |
+
size 1046106
|
data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.000.txt → eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2_evaluated.json}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7917c1b3993e69ca3d3b9c6b0245646e5f78ea2568b0b669f2e08b37ad9759f1
|
3 |
+
size 1107148
|
data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f43b5b7139632e00b30f719868b85d265592ba6e99f04e2e4abb404679ae4ad
|
3 |
+
size 813840
|
data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2_evaluated.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cb0c6be15cadd8ef841c8aaceb4d37ce08fbe242757ea9e03be4f3c13c250db
|
3 |
+
size 874638
|
data/eval/Llama-2-70b-chat-hf_wd_RP_1.040-t2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:447323f0b62e5f885fe58667b9698b1b9038f37381a19f0b5eb948b501d653a9
|
3 |
+
size 663779
|