dh-mc commited on
Commit
8f1a330
1 Parent(s): e8b4f46

latest code/data

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +0 -0
  2. Llama-2-eval/data/datasets/ms_macro/dataset_info.json +0 -95
  3. Llama-2-eval/data/datasets/ms_macro/state.json +0 -13
  4. Llama-2-eval/data/results/results_full-a40.csv +0 -10
  5. Llama-2-eval/data/results/results_full-l40.csv +0 -10
  6. Llama-2-eval/notebook/baseline.ipynb +0 -1983
  7. Llama-2-eval/notebook/metrics.ipynb +0 -1293
  8. Makefile +25 -0
  9. README.md +132 -17
  10. app.py +62 -92
  11. app_modules/llm_loader.py +8 -3
  12. app_modules/utils.py +1 -153
  13. data/datasets/WebQSP.test.wikidata.json +0 -0
  14. data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.060.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2.json} +2 -2
  15. data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.120.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2_evaluated.json} +2 -2
  16. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2.json +3 -0
  17. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2_evaluated.json +3 -0
  18. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2.json +3 -0
  19. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2_evaluated.json +3 -0
  20. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2.json +3 -0
  21. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2_evaluated.json +3 -0
  22. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2.json +3 -0
  23. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2_evaluated.json +3 -0
  24. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2.json +3 -0
  25. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2_evaluated.json +3 -0
  26. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2.json +3 -0
  27. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2_evaluated.json +3 -0
  28. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2.json +3 -0
  29. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2_evaluated.json +3 -0
  30. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2.json +3 -0
  31. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2_evaluated.json +3 -0
  32. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2.json +3 -0
  33. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2_evaluated.json +3 -0
  34. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2.json +3 -0
  35. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2_evaluated.json +3 -0
  36. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2.json +3 -0
  37. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2_evaluated.json +3 -0
  38. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2.json +3 -0
  39. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2_evaluated.json +3 -0
  40. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2.json +3 -0
  41. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2_evaluated.json +3 -0
  42. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2.json +3 -0
  43. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2_evaluated.json +3 -0
  44. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2.json +3 -0
  45. data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2_evaluated.json +3 -0
  46. Llama-2-eval/data/datasets/ms_macro/data-00000-of-00001.arrow → data/eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2.json +2 -2
  47. data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.000.txt → eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2_evaluated.json} +2 -2
  48. data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2.json +3 -0
  49. data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2_evaluated.json +3 -0
  50. data/eval/Llama-2-70b-chat-hf_wd_RP_1.040-t2.json +3 -0
.gitattributes CHANGED
The diff for this file is too large to render. See raw diff
 
Llama-2-eval/data/datasets/ms_macro/dataset_info.json DELETED
@@ -1,95 +0,0 @@
1
- {
2
- "builder_name": "parquet",
3
- "citation": "",
4
- "config_name": "default",
5
- "dataset_name": "ms-macro-wellformed_only",
6
- "dataset_size": 726469485,
7
- "description": "",
8
- "download_checksums": {
9
- "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00000-of-00002-0a6f58dc7ee03f61.parquet": {
10
- "num_bytes": 164629356,
11
- "checksum": null
12
- },
13
- "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00001-of-00002-5262fd5ec1911156.parquet": {
14
- "num_bytes": 164721520,
15
- "checksum": null
16
- },
17
- "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/test-00000-of-00001-f965dd5a841915d3.parquet": {
18
- "num_bytes": 26541566,
19
- "checksum": null
20
- }
21
- },
22
- "download_size": 355892442,
23
- "features": {
24
- "answers": {
25
- "feature": {
26
- "dtype": "string",
27
- "_type": "Value"
28
- },
29
- "_type": "Sequence"
30
- },
31
- "passages": {
32
- "feature": {
33
- "is_selected": {
34
- "dtype": "int32",
35
- "_type": "Value"
36
- },
37
- "passage_text": {
38
- "dtype": "string",
39
- "_type": "Value"
40
- },
41
- "url": {
42
- "dtype": "string",
43
- "_type": "Value"
44
- }
45
- },
46
- "_type": "Sequence"
47
- },
48
- "query": {
49
- "dtype": "string",
50
- "_type": "Value"
51
- },
52
- "query_id": {
53
- "dtype": "int32",
54
- "_type": "Value"
55
- },
56
- "query_type": {
57
- "dtype": "string",
58
- "_type": "Value"
59
- },
60
- "wellFormedAnswers": {
61
- "feature": {
62
- "dtype": "string",
63
- "_type": "Value"
64
- },
65
- "_type": "Sequence"
66
- }
67
- },
68
- "homepage": "",
69
- "license": "",
70
- "size_in_bytes": 1082361927,
71
- "splits": {
72
- "train": {
73
- "name": "train",
74
- "num_bytes": 674327331,
75
- "num_examples": 153725,
76
- "shard_lengths": [
77
- 116863,
78
- 36862
79
- ],
80
- "dataset_name": "ms-macro-wellformed_only"
81
- },
82
- "test": {
83
- "name": "test",
84
- "num_bytes": 52142154,
85
- "num_examples": 12467,
86
- "dataset_name": "ms-macro-wellformed_only"
87
- }
88
- },
89
- "version": {
90
- "version_str": "0.0.0",
91
- "major": 0,
92
- "minor": 0,
93
- "patch": 0
94
- }
95
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Llama-2-eval/data/datasets/ms_macro/state.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "_data_files": [
3
- {
4
- "filename": "data-00000-of-00001.arrow"
5
- }
6
- ],
7
- "_fingerprint": "fe2a26ddba75833a",
8
- "_format_columns": null,
9
- "_format_kwargs": {},
10
- "_format_type": null,
11
- "_output_all_columns": false,
12
- "_split": "test"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Llama-2-eval/data/results/results_full-a40.csv DELETED
@@ -1,10 +0,0 @@
1
- model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
2
- gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
3
- gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
4
- Llama-2-13b-chat-hf,1.12,2133.992,1.66,33389,24007,15.646,1.391,0.163,0.3345,0.2031,0.3756,0.1632,0.2962,0.1388,0.3045,0.3423,0.5302,0.1846,0.3694,0.719
5
- vicuna-13b-v1.1,1.095,2212.946,1.682,35308,26456,15.955,1.335,0.1285,0.2319,0.1991,0.2812,0.1556,0.2644,0.2009,0.2768,0.3159,0.5761,0.1853,0.3276,0.749
6
- Llama-2-7b-chat-hf,1.19,1280.314,1.793,34349,23987,26.829,1.432,0.1274,0.2383,0.1836,0.2621,0.1572,0.2754,0.17,0.2911,0.3631,0.5383,0.1781,0.3209,0.698
7
- vicuna-7b-v1.1,1.095,975.73,1.574,25932,18714,26.577,1.386,0.1664,0.2838,0.2227,0.3118,0.166,0.2351,0.259,0.2753,0.4542,0.5838,0.2218,0.3379,0.722
8
- wizardLM-7B-HF,1.095,1265.93,1.667,33570,24003,26.518,1.399,0.1367,0.2584,0.2027,0.2882,0.1358,0.2592,0.1985,0.3085,0.4154,0.5794,0.1866,0.3384,0.715
9
- mpt-7b-instruct,1.05,2071.066,1.42,12374,9927,5.975,1.246,0.1804,0.285,0.2589,0.2556,0.2383,0.2468,0.2635,0.2571,0.3512,0.4042,0.2509,0.2897,0.802
10
- gpt4all-j,1.095,5603.316,1.706,31502,27099,5.622,1.162,0.1236,0.2406,0.1708,0.2511,0.143,0.255,0.194,0.2941,0.3721,0.5337,0.1737,0.3153,0.860
 
 
 
 
 
 
 
 
 
 
 
Llama-2-eval/data/results/results_full-l40.csv DELETED
@@ -1,10 +0,0 @@
1
- model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,words_per_token_l40,words_per_second,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
2
- gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.867,,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
3
- gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.871,,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
4
- Llama-2-13b-chat-hf,1.12,1687.637,1.785,32808,23575,19.44,1.392,0.718,13.969,0.1612,0.3305,0.2061,0.3701,0.1675,0.3018,0.141,0.305,0.3394,0.5288,0.1866,0.368,0.719
5
- vicuna-13b-v1.1,1.095,1799.165,2.197,35543,26613,19.755,1.336,0.749,14.792,0.1274,0.2321,0.1994,0.2834,0.154,0.2631,0.1984,0.2773,0.3194,0.5759,0.1844,0.3256,0.749
6
- Llama-2-7b-chat-hf,1.19,1002.46,6.606,34686,24229,34.601,1.432,0.698,24.170,0.1269,0.2404,0.1824,0.2614,0.157,0.2769,0.1687,0.2896,0.3565,0.5378,0.177,0.3214,0.699
7
- vicuna-7b-v1.1,1.095,758.227,1.432,25827,18638,34.062,1.386,0.722,24.581,0.1673,0.2859,0.2221,0.3096,0.1655,0.2327,0.2576,0.2717,0.4564,0.5849,0.2216,0.3387,0.722
8
- wizardLM-7B-HF,1.095,998.702,1.683,33674,23996,33.718,1.403,0.713,24.027,0.1372,0.259,0.2046,0.2878,0.1354,0.2588,0.1982,0.3083,0.4154,0.5769,0.187,0.3383,0.713
9
- mpt-7b-instruct,1.05,1622.435,1.338,12607,10139,7.77,1.243,0.805,6.249,0.1751,0.2756,0.2569,0.2625,0.2349,0.2456,0.2466,0.2566,0.3522,0.4049,0.2455,0.2889,0.804
10
- gpt4all-j,1.095,3794.429,1.611,31719,27286,8.359,1.162,0.861,7.191,0.1262,0.2443,0.1669,0.251,0.1394,0.2505,0.1937,0.2968,0.3693,0.5348,0.1719,0.3151,0.860
 
 
 
 
 
 
 
 
 
 
 
Llama-2-eval/notebook/baseline.ipynb DELETED
@@ -1,1983 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 5,
6
- "id": "a6d96660",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "data": {
11
- "text/plain": [
12
- "True"
13
- ]
14
- },
15
- "execution_count": 5,
16
- "metadata": {},
17
- "output_type": "execute_result"
18
- }
19
- ],
20
- "source": [
21
- "import os\n",
22
- "from dotenv import load_dotenv\n",
23
- "\n",
24
- "load_dotenv()"
25
- ]
26
- },
27
- {
28
- "cell_type": "code",
29
- "execution_count": 11,
30
- "id": "7510ab87",
31
- "metadata": {},
32
- "outputs": [
33
- {
34
- "data": {
35
- "text/plain": [
36
- "DatasetDict({\n",
37
- " train: Dataset({\n",
38
- " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
39
- " num_rows: 153725\n",
40
- " })\n",
41
- " test: Dataset({\n",
42
- " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
43
- " num_rows: 12467\n",
44
- " })\n",
45
- "})"
46
- ]
47
- },
48
- "execution_count": 11,
49
- "metadata": {},
50
- "output_type": "execute_result"
51
- }
52
- ],
53
- "source": [
54
- "from datasets import load_dataset\n",
55
- "\n",
56
- "dataset = load_dataset(\"zhengxuanzenwu/ms-macro-wellformed_only\")\n",
57
- "dataset"
58
- ]
59
- },
60
- {
61
- "cell_type": "code",
62
- "execution_count": 12,
63
- "id": "1f4f0e76",
64
- "metadata": {},
65
- "outputs": [
66
- {
67
- "data": {
68
- "text/html": [
69
- "<div>\n",
70
- "<style scoped>\n",
71
- " .dataframe tbody tr th:only-of-type {\n",
72
- " vertical-align: middle;\n",
73
- " }\n",
74
- "\n",
75
- " .dataframe tbody tr th {\n",
76
- " vertical-align: top;\n",
77
- " }\n",
78
- "\n",
79
- " .dataframe thead th {\n",
80
- " text-align: right;\n",
81
- " }\n",
82
- "</style>\n",
83
- "<table border=\"1\" class=\"dataframe\">\n",
84
- " <thead>\n",
85
- " <tr style=\"text-align: right;\">\n",
86
- " <th></th>\n",
87
- " <th>answers</th>\n",
88
- " <th>passages</th>\n",
89
- " <th>query</th>\n",
90
- " <th>query_id</th>\n",
91
- " <th>query_type</th>\n",
92
- " <th>wellFormedAnswers</th>\n",
93
- " </tr>\n",
94
- " </thead>\n",
95
- " <tbody>\n",
96
- " <tr>\n",
97
- " <th>0</th>\n",
98
- " <td>[2,662]</td>\n",
99
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
100
- " <td>albany mn population</td>\n",
101
- " <td>15177</td>\n",
102
- " <td>NUMERIC</td>\n",
103
- " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
104
- " </tr>\n",
105
- " <tr>\n",
106
- " <th>1</th>\n",
107
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
108
- " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
109
- " <td>current weather in volcano, ca</td>\n",
110
- " <td>114414</td>\n",
111
- " <td>DESCRIPTION</td>\n",
112
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
113
- " </tr>\n",
114
- " <tr>\n",
115
- " <th>2</th>\n",
116
- " <td>[Hippocrates]</td>\n",
117
- " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
118
- " <td>____________________ is considered the father ...</td>\n",
119
- " <td>9083</td>\n",
120
- " <td>DESCRIPTION</td>\n",
121
- " <td>[Hippocrates is considered the father of moder...</td>\n",
122
- " </tr>\n",
123
- " <tr>\n",
124
- " <th>3</th>\n",
125
- " <td>[120 days from the date of the Note.]</td>\n",
126
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
127
- " <td>how many days is an appraisal good for a fanni...</td>\n",
128
- " <td>281439</td>\n",
129
- " <td>NUMERIC</td>\n",
130
- " <td>[An appraisal is good for 120 days from the da...</td>\n",
131
- " </tr>\n",
132
- " <tr>\n",
133
- " <th>4</th>\n",
134
- " <td>[From $26,000 to $39,000 a year]</td>\n",
135
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
136
- " <td>average pharmacy tech salary</td>\n",
137
- " <td>40287</td>\n",
138
- " <td>NUMERIC</td>\n",
139
- " <td>[The average salary for a pharmacy technician ...</td>\n",
140
- " </tr>\n",
141
- " </tbody>\n",
142
- "</table>\n",
143
- "</div>"
144
- ],
145
- "text/plain": [
146
- " answers \\\n",
147
- "0 [2,662] \n",
148
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
149
- "2 [Hippocrates] \n",
150
- "3 [120 days from the date of the Note.] \n",
151
- "4 [From $26,000 to $39,000 a year] \n",
152
- "\n",
153
- " passages \\\n",
154
- "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
155
- "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
156
- "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
157
- "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
158
- "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
159
- "\n",
160
- " query query_id query_type \\\n",
161
- "0 albany mn population 15177 NUMERIC \n",
162
- "1 current weather in volcano, ca 114414 DESCRIPTION \n",
163
- "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
164
- "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
165
- "4 average pharmacy tech salary 40287 NUMERIC \n",
166
- "\n",
167
- " wellFormedAnswers \n",
168
- "0 [The population of Albany, Minnesota is 2,662. ] \n",
169
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
170
- "2 [Hippocrates is considered the father of moder... \n",
171
- "3 [An appraisal is good for 120 days from the da... \n",
172
- "4 [The average salary for a pharmacy technician ... "
173
- ]
174
- },
175
- "execution_count": 12,
176
- "metadata": {},
177
- "output_type": "execute_result"
178
- }
179
- ],
180
- "source": [
181
- "df = dataset[\"test\"].to_pandas()\n",
182
- "df.head()"
183
- ]
184
- },
185
- {
186
- "cell_type": "code",
187
- "execution_count": 15,
188
- "id": "3e9b4cef",
189
- "metadata": {},
190
- "outputs": [
191
- {
192
- "data": {
193
- "text/plain": [
194
- "{'answers': ['2,662'],\n",
195
- " 'passages': {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0],\n",
196
- " 'passage_text': ['City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.',\n",
197
- " 'Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.',\n",
198
- " 'For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
199
- " 'Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.',\n",
200
- " 'Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
201
- " 'Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).',\n",
202
- " \"For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\",\n",
203
- " \"For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"],\n",
204
- " 'url': ['http://zipcode.org/city/MN/ALBANY',\n",
205
- " 'http://www.city-data.com/zips/56307.html',\n",
206
- " 'https://en.wikipedia.org/wiki/Albany,_Minnesota',\n",
207
- " 'http://ci.albany.mn.us/index.asp?SEC=A8341FEC-6B8C-47D2-926B-75A89ED4C539&Type=B_BASIC',\n",
208
- " 'https://www.mapquest.com/us/mn/albany-282023394',\n",
209
- " 'http://www.city-data.com/city/Albany-Minnesota.html',\n",
210
- " 'http://www.city-data.com/zips/56307.html',\n",
211
- " 'http://www.city-data.com/city/Albany-Minnesota.html']},\n",
212
- " 'query': 'albany mn population',\n",
213
- " 'query_id': 15177,\n",
214
- " 'query_type': 'NUMERIC',\n",
215
- " 'wellFormedAnswers': ['The population of Albany, Minnesota is 2,662. ']}"
216
- ]
217
- },
218
- "execution_count": 15,
219
- "metadata": {},
220
- "output_type": "execute_result"
221
- }
222
- ],
223
- "source": [
224
- "test = dataset[\"test\"]\n",
225
- "test[0]"
226
- ]
227
- },
228
- {
229
- "cell_type": "code",
230
- "execution_count": 24,
231
- "id": "104dfbea",
232
- "metadata": {},
233
- "outputs": [
234
- {
235
- "data": {
236
- "text/plain": [
237
- "12467"
238
- ]
239
- },
240
- "execution_count": 24,
241
- "metadata": {},
242
- "output_type": "execute_result"
243
- }
244
- ],
245
- "source": [
246
- "test.num_rows"
247
- ]
248
- },
249
- {
250
- "cell_type": "code",
251
- "execution_count": 35,
252
- "id": "01b3a886",
253
- "metadata": {},
254
- "outputs": [
255
- {
256
- "data": {
257
- "text/plain": [
258
- "({'NUMERIC': 3685,\n",
259
- " 'DESCRIPTION': 5487,\n",
260
- " 'ENTITY': 1077,\n",
261
- " 'PERSON': 868,\n",
262
- " 'LOCATION': 1350},\n",
263
- " {'NUMERIC': 179,\n",
264
- " 'DESCRIPTION': 218,\n",
265
- " 'ENTITY': 2403,\n",
266
- " 'LOCATION': 2559,\n",
267
- " 'PERSON': 3966})"
268
- ]
269
- },
270
- "execution_count": 35,
271
- "metadata": {},
272
- "output_type": "execute_result"
273
- }
274
- ],
275
- "source": [
276
- "counts = {}\n",
277
- "indices = {}\n",
278
- "size = 100\n",
279
- "for i in range(test.num_rows):\n",
280
- " row = test[i]\n",
281
- " query_type = row[\"query_type\"]\n",
282
- " if query_type in counts:\n",
283
- " counts[query_type] += 1\n",
284
- " else:\n",
285
- " counts[query_type] = 1\n",
286
- " if counts[query_type] == size:\n",
287
- " indices[query_type] = i\n",
288
- "counts, indices"
289
- ]
290
- },
291
- {
292
- "cell_type": "code",
293
- "execution_count": 39,
294
- "id": "967bc1cd",
295
- "metadata": {},
296
- "outputs": [],
297
- "source": [
298
- "# create new dataset exluding those idx\n",
299
- "baseline = test.select(\n",
300
- " (i for i in range(len(test)) if i <= indices[test[i][\"query_type\"]])\n",
301
- ")"
302
- ]
303
- },
304
- {
305
- "cell_type": "code",
306
- "execution_count": 40,
307
- "id": "9a5fcad5",
308
- "metadata": {},
309
- "outputs": [
310
- {
311
- "data": {
312
- "text/plain": [
313
- "Dataset({\n",
314
- " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
315
- " num_rows: 500\n",
316
- "})"
317
- ]
318
- },
319
- "execution_count": 40,
320
- "metadata": {},
321
- "output_type": "execute_result"
322
- }
323
- ],
324
- "source": [
325
- "baseline"
326
- ]
327
- },
328
- {
329
- "cell_type": "code",
330
- "execution_count": 41,
331
- "id": "0524a973",
332
- "metadata": {},
333
- "outputs": [
334
- {
335
- "data": {
336
- "text/html": [
337
- "<div>\n",
338
- "<style scoped>\n",
339
- " .dataframe tbody tr th:only-of-type {\n",
340
- " vertical-align: middle;\n",
341
- " }\n",
342
- "\n",
343
- " .dataframe tbody tr th {\n",
344
- " vertical-align: top;\n",
345
- " }\n",
346
- "\n",
347
- " .dataframe thead th {\n",
348
- " text-align: right;\n",
349
- " }\n",
350
- "</style>\n",
351
- "<table border=\"1\" class=\"dataframe\">\n",
352
- " <thead>\n",
353
- " <tr style=\"text-align: right;\">\n",
354
- " <th></th>\n",
355
- " <th>answers</th>\n",
356
- " <th>passages</th>\n",
357
- " <th>query</th>\n",
358
- " <th>query_id</th>\n",
359
- " <th>query_type</th>\n",
360
- " <th>wellFormedAnswers</th>\n",
361
- " </tr>\n",
362
- " </thead>\n",
363
- " <tbody>\n",
364
- " <tr>\n",
365
- " <th>0</th>\n",
366
- " <td>[2,662]</td>\n",
367
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
368
- " <td>albany mn population</td>\n",
369
- " <td>15177</td>\n",
370
- " <td>NUMERIC</td>\n",
371
- " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
372
- " </tr>\n",
373
- " <tr>\n",
374
- " <th>1</th>\n",
375
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
376
- " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
377
- " <td>current weather in volcano, ca</td>\n",
378
- " <td>114414</td>\n",
379
- " <td>DESCRIPTION</td>\n",
380
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
381
- " </tr>\n",
382
- " <tr>\n",
383
- " <th>2</th>\n",
384
- " <td>[Hippocrates]</td>\n",
385
- " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
386
- " <td>____________________ is considered the father ...</td>\n",
387
- " <td>9083</td>\n",
388
- " <td>DESCRIPTION</td>\n",
389
- " <td>[Hippocrates is considered the father of moder...</td>\n",
390
- " </tr>\n",
391
- " <tr>\n",
392
- " <th>3</th>\n",
393
- " <td>[120 days from the date of the Note.]</td>\n",
394
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
395
- " <td>how many days is an appraisal good for a fanni...</td>\n",
396
- " <td>281439</td>\n",
397
- " <td>NUMERIC</td>\n",
398
- " <td>[An appraisal is good for 120 days from the da...</td>\n",
399
- " </tr>\n",
400
- " <tr>\n",
401
- " <th>4</th>\n",
402
- " <td>[From $26,000 to $39,000 a year]</td>\n",
403
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
404
- " <td>average pharmacy tech salary</td>\n",
405
- " <td>40287</td>\n",
406
- " <td>NUMERIC</td>\n",
407
- " <td>[The average salary for a pharmacy technician ...</td>\n",
408
- " </tr>\n",
409
- " <tr>\n",
410
- " <th>...</th>\n",
411
- " <td>...</td>\n",
412
- " <td>...</td>\n",
413
- " <td>...</td>\n",
414
- " <td>...</td>\n",
415
- " <td>...</td>\n",
416
- " <td>...</td>\n",
417
- " </tr>\n",
418
- " <tr>\n",
419
- " <th>495</th>\n",
420
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
421
- " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
422
- " <td>the pool shower company</td>\n",
423
- " <td>518269</td>\n",
424
- " <td>PERSON</td>\n",
425
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
426
- " </tr>\n",
427
- " <tr>\n",
428
- " <th>496</th>\n",
429
- " <td>[Hanson]</td>\n",
430
- " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
431
- " <td>longest tenured american football players</td>\n",
432
- " <td>442806</td>\n",
433
- " <td>PERSON</td>\n",
434
- " <td>[Hanson is the longest tenured American footba...</td>\n",
435
- " </tr>\n",
436
- " <tr>\n",
437
- " <th>497</th>\n",
438
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
439
- " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
440
- " <td>mt. view baptist in pendleton sc</td>\n",
441
- " <td>460250</td>\n",
442
- " <td>PERSON</td>\n",
443
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
444
- " </tr>\n",
445
- " <tr>\n",
446
- " <th>498</th>\n",
447
- " <td>[Honeysuckle Weeks]</td>\n",
448
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
449
- " <td>what actress disappeared for a while</td>\n",
450
- " <td>549739</td>\n",
451
- " <td>PERSON</td>\n",
452
- " <td>[The actress disappeared for a while Honeysuck...</td>\n",
453
- " </tr>\n",
454
- " <tr>\n",
455
- " <th>499</th>\n",
456
- " <td>[African-Nguni]</td>\n",
457
- " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
458
- " <td>what ethnicity is the surname sabol</td>\n",
459
- " <td>658265</td>\n",
460
- " <td>PERSON</td>\n",
461
- " <td>[The ethnicity of the surname Sabol is African...</td>\n",
462
- " </tr>\n",
463
- " </tbody>\n",
464
- "</table>\n",
465
- "<p>500 rows × 6 columns</p>\n",
466
- "</div>"
467
- ],
468
- "text/plain": [
469
- " answers \\\n",
470
- "0 [2,662] \n",
471
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
472
- "2 [Hippocrates] \n",
473
- "3 [120 days from the date of the Note.] \n",
474
- "4 [From $26,000 to $39,000 a year] \n",
475
- ".. ... \n",
476
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
477
- "496 [Hanson] \n",
478
- "497 [Mount Able Baptist Church is located at the a... \n",
479
- "498 [Honeysuckle Weeks] \n",
480
- "499 [African-Nguni] \n",
481
- "\n",
482
- " passages \\\n",
483
- "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
484
- "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
485
- "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
486
- "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
487
- "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
488
- ".. ... \n",
489
- "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
490
- "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
491
- "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
492
- "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
493
- "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
494
- "\n",
495
- " query query_id query_type \\\n",
496
- "0 albany mn population 15177 NUMERIC \n",
497
- "1 current weather in volcano, ca 114414 DESCRIPTION \n",
498
- "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
499
- "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
500
- "4 average pharmacy tech salary 40287 NUMERIC \n",
501
- ".. ... ... ... \n",
502
- "495 the pool shower company 518269 PERSON \n",
503
- "496 longest tenured american football players 442806 PERSON \n",
504
- "497 mt. view baptist in pendleton sc 460250 PERSON \n",
505
- "498 what actress disappeared for a while 549739 PERSON \n",
506
- "499 what ethnicity is the surname sabol 658265 PERSON \n",
507
- "\n",
508
- " wellFormedAnswers \n",
509
- "0 [The population of Albany, Minnesota is 2,662. ] \n",
510
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
511
- "2 [Hippocrates is considered the father of moder... \n",
512
- "3 [An appraisal is good for 120 days from the da... \n",
513
- "4 [The average salary for a pharmacy technician ... \n",
514
- ".. ... \n",
515
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
516
- "496 [Hanson is the longest tenured American footba... \n",
517
- "497 [Mount Able Baptist Church is located at the a... \n",
518
- "498 [The actress disappeared for a while Honeysuck... \n",
519
- "499 [The ethnicity of the surname Sabol is African... \n",
520
- "\n",
521
- "[500 rows x 6 columns]"
522
- ]
523
- },
524
- "execution_count": 41,
525
- "metadata": {},
526
- "output_type": "execute_result"
527
- }
528
- ],
529
- "source": [
530
- "baseline.to_pandas()"
531
- ]
532
- },
533
- {
534
- "cell_type": "code",
535
- "execution_count": 42,
536
- "id": "57a195e0",
537
- "metadata": {},
538
- "outputs": [
539
- {
540
- "data": {
541
- "application/vnd.jupyter.widget-view+json": {
542
- "model_id": "66abd394cb054cf1b7459e92d4763d02",
543
- "version_major": 2,
544
- "version_minor": 0
545
- },
546
- "text/plain": [
547
- "Saving the dataset (0/1 shards): 0%| | 0/500 [00:00<?, ? examples/s]"
548
- ]
549
- },
550
- "metadata": {},
551
- "output_type": "display_data"
552
- }
553
- ],
554
- "source": [
555
- "baseline.save_to_disk(\"../data/datasets/ms_macro/\")"
556
- ]
557
- },
558
- {
559
- "cell_type": "code",
560
- "execution_count": 44,
561
- "id": "b72bf3f9",
562
- "metadata": {},
563
- "outputs": [
564
- {
565
- "data": {
566
- "text/plain": [
567
- "Dataset({\n",
568
- " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
569
- " num_rows: 500\n",
570
- "})"
571
- ]
572
- },
573
- "execution_count": 44,
574
- "metadata": {},
575
- "output_type": "execute_result"
576
- }
577
- ],
578
- "source": [
579
- "from datasets import load_from_disk\n",
580
- "\n",
581
- "new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
582
- "new_ds"
583
- ]
584
- },
585
- {
586
- "cell_type": "code",
587
- "execution_count": 45,
588
- "id": "051bd771",
589
- "metadata": {},
590
- "outputs": [
591
- {
592
- "data": {
593
- "text/plain": [
594
- "({'NUMERIC': 100,\n",
595
- " 'DESCRIPTION': 100,\n",
596
- " 'ENTITY': 100,\n",
597
- " 'PERSON': 100,\n",
598
- " 'LOCATION': 100},\n",
599
- " {'NUMERIC': 179,\n",
600
- " 'DESCRIPTION': 215,\n",
601
- " 'ENTITY': 443,\n",
602
- " 'LOCATION': 461,\n",
603
- " 'PERSON': 499})"
604
- ]
605
- },
606
- "execution_count": 45,
607
- "metadata": {},
608
- "output_type": "execute_result"
609
- }
610
- ],
611
- "source": [
612
- "counts = {}\n",
613
- "indices = {}\n",
614
- "size = 100\n",
615
- "for i in range(new_ds.num_rows):\n",
616
- " row = new_ds[i]\n",
617
- " query_type = row[\"query_type\"]\n",
618
- " if query_type in counts:\n",
619
- " counts[query_type] += 1\n",
620
- " else:\n",
621
- " counts[query_type] = 1\n",
622
- " if counts[query_type] == size:\n",
623
- " indices[query_type] = i\n",
624
- "counts, indices"
625
- ]
626
- },
627
- {
628
- "cell_type": "code",
629
- "execution_count": 46,
630
- "id": "db48dcc4",
631
- "metadata": {},
632
- "outputs": [
633
- {
634
- "data": {
635
- "text/html": [
636
- "<div>\n",
637
- "<style scoped>\n",
638
- " .dataframe tbody tr th:only-of-type {\n",
639
- " vertical-align: middle;\n",
640
- " }\n",
641
- "\n",
642
- " .dataframe tbody tr th {\n",
643
- " vertical-align: top;\n",
644
- " }\n",
645
- "\n",
646
- " .dataframe thead th {\n",
647
- " text-align: right;\n",
648
- " }\n",
649
- "</style>\n",
650
- "<table border=\"1\" class=\"dataframe\">\n",
651
- " <thead>\n",
652
- " <tr style=\"text-align: right;\">\n",
653
- " <th></th>\n",
654
- " <th>answers</th>\n",
655
- " <th>passages</th>\n",
656
- " <th>query</th>\n",
657
- " <th>query_id</th>\n",
658
- " <th>query_type</th>\n",
659
- " <th>wellFormedAnswers</th>\n",
660
- " </tr>\n",
661
- " </thead>\n",
662
- " <tbody>\n",
663
- " <tr>\n",
664
- " <th>0</th>\n",
665
- " <td>[2,662]</td>\n",
666
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
667
- " <td>albany mn population</td>\n",
668
- " <td>15177</td>\n",
669
- " <td>NUMERIC</td>\n",
670
- " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
671
- " </tr>\n",
672
- " <tr>\n",
673
- " <th>1</th>\n",
674
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
675
- " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
676
- " <td>current weather in volcano, ca</td>\n",
677
- " <td>114414</td>\n",
678
- " <td>DESCRIPTION</td>\n",
679
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
680
- " </tr>\n",
681
- " <tr>\n",
682
- " <th>2</th>\n",
683
- " <td>[Hippocrates]</td>\n",
684
- " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
685
- " <td>____________________ is considered the father ...</td>\n",
686
- " <td>9083</td>\n",
687
- " <td>DESCRIPTION</td>\n",
688
- " <td>[Hippocrates is considered the father of moder...</td>\n",
689
- " </tr>\n",
690
- " <tr>\n",
691
- " <th>3</th>\n",
692
- " <td>[120 days from the date of the Note.]</td>\n",
693
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
694
- " <td>how many days is an appraisal good for a fanni...</td>\n",
695
- " <td>281439</td>\n",
696
- " <td>NUMERIC</td>\n",
697
- " <td>[An appraisal is good for 120 days from the da...</td>\n",
698
- " </tr>\n",
699
- " <tr>\n",
700
- " <th>4</th>\n",
701
- " <td>[From $26,000 to $39,000 a year]</td>\n",
702
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
703
- " <td>average pharmacy tech salary</td>\n",
704
- " <td>40287</td>\n",
705
- " <td>NUMERIC</td>\n",
706
- " <td>[The average salary for a pharmacy technician ...</td>\n",
707
- " </tr>\n",
708
- " <tr>\n",
709
- " <th>...</th>\n",
710
- " <td>...</td>\n",
711
- " <td>...</td>\n",
712
- " <td>...</td>\n",
713
- " <td>...</td>\n",
714
- " <td>...</td>\n",
715
- " <td>...</td>\n",
716
- " </tr>\n",
717
- " <tr>\n",
718
- " <th>495</th>\n",
719
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
720
- " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
721
- " <td>the pool shower company</td>\n",
722
- " <td>518269</td>\n",
723
- " <td>PERSON</td>\n",
724
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
725
- " </tr>\n",
726
- " <tr>\n",
727
- " <th>496</th>\n",
728
- " <td>[Hanson]</td>\n",
729
- " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
730
- " <td>longest tenured american football players</td>\n",
731
- " <td>442806</td>\n",
732
- " <td>PERSON</td>\n",
733
- " <td>[Hanson is the longest tenured American footba...</td>\n",
734
- " </tr>\n",
735
- " <tr>\n",
736
- " <th>497</th>\n",
737
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
738
- " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
739
- " <td>mt. view baptist in pendleton sc</td>\n",
740
- " <td>460250</td>\n",
741
- " <td>PERSON</td>\n",
742
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
743
- " </tr>\n",
744
- " <tr>\n",
745
- " <th>498</th>\n",
746
- " <td>[Honeysuckle Weeks]</td>\n",
747
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
748
- " <td>what actress disappeared for a while</td>\n",
749
- " <td>549739</td>\n",
750
- " <td>PERSON</td>\n",
751
- " <td>[The actress disappeared for a while Honeysuck...</td>\n",
752
- " </tr>\n",
753
- " <tr>\n",
754
- " <th>499</th>\n",
755
- " <td>[African-Nguni]</td>\n",
756
- " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
757
- " <td>what ethnicity is the surname sabol</td>\n",
758
- " <td>658265</td>\n",
759
- " <td>PERSON</td>\n",
760
- " <td>[The ethnicity of the surname Sabol is African...</td>\n",
761
- " </tr>\n",
762
- " </tbody>\n",
763
- "</table>\n",
764
- "<p>500 rows × 6 columns</p>\n",
765
- "</div>"
766
- ],
767
- "text/plain": [
768
- " answers \\\n",
769
- "0 [2,662] \n",
770
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
771
- "2 [Hippocrates] \n",
772
- "3 [120 days from the date of the Note.] \n",
773
- "4 [From $26,000 to $39,000 a year] \n",
774
- ".. ... \n",
775
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
776
- "496 [Hanson] \n",
777
- "497 [Mount Able Baptist Church is located at the a... \n",
778
- "498 [Honeysuckle Weeks] \n",
779
- "499 [African-Nguni] \n",
780
- "\n",
781
- " passages \\\n",
782
- "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
783
- "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
784
- "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
785
- "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
786
- "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
787
- ".. ... \n",
788
- "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
789
- "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
790
- "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
791
- "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
792
- "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
793
- "\n",
794
- " query query_id query_type \\\n",
795
- "0 albany mn population 15177 NUMERIC \n",
796
- "1 current weather in volcano, ca 114414 DESCRIPTION \n",
797
- "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
798
- "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
799
- "4 average pharmacy tech salary 40287 NUMERIC \n",
800
- ".. ... ... ... \n",
801
- "495 the pool shower company 518269 PERSON \n",
802
- "496 longest tenured american football players 442806 PERSON \n",
803
- "497 mt. view baptist in pendleton sc 460250 PERSON \n",
804
- "498 what actress disappeared for a while 549739 PERSON \n",
805
- "499 what ethnicity is the surname sabol 658265 PERSON \n",
806
- "\n",
807
- " wellFormedAnswers \n",
808
- "0 [The population of Albany, Minnesota is 2,662. ] \n",
809
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
810
- "2 [Hippocrates is considered the father of moder... \n",
811
- "3 [An appraisal is good for 120 days from the da... \n",
812
- "4 [The average salary for a pharmacy technician ... \n",
813
- ".. ... \n",
814
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
815
- "496 [Hanson is the longest tenured American footba... \n",
816
- "497 [Mount Able Baptist Church is located at the a... \n",
817
- "498 [The actress disappeared for a while Honeysuck... \n",
818
- "499 [The ethnicity of the surname Sabol is African... \n",
819
- "\n",
820
- "[500 rows x 6 columns]"
821
- ]
822
- },
823
- "execution_count": 46,
824
- "metadata": {},
825
- "output_type": "execute_result"
826
- }
827
- ],
828
- "source": [
829
- "new_ds.to_pandas()"
830
- ]
831
- },
832
- {
833
- "cell_type": "code",
834
- "execution_count": 47,
835
- "id": "7ed0c22d",
836
- "metadata": {},
837
- "outputs": [],
838
- "source": [
839
- "\"\"\"\n",
840
- "Official evaluation script for QAConv, modified from SQuAD 2.0.\n",
841
- "\n",
842
- " * Copyright (c) 2021, salesforce.com, inc.\n",
843
- " * All rights reserved.\n",
844
- " * SPDX-License-Identifier: BSD-3-Clause\n",
845
- " * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause\n",
846
- "\n",
847
- "\"\"\"\n",
848
- "\n",
849
- "import collections\n",
850
- "import re\n",
851
- "import string\n",
852
- "\n",
853
- "\n",
854
- "def normalize_answer(s):\n",
855
- " \"\"\"Lower text and remove punctuation, articles and extra whitespace.\"\"\"\n",
856
- "\n",
857
- " def remove_articles(text):\n",
858
- " regex = re.compile(r\"\\b(a|an|the)\\b\", re.UNICODE)\n",
859
- " return re.sub(regex, \" \", text)\n",
860
- "\n",
861
- " def white_space_fix(text):\n",
862
- " return \" \".join(text.split())\n",
863
- "\n",
864
- " def remove_punc(text):\n",
865
- " exclude = set(string.punctuation)\n",
866
- " return \"\".join(ch for ch in text if ch not in exclude)\n",
867
- "\n",
868
- " def lower(text):\n",
869
- " return text.lower()\n",
870
- "\n",
871
- " return white_space_fix(remove_articles(remove_punc(lower(s))))\n",
872
- "\n",
873
- "\n",
874
- "def get_tokens(s):\n",
875
- " if not s:\n",
876
- " return []\n",
877
- " return normalize_answer(s).split()\n",
878
- "\n",
879
- "\n",
880
- "def compute_exact(a_gold, a_pred):\n",
881
- " return int(normalize_answer(a_gold) == normalize_answer(a_pred))\n",
882
- "\n",
883
- "\n",
884
- "def compute_f1(a_gold, a_pred):\n",
885
- " gold_toks = get_tokens(a_gold)\n",
886
- " pred_toks = get_tokens(a_pred)\n",
887
- " common = collections.Counter(gold_toks) & collections.Counter(pred_toks)\n",
888
- " num_same = sum(common.values())\n",
889
- " if len(gold_toks) == 0 or len(pred_toks) == 0:\n",
890
- " # If either is no-answer, then F1 is 1 if they agree, 0 otherwise\n",
891
- " return int(gold_toks == pred_toks)\n",
892
- " if num_same == 0:\n",
893
- " return 0\n",
894
- " precision = 1.0 * num_same / len(pred_toks)\n",
895
- " recall = 1.0 * num_same / len(gold_toks)\n",
896
- " f1 = (2 * precision * recall) / (precision + recall)\n",
897
- " return f1"
898
- ]
899
- },
900
- {
901
- "cell_type": "code",
902
- "execution_count": 49,
903
- "id": "d9ff4756",
904
- "metadata": {},
905
- "outputs": [
906
- {
907
- "data": {
908
- "application/vnd.jupyter.widget-view+json": {
909
- "model_id": "d8a8d425f60a467eb56f6a13a50ed94b",
910
- "version_major": 2,
911
- "version_minor": 0
912
- },
913
- "text/plain": [
914
- "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
915
- ]
916
- },
917
- "metadata": {},
918
- "output_type": "display_data"
919
- },
920
- {
921
- "data": {
922
- "text/plain": [
923
- "Dataset({\n",
924
- " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'EM', 'F1'],\n",
925
- " num_rows: 500\n",
926
- "})"
927
- ]
928
- },
929
- "execution_count": 49,
930
- "metadata": {},
931
- "output_type": "execute_result"
932
- }
933
- ],
934
- "source": [
935
- "result_all = new_ds.map(\n",
936
- " lambda record, idx: {\n",
937
- " \"EM\": compute_exact(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
938
- " \"F1\": compute_f1(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
939
- " },\n",
940
- " batched=False,\n",
941
- " with_indices=True,\n",
942
- ")\n",
943
- "result_all"
944
- ]
945
- },
946
- {
947
- "cell_type": "code",
948
- "execution_count": 50,
949
- "id": "31402fb2",
950
- "metadata": {},
951
- "outputs": [
952
- {
953
- "data": {
954
- "text/html": [
955
- "<div>\n",
956
- "<style scoped>\n",
957
- " .dataframe tbody tr th:only-of-type {\n",
958
- " vertical-align: middle;\n",
959
- " }\n",
960
- "\n",
961
- " .dataframe tbody tr th {\n",
962
- " vertical-align: top;\n",
963
- " }\n",
964
- "\n",
965
- " .dataframe thead th {\n",
966
- " text-align: right;\n",
967
- " }\n",
968
- "</style>\n",
969
- "<table border=\"1\" class=\"dataframe\">\n",
970
- " <thead>\n",
971
- " <tr style=\"text-align: right;\">\n",
972
- " <th></th>\n",
973
- " <th>answers</th>\n",
974
- " <th>passages</th>\n",
975
- " <th>query</th>\n",
976
- " <th>query_id</th>\n",
977
- " <th>query_type</th>\n",
978
- " <th>wellFormedAnswers</th>\n",
979
- " <th>EM</th>\n",
980
- " <th>F1</th>\n",
981
- " </tr>\n",
982
- " </thead>\n",
983
- " <tbody>\n",
984
- " <tr>\n",
985
- " <th>0</th>\n",
986
- " <td>[2,662]</td>\n",
987
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
988
- " <td>albany mn population</td>\n",
989
- " <td>15177</td>\n",
990
- " <td>NUMERIC</td>\n",
991
- " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
992
- " <td>0</td>\n",
993
- " <td>0.285714</td>\n",
994
- " </tr>\n",
995
- " <tr>\n",
996
- " <th>1</th>\n",
997
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
998
- " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
999
- " <td>current weather in volcano, ca</td>\n",
1000
- " <td>114414</td>\n",
1001
- " <td>DESCRIPTION</td>\n",
1002
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1003
- " <td>1</td>\n",
1004
- " <td>1.000000</td>\n",
1005
- " </tr>\n",
1006
- " <tr>\n",
1007
- " <th>2</th>\n",
1008
- " <td>[Hippocrates]</td>\n",
1009
- " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1010
- " <td>____________________ is considered the father ...</td>\n",
1011
- " <td>9083</td>\n",
1012
- " <td>DESCRIPTION</td>\n",
1013
- " <td>[Hippocrates is considered the father of moder...</td>\n",
1014
- " <td>0</td>\n",
1015
- " <td>0.250000</td>\n",
1016
- " </tr>\n",
1017
- " <tr>\n",
1018
- " <th>3</th>\n",
1019
- " <td>[120 days from the date of the Note.]</td>\n",
1020
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1021
- " <td>how many days is an appraisal good for a fanni...</td>\n",
1022
- " <td>281439</td>\n",
1023
- " <td>NUMERIC</td>\n",
1024
- " <td>[An appraisal is good for 120 days from the da...</td>\n",
1025
- " <td>0</td>\n",
1026
- " <td>0.631579</td>\n",
1027
- " </tr>\n",
1028
- " <tr>\n",
1029
- " <th>4</th>\n",
1030
- " <td>[From $26,000 to $39,000 a year]</td>\n",
1031
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1032
- " <td>average pharmacy tech salary</td>\n",
1033
- " <td>40287</td>\n",
1034
- " <td>NUMERIC</td>\n",
1035
- " <td>[The average salary for a pharmacy technician ...</td>\n",
1036
- " <td>0</td>\n",
1037
- " <td>0.500000</td>\n",
1038
- " </tr>\n",
1039
- " <tr>\n",
1040
- " <th>...</th>\n",
1041
- " <td>...</td>\n",
1042
- " <td>...</td>\n",
1043
- " <td>...</td>\n",
1044
- " <td>...</td>\n",
1045
- " <td>...</td>\n",
1046
- " <td>...</td>\n",
1047
- " <td>...</td>\n",
1048
- " <td>...</td>\n",
1049
- " </tr>\n",
1050
- " <tr>\n",
1051
- " <th>495</th>\n",
1052
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1053
- " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1054
- " <td>the pool shower company</td>\n",
1055
- " <td>518269</td>\n",
1056
- " <td>PERSON</td>\n",
1057
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1058
- " <td>1</td>\n",
1059
- " <td>1.000000</td>\n",
1060
- " </tr>\n",
1061
- " <tr>\n",
1062
- " <th>496</th>\n",
1063
- " <td>[Hanson]</td>\n",
1064
- " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1065
- " <td>longest tenured american football players</td>\n",
1066
- " <td>442806</td>\n",
1067
- " <td>PERSON</td>\n",
1068
- " <td>[Hanson is the longest tenured American footba...</td>\n",
1069
- " <td>0</td>\n",
1070
- " <td>0.250000</td>\n",
1071
- " </tr>\n",
1072
- " <tr>\n",
1073
- " <th>497</th>\n",
1074
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1075
- " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1076
- " <td>mt. view baptist in pendleton sc</td>\n",
1077
- " <td>460250</td>\n",
1078
- " <td>PERSON</td>\n",
1079
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1080
- " <td>1</td>\n",
1081
- " <td>1.000000</td>\n",
1082
- " </tr>\n",
1083
- " <tr>\n",
1084
- " <th>498</th>\n",
1085
- " <td>[Honeysuckle Weeks]</td>\n",
1086
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1087
- " <td>what actress disappeared for a while</td>\n",
1088
- " <td>549739</td>\n",
1089
- " <td>PERSON</td>\n",
1090
- " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1091
- " <td>0</td>\n",
1092
- " <td>0.500000</td>\n",
1093
- " </tr>\n",
1094
- " <tr>\n",
1095
- " <th>499</th>\n",
1096
- " <td>[African-Nguni]</td>\n",
1097
- " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1098
- " <td>what ethnicity is the surname sabol</td>\n",
1099
- " <td>658265</td>\n",
1100
- " <td>PERSON</td>\n",
1101
- " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1102
- " <td>0</td>\n",
1103
- " <td>0.285714</td>\n",
1104
- " </tr>\n",
1105
- " </tbody>\n",
1106
- "</table>\n",
1107
- "<p>500 rows × 8 columns</p>\n",
1108
- "</div>"
1109
- ],
1110
- "text/plain": [
1111
- " answers \\\n",
1112
- "0 [2,662] \n",
1113
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1114
- "2 [Hippocrates] \n",
1115
- "3 [120 days from the date of the Note.] \n",
1116
- "4 [From $26,000 to $39,000 a year] \n",
1117
- ".. ... \n",
1118
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1119
- "496 [Hanson] \n",
1120
- "497 [Mount Able Baptist Church is located at the a... \n",
1121
- "498 [Honeysuckle Weeks] \n",
1122
- "499 [African-Nguni] \n",
1123
- "\n",
1124
- " passages \\\n",
1125
- "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1126
- "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1127
- "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1128
- "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1129
- "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1130
- ".. ... \n",
1131
- "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1132
- "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1133
- "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1134
- "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1135
- "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1136
- "\n",
1137
- " query query_id query_type \\\n",
1138
- "0 albany mn population 15177 NUMERIC \n",
1139
- "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1140
- "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1141
- "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1142
- "4 average pharmacy tech salary 40287 NUMERIC \n",
1143
- ".. ... ... ... \n",
1144
- "495 the pool shower company 518269 PERSON \n",
1145
- "496 longest tenured american football players 442806 PERSON \n",
1146
- "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1147
- "498 what actress disappeared for a while 549739 PERSON \n",
1148
- "499 what ethnicity is the surname sabol 658265 PERSON \n",
1149
- "\n",
1150
- " wellFormedAnswers EM F1 \n",
1151
- "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1152
- "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1153
- "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1154
- "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1155
- "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1156
- ".. ... .. ... \n",
1157
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1158
- "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1159
- "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1160
- "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1161
- "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1162
- "\n",
1163
- "[500 rows x 8 columns]"
1164
- ]
1165
- },
1166
- "execution_count": 50,
1167
- "metadata": {},
1168
- "output_type": "execute_result"
1169
- }
1170
- ],
1171
- "source": [
1172
- "result_all.to_pandas()"
1173
- ]
1174
- },
1175
- {
1176
- "cell_type": "code",
1177
- "execution_count": 53,
1178
- "id": "af2d4577",
1179
- "metadata": {},
1180
- "outputs": [
1181
- {
1182
- "name": "stdout",
1183
- "output_type": "stream",
1184
- "text": [
1185
- "Note: you may need to restart the kernel to use updated packages.\n"
1186
- ]
1187
- }
1188
- ],
1189
- "source": [
1190
- "%pip install -q evaluate rouge_score"
1191
- ]
1192
- },
1193
- {
1194
- "cell_type": "code",
1195
- "execution_count": 54,
1196
- "id": "89494c3d",
1197
- "metadata": {},
1198
- "outputs": [],
1199
- "source": [
1200
- "import evaluate\n",
1201
- "\n",
1202
- "bleu = evaluate.load(\"bleu\")\n",
1203
- "rouge = evaluate.load(\"rouge\")"
1204
- ]
1205
- },
1206
- {
1207
- "cell_type": "code",
1208
- "execution_count": 56,
1209
- "id": "e447aa08",
1210
- "metadata": {},
1211
- "outputs": [
1212
- {
1213
- "data": {
1214
- "application/vnd.jupyter.widget-view+json": {
1215
- "model_id": "c87066449ebc44d39a66b1630977f2ac",
1216
- "version_major": 2,
1217
- "version_minor": 0
1218
- },
1219
- "text/plain": [
1220
- "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
1221
- ]
1222
- },
1223
- "metadata": {},
1224
- "output_type": "display_data"
1225
- },
1226
- {
1227
- "data": {
1228
- "text/html": [
1229
- "<div>\n",
1230
- "<style scoped>\n",
1231
- " .dataframe tbody tr th:only-of-type {\n",
1232
- " vertical-align: middle;\n",
1233
- " }\n",
1234
- "\n",
1235
- " .dataframe tbody tr th {\n",
1236
- " vertical-align: top;\n",
1237
- " }\n",
1238
- "\n",
1239
- " .dataframe thead th {\n",
1240
- " text-align: right;\n",
1241
- " }\n",
1242
- "</style>\n",
1243
- "<table border=\"1\" class=\"dataframe\">\n",
1244
- " <thead>\n",
1245
- " <tr style=\"text-align: right;\">\n",
1246
- " <th></th>\n",
1247
- " <th>answers</th>\n",
1248
- " <th>passages</th>\n",
1249
- " <th>query</th>\n",
1250
- " <th>query_id</th>\n",
1251
- " <th>query_type</th>\n",
1252
- " <th>wellFormedAnswers</th>\n",
1253
- " <th>EM</th>\n",
1254
- " <th>F1</th>\n",
1255
- " <th>bleu</th>\n",
1256
- " <th>precisions</th>\n",
1257
- " <th>brevity_penalty</th>\n",
1258
- " <th>length_ratio</th>\n",
1259
- " <th>translation_length</th>\n",
1260
- " <th>reference_length</th>\n",
1261
- " </tr>\n",
1262
- " </thead>\n",
1263
- " <tbody>\n",
1264
- " <tr>\n",
1265
- " <th>0</th>\n",
1266
- " <td>[2,662]</td>\n",
1267
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
1268
- " <td>albany mn population</td>\n",
1269
- " <td>15177</td>\n",
1270
- " <td>NUMERIC</td>\n",
1271
- " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
1272
- " <td>0</td>\n",
1273
- " <td>0.285714</td>\n",
1274
- " <td>0.000000</td>\n",
1275
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1276
- " <td>0.000335</td>\n",
1277
- " <td>0.111111</td>\n",
1278
- " <td>1</td>\n",
1279
- " <td>9</td>\n",
1280
- " </tr>\n",
1281
- " <tr>\n",
1282
- " <th>1</th>\n",
1283
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1284
- " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
1285
- " <td>current weather in volcano, ca</td>\n",
1286
- " <td>114414</td>\n",
1287
- " <td>DESCRIPTION</td>\n",
1288
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1289
- " <td>1</td>\n",
1290
- " <td>1.000000</td>\n",
1291
- " <td>1.000000</td>\n",
1292
- " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1293
- " <td>1.000000</td>\n",
1294
- " <td>1.000000</td>\n",
1295
- " <td>14</td>\n",
1296
- " <td>14</td>\n",
1297
- " </tr>\n",
1298
- " <tr>\n",
1299
- " <th>2</th>\n",
1300
- " <td>[Hippocrates]</td>\n",
1301
- " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1302
- " <td>____________________ is considered the father ...</td>\n",
1303
- " <td>9083</td>\n",
1304
- " <td>DESCRIPTION</td>\n",
1305
- " <td>[Hippocrates is considered the father of moder...</td>\n",
1306
- " <td>0</td>\n",
1307
- " <td>0.250000</td>\n",
1308
- " <td>0.000000</td>\n",
1309
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1310
- " <td>0.000335</td>\n",
1311
- " <td>0.111111</td>\n",
1312
- " <td>1</td>\n",
1313
- " <td>9</td>\n",
1314
- " </tr>\n",
1315
- " <tr>\n",
1316
- " <th>3</th>\n",
1317
- " <td>[120 days from the date of the Note.]</td>\n",
1318
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1319
- " <td>how many days is an appraisal good for a fanni...</td>\n",
1320
- " <td>281439</td>\n",
1321
- " <td>NUMERIC</td>\n",
1322
- " <td>[An appraisal is good for 120 days from the da...</td>\n",
1323
- " <td>0</td>\n",
1324
- " <td>0.631579</td>\n",
1325
- " <td>0.327096</td>\n",
1326
- " <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
1327
- " <td>0.367879</td>\n",
1328
- " <td>0.500000</td>\n",
1329
- " <td>9</td>\n",
1330
- " <td>18</td>\n",
1331
- " </tr>\n",
1332
- " <tr>\n",
1333
- " <th>4</th>\n",
1334
- " <td>[From $26,000 to $39,000 a year]</td>\n",
1335
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1336
- " <td>average pharmacy tech salary</td>\n",
1337
- " <td>40287</td>\n",
1338
- " <td>NUMERIC</td>\n",
1339
- " <td>[The average salary for a pharmacy technician ...</td>\n",
1340
- " <td>0</td>\n",
1341
- " <td>0.500000</td>\n",
1342
- " <td>0.193040</td>\n",
1343
- " <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
1344
- " <td>0.324652</td>\n",
1345
- " <td>0.470588</td>\n",
1346
- " <td>8</td>\n",
1347
- " <td>17</td>\n",
1348
- " </tr>\n",
1349
- " <tr>\n",
1350
- " <th>...</th>\n",
1351
- " <td>...</td>\n",
1352
- " <td>...</td>\n",
1353
- " <td>...</td>\n",
1354
- " <td>...</td>\n",
1355
- " <td>...</td>\n",
1356
- " <td>...</td>\n",
1357
- " <td>...</td>\n",
1358
- " <td>...</td>\n",
1359
- " <td>...</td>\n",
1360
- " <td>...</td>\n",
1361
- " <td>...</td>\n",
1362
- " <td>...</td>\n",
1363
- " <td>...</td>\n",
1364
- " <td>...</td>\n",
1365
- " </tr>\n",
1366
- " <tr>\n",
1367
- " <th>495</th>\n",
1368
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1369
- " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1370
- " <td>the pool shower company</td>\n",
1371
- " <td>518269</td>\n",
1372
- " <td>PERSON</td>\n",
1373
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1374
- " <td>1</td>\n",
1375
- " <td>1.000000</td>\n",
1376
- " <td>1.000000</td>\n",
1377
- " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1378
- " <td>1.000000</td>\n",
1379
- " <td>1.000000</td>\n",
1380
- " <td>19</td>\n",
1381
- " <td>19</td>\n",
1382
- " </tr>\n",
1383
- " <tr>\n",
1384
- " <th>496</th>\n",
1385
- " <td>[Hanson]</td>\n",
1386
- " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1387
- " <td>longest tenured american football players</td>\n",
1388
- " <td>442806</td>\n",
1389
- " <td>PERSON</td>\n",
1390
- " <td>[Hanson is the longest tenured American footba...</td>\n",
1391
- " <td>0</td>\n",
1392
- " <td>0.250000</td>\n",
1393
- " <td>0.000000</td>\n",
1394
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1395
- " <td>0.000335</td>\n",
1396
- " <td>0.111111</td>\n",
1397
- " <td>1</td>\n",
1398
- " <td>9</td>\n",
1399
- " </tr>\n",
1400
- " <tr>\n",
1401
- " <th>497</th>\n",
1402
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1403
- " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1404
- " <td>mt. view baptist in pendleton sc</td>\n",
1405
- " <td>460250</td>\n",
1406
- " <td>PERSON</td>\n",
1407
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1408
- " <td>1</td>\n",
1409
- " <td>1.000000</td>\n",
1410
- " <td>1.000000</td>\n",
1411
- " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1412
- " <td>1.000000</td>\n",
1413
- " <td>1.000000</td>\n",
1414
- " <td>21</td>\n",
1415
- " <td>21</td>\n",
1416
- " </tr>\n",
1417
- " <tr>\n",
1418
- " <th>498</th>\n",
1419
- " <td>[Honeysuckle Weeks]</td>\n",
1420
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1421
- " <td>what actress disappeared for a while</td>\n",
1422
- " <td>549739</td>\n",
1423
- " <td>PERSON</td>\n",
1424
- " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1425
- " <td>0</td>\n",
1426
- " <td>0.500000</td>\n",
1427
- " <td>0.000000</td>\n",
1428
- " <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
1429
- " <td>0.030197</td>\n",
1430
- " <td>0.222222</td>\n",
1431
- " <td>2</td>\n",
1432
- " <td>9</td>\n",
1433
- " </tr>\n",
1434
- " <tr>\n",
1435
- " <th>499</th>\n",
1436
- " <td>[African-Nguni]</td>\n",
1437
- " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1438
- " <td>what ethnicity is the surname sabol</td>\n",
1439
- " <td>658265</td>\n",
1440
- " <td>PERSON</td>\n",
1441
- " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1442
- " <td>0</td>\n",
1443
- " <td>0.285714</td>\n",
1444
- " <td>0.000000</td>\n",
1445
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1446
- " <td>0.000335</td>\n",
1447
- " <td>0.111111</td>\n",
1448
- " <td>1</td>\n",
1449
- " <td>9</td>\n",
1450
- " </tr>\n",
1451
- " </tbody>\n",
1452
- "</table>\n",
1453
- "<p>500 rows × 14 columns</p>\n",
1454
- "</div>"
1455
- ],
1456
- "text/plain": [
1457
- " answers \\\n",
1458
- "0 [2,662] \n",
1459
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1460
- "2 [Hippocrates] \n",
1461
- "3 [120 days from the date of the Note.] \n",
1462
- "4 [From $26,000 to $39,000 a year] \n",
1463
- ".. ... \n",
1464
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1465
- "496 [Hanson] \n",
1466
- "497 [Mount Able Baptist Church is located at the a... \n",
1467
- "498 [Honeysuckle Weeks] \n",
1468
- "499 [African-Nguni] \n",
1469
- "\n",
1470
- " passages \\\n",
1471
- "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1472
- "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1473
- "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1474
- "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1475
- "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1476
- ".. ... \n",
1477
- "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1478
- "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1479
- "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1480
- "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1481
- "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1482
- "\n",
1483
- " query query_id query_type \\\n",
1484
- "0 albany mn population 15177 NUMERIC \n",
1485
- "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1486
- "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1487
- "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1488
- "4 average pharmacy tech salary 40287 NUMERIC \n",
1489
- ".. ... ... ... \n",
1490
- "495 the pool shower company 518269 PERSON \n",
1491
- "496 longest tenured american football players 442806 PERSON \n",
1492
- "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1493
- "498 what actress disappeared for a while 549739 PERSON \n",
1494
- "499 what ethnicity is the surname sabol 658265 PERSON \n",
1495
- "\n",
1496
- " wellFormedAnswers EM F1 \\\n",
1497
- "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1498
- "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1499
- "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1500
- "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1501
- "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1502
- ".. ... .. ... \n",
1503
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1504
- "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1505
- "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1506
- "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1507
- "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1508
- "\n",
1509
- " bleu precisions \\\n",
1510
- "0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1511
- "1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1512
- "2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1513
- "3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
1514
- "4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
1515
- ".. ... ... \n",
1516
- "495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1517
- "496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1518
- "497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1519
- "498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
1520
- "499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1521
- "\n",
1522
- " brevity_penalty length_ratio translation_length reference_length \n",
1523
- "0 0.000335 0.111111 1 9 \n",
1524
- "1 1.000000 1.000000 14 14 \n",
1525
- "2 0.000335 0.111111 1 9 \n",
1526
- "3 0.367879 0.500000 9 18 \n",
1527
- "4 0.324652 0.470588 8 17 \n",
1528
- ".. ... ... ... ... \n",
1529
- "495 1.000000 1.000000 19 19 \n",
1530
- "496 0.000335 0.111111 1 9 \n",
1531
- "497 1.000000 1.000000 21 21 \n",
1532
- "498 0.030197 0.222222 2 9 \n",
1533
- "499 0.000335 0.111111 1 9 \n",
1534
- "\n",
1535
- "[500 rows x 14 columns]"
1536
- ]
1537
- },
1538
- "execution_count": 56,
1539
- "metadata": {},
1540
- "output_type": "execute_result"
1541
- }
1542
- ],
1543
- "source": [
1544
- "result_all = result_all.map(\n",
1545
- " lambda record: bleu.compute(\n",
1546
- " predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
1547
- " ),\n",
1548
- " batched=False,\n",
1549
- ")\n",
1550
- "result_all.to_pandas()"
1551
- ]
1552
- },
1553
- {
1554
- "cell_type": "code",
1555
- "execution_count": 57,
1556
- "id": "fbbe31fd",
1557
- "metadata": {},
1558
- "outputs": [
1559
- {
1560
- "data": {
1561
- "application/vnd.jupyter.widget-view+json": {
1562
- "model_id": "88f839b74aa54fcd8c95215e22e30472",
1563
- "version_major": 2,
1564
- "version_minor": 0
1565
- },
1566
- "text/plain": [
1567
- "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
1568
- ]
1569
- },
1570
- "metadata": {},
1571
- "output_type": "display_data"
1572
- },
1573
- {
1574
- "data": {
1575
- "text/html": [
1576
- "<div>\n",
1577
- "<style scoped>\n",
1578
- " .dataframe tbody tr th:only-of-type {\n",
1579
- " vertical-align: middle;\n",
1580
- " }\n",
1581
- "\n",
1582
- " .dataframe tbody tr th {\n",
1583
- " vertical-align: top;\n",
1584
- " }\n",
1585
- "\n",
1586
- " .dataframe thead th {\n",
1587
- " text-align: right;\n",
1588
- " }\n",
1589
- "</style>\n",
1590
- "<table border=\"1\" class=\"dataframe\">\n",
1591
- " <thead>\n",
1592
- " <tr style=\"text-align: right;\">\n",
1593
- " <th></th>\n",
1594
- " <th>answers</th>\n",
1595
- " <th>passages</th>\n",
1596
- " <th>query</th>\n",
1597
- " <th>query_id</th>\n",
1598
- " <th>query_type</th>\n",
1599
- " <th>wellFormedAnswers</th>\n",
1600
- " <th>EM</th>\n",
1601
- " <th>F1</th>\n",
1602
- " <th>bleu</th>\n",
1603
- " <th>precisions</th>\n",
1604
- " <th>brevity_penalty</th>\n",
1605
- " <th>length_ratio</th>\n",
1606
- " <th>translation_length</th>\n",
1607
- " <th>reference_length</th>\n",
1608
- " <th>rouge1</th>\n",
1609
- " <th>rouge2</th>\n",
1610
- " <th>rougeL</th>\n",
1611
- " <th>rougeLsum</th>\n",
1612
- " </tr>\n",
1613
- " </thead>\n",
1614
- " <tbody>\n",
1615
- " <tr>\n",
1616
- " <th>0</th>\n",
1617
- " <td>[2,662]</td>\n",
1618
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
1619
- " <td>albany mn population</td>\n",
1620
- " <td>15177</td>\n",
1621
- " <td>NUMERIC</td>\n",
1622
- " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
1623
- " <td>0</td>\n",
1624
- " <td>0.285714</td>\n",
1625
- " <td>0.000000</td>\n",
1626
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1627
- " <td>0.000335</td>\n",
1628
- " <td>0.111111</td>\n",
1629
- " <td>1</td>\n",
1630
- " <td>9</td>\n",
1631
- " <td>0.400000</td>\n",
1632
- " <td>0.250000</td>\n",
1633
- " <td>0.400000</td>\n",
1634
- " <td>0.400000</td>\n",
1635
- " </tr>\n",
1636
- " <tr>\n",
1637
- " <th>1</th>\n",
1638
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1639
- " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
1640
- " <td>current weather in volcano, ca</td>\n",
1641
- " <td>114414</td>\n",
1642
- " <td>DESCRIPTION</td>\n",
1643
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1644
- " <td>1</td>\n",
1645
- " <td>1.000000</td>\n",
1646
- " <td>1.000000</td>\n",
1647
- " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1648
- " <td>1.000000</td>\n",
1649
- " <td>1.000000</td>\n",
1650
- " <td>14</td>\n",
1651
- " <td>14</td>\n",
1652
- " <td>1.000000</td>\n",
1653
- " <td>1.000000</td>\n",
1654
- " <td>1.000000</td>\n",
1655
- " <td>1.000000</td>\n",
1656
- " </tr>\n",
1657
- " <tr>\n",
1658
- " <th>2</th>\n",
1659
- " <td>[Hippocrates]</td>\n",
1660
- " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1661
- " <td>____________________ is considered the father ...</td>\n",
1662
- " <td>9083</td>\n",
1663
- " <td>DESCRIPTION</td>\n",
1664
- " <td>[Hippocrates is considered the father of moder...</td>\n",
1665
- " <td>0</td>\n",
1666
- " <td>0.250000</td>\n",
1667
- " <td>0.000000</td>\n",
1668
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1669
- " <td>0.000335</td>\n",
1670
- " <td>0.111111</td>\n",
1671
- " <td>1</td>\n",
1672
- " <td>9</td>\n",
1673
- " <td>0.222222</td>\n",
1674
- " <td>0.000000</td>\n",
1675
- " <td>0.222222</td>\n",
1676
- " <td>0.222222</td>\n",
1677
- " </tr>\n",
1678
- " <tr>\n",
1679
- " <th>3</th>\n",
1680
- " <td>[120 days from the date of the Note.]</td>\n",
1681
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1682
- " <td>how many days is an appraisal good for a fanni...</td>\n",
1683
- " <td>281439</td>\n",
1684
- " <td>NUMERIC</td>\n",
1685
- " <td>[An appraisal is good for 120 days from the da...</td>\n",
1686
- " <td>0</td>\n",
1687
- " <td>0.631579</td>\n",
1688
- " <td>0.327096</td>\n",
1689
- " <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
1690
- " <td>0.367879</td>\n",
1691
- " <td>0.500000</td>\n",
1692
- " <td>9</td>\n",
1693
- " <td>18</td>\n",
1694
- " <td>0.640000</td>\n",
1695
- " <td>0.608696</td>\n",
1696
- " <td>0.640000</td>\n",
1697
- " <td>0.640000</td>\n",
1698
- " </tr>\n",
1699
- " <tr>\n",
1700
- " <th>4</th>\n",
1701
- " <td>[From $26,000 to $39,000 a year]</td>\n",
1702
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1703
- " <td>average pharmacy tech salary</td>\n",
1704
- " <td>40287</td>\n",
1705
- " <td>NUMERIC</td>\n",
1706
- " <td>[The average salary for a pharmacy technician ...</td>\n",
1707
- " <td>0</td>\n",
1708
- " <td>0.500000</td>\n",
1709
- " <td>0.193040</td>\n",
1710
- " <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
1711
- " <td>0.324652</td>\n",
1712
- " <td>0.470588</td>\n",
1713
- " <td>8</td>\n",
1714
- " <td>17</td>\n",
1715
- " <td>0.583333</td>\n",
1716
- " <td>0.454545</td>\n",
1717
- " <td>0.583333</td>\n",
1718
- " <td>0.583333</td>\n",
1719
- " </tr>\n",
1720
- " <tr>\n",
1721
- " <th>...</th>\n",
1722
- " <td>...</td>\n",
1723
- " <td>...</td>\n",
1724
- " <td>...</td>\n",
1725
- " <td>...</td>\n",
1726
- " <td>...</td>\n",
1727
- " <td>...</td>\n",
1728
- " <td>...</td>\n",
1729
- " <td>...</td>\n",
1730
- " <td>...</td>\n",
1731
- " <td>...</td>\n",
1732
- " <td>...</td>\n",
1733
- " <td>...</td>\n",
1734
- " <td>...</td>\n",
1735
- " <td>...</td>\n",
1736
- " <td>...</td>\n",
1737
- " <td>...</td>\n",
1738
- " <td>...</td>\n",
1739
- " <td>...</td>\n",
1740
- " </tr>\n",
1741
- " <tr>\n",
1742
- " <th>495</th>\n",
1743
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1744
- " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1745
- " <td>the pool shower company</td>\n",
1746
- " <td>518269</td>\n",
1747
- " <td>PERSON</td>\n",
1748
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1749
- " <td>1</td>\n",
1750
- " <td>1.000000</td>\n",
1751
- " <td>1.000000</td>\n",
1752
- " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1753
- " <td>1.000000</td>\n",
1754
- " <td>1.000000</td>\n",
1755
- " <td>19</td>\n",
1756
- " <td>19</td>\n",
1757
- " <td>1.000000</td>\n",
1758
- " <td>1.000000</td>\n",
1759
- " <td>1.000000</td>\n",
1760
- " <td>1.000000</td>\n",
1761
- " </tr>\n",
1762
- " <tr>\n",
1763
- " <th>496</th>\n",
1764
- " <td>[Hanson]</td>\n",
1765
- " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1766
- " <td>longest tenured american football players</td>\n",
1767
- " <td>442806</td>\n",
1768
- " <td>PERSON</td>\n",
1769
- " <td>[Hanson is the longest tenured American footba...</td>\n",
1770
- " <td>0</td>\n",
1771
- " <td>0.250000</td>\n",
1772
- " <td>0.000000</td>\n",
1773
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1774
- " <td>0.000335</td>\n",
1775
- " <td>0.111111</td>\n",
1776
- " <td>1</td>\n",
1777
- " <td>9</td>\n",
1778
- " <td>0.222222</td>\n",
1779
- " <td>0.000000</td>\n",
1780
- " <td>0.222222</td>\n",
1781
- " <td>0.222222</td>\n",
1782
- " </tr>\n",
1783
- " <tr>\n",
1784
- " <th>497</th>\n",
1785
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1786
- " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1787
- " <td>mt. view baptist in pendleton sc</td>\n",
1788
- " <td>460250</td>\n",
1789
- " <td>PERSON</td>\n",
1790
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1791
- " <td>1</td>\n",
1792
- " <td>1.000000</td>\n",
1793
- " <td>1.000000</td>\n",
1794
- " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1795
- " <td>1.000000</td>\n",
1796
- " <td>1.000000</td>\n",
1797
- " <td>21</td>\n",
1798
- " <td>21</td>\n",
1799
- " <td>1.000000</td>\n",
1800
- " <td>1.000000</td>\n",
1801
- " <td>1.000000</td>\n",
1802
- " <td>1.000000</td>\n",
1803
- " </tr>\n",
1804
- " <tr>\n",
1805
- " <th>498</th>\n",
1806
- " <td>[Honeysuckle Weeks]</td>\n",
1807
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1808
- " <td>what actress disappeared for a while</td>\n",
1809
- " <td>549739</td>\n",
1810
- " <td>PERSON</td>\n",
1811
- " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1812
- " <td>0</td>\n",
1813
- " <td>0.500000</td>\n",
1814
- " <td>0.000000</td>\n",
1815
- " <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
1816
- " <td>0.030197</td>\n",
1817
- " <td>0.222222</td>\n",
1818
- " <td>2</td>\n",
1819
- " <td>9</td>\n",
1820
- " <td>0.400000</td>\n",
1821
- " <td>0.250000</td>\n",
1822
- " <td>0.400000</td>\n",
1823
- " <td>0.400000</td>\n",
1824
- " </tr>\n",
1825
- " <tr>\n",
1826
- " <th>499</th>\n",
1827
- " <td>[African-Nguni]</td>\n",
1828
- " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1829
- " <td>what ethnicity is the surname sabol</td>\n",
1830
- " <td>658265</td>\n",
1831
- " <td>PERSON</td>\n",
1832
- " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1833
- " <td>0</td>\n",
1834
- " <td>0.285714</td>\n",
1835
- " <td>0.000000</td>\n",
1836
- " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1837
- " <td>0.000335</td>\n",
1838
- " <td>0.111111</td>\n",
1839
- " <td>1</td>\n",
1840
- " <td>9</td>\n",
1841
- " <td>0.363636</td>\n",
1842
- " <td>0.222222</td>\n",
1843
- " <td>0.363636</td>\n",
1844
- " <td>0.363636</td>\n",
1845
- " </tr>\n",
1846
- " </tbody>\n",
1847
- "</table>\n",
1848
- "<p>500 rows × 18 columns</p>\n",
1849
- "</div>"
1850
- ],
1851
- "text/plain": [
1852
- " answers \\\n",
1853
- "0 [2,662] \n",
1854
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1855
- "2 [Hippocrates] \n",
1856
- "3 [120 days from the date of the Note.] \n",
1857
- "4 [From $26,000 to $39,000 a year] \n",
1858
- ".. ... \n",
1859
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1860
- "496 [Hanson] \n",
1861
- "497 [Mount Able Baptist Church is located at the a... \n",
1862
- "498 [Honeysuckle Weeks] \n",
1863
- "499 [African-Nguni] \n",
1864
- "\n",
1865
- " passages \\\n",
1866
- "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1867
- "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1868
- "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1869
- "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1870
- "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1871
- ".. ... \n",
1872
- "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1873
- "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1874
- "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1875
- "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1876
- "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1877
- "\n",
1878
- " query query_id query_type \\\n",
1879
- "0 albany mn population 15177 NUMERIC \n",
1880
- "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1881
- "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1882
- "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1883
- "4 average pharmacy tech salary 40287 NUMERIC \n",
1884
- ".. ... ... ... \n",
1885
- "495 the pool shower company 518269 PERSON \n",
1886
- "496 longest tenured american football players 442806 PERSON \n",
1887
- "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1888
- "498 what actress disappeared for a while 549739 PERSON \n",
1889
- "499 what ethnicity is the surname sabol 658265 PERSON \n",
1890
- "\n",
1891
- " wellFormedAnswers EM F1 \\\n",
1892
- "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1893
- "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1894
- "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1895
- "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1896
- "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1897
- ".. ... .. ... \n",
1898
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1899
- "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1900
- "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1901
- "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1902
- "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1903
- "\n",
1904
- " bleu precisions \\\n",
1905
- "0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1906
- "1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1907
- "2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1908
- "3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
1909
- "4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
1910
- ".. ... ... \n",
1911
- "495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1912
- "496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1913
- "497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1914
- "498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
1915
- "499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1916
- "\n",
1917
- " brevity_penalty length_ratio translation_length reference_length \\\n",
1918
- "0 0.000335 0.111111 1 9 \n",
1919
- "1 1.000000 1.000000 14 14 \n",
1920
- "2 0.000335 0.111111 1 9 \n",
1921
- "3 0.367879 0.500000 9 18 \n",
1922
- "4 0.324652 0.470588 8 17 \n",
1923
- ".. ... ... ... ... \n",
1924
- "495 1.000000 1.000000 19 19 \n",
1925
- "496 0.000335 0.111111 1 9 \n",
1926
- "497 1.000000 1.000000 21 21 \n",
1927
- "498 0.030197 0.222222 2 9 \n",
1928
- "499 0.000335 0.111111 1 9 \n",
1929
- "\n",
1930
- " rouge1 rouge2 rougeL rougeLsum \n",
1931
- "0 0.400000 0.250000 0.400000 0.400000 \n",
1932
- "1 1.000000 1.000000 1.000000 1.000000 \n",
1933
- "2 0.222222 0.000000 0.222222 0.222222 \n",
1934
- "3 0.640000 0.608696 0.640000 0.640000 \n",
1935
- "4 0.583333 0.454545 0.583333 0.583333 \n",
1936
- ".. ... ... ... ... \n",
1937
- "495 1.000000 1.000000 1.000000 1.000000 \n",
1938
- "496 0.222222 0.000000 0.222222 0.222222 \n",
1939
- "497 1.000000 1.000000 1.000000 1.000000 \n",
1940
- "498 0.400000 0.250000 0.400000 0.400000 \n",
1941
- "499 0.363636 0.222222 0.363636 0.363636 \n",
1942
- "\n",
1943
- "[500 rows x 18 columns]"
1944
- ]
1945
- },
1946
- "execution_count": 57,
1947
- "metadata": {},
1948
- "output_type": "execute_result"
1949
- }
1950
- ],
1951
- "source": [
1952
- "result_all = result_all.map(\n",
1953
- " lambda record: rouge.compute(\n",
1954
- " predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
1955
- " ),\n",
1956
- " batched=False,\n",
1957
- ")\n",
1958
- "result_all.to_pandas()"
1959
- ]
1960
- }
1961
- ],
1962
- "metadata": {
1963
- "kernelspec": {
1964
- "display_name": "Python 3 (ipykernel)",
1965
- "language": "python",
1966
- "name": "python3"
1967
- },
1968
- "language_info": {
1969
- "codemirror_mode": {
1970
- "name": "ipython",
1971
- "version": 3
1972
- },
1973
- "file_extension": ".py",
1974
- "mimetype": "text/x-python",
1975
- "name": "python",
1976
- "nbconvert_exporter": "python",
1977
- "pygments_lexer": "ipython3",
1978
- "version": "3.11.4"
1979
- }
1980
- },
1981
- "nbformat": 4,
1982
- "nbformat_minor": 5
1983
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Llama-2-eval/notebook/metrics.ipynb DELETED
@@ -1,1293 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "af2d4577",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "Note: you may need to restart the kernel to use updated packages.\n"
14
- ]
15
- }
16
- ],
17
- "source": [
18
- "%pip install -q evaluate rouge_score"
19
- ]
20
- },
21
- {
22
- "cell_type": "code",
23
- "execution_count": 2,
24
- "id": "a6d96660",
25
- "metadata": {},
26
- "outputs": [
27
- {
28
- "data": {
29
- "text/plain": [
30
- "True"
31
- ]
32
- },
33
- "execution_count": 2,
34
- "metadata": {},
35
- "output_type": "execute_result"
36
- }
37
- ],
38
- "source": [
39
- "import os\n",
40
- "from dotenv import load_dotenv\n",
41
- "\n",
42
- "load_dotenv()"
43
- ]
44
- },
45
- {
46
- "cell_type": "code",
47
- "execution_count": 3,
48
- "id": "b72bf3f9",
49
- "metadata": {},
50
- "outputs": [
51
- {
52
- "data": {
53
- "text/plain": [
54
- "Dataset({\n",
55
- " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
56
- " num_rows: 500\n",
57
- "})"
58
- ]
59
- },
60
- "execution_count": 3,
61
- "metadata": {},
62
- "output_type": "execute_result"
63
- }
64
- ],
65
- "source": [
66
- "from datasets import load_from_disk\n",
67
- "\n",
68
- "new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
69
- "new_ds"
70
- ]
71
- },
72
- {
73
- "cell_type": "code",
74
- "execution_count": 4,
75
- "id": "051bd771",
76
- "metadata": {},
77
- "outputs": [
78
- {
79
- "data": {
80
- "text/plain": [
81
- "({'NUMERIC': 100,\n",
82
- " 'DESCRIPTION': 100,\n",
83
- " 'ENTITY': 100,\n",
84
- " 'PERSON': 100,\n",
85
- " 'LOCATION': 100},\n",
86
- " {'NUMERIC': 179,\n",
87
- " 'DESCRIPTION': 215,\n",
88
- " 'ENTITY': 443,\n",
89
- " 'LOCATION': 461,\n",
90
- " 'PERSON': 499})"
91
- ]
92
- },
93
- "execution_count": 4,
94
- "metadata": {},
95
- "output_type": "execute_result"
96
- }
97
- ],
98
- "source": [
99
- "counts = {}\n",
100
- "indices = {}\n",
101
- "size = 100\n",
102
- "for i in range(new_ds.num_rows):\n",
103
- " row = new_ds[i]\n",
104
- " query_type = row[\"query_type\"]\n",
105
- " if query_type in counts:\n",
106
- " counts[query_type] += 1\n",
107
- " else:\n",
108
- " counts[query_type] = 1\n",
109
- " if counts[query_type] == size:\n",
110
- " indices[query_type] = i\n",
111
- "counts, indices"
112
- ]
113
- },
114
- {
115
- "cell_type": "code",
116
- "execution_count": 5,
117
- "id": "db48dcc4",
118
- "metadata": {},
119
- "outputs": [
120
- {
121
- "data": {
122
- "text/html": [
123
- "<div>\n",
124
- "<style scoped>\n",
125
- " .dataframe tbody tr th:only-of-type {\n",
126
- " vertical-align: middle;\n",
127
- " }\n",
128
- "\n",
129
- " .dataframe tbody tr th {\n",
130
- " vertical-align: top;\n",
131
- " }\n",
132
- "\n",
133
- " .dataframe thead th {\n",
134
- " text-align: right;\n",
135
- " }\n",
136
- "</style>\n",
137
- "<table border=\"1\" class=\"dataframe\">\n",
138
- " <thead>\n",
139
- " <tr style=\"text-align: right;\">\n",
140
- " <th></th>\n",
141
- " <th>answers</th>\n",
142
- " <th>passages</th>\n",
143
- " <th>query</th>\n",
144
- " <th>query_id</th>\n",
145
- " <th>query_type</th>\n",
146
- " <th>wellFormedAnswers</th>\n",
147
- " </tr>\n",
148
- " </thead>\n",
149
- " <tbody>\n",
150
- " <tr>\n",
151
- " <th>0</th>\n",
152
- " <td>[2,662]</td>\n",
153
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
154
- " <td>albany mn population</td>\n",
155
- " <td>15177</td>\n",
156
- " <td>NUMERIC</td>\n",
157
- " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
158
- " </tr>\n",
159
- " <tr>\n",
160
- " <th>1</th>\n",
161
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
162
- " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
163
- " <td>current weather in volcano, ca</td>\n",
164
- " <td>114414</td>\n",
165
- " <td>DESCRIPTION</td>\n",
166
- " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
167
- " </tr>\n",
168
- " <tr>\n",
169
- " <th>2</th>\n",
170
- " <td>[Hippocrates]</td>\n",
171
- " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
172
- " <td>____________________ is considered the father ...</td>\n",
173
- " <td>9083</td>\n",
174
- " <td>DESCRIPTION</td>\n",
175
- " <td>[Hippocrates is considered the father of moder...</td>\n",
176
- " </tr>\n",
177
- " <tr>\n",
178
- " <th>3</th>\n",
179
- " <td>[120 days from the date of the Note.]</td>\n",
180
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
181
- " <td>how many days is an appraisal good for a fanni...</td>\n",
182
- " <td>281439</td>\n",
183
- " <td>NUMERIC</td>\n",
184
- " <td>[An appraisal is good for 120 days from the da...</td>\n",
185
- " </tr>\n",
186
- " <tr>\n",
187
- " <th>4</th>\n",
188
- " <td>[From $26,000 to $39,000 a year]</td>\n",
189
- " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
190
- " <td>average pharmacy tech salary</td>\n",
191
- " <td>40287</td>\n",
192
- " <td>NUMERIC</td>\n",
193
- " <td>[The average salary for a pharmacy technician ...</td>\n",
194
- " </tr>\n",
195
- " <tr>\n",
196
- " <th>...</th>\n",
197
- " <td>...</td>\n",
198
- " <td>...</td>\n",
199
- " <td>...</td>\n",
200
- " <td>...</td>\n",
201
- " <td>...</td>\n",
202
- " <td>...</td>\n",
203
- " </tr>\n",
204
- " <tr>\n",
205
- " <th>495</th>\n",
206
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
207
- " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
208
- " <td>the pool shower company</td>\n",
209
- " <td>518269</td>\n",
210
- " <td>PERSON</td>\n",
211
- " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
212
- " </tr>\n",
213
- " <tr>\n",
214
- " <th>496</th>\n",
215
- " <td>[Hanson]</td>\n",
216
- " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
217
- " <td>longest tenured american football players</td>\n",
218
- " <td>442806</td>\n",
219
- " <td>PERSON</td>\n",
220
- " <td>[Hanson is the longest tenured American footba...</td>\n",
221
- " </tr>\n",
222
- " <tr>\n",
223
- " <th>497</th>\n",
224
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
225
- " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
226
- " <td>mt. view baptist in pendleton sc</td>\n",
227
- " <td>460250</td>\n",
228
- " <td>PERSON</td>\n",
229
- " <td>[Mount Able Baptist Church is located at the a...</td>\n",
230
- " </tr>\n",
231
- " <tr>\n",
232
- " <th>498</th>\n",
233
- " <td>[Honeysuckle Weeks]</td>\n",
234
- " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
235
- " <td>what actress disappeared for a while</td>\n",
236
- " <td>549739</td>\n",
237
- " <td>PERSON</td>\n",
238
- " <td>[The actress disappeared for a while Honeysuck...</td>\n",
239
- " </tr>\n",
240
- " <tr>\n",
241
- " <th>499</th>\n",
242
- " <td>[African-Nguni]</td>\n",
243
- " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
244
- " <td>what ethnicity is the surname sabol</td>\n",
245
- " <td>658265</td>\n",
246
- " <td>PERSON</td>\n",
247
- " <td>[The ethnicity of the surname Sabol is African...</td>\n",
248
- " </tr>\n",
249
- " </tbody>\n",
250
- "</table>\n",
251
- "<p>500 rows × 6 columns</p>\n",
252
- "</div>"
253
- ],
254
- "text/plain": [
255
- " answers \\\n",
256
- "0 [2,662] \n",
257
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
258
- "2 [Hippocrates] \n",
259
- "3 [120 days from the date of the Note.] \n",
260
- "4 [From $26,000 to $39,000 a year] \n",
261
- ".. ... \n",
262
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
263
- "496 [Hanson] \n",
264
- "497 [Mount Able Baptist Church is located at the a... \n",
265
- "498 [Honeysuckle Weeks] \n",
266
- "499 [African-Nguni] \n",
267
- "\n",
268
- " passages \\\n",
269
- "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
270
- "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
271
- "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
272
- "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
273
- "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
274
- ".. ... \n",
275
- "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
276
- "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
277
- "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
278
- "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
279
- "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
280
- "\n",
281
- " query query_id query_type \\\n",
282
- "0 albany mn population 15177 NUMERIC \n",
283
- "1 current weather in volcano, ca 114414 DESCRIPTION \n",
284
- "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
285
- "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
286
- "4 average pharmacy tech salary 40287 NUMERIC \n",
287
- ".. ... ... ... \n",
288
- "495 the pool shower company 518269 PERSON \n",
289
- "496 longest tenured american football players 442806 PERSON \n",
290
- "497 mt. view baptist in pendleton sc 460250 PERSON \n",
291
- "498 what actress disappeared for a while 549739 PERSON \n",
292
- "499 what ethnicity is the surname sabol 658265 PERSON \n",
293
- "\n",
294
- " wellFormedAnswers \n",
295
- "0 [The population of Albany, Minnesota is 2,662. ] \n",
296
- "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
297
- "2 [Hippocrates is considered the father of moder... \n",
298
- "3 [An appraisal is good for 120 days from the da... \n",
299
- "4 [The average salary for a pharmacy technician ... \n",
300
- ".. ... \n",
301
- "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
302
- "496 [Hanson is the longest tenured American footba... \n",
303
- "497 [Mount Able Baptist Church is located at the a... \n",
304
- "498 [The actress disappeared for a while Honeysuck... \n",
305
- "499 [The ethnicity of the surname Sabol is African... \n",
306
- "\n",
307
- "[500 rows x 6 columns]"
308
- ]
309
- },
310
- "execution_count": 5,
311
- "metadata": {},
312
- "output_type": "execute_result"
313
- }
314
- ],
315
- "source": [
316
- "new_ds.to_pandas()"
317
- ]
318
- },
319
- {
320
- "cell_type": "code",
321
- "execution_count": 5,
322
- "id": "89494c3d",
323
- "metadata": {},
324
- "outputs": [],
325
- "source": [
326
- "import evaluate\n",
327
- "\n",
328
- "bleu = evaluate.load(\"bleu\")\n",
329
- "rouge = evaluate.load(\"rouge\")"
330
- ]
331
- },
332
- {
333
- "cell_type": "code",
334
- "execution_count": 6,
335
- "id": "24a818ba",
336
- "metadata": {},
337
- "outputs": [],
338
- "source": [
339
- "def calc_metrics(ds):\n",
340
- " predictions = [ds[i][\"answers\"][0] for i in range(ds.num_rows)]\n",
341
- " references = [ds[i][\"wellFormedAnswers\"][0] for i in range(ds.num_rows)]\n",
342
- " bleu_scores = bleu.compute(predictions=predictions, references=references)\n",
343
- " rouge_scores = rouge.compute(predictions=predictions, references=references)\n",
344
- " return {\"bleu_scores\": bleu_scores, \"rouge_scores\": rouge_scores}"
345
- ]
346
- },
347
- {
348
- "cell_type": "code",
349
- "execution_count": 8,
350
- "id": "e447aa08",
351
- "metadata": {},
352
- "outputs": [
353
- {
354
- "data": {
355
- "text/plain": [
356
- "{'bleu_scores': {'bleu': 0.5842479720128682,\n",
357
- " 'precisions': [0.7814257485940113,\n",
358
- " 0.7185392334265505,\n",
359
- " 0.6801561945331913,\n",
360
- " 0.6543700340522134],\n",
361
- " 'brevity_penalty': 0.8263321448047812,\n",
362
- " 'length_ratio': 0.8398008680112331,\n",
363
- " 'translation_length': 6579,\n",
364
- " 'reference_length': 7834},\n",
365
- " 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
366
- " 'rouge2': 0.5266427189500504,\n",
367
- " 'rougeL': 0.623467453115133,\n",
368
- " 'rougeLsum': 0.6239164817179192}}"
369
- ]
370
- },
371
- "execution_count": 8,
372
- "metadata": {},
373
- "output_type": "execute_result"
374
- }
375
- ],
376
- "source": [
377
- "calc_metrics(new_ds)"
378
- ]
379
- },
380
- {
381
- "cell_type": "code",
382
- "execution_count": 9,
383
- "id": "b29d1f3e",
384
- "metadata": {},
385
- "outputs": [],
386
- "source": [
387
- "def calc_all_metrics(ds):\n",
388
- " result = {}\n",
389
- " result[\"OVERALL\"] = calc_metrics(ds)\n",
390
- " for query_type in indices:\n",
391
- " result[query_type] = calc_metrics(\n",
392
- " ds.filter(lambda example: example[\"query_type\"] == query_type)\n",
393
- " )\n",
394
- "\n",
395
- " return result"
396
- ]
397
- },
398
- {
399
- "cell_type": "code",
400
- "execution_count": 10,
401
- "id": "1a4273da",
402
- "metadata": {},
403
- "outputs": [
404
- {
405
- "data": {
406
- "text/plain": [
407
- "{'OVERALL': {'bleu_scores': {'bleu': 0.5842479720128682,\n",
408
- " 'precisions': [0.7814257485940113,\n",
409
- " 0.7185392334265505,\n",
410
- " 0.6801561945331913,\n",
411
- " 0.6543700340522134],\n",
412
- " 'brevity_penalty': 0.8263321448047812,\n",
413
- " 'length_ratio': 0.8398008680112331,\n",
414
- " 'translation_length': 6579,\n",
415
- " 'reference_length': 7834},\n",
416
- " 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
417
- " 'rouge2': 0.5266427189500504,\n",
418
- " 'rougeL': 0.623467453115133,\n",
419
- " 'rougeLsum': 0.6239164817179192}},\n",
420
- " 'NUMERIC': {'bleu_scores': {'bleu': 0.3589193328591513,\n",
421
- " 'precisions': [0.7536764705882353,\n",
422
- " 0.6494413407821229,\n",
423
- " 0.5884244372990354,\n",
424
- " 0.5657657657657658],\n",
425
- " 'brevity_penalty': 0.5649158870633492,\n",
426
- " 'length_ratio': 0.6365054602184087,\n",
427
- " 'translation_length': 816,\n",
428
- " 'reference_length': 1282},\n",
429
- " 'rouge_scores': {'rouge1': 0.5569863096088544,\n",
430
- " 'rouge2': 0.4262959859853511,\n",
431
- " 'rougeL': 0.5495190228731732,\n",
432
- " 'rougeLsum': 0.5502805905003136}},\n",
433
- " 'DESCRIPTION': {'bleu_scores': {'bleu': 0.7521919521555381,\n",
434
- " 'precisions': [0.8093238135237295,\n",
435
- " 0.761946514686541,\n",
436
- " 0.7335164835164835,\n",
437
- " 0.7077144226161955],\n",
438
- " 'brevity_penalty': 1.0,\n",
439
- " 'length_ratio': 1.0778632865550022,\n",
440
- " 'translation_length': 2381,\n",
441
- " 'reference_length': 2209},\n",
442
- " 'rouge_scores': {'rouge1': 0.8503571429521525,\n",
443
- " 'rouge2': 0.8009206345153658,\n",
444
- " 'rougeL': 0.8406066569954856,\n",
445
- " 'rougeLsum': 0.8405710628479812}},\n",
446
- " 'ENTITY': {'bleu_scores': {'bleu': 0.5057439480363012,\n",
447
- " 'precisions': [0.7135050741608119,\n",
448
- " 0.6375952582557155,\n",
449
- " 0.5884509624197983,\n",
450
- " 0.5555555555555556],\n",
451
- " 'brevity_penalty': 0.8143961563151505,\n",
452
- " 'length_ratio': 0.8296632124352331,\n",
453
- " 'translation_length': 1281,\n",
454
- " 'reference_length': 1544},\n",
455
- " 'rouge_scores': {'rouge1': 0.5877667231458372,\n",
456
- " 'rouge2': 0.48898551862814277,\n",
457
- " 'rougeL': 0.5796676511145928,\n",
458
- " 'rougeLsum': 0.5784518864116339}},\n",
459
- " 'LOCATION': {'bleu_scores': {'bleu': 0.4167786604147962,\n",
460
- " 'precisions': [0.8600583090379009,\n",
461
- " 0.7986348122866894,\n",
462
- " 0.7573385518590998,\n",
463
- " 0.7414529914529915],\n",
464
- " 'brevity_penalty': 0.5288627994571649,\n",
465
- " 'length_ratio': 0.6108637577916296,\n",
466
- " 'translation_length': 686,\n",
467
- " 'reference_length': 1123},\n",
468
- " 'rouge_scores': {'rouge1': 0.5405464995752973,\n",
469
- " 'rouge2': 0.3950940848806123,\n",
470
- " 'rougeL': 0.5400724136440879,\n",
471
- " 'rougeLsum': 0.5389556394979822}},\n",
472
- " 'PERSON': {'bleu_scores': {'bleu': 0.5861084149356606,\n",
473
- " 'precisions': [0.773851590106007,\n",
474
- " 0.7178707224334601,\n",
475
- " 0.6810766721044046,\n",
476
- " 0.6522864538395168],\n",
477
- " 'brevity_penalty': 0.8315596069910627,\n",
478
- " 'length_ratio': 0.844272076372315,\n",
479
- " 'translation_length': 1415,\n",
480
- " 'reference_length': 1676},\n",
481
- " 'rouge_scores': {'rouge1': 0.6119770025611677,\n",
482
- " 'rouge2': 0.522853938087197,\n",
483
- " 'rougeL': 0.6096713664231095,\n",
484
- " 'rougeLsum': 0.6103086543984155}}}"
485
- ]
486
- },
487
- "execution_count": 10,
488
- "metadata": {},
489
- "output_type": "execute_result"
490
- }
491
- ],
492
- "source": [
493
- "calc_all_metrics(new_ds)"
494
- ]
495
- },
496
- {
497
- "cell_type": "code",
498
- "execution_count": 11,
499
- "id": "3698be27",
500
- "metadata": {},
501
- "outputs": [
502
- {
503
- "name": "stdout",
504
- "output_type": "stream",
505
- "text": [
506
- "loading env vars from: /Users/inflaton/code/emtech/gpt/Llama-2-eval/.env\n",
507
- "App init started at 2023-10-10 12:04:33.775140\n",
508
- "Running on: macOS-14.0-arm64-arm-64bit\n",
509
- "MPS is available\n",
510
- "CUDA is NOT available\n",
511
- "hf_embeddings_device_type: mps\n",
512
- "hf_pipeline_device_type: mps\n",
513
- "initializing LLM: openai\n",
514
- " hf_pipeline_device_type: mps\n",
515
- " load_quantized_model: None\n",
516
- " torch_dtype: torch.float32\n",
517
- " n_threds: 24\n",
518
- " using model: gpt-3.5-turbo\n",
519
- "initialization complete\n",
520
- "App init completed in 0.167s\n"
521
- ]
522
- }
523
- ],
524
- "source": [
525
- "import json\n",
526
- "import sys\n",
527
- "import os\n",
528
- "\n",
529
- "os.environ[\"TEST_FIRST_5\"] = \"true\"\n",
530
- "os.environ[\"LANGCHAIN_DEBUG\"] = \"true\"\n",
531
- "\n",
532
- "from pathlib import Path\n",
533
- "\n",
534
- "sys.path.append(str(Path.cwd().parent))\n",
535
- "\n",
536
- "from evaluate_llm_ms_macro import (\n",
537
- " QAChainWithMsMacroDataset,\n",
538
- " llm_loader,\n",
539
- " calc_all_metrics,\n",
540
- ")"
541
- ]
542
- },
543
- {
544
- "cell_type": "code",
545
- "execution_count": 12,
546
- "id": "2395804d",
547
- "metadata": {},
548
- "outputs": [
549
- {
550
- "name": "stdout",
551
- "output_type": "stream",
552
- "text": [
553
- "{'question': 'albany mn population', 'chat_history': []}\n",
554
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
555
- "\u001b[0m{\n",
556
- " \"question\": \"albany mn population\",\n",
557
- " \"chat_history\": []\n",
558
- "}\n",
559
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
560
- "\u001b[0m[inputs]\n",
561
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
562
- "\u001b[0m{\n",
563
- " \"question\": \"albany mn population\",\n",
564
- " \"context\": \"City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. 
Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"\n",
565
- "}\n",
566
- "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
567
- "\u001b[0m{\n",
568
- " \"prompts\": [\n",
569
- " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
570
- " ]\n",
571
- "}\n",
572
- "The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.23s] Exiting LLM run with output:\n",
573
- "\u001b[0m{\n",
574
- " \"generations\": [\n",
575
- " [\n",
576
- " {\n",
577
- " \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
578
- " \"generation_info\": {\n",
579
- " \"finish_reason\": \"stop\"\n",
580
- " },\n",
581
- " \"message\": {\n",
582
- " \"lc\": 1,\n",
583
- " \"type\": \"constructor\",\n",
584
- " \"id\": [\n",
585
- " \"langchain\",\n",
586
- " \"schema\",\n",
587
- " \"messages\",\n",
588
- " \"AIMessageChunk\"\n",
589
- " ],\n",
590
- " \"kwargs\": {\n",
591
- " \"example\": false,\n",
592
- " \"content\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
593
- " \"additional_kwargs\": {}\n",
594
- " }\n",
595
- " }\n",
596
- " }\n",
597
- " ]\n",
598
- " ],\n",
599
- " \"llm_output\": null,\n",
600
- " \"run\": null\n",
601
- "}\n",
602
- "\n",
603
- "\n",
604
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [3.23s] Exiting Chain run with output:\n",
605
- "\u001b[0m{\n",
606
- " \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
607
- "}\n",
608
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [3.23s] Exiting Chain run with output:\n",
609
- "\u001b[0m{\n",
610
- " \"output_text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
611
- "}\n",
612
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [3.46s] Exiting Chain run with output:\n",
613
- "\u001b[0m[outputs]\n",
614
- "{'question': 'current weather in volcano, ca', 'chat_history': []}\n",
615
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
616
- "\u001b[0m{\n",
617
- " \"question\": \"current weather in volcano, ca\",\n",
618
- " \"chat_history\": []\n",
619
- "}\n",
620
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
621
- "\u001b[0m[inputs]\n",
622
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
623
- "\u001b[0m{\n",
624
- " \"question\": \"current weather in volcano, ca\",\n",
625
- " \"context\": \"Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. 
There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\"\n",
626
- "}\n",
627
- "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
628
- "\u001b[0m{\n",
629
- " \"prompts\": [\n",
630
- " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nVolcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 
1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\\nHuman: current weather in volcano, ca\"\n",
631
- " ]\n",
632
- "}\n",
633
- "I don't have the current weather information for Volcano, CA.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.04s] Exiting LLM run with output:\n",
634
- "\u001b[0m{\n",
635
- " \"generations\": [\n",
636
- " [\n",
637
- " {\n",
638
- " \"text\": \"I don't have the current weather information for Volcano, CA.\",\n",
639
- " \"generation_info\": {\n",
640
- " \"finish_reason\": \"stop\"\n",
641
- " },\n",
642
- " \"message\": {\n",
643
- " \"lc\": 1,\n",
644
- " \"type\": \"constructor\",\n",
645
- " \"id\": [\n",
646
- " \"langchain\",\n",
647
- " \"schema\",\n",
648
- " \"messages\",\n",
649
- " \"AIMessageChunk\"\n",
650
- " ],\n",
651
- " \"kwargs\": {\n",
652
- " \"example\": false,\n",
653
- " \"content\": \"I don't have the current weather information for Volcano, CA.\",\n",
654
- " \"additional_kwargs\": {}\n",
655
- " }\n",
656
- " }\n",
657
- " }\n",
658
- " ]\n",
659
- " ],\n",
660
- " \"llm_output\": null,\n",
661
- " \"run\": null\n",
662
- "}\n",
663
- "\n",
664
- "\n",
665
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.04s] Exiting Chain run with output:\n",
666
- "\u001b[0m{\n",
667
- " \"text\": \"I don't have the current weather information for Volcano, CA.\"\n",
668
- "}\n",
669
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.04s] Exiting Chain run with output:\n",
670
- "\u001b[0m{\n",
671
- " \"output_text\": \"I don't have the current weather information for Volcano, CA.\"\n",
672
- "}\n",
673
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.04s] Exiting Chain run with output:\n",
674
- "\u001b[0m[outputs]\n",
675
- "{'question': '____________________ is considered the father of modern medicine.', 'chat_history': []}\n",
676
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
677
- "\u001b[0m{\n",
678
- " \"question\": \"____________________ is considered the father of modern medicine.\",\n",
679
- " \"chat_history\": []\n",
680
- "}\n",
681
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
682
- "\u001b[0m[inputs]\n",
683
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
684
- "\u001b[0m{\n",
685
- " \"question\": \"____________________ is considered the father of modern medicine.\",\n",
686
- " \"context\": \"Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. 
It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\"\n",
687
- "}\n",
688
- "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
689
- "\u001b[0m{\n",
690
- " \"prompts\": [\n",
691
- " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
692
- " ]\n",
693
- "}\n",
694
- "Hippocrates is considered the father of modern medicine.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [654ms] Exiting LLM run with output:\n",
695
- "\u001b[0m{\n",
696
- " \"generations\": [\n",
697
- " [\n",
698
- " {\n",
699
- " \"text\": \"Hippocrates is considered the father of modern medicine.\",\n",
700
- " \"generation_info\": {\n",
701
- " \"finish_reason\": \"stop\"\n",
702
- " },\n",
703
- " \"message\": {\n",
704
- " \"lc\": 1,\n",
705
- " \"type\": \"constructor\",\n",
706
- " \"id\": [\n",
707
- " \"langchain\",\n",
708
- " \"schema\",\n",
709
- " \"messages\",\n",
710
- " \"AIMessageChunk\"\n",
711
- " ],\n",
712
- " \"kwargs\": {\n",
713
- " \"example\": false,\n",
714
- " \"content\": \"Hippocrates is considered the father of modern medicine.\",\n",
715
- " \"additional_kwargs\": {}\n",
716
- " }\n",
717
- " }\n",
718
- " }\n",
719
- " ]\n",
720
- " ],\n",
721
- " \"llm_output\": null,\n",
722
- " \"run\": null\n",
723
- "}\n",
724
- "\n",
725
- "\n",
726
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [655ms] Exiting Chain run with output:\n",
727
- "\u001b[0m{\n",
728
- " \"text\": \"Hippocrates is considered the father of modern medicine.\"\n",
729
- "}\n",
730
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [655ms] Exiting Chain run with output:\n",
731
- "\u001b[0m{\n",
732
- " \"output_text\": \"Hippocrates is considered the father of modern medicine.\"\n",
733
- "}\n",
734
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [657ms] Exiting Chain run with output:\n",
735
- "\u001b[0m[outputs]\n",
736
- "{'question': 'how many days is an appraisal good for a fannie loan', 'chat_history': []}\n",
737
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
738
- "\u001b[0m{\n",
739
- " \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
740
- " \"chat_history\": []\n",
741
- "}\n",
742
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
743
- "\u001b[0m[inputs]\n",
744
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
745
- "\u001b[0m{\n",
746
- " \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
747
- " \"context\": \"New and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. acceptable, but supporting comps are with an adequate explanation from the Appraiser. 
Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\"\n",
748
- "}\n",
749
- "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
750
- "\u001b[0m{\n",
751
- " \"prompts\": [\n",
752
- " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nNew and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. 
acceptable, but supporting comps are with an adequate explanation from the Appraiser. Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\\nHuman: how many days is an appraisal good for a fannie loan\"\n",
753
- " ]\n",
754
- "}\n",
755
- "According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.02s] Exiting LLM run with output:\n",
756
- "\u001b[0m{\n",
757
- " \"generations\": [\n",
758
- " [\n",
759
- " {\n",
760
- " \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
761
- " \"generation_info\": {\n",
762
- " \"finish_reason\": \"stop\"\n",
763
- " },\n",
764
- " \"message\": {\n",
765
- " \"lc\": 1,\n",
766
- " \"type\": \"constructor\",\n",
767
- " \"id\": [\n",
768
- " \"langchain\",\n",
769
- " \"schema\",\n",
770
- " \"messages\",\n",
771
- " \"AIMessageChunk\"\n",
772
- " ],\n",
773
- " \"kwargs\": {\n",
774
- " \"example\": false,\n",
775
- " \"content\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
776
- " \"additional_kwargs\": {}\n",
777
- " }\n",
778
- " }\n",
779
- " }\n",
780
- " ]\n",
781
- " ],\n",
782
- " \"llm_output\": null,\n",
783
- " \"run\": null\n",
784
- "}\n",
785
- "\n",
786
- "\n",
787
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.02s] Exiting Chain run with output:\n",
788
- "\u001b[0m{\n",
789
- " \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
790
- "}\n",
791
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.02s] Exiting Chain run with output:\n",
792
- "\u001b[0m{\n",
793
- " \"output_text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
794
- "}\n",
795
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.02s] Exiting Chain run with output:\n",
796
- "\u001b[0m[outputs]\n",
797
- "{'question': 'average pharmacy tech salary', 'chat_history': []}\n",
798
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
799
- "\u001b[0m{\n",
800
- " \"question\": \"average pharmacy tech salary\",\n",
801
- " \"chat_history\": []\n",
802
- "}\n",
803
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
804
- "\u001b[0m[inputs]\n",
805
- "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
806
- "\u001b[0m{\n",
807
- " \"question\": \"average pharmacy tech salary\",\n",
808
- " \"context\": \"If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. 
These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\"\n",
809
- "}\n",
810
- "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
811
- "\u001b[0m{\n",
812
- " \"prompts\": [\n",
813
- " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nIf you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. 
$18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\\nHuman: average pharmacy tech salary\"\n",
814
- " ]\n",
815
- "}\n",
816
- "The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.45s] Exiting LLM run with output:\n",
817
- "\u001b[0m{\n",
818
- " \"generations\": [\n",
819
- " [\n",
820
- " {\n",
821
- " \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
822
- " \"generation_info\": {\n",
823
- " \"finish_reason\": \"stop\"\n",
824
- " },\n",
825
- " \"message\": {\n",
826
- " \"lc\": 1,\n",
827
- " \"type\": \"constructor\",\n",
828
- " \"id\": [\n",
829
- " \"langchain\",\n",
830
- " \"schema\",\n",
831
- " \"messages\",\n",
832
- " \"AIMessageChunk\"\n",
833
- " ],\n",
834
- " \"kwargs\": {\n",
835
- " \"example\": false,\n",
836
- " \"content\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
837
- " \"additional_kwargs\": {}\n",
838
- " }\n",
839
- " }\n",
840
- " }\n",
841
- " ]\n",
842
- " ],\n",
843
- " \"llm_output\": null,\n",
844
- " \"run\": null\n",
845
- "}\n",
846
- "\n",
847
- "\n",
848
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.45s] Exiting Chain run with output:\n",
849
- "\u001b[0m{\n",
850
- " \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
851
- "}\n",
852
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.46s] Exiting Chain run with output:\n",
853
- "\u001b[0m{\n",
854
- " \"output_text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
855
- "}\n",
856
- "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.46s] Exiting Chain run with output:\n",
857
- "\u001b[0m[outputs]\n",
858
- "Q-001: albany mn population\n",
859
- "A-001: The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\n",
860
- "G-001: The population of Albany, Minnesota is 2,662. \n",
861
- "\n",
862
- "Q-002: current weather in volcano, ca\n",
863
- "A-002: I don't have the current weather information for Volcano, CA.\n",
864
- "G-002: The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\n",
865
- "\n",
866
- "Q-003: ____________________ is considered the father of modern medicine.\n",
867
- "A-003: Hippocrates is considered the father of modern medicine.\n",
868
- "G-003: Hippocrates is considered the father of modern medicine.\n",
869
- "\n",
870
- "Q-004: how many days is an appraisal good for a fannie loan\n",
871
- "A-004: According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\n",
872
- "G-004: An appraisal is good for 120 days from the date of the Note for a Fannie loan.\n",
873
- "\n",
874
- "Q-005: average pharmacy tech salary\n",
875
- "A-005: The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\n",
876
- "G-005: The average salary for a pharmacy technician is $26,000 to $39,000 in a year.\n",
877
- "\n",
878
- "\n",
879
- "\n",
880
- "scores: {\n",
881
- " \"OVERALL\": {\n",
882
- " \"bleu_scores\": {\n",
883
- " \"bleu\": 0.3953488372093023,\n",
884
- " \"precisions\": [\n",
885
- " 0.3953488372093023\n",
886
- " ],\n",
887
- " \"brevity_penalty\": 1.0,\n",
888
- " \"length_ratio\": 1.9253731343283582,\n",
889
- " \"translation_length\": 129,\n",
890
- " \"reference_length\": 67\n",
891
- " },\n",
892
- " \"rouge_scores\": {\n",
893
- " \"rouge1\": 0.5737456342107505,\n",
894
- " \"rouge2\": 0.4160794941282746,\n",
895
- " \"rougeL\": 0.5108953062441435,\n",
896
- " \"rougeLsum\": 0.4989862850327967\n",
897
- " }\n",
898
- " },\n",
899
- " \"NUMERIC\": {\n",
900
- " \"bleu_scores\": {\n",
901
- " \"bleu\": 0.36111111111111116,\n",
902
- " \"precisions\": [\n",
903
- " 0.3611111111111111\n",
904
- " ],\n",
905
- " \"brevity_penalty\": 1.0,\n",
906
- " \"length_ratio\": 2.4545454545454546,\n",
907
- " \"translation_length\": 108,\n",
908
- " \"reference_length\": 44\n",
909
- " },\n",
910
- " \"rouge_scores\": {\n",
911
- " \"rouge1\": 0.5395760570179174,\n",
912
- " \"rouge2\": 0.3694751662231337,\n",
913
- " \"rougeL\": 0.4656557912371866,\n",
914
- " \"rougeLsum\": 0.4656557912371866\n",
915
- " }\n",
916
- " },\n",
917
- " \"DESCRIPTION\": {\n",
918
- " \"bleu_scores\": {\n",
919
- " \"bleu\": 0.5195179673581217,\n",
920
- " \"precisions\": [\n",
921
- " 0.5714285714285714\n",
922
- " ],\n",
923
- " \"brevity_penalty\": 0.909156442876713,\n",
924
- " \"length_ratio\": 0.9130434782608695,\n",
925
- " \"translation_length\": 21,\n",
926
- " \"reference_length\": 23\n",
927
- " },\n",
928
- " \"rouge_scores\": {\n",
929
- " \"rouge1\": 0.625,\n",
930
- " \"rouge2\": 0.5,\n",
931
- " \"rougeL\": 0.5833333333333334,\n",
932
- " \"rougeLsum\": 0.5833333333333334\n",
933
- " }\n",
934
- " }\n",
935
- "}\n",
936
- "\n",
937
- "CPU times: user 512 ms, sys: 63.7 ms, total: 576 ms\n",
938
- "Wall time: 7.85 s\n"
939
- ]
940
- }
941
- ],
942
- "source": [
943
- "%%time\n",
944
- "\n",
945
- "eval_ds = new_ds.select(range(5))\n",
946
- "qa_chain = QAChainWithMsMacroDataset(eval_ds, llm_loader)\n",
947
- "\n",
948
- "answers = []\n",
949
- "for i in range(eval_ds.num_rows):\n",
950
- " inputs = {\"question\": str(eval_ds[i][\"query\"]), \"chat_history\": []}\n",
951
- " result = qa_chain.call_chain(\n",
952
- " inputs,\n",
953
- " None,\n",
954
- " None,\n",
955
- " True,\n",
956
- " )\n",
957
- " answers.append(result[\"answer\"])\n",
958
- "\n",
959
- "result = calc_all_metrics(eval_ds, answers)\n",
960
- "\n",
961
- "for i in range(eval_ds.num_rows):\n",
962
- " n = i + 1\n",
963
- " print(f\"Q-{n:03d}: {eval_ds[i]['query']}\")\n",
964
- " print(f\"A-{n:03d}: {answers[i]}\")\n",
965
- " print(f\"G-{n:03d}: {eval_ds[i]['wellFormedAnswers'][0]}\\n\")\n",
966
- "\n",
967
- "print(f\"\\n\\nscores: {json.dumps(result, indent=2)}\\n\")"
968
- ]
969
- },
970
- {
971
- "cell_type": "code",
972
- "execution_count": 13,
973
- "id": "bae05024",
974
- "metadata": {},
975
- "outputs": [
976
- {
977
- "name": "stdout",
978
- "output_type": "stream",
979
- "text": [
980
- "System: Use the following pieces of context to answer the users question. \n",
981
- "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
982
- "----------------\n",
983
- "City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n",
984
- "\n",
985
- "Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\n",
986
- "\n",
987
- "For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
988
- "\n",
989
- "Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\n",
990
- "\n",
991
- "Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
992
- "\n",
993
- "Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\n",
994
- "\n",
995
- "For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\n",
996
- "\n",
997
- "For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\n",
998
- "Human: albany mn population\n"
999
- ]
1000
- }
1001
- ],
1002
- "source": [
1003
- "print(\n",
1004
- " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
1005
- ")"
1006
- ]
1007
- },
1008
- {
1009
- "cell_type": "code",
1010
- "execution_count": 4,
1011
- "id": "593f574a",
1012
- "metadata": {},
1013
- "outputs": [
1014
- {
1015
- "name": "stdout",
1016
- "output_type": "stream",
1017
- "text": [
1018
- "System: Use the following pieces of context to answer the users question. \n",
1019
- "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
1020
- "----------------\n",
1021
- "Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\n",
1022
- "\n",
1023
- "Many of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\n",
1024
- "\n",
1025
- "Despite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\n",
1026
- "\n",
1027
- "At least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\n",
1028
- "\n",
1029
- "460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\n",
1030
- "\n",
1031
- "TRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \n",
1032
- "\n",
1033
- "The two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\n",
1034
- "\n",
1035
- "Hippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\n",
1036
- "\n",
1037
- "Reload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\n",
1038
- "\n",
1039
- "However, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\n",
1040
- "Human: ____________________ is considered the father of modern medicine.\n"
1041
- ]
1042
- }
1043
- ],
1044
- "source": [
1045
- "print(\n",
1046
- " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
1047
- ")"
1048
- ]
1049
- },
1050
- {
1051
- "cell_type": "markdown",
1052
- "id": "5b9204e0",
1053
- "metadata": {},
1054
- "source": [
1055
- "```\n",
1056
- "Q-003: ____________________ is considered the father of modern medicine.\n",
1057
- "A-003: Hippocrates is considered the father of modern medicine.\n",
1058
- "G-003: Hippocrates is considered the father of modern medicine.\n",
1059
- "```"
1060
- ]
1061
- },
1062
- {
1063
- "cell_type": "code",
1064
- "execution_count": 11,
1065
- "id": "5cfc8320",
1066
- "metadata": {},
1067
- "outputs": [
1068
- {
1069
- "data": {
1070
- "text/plain": [
1071
- "{'answers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.'],\n",
1072
- " 'passages': {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0],\n",
1073
- " 'passage_text': ['Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.',\n",
1074
- " 'Current U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.',\n",
1075
- " 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.',\n",
1076
- " 'Volcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.',\n",
1077
- " 'Volcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.',\n",
1078
- " 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.',\n",
1079
- " 'Volcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.',\n",
1080
- " 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.',\n",
1081
- " 'Volcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.',\n",
1082
- " 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.'],\n",
1083
- " 'url': ['http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1084
- " 'http://www.fastweather.com/index.php?city=Volcano_CA&g',\n",
1085
- " 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1086
- " 'http://www.homefacts.com/weather/California/Amador-County/Volcano.html',\n",
1087
- " 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
1088
- " 'http://www.weatherman.com/us/ca/volcano',\n",
1089
- " 'http://www.weatherman.com/us/ca/volcano',\n",
1090
- " 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1091
- " 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
1092
- " 'http://www.weatherman.com/us/ca/volcano']},\n",
1093
- " 'query': 'current weather in volcano, ca',\n",
1094
- " 'query_id': 114414,\n",
1095
- " 'query_type': 'DESCRIPTION',\n",
1096
- " 'wellFormedAnswers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.']}"
1097
- ]
1098
- },
1099
- "execution_count": 11,
1100
- "metadata": {},
1101
- "output_type": "execute_result"
1102
- }
1103
- ],
1104
- "source": [
1105
- "test_ds = new_ds.select([1])\n",
1106
- "test_ds[0]"
1107
- ]
1108
- },
1109
- {
1110
- "cell_type": "code",
1111
- "execution_count": 12,
1112
- "id": "56b91cae",
1113
- "metadata": {},
1114
- "outputs": [
1115
- {
1116
- "data": {
1117
- "text/plain": [
1118
- "{'bleu_scores': {'bleu': 1.0,\n",
1119
- " 'precisions': [1.0, 1.0, 1.0, 1.0],\n",
1120
- " 'brevity_penalty': 1.0,\n",
1121
- " 'length_ratio': 1.0,\n",
1122
- " 'translation_length': 14,\n",
1123
- " 'reference_length': 14},\n",
1124
- " 'rouge_scores': {'rouge1': 1.0,\n",
1125
- " 'rouge2': 1.0,\n",
1126
- " 'rougeL': 1.0,\n",
1127
- " 'rougeLsum': 1.0}}"
1128
- ]
1129
- },
1130
- "execution_count": 12,
1131
- "metadata": {},
1132
- "output_type": "execute_result"
1133
- }
1134
- ],
1135
- "source": [
1136
- "calc_metrics(test_ds)"
1137
- ]
1138
- },
1139
- {
1140
- "cell_type": "code",
1141
- "execution_count": 18,
1142
- "id": "56c6bf24",
1143
- "metadata": {},
1144
- "outputs": [
1145
- {
1146
- "data": {
1147
- "text/plain": [
1148
- "['The',\n",
1149
- " 'Volcano',\n",
1150
- " 'forecast',\n",
1151
- " 'for',\n",
1152
- " 'Apr',\n",
1153
- " '12',\n",
1154
- " 'is',\n",
1155
- " '52',\n",
1156
- " 'degrees',\n",
1157
- " 'and',\n",
1158
- " 'Patchy',\n",
1159
- " 'light',\n",
1160
- " 'rain.']"
1161
- ]
1162
- },
1163
- "execution_count": 18,
1164
- "metadata": {},
1165
- "output_type": "execute_result"
1166
- }
1167
- ],
1168
- "source": [
1169
- "test_ds[0][\"answers\"][0].split()"
1170
- ]
1171
- },
1172
- {
1173
- "cell_type": "code",
1174
- "execution_count": 19,
1175
- "id": "77d08267",
1176
- "metadata": {},
1177
- "outputs": [
1178
- {
1179
- "data": {
1180
- "text/plain": [
1181
- "13"
1182
- ]
1183
- },
1184
- "execution_count": 19,
1185
- "metadata": {},
1186
- "output_type": "execute_result"
1187
- }
1188
- ],
1189
- "source": [
1190
- "len(test_ds[0][\"answers\"][0].split())"
1191
- ]
1192
- },
1193
- {
1194
- "cell_type": "code",
1195
- "execution_count": 22,
1196
- "id": "8c19694b",
1197
- "metadata": {},
1198
- "outputs": [
1199
- {
1200
- "data": {
1201
- "text/plain": [
1202
- "{'answers': ['From $26,000 to $39,000 a year'],\n",
1203
- " 'passages': {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
1204
- " 'passage_text': ['If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.',\n",
1205
- " 'What can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.',\n",
1206
- " 'The median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.',\n",
1207
- " 'The majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.',\n",
1208
- " 'The pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.',\n",
1209
- " \"Pharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\",\n",
1210
- " 'Popular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.',\n",
1211
- " 'Pharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.',\n",
1212
- " 'It also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.',\n",
1213
- " 'Occupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.'],\n",
1214
- " 'url': ['http://www.pharmacytechschools.com/salary/',\n",
1215
- " 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
1216
- " 'https://www.bls.gov/ooh/healthcare/pharmacy-technicians.htm',\n",
1217
- " 'http://www.pharmacytechschools.com/salary/',\n",
1218
- " 'http://www.pharmacytechschools.com/salary/',\n",
1219
- " 'http://www.payscale.com/research/US/Job=Pharmacy_Technician/Hourly_Rate',\n",
1220
- " 'http://www.simplyhired.com/salaries-k-certified-pharmacy-technician-jobs.html',\n",
1221
- " 'https://www.salarylist.com/jobs/Pharmacy-Technician-Salary.htm',\n",
1222
- " 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
1223
- " 'https://www.bls.gov/oes/current/oes292052.htm']},\n",
1224
- " 'query': 'average pharmacy tech salary',\n",
1225
- " 'query_id': 40287,\n",
1226
- " 'query_type': 'NUMERIC',\n",
1227
- " 'wellFormedAnswers': ['The average salary for a pharmacy technician is $26,000 to $39,000 in a year.',\n",
1228
- " 'The average salary for a pharmacy technician is from $26,000 to $39,000 a year.']}"
1229
- ]
1230
- },
1231
- "execution_count": 22,
1232
- "metadata": {},
1233
- "output_type": "execute_result"
1234
- }
1235
- ],
1236
- "source": [
1237
- "test_ds = new_ds.select([4])\n",
1238
- "test_ds[0]"
1239
- ]
1240
- },
1241
- {
1242
- "cell_type": "code",
1243
- "execution_count": 23,
1244
- "id": "34209164",
1245
- "metadata": {},
1246
- "outputs": [
1247
- {
1248
- "data": {
1249
- "text/plain": [
1250
- "{'bleu_scores': {'bleu': 0.19303951204286907,\n",
1251
- " 'precisions': [0.875, 0.7142857142857143, 0.5, 0.4],\n",
1252
- " 'brevity_penalty': 0.32465246735834974,\n",
1253
- " 'length_ratio': 0.47058823529411764,\n",
1254
- " 'translation_length': 8,\n",
1255
- " 'reference_length': 17},\n",
1256
- " 'rouge_scores': {'rouge1': 0.5833333333333334,\n",
1257
- " 'rouge2': 0.4545454545454545,\n",
1258
- " 'rougeL': 0.5833333333333334,\n",
1259
- " 'rougeLsum': 0.5833333333333334}}"
1260
- ]
1261
- },
1262
- "execution_count": 23,
1263
- "metadata": {},
1264
- "output_type": "execute_result"
1265
- }
1266
- ],
1267
- "source": [
1268
- "calc_metrics(test_ds)"
1269
- ]
1270
- }
1271
- ],
1272
- "metadata": {
1273
- "kernelspec": {
1274
- "display_name": "Python 3 (ipykernel)",
1275
- "language": "python",
1276
- "name": "python3"
1277
- },
1278
- "language_info": {
1279
- "codemirror_mode": {
1280
- "name": "ipython",
1281
- "version": 3
1282
- },
1283
- "file_extension": ".py",
1284
- "mimetype": "text/x-python",
1285
- "name": "python",
1286
- "nbconvert_exporter": "python",
1287
- "pygments_lexer": "ipython3",
1288
- "version": "3.10.9"
1289
- }
1290
- },
1291
- "nbformat": 4,
1292
- "nbformat_minor": 5
1293
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Makefile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: start
2
+ start:
3
+ python app.py
4
+
5
+ test:
6
+ python eval_modules/qa_chain_test.py
7
+
8
+ tune:
9
+ ./tune_rp.sh
10
+
11
+ chat:
12
+ python eval_modules/qa_chain_test.py chat
13
+
14
+ .PHONY: format
15
+ format:
16
+ black .
17
+
18
+ install:
19
+ pip install -r requirements.txt
20
+
21
+ install-torch:
22
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
23
+
24
+ install-torch-cuda:
25
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
README.md CHANGED
@@ -1,53 +1,168 @@
1
  ---
2
- title: Chat with LLMs
3
- emoji: 🤖💬
4
- colorFrom: purple
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 4.26.0
8
  app_file: app.py
9
- pinned: true
10
- short_description: 'Chat with LLMs'
11
  ---
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ## Running Locally
14
 
15
  1. Check pre-conditions:
16
 
17
  - [Git Large File Storage (LFS)](https://git-lfs.com/) must have been installed.
18
  - Run `python --version` to make sure you're running Python version 3.10 or above.
19
- - The latest PyTorch must have been installed. Here is a sample `conda` command for Linix/WSL2:
20
 
21
  ```
22
- conda install -y pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
23
  ```
24
 
25
  2. Clone the repo
26
 
27
  ```
28
- git lfs install
29
- git clone https://huggingface.co/spaces/inflaton-ai/llm-qa-bench
 
 
 
 
 
 
 
 
 
 
30
  ```
31
 
32
- 3. Install packages
33
 
34
  ```
35
  pip install -r requirements.txt
36
  ```
37
 
38
- 4. Set up your environment variables
39
 
40
  - By default, environment variables are loaded from `.env.example` file
41
  - If you don't want to use the default settings, copy `.env.example` into `.env`. Your can then update it for your local runs.
42
 
43
- 5. Run automated test:
44
 
45
  ```
46
- python qa_chain_test.py
47
  ```
48
 
49
- 6. Start the local server at `http://localhost:7860`:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  ```
52
- python app.py
 
 
 
 
 
 
 
 
 
53
  ```
 
1
  ---
2
+ title: LLM QA Eval
3
+ emoji: 💬
4
+ colorFrom: yellow
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.36.1
8
  app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
  ---
12
 
13
+ # Evaluate and Optimize Open-Source LLMs' Performance for Question Answering with RAG and Non-RAG
14
+
15
+ This project contains the source code, datasets and results for the titled paper.
16
+
17
+ ## Results for [WebQSP Dataset](./data/datasets/WebQSP.test.wikidata.json)
18
+
19
+ | Model Name | RAG | RAG with Chat Template | Non-RAG | Note |
20
+ | -------------------------------- | ----------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | ----------------- |
21
+ | Phi-3-mini-128k-instruct (batch) | [Phi-3-mini-128k-instruct_wd_rag_batch_4](./data/results/Phi-3-mini-128k-instruct_wd_rag_batch_4.csv) | [Phi-3-mini-128k-instruct_wd_true](./data/results/Phi-3-mini-128k-instruct_wd_true.csv) | [Phi-3-mini-128k-instruct_wd_non_rag_batch_16](./data/results/Phi-3-mini-128k-instruct_wd_non_rag_batch_16.csv) | Evaluated 3 types |
22
+ | gemma-1.1-2b-it | [gemma-1.1-2b-it_wd](./data/results/gemma-1.1-2b-it_wd.csv) | [gemma-1.1-2b-it_wd_true](./data/results/gemma-1.1-2b-it_wd_true.csv) | [gemma-1.1-2b-it_wd_non_rag](./data/results/gemma-1.1-2b-it_wd_non_rag.csv) | Evaluated 3 types |
23
+ | gemma-1.1-7b-it | [gemma-1.1-7b-it_wd](./data/results/gemma-1.1-7b-it_wd.csv) | [gemma-1.1-7b-it_wd_true](./data/results/gemma-1.1-7b-it_wd_true.csv) | [gemma-1.1-7b-it_wd_non_rag](./data/results/gemma-1.1-7b-it_wd_non_rag.csv) | Evaluated 3 types |
24
+ | Mistral-7B-Instruct-v0.2 | [Tune_2024-03-29_11-28-20](./data/results/Tune_2024-03-29_11-28-20.csv) | [Mistral-7B-Instruct-v0.2_wd_true](./data/results/Mistral-7B-Instruct-v0.2_wd_true.csv) | [Tune_2024-04-16_12-24-27](./data/results/Tune_2024-04-16_12-24-27.csv) | Evaluated 3 types |
25
+ | Llama-2-7b-chat-hf | [Tune_2024-03-20_15-35-37](./data/results/Tune_2024-03-20_15-35-37.csv) | [Llama-2-7b-chat-hf_wd_true](./data/results/Llama-2-7b-chat-hf_wd_true.csv) | [Tune_2024-04-09_09-19-22](./data/results/Tune_2024-04-09_09-19-22.csv) | Evaluated 3 types |
26
+ | Meta-Llama-3-8B-Instruct | [Meta-Llama-3-8B-Instruct_wd](./data/results/Meta-Llama-3-8B-Instruct_wd.csv) | [Meta-Llama-3-8B-Instruct_wd_true](./data/results/Meta-Llama-3-8B-Instruct_wd_true.csv) | [Meta-Llama-3-8B-Instruct_wd_non_rag](./data/results/Meta-Llama-3-8B-Instruct_wd_non_rag.csv) (generic prompt) | Evaluated 3 types |
27
+ | | | | [Meta-Llama-3-8B-Instruct_wd_1_non_rag](./data/results/Meta-Llama-3-8B-Instruct_wd_1_non_rag.csv) | Evaluated Non-RAG |
28
+ | Llama-2-13b-chat-hf | [Tune_2024-03-25_23-32-57](./data/results/Tune_2024-03-25_23-32-57.csv) | [Llama-2-13b-chat-hf_wd_true](./data/results/Llama-2-13b-chat-hf_wd_true.csv) | [Tune_2024-04-10_16-53-38](./data/results/Tune_2024-04-10_16-53-38.csv) | Evaluated 3 types |
29
+ | Llama-2-70b-chat-hf | [Llama-2-70b-chat-hf_wd](./data/results/Llama-2-70b-chat-hf_wd.csv) | [Llama-2-70b-chat-hf_wd_true](./data/results/Llama-2-70b-chat-hf_wd_true.csv) | [Llama-2-70b-chat-hf_wd_non_rag](./data/results/Llama-2-70b-chat-hf_wd_non_rag.csv) | Evaluated 3 types |
30
+ | Meta-Llama-3-70B-Instruct | [Meta-Llama-3-70B-Instruct_wd](./data/results/Meta-Llama-3-70B-Instruct_wd.csv) | [Meta-Llama-3-70B-Instruct_wd_true](./data/results/Meta-Llama-3-70B-Instruct_wd_true.csv) | [Meta-Llama-3-70B-Instruct_wd_non_rag](./data/results/Meta-Llama-3-70B-Instruct_wd_non_rag.csv) | Evaluated 3 types |
31
+ | gpt-3.5-turbo | [gpt-3.5-turbo_rag](./data/results/gpt-3.5-turbo_rag.csv) | | [gpt-3.5-turbo_non_rag](./data/results/gpt-3.5-turbo_non_rag.csv) | Evaluated both |
32
+
33
+ ## Results for [MS MARCO Dataset](./data/datasets/ms_macro.json)
34
+
35
+ | Model Name | RAG | RAG with Chat Template | Non-RAG | Note |
36
+ | ------------------------- | ---------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | ---- |
37
+ | gemma-1.1-2b-it | [gemma-1.1-2b-it_mm_false](data/results/gemma-1.1-2b-it_mm_true_false.csv) | [gemma-1.1-2b-it_mm_true](data/results/gemma-1.1-2b-it_mm_true.csv) | [gemma-1.1-2b-it_mm_non_rag.csv](data/results/gemma-1.1-2b-it_mm_true_false_non_rag.csv) | |
38
+ | Phi-3-mini-128k-instruct | [Phi-3-mini-128k-instruct_mm_false](data/results/Phi-3-mini-128k-instruct_mm_false.csv) | [Phi-3-mini-128k-instruct_mm_true](data/results/Phi-3-mini-128k-instruct_mm_true.csv) | [Phi-3-mini-128k-instruct_mm_non_rag.csv](data/results/Phi-3-mini-128k-instruct_mm_non_rag.csv) | |
39
+ | gemma-1.1-7b-it | [gemma-1.1-7b-it_mm_false](data/results/gemma-1.1-7b-it_mm_false.csv) | [gemma-1.1-7b-it_mm_true](data/results/gemma-1.1-7b-it_mm_true.csv) | [gemma-1.1-7b-it_mm_non_rag.csv](data/results/gemma-1.1-7b-it_mm_non_rag.csv) | |
40
+ | Mistral-7B-Instruct-v0.2 | [Mistral-7B-Instruct-v0.2_mm_false](data/results/Mistral-7B-Instruct-v0.2_mm_false.csv) | [Mistral-7B-Instruct-v0.2_mm_true](data/results/Mistral-7B-Instruct-v0.2_mm_true.csv) | [Mistral-7B-Instruct-v0.2_mm_non_rag.csv](data/results/Mistral-7B-Instruct-v0.2_mm_non_rag.csv) | |
41
+ | Llama-2-7b-chat-hf | [Llama-2-7b-chat-hf_mm_false](data/results/Llama-2-7b-chat-hf_mm_true_false.csv) | [Llama-2-7b-chat-hf_mm_true](data/results/Llama-2-7b-chat-hf_mm_true.csv) | [Llama-2-7b-chat-hf_mm_non_rag.csv](data/results/Llama-2-7b-chat-hf_mm_true_false_non_rag.csv) | |
42
+ | Meta-Llama-3-8B-Instruct | [Meta-Llama-3-8B-Instruct_mm_false](data/results/Meta-Llama-3-8B-Instruct_mm_true_false.csv) | [Meta-Llama-3-8B-Instruct_mm_true](data/results/Meta-Llama-3-8B-Instruct_mm_true.csv) | [Meta-Llama-3-8B-Instruct_mm_non_rag.csv](data/results/Meta-Llama-3-8B-Instruct_mm_true_false_non_rag.csv) | |
43
+ | Llama-2-13b-chat-hf | [Llama-2-13b-chat-hf_mm_false](data/results/Llama-2-13b-chat-hf_mm_false.csv) | [Llama-2-13b-chat-hf_mm_true](data/results/Llama-2-13b-chat-hf_mm_true.csv) | [Llama-2-13b-chat-hf_mm_non_rag.csv](data/results/Llama-2-13b-chat-hf_mm_non_rag.csv) | |
44
+ | Llama-2-70b-chat-hf | [Llama-2-70b-chat-hf_mm_false](data/results/Llama-2-70b-chat-hf_mm_false.csv) | [Llama-2-70b-chat-hf_mm_true](data/results/Llama-2-70b-chat-hf_mm_true.csv) | [Llama-2-70b-chat-hf_mm_non_rag.csv](data/results/Llama-2-70b-chat-hf_mm_non_rag.csv) | |
45
+ | Meta-Llama-3-70B-Instruct | [Meta-Llama-3-70B-Instruct_mm_false](data/results/Meta-Llama-3-70B-Instruct_mm_true_false.csv) | [Meta-Llama-3-70B-Instruct_mm_true](data/results/Meta-Llama-3-70B-Instruct_mm_true.csv) | [Meta-Llama-3-70B-Instruct_mm_non_rag.csv](data/results/Meta-Llama-3-70B-Instruct_mm_true_false_non_rag.csv) | |
46
+ | gpt-3.5-turbo | [gpt-3.5-turbo_rag](./data/results/gpt-3.5-turbo_mm_RP_1.300.csv) | | [gpt-3.5-turbo_non_rag](./data/results/gpt-3.5-turbo_mm_non_rag_RP_1.300.csv) | |
47
+
48
+ ## How it works
49
+
50
+ We're using an AI methodology, namely Conversational Retrieval Augmentation (CRAG), which uses LLMs off the shelf (i.e., without any fine-tuning), then controls their behavior through clever prompting and conditioning on private “contextual” data, e.g., texts extracted from your PDF files.
51
+
52
+ At a very high level, the workflow can be divided into three stages:
53
+
54
+ 1. Data preprocessing / embedding: This stage involves storing private data (your PDF files) to be retrieved later. Typically, the documents are broken into chunks and passed through an embedding model, and the resulting embeddings are stored in a vectorstore.
55
+
56
+ 2. Prompt construction / retrieval: When a user submits a query, the application constructs a series of prompts to submit to the language model. A compiled prompt typically combines a prompt template and a set of relevant documents retrieved from the vectorstore.
57
+
58
+ 3. Prompt execution / inference: Once the prompts have been compiled, they are submitted to a pre-trained LLM for inference—including both proprietary model APIs and open-source or self-trained models.
59
+
60
+ Tech stack used includes LangChain, Gradio, Chroma and FAISS.
61
+
62
+ - LangChain is an open-source framework that makes it easier to build scalable AI/LLM apps and chatbots.
63
+ - Gradio is an open-source Python library that is used to build machine learning and data science demos and web applications.
64
+ - Chroma and FAISS are open-source vectorstores for storing embeddings for your files.
65
+
66
  ## Running Locally
67
 
68
  1. Check pre-conditions:
69
 
70
  - [Git Large File Storage (LFS)](https://git-lfs.com/) must have been installed.
71
  - Run `python --version` to make sure you're running Python version 3.10 or above.
72
+ - [CMake](https://cmake.org/) must have been installed. Here is a sample command to install `CMake` on `ubuntu`:
73
 
74
  ```
75
+ sudo apt install cmake
76
  ```
77
 
78
  2. Clone the repo
79
 
80
  ```
81
+ git lfs install
82
+ git clone --recursive https://github.com/smu-ai/Evaluation-of-Orca-2-Models-for-Conversational-RAG.git
83
+ ```
84
+
85
+ 3. Ensure the latest PyTorch is installed:
86
+
87
+ ```
88
+ # using CUDA with Nvidia GPU
89
+ make install-torch-cuda
90
+
91
+ # using Apple Silicon or other CPU
92
+ make install-torch
93
  ```
94
 
95
+ 4. Install packages
96
 
97
  ```
98
  pip install -r requirements.txt
99
  ```
100
 
101
+ 5. Set up your environment variables
102
 
103
  - By default, environment variables are loaded from `.env.example` file
104
  - If you don't want to use the default settings, copy `.env.example` into `.env`. You can then update it for your local runs.
105
 
106
+ 6. Run automated test:
107
 
108
  ```
109
+ make test
110
  ```
111
 
112
+ 7. Start the local server at `http://localhost:7860`:
113
+
114
+ ```
115
+ make start
116
+
117
+ ```
118
+
119
+ 8. Tune repetition penalty parameters:
120
+
121
+ ```
122
+ make tune
123
+ ```
124
+
125
+ ## Talk to Your Own PDF Files
126
+
127
+ - The sample PDF files are downloaded from [PCI DSS official website](https://www.pcisecuritystandards.org/document_library/?category=pcidss) and the corresponding embeddings are stored in folders `data/chromadb_1024_512` and `data/faiss_1024_512` with Chroma & FAISS formats respectively, which allows you to run locally without any additional effort.
128
+
129
+ - You can also put your own PDF files into any folder specified in `SOURCE_PDFS_PATH` and run the command below to generate embeddings which will be stored in folder `FAISS_INDEX_PATH` or `CHROMADB_INDEX_PATH`. If both `*_INDEX_PATH` env vars are set, `FAISS_INDEX_PATH` takes precedence. Make sure the folder specified by `*_INDEX_PATH` doesn't exist; otherwise the command will simply try to load the index from the folder and do a simple similarity search, as a way to verify that embeddings are generated and stored properly. Please note the HuggingFace Embedding model specified by `HF_EMBEDDINGS_MODEL_NAME` will be used to generate the embeddings.
130
+
131
+ ```
132
+ python ingest.py
133
+ ```
134
+
135
+ - Once embeddings are generated, you can test them out locally, or check them into your duplicated space. Please note HF Spaces git server does not allow PDF files to be checked in.
136
+
137
+ ## Play with Different Large Language Models
138
+
139
+ The source code supports different LLM types - as shown at the top of `.env.example`
140
+
141
+ ```
142
+ # LLM_MODEL_TYPE=openai
143
+ # LLM_MODEL_TYPE=gpt4all-j
144
+ # LLM_MODEL_TYPE=gpt4all
145
+ # LLM_MODEL_TYPE=llamacpp
146
+ # LLM_MODEL_TYPE=huggingface
147
+ # LLM_MODEL_TYPE=mosaicml
148
+ # LLM_MODEL_TYPE=stablelm
149
+ # LLM_MODEL_TYPE=openllm
150
+ LLM_MODEL_TYPE=hftgi
151
+ ```
152
+
153
+ - By default, the app runs the `microsoft/orca-2-13b` model with HF Text Generation Inference, which runs on a research server and might be down from time to time.
154
+
155
+ - Uncomment/comment the above to play with different LLM types. You may also want to update other related env vars. E.g., here's the list of HF models which have been tested with the code:
156
 
157
  ```
158
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/orca-2-7b"
159
+ HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/orca-2-13b"
160
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/wizardLM-7B-HF"
161
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/vicuna-7B-1.1-HF"
162
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-j"
163
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-falcon"
164
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="lmsys/fastchat-t5-3b-v1.0"
165
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-7b-chat-hf"
166
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-13b-chat-hf"
167
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-70b-chat-hf"
168
  ```
app.py CHANGED
@@ -1,38 +1,12 @@
1
  import json
2
- import gradio as gr
3
- import torch
4
- from transformers import (
5
- AutoModelForCausalLM,
6
- AutoTokenizer,
7
- TextIteratorStreamer,
8
- )
9
  import os
10
- from threading import Thread
11
- import subprocess
12
  from app_modules.utils import calc_bleu_rouge_scores, detect_repetitions
13
-
14
  from dotenv import find_dotenv, load_dotenv
15
 
16
  found_dotenv = find_dotenv(".env")
17
 
18
- if len(found_dotenv) == 0:
19
- found_dotenv = find_dotenv(".env.example")
20
- print(f"loading env vars from: {found_dotenv}")
21
- load_dotenv(found_dotenv, override=False)
22
-
23
- subprocess.run(
24
- "pip install flash-attn --no-build-isolation",
25
- env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
26
- shell=True,
27
- )
28
-
29
- token = os.getenv("HUGGINGFACE_AUTH_TOKEN")
30
-
31
- model_name = os.getenv(
32
- "HUGGINGFACE_MODEL_NAME_OR_PATH", "google/gemma-1.1-2b-it"
33
- ) # "microsoft/Phi-3-mini-128k-instruct"
34
- print(f" model_name: {model_name}")
35
-
36
  HF_RP = os.getenv("HF_RP", "1.2")
37
  repetition_penalty = float(HF_RP)
38
  print(f" repetition_penalty: {repetition_penalty}")
@@ -47,52 +21,21 @@ print(f"Loaded {len(examples)} examples")
47
 
48
  qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
49
 
50
- model = AutoModelForCausalLM.from_pretrained(
51
- model_name,
52
- token=token,
53
- trust_remote_code=True,
54
- )
55
- tok = AutoTokenizer.from_pretrained(model_name, token=token)
56
- terminators = [
57
- tok.eos_token_id,
58
- ]
59
-
60
- # Check that MPS is available
61
- if not torch.backends.mps.is_available():
62
- if not torch.backends.mps.is_built():
63
- print(
64
- "MPS not available because the current PyTorch install was not "
65
- "built with MPS enabled."
66
- )
67
- else:
68
- print(
69
- "MPS not available because the current MacOS version is not 12.3+ "
70
- "and/or you do not have an MPS-enabled device on this machine."
71
- )
72
- mps_device = None
73
- else:
74
- mps_device = torch.device("mps")
75
-
76
- if mps_device is not None:
77
- device = mps_device
78
- print("Using MPS")
79
- elif torch.cuda.is_available():
80
- device = torch.device("cuda")
81
- print(f"Using GPU: {torch.cuda.get_device_name(device)}")
82
- else:
83
- device = torch.device("cpu")
84
- print("Using CPU")
85
-
86
- model = model.to(device)
87
 
88
 
89
  def chat(
90
  message,
91
- history,
 
92
  temperature=0,
93
  repetition_penalty=1.1,
94
  do_sample=True,
95
  max_tokens=1024,
 
96
  ):
97
  print("repetition_penalty:", repetition_penalty)
98
  chat = []
@@ -109,36 +52,58 @@ def chat(
109
 
110
  chat.append({"role": "user", "content": message})
111
 
112
- messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
113
- model_inputs = tok([messages], return_tensors="pt").to(device)
114
- streamer = TextIteratorStreamer(
115
- tok, timeout=200.0, skip_prompt=True, skip_special_tokens=True
116
- )
117
 
118
- if temperature == 0:
119
- temperature = 0.01
120
 
121
- generate_kwargs = dict(
122
- model_inputs,
123
- streamer=streamer,
124
- max_new_tokens=max_tokens,
125
- do_sample=do_sample,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  temperature=temperature,
127
- eos_token_id=terminators,
128
- )
129
-
130
- t = Thread(target=model.generate, kwargs=generate_kwargs)
131
- t.start()
132
-
133
- partial_text = ""
134
- for new_text in streamer:
135
  partial_text += new_text
136
  yield partial_text
137
 
138
  answer = partial_text
139
- (newline_score, repetition_score, total_repetitions) = detect_repetitions(answer)
140
  partial_text += "\n\nRepetition Metrics:\n"
141
- partial_text += f"1. Newline Score: {newline_score:.3f}\n"
142
  partial_text += f"1. Repetition Score: {repetition_score:.3f}\n"
143
  partial_text += f"1. Total Repetitions: {total_repetitions:.3f}\n"
144
 
@@ -151,7 +116,7 @@ def chat(
151
  scores = calc_bleu_rouge_scores([answer], [questions[index][key]], debug=True)
152
 
153
  partial_text += "\n\n Performance Metrics:\n"
154
- partial_text += f'1. BLEU: {scores["bleu_scores"]["bleu"]:.3f}\n'
155
  partial_text += f'1. RougeL: {scores["rouge_scores"]["rougeL"]:.3f}\n'
156
 
157
  yield partial_text
@@ -165,6 +130,7 @@ demo = gr.ChatInterface(
165
  label="⚙️ Parameters", open=False, render=False
166
  ),
167
  additional_inputs=[
 
168
  gr.Slider(
169
  minimum=0, maximum=1, step=0.1, value=0, label="Temperature", render=False
170
  ),
@@ -185,9 +151,13 @@ demo = gr.ChatInterface(
185
  label="Max new tokens",
186
  render=False,
187
  ),
 
 
 
 
 
 
 
188
  ],
189
- stop_btn="Stop Generation",
190
- title="Chat With LLMs",
191
- description=f"Now Running [{model_name}](https://huggingface.co/{model_name})",
192
  )
193
  demo.launch()
 
1
  import json
 
 
 
 
 
 
 
2
  import os
3
+ import gradio as gr
4
+ from huggingface_hub import InferenceClient
5
  from app_modules.utils import calc_bleu_rouge_scores, detect_repetitions
 
6
  from dotenv import find_dotenv, load_dotenv
7
 
8
  found_dotenv = find_dotenv(".env")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  HF_RP = os.getenv("HF_RP", "1.2")
11
  repetition_penalty = float(HF_RP)
12
  print(f" repetition_penalty: {repetition_penalty}")
 
21
 
22
  qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
23
 
24
+ """
25
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
26
+ """
27
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
  def chat(
31
  message,
32
+ history: list[tuple[str, str]],
33
+ system_message,
34
  temperature=0,
35
  repetition_penalty=1.1,
36
  do_sample=True,
37
  max_tokens=1024,
38
+ top_p=0.95,
39
  ):
40
  print("repetition_penalty:", repetition_penalty)
41
  chat = []
 
52
 
53
  chat.append({"role": "user", "content": message})
54
 
55
+ messages = [{"role": "system", "content": system_message}]
56
+ messages.append({"role": "user", "content": message})
 
 
 
57
 
58
+ partial_text = ""
 
59
 
60
+ # huggingface_hub.utils._errors.HfHubHTTPError: 422 Client Error: Unprocessable Entity for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (Request ID: NZamtWmdoSg3flfgRKT0e)
61
+ # Make sure 'text-generation' task is supported by the model.
62
+ # for message in client.text_generation(
63
+ # messages,
64
+ # stream=True,
65
+ # temperature=temperature,
66
+ # top_p=top_p,
67
+ # repetition_penalty=repetition_penalty,
68
+ # ):
69
+
70
+ # https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta
71
+ # {
72
+ # "id": "HuggingFaceH4/zephyr-7b-beta",
73
+ # "sha": "b70e0c9a2d9e14bd1e812d3c398e5f313e93b473",
74
+ # "pipeline_tag": "text-generation",
75
+ # "library_name": "transformers",
76
+ # "private": false,
77
+ # "gated": false,
78
+ # "siblings": [],
79
+ # "safetensors": {
80
+ # "parameters": {
81
+ # "BF16": 7241732096
82
+ # }
83
+ # },
84
+ # "cardData": {
85
+ # "tags": [
86
+ # "generated_from_trainer"
87
+ # ],
88
+ # "base_model": "mistralai/Mistral-7B-v0.1"
89
+ # }
90
+ # }
91
+
92
+ for message in client.chat_completion(
93
+ messages,
94
+ max_tokens=max_tokens,
95
+ stream=True,
96
  temperature=temperature,
97
+ top_p=top_p,
98
+ ):
99
+ new_text = message.choices[0].delta.content
 
 
 
 
 
100
  partial_text += new_text
101
  yield partial_text
102
 
103
  answer = partial_text
104
+ (whitespace_score, repetition_score, total_repetitions) = detect_repetitions(answer)
105
  partial_text += "\n\nRepetition Metrics:\n"
106
+ partial_text += f"1. Whitespace Score: {whitespace_score:.3f}\n"
107
  partial_text += f"1. Repetition Score: {repetition_score:.3f}\n"
108
  partial_text += f"1. Total Repetitions: {total_repetitions:.3f}\n"
109
 
 
116
  scores = calc_bleu_rouge_scores([answer], [questions[index][key]], debug=True)
117
 
118
  partial_text += "\n\n Performance Metrics:\n"
119
+ partial_text += f'1. BLEU-1: {scores["bleu_scores"]["bleu"]:.3f}\n'
120
  partial_text += f'1. RougeL: {scores["rouge_scores"]["rougeL"]:.3f}\n'
121
 
122
  yield partial_text
 
130
  label="⚙️ Parameters", open=False, render=False
131
  ),
132
  additional_inputs=[
133
+ gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
134
  gr.Slider(
135
  minimum=0, maximum=1, step=0.1, value=0, label="Temperature", render=False
136
  ),
 
151
  label="Max new tokens",
152
  render=False,
153
  ),
154
+ gr.Slider(
155
+ minimum=0.1,
156
+ maximum=1.0,
157
+ value=0.95,
158
+ step=0.05,
159
+ label="Top-p (nucleus sampling)",
160
+ ),
161
  ],
 
 
 
162
  )
163
  demo.launch()
app_modules/llm_loader.py CHANGED
@@ -3,7 +3,7 @@ import sys
3
  import threading
4
  from queue import Queue
5
  from typing import Any, Dict, List, Optional
6
-
7
  import torch
8
  from langchain.callbacks.base import BaseCallbackHandler
9
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -176,7 +176,6 @@ class LLMLoader:
176
  load_in_4bit=load_quantized_model == "4bit",
177
  bnb_4bit_use_double_quant=load_quantized_model == "4bit",
178
  load_in_8bit=load_quantized_model == "8bit",
179
- bnb_8bit_use_double_quant=load_quantized_model == "8bit",
180
  )
181
 
182
  callbacks = []
@@ -212,13 +211,19 @@ class LLMLoader:
212
  print(f" using model: {MODEL_NAME}")
213
  self.llm = ChatGoogleGenerativeAI(
214
  model=MODEL_NAME,
215
- convert_system_message_to_human=True,
216
  callbacks=callbacks,
217
  streaming=True,
218
  safety_settings={
 
 
 
219
  HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
220
  },
221
  )
 
 
 
 
222
  elif self.llm_model_type.startswith("gpt4all"):
223
  MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
224
  self.llm = GPT4All(
 
3
  import threading
4
  from queue import Queue
5
  from typing import Any, Dict, List, Optional
6
+ import google.generativeai as genai
7
  import torch
8
  from langchain.callbacks.base import BaseCallbackHandler
9
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
176
  load_in_4bit=load_quantized_model == "4bit",
177
  bnb_4bit_use_double_quant=load_quantized_model == "4bit",
178
  load_in_8bit=load_quantized_model == "8bit",
 
179
  )
180
 
181
  callbacks = []
 
211
  print(f" using model: {MODEL_NAME}")
212
  self.llm = ChatGoogleGenerativeAI(
213
  model=MODEL_NAME,
 
214
  callbacks=callbacks,
215
  streaming=True,
216
  safety_settings={
217
+ HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
218
+ HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
219
+ HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
220
  HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
221
  },
222
  )
223
+ # for m in genai.list_models():
224
+ # if "generateContent" in m.supported_generation_methods:
225
+ # print(m.name)
226
+ # exit()
227
  elif self.llm_model_type.startswith("gpt4all"):
228
  MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
229
  self.llm = GPT4All(
app_modules/utils.py CHANGED
@@ -1,9 +1,9 @@
1
  # -*- coding:utf-8 -*-
2
  from __future__ import annotations
3
 
 
4
  import logging
5
  import os
6
- import math
7
  import platform
8
  import re
9
  from pathlib import Path
@@ -13,8 +13,6 @@ import requests
13
  import torch
14
  from tqdm import tqdm
15
  from langchain.memory import ConversationSummaryBufferMemory
16
- import matplotlib.pyplot as plt
17
- import matplotlib.ticker as mtick
18
 
19
 
20
  class LogRecord(logging.LogRecord):
@@ -278,153 +276,3 @@ def detect_repetition_scores(text, debug=False):
278
  text, debug=debug
279
  )
280
  return pd.Series([newline_score, repetition_score, total_repetitions])
281
-
282
-
283
- def detect_scores(text, debug=False):
284
- newline_score, repetition_score, total_repetitions = detect_repetitions(
285
- text, debug=debug
286
- )
287
- return pd.Series([newline_score, repetition_score, total_repetitions])
288
-
289
-
290
- def load_with_newline_and_repetition_scores(result_file, force_recalculate=False):
291
- print(f"loading result file: {result_file}")
292
- df = pd.read_csv(result_file, comment="#", on_bad_lines="warn")
293
-
294
- if (
295
- force_recalculate
296
- or "newline_score" not in df.columns
297
- or "repetition_score" not in df.columns
298
- or "total_repetitions" not in df.columns
299
- ):
300
- df[["newline_score", "repetition_score", "total_repetitions"]] = df[
301
- "answer"
302
- ].apply(detect_scores)
303
- df.to_csv(result_file, index=False)
304
-
305
- return df
306
-
307
-
308
- def replace_last(source_string, old_string, new_string):
309
- head, _sep, tail = source_string.rpartition(old_string)
310
- return head + new_string + tail
311
-
312
-
313
- df_ms_macro = pd.read_json("./data/datasets/ms_macro.json")
314
-
315
-
316
- def load_for_repetition_penalty_ms_macro(
317
- csv_result_file, repetition_penalty, force_recalculate=False
318
- ):
319
- result_file = replace_last(
320
- csv_result_file, ".csv", f"_RP_{repetition_penalty:.3f}.csv"
321
- )
322
- df = load_with_newline_and_repetition_scores(
323
- result_file, force_recalculate=force_recalculate
324
- )
325
-
326
- if df["ground_truth"][0] != df_ms_macro["wellFormedAnswers"][0]:
327
- df["ground_truth"] = df_ms_macro["wellFormedAnswers"]
328
- print("ground_truth updated for:", result_file)
329
- df.to_csv(result_file, index=False)
330
- return df
331
-
332
-
333
- def adjust_perf_scores_with_repetition_penalty(result, precision, recall):
334
- newline_score = [
335
- df["newline_score"].mean() for df in result["df_list_repetition_penalty"]
336
- ]
337
- print(f"newline_score: {newline_score}")
338
-
339
- repetition_score = [
340
- df["repetition_score"].mean() for df in result["df_list_repetition_penalty"]
341
- ]
342
- print(f"repetition_score: {repetition_score}")
343
-
344
- precision = [
345
- f / math.log10(10 + n + r)
346
- for f, n, r in zip(precision, newline_score, repetition_score)
347
- ]
348
- recall = [
349
- f / math.log10(10 + n + r)
350
- for f, n, r in zip(recall, newline_score, repetition_score)
351
- ]
352
-
353
- return precision, recall
354
-
355
-
356
- # MS MACRO
357
- def plot_performance_scores_ms_macro(
358
- result,
359
- models=None,
360
- title="Performance",
361
- ):
362
-
363
- if models is None:
364
- models = result.keys()
365
- for model in models:
366
- print(f"model: {model}")
367
- df = result[model]["df_overall"]
368
- # print(result[model]["df_list_repetition_penalty"][0].describe())
369
-
370
- # Calculate the statistics
371
- bleu1 = list(df["bleu1"])
372
- rougeL = list(df["rougeL"])
373
- f1 = [2 * (p * r) / (p + r) for p, r in zip(bleu1, rougeL)]
374
- best_f1 = max(f1)
375
- best_f1_index = f1.index(best_f1)
376
-
377
- bleu1, rougeL = adjust_perf_scores_with_repetition_penalty(
378
- result[model], bleu1, rougeL
379
- )
380
- afrp = [2 * (p * r) / (p + r) for p, r in zip(bleu1, rougeL)]
381
-
382
- # f1 = [df["f1"].mean() for df in result[model]["df_list_repetition_penalty"]]
383
- best_afrp = max(afrp)
384
- best_afrp_index = afrp.index(best_afrp)
385
-
386
- repetition_penalties = list(df["repetition_penalty"])
387
-
388
- # line plot for precision, recall, f1
389
- plt.figure(figsize=(10, 6))
390
-
391
- plt.axvspan(
392
- repetition_penalties[best_f1_index] - 0.01,
393
- repetition_penalties[best_f1_index] + 0.01,
394
- alpha=0.5,
395
- edgecolor="none",
396
- facecolor="blue",
397
- )
398
-
399
- plt.axvspan(
400
- repetition_penalties[best_afrp_index] - 0.01,
401
- repetition_penalties[best_afrp_index] + 0.01,
402
- alpha=0.5,
403
- edgecolor="none",
404
- facecolor="orange",
405
- )
406
-
407
- plt.plot(
408
- repetition_penalties,
409
- f1,
410
- label="Overall Perf Score",
411
- marker="D",
412
- color="blue",
413
- )
414
- plt.plot(
415
- repetition_penalties,
416
- afrp,
417
- label="RF Adjusted Perf Score",
418
- marker="o",
419
- color="orange",
420
- )
421
-
422
- plt.xlabel("Repetition Penalties")
423
- plt.ylabel("Score")
424
- plt.xlim(0.99, 1.31)
425
- # y in percentage
426
- plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
427
- plt.title(f"{model} {title}")
428
- plt.legend(bbox_to_anchor=(1.0, 0.5), loc="center left")
429
-
430
- plt.show()
 
1
  # -*- coding:utf-8 -*-
2
  from __future__ import annotations
3
 
4
+ import json
5
  import logging
6
  import os
 
7
  import platform
8
  import re
9
  from pathlib import Path
 
13
  import torch
14
  from tqdm import tqdm
15
  from langchain.memory import ConversationSummaryBufferMemory
 
 
16
 
17
 
18
  class LogRecord(logging.LogRecord):
 
276
  text, debug=debug
277
  )
278
  return pd.Series([newline_score, repetition_score, total_repetitions])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/WebQSP.test.wikidata.json ADDED
The diff for this file is too large to render. See raw diff
 
data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.060.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2.json} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f834d6a3c9429ba3f883a923e12c94c8b9a62acfe0cfeb3a1218403e31c7958
3
- size 952195
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1277996feced5bd3ef1a12f0bbfd612219ee32825ced7a5f67ba154b2d61b7a7
3
+ size 655310
data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.120.txt → eval/Llama-2-13b-chat-hf_wd_true_RP_1.000-t2_evaluated.json} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9b32298de75ee46fc56fe8434f4de0488954d4e643453af37c31895dc054bb3
3
- size 607038
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb905b34dc2c46254530376a9609e2f6b9011b412c67994220549fb94b9ee72b
3
+ size 716045
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4ee64cfc0130b4b323a159ead1a9104989c800578856b1805cc056ab5257e45
3
+ size 653448
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.020-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:689bc4f321e11fc4d42cd6a3ef1d15f1f87c4228d8248eb7a4e28d635746db73
3
+ size 714002
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3b8f1059469d94a2f9f6a0d2216e5d1cf66d7b48cdf39b267a4cc67623e1f47
3
+ size 650480
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.040-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c01183de7769798e48ac681749f6cb2722ce0c4763e01f03e17569d7b0e8ca5
3
+ size 710922
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a12beb32ed61761899bba3a73d6959691f8785328d70c13c9037ed71116f5e2
3
+ size 654870
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.060-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f30bf65ae09a25f1ac818528d352d3cff9eced98fab349c47b55672a53aa457a
3
+ size 715516
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a6404ba60e543621c5be62e9b4d52639671c42f61ed9249afb52428fd70015c
3
+ size 644399
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.080-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484ba047b61462ee92bbfa33b335bee05b13c2b667abae414417bfe33da46274
3
+ size 705020
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cb665ebc45ca714f9e03415937c152923f75ca9ccacca2bdf2f2698a009948
3
+ size 642376
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.100-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96e0d343cded4a11df5bdc1dffa7c2ed9631afce46f706d0c0f8f35d4683df9d
3
+ size 702987
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe968825c4b87aa7b2a09bf9a0ddcef58cbf58707b8620c0e2c4344b6cbded8
3
+ size 647286
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.120-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc525d15dc082962e317fb8b3fccf28cc9a6de4d6d765d743016dc34f13ec506
3
+ size 707966
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f354e4dd9a889af49831dd797199143be856040b51d152d935216ebcb0d3e0f0
3
+ size 644443
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.140-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25de5711464fa1015e193c0f452902742fbab711c464a4f87d9774914f3edf2d
3
+ size 705131
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:539b143d92be17b1f397e020ff3518ea397fea3f985a36e2db4cd6efdc9b0755
3
+ size 643126
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.160-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b92bbb2bc9dffa027274dc593cbc61a838bde4a8e54a1b4210d4f921e280b436
3
+ size 703815
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e56aa85968670bfe81b541bec3c57cf5763457c76847266411844abc544fc26
3
+ size 652949
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.180-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2c1e8311f7b644fb2ce62b05db31cd81236ab8bfa94107ac8d33e7e03444871
3
+ size 713794
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4136642c87159fba5861af7b4be13a2f099943041703f4d5e4c86b85c49c9a
3
+ size 647659
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.200-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:784ad453019ef8ac12c24032d16cbba3d1b11b25439903ebbee7c51382b13531
3
+ size 708220
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a467439fdb97136ddfbda188a4e216d8aec1305837d535c2c5c3fe0546c490bb
3
+ size 647989
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.220-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a1d85371b24da315049092b1adf3229c0a4663e64e453843dfadb596d9f0ef1
3
+ size 708567
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c1241171262c6fb1712ed8089bef7e790b71bf2f327c95052d4087037005f2
3
+ size 642213
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.240-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4735ae18bf2c0f576f116ec178afb817a8973d9e84fd8fa51e8a3cc05c5b3a6e
3
+ size 702922
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79e64c9b8ba5c47da0be79d0e9f2551c2efe31352bd21b94362cf549acd7f3f6
3
+ size 642810
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.260-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24792de358440e5ac6f44e4711f067de8842a7ef0ee0dca1fda8effa307959b4
3
+ size 703496
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a98e4e6f6f07902ba7d098f02b186f2150d1c355769a5576b0645cddee195062
3
+ size 653962
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.280-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b99d3c8f0c0a3b35c9b707f82bd1657765392da680cf89de1ae71b10a8971b4
3
+ size 714741
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579e3e57776f6bfd53f312f565335a71338e392e2b39ffc7b588a2df5e001e9f
3
+ size 644777
data/eval/Llama-2-13b-chat-hf_wd_true_RP_1.300-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be28d56ac8257ca7489d51265d2d71a74abd54330abcd5e4f22b077ba344c651
3
+ size 705446
Llama-2-eval/data/datasets/ms_macro/data-00000-of-00001.arrow → data/eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2.json RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ef1814438c08fe1bcd56be04a29c7dbe96f09420be471fdfc36d61c1500f13c
3
- size 2068896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:256520aa638029eb70674b3a93acbd7d21277f2626d756e1cbd55d54dc40b55e
3
+ size 1046106
data/{logs/Phi-3-mini-128k-instruct_mm_false_RP_1.000.txt → eval/Llama-2-70b-chat-hf_wd_RP_1.000-t2_evaluated.json} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34860965667ba1a520aa539d7893315c0769b8fcccc7eb9a2b83d3165629d434
3
- size 1412667
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7917c1b3993e69ca3d3b9c6b0245646e5f78ea2568b0b669f2e08b37ad9759f1
3
+ size 1107148
data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f43b5b7139632e00b30f719868b85d265592ba6e99f04e2e4abb404679ae4ad
3
+ size 813840
data/eval/Llama-2-70b-chat-hf_wd_RP_1.020-t2_evaluated.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb0c6be15cadd8ef841c8aaceb4d37ce08fbe242757ea9e03be4f3c13c250db
3
+ size 874638
data/eval/Llama-2-70b-chat-hf_wd_RP_1.040-t2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:447323f0b62e5f885fe58667b9698b1b9038f37381a19f0b5eb948b501d653a9
3
+ size 663779