dh-mc commited on
Commit
32a6937
1 Parent(s): c662eeb

ms macro dataset

Browse files
.env.example ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LLM_MODEL_TYPE=huggingface
2
+ # LLM_MODEL_TYPE=openai
3
+ # LLM_MODEL_TYPE=hftgi
4
+ # LLM_MODEL_TYPE=ollama
5
+ # LLM_MODEL_TYPE=google
6
+ # LLM_MODEL_TYPE=vllm
7
+
8
+ HUGGINGFACE_AUTH_TOKEN=
9
+
10
+ HFTGI_SERVER_URL=
11
+
12
+ OPENAI_API_KEY=
13
+
14
+ GOOGLE_API_KEY=
15
+
16
+ # if unset, default to "gpt-3.5-turbo"
17
+ OPENAI_MODEL_NAME=
18
+
19
+ # GEMINI_MODEL_NAME=gemini-1.5-pro-latest
20
+
21
+ # OLLAMA_MODEL_NAME=orca2:7b
22
+ # OLLAMA_MODEL_NAME=mistral:7b
23
+ # OLLAMA_MODEL_NAME=gemma:7b
24
+ # OLLAMA_MODEL_NAME=llama2:7b
25
+ OLLAMA_MODEL_NAME=llama3:8b
26
+
27
+ OLLAMA_RP=1.15
28
+
29
+
30
+ # cpu, mps or cuda:0 - if unset, use whatever detected
31
+ HF_EMBEDDINGS_DEVICE_TYPE=
32
+ HF_PIPELINE_DEVICE_TYPE=
33
+
34
+ # uncomment one of the below to load corresponding quantized model
35
+ # LOAD_QUANTIZED_MODEL=4bit
36
+ # LOAD_QUANTIZED_MODEL=8bit
37
+
38
+ QA_WITH_RAG=true
39
+ # QA_WITH_RAG=false
40
+
41
+ RETRIEVER_TYPE=questions_file
42
+ # RETRIEVER_TYPE=vectorstore
43
+
44
+ QUESTIONS_FILE_PATH="./data/datasets/ms_macro.json"
45
+
46
+ DISABLE_MODEL_PRELOADING=true
47
+ CHAT_HISTORY_ENABLED=false
48
+ SHOW_PARAM_SETTINGS=false
49
+ SHARE_GRADIO_APP=false
50
+
51
+ # if unset, default to "hkunlp/instructor-xl"
52
+ HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
53
+
54
+ # number of cpu cores - used to set n_threads for GPT4ALL & LlamaCpp models
55
+ NUMBER_OF_CPU_CORES=
56
+
57
+ USING_TORCH_BFLOAT16=true
58
+
59
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-3b"
60
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-7b"
61
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-12b"
62
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/wizardLM-7B-HF"
63
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/vicuna-7B-1.1-HF"
64
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-j"
65
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-falcon"
66
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="lmsys/fastchat-t5-3b-v1.0"
67
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-7b-chat-hf"
68
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-13b-chat-hf"
69
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-70b-chat-hf"
70
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Meta-Llama-3-8B-Instruct"
71
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Meta-Llama-3-70B-Instruct"
72
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Orca-2-7b"
73
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Orca-2-13b"
74
+ HUGGINGFACE_MODEL_NAME_OR_PATH="google/gemma-1.1-2b-it"
75
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="google/gemma-1.1-7b-it"
76
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Phi-3-mini-128k-instruct"
77
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="mistralai/Mistral-7B-Instruct-v0.2"
.gitignore ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.out
2
+ *.log
3
+ pdfs/
4
+ .vscode/
5
+
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ pip-wheel-metadata/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ # *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
+ __pypackages__/
101
+
102
+ # Celery stuff
103
+ celerybeat-schedule
104
+ celerybeat.pid
105
+
106
+ # SageMath parsed files
107
+ *.sage.py
108
+
109
+ # Environments
110
+ .env
111
+ .venv
112
+ env/
113
+ venv/
114
+ ENV/
115
+ env.bak/
116
+ venv.bak/
117
+
118
+ # Spyder project settings
119
+ .spyderproject
120
+ .spyproject
121
+
122
+ # Rope project settings
123
+ .ropeproject
124
+
125
+ # mkdocs documentation
126
+ /site
127
+
128
+ # mypy
129
+ .mypy_cache/
130
+ .dmypy.json
131
+ dmypy.json
132
+
133
+ # Pyre type checker
134
+ .pyre/
135
+
136
+ # JetBrains
137
+ .idea
138
+
139
+ *.db
140
+
141
+ .DS_Store
142
+
143
+ vectorstore.pkl
144
+ langchain.readthedocs.io/
145
+
146
+ models/
147
+ data/logs/hftgi-2024-03-18.txt
148
+ qa_*_all_results.csv
149
+ qa_*_test_results.csv
Llama-2-eval/data/datasets/ms_macro/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef1814438c08fe1bcd56be04a29c7dbe96f09420be471fdfc36d61c1500f13c
3
+ size 2068896
Llama-2-eval/data/datasets/ms_macro/dataset_info.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "ms-macro-wellformed_only",
6
+ "dataset_size": 726469485,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00000-of-00002-0a6f58dc7ee03f61.parquet": {
10
+ "num_bytes": 164629356,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00001-of-00002-5262fd5ec1911156.parquet": {
14
+ "num_bytes": 164721520,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/test-00000-of-00001-f965dd5a841915d3.parquet": {
18
+ "num_bytes": 26541566,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 355892442,
23
+ "features": {
24
+ "answers": {
25
+ "feature": {
26
+ "dtype": "string",
27
+ "_type": "Value"
28
+ },
29
+ "_type": "Sequence"
30
+ },
31
+ "passages": {
32
+ "feature": {
33
+ "is_selected": {
34
+ "dtype": "int32",
35
+ "_type": "Value"
36
+ },
37
+ "passage_text": {
38
+ "dtype": "string",
39
+ "_type": "Value"
40
+ },
41
+ "url": {
42
+ "dtype": "string",
43
+ "_type": "Value"
44
+ }
45
+ },
46
+ "_type": "Sequence"
47
+ },
48
+ "query": {
49
+ "dtype": "string",
50
+ "_type": "Value"
51
+ },
52
+ "query_id": {
53
+ "dtype": "int32",
54
+ "_type": "Value"
55
+ },
56
+ "query_type": {
57
+ "dtype": "string",
58
+ "_type": "Value"
59
+ },
60
+ "wellFormedAnswers": {
61
+ "feature": {
62
+ "dtype": "string",
63
+ "_type": "Value"
64
+ },
65
+ "_type": "Sequence"
66
+ }
67
+ },
68
+ "homepage": "",
69
+ "license": "",
70
+ "size_in_bytes": 1082361927,
71
+ "splits": {
72
+ "train": {
73
+ "name": "train",
74
+ "num_bytes": 674327331,
75
+ "num_examples": 153725,
76
+ "shard_lengths": [
77
+ 116863,
78
+ 36862
79
+ ],
80
+ "dataset_name": "ms-macro-wellformed_only"
81
+ },
82
+ "test": {
83
+ "name": "test",
84
+ "num_bytes": 52142154,
85
+ "num_examples": 12467,
86
+ "dataset_name": "ms-macro-wellformed_only"
87
+ }
88
+ },
89
+ "version": {
90
+ "version_str": "0.0.0",
91
+ "major": 0,
92
+ "minor": 0,
93
+ "patch": 0
94
+ }
95
+ }
Llama-2-eval/data/datasets/ms_macro/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "fe2a26ddba75833a",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "test"
13
+ }
Llama-2-eval/data/results/results_full-a40.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
2
+ gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
3
+ gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
4
+ Llama-2-13b-chat-hf,1.12,2133.992,1.66,33389,24007,15.646,1.391,0.163,0.3345,0.2031,0.3756,0.1632,0.2962,0.1388,0.3045,0.3423,0.5302,0.1846,0.3694,0.719
5
+ vicuna-13b-v1.1,1.095,2212.946,1.682,35308,26456,15.955,1.335,0.1285,0.2319,0.1991,0.2812,0.1556,0.2644,0.2009,0.2768,0.3159,0.5761,0.1853,0.3276,0.749
6
+ Llama-2-7b-chat-hf,1.19,1280.314,1.793,34349,23987,26.829,1.432,0.1274,0.2383,0.1836,0.2621,0.1572,0.2754,0.17,0.2911,0.3631,0.5383,0.1781,0.3209,0.698
7
+ vicuna-7b-v1.1,1.095,975.73,1.574,25932,18714,26.577,1.386,0.1664,0.2838,0.2227,0.3118,0.166,0.2351,0.259,0.2753,0.4542,0.5838,0.2218,0.3379,0.722
8
+ wizardLM-7B-HF,1.095,1265.93,1.667,33570,24003,26.518,1.399,0.1367,0.2584,0.2027,0.2882,0.1358,0.2592,0.1985,0.3085,0.4154,0.5794,0.1866,0.3384,0.715
9
+ mpt-7b-instruct,1.05,2071.066,1.42,12374,9927,5.975,1.246,0.1804,0.285,0.2589,0.2556,0.2383,0.2468,0.2635,0.2571,0.3512,0.4042,0.2509,0.2897,0.802
10
+ gpt4all-j,1.095,5603.316,1.706,31502,27099,5.622,1.162,0.1236,0.2406,0.1708,0.2511,0.143,0.255,0.194,0.2941,0.3721,0.5337,0.1737,0.3153,0.860
Llama-2-eval/data/results/results_full-l40.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,words_per_token_l40,words_per_second,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
2
+ gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.867,,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
3
+ gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.871,,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
4
+ Llama-2-13b-chat-hf,1.12,1687.637,1.785,32808,23575,19.44,1.392,0.718,13.969,0.1612,0.3305,0.2061,0.3701,0.1675,0.3018,0.141,0.305,0.3394,0.5288,0.1866,0.368,0.719
5
+ vicuna-13b-v1.1,1.095,1799.165,2.197,35543,26613,19.755,1.336,0.749,14.792,0.1274,0.2321,0.1994,0.2834,0.154,0.2631,0.1984,0.2773,0.3194,0.5759,0.1844,0.3256,0.749
6
+ Llama-2-7b-chat-hf,1.19,1002.46,6.606,34686,24229,34.601,1.432,0.698,24.170,0.1269,0.2404,0.1824,0.2614,0.157,0.2769,0.1687,0.2896,0.3565,0.5378,0.177,0.3214,0.699
7
+ vicuna-7b-v1.1,1.095,758.227,1.432,25827,18638,34.062,1.386,0.722,24.581,0.1673,0.2859,0.2221,0.3096,0.1655,0.2327,0.2576,0.2717,0.4564,0.5849,0.2216,0.3387,0.722
8
+ wizardLM-7B-HF,1.095,998.702,1.683,33674,23996,33.718,1.403,0.713,24.027,0.1372,0.259,0.2046,0.2878,0.1354,0.2588,0.1982,0.3083,0.4154,0.5769,0.187,0.3383,0.713
9
+ mpt-7b-instruct,1.05,1622.435,1.338,12607,10139,7.77,1.243,0.805,6.249,0.1751,0.2756,0.2569,0.2625,0.2349,0.2456,0.2466,0.2566,0.3522,0.4049,0.2455,0.2889,0.804
10
+ gpt4all-j,1.095,3794.429,1.611,31719,27286,8.359,1.162,0.861,7.191,0.1262,0.2443,0.1669,0.251,0.1394,0.2505,0.1937,0.2968,0.3693,0.5348,0.1719,0.3151,0.860
Llama-2-eval/notebook/baseline.ipynb ADDED
@@ -0,0 +1,1983 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "id": "a6d96660",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "True"
13
+ ]
14
+ },
15
+ "execution_count": 5,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "import os\n",
22
+ "from dotenv import load_dotenv\n",
23
+ "\n",
24
+ "load_dotenv()"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 11,
30
+ "id": "7510ab87",
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "data": {
35
+ "text/plain": [
36
+ "DatasetDict({\n",
37
+ " train: Dataset({\n",
38
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
39
+ " num_rows: 153725\n",
40
+ " })\n",
41
+ " test: Dataset({\n",
42
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
43
+ " num_rows: 12467\n",
44
+ " })\n",
45
+ "})"
46
+ ]
47
+ },
48
+ "execution_count": 11,
49
+ "metadata": {},
50
+ "output_type": "execute_result"
51
+ }
52
+ ],
53
+ "source": [
54
+ "from datasets import load_dataset\n",
55
+ "\n",
56
+ "dataset = load_dataset(\"zhengxuanzenwu/ms-macro-wellformed_only\")\n",
57
+ "dataset"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": 12,
63
+ "id": "1f4f0e76",
64
+ "metadata": {},
65
+ "outputs": [
66
+ {
67
+ "data": {
68
+ "text/html": [
69
+ "<div>\n",
70
+ "<style scoped>\n",
71
+ " .dataframe tbody tr th:only-of-type {\n",
72
+ " vertical-align: middle;\n",
73
+ " }\n",
74
+ "\n",
75
+ " .dataframe tbody tr th {\n",
76
+ " vertical-align: top;\n",
77
+ " }\n",
78
+ "\n",
79
+ " .dataframe thead th {\n",
80
+ " text-align: right;\n",
81
+ " }\n",
82
+ "</style>\n",
83
+ "<table border=\"1\" class=\"dataframe\">\n",
84
+ " <thead>\n",
85
+ " <tr style=\"text-align: right;\">\n",
86
+ " <th></th>\n",
87
+ " <th>answers</th>\n",
88
+ " <th>passages</th>\n",
89
+ " <th>query</th>\n",
90
+ " <th>query_id</th>\n",
91
+ " <th>query_type</th>\n",
92
+ " <th>wellFormedAnswers</th>\n",
93
+ " </tr>\n",
94
+ " </thead>\n",
95
+ " <tbody>\n",
96
+ " <tr>\n",
97
+ " <th>0</th>\n",
98
+ " <td>[2,662]</td>\n",
99
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
100
+ " <td>albany mn population</td>\n",
101
+ " <td>15177</td>\n",
102
+ " <td>NUMERIC</td>\n",
103
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>1</th>\n",
107
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
108
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
109
+ " <td>current weather in volcano, ca</td>\n",
110
+ " <td>114414</td>\n",
111
+ " <td>DESCRIPTION</td>\n",
112
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
113
+ " </tr>\n",
114
+ " <tr>\n",
115
+ " <th>2</th>\n",
116
+ " <td>[Hippocrates]</td>\n",
117
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
118
+ " <td>____________________ is considered the father ...</td>\n",
119
+ " <td>9083</td>\n",
120
+ " <td>DESCRIPTION</td>\n",
121
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>3</th>\n",
125
+ " <td>[120 days from the date of the Note.]</td>\n",
126
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
127
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
128
+ " <td>281439</td>\n",
129
+ " <td>NUMERIC</td>\n",
130
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
131
+ " </tr>\n",
132
+ " <tr>\n",
133
+ " <th>4</th>\n",
134
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
135
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
136
+ " <td>average pharmacy tech salary</td>\n",
137
+ " <td>40287</td>\n",
138
+ " <td>NUMERIC</td>\n",
139
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
140
+ " </tr>\n",
141
+ " </tbody>\n",
142
+ "</table>\n",
143
+ "</div>"
144
+ ],
145
+ "text/plain": [
146
+ " answers \\\n",
147
+ "0 [2,662] \n",
148
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
149
+ "2 [Hippocrates] \n",
150
+ "3 [120 days from the date of the Note.] \n",
151
+ "4 [From $26,000 to $39,000 a year] \n",
152
+ "\n",
153
+ " passages \\\n",
154
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
155
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
156
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
157
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
158
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
159
+ "\n",
160
+ " query query_id query_type \\\n",
161
+ "0 albany mn population 15177 NUMERIC \n",
162
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
163
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
164
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
165
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
166
+ "\n",
167
+ " wellFormedAnswers \n",
168
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
169
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
170
+ "2 [Hippocrates is considered the father of moder... \n",
171
+ "3 [An appraisal is good for 120 days from the da... \n",
172
+ "4 [The average salary for a pharmacy technician ... "
173
+ ]
174
+ },
175
+ "execution_count": 12,
176
+ "metadata": {},
177
+ "output_type": "execute_result"
178
+ }
179
+ ],
180
+ "source": [
181
+ "df = dataset[\"test\"].to_pandas()\n",
182
+ "df.head()"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": 15,
188
+ "id": "3e9b4cef",
189
+ "metadata": {},
190
+ "outputs": [
191
+ {
192
+ "data": {
193
+ "text/plain": [
194
+ "{'answers': ['2,662'],\n",
195
+ " 'passages': {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0],\n",
196
+ " 'passage_text': ['City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.',\n",
197
+ " 'Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.',\n",
198
+ " 'For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
199
+ " 'Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.',\n",
200
+ " 'Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
201
+ " 'Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).',\n",
202
+ " \"For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\",\n",
203
+ " \"For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"],\n",
204
+ " 'url': ['http://zipcode.org/city/MN/ALBANY',\n",
205
+ " 'http://www.city-data.com/zips/56307.html',\n",
206
+ " 'https://en.wikipedia.org/wiki/Albany,_Minnesota',\n",
207
+ " 'http://ci.albany.mn.us/index.asp?SEC=A8341FEC-6B8C-47D2-926B-75A89ED4C539&Type=B_BASIC',\n",
208
+ " 'https://www.mapquest.com/us/mn/albany-282023394',\n",
209
+ " 'http://www.city-data.com/city/Albany-Minnesota.html',\n",
210
+ " 'http://www.city-data.com/zips/56307.html',\n",
211
+ " 'http://www.city-data.com/city/Albany-Minnesota.html']},\n",
212
+ " 'query': 'albany mn population',\n",
213
+ " 'query_id': 15177,\n",
214
+ " 'query_type': 'NUMERIC',\n",
215
+ " 'wellFormedAnswers': ['The population of Albany, Minnesota is 2,662. ']}"
216
+ ]
217
+ },
218
+ "execution_count": 15,
219
+ "metadata": {},
220
+ "output_type": "execute_result"
221
+ }
222
+ ],
223
+ "source": [
224
+ "test = dataset[\"test\"]\n",
225
+ "test[0]"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": 24,
231
+ "id": "104dfbea",
232
+ "metadata": {},
233
+ "outputs": [
234
+ {
235
+ "data": {
236
+ "text/plain": [
237
+ "12467"
238
+ ]
239
+ },
240
+ "execution_count": 24,
241
+ "metadata": {},
242
+ "output_type": "execute_result"
243
+ }
244
+ ],
245
+ "source": [
246
+ "test.num_rows"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 35,
252
+ "id": "01b3a886",
253
+ "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "data": {
257
+ "text/plain": [
258
+ "({'NUMERIC': 3685,\n",
259
+ " 'DESCRIPTION': 5487,\n",
260
+ " 'ENTITY': 1077,\n",
261
+ " 'PERSON': 868,\n",
262
+ " 'LOCATION': 1350},\n",
263
+ " {'NUMERIC': 179,\n",
264
+ " 'DESCRIPTION': 218,\n",
265
+ " 'ENTITY': 2403,\n",
266
+ " 'LOCATION': 2559,\n",
267
+ " 'PERSON': 3966})"
268
+ ]
269
+ },
270
+ "execution_count": 35,
271
+ "metadata": {},
272
+ "output_type": "execute_result"
273
+ }
274
+ ],
275
+ "source": [
276
+ "counts = {}\n",
277
+ "indices = {}\n",
278
+ "size = 100\n",
279
+ "for i in range(test.num_rows):\n",
280
+ " row = test[i]\n",
281
+ " query_type = row[\"query_type\"]\n",
282
+ " if query_type in counts:\n",
283
+ " counts[query_type] += 1\n",
284
+ " else:\n",
285
+ " counts[query_type] = 1\n",
286
+ " if counts[query_type] == size:\n",
287
+ " indices[query_type] = i\n",
288
+ "counts, indices"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": 39,
294
+ "id": "967bc1cd",
295
+ "metadata": {},
296
+ "outputs": [],
297
+ "source": [
298
+ "# create new dataset exluding those idx\n",
299
+ "baseline = test.select(\n",
300
+ " (i for i in range(len(test)) if i <= indices[test[i][\"query_type\"]])\n",
301
+ ")"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 40,
307
+ "id": "9a5fcad5",
308
+ "metadata": {},
309
+ "outputs": [
310
+ {
311
+ "data": {
312
+ "text/plain": [
313
+ "Dataset({\n",
314
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
315
+ " num_rows: 500\n",
316
+ "})"
317
+ ]
318
+ },
319
+ "execution_count": 40,
320
+ "metadata": {},
321
+ "output_type": "execute_result"
322
+ }
323
+ ],
324
+ "source": [
325
+ "baseline"
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": 41,
331
+ "id": "0524a973",
332
+ "metadata": {},
333
+ "outputs": [
334
+ {
335
+ "data": {
336
+ "text/html": [
337
+ "<div>\n",
338
+ "<style scoped>\n",
339
+ " .dataframe tbody tr th:only-of-type {\n",
340
+ " vertical-align: middle;\n",
341
+ " }\n",
342
+ "\n",
343
+ " .dataframe tbody tr th {\n",
344
+ " vertical-align: top;\n",
345
+ " }\n",
346
+ "\n",
347
+ " .dataframe thead th {\n",
348
+ " text-align: right;\n",
349
+ " }\n",
350
+ "</style>\n",
351
+ "<table border=\"1\" class=\"dataframe\">\n",
352
+ " <thead>\n",
353
+ " <tr style=\"text-align: right;\">\n",
354
+ " <th></th>\n",
355
+ " <th>answers</th>\n",
356
+ " <th>passages</th>\n",
357
+ " <th>query</th>\n",
358
+ " <th>query_id</th>\n",
359
+ " <th>query_type</th>\n",
360
+ " <th>wellFormedAnswers</th>\n",
361
+ " </tr>\n",
362
+ " </thead>\n",
363
+ " <tbody>\n",
364
+ " <tr>\n",
365
+ " <th>0</th>\n",
366
+ " <td>[2,662]</td>\n",
367
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
368
+ " <td>albany mn population</td>\n",
369
+ " <td>15177</td>\n",
370
+ " <td>NUMERIC</td>\n",
371
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
372
+ " </tr>\n",
373
+ " <tr>\n",
374
+ " <th>1</th>\n",
375
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
376
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
377
+ " <td>current weather in volcano, ca</td>\n",
378
+ " <td>114414</td>\n",
379
+ " <td>DESCRIPTION</td>\n",
380
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
381
+ " </tr>\n",
382
+ " <tr>\n",
383
+ " <th>2</th>\n",
384
+ " <td>[Hippocrates]</td>\n",
385
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
386
+ " <td>____________________ is considered the father ...</td>\n",
387
+ " <td>9083</td>\n",
388
+ " <td>DESCRIPTION</td>\n",
389
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
390
+ " </tr>\n",
391
+ " <tr>\n",
392
+ " <th>3</th>\n",
393
+ " <td>[120 days from the date of the Note.]</td>\n",
394
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
395
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
396
+ " <td>281439</td>\n",
397
+ " <td>NUMERIC</td>\n",
398
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
399
+ " </tr>\n",
400
+ " <tr>\n",
401
+ " <th>4</th>\n",
402
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
403
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
404
+ " <td>average pharmacy tech salary</td>\n",
405
+ " <td>40287</td>\n",
406
+ " <td>NUMERIC</td>\n",
407
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
408
+ " </tr>\n",
409
+ " <tr>\n",
410
+ " <th>...</th>\n",
411
+ " <td>...</td>\n",
412
+ " <td>...</td>\n",
413
+ " <td>...</td>\n",
414
+ " <td>...</td>\n",
415
+ " <td>...</td>\n",
416
+ " <td>...</td>\n",
417
+ " </tr>\n",
418
+ " <tr>\n",
419
+ " <th>495</th>\n",
420
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
421
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
422
+ " <td>the pool shower company</td>\n",
423
+ " <td>518269</td>\n",
424
+ " <td>PERSON</td>\n",
425
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
426
+ " </tr>\n",
427
+ " <tr>\n",
428
+ " <th>496</th>\n",
429
+ " <td>[Hanson]</td>\n",
430
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
431
+ " <td>longest tenured american football players</td>\n",
432
+ " <td>442806</td>\n",
433
+ " <td>PERSON</td>\n",
434
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
435
+ " </tr>\n",
436
+ " <tr>\n",
437
+ " <th>497</th>\n",
438
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
439
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
440
+ " <td>mt. view baptist in pendleton sc</td>\n",
441
+ " <td>460250</td>\n",
442
+ " <td>PERSON</td>\n",
443
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <th>498</th>\n",
447
+ " <td>[Honeysuckle Weeks]</td>\n",
448
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
449
+ " <td>what actress disappeared for a while</td>\n",
450
+ " <td>549739</td>\n",
451
+ " <td>PERSON</td>\n",
452
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
453
+ " </tr>\n",
454
+ " <tr>\n",
455
+ " <th>499</th>\n",
456
+ " <td>[African-Nguni]</td>\n",
457
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
458
+ " <td>what ethnicity is the surname sabol</td>\n",
459
+ " <td>658265</td>\n",
460
+ " <td>PERSON</td>\n",
461
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
462
+ " </tr>\n",
463
+ " </tbody>\n",
464
+ "</table>\n",
465
+ "<p>500 rows × 6 columns</p>\n",
466
+ "</div>"
467
+ ],
468
+ "text/plain": [
469
+ " answers \\\n",
470
+ "0 [2,662] \n",
471
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
472
+ "2 [Hippocrates] \n",
473
+ "3 [120 days from the date of the Note.] \n",
474
+ "4 [From $26,000 to $39,000 a year] \n",
475
+ ".. ... \n",
476
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
477
+ "496 [Hanson] \n",
478
+ "497 [Mount Able Baptist Church is located at the a... \n",
479
+ "498 [Honeysuckle Weeks] \n",
480
+ "499 [African-Nguni] \n",
481
+ "\n",
482
+ " passages \\\n",
483
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
484
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
485
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
486
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
487
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
488
+ ".. ... \n",
489
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
490
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
491
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
492
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
493
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
494
+ "\n",
495
+ " query query_id query_type \\\n",
496
+ "0 albany mn population 15177 NUMERIC \n",
497
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
498
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
499
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
500
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
501
+ ".. ... ... ... \n",
502
+ "495 the pool shower company 518269 PERSON \n",
503
+ "496 longest tenured american football players 442806 PERSON \n",
504
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
505
+ "498 what actress disappeared for a while 549739 PERSON \n",
506
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
507
+ "\n",
508
+ " wellFormedAnswers \n",
509
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
510
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
511
+ "2 [Hippocrates is considered the father of moder... \n",
512
+ "3 [An appraisal is good for 120 days from the da... \n",
513
+ "4 [The average salary for a pharmacy technician ... \n",
514
+ ".. ... \n",
515
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
516
+ "496 [Hanson is the longest tenured American footba... \n",
517
+ "497 [Mount Able Baptist Church is located at the a... \n",
518
+ "498 [The actress disappeared for a while Honeysuck... \n",
519
+ "499 [The ethnicity of the surname Sabol is African... \n",
520
+ "\n",
521
+ "[500 rows x 6 columns]"
522
+ ]
523
+ },
524
+ "execution_count": 41,
525
+ "metadata": {},
526
+ "output_type": "execute_result"
527
+ }
528
+ ],
529
+ "source": [
530
+ "baseline.to_pandas()"
531
+ ]
532
+ },
533
+ {
534
+ "cell_type": "code",
535
+ "execution_count": 42,
536
+ "id": "57a195e0",
537
+ "metadata": {},
538
+ "outputs": [
539
+ {
540
+ "data": {
541
+ "application/vnd.jupyter.widget-view+json": {
542
+ "model_id": "66abd394cb054cf1b7459e92d4763d02",
543
+ "version_major": 2,
544
+ "version_minor": 0
545
+ },
546
+ "text/plain": [
547
+ "Saving the dataset (0/1 shards): 0%| | 0/500 [00:00<?, ? examples/s]"
548
+ ]
549
+ },
550
+ "metadata": {},
551
+ "output_type": "display_data"
552
+ }
553
+ ],
554
+ "source": [
555
+ "baseline.save_to_disk(\"../data/datasets/ms_macro/\")"
556
+ ]
557
+ },
558
+ {
559
+ "cell_type": "code",
560
+ "execution_count": 44,
561
+ "id": "b72bf3f9",
562
+ "metadata": {},
563
+ "outputs": [
564
+ {
565
+ "data": {
566
+ "text/plain": [
567
+ "Dataset({\n",
568
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
569
+ " num_rows: 500\n",
570
+ "})"
571
+ ]
572
+ },
573
+ "execution_count": 44,
574
+ "metadata": {},
575
+ "output_type": "execute_result"
576
+ }
577
+ ],
578
+ "source": [
579
+ "from datasets import load_from_disk\n",
580
+ "\n",
581
+ "new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
582
+ "new_ds"
583
+ ]
584
+ },
585
+ {
586
+ "cell_type": "code",
587
+ "execution_count": 45,
588
+ "id": "051bd771",
589
+ "metadata": {},
590
+ "outputs": [
591
+ {
592
+ "data": {
593
+ "text/plain": [
594
+ "({'NUMERIC': 100,\n",
595
+ " 'DESCRIPTION': 100,\n",
596
+ " 'ENTITY': 100,\n",
597
+ " 'PERSON': 100,\n",
598
+ " 'LOCATION': 100},\n",
599
+ " {'NUMERIC': 179,\n",
600
+ " 'DESCRIPTION': 215,\n",
601
+ " 'ENTITY': 443,\n",
602
+ " 'LOCATION': 461,\n",
603
+ " 'PERSON': 499})"
604
+ ]
605
+ },
606
+ "execution_count": 45,
607
+ "metadata": {},
608
+ "output_type": "execute_result"
609
+ }
610
+ ],
611
+ "source": [
612
+ "counts = {}\n",
613
+ "indices = {}\n",
614
+ "size = 100\n",
615
+ "for i in range(new_ds.num_rows):\n",
616
+ " row = new_ds[i]\n",
617
+ " query_type = row[\"query_type\"]\n",
618
+ " if query_type in counts:\n",
619
+ " counts[query_type] += 1\n",
620
+ " else:\n",
621
+ " counts[query_type] = 1\n",
622
+ " if counts[query_type] == size:\n",
623
+ " indices[query_type] = i\n",
624
+ "counts, indices"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "code",
629
+ "execution_count": 46,
630
+ "id": "db48dcc4",
631
+ "metadata": {},
632
+ "outputs": [
633
+ {
634
+ "data": {
635
+ "text/html": [
636
+ "<div>\n",
637
+ "<style scoped>\n",
638
+ " .dataframe tbody tr th:only-of-type {\n",
639
+ " vertical-align: middle;\n",
640
+ " }\n",
641
+ "\n",
642
+ " .dataframe tbody tr th {\n",
643
+ " vertical-align: top;\n",
644
+ " }\n",
645
+ "\n",
646
+ " .dataframe thead th {\n",
647
+ " text-align: right;\n",
648
+ " }\n",
649
+ "</style>\n",
650
+ "<table border=\"1\" class=\"dataframe\">\n",
651
+ " <thead>\n",
652
+ " <tr style=\"text-align: right;\">\n",
653
+ " <th></th>\n",
654
+ " <th>answers</th>\n",
655
+ " <th>passages</th>\n",
656
+ " <th>query</th>\n",
657
+ " <th>query_id</th>\n",
658
+ " <th>query_type</th>\n",
659
+ " <th>wellFormedAnswers</th>\n",
660
+ " </tr>\n",
661
+ " </thead>\n",
662
+ " <tbody>\n",
663
+ " <tr>\n",
664
+ " <th>0</th>\n",
665
+ " <td>[2,662]</td>\n",
666
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
667
+ " <td>albany mn population</td>\n",
668
+ " <td>15177</td>\n",
669
+ " <td>NUMERIC</td>\n",
670
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
671
+ " </tr>\n",
672
+ " <tr>\n",
673
+ " <th>1</th>\n",
674
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
675
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
676
+ " <td>current weather in volcano, ca</td>\n",
677
+ " <td>114414</td>\n",
678
+ " <td>DESCRIPTION</td>\n",
679
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
680
+ " </tr>\n",
681
+ " <tr>\n",
682
+ " <th>2</th>\n",
683
+ " <td>[Hippocrates]</td>\n",
684
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
685
+ " <td>____________________ is considered the father ...</td>\n",
686
+ " <td>9083</td>\n",
687
+ " <td>DESCRIPTION</td>\n",
688
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
689
+ " </tr>\n",
690
+ " <tr>\n",
691
+ " <th>3</th>\n",
692
+ " <td>[120 days from the date of the Note.]</td>\n",
693
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
694
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
695
+ " <td>281439</td>\n",
696
+ " <td>NUMERIC</td>\n",
697
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
698
+ " </tr>\n",
699
+ " <tr>\n",
700
+ " <th>4</th>\n",
701
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
702
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
703
+ " <td>average pharmacy tech salary</td>\n",
704
+ " <td>40287</td>\n",
705
+ " <td>NUMERIC</td>\n",
706
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
707
+ " </tr>\n",
708
+ " <tr>\n",
709
+ " <th>...</th>\n",
710
+ " <td>...</td>\n",
711
+ " <td>...</td>\n",
712
+ " <td>...</td>\n",
713
+ " <td>...</td>\n",
714
+ " <td>...</td>\n",
715
+ " <td>...</td>\n",
716
+ " </tr>\n",
717
+ " <tr>\n",
718
+ " <th>495</th>\n",
719
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
720
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
721
+ " <td>the pool shower company</td>\n",
722
+ " <td>518269</td>\n",
723
+ " <td>PERSON</td>\n",
724
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
725
+ " </tr>\n",
726
+ " <tr>\n",
727
+ " <th>496</th>\n",
728
+ " <td>[Hanson]</td>\n",
729
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
730
+ " <td>longest tenured american football players</td>\n",
731
+ " <td>442806</td>\n",
732
+ " <td>PERSON</td>\n",
733
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
734
+ " </tr>\n",
735
+ " <tr>\n",
736
+ " <th>497</th>\n",
737
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
738
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
739
+ " <td>mt. view baptist in pendleton sc</td>\n",
740
+ " <td>460250</td>\n",
741
+ " <td>PERSON</td>\n",
742
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
743
+ " </tr>\n",
744
+ " <tr>\n",
745
+ " <th>498</th>\n",
746
+ " <td>[Honeysuckle Weeks]</td>\n",
747
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
748
+ " <td>what actress disappeared for a while</td>\n",
749
+ " <td>549739</td>\n",
750
+ " <td>PERSON</td>\n",
751
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
752
+ " </tr>\n",
753
+ " <tr>\n",
754
+ " <th>499</th>\n",
755
+ " <td>[African-Nguni]</td>\n",
756
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
757
+ " <td>what ethnicity is the surname sabol</td>\n",
758
+ " <td>658265</td>\n",
759
+ " <td>PERSON</td>\n",
760
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
761
+ " </tr>\n",
762
+ " </tbody>\n",
763
+ "</table>\n",
764
+ "<p>500 rows × 6 columns</p>\n",
765
+ "</div>"
766
+ ],
767
+ "text/plain": [
768
+ " answers \\\n",
769
+ "0 [2,662] \n",
770
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
771
+ "2 [Hippocrates] \n",
772
+ "3 [120 days from the date of the Note.] \n",
773
+ "4 [From $26,000 to $39,000 a year] \n",
774
+ ".. ... \n",
775
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
776
+ "496 [Hanson] \n",
777
+ "497 [Mount Able Baptist Church is located at the a... \n",
778
+ "498 [Honeysuckle Weeks] \n",
779
+ "499 [African-Nguni] \n",
780
+ "\n",
781
+ " passages \\\n",
782
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
783
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
784
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
785
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
786
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
787
+ ".. ... \n",
788
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
789
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
790
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
791
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
792
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
793
+ "\n",
794
+ " query query_id query_type \\\n",
795
+ "0 albany mn population 15177 NUMERIC \n",
796
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
797
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
798
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
799
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
800
+ ".. ... ... ... \n",
801
+ "495 the pool shower company 518269 PERSON \n",
802
+ "496 longest tenured american football players 442806 PERSON \n",
803
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
804
+ "498 what actress disappeared for a while 549739 PERSON \n",
805
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
806
+ "\n",
807
+ " wellFormedAnswers \n",
808
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
809
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
810
+ "2 [Hippocrates is considered the father of moder... \n",
811
+ "3 [An appraisal is good for 120 days from the da... \n",
812
+ "4 [The average salary for a pharmacy technician ... \n",
813
+ ".. ... \n",
814
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
815
+ "496 [Hanson is the longest tenured American footba... \n",
816
+ "497 [Mount Able Baptist Church is located at the a... \n",
817
+ "498 [The actress disappeared for a while Honeysuck... \n",
818
+ "499 [The ethnicity of the surname Sabol is African... \n",
819
+ "\n",
820
+ "[500 rows x 6 columns]"
821
+ ]
822
+ },
823
+ "execution_count": 46,
824
+ "metadata": {},
825
+ "output_type": "execute_result"
826
+ }
827
+ ],
828
+ "source": [
829
+ "new_ds.to_pandas()"
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "code",
834
+ "execution_count": 47,
835
+ "id": "7ed0c22d",
836
+ "metadata": {},
837
+ "outputs": [],
838
+ "source": [
839
+ "\"\"\"\n",
840
+ "Official evaluation script for QAConv, modified from SQuAD 2.0.\n",
841
+ "\n",
842
+ " * Copyright (c) 2021, salesforce.com, inc.\n",
843
+ " * All rights reserved.\n",
844
+ " * SPDX-License-Identifier: BSD-3-Clause\n",
845
+ " * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause\n",
846
+ "\n",
847
+ "\"\"\"\n",
848
+ "\n",
849
+ "import collections\n",
850
+ "import re\n",
851
+ "import string\n",
852
+ "\n",
853
+ "\n",
854
+ "def normalize_answer(s):\n",
855
+ " \"\"\"Lower text and remove punctuation, articles and extra whitespace.\"\"\"\n",
856
+ "\n",
857
+ " def remove_articles(text):\n",
858
+ " regex = re.compile(r\"\\b(a|an|the)\\b\", re.UNICODE)\n",
859
+ " return re.sub(regex, \" \", text)\n",
860
+ "\n",
861
+ " def white_space_fix(text):\n",
862
+ " return \" \".join(text.split())\n",
863
+ "\n",
864
+ " def remove_punc(text):\n",
865
+ " exclude = set(string.punctuation)\n",
866
+ " return \"\".join(ch for ch in text if ch not in exclude)\n",
867
+ "\n",
868
+ " def lower(text):\n",
869
+ " return text.lower()\n",
870
+ "\n",
871
+ " return white_space_fix(remove_articles(remove_punc(lower(s))))\n",
872
+ "\n",
873
+ "\n",
874
+ "def get_tokens(s):\n",
875
+ " if not s:\n",
876
+ " return []\n",
877
+ " return normalize_answer(s).split()\n",
878
+ "\n",
879
+ "\n",
880
+ "def compute_exact(a_gold, a_pred):\n",
881
+ " return int(normalize_answer(a_gold) == normalize_answer(a_pred))\n",
882
+ "\n",
883
+ "\n",
884
+ "def compute_f1(a_gold, a_pred):\n",
885
+ " gold_toks = get_tokens(a_gold)\n",
886
+ " pred_toks = get_tokens(a_pred)\n",
887
+ " common = collections.Counter(gold_toks) & collections.Counter(pred_toks)\n",
888
+ " num_same = sum(common.values())\n",
889
+ " if len(gold_toks) == 0 or len(pred_toks) == 0:\n",
890
+ " # If either is no-answer, then F1 is 1 if they agree, 0 otherwise\n",
891
+ " return int(gold_toks == pred_toks)\n",
892
+ " if num_same == 0:\n",
893
+ " return 0\n",
894
+ " precision = 1.0 * num_same / len(pred_toks)\n",
895
+ " recall = 1.0 * num_same / len(gold_toks)\n",
896
+ " f1 = (2 * precision * recall) / (precision + recall)\n",
897
+ " return f1"
898
+ ]
899
+ },
900
+ {
901
+ "cell_type": "code",
902
+ "execution_count": 49,
903
+ "id": "d9ff4756",
904
+ "metadata": {},
905
+ "outputs": [
906
+ {
907
+ "data": {
908
+ "application/vnd.jupyter.widget-view+json": {
909
+ "model_id": "d8a8d425f60a467eb56f6a13a50ed94b",
910
+ "version_major": 2,
911
+ "version_minor": 0
912
+ },
913
+ "text/plain": [
914
+ "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
915
+ ]
916
+ },
917
+ "metadata": {},
918
+ "output_type": "display_data"
919
+ },
920
+ {
921
+ "data": {
922
+ "text/plain": [
923
+ "Dataset({\n",
924
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'EM', 'F1'],\n",
925
+ " num_rows: 500\n",
926
+ "})"
927
+ ]
928
+ },
929
+ "execution_count": 49,
930
+ "metadata": {},
931
+ "output_type": "execute_result"
932
+ }
933
+ ],
934
+ "source": [
935
+ "result_all = new_ds.map(\n",
936
+ " lambda record, idx: {\n",
937
+ " \"EM\": compute_exact(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
938
+ " \"F1\": compute_f1(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
939
+ " },\n",
940
+ " batched=False,\n",
941
+ " with_indices=True,\n",
942
+ ")\n",
943
+ "result_all"
944
+ ]
945
+ },
946
+ {
947
+ "cell_type": "code",
948
+ "execution_count": 50,
949
+ "id": "31402fb2",
950
+ "metadata": {},
951
+ "outputs": [
952
+ {
953
+ "data": {
954
+ "text/html": [
955
+ "<div>\n",
956
+ "<style scoped>\n",
957
+ " .dataframe tbody tr th:only-of-type {\n",
958
+ " vertical-align: middle;\n",
959
+ " }\n",
960
+ "\n",
961
+ " .dataframe tbody tr th {\n",
962
+ " vertical-align: top;\n",
963
+ " }\n",
964
+ "\n",
965
+ " .dataframe thead th {\n",
966
+ " text-align: right;\n",
967
+ " }\n",
968
+ "</style>\n",
969
+ "<table border=\"1\" class=\"dataframe\">\n",
970
+ " <thead>\n",
971
+ " <tr style=\"text-align: right;\">\n",
972
+ " <th></th>\n",
973
+ " <th>answers</th>\n",
974
+ " <th>passages</th>\n",
975
+ " <th>query</th>\n",
976
+ " <th>query_id</th>\n",
977
+ " <th>query_type</th>\n",
978
+ " <th>wellFormedAnswers</th>\n",
979
+ " <th>EM</th>\n",
980
+ " <th>F1</th>\n",
981
+ " </tr>\n",
982
+ " </thead>\n",
983
+ " <tbody>\n",
984
+ " <tr>\n",
985
+ " <th>0</th>\n",
986
+ " <td>[2,662]</td>\n",
987
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
988
+ " <td>albany mn population</td>\n",
989
+ " <td>15177</td>\n",
990
+ " <td>NUMERIC</td>\n",
991
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
992
+ " <td>0</td>\n",
993
+ " <td>0.285714</td>\n",
994
+ " </tr>\n",
995
+ " <tr>\n",
996
+ " <th>1</th>\n",
997
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
998
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
999
+ " <td>current weather in volcano, ca</td>\n",
1000
+ " <td>114414</td>\n",
1001
+ " <td>DESCRIPTION</td>\n",
1002
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1003
+ " <td>1</td>\n",
1004
+ " <td>1.000000</td>\n",
1005
+ " </tr>\n",
1006
+ " <tr>\n",
1007
+ " <th>2</th>\n",
1008
+ " <td>[Hippocrates]</td>\n",
1009
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1010
+ " <td>____________________ is considered the father ...</td>\n",
1011
+ " <td>9083</td>\n",
1012
+ " <td>DESCRIPTION</td>\n",
1013
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
1014
+ " <td>0</td>\n",
1015
+ " <td>0.250000</td>\n",
1016
+ " </tr>\n",
1017
+ " <tr>\n",
1018
+ " <th>3</th>\n",
1019
+ " <td>[120 days from the date of the Note.]</td>\n",
1020
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1021
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
1022
+ " <td>281439</td>\n",
1023
+ " <td>NUMERIC</td>\n",
1024
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
1025
+ " <td>0</td>\n",
1026
+ " <td>0.631579</td>\n",
1027
+ " </tr>\n",
1028
+ " <tr>\n",
1029
+ " <th>4</th>\n",
1030
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
1031
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1032
+ " <td>average pharmacy tech salary</td>\n",
1033
+ " <td>40287</td>\n",
1034
+ " <td>NUMERIC</td>\n",
1035
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
1036
+ " <td>0</td>\n",
1037
+ " <td>0.500000</td>\n",
1038
+ " </tr>\n",
1039
+ " <tr>\n",
1040
+ " <th>...</th>\n",
1041
+ " <td>...</td>\n",
1042
+ " <td>...</td>\n",
1043
+ " <td>...</td>\n",
1044
+ " <td>...</td>\n",
1045
+ " <td>...</td>\n",
1046
+ " <td>...</td>\n",
1047
+ " <td>...</td>\n",
1048
+ " <td>...</td>\n",
1049
+ " </tr>\n",
1050
+ " <tr>\n",
1051
+ " <th>495</th>\n",
1052
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1053
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1054
+ " <td>the pool shower company</td>\n",
1055
+ " <td>518269</td>\n",
1056
+ " <td>PERSON</td>\n",
1057
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1058
+ " <td>1</td>\n",
1059
+ " <td>1.000000</td>\n",
1060
+ " </tr>\n",
1061
+ " <tr>\n",
1062
+ " <th>496</th>\n",
1063
+ " <td>[Hanson]</td>\n",
1064
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1065
+ " <td>longest tenured american football players</td>\n",
1066
+ " <td>442806</td>\n",
1067
+ " <td>PERSON</td>\n",
1068
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
1069
+ " <td>0</td>\n",
1070
+ " <td>0.250000</td>\n",
1071
+ " </tr>\n",
1072
+ " <tr>\n",
1073
+ " <th>497</th>\n",
1074
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1075
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1076
+ " <td>mt. view baptist in pendleton sc</td>\n",
1077
+ " <td>460250</td>\n",
1078
+ " <td>PERSON</td>\n",
1079
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1080
+ " <td>1</td>\n",
1081
+ " <td>1.000000</td>\n",
1082
+ " </tr>\n",
1083
+ " <tr>\n",
1084
+ " <th>498</th>\n",
1085
+ " <td>[Honeysuckle Weeks]</td>\n",
1086
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1087
+ " <td>what actress disappeared for a while</td>\n",
1088
+ " <td>549739</td>\n",
1089
+ " <td>PERSON</td>\n",
1090
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1091
+ " <td>0</td>\n",
1092
+ " <td>0.500000</td>\n",
1093
+ " </tr>\n",
1094
+ " <tr>\n",
1095
+ " <th>499</th>\n",
1096
+ " <td>[African-Nguni]</td>\n",
1097
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1098
+ " <td>what ethnicity is the surname sabol</td>\n",
1099
+ " <td>658265</td>\n",
1100
+ " <td>PERSON</td>\n",
1101
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1102
+ " <td>0</td>\n",
1103
+ " <td>0.285714</td>\n",
1104
+ " </tr>\n",
1105
+ " </tbody>\n",
1106
+ "</table>\n",
1107
+ "<p>500 rows × 8 columns</p>\n",
1108
+ "</div>"
1109
+ ],
1110
+ "text/plain": [
1111
+ " answers \\\n",
1112
+ "0 [2,662] \n",
1113
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1114
+ "2 [Hippocrates] \n",
1115
+ "3 [120 days from the date of the Note.] \n",
1116
+ "4 [From $26,000 to $39,000 a year] \n",
1117
+ ".. ... \n",
1118
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1119
+ "496 [Hanson] \n",
1120
+ "497 [Mount Able Baptist Church is located at the a... \n",
1121
+ "498 [Honeysuckle Weeks] \n",
1122
+ "499 [African-Nguni] \n",
1123
+ "\n",
1124
+ " passages \\\n",
1125
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1126
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1127
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1128
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1129
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1130
+ ".. ... \n",
1131
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1132
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1133
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1134
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1135
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1136
+ "\n",
1137
+ " query query_id query_type \\\n",
1138
+ "0 albany mn population 15177 NUMERIC \n",
1139
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1140
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1141
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1142
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
1143
+ ".. ... ... ... \n",
1144
+ "495 the pool shower company 518269 PERSON \n",
1145
+ "496 longest tenured american football players 442806 PERSON \n",
1146
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1147
+ "498 what actress disappeared for a while 549739 PERSON \n",
1148
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
1149
+ "\n",
1150
+ " wellFormedAnswers EM F1 \n",
1151
+ "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1152
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1153
+ "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1154
+ "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1155
+ "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1156
+ ".. ... .. ... \n",
1157
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1158
+ "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1159
+ "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1160
+ "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1161
+ "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1162
+ "\n",
1163
+ "[500 rows x 8 columns]"
1164
+ ]
1165
+ },
1166
+ "execution_count": 50,
1167
+ "metadata": {},
1168
+ "output_type": "execute_result"
1169
+ }
1170
+ ],
1171
+ "source": [
1172
+ "result_all.to_pandas()"
1173
+ ]
1174
+ },
1175
+ {
1176
+ "cell_type": "code",
1177
+ "execution_count": 53,
1178
+ "id": "af2d4577",
1179
+ "metadata": {},
1180
+ "outputs": [
1181
+ {
1182
+ "name": "stdout",
1183
+ "output_type": "stream",
1184
+ "text": [
1185
+ "Note: you may need to restart the kernel to use updated packages.\n"
1186
+ ]
1187
+ }
1188
+ ],
1189
+ "source": [
1190
+ "%pip install -q evaluate rouge_score"
1191
+ ]
1192
+ },
1193
+ {
1194
+ "cell_type": "code",
1195
+ "execution_count": 54,
1196
+ "id": "89494c3d",
1197
+ "metadata": {},
1198
+ "outputs": [],
1199
+ "source": [
1200
+ "import evaluate\n",
1201
+ "\n",
1202
+ "bleu = evaluate.load(\"bleu\")\n",
1203
+ "rouge = evaluate.load(\"rouge\")"
1204
+ ]
1205
+ },
1206
+ {
1207
+ "cell_type": "code",
1208
+ "execution_count": 56,
1209
+ "id": "e447aa08",
1210
+ "metadata": {},
1211
+ "outputs": [
1212
+ {
1213
+ "data": {
1214
+ "application/vnd.jupyter.widget-view+json": {
1215
+ "model_id": "c87066449ebc44d39a66b1630977f2ac",
1216
+ "version_major": 2,
1217
+ "version_minor": 0
1218
+ },
1219
+ "text/plain": [
1220
+ "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
1221
+ ]
1222
+ },
1223
+ "metadata": {},
1224
+ "output_type": "display_data"
1225
+ },
1226
+ {
1227
+ "data": {
1228
+ "text/html": [
1229
+ "<div>\n",
1230
+ "<style scoped>\n",
1231
+ " .dataframe tbody tr th:only-of-type {\n",
1232
+ " vertical-align: middle;\n",
1233
+ " }\n",
1234
+ "\n",
1235
+ " .dataframe tbody tr th {\n",
1236
+ " vertical-align: top;\n",
1237
+ " }\n",
1238
+ "\n",
1239
+ " .dataframe thead th {\n",
1240
+ " text-align: right;\n",
1241
+ " }\n",
1242
+ "</style>\n",
1243
+ "<table border=\"1\" class=\"dataframe\">\n",
1244
+ " <thead>\n",
1245
+ " <tr style=\"text-align: right;\">\n",
1246
+ " <th></th>\n",
1247
+ " <th>answers</th>\n",
1248
+ " <th>passages</th>\n",
1249
+ " <th>query</th>\n",
1250
+ " <th>query_id</th>\n",
1251
+ " <th>query_type</th>\n",
1252
+ " <th>wellFormedAnswers</th>\n",
1253
+ " <th>EM</th>\n",
1254
+ " <th>F1</th>\n",
1255
+ " <th>bleu</th>\n",
1256
+ " <th>precisions</th>\n",
1257
+ " <th>brevity_penalty</th>\n",
1258
+ " <th>length_ratio</th>\n",
1259
+ " <th>translation_length</th>\n",
1260
+ " <th>reference_length</th>\n",
1261
+ " </tr>\n",
1262
+ " </thead>\n",
1263
+ " <tbody>\n",
1264
+ " <tr>\n",
1265
+ " <th>0</th>\n",
1266
+ " <td>[2,662]</td>\n",
1267
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
1268
+ " <td>albany mn population</td>\n",
1269
+ " <td>15177</td>\n",
1270
+ " <td>NUMERIC</td>\n",
1271
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
1272
+ " <td>0</td>\n",
1273
+ " <td>0.285714</td>\n",
1274
+ " <td>0.000000</td>\n",
1275
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1276
+ " <td>0.000335</td>\n",
1277
+ " <td>0.111111</td>\n",
1278
+ " <td>1</td>\n",
1279
+ " <td>9</td>\n",
1280
+ " </tr>\n",
1281
+ " <tr>\n",
1282
+ " <th>1</th>\n",
1283
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1284
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
1285
+ " <td>current weather in volcano, ca</td>\n",
1286
+ " <td>114414</td>\n",
1287
+ " <td>DESCRIPTION</td>\n",
1288
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1289
+ " <td>1</td>\n",
1290
+ " <td>1.000000</td>\n",
1291
+ " <td>1.000000</td>\n",
1292
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1293
+ " <td>1.000000</td>\n",
1294
+ " <td>1.000000</td>\n",
1295
+ " <td>14</td>\n",
1296
+ " <td>14</td>\n",
1297
+ " </tr>\n",
1298
+ " <tr>\n",
1299
+ " <th>2</th>\n",
1300
+ " <td>[Hippocrates]</td>\n",
1301
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1302
+ " <td>____________________ is considered the father ...</td>\n",
1303
+ " <td>9083</td>\n",
1304
+ " <td>DESCRIPTION</td>\n",
1305
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
1306
+ " <td>0</td>\n",
1307
+ " <td>0.250000</td>\n",
1308
+ " <td>0.000000</td>\n",
1309
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1310
+ " <td>0.000335</td>\n",
1311
+ " <td>0.111111</td>\n",
1312
+ " <td>1</td>\n",
1313
+ " <td>9</td>\n",
1314
+ " </tr>\n",
1315
+ " <tr>\n",
1316
+ " <th>3</th>\n",
1317
+ " <td>[120 days from the date of the Note.]</td>\n",
1318
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1319
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
1320
+ " <td>281439</td>\n",
1321
+ " <td>NUMERIC</td>\n",
1322
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
1323
+ " <td>0</td>\n",
1324
+ " <td>0.631579</td>\n",
1325
+ " <td>0.327096</td>\n",
1326
+ " <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
1327
+ " <td>0.367879</td>\n",
1328
+ " <td>0.500000</td>\n",
1329
+ " <td>9</td>\n",
1330
+ " <td>18</td>\n",
1331
+ " </tr>\n",
1332
+ " <tr>\n",
1333
+ " <th>4</th>\n",
1334
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
1335
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1336
+ " <td>average pharmacy tech salary</td>\n",
1337
+ " <td>40287</td>\n",
1338
+ " <td>NUMERIC</td>\n",
1339
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
1340
+ " <td>0</td>\n",
1341
+ " <td>0.500000</td>\n",
1342
+ " <td>0.193040</td>\n",
1343
+ " <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
1344
+ " <td>0.324652</td>\n",
1345
+ " <td>0.470588</td>\n",
1346
+ " <td>8</td>\n",
1347
+ " <td>17</td>\n",
1348
+ " </tr>\n",
1349
+ " <tr>\n",
1350
+ " <th>...</th>\n",
1351
+ " <td>...</td>\n",
1352
+ " <td>...</td>\n",
1353
+ " <td>...</td>\n",
1354
+ " <td>...</td>\n",
1355
+ " <td>...</td>\n",
1356
+ " <td>...</td>\n",
1357
+ " <td>...</td>\n",
1358
+ " <td>...</td>\n",
1359
+ " <td>...</td>\n",
1360
+ " <td>...</td>\n",
1361
+ " <td>...</td>\n",
1362
+ " <td>...</td>\n",
1363
+ " <td>...</td>\n",
1364
+ " <td>...</td>\n",
1365
+ " </tr>\n",
1366
+ " <tr>\n",
1367
+ " <th>495</th>\n",
1368
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1369
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1370
+ " <td>the pool shower company</td>\n",
1371
+ " <td>518269</td>\n",
1372
+ " <td>PERSON</td>\n",
1373
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1374
+ " <td>1</td>\n",
1375
+ " <td>1.000000</td>\n",
1376
+ " <td>1.000000</td>\n",
1377
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1378
+ " <td>1.000000</td>\n",
1379
+ " <td>1.000000</td>\n",
1380
+ " <td>19</td>\n",
1381
+ " <td>19</td>\n",
1382
+ " </tr>\n",
1383
+ " <tr>\n",
1384
+ " <th>496</th>\n",
1385
+ " <td>[Hanson]</td>\n",
1386
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1387
+ " <td>longest tenured american football players</td>\n",
1388
+ " <td>442806</td>\n",
1389
+ " <td>PERSON</td>\n",
1390
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
1391
+ " <td>0</td>\n",
1392
+ " <td>0.250000</td>\n",
1393
+ " <td>0.000000</td>\n",
1394
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1395
+ " <td>0.000335</td>\n",
1396
+ " <td>0.111111</td>\n",
1397
+ " <td>1</td>\n",
1398
+ " <td>9</td>\n",
1399
+ " </tr>\n",
1400
+ " <tr>\n",
1401
+ " <th>497</th>\n",
1402
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1403
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1404
+ " <td>mt. view baptist in pendleton sc</td>\n",
1405
+ " <td>460250</td>\n",
1406
+ " <td>PERSON</td>\n",
1407
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1408
+ " <td>1</td>\n",
1409
+ " <td>1.000000</td>\n",
1410
+ " <td>1.000000</td>\n",
1411
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1412
+ " <td>1.000000</td>\n",
1413
+ " <td>1.000000</td>\n",
1414
+ " <td>21</td>\n",
1415
+ " <td>21</td>\n",
1416
+ " </tr>\n",
1417
+ " <tr>\n",
1418
+ " <th>498</th>\n",
1419
+ " <td>[Honeysuckle Weeks]</td>\n",
1420
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1421
+ " <td>what actress disappeared for a while</td>\n",
1422
+ " <td>549739</td>\n",
1423
+ " <td>PERSON</td>\n",
1424
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1425
+ " <td>0</td>\n",
1426
+ " <td>0.500000</td>\n",
1427
+ " <td>0.000000</td>\n",
1428
+ " <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
1429
+ " <td>0.030197</td>\n",
1430
+ " <td>0.222222</td>\n",
1431
+ " <td>2</td>\n",
1432
+ " <td>9</td>\n",
1433
+ " </tr>\n",
1434
+ " <tr>\n",
1435
+ " <th>499</th>\n",
1436
+ " <td>[African-Nguni]</td>\n",
1437
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1438
+ " <td>what ethnicity is the surname sabol</td>\n",
1439
+ " <td>658265</td>\n",
1440
+ " <td>PERSON</td>\n",
1441
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1442
+ " <td>0</td>\n",
1443
+ " <td>0.285714</td>\n",
1444
+ " <td>0.000000</td>\n",
1445
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1446
+ " <td>0.000335</td>\n",
1447
+ " <td>0.111111</td>\n",
1448
+ " <td>1</td>\n",
1449
+ " <td>9</td>\n",
1450
+ " </tr>\n",
1451
+ " </tbody>\n",
1452
+ "</table>\n",
1453
+ "<p>500 rows × 14 columns</p>\n",
1454
+ "</div>"
1455
+ ],
1456
+ "text/plain": [
1457
+ " answers \\\n",
1458
+ "0 [2,662] \n",
1459
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1460
+ "2 [Hippocrates] \n",
1461
+ "3 [120 days from the date of the Note.] \n",
1462
+ "4 [From $26,000 to $39,000 a year] \n",
1463
+ ".. ... \n",
1464
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1465
+ "496 [Hanson] \n",
1466
+ "497 [Mount Able Baptist Church is located at the a... \n",
1467
+ "498 [Honeysuckle Weeks] \n",
1468
+ "499 [African-Nguni] \n",
1469
+ "\n",
1470
+ " passages \\\n",
1471
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1472
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1473
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1474
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1475
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1476
+ ".. ... \n",
1477
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1478
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1479
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1480
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1481
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1482
+ "\n",
1483
+ " query query_id query_type \\\n",
1484
+ "0 albany mn population 15177 NUMERIC \n",
1485
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1486
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1487
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1488
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
1489
+ ".. ... ... ... \n",
1490
+ "495 the pool shower company 518269 PERSON \n",
1491
+ "496 longest tenured american football players 442806 PERSON \n",
1492
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1493
+ "498 what actress disappeared for a while 549739 PERSON \n",
1494
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
1495
+ "\n",
1496
+ " wellFormedAnswers EM F1 \\\n",
1497
+ "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1498
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1499
+ "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1500
+ "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1501
+ "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1502
+ ".. ... .. ... \n",
1503
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1504
+ "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1505
+ "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1506
+ "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1507
+ "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1508
+ "\n",
1509
+ " bleu precisions \\\n",
1510
+ "0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1511
+ "1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1512
+ "2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1513
+ "3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
1514
+ "4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
1515
+ ".. ... ... \n",
1516
+ "495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1517
+ "496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1518
+ "497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1519
+ "498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
1520
+ "499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1521
+ "\n",
1522
+ " brevity_penalty length_ratio translation_length reference_length \n",
1523
+ "0 0.000335 0.111111 1 9 \n",
1524
+ "1 1.000000 1.000000 14 14 \n",
1525
+ "2 0.000335 0.111111 1 9 \n",
1526
+ "3 0.367879 0.500000 9 18 \n",
1527
+ "4 0.324652 0.470588 8 17 \n",
1528
+ ".. ... ... ... ... \n",
1529
+ "495 1.000000 1.000000 19 19 \n",
1530
+ "496 0.000335 0.111111 1 9 \n",
1531
+ "497 1.000000 1.000000 21 21 \n",
1532
+ "498 0.030197 0.222222 2 9 \n",
1533
+ "499 0.000335 0.111111 1 9 \n",
1534
+ "\n",
1535
+ "[500 rows x 14 columns]"
1536
+ ]
1537
+ },
1538
+ "execution_count": 56,
1539
+ "metadata": {},
1540
+ "output_type": "execute_result"
1541
+ }
1542
+ ],
1543
+ "source": [
1544
+ "result_all = result_all.map(\n",
1545
+ " lambda record: bleu.compute(\n",
1546
+ " predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
1547
+ " ),\n",
1548
+ " batched=False,\n",
1549
+ ")\n",
1550
+ "result_all.to_pandas()"
1551
+ ]
1552
+ },
1553
+ {
1554
+ "cell_type": "code",
1555
+ "execution_count": 57,
1556
+ "id": "fbbe31fd",
1557
+ "metadata": {},
1558
+ "outputs": [
1559
+ {
1560
+ "data": {
1561
+ "application/vnd.jupyter.widget-view+json": {
1562
+ "model_id": "88f839b74aa54fcd8c95215e22e30472",
1563
+ "version_major": 2,
1564
+ "version_minor": 0
1565
+ },
1566
+ "text/plain": [
1567
+ "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
1568
+ ]
1569
+ },
1570
+ "metadata": {},
1571
+ "output_type": "display_data"
1572
+ },
1573
+ {
1574
+ "data": {
1575
+ "text/html": [
1576
+ "<div>\n",
1577
+ "<style scoped>\n",
1578
+ " .dataframe tbody tr th:only-of-type {\n",
1579
+ " vertical-align: middle;\n",
1580
+ " }\n",
1581
+ "\n",
1582
+ " .dataframe tbody tr th {\n",
1583
+ " vertical-align: top;\n",
1584
+ " }\n",
1585
+ "\n",
1586
+ " .dataframe thead th {\n",
1587
+ " text-align: right;\n",
1588
+ " }\n",
1589
+ "</style>\n",
1590
+ "<table border=\"1\" class=\"dataframe\">\n",
1591
+ " <thead>\n",
1592
+ " <tr style=\"text-align: right;\">\n",
1593
+ " <th></th>\n",
1594
+ " <th>answers</th>\n",
1595
+ " <th>passages</th>\n",
1596
+ " <th>query</th>\n",
1597
+ " <th>query_id</th>\n",
1598
+ " <th>query_type</th>\n",
1599
+ " <th>wellFormedAnswers</th>\n",
1600
+ " <th>EM</th>\n",
1601
+ " <th>F1</th>\n",
1602
+ " <th>bleu</th>\n",
1603
+ " <th>precisions</th>\n",
1604
+ " <th>brevity_penalty</th>\n",
1605
+ " <th>length_ratio</th>\n",
1606
+ " <th>translation_length</th>\n",
1607
+ " <th>reference_length</th>\n",
1608
+ " <th>rouge1</th>\n",
1609
+ " <th>rouge2</th>\n",
1610
+ " <th>rougeL</th>\n",
1611
+ " <th>rougeLsum</th>\n",
1612
+ " </tr>\n",
1613
+ " </thead>\n",
1614
+ " <tbody>\n",
1615
+ " <tr>\n",
1616
+ " <th>0</th>\n",
1617
+ " <td>[2,662]</td>\n",
1618
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
1619
+ " <td>albany mn population</td>\n",
1620
+ " <td>15177</td>\n",
1621
+ " <td>NUMERIC</td>\n",
1622
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
1623
+ " <td>0</td>\n",
1624
+ " <td>0.285714</td>\n",
1625
+ " <td>0.000000</td>\n",
1626
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1627
+ " <td>0.000335</td>\n",
1628
+ " <td>0.111111</td>\n",
1629
+ " <td>1</td>\n",
1630
+ " <td>9</td>\n",
1631
+ " <td>0.400000</td>\n",
1632
+ " <td>0.250000</td>\n",
1633
+ " <td>0.400000</td>\n",
1634
+ " <td>0.400000</td>\n",
1635
+ " </tr>\n",
1636
+ " <tr>\n",
1637
+ " <th>1</th>\n",
1638
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1639
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
1640
+ " <td>current weather in volcano, ca</td>\n",
1641
+ " <td>114414</td>\n",
1642
+ " <td>DESCRIPTION</td>\n",
1643
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1644
+ " <td>1</td>\n",
1645
+ " <td>1.000000</td>\n",
1646
+ " <td>1.000000</td>\n",
1647
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1648
+ " <td>1.000000</td>\n",
1649
+ " <td>1.000000</td>\n",
1650
+ " <td>14</td>\n",
1651
+ " <td>14</td>\n",
1652
+ " <td>1.000000</td>\n",
1653
+ " <td>1.000000</td>\n",
1654
+ " <td>1.000000</td>\n",
1655
+ " <td>1.000000</td>\n",
1656
+ " </tr>\n",
1657
+ " <tr>\n",
1658
+ " <th>2</th>\n",
1659
+ " <td>[Hippocrates]</td>\n",
1660
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1661
+ " <td>____________________ is considered the father ...</td>\n",
1662
+ " <td>9083</td>\n",
1663
+ " <td>DESCRIPTION</td>\n",
1664
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
1665
+ " <td>0</td>\n",
1666
+ " <td>0.250000</td>\n",
1667
+ " <td>0.000000</td>\n",
1668
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1669
+ " <td>0.000335</td>\n",
1670
+ " <td>0.111111</td>\n",
1671
+ " <td>1</td>\n",
1672
+ " <td>9</td>\n",
1673
+ " <td>0.222222</td>\n",
1674
+ " <td>0.000000</td>\n",
1675
+ " <td>0.222222</td>\n",
1676
+ " <td>0.222222</td>\n",
1677
+ " </tr>\n",
1678
+ " <tr>\n",
1679
+ " <th>3</th>\n",
1680
+ " <td>[120 days from the date of the Note.]</td>\n",
1681
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1682
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
1683
+ " <td>281439</td>\n",
1684
+ " <td>NUMERIC</td>\n",
1685
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
1686
+ " <td>0</td>\n",
1687
+ " <td>0.631579</td>\n",
1688
+ " <td>0.327096</td>\n",
1689
+ " <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
1690
+ " <td>0.367879</td>\n",
1691
+ " <td>0.500000</td>\n",
1692
+ " <td>9</td>\n",
1693
+ " <td>18</td>\n",
1694
+ " <td>0.640000</td>\n",
1695
+ " <td>0.608696</td>\n",
1696
+ " <td>0.640000</td>\n",
1697
+ " <td>0.640000</td>\n",
1698
+ " </tr>\n",
1699
+ " <tr>\n",
1700
+ " <th>4</th>\n",
1701
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
1702
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1703
+ " <td>average pharmacy tech salary</td>\n",
1704
+ " <td>40287</td>\n",
1705
+ " <td>NUMERIC</td>\n",
1706
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
1707
+ " <td>0</td>\n",
1708
+ " <td>0.500000</td>\n",
1709
+ " <td>0.193040</td>\n",
1710
+ " <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
1711
+ " <td>0.324652</td>\n",
1712
+ " <td>0.470588</td>\n",
1713
+ " <td>8</td>\n",
1714
+ " <td>17</td>\n",
1715
+ " <td>0.583333</td>\n",
1716
+ " <td>0.454545</td>\n",
1717
+ " <td>0.583333</td>\n",
1718
+ " <td>0.583333</td>\n",
1719
+ " </tr>\n",
1720
+ " <tr>\n",
1721
+ " <th>...</th>\n",
1722
+ " <td>...</td>\n",
1723
+ " <td>...</td>\n",
1724
+ " <td>...</td>\n",
1725
+ " <td>...</td>\n",
1726
+ " <td>...</td>\n",
1727
+ " <td>...</td>\n",
1728
+ " <td>...</td>\n",
1729
+ " <td>...</td>\n",
1730
+ " <td>...</td>\n",
1731
+ " <td>...</td>\n",
1732
+ " <td>...</td>\n",
1733
+ " <td>...</td>\n",
1734
+ " <td>...</td>\n",
1735
+ " <td>...</td>\n",
1736
+ " <td>...</td>\n",
1737
+ " <td>...</td>\n",
1738
+ " <td>...</td>\n",
1739
+ " <td>...</td>\n",
1740
+ " </tr>\n",
1741
+ " <tr>\n",
1742
+ " <th>495</th>\n",
1743
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1744
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1745
+ " <td>the pool shower company</td>\n",
1746
+ " <td>518269</td>\n",
1747
+ " <td>PERSON</td>\n",
1748
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1749
+ " <td>1</td>\n",
1750
+ " <td>1.000000</td>\n",
1751
+ " <td>1.000000</td>\n",
1752
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1753
+ " <td>1.000000</td>\n",
1754
+ " <td>1.000000</td>\n",
1755
+ " <td>19</td>\n",
1756
+ " <td>19</td>\n",
1757
+ " <td>1.000000</td>\n",
1758
+ " <td>1.000000</td>\n",
1759
+ " <td>1.000000</td>\n",
1760
+ " <td>1.000000</td>\n",
1761
+ " </tr>\n",
1762
+ " <tr>\n",
1763
+ " <th>496</th>\n",
1764
+ " <td>[Hanson]</td>\n",
1765
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1766
+ " <td>longest tenured american football players</td>\n",
1767
+ " <td>442806</td>\n",
1768
+ " <td>PERSON</td>\n",
1769
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
1770
+ " <td>0</td>\n",
1771
+ " <td>0.250000</td>\n",
1772
+ " <td>0.000000</td>\n",
1773
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1774
+ " <td>0.000335</td>\n",
1775
+ " <td>0.111111</td>\n",
1776
+ " <td>1</td>\n",
1777
+ " <td>9</td>\n",
1778
+ " <td>0.222222</td>\n",
1779
+ " <td>0.000000</td>\n",
1780
+ " <td>0.222222</td>\n",
1781
+ " <td>0.222222</td>\n",
1782
+ " </tr>\n",
1783
+ " <tr>\n",
1784
+ " <th>497</th>\n",
1785
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1786
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1787
+ " <td>mt. view baptist in pendleton sc</td>\n",
1788
+ " <td>460250</td>\n",
1789
+ " <td>PERSON</td>\n",
1790
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1791
+ " <td>1</td>\n",
1792
+ " <td>1.000000</td>\n",
1793
+ " <td>1.000000</td>\n",
1794
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1795
+ " <td>1.000000</td>\n",
1796
+ " <td>1.000000</td>\n",
1797
+ " <td>21</td>\n",
1798
+ " <td>21</td>\n",
1799
+ " <td>1.000000</td>\n",
1800
+ " <td>1.000000</td>\n",
1801
+ " <td>1.000000</td>\n",
1802
+ " <td>1.000000</td>\n",
1803
+ " </tr>\n",
1804
+ " <tr>\n",
1805
+ " <th>498</th>\n",
1806
+ " <td>[Honeysuckle Weeks]</td>\n",
1807
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1808
+ " <td>what actress disappeared for a while</td>\n",
1809
+ " <td>549739</td>\n",
1810
+ " <td>PERSON</td>\n",
1811
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1812
+ " <td>0</td>\n",
1813
+ " <td>0.500000</td>\n",
1814
+ " <td>0.000000</td>\n",
1815
+ " <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
1816
+ " <td>0.030197</td>\n",
1817
+ " <td>0.222222</td>\n",
1818
+ " <td>2</td>\n",
1819
+ " <td>9</td>\n",
1820
+ " <td>0.400000</td>\n",
1821
+ " <td>0.250000</td>\n",
1822
+ " <td>0.400000</td>\n",
1823
+ " <td>0.400000</td>\n",
1824
+ " </tr>\n",
1825
+ " <tr>\n",
1826
+ " <th>499</th>\n",
1827
+ " <td>[African-Nguni]</td>\n",
1828
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1829
+ " <td>what ethnicity is the surname sabol</td>\n",
1830
+ " <td>658265</td>\n",
1831
+ " <td>PERSON</td>\n",
1832
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1833
+ " <td>0</td>\n",
1834
+ " <td>0.285714</td>\n",
1835
+ " <td>0.000000</td>\n",
1836
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1837
+ " <td>0.000335</td>\n",
1838
+ " <td>0.111111</td>\n",
1839
+ " <td>1</td>\n",
1840
+ " <td>9</td>\n",
1841
+ " <td>0.363636</td>\n",
1842
+ " <td>0.222222</td>\n",
1843
+ " <td>0.363636</td>\n",
1844
+ " <td>0.363636</td>\n",
1845
+ " </tr>\n",
1846
+ " </tbody>\n",
1847
+ "</table>\n",
1848
+ "<p>500 rows × 18 columns</p>\n",
1849
+ "</div>"
1850
+ ],
1851
+ "text/plain": [
1852
+ " answers \\\n",
1853
+ "0 [2,662] \n",
1854
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1855
+ "2 [Hippocrates] \n",
1856
+ "3 [120 days from the date of the Note.] \n",
1857
+ "4 [From $26,000 to $39,000 a year] \n",
1858
+ ".. ... \n",
1859
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1860
+ "496 [Hanson] \n",
1861
+ "497 [Mount Able Baptist Church is located at the a... \n",
1862
+ "498 [Honeysuckle Weeks] \n",
1863
+ "499 [African-Nguni] \n",
1864
+ "\n",
1865
+ " passages \\\n",
1866
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1867
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1868
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1869
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1870
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1871
+ ".. ... \n",
1872
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1873
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1874
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1875
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1876
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1877
+ "\n",
1878
+ " query query_id query_type \\\n",
1879
+ "0 albany mn population 15177 NUMERIC \n",
1880
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1881
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1882
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1883
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
1884
+ ".. ... ... ... \n",
1885
+ "495 the pool shower company 518269 PERSON \n",
1886
+ "496 longest tenured american football players 442806 PERSON \n",
1887
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1888
+ "498 what actress disappeared for a while 549739 PERSON \n",
1889
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
1890
+ "\n",
1891
+ " wellFormedAnswers EM F1 \\\n",
1892
+ "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1893
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1894
+ "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1895
+ "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1896
+ "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1897
+ ".. ... .. ... \n",
1898
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1899
+ "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1900
+ "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1901
+ "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1902
+ "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1903
+ "\n",
1904
+ " bleu precisions \\\n",
1905
+ "0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1906
+ "1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1907
+ "2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1908
+ "3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
1909
+ "4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
1910
+ ".. ... ... \n",
1911
+ "495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1912
+ "496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1913
+ "497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1914
+ "498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
1915
+ "499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1916
+ "\n",
1917
+ " brevity_penalty length_ratio translation_length reference_length \\\n",
1918
+ "0 0.000335 0.111111 1 9 \n",
1919
+ "1 1.000000 1.000000 14 14 \n",
1920
+ "2 0.000335 0.111111 1 9 \n",
1921
+ "3 0.367879 0.500000 9 18 \n",
1922
+ "4 0.324652 0.470588 8 17 \n",
1923
+ ".. ... ... ... ... \n",
1924
+ "495 1.000000 1.000000 19 19 \n",
1925
+ "496 0.000335 0.111111 1 9 \n",
1926
+ "497 1.000000 1.000000 21 21 \n",
1927
+ "498 0.030197 0.222222 2 9 \n",
1928
+ "499 0.000335 0.111111 1 9 \n",
1929
+ "\n",
1930
+ " rouge1 rouge2 rougeL rougeLsum \n",
1931
+ "0 0.400000 0.250000 0.400000 0.400000 \n",
1932
+ "1 1.000000 1.000000 1.000000 1.000000 \n",
1933
+ "2 0.222222 0.000000 0.222222 0.222222 \n",
1934
+ "3 0.640000 0.608696 0.640000 0.640000 \n",
1935
+ "4 0.583333 0.454545 0.583333 0.583333 \n",
1936
+ ".. ... ... ... ... \n",
1937
+ "495 1.000000 1.000000 1.000000 1.000000 \n",
1938
+ "496 0.222222 0.000000 0.222222 0.222222 \n",
1939
+ "497 1.000000 1.000000 1.000000 1.000000 \n",
1940
+ "498 0.400000 0.250000 0.400000 0.400000 \n",
1941
+ "499 0.363636 0.222222 0.363636 0.363636 \n",
1942
+ "\n",
1943
+ "[500 rows x 18 columns]"
1944
+ ]
1945
+ },
1946
+ "execution_count": 57,
1947
+ "metadata": {},
1948
+ "output_type": "execute_result"
1949
+ }
1950
+ ],
1951
+ "source": [
1952
+ "result_all = result_all.map(\n",
1953
+ " lambda record: rouge.compute(\n",
1954
+ " predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
1955
+ " ),\n",
1956
+ " batched=False,\n",
1957
+ ")\n",
1958
+ "result_all.to_pandas()"
1959
+ ]
1960
+ }
1961
+ ],
1962
+ "metadata": {
1963
+ "kernelspec": {
1964
+ "display_name": "Python 3 (ipykernel)",
1965
+ "language": "python",
1966
+ "name": "python3"
1967
+ },
1968
+ "language_info": {
1969
+ "codemirror_mode": {
1970
+ "name": "ipython",
1971
+ "version": 3
1972
+ },
1973
+ "file_extension": ".py",
1974
+ "mimetype": "text/x-python",
1975
+ "name": "python",
1976
+ "nbconvert_exporter": "python",
1977
+ "pygments_lexer": "ipython3",
1978
+ "version": "3.11.4"
1979
+ }
1980
+ },
1981
+ "nbformat": 4,
1982
+ "nbformat_minor": 5
1983
+ }
Llama-2-eval/notebook/metrics.ipynb ADDED
@@ -0,0 +1,1293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "af2d4577",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Note: you may need to restart the kernel to use updated packages.\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "%pip install -q evaluate rouge_score"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "id": "a6d96660",
25
+ "metadata": {},
26
+ "outputs": [
27
+ {
28
+ "data": {
29
+ "text/plain": [
30
+ "True"
31
+ ]
32
+ },
33
+ "execution_count": 2,
34
+ "metadata": {},
35
+ "output_type": "execute_result"
36
+ }
37
+ ],
38
+ "source": [
39
+ "import os\n",
40
+ "from dotenv import load_dotenv\n",
41
+ "\n",
42
+ "load_dotenv()"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 3,
48
+ "id": "b72bf3f9",
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "text/plain": [
54
+ "Dataset({\n",
55
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
56
+ " num_rows: 500\n",
57
+ "})"
58
+ ]
59
+ },
60
+ "execution_count": 3,
61
+ "metadata": {},
62
+ "output_type": "execute_result"
63
+ }
64
+ ],
65
+ "source": [
66
+ "from datasets import load_from_disk\n",
67
+ "\n",
68
+ "new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
69
+ "new_ds"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 4,
75
+ "id": "051bd771",
76
+ "metadata": {},
77
+ "outputs": [
78
+ {
79
+ "data": {
80
+ "text/plain": [
81
+ "({'NUMERIC': 100,\n",
82
+ " 'DESCRIPTION': 100,\n",
83
+ " 'ENTITY': 100,\n",
84
+ " 'PERSON': 100,\n",
85
+ " 'LOCATION': 100},\n",
86
+ " {'NUMERIC': 179,\n",
87
+ " 'DESCRIPTION': 215,\n",
88
+ " 'ENTITY': 443,\n",
89
+ " 'LOCATION': 461,\n",
90
+ " 'PERSON': 499})"
91
+ ]
92
+ },
93
+ "execution_count": 4,
94
+ "metadata": {},
95
+ "output_type": "execute_result"
96
+ }
97
+ ],
98
+ "source": [
99
+ "counts = {}\n",
100
+ "indices = {}\n",
101
+ "size = 100\n",
102
+ "for i in range(new_ds.num_rows):\n",
103
+ " row = new_ds[i]\n",
104
+ " query_type = row[\"query_type\"]\n",
105
+ " if query_type in counts:\n",
106
+ " counts[query_type] += 1\n",
107
+ " else:\n",
108
+ " counts[query_type] = 1\n",
109
+ " if counts[query_type] == size:\n",
110
+ " indices[query_type] = i\n",
111
+ "counts, indices"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 5,
117
+ "id": "db48dcc4",
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "data": {
122
+ "text/html": [
123
+ "<div>\n",
124
+ "<style scoped>\n",
125
+ " .dataframe tbody tr th:only-of-type {\n",
126
+ " vertical-align: middle;\n",
127
+ " }\n",
128
+ "\n",
129
+ " .dataframe tbody tr th {\n",
130
+ " vertical-align: top;\n",
131
+ " }\n",
132
+ "\n",
133
+ " .dataframe thead th {\n",
134
+ " text-align: right;\n",
135
+ " }\n",
136
+ "</style>\n",
137
+ "<table border=\"1\" class=\"dataframe\">\n",
138
+ " <thead>\n",
139
+ " <tr style=\"text-align: right;\">\n",
140
+ " <th></th>\n",
141
+ " <th>answers</th>\n",
142
+ " <th>passages</th>\n",
143
+ " <th>query</th>\n",
144
+ " <th>query_id</th>\n",
145
+ " <th>query_type</th>\n",
146
+ " <th>wellFormedAnswers</th>\n",
147
+ " </tr>\n",
148
+ " </thead>\n",
149
+ " <tbody>\n",
150
+ " <tr>\n",
151
+ " <th>0</th>\n",
152
+ " <td>[2,662]</td>\n",
153
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
154
+ " <td>albany mn population</td>\n",
155
+ " <td>15177</td>\n",
156
+ " <td>NUMERIC</td>\n",
157
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
158
+ " </tr>\n",
159
+ " <tr>\n",
160
+ " <th>1</th>\n",
161
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
162
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
163
+ " <td>current weather in volcano, ca</td>\n",
164
+ " <td>114414</td>\n",
165
+ " <td>DESCRIPTION</td>\n",
166
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
167
+ " </tr>\n",
168
+ " <tr>\n",
169
+ " <th>2</th>\n",
170
+ " <td>[Hippocrates]</td>\n",
171
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
172
+ " <td>____________________ is considered the father ...</td>\n",
173
+ " <td>9083</td>\n",
174
+ " <td>DESCRIPTION</td>\n",
175
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>3</th>\n",
179
+ " <td>[120 days from the date of the Note.]</td>\n",
180
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
181
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
182
+ " <td>281439</td>\n",
183
+ " <td>NUMERIC</td>\n",
184
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
185
+ " </tr>\n",
186
+ " <tr>\n",
187
+ " <th>4</th>\n",
188
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
189
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
190
+ " <td>average pharmacy tech salary</td>\n",
191
+ " <td>40287</td>\n",
192
+ " <td>NUMERIC</td>\n",
193
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
194
+ " </tr>\n",
195
+ " <tr>\n",
196
+ " <th>...</th>\n",
197
+ " <td>...</td>\n",
198
+ " <td>...</td>\n",
199
+ " <td>...</td>\n",
200
+ " <td>...</td>\n",
201
+ " <td>...</td>\n",
202
+ " <td>...</td>\n",
203
+ " </tr>\n",
204
+ " <tr>\n",
205
+ " <th>495</th>\n",
206
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
207
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
208
+ " <td>the pool shower company</td>\n",
209
+ " <td>518269</td>\n",
210
+ " <td>PERSON</td>\n",
211
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
212
+ " </tr>\n",
213
+ " <tr>\n",
214
+ " <th>496</th>\n",
215
+ " <td>[Hanson]</td>\n",
216
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
217
+ " <td>longest tenured american football players</td>\n",
218
+ " <td>442806</td>\n",
219
+ " <td>PERSON</td>\n",
220
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
221
+ " </tr>\n",
222
+ " <tr>\n",
223
+ " <th>497</th>\n",
224
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
225
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
226
+ " <td>mt. view baptist in pendleton sc</td>\n",
227
+ " <td>460250</td>\n",
228
+ " <td>PERSON</td>\n",
229
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
230
+ " </tr>\n",
231
+ " <tr>\n",
232
+ " <th>498</th>\n",
233
+ " <td>[Honeysuckle Weeks]</td>\n",
234
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
235
+ " <td>what actress disappeared for a while</td>\n",
236
+ " <td>549739</td>\n",
237
+ " <td>PERSON</td>\n",
238
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
239
+ " </tr>\n",
240
+ " <tr>\n",
241
+ " <th>499</th>\n",
242
+ " <td>[African-Nguni]</td>\n",
243
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
244
+ " <td>what ethnicity is the surname sabol</td>\n",
245
+ " <td>658265</td>\n",
246
+ " <td>PERSON</td>\n",
247
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
248
+ " </tr>\n",
249
+ " </tbody>\n",
250
+ "</table>\n",
251
+ "<p>500 rows × 6 columns</p>\n",
252
+ "</div>"
253
+ ],
254
+ "text/plain": [
255
+ " answers \\\n",
256
+ "0 [2,662] \n",
257
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
258
+ "2 [Hippocrates] \n",
259
+ "3 [120 days from the date of the Note.] \n",
260
+ "4 [From $26,000 to $39,000 a year] \n",
261
+ ".. ... \n",
262
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
263
+ "496 [Hanson] \n",
264
+ "497 [Mount Able Baptist Church is located at the a... \n",
265
+ "498 [Honeysuckle Weeks] \n",
266
+ "499 [African-Nguni] \n",
267
+ "\n",
268
+ " passages \\\n",
269
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
270
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
271
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
272
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
273
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
274
+ ".. ... \n",
275
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
276
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
277
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
278
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
279
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
280
+ "\n",
281
+ " query query_id query_type \\\n",
282
+ "0 albany mn population 15177 NUMERIC \n",
283
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
284
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
285
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
286
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
287
+ ".. ... ... ... \n",
288
+ "495 the pool shower company 518269 PERSON \n",
289
+ "496 longest tenured american football players 442806 PERSON \n",
290
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
291
+ "498 what actress disappeared for a while 549739 PERSON \n",
292
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
293
+ "\n",
294
+ " wellFormedAnswers \n",
295
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
296
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
297
+ "2 [Hippocrates is considered the father of moder... \n",
298
+ "3 [An appraisal is good for 120 days from the da... \n",
299
+ "4 [The average salary for a pharmacy technician ... \n",
300
+ ".. ... \n",
301
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
302
+ "496 [Hanson is the longest tenured American footba... \n",
303
+ "497 [Mount Able Baptist Church is located at the a... \n",
304
+ "498 [The actress disappeared for a while Honeysuck... \n",
305
+ "499 [The ethnicity of the surname Sabol is African... \n",
306
+ "\n",
307
+ "[500 rows x 6 columns]"
308
+ ]
309
+ },
310
+ "execution_count": 5,
311
+ "metadata": {},
312
+ "output_type": "execute_result"
313
+ }
314
+ ],
315
+ "source": [
316
+ "new_ds.to_pandas()"
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "code",
321
+ "execution_count": 5,
322
+ "id": "89494c3d",
323
+ "metadata": {},
324
+ "outputs": [],
325
+ "source": [
326
+ "import evaluate\n",
327
+ "\n",
328
+ "bleu = evaluate.load(\"bleu\")\n",
329
+ "rouge = evaluate.load(\"rouge\")"
330
+ ]
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": 6,
335
+ "id": "24a818ba",
336
+ "metadata": {},
337
+ "outputs": [],
338
+ "source": [
339
+ "def calc_metrics(ds):\n",
340
+ " predictions = [ds[i][\"answers\"][0] for i in range(ds.num_rows)]\n",
341
+ " references = [ds[i][\"wellFormedAnswers\"][0] for i in range(ds.num_rows)]\n",
342
+ " bleu_scores = bleu.compute(predictions=predictions, references=references)\n",
343
+ " rouge_scores = rouge.compute(predictions=predictions, references=references)\n",
344
+ " return {\"bleu_scores\": bleu_scores, \"rouge_scores\": rouge_scores}"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 8,
350
+ "id": "e447aa08",
351
+ "metadata": {},
352
+ "outputs": [
353
+ {
354
+ "data": {
355
+ "text/plain": [
356
+ "{'bleu_scores': {'bleu': 0.5842479720128682,\n",
357
+ " 'precisions': [0.7814257485940113,\n",
358
+ " 0.7185392334265505,\n",
359
+ " 0.6801561945331913,\n",
360
+ " 0.6543700340522134],\n",
361
+ " 'brevity_penalty': 0.8263321448047812,\n",
362
+ " 'length_ratio': 0.8398008680112331,\n",
363
+ " 'translation_length': 6579,\n",
364
+ " 'reference_length': 7834},\n",
365
+ " 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
366
+ " 'rouge2': 0.5266427189500504,\n",
367
+ " 'rougeL': 0.623467453115133,\n",
368
+ " 'rougeLsum': 0.6239164817179192}}"
369
+ ]
370
+ },
371
+ "execution_count": 8,
372
+ "metadata": {},
373
+ "output_type": "execute_result"
374
+ }
375
+ ],
376
+ "source": [
377
+ "calc_metrics(new_ds)"
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "code",
382
+ "execution_count": 9,
383
+ "id": "b29d1f3e",
384
+ "metadata": {},
385
+ "outputs": [],
386
+ "source": [
387
+ "def calc_all_metrics(ds):\n",
388
+ " result = {}\n",
389
+ " result[\"OVERALL\"] = calc_metrics(ds)\n",
390
+ " for query_type in indices:\n",
391
+ " result[query_type] = calc_metrics(\n",
392
+ " ds.filter(lambda example: example[\"query_type\"] == query_type)\n",
393
+ " )\n",
394
+ "\n",
395
+ " return result"
396
+ ]
397
+ },
398
+ {
399
+ "cell_type": "code",
400
+ "execution_count": 10,
401
+ "id": "1a4273da",
402
+ "metadata": {},
403
+ "outputs": [
404
+ {
405
+ "data": {
406
+ "text/plain": [
407
+ "{'OVERALL': {'bleu_scores': {'bleu': 0.5842479720128682,\n",
408
+ " 'precisions': [0.7814257485940113,\n",
409
+ " 0.7185392334265505,\n",
410
+ " 0.6801561945331913,\n",
411
+ " 0.6543700340522134],\n",
412
+ " 'brevity_penalty': 0.8263321448047812,\n",
413
+ " 'length_ratio': 0.8398008680112331,\n",
414
+ " 'translation_length': 6579,\n",
415
+ " 'reference_length': 7834},\n",
416
+ " 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
417
+ " 'rouge2': 0.5266427189500504,\n",
418
+ " 'rougeL': 0.623467453115133,\n",
419
+ " 'rougeLsum': 0.6239164817179192}},\n",
420
+ " 'NUMERIC': {'bleu_scores': {'bleu': 0.3589193328591513,\n",
421
+ " 'precisions': [0.7536764705882353,\n",
422
+ " 0.6494413407821229,\n",
423
+ " 0.5884244372990354,\n",
424
+ " 0.5657657657657658],\n",
425
+ " 'brevity_penalty': 0.5649158870633492,\n",
426
+ " 'length_ratio': 0.6365054602184087,\n",
427
+ " 'translation_length': 816,\n",
428
+ " 'reference_length': 1282},\n",
429
+ " 'rouge_scores': {'rouge1': 0.5569863096088544,\n",
430
+ " 'rouge2': 0.4262959859853511,\n",
431
+ " 'rougeL': 0.5495190228731732,\n",
432
+ " 'rougeLsum': 0.5502805905003136}},\n",
433
+ " 'DESCRIPTION': {'bleu_scores': {'bleu': 0.7521919521555381,\n",
434
+ " 'precisions': [0.8093238135237295,\n",
435
+ " 0.761946514686541,\n",
436
+ " 0.7335164835164835,\n",
437
+ " 0.7077144226161955],\n",
438
+ " 'brevity_penalty': 1.0,\n",
439
+ " 'length_ratio': 1.0778632865550022,\n",
440
+ " 'translation_length': 2381,\n",
441
+ " 'reference_length': 2209},\n",
442
+ " 'rouge_scores': {'rouge1': 0.8503571429521525,\n",
443
+ " 'rouge2': 0.8009206345153658,\n",
444
+ " 'rougeL': 0.8406066569954856,\n",
445
+ " 'rougeLsum': 0.8405710628479812}},\n",
446
+ " 'ENTITY': {'bleu_scores': {'bleu': 0.5057439480363012,\n",
447
+ " 'precisions': [0.7135050741608119,\n",
448
+ " 0.6375952582557155,\n",
449
+ " 0.5884509624197983,\n",
450
+ " 0.5555555555555556],\n",
451
+ " 'brevity_penalty': 0.8143961563151505,\n",
452
+ " 'length_ratio': 0.8296632124352331,\n",
453
+ " 'translation_length': 1281,\n",
454
+ " 'reference_length': 1544},\n",
455
+ " 'rouge_scores': {'rouge1': 0.5877667231458372,\n",
456
+ " 'rouge2': 0.48898551862814277,\n",
457
+ " 'rougeL': 0.5796676511145928,\n",
458
+ " 'rougeLsum': 0.5784518864116339}},\n",
459
+ " 'LOCATION': {'bleu_scores': {'bleu': 0.4167786604147962,\n",
460
+ " 'precisions': [0.8600583090379009,\n",
461
+ " 0.7986348122866894,\n",
462
+ " 0.7573385518590998,\n",
463
+ " 0.7414529914529915],\n",
464
+ " 'brevity_penalty': 0.5288627994571649,\n",
465
+ " 'length_ratio': 0.6108637577916296,\n",
466
+ " 'translation_length': 686,\n",
467
+ " 'reference_length': 1123},\n",
468
+ " 'rouge_scores': {'rouge1': 0.5405464995752973,\n",
469
+ " 'rouge2': 0.3950940848806123,\n",
470
+ " 'rougeL': 0.5400724136440879,\n",
471
+ " 'rougeLsum': 0.5389556394979822}},\n",
472
+ " 'PERSON': {'bleu_scores': {'bleu': 0.5861084149356606,\n",
473
+ " 'precisions': [0.773851590106007,\n",
474
+ " 0.7178707224334601,\n",
475
+ " 0.6810766721044046,\n",
476
+ " 0.6522864538395168],\n",
477
+ " 'brevity_penalty': 0.8315596069910627,\n",
478
+ " 'length_ratio': 0.844272076372315,\n",
479
+ " 'translation_length': 1415,\n",
480
+ " 'reference_length': 1676},\n",
481
+ " 'rouge_scores': {'rouge1': 0.6119770025611677,\n",
482
+ " 'rouge2': 0.522853938087197,\n",
483
+ " 'rougeL': 0.6096713664231095,\n",
484
+ " 'rougeLsum': 0.6103086543984155}}}"
485
+ ]
486
+ },
487
+ "execution_count": 10,
488
+ "metadata": {},
489
+ "output_type": "execute_result"
490
+ }
491
+ ],
492
+ "source": [
493
+ "calc_all_metrics(new_ds)"
494
+ ]
495
+ },
496
+ {
497
+ "cell_type": "code",
498
+ "execution_count": 11,
499
+ "id": "3698be27",
500
+ "metadata": {},
501
+ "outputs": [
502
+ {
503
+ "name": "stdout",
504
+ "output_type": "stream",
505
+ "text": [
506
+ "loading env vars from: /Users/inflaton/code/emtech/gpt/Llama-2-eval/.env\n",
507
+ "App init started at 2023-10-10 12:04:33.775140\n",
508
+ "Running on: macOS-14.0-arm64-arm-64bit\n",
509
+ "MPS is available\n",
510
+ "CUDA is NOT available\n",
511
+ "hf_embeddings_device_type: mps\n",
512
+ "hf_pipeline_device_type: mps\n",
513
+ "initializing LLM: openai\n",
514
+ " hf_pipeline_device_type: mps\n",
515
+ " load_quantized_model: None\n",
516
+ " torch_dtype: torch.float32\n",
517
+ " n_threds: 24\n",
518
+ " using model: gpt-3.5-turbo\n",
519
+ "initialization complete\n",
520
+ "App init completed in 0.167s\n"
521
+ ]
522
+ }
523
+ ],
524
+ "source": [
525
+ "import json\n",
526
+ "import sys\n",
527
+ "import os\n",
528
+ "\n",
529
+ "os.environ[\"TEST_FIRST_5\"] = \"true\"\n",
530
+ "os.environ[\"LANGCHAIN_DEBUG\"] = \"true\"\n",
531
+ "\n",
532
+ "from pathlib import Path\n",
533
+ "\n",
534
+ "sys.path.append(str(Path.cwd().parent))\n",
535
+ "\n",
536
+ "from evaluate_llm_ms_macro import (\n",
537
+ " QAChainWithMsMacroDataset,\n",
538
+ " llm_loader,\n",
539
+ " calc_all_metrics,\n",
540
+ ")"
541
+ ]
542
+ },
543
+ {
544
+ "cell_type": "code",
545
+ "execution_count": 12,
546
+ "id": "2395804d",
547
+ "metadata": {},
548
+ "outputs": [
549
+ {
550
+ "name": "stdout",
551
+ "output_type": "stream",
552
+ "text": [
553
+ "{'question': 'albany mn population', 'chat_history': []}\n",
554
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
555
+ "\u001b[0m{\n",
556
+ " \"question\": \"albany mn population\",\n",
557
+ " \"chat_history\": []\n",
558
+ "}\n",
559
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
560
+ "\u001b[0m[inputs]\n",
561
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
562
+ "\u001b[0m{\n",
563
+ " \"question\": \"albany mn population\",\n",
564
+ " \"context\": \"City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. 
Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"\n",
565
+ "}\n",
566
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
567
+ "\u001b[0m{\n",
568
+ " \"prompts\": [\n",
569
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
570
+ " ]\n",
571
+ "}\n",
572
+ "The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.23s] Exiting LLM run with output:\n",
573
+ "\u001b[0m{\n",
574
+ " \"generations\": [\n",
575
+ " [\n",
576
+ " {\n",
577
+ " \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
578
+ " \"generation_info\": {\n",
579
+ " \"finish_reason\": \"stop\"\n",
580
+ " },\n",
581
+ " \"message\": {\n",
582
+ " \"lc\": 1,\n",
583
+ " \"type\": \"constructor\",\n",
584
+ " \"id\": [\n",
585
+ " \"langchain\",\n",
586
+ " \"schema\",\n",
587
+ " \"messages\",\n",
588
+ " \"AIMessageChunk\"\n",
589
+ " ],\n",
590
+ " \"kwargs\": {\n",
591
+ " \"example\": false,\n",
592
+ " \"content\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
593
+ " \"additional_kwargs\": {}\n",
594
+ " }\n",
595
+ " }\n",
596
+ " }\n",
597
+ " ]\n",
598
+ " ],\n",
599
+ " \"llm_output\": null,\n",
600
+ " \"run\": null\n",
601
+ "}\n",
602
+ "\n",
603
+ "\n",
604
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [3.23s] Exiting Chain run with output:\n",
605
+ "\u001b[0m{\n",
606
+ " \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
607
+ "}\n",
608
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [3.23s] Exiting Chain run with output:\n",
609
+ "\u001b[0m{\n",
610
+ " \"output_text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
611
+ "}\n",
612
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [3.46s] Exiting Chain run with output:\n",
613
+ "\u001b[0m[outputs]\n",
614
+ "{'question': 'current weather in volcano, ca', 'chat_history': []}\n",
615
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
616
+ "\u001b[0m{\n",
617
+ " \"question\": \"current weather in volcano, ca\",\n",
618
+ " \"chat_history\": []\n",
619
+ "}\n",
620
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
621
+ "\u001b[0m[inputs]\n",
622
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
623
+ "\u001b[0m{\n",
624
+ " \"question\": \"current weather in volcano, ca\",\n",
625
+ " \"context\": \"Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. 
There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\"\n",
626
+ "}\n",
627
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
628
+ "\u001b[0m{\n",
629
+ " \"prompts\": [\n",
630
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nVolcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 
1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\\nHuman: current weather in volcano, ca\"\n",
631
+ " ]\n",
632
+ "}\n",
633
+ "I don't have the current weather information for Volcano, CA.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.04s] Exiting LLM run with output:\n",
634
+ "\u001b[0m{\n",
635
+ " \"generations\": [\n",
636
+ " [\n",
637
+ " {\n",
638
+ " \"text\": \"I don't have the current weather information for Volcano, CA.\",\n",
639
+ " \"generation_info\": {\n",
640
+ " \"finish_reason\": \"stop\"\n",
641
+ " },\n",
642
+ " \"message\": {\n",
643
+ " \"lc\": 1,\n",
644
+ " \"type\": \"constructor\",\n",
645
+ " \"id\": [\n",
646
+ " \"langchain\",\n",
647
+ " \"schema\",\n",
648
+ " \"messages\",\n",
649
+ " \"AIMessageChunk\"\n",
650
+ " ],\n",
651
+ " \"kwargs\": {\n",
652
+ " \"example\": false,\n",
653
+ " \"content\": \"I don't have the current weather information for Volcano, CA.\",\n",
654
+ " \"additional_kwargs\": {}\n",
655
+ " }\n",
656
+ " }\n",
657
+ " }\n",
658
+ " ]\n",
659
+ " ],\n",
660
+ " \"llm_output\": null,\n",
661
+ " \"run\": null\n",
662
+ "}\n",
663
+ "\n",
664
+ "\n",
665
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.04s] Exiting Chain run with output:\n",
666
+ "\u001b[0m{\n",
667
+ " \"text\": \"I don't have the current weather information for Volcano, CA.\"\n",
668
+ "}\n",
669
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.04s] Exiting Chain run with output:\n",
670
+ "\u001b[0m{\n",
671
+ " \"output_text\": \"I don't have the current weather information for Volcano, CA.\"\n",
672
+ "}\n",
673
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.04s] Exiting Chain run with output:\n",
674
+ "\u001b[0m[outputs]\n",
675
+ "{'question': '____________________ is considered the father of modern medicine.', 'chat_history': []}\n",
676
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
677
+ "\u001b[0m{\n",
678
+ " \"question\": \"____________________ is considered the father of modern medicine.\",\n",
679
+ " \"chat_history\": []\n",
680
+ "}\n",
681
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
682
+ "\u001b[0m[inputs]\n",
683
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
684
+ "\u001b[0m{\n",
685
+ " \"question\": \"____________________ is considered the father of modern medicine.\",\n",
686
+ " \"context\": \"Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. 
It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\"\n",
687
+ "}\n",
688
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
689
+ "\u001b[0m{\n",
690
+ " \"prompts\": [\n",
691
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
692
+ " ]\n",
693
+ "}\n",
694
+ "Hippocrates is considered the father of modern medicine.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [654ms] Exiting LLM run with output:\n",
695
+ "\u001b[0m{\n",
696
+ " \"generations\": [\n",
697
+ " [\n",
698
+ " {\n",
699
+ " \"text\": \"Hippocrates is considered the father of modern medicine.\",\n",
700
+ " \"generation_info\": {\n",
701
+ " \"finish_reason\": \"stop\"\n",
702
+ " },\n",
703
+ " \"message\": {\n",
704
+ " \"lc\": 1,\n",
705
+ " \"type\": \"constructor\",\n",
706
+ " \"id\": [\n",
707
+ " \"langchain\",\n",
708
+ " \"schema\",\n",
709
+ " \"messages\",\n",
710
+ " \"AIMessageChunk\"\n",
711
+ " ],\n",
712
+ " \"kwargs\": {\n",
713
+ " \"example\": false,\n",
714
+ " \"content\": \"Hippocrates is considered the father of modern medicine.\",\n",
715
+ " \"additional_kwargs\": {}\n",
716
+ " }\n",
717
+ " }\n",
718
+ " }\n",
719
+ " ]\n",
720
+ " ],\n",
721
+ " \"llm_output\": null,\n",
722
+ " \"run\": null\n",
723
+ "}\n",
724
+ "\n",
725
+ "\n",
726
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [655ms] Exiting Chain run with output:\n",
727
+ "\u001b[0m{\n",
728
+ " \"text\": \"Hippocrates is considered the father of modern medicine.\"\n",
729
+ "}\n",
730
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [655ms] Exiting Chain run with output:\n",
731
+ "\u001b[0m{\n",
732
+ " \"output_text\": \"Hippocrates is considered the father of modern medicine.\"\n",
733
+ "}\n",
734
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [657ms] Exiting Chain run with output:\n",
735
+ "\u001b[0m[outputs]\n",
736
+ "{'question': 'how many days is an appraisal good for a fannie loan', 'chat_history': []}\n",
737
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
738
+ "\u001b[0m{\n",
739
+ " \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
740
+ " \"chat_history\": []\n",
741
+ "}\n",
742
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
743
+ "\u001b[0m[inputs]\n",
744
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
745
+ "\u001b[0m{\n",
746
+ " \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
747
+ " \"context\": \"New and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. acceptable, but supporting comps are with an adequate explanation from the Appraiser. 
Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\"\n",
748
+ "}\n",
749
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
750
+ "\u001b[0m{\n",
751
+ " \"prompts\": [\n",
752
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nNew and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. 
acceptable, but supporting comps are with an adequate explanation from the Appraiser. Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\\nHuman: how many days is an appraisal good for a fannie loan\"\n",
753
+ " ]\n",
754
+ "}\n",
755
+ "According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.02s] Exiting LLM run with output:\n",
756
+ "\u001b[0m{\n",
757
+ " \"generations\": [\n",
758
+ " [\n",
759
+ " {\n",
760
+ " \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
761
+ " \"generation_info\": {\n",
762
+ " \"finish_reason\": \"stop\"\n",
763
+ " },\n",
764
+ " \"message\": {\n",
765
+ " \"lc\": 1,\n",
766
+ " \"type\": \"constructor\",\n",
767
+ " \"id\": [\n",
768
+ " \"langchain\",\n",
769
+ " \"schema\",\n",
770
+ " \"messages\",\n",
771
+ " \"AIMessageChunk\"\n",
772
+ " ],\n",
773
+ " \"kwargs\": {\n",
774
+ " \"example\": false,\n",
775
+ " \"content\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
776
+ " \"additional_kwargs\": {}\n",
777
+ " }\n",
778
+ " }\n",
779
+ " }\n",
780
+ " ]\n",
781
+ " ],\n",
782
+ " \"llm_output\": null,\n",
783
+ " \"run\": null\n",
784
+ "}\n",
785
+ "\n",
786
+ "\n",
787
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.02s] Exiting Chain run with output:\n",
788
+ "\u001b[0m{\n",
789
+ " \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
790
+ "}\n",
791
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.02s] Exiting Chain run with output:\n",
792
+ "\u001b[0m{\n",
793
+ " \"output_text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
794
+ "}\n",
795
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.02s] Exiting Chain run with output:\n",
796
+ "\u001b[0m[outputs]\n",
797
+ "{'question': 'average pharmacy tech salary', 'chat_history': []}\n",
798
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
799
+ "\u001b[0m{\n",
800
+ " \"question\": \"average pharmacy tech salary\",\n",
801
+ " \"chat_history\": []\n",
802
+ "}\n",
803
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
804
+ "\u001b[0m[inputs]\n",
805
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
806
+ "\u001b[0m{\n",
807
+ " \"question\": \"average pharmacy tech salary\",\n",
808
+ " \"context\": \"If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. 
These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\"\n",
809
+ "}\n",
810
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
811
+ "\u001b[0m{\n",
812
+ " \"prompts\": [\n",
813
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nIf you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. 
$18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\\nHuman: average pharmacy tech salary\"\n",
814
+ " ]\n",
815
+ "}\n",
816
+ "The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.45s] Exiting LLM run with output:\n",
817
+ "\u001b[0m{\n",
818
+ " \"generations\": [\n",
819
+ " [\n",
820
+ " {\n",
821
+ " \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
822
+ " \"generation_info\": {\n",
823
+ " \"finish_reason\": \"stop\"\n",
824
+ " },\n",
825
+ " \"message\": {\n",
826
+ " \"lc\": 1,\n",
827
+ " \"type\": \"constructor\",\n",
828
+ " \"id\": [\n",
829
+ " \"langchain\",\n",
830
+ " \"schema\",\n",
831
+ " \"messages\",\n",
832
+ " \"AIMessageChunk\"\n",
833
+ " ],\n",
834
+ " \"kwargs\": {\n",
835
+ " \"example\": false,\n",
836
+ " \"content\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
837
+ " \"additional_kwargs\": {}\n",
838
+ " }\n",
839
+ " }\n",
840
+ " }\n",
841
+ " ]\n",
842
+ " ],\n",
843
+ " \"llm_output\": null,\n",
844
+ " \"run\": null\n",
845
+ "}\n",
846
+ "\n",
847
+ "\n",
848
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.45s] Exiting Chain run with output:\n",
849
+ "\u001b[0m{\n",
850
+ " \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
851
+ "}\n",
852
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.46s] Exiting Chain run with output:\n",
853
+ "\u001b[0m{\n",
854
+ " \"output_text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
855
+ "}\n",
856
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.46s] Exiting Chain run with output:\n",
857
+ "\u001b[0m[outputs]\n",
858
+ "Q-001: albany mn population\n",
859
+ "A-001: The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\n",
860
+ "G-001: The population of Albany, Minnesota is 2,662. \n",
861
+ "\n",
862
+ "Q-002: current weather in volcano, ca\n",
863
+ "A-002: I don't have the current weather information for Volcano, CA.\n",
864
+ "G-002: The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\n",
865
+ "\n",
866
+ "Q-003: ____________________ is considered the father of modern medicine.\n",
867
+ "A-003: Hippocrates is considered the father of modern medicine.\n",
868
+ "G-003: Hippocrates is considered the father of modern medicine.\n",
869
+ "\n",
870
+ "Q-004: how many days is an appraisal good for a fannie loan\n",
871
+ "A-004: According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\n",
872
+ "G-004: An appraisal is good for 120 days from the date of the Note for a Fannie loan.\n",
873
+ "\n",
874
+ "Q-005: average pharmacy tech salary\n",
875
+ "A-005: The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\n",
876
+ "G-005: The average salary for a pharmacy technician is $26,000 to $39,000 in a year.\n",
877
+ "\n",
878
+ "\n",
879
+ "\n",
880
+ "scores: {\n",
881
+ " \"OVERALL\": {\n",
882
+ " \"bleu_scores\": {\n",
883
+ " \"bleu\": 0.3953488372093023,\n",
884
+ " \"precisions\": [\n",
885
+ " 0.3953488372093023\n",
886
+ " ],\n",
887
+ " \"brevity_penalty\": 1.0,\n",
888
+ " \"length_ratio\": 1.9253731343283582,\n",
889
+ " \"translation_length\": 129,\n",
890
+ " \"reference_length\": 67\n",
891
+ " },\n",
892
+ " \"rouge_scores\": {\n",
893
+ " \"rouge1\": 0.5737456342107505,\n",
894
+ " \"rouge2\": 0.4160794941282746,\n",
895
+ " \"rougeL\": 0.5108953062441435,\n",
896
+ " \"rougeLsum\": 0.4989862850327967\n",
897
+ " }\n",
898
+ " },\n",
899
+ " \"NUMERIC\": {\n",
900
+ " \"bleu_scores\": {\n",
901
+ " \"bleu\": 0.36111111111111116,\n",
902
+ " \"precisions\": [\n",
903
+ " 0.3611111111111111\n",
904
+ " ],\n",
905
+ " \"brevity_penalty\": 1.0,\n",
906
+ " \"length_ratio\": 2.4545454545454546,\n",
907
+ " \"translation_length\": 108,\n",
908
+ " \"reference_length\": 44\n",
909
+ " },\n",
910
+ " \"rouge_scores\": {\n",
911
+ " \"rouge1\": 0.5395760570179174,\n",
912
+ " \"rouge2\": 0.3694751662231337,\n",
913
+ " \"rougeL\": 0.4656557912371866,\n",
914
+ " \"rougeLsum\": 0.4656557912371866\n",
915
+ " }\n",
916
+ " },\n",
917
+ " \"DESCRIPTION\": {\n",
918
+ " \"bleu_scores\": {\n",
919
+ " \"bleu\": 0.5195179673581217,\n",
920
+ " \"precisions\": [\n",
921
+ " 0.5714285714285714\n",
922
+ " ],\n",
923
+ " \"brevity_penalty\": 0.909156442876713,\n",
924
+ " \"length_ratio\": 0.9130434782608695,\n",
925
+ " \"translation_length\": 21,\n",
926
+ " \"reference_length\": 23\n",
927
+ " },\n",
928
+ " \"rouge_scores\": {\n",
929
+ " \"rouge1\": 0.625,\n",
930
+ " \"rouge2\": 0.5,\n",
931
+ " \"rougeL\": 0.5833333333333334,\n",
932
+ " \"rougeLsum\": 0.5833333333333334\n",
933
+ " }\n",
934
+ " }\n",
935
+ "}\n",
936
+ "\n",
937
+ "CPU times: user 512 ms, sys: 63.7 ms, total: 576 ms\n",
938
+ "Wall time: 7.85 s\n"
939
+ ]
940
+ }
941
+ ],
942
+ "source": [
943
+ "%%time\n",
944
+ "\n",
945
+ "eval_ds = new_ds.select(range(5))\n",
946
+ "qa_chain = QAChainWithMsMacroDataset(eval_ds, llm_loader)\n",
947
+ "\n",
948
+ "answers = []\n",
949
+ "for i in range(eval_ds.num_rows):\n",
950
+ " inputs = {\"question\": str(eval_ds[i][\"query\"]), \"chat_history\": []}\n",
951
+ " result = qa_chain.call_chain(\n",
952
+ " inputs,\n",
953
+ " None,\n",
954
+ " None,\n",
955
+ " True,\n",
956
+ " )\n",
957
+ " answers.append(result[\"answer\"])\n",
958
+ "\n",
959
+ "result = calc_all_metrics(eval_ds, answers)\n",
960
+ "\n",
961
+ "for i in range(eval_ds.num_rows):\n",
962
+ " n = i + 1\n",
963
+ " print(f\"Q-{n:03d}: {eval_ds[i]['query']}\")\n",
964
+ " print(f\"A-{n:03d}: {answers[i]}\")\n",
965
+ " print(f\"G-{n:03d}: {eval_ds[i]['wellFormedAnswers'][0]}\\n\")\n",
966
+ "\n",
967
+ "print(f\"\\n\\nscores: {json.dumps(result, indent=2)}\\n\")"
968
+ ]
969
+ },
970
+ {
971
+ "cell_type": "code",
972
+ "execution_count": 13,
973
+ "id": "bae05024",
974
+ "metadata": {},
975
+ "outputs": [
976
+ {
977
+ "name": "stdout",
978
+ "output_type": "stream",
979
+ "text": [
980
+ "System: Use the following pieces of context to answer the users question. \n",
981
+ "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
982
+ "----------------\n",
983
+ "City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n",
984
+ "\n",
985
+ "Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\n",
986
+ "\n",
987
+ "For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
988
+ "\n",
989
+ "Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\n",
990
+ "\n",
991
+ "Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
992
+ "\n",
993
+ "Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\n",
994
+ "\n",
995
+ "For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\n",
996
+ "\n",
997
+ "For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\n",
998
+ "Human: albany mn population\n"
999
+ ]
1000
+ }
1001
+ ],
1002
+ "source": [
1003
+ "print(\n",
1004
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
1005
+ ")"
1006
+ ]
1007
+ },
1008
+ {
1009
+ "cell_type": "code",
1010
+ "execution_count": 4,
1011
+ "id": "593f574a",
1012
+ "metadata": {},
1013
+ "outputs": [
1014
+ {
1015
+ "name": "stdout",
1016
+ "output_type": "stream",
1017
+ "text": [
1018
+ "System: Use the following pieces of context to answer the users question. \n",
1019
+ "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
1020
+ "----------------\n",
1021
+ "Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\n",
1022
+ "\n",
1023
+ "Many of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\n",
1024
+ "\n",
1025
+ "Despite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\n",
1026
+ "\n",
1027
+ "At least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\n",
1028
+ "\n",
1029
+ "460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\n",
1030
+ "\n",
1031
+ "TRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \n",
1032
+ "\n",
1033
+ "The two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\n",
1034
+ "\n",
1035
+ "Hippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\n",
1036
+ "\n",
1037
+ "Reload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\n",
1038
+ "\n",
1039
+ "However, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\n",
1040
+ "Human: ____________________ is considered the father of modern medicine.\n"
1041
+ ]
1042
+ }
1043
+ ],
1044
+ "source": [
1045
+ "print(\n",
1046
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
1047
+ ")"
1048
+ ]
1049
+ },
1050
+ {
1051
+ "cell_type": "markdown",
1052
+ "id": "5b9204e0",
1053
+ "metadata": {},
1054
+ "source": [
1055
+ "```\n",
1056
+ "Q-003: ____________________ is considered the father of modern medicine.\n",
1057
+ "A-003: Hippocrates is considered the father of modern medicine.\n",
1058
+ "G-003: Hippocrates is considered the father of modern medicine.\n",
1059
+ "```"
1060
+ ]
1061
+ },
1062
+ {
1063
+ "cell_type": "code",
1064
+ "execution_count": 11,
1065
+ "id": "5cfc8320",
1066
+ "metadata": {},
1067
+ "outputs": [
1068
+ {
1069
+ "data": {
1070
+ "text/plain": [
1071
+ "{'answers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.'],\n",
1072
+ " 'passages': {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0],\n",
1073
+ " 'passage_text': ['Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.',\n",
1074
+ " 'Current U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.',\n",
1075
+ " 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.',\n",
1076
+ " 'Volcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.',\n",
1077
+ " 'Volcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.',\n",
1078
+ " 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.',\n",
1079
+ " 'Volcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.',\n",
1080
+ " 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.',\n",
1081
+ " 'Volcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.',\n",
1082
+ " 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.'],\n",
1083
+ " 'url': ['http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1084
+ " 'http://www.fastweather.com/index.php?city=Volcano_CA&g',\n",
1085
+ " 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1086
+ " 'http://www.homefacts.com/weather/California/Amador-County/Volcano.html',\n",
1087
+ " 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
1088
+ " 'http://www.weatherman.com/us/ca/volcano',\n",
1089
+ " 'http://www.weatherman.com/us/ca/volcano',\n",
1090
+ " 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1091
+ " 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
1092
+ " 'http://www.weatherman.com/us/ca/volcano']},\n",
1093
+ " 'query': 'current weather in volcano, ca',\n",
1094
+ " 'query_id': 114414,\n",
1095
+ " 'query_type': 'DESCRIPTION',\n",
1096
+ " 'wellFormedAnswers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.']}"
1097
+ ]
1098
+ },
1099
+ "execution_count": 11,
1100
+ "metadata": {},
1101
+ "output_type": "execute_result"
1102
+ }
1103
+ ],
1104
+ "source": [
1105
+ "test_ds = new_ds.select([1])\n",
1106
+ "test_ds[0]"
1107
+ ]
1108
+ },
1109
+ {
1110
+ "cell_type": "code",
1111
+ "execution_count": 12,
1112
+ "id": "56b91cae",
1113
+ "metadata": {},
1114
+ "outputs": [
1115
+ {
1116
+ "data": {
1117
+ "text/plain": [
1118
+ "{'bleu_scores': {'bleu': 1.0,\n",
1119
+ " 'precisions': [1.0, 1.0, 1.0, 1.0],\n",
1120
+ " 'brevity_penalty': 1.0,\n",
1121
+ " 'length_ratio': 1.0,\n",
1122
+ " 'translation_length': 14,\n",
1123
+ " 'reference_length': 14},\n",
1124
+ " 'rouge_scores': {'rouge1': 1.0,\n",
1125
+ " 'rouge2': 1.0,\n",
1126
+ " 'rougeL': 1.0,\n",
1127
+ " 'rougeLsum': 1.0}}"
1128
+ ]
1129
+ },
1130
+ "execution_count": 12,
1131
+ "metadata": {},
1132
+ "output_type": "execute_result"
1133
+ }
1134
+ ],
1135
+ "source": [
1136
+ "calc_metrics(test_ds)"
1137
+ ]
1138
+ },
1139
+ {
1140
+ "cell_type": "code",
1141
+ "execution_count": 18,
1142
+ "id": "56c6bf24",
1143
+ "metadata": {},
1144
+ "outputs": [
1145
+ {
1146
+ "data": {
1147
+ "text/plain": [
1148
+ "['The',\n",
1149
+ " 'Volcano',\n",
1150
+ " 'forecast',\n",
1151
+ " 'for',\n",
1152
+ " 'Apr',\n",
1153
+ " '12',\n",
1154
+ " 'is',\n",
1155
+ " '52',\n",
1156
+ " 'degrees',\n",
1157
+ " 'and',\n",
1158
+ " 'Patchy',\n",
1159
+ " 'light',\n",
1160
+ " 'rain.']"
1161
+ ]
1162
+ },
1163
+ "execution_count": 18,
1164
+ "metadata": {},
1165
+ "output_type": "execute_result"
1166
+ }
1167
+ ],
1168
+ "source": [
1169
+ "test_ds[0][\"answers\"][0].split()"
1170
+ ]
1171
+ },
1172
+ {
1173
+ "cell_type": "code",
1174
+ "execution_count": 19,
1175
+ "id": "77d08267",
1176
+ "metadata": {},
1177
+ "outputs": [
1178
+ {
1179
+ "data": {
1180
+ "text/plain": [
1181
+ "13"
1182
+ ]
1183
+ },
1184
+ "execution_count": 19,
1185
+ "metadata": {},
1186
+ "output_type": "execute_result"
1187
+ }
1188
+ ],
1189
+ "source": [
1190
+ "len(test_ds[0][\"answers\"][0].split())"
1191
+ ]
1192
+ },
1193
+ {
1194
+ "cell_type": "code",
1195
+ "execution_count": 22,
1196
+ "id": "8c19694b",
1197
+ "metadata": {},
1198
+ "outputs": [
1199
+ {
1200
+ "data": {
1201
+ "text/plain": [
1202
+ "{'answers': ['From $26,000 to $39,000 a year'],\n",
1203
+ " 'passages': {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
1204
+ " 'passage_text': ['If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.',\n",
1205
+ " 'What can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.',\n",
1206
+ " 'The median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.',\n",
1207
+ " 'The majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.',\n",
1208
+ " 'The pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.',\n",
1209
+ " \"Pharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\",\n",
1210
+ " 'Popular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.',\n",
1211
+ " 'Pharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.',\n",
1212
+ " 'It also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.',\n",
1213
+ " 'Occupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.'],\n",
1214
+ " 'url': ['http://www.pharmacytechschools.com/salary/',\n",
1215
+ " 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
1216
+ " 'https://www.bls.gov/ooh/healthcare/pharmacy-technicians.htm',\n",
1217
+ " 'http://www.pharmacytechschools.com/salary/',\n",
1218
+ " 'http://www.pharmacytechschools.com/salary/',\n",
1219
+ " 'http://www.payscale.com/research/US/Job=Pharmacy_Technician/Hourly_Rate',\n",
1220
+ " 'http://www.simplyhired.com/salaries-k-certified-pharmacy-technician-jobs.html',\n",
1221
+ " 'https://www.salarylist.com/jobs/Pharmacy-Technician-Salary.htm',\n",
1222
+ " 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
1223
+ " 'https://www.bls.gov/oes/current/oes292052.htm']},\n",
1224
+ " 'query': 'average pharmacy tech salary',\n",
1225
+ " 'query_id': 40287,\n",
1226
+ " 'query_type': 'NUMERIC',\n",
1227
+ " 'wellFormedAnswers': ['The average salary for a pharmacy technician is $26,000 to $39,000 in a year.',\n",
1228
+ " 'The average salary for a pharmacy technician is from $26,000 to $39,000 a year.']}"
1229
+ ]
1230
+ },
1231
+ "execution_count": 22,
1232
+ "metadata": {},
1233
+ "output_type": "execute_result"
1234
+ }
1235
+ ],
1236
+ "source": [
1237
+ "test_ds = new_ds.select([4])\n",
1238
+ "test_ds[0]"
1239
+ ]
1240
+ },
1241
+ {
1242
+ "cell_type": "code",
1243
+ "execution_count": 23,
1244
+ "id": "34209164",
1245
+ "metadata": {},
1246
+ "outputs": [
1247
+ {
1248
+ "data": {
1249
+ "text/plain": [
1250
+ "{'bleu_scores': {'bleu': 0.19303951204286907,\n",
1251
+ " 'precisions': [0.875, 0.7142857142857143, 0.5, 0.4],\n",
1252
+ " 'brevity_penalty': 0.32465246735834974,\n",
1253
+ " 'length_ratio': 0.47058823529411764,\n",
1254
+ " 'translation_length': 8,\n",
1255
+ " 'reference_length': 17},\n",
1256
+ " 'rouge_scores': {'rouge1': 0.5833333333333334,\n",
1257
+ " 'rouge2': 0.4545454545454545,\n",
1258
+ " 'rougeL': 0.5833333333333334,\n",
1259
+ " 'rougeLsum': 0.5833333333333334}}"
1260
+ ]
1261
+ },
1262
+ "execution_count": 23,
1263
+ "metadata": {},
1264
+ "output_type": "execute_result"
1265
+ }
1266
+ ],
1267
+ "source": [
1268
+ "calc_metrics(test_ds)"
1269
+ ]
1270
+ }
1271
+ ],
1272
+ "metadata": {
1273
+ "kernelspec": {
1274
+ "display_name": "Python 3 (ipykernel)",
1275
+ "language": "python",
1276
+ "name": "python3"
1277
+ },
1278
+ "language_info": {
1279
+ "codemirror_mode": {
1280
+ "name": "ipython",
1281
+ "version": 3
1282
+ },
1283
+ "file_extension": ".py",
1284
+ "mimetype": "text/x-python",
1285
+ "name": "python",
1286
+ "nbconvert_exporter": "python",
1287
+ "pygments_lexer": "ipython3",
1288
+ "version": "3.10.9"
1289
+ }
1290
+ },
1291
+ "nbformat": 4,
1292
+ "nbformat_minor": 5
1293
+ }
README.md CHANGED
@@ -1,13 +1,53 @@
1
  ---
2
- title: Phi-3-mini-128k-instruct
3
  emoji: 🤖💬
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 4.19.1
8
  app_file: app.py
9
  pinned: true
10
  short_description: 'Chat with LLMs'
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Chat with LLMs
3
  emoji: 🤖💬
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.26.0
8
  app_file: app.py
9
  pinned: true
10
  short_description: 'Chat with LLMs'
11
  ---
12
 
13
+ ## Running Locally
14
+
15
+ 1. Check pre-conditions:
16
+
17
+ - [Git Large File Storage (LFS)](https://git-lfs.com/) must have been installed.
18
+ - Run `python --version` to make sure you're running Python version 3.10 or above.
19
+ - The latest PyTorch with GPU support must have been installed. Here is a sample `conda` command:
20
+
21
+ ```
22
+ conda install -y pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
23
+ ```
24
+
25
+
26
+ 2. Clone the repo
27
+
28
+ ```
29
+ git lfs install
30
+ git clone https://huggingface.co/spaces/inflaton-ai/llm-qa-bench
31
+ ```
32
+
33
+ 3. Install packages
34
+
35
+ ```
36
+ pip install -r requirements.txt
37
+ ```
+
38
+ 4. Set up your environment variables
39
+
40
+ - By default, environment variables are loaded from `.env.example` file
41
+ - If you don't want to use the default settings, copy `.env.example` into `.env`. You can then update it for your local runs.
42
+
43
+ 5. Run automated test:
44
+
45
+ ```
46
+ python qa_chain_test.py
47
+ ```
48
+
49
+ 6. Start the local server at `http://localhost:7860`:
50
+
51
+ ```
52
+ python app.py
53
+ ```
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import torch
3
  from transformers import (
@@ -8,7 +9,6 @@ from transformers import (
8
  import os
9
  from threading import Thread
10
  import spaces
11
- import time
12
  import subprocess
13
 
14
  subprocess.run(
@@ -17,20 +17,51 @@ subprocess.run(
17
  shell=True,
18
  )
19
 
20
- token = os.environ["HF_TOKEN"]
 
 
 
21
 
 
 
 
 
 
 
 
 
 
22
 
23
  model = AutoModelForCausalLM.from_pretrained(
24
- "microsoft/Phi-3-mini-128k-instruct",
25
  token=token,
26
  trust_remote_code=True,
27
  )
28
- tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)
29
  terminators = [
30
  tok.eos_token_id,
31
  ]
32
 
33
- if torch.cuda.is_available():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  device = torch.device("cuda")
35
  print(f"Using GPU: {torch.cuda.get_device_name(device)}")
36
  else:
@@ -38,27 +69,34 @@ else:
38
  print("Using CPU")
39
 
40
  model = model.to(device)
41
- # Dispatch Errors
42
 
43
 
44
  @spaces.GPU(duration=60)
45
- def chat(message, history, temperature, do_sample, max_tokens):
 
46
  chat = []
47
  for item in history:
48
  chat.append({"role": "user", "content": item[0]})
49
  if item[1] is not None:
50
  chat.append({"role": "assistant", "content": item[1]})
 
 
 
 
 
 
51
  chat.append({"role": "user", "content": message})
 
52
  messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
53
  model_inputs = tok([messages], return_tensors="pt").to(device)
54
  streamer = TextIteratorStreamer(
55
- tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
56
  )
57
  generate_kwargs = dict(
58
  model_inputs,
59
  streamer=streamer,
60
  max_new_tokens=max_tokens,
61
- do_sample=True,
62
  temperature=temperature,
63
  eos_token_id=terminators,
64
  )
@@ -79,8 +117,7 @@ def chat(message, history, temperature, do_sample, max_tokens):
79
 
80
  demo = gr.ChatInterface(
81
  fn=chat,
82
- examples=[["Write me a poem about Machine Learning."]],
83
- # multimodal=False,
84
  additional_inputs_accordion=gr.Accordion(
85
  label="⚙️ Parameters", open=False, render=False
86
  ),
@@ -88,6 +125,14 @@ demo = gr.ChatInterface(
88
  gr.Slider(
89
  minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
90
  ),
 
 
 
 
 
 
 
 
91
  gr.Checkbox(label="Sampling", value=True),
92
  gr.Slider(
93
  minimum=128,
@@ -100,6 +145,6 @@ demo = gr.ChatInterface(
100
  ],
101
  stop_btn="Stop Generation",
102
  title="Chat With LLMs",
103
- description="Now Running [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct)",
104
  )
105
  demo.launch()
 
1
+ import json
2
  import gradio as gr
3
  import torch
4
  from transformers import (
 
9
  import os
10
  from threading import Thread
11
  import spaces
 
12
  import subprocess
13
 
14
  subprocess.run(
 
17
  shell=True,
18
  )
19
 
20
# Hugging Face access token; None is acceptable for public models.
token = os.getenv("HF_TOKEN")

# Model to serve; override with the MODEL_NAME env var.
model_name = (
    os.getenv("MODEL_NAME") or "google/gemma-1.1-2b-it"
)  # "microsoft/Phi-3-mini-128k-instruct"

# JSON file holding the benchmark questions; each entry is expected to have
# a "question" key (and a "context" key used when building RAG prompts).
questions_file_path = (
    os.getenv("QUESTIONS_FILE_PATH") or "./data/datasets/ms_macro.json"
)

# Load the questions once at startup. A context manager closes the file
# promptly; the original `json.loads(open(...).read())` leaked the handle.
with open(questions_file_path, encoding="utf-8") as questions_file:
    questions = json.load(questions_file)

# One-element lists, the shape gr.ChatInterface expects for `examples`.
examples = [[question["question"].strip()] for question in questions]
print(f"Loaded {len(examples)} examples")

qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
34
 
35
  model = AutoModelForCausalLM.from_pretrained(
36
+ model_name,
37
  token=token,
38
  trust_remote_code=True,
39
  )
40
+ tok = AutoTokenizer.from_pretrained(model_name, token=token)
41
  terminators = [
42
  tok.eos_token_id,
43
  ]
44
 
45
+ # Check that MPS is available
46
+ if not torch.backends.mps.is_available():
47
+ if not torch.backends.mps.is_built():
48
+ print(
49
+ "MPS not available because the current PyTorch install was not "
50
+ "built with MPS enabled."
51
+ )
52
+ else:
53
+ print(
54
+ "MPS not available because the current MacOS version is not 12.3+ "
55
+ "and/or you do not have an MPS-enabled device on this machine."
56
+ )
57
+ mps_device = None
58
+ else:
59
+ mps_device = torch.device("mps")
60
+
61
+ if mps_device is not None:
62
+ device = mps_device
63
+ print("Using MPS")
64
+ elif torch.cuda.is_available():
65
  device = torch.device("cuda")
66
  print(f"Using GPU: {torch.cuda.get_device_name(device)}")
67
  else:
 
69
  print("Using CPU")
70
 
71
  model = model.to(device)
 
72
 
73
 
74
  @spaces.GPU(duration=60)
75
+ def chat(message, history, temperature, repetition_penalty, do_sample, max_tokens):
76
+ print("repetition_penalty:", repetition_penalty)
77
  chat = []
78
  for item in history:
79
  chat.append({"role": "user", "content": item[0]})
80
  if item[1] is not None:
81
  chat.append({"role": "assistant", "content": item[1]})
82
+
83
+ if [message] in examples:
84
+ index = examples.index([message])
85
+ message = f"{qa_system_prompt}\n\n{questions[index]['context']}\n\nQuestion: {message}"
86
+ print(message)
87
+
88
  chat.append({"role": "user", "content": message})
89
+
90
  messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
91
  model_inputs = tok([messages], return_tensors="pt").to(device)
92
  streamer = TextIteratorStreamer(
93
+ tok, timeout=200.0, skip_prompt=True, skip_special_tokens=True
94
  )
95
  generate_kwargs = dict(
96
  model_inputs,
97
  streamer=streamer,
98
  max_new_tokens=max_tokens,
99
+ do_sample=do_sample,
100
  temperature=temperature,
101
  eos_token_id=terminators,
102
  )
 
117
 
118
  demo = gr.ChatInterface(
119
  fn=chat,
120
+ examples=examples,
 
121
  additional_inputs_accordion=gr.Accordion(
122
  label="⚙️ Parameters", open=False, render=False
123
  ),
 
125
  gr.Slider(
126
  minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
127
  ),
128
+ gr.Slider(
129
+ minimum=1.0,
130
+ maximum=1.5,
131
+ step=0.1,
132
+ value=1.2,
133
+ label="Repetition Penalty",
134
+ render=False,
135
+ ),
136
  gr.Checkbox(label="Sampling", value=True),
137
  gr.Slider(
138
  minimum=128,
 
145
  ],
146
  stop_btn="Stop Generation",
147
  title="Chat With LLMs",
148
+ description=f"Now Running [{model_name}](https://huggingface.co/{model_name})",
149
  )
150
  demo.launch()
app_modules/init.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main entrypoint for the app."""
2
+
3
+ import os
4
+ from timeit import default_timer as timer
5
+ from typing import List, Optional
6
+
7
+ from dotenv import find_dotenv, load_dotenv
8
+ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
9
+ from langchain.vectorstores.chroma import Chroma
10
+ from langchain.vectorstores.faiss import FAISS
11
+
12
+ from app_modules.llm_loader import LLMLoader
13
+ from app_modules.utils import get_device_types, init_settings, load_spacy_model
14
+
15
# Load environment variables: prefer a user-supplied .env, falling back to
# the checked-in .env.example when no .env exists.
found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
# override=False: variables already set in the real environment win over
# values from the dotenv file.
load_dotenv(found_dotenv, override=False)

# Constants
init_settings()

# Opt-in verbose tracing of LangChain internals (env: LANGCHAIN_DEBUG).
if os.environ.get("LANGCHAIN_DEBUG") == "true":
    from langchain.globals import set_debug

    set_debug(True)

# Imported only after the env vars are loaded — llm_chat_chain, at least,
# reads configuration (CHAT_HISTORY_ENABLED) at import time.
from app_modules.llm_qa_chain import QAChain
from app_modules.llm_chat_chain import ChatChain
import nltk
33
+
34
+
35
def app_init():
    """Initialize the QA application.

    Downloads NLTK data, optionally loads the embeddings model and the
    FAISS/Chroma vector store (when RAG is enabled and retrieval is not
    served from a questions file), then constructs the LLM loader and the
    QA or chat chain.

    Returns:
        tuple: ``(llm_loader, qa_chain)`` — ``qa_chain`` is a ``QAChain``
        when QA_WITH_RAG is enabled, otherwise a ``ChatChain``.

    Raises:
        ValueError: when a vector store is required but the index path env
        var is unset, or the index directory does not exist on disk.
    """
    # https://github.com/huggingface/transformers/issues/17611
    os.environ["CURL_CA_BUNDLE"] = ""

    nltk.download("punkt")

    hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
    print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
    print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")

    hf_embeddings_model_name = (
        os.environ.get("HF_EMBEDDINGS_MODEL_NAME") or "hkunlp/instructor-xl"
    )

    # NOTE: "n_threds" (sic) matches the keyword expected by LLMLoader.init.
    n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
    index_path = os.environ.get("FAISS_INDEX_PATH") or os.environ.get(
        "CHROMADB_INDEX_PATH"
    )
    using_faiss = os.environ.get("FAISS_INDEX_PATH") is not None
    llm_model_type = os.environ.get("LLM_MODEL_TYPE")

    debug_metrics = os.getenv("DEBUG_METRICS", "false").lower() == "true"

    if debug_metrics:
        start = timer()
        load_spacy_model()
        end = timer()
        print(f"Completed in {end - start:.3f}s")

    qa_with_rag = os.getenv("QA_WITH_RAG", "true").lower() == "true"
    print(f"qa_with_rag: {qa_with_rag}")

    retrieve_from_questions_file = os.getenv("RETRIEVER_TYPE") == "questions_file"
    print(f"retrieve_from_questions_file: {retrieve_from_questions_file}", flush=True)

    # Embeddings are needed both for the vector store and for metric
    # debugging. Named intermediate makes the original ambiguous
    # "a and b or c" precedence explicit (Python already bound it as
    # "(a and b) or c", so behavior is unchanged).
    use_vectorstore = qa_with_rag and not retrieve_from_questions_file
    if use_vectorstore or debug_metrics:
        print(f"hf_embeddings_model_name: {hf_embeddings_model_name}")
        start = timer()
        embeddings = HuggingFaceInstructEmbeddings(
            model_name=hf_embeddings_model_name,
            model_kwargs={"device": hf_embeddings_device_type},
        )
        end = timer()

        print(f"Completed in {end - start:.3f}s")

    vectorstore = None
    if use_vectorstore:
        start = timer()

        print(
            f"Load index from {index_path} with {'FAISS' if using_faiss else 'Chroma'}"
        )

        # Fail fast with a clear message instead of the TypeError that
        # os.path.isdir(None) would raise when neither env var is set.
        if index_path is None:
            raise ValueError(
                "FAISS_INDEX_PATH or CHROMADB_INDEX_PATH must be set when using a vector store"
            )

        if not os.path.isdir(index_path):
            raise ValueError(f"{index_path} does not exist!")
        elif using_faiss:
            vectorstore = FAISS.load_local(
                index_path, embeddings, allow_dangerous_deserialization=True
            )
        else:
            vectorstore = Chroma(
                embedding_function=embeddings, persist_directory=index_path
            )

        end = timer()

        print(f"Completed in {end - start:.3f}s")

    start = timer()
    llm_loader = LLMLoader(llm_model_type)
    llm_loader.init(n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type)
    # With RAG the chain answers from the vector store (or questions file);
    # otherwise it is a plain chat chain.
    if qa_with_rag:
        qa_chain = QAChain(vectorstore, llm_loader)
    else:
        qa_chain = ChatChain(llm_loader)
    end = timer()
    print(f"Completed in {end - start:.3f}s")

    return llm_loader, qa_chain
app_modules/llm_chat_chain.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+
5
+ from langchain.chains import ConversationChain, LLMChain
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain.chains.base import Chain
8
+
9
+ from app_modules.llm_inference import LLMInference
10
+ from app_modules.utils import CustomizedConversationSummaryBufferMemory
11
+ from langchain.chains import LLMChain
12
+ from langchain.globals import get_debug
13
+
14
# Include conversation history in prompts when CHAT_HISTORY_ENABLED=true.
chat_history_enabled = os.getenv("CHAT_HISTORY_ENABLED", "false").lower() == "true"
# Llama/Mistral instruction delimiters.
B_INST, E_INST = "[INST]", "[/INST]"

_ORCA_SYSTEM_PROMPT = (
    "You are Orca, an AI language model created by Microsoft. "
    "You are a cautious assistant. You carefully follow instructions. "
    "You are helpful and harmless and you follow ethical guidelines "
    "and promote positive behavior."
)


def get_system_prompt_and_user_message(orca=False):
    """Return ``(system_prompt, user_message_template)`` for prompt building.

    When ``orca`` is True the Orca-specific system prompt is used. When chat
    history is enabled, the history placeholder is prepended to the user
    message and the system prompt is extended accordingly.
    """
    if orca:
        system_prompt = _ORCA_SYSTEM_PROMPT
    else:
        system_prompt = "You are a chatbot having a conversation with a human."

    user_message = "{input}"
    if chat_history_enabled:
        system_prompt += " Read the chat history to get context."
        user_message = "Chat History:\n\n{history} \n\n" + user_message

    return system_prompt, user_message
33
+
34
+
35
def create_llama_2_prompt_template():
    """Build a Llama-2 style prompt: [INST] <<SYS>>...<</SYS>> user [/INST]."""
    sys_open, sys_close = "<<SYS>>\n", "\n<</SYS>>\n\n"
    system_prompt, user_message = get_system_prompt_and_user_message()
    return f"{B_INST}{sys_open}{system_prompt}{sys_close}{user_message}{E_INST}"
43
+
44
+
45
def create_llama_3_prompt_template():
    # Llama-3 chat format: header/eot special tokens around the system and
    # user turns; the f-string interpolates the prompt texts.
    system_prompt, user_message = get_system_prompt_and_user_message()
    prompt_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{ system_prompt }<|eot_id|><|start_header_id|>user<|end_header_id|>
{ user_message }<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

    return prompt_template
53
+
54
+
55
def create_phi_3_prompt_template():
    # Phi-3 chat format: <|system|>/<|user|>/<|assistant|> turns each closed
    # with <|end|>; the f-string interpolates the prompt texts.
    system_prompt, user_message = get_system_prompt_and_user_message()
    prompt_template = f"""<|system|>
{ system_prompt }<|end|>
<|user|>
{ user_message }<|end|>
<|assistant|>
"""

    return prompt_template
65
+
66
+
67
def create_orca_2_prompt_template():
    # ChatML-style prompt (<|im_start|>/<|im_end|>) used by Orca-2.
    # NOTE(review): despite the function name this passes orca=False, so the
    # generic chatbot system prompt is used instead of the Orca one — confirm
    # whether this is intentional.
    system_prompt, user_message = get_system_prompt_and_user_message(orca=False)

    prompt_template = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
    return prompt_template
72
+
73
+
74
def create_mistral_prompt_template():
    """Build a Mistral [INST] prompt with the system prompt inlined before
    the user turn (Mistral has no separate system-prompt slot)."""
    system_prompt, user_message = get_system_prompt_and_user_message()
    return "".join([B_INST, system_prompt, "\n\n", user_message, E_INST])
79
+
80
+
81
def create_gemma_prompt_template():
    """Return the Gemma chat turn template (Gemma has no system prompt)."""
    turns = ["<start_of_turn>user\n{input}<end_of_turn>", "<start_of_turn>model\n"]
    return "\n".join(turns)
83
+
84
+
85
def create_prompt_template(model_name):
    """Select a model-family-specific chat prompt template by model name.

    Falls back to a generic Human/Chatbot template (with or without history)
    when the model family is not recognized.
    """
    print(f"creating prompt template for model: {model_name}")

    lowered = model_name.lower()
    if re.search(r"llama-?2", model_name, re.IGNORECASE):
        return create_llama_2_prompt_template()
    if re.search(r"llama-?3", model_name, re.IGNORECASE):
        return create_llama_3_prompt_template()
    if re.search(r"phi-?3", model_name, re.IGNORECASE):
        return create_phi_3_prompt_template()
    if lowered.startswith("orca"):
        return create_orca_2_prompt_template()
    if lowered.startswith("mistral"):
        return create_mistral_prompt_template()
    if lowered.startswith("gemma"):
        return create_gemma_prompt_template()

    # Generic fallback templates.
    if chat_history_enabled:
        return """You are a chatbot having a conversation with a human.
{history}
Human: {input}
Chatbot:"""
    return """You are a chatbot having a conversation with a human.
Human: {input}
Chatbot:"""
110
+
111
+
112
class ChatChain(LLMInference):
    """Plain conversational chain (no retrieval).

    Builds a model-family-specific prompt template and wires it into either a
    ConversationChain (with summarizing memory, when chat history is enabled)
    or a simple LLMChain.
    """

    def __init__(self, llm_loader):
        super().__init__(llm_loader)

    def create_chain(self) -> Chain:
        # Prompt template depends on the loaded model's family name.
        template = create_prompt_template(self.llm_loader.model_name)
        print(f"template: {template}")

        if chat_history_enabled:
            prompt = PromptTemplate(
                input_variables=["history", "input"], template=template
            )
            # Summarizing buffer keeps the history under the token limit.
            memory = CustomizedConversationSummaryBufferMemory(
                llm=self.llm_loader.llm, max_token_limit=1024, return_messages=False
            )

            llm_chain = ConversationChain(
                llm=self.llm_loader.llm,
                prompt=prompt,
                verbose=False,
                memory=memory,
            )
        else:
            prompt = PromptTemplate(input_variables=["input"], template=template)
            llm_chain = LLMChain(llm=self.llm_loader.llm, prompt=prompt)

        return llm_chain

    def _process_inputs(self, inputs):
        # Single input: unwrap the question for the prompt template.
        if not isinstance(inputs, list):
            inputs = {"input": inputs["question"]}
        elif self.llm_loader.llm_model_type == "huggingface":
            # Batched HF pipeline expects chat-message dicts per input.
            inputs = [
                [
                    {
                        "role": "system",
                        "content": self.get_system_message(i),
                    },
                    {
                        "role": "user",
                        "content": self.get_user_message(i),
                    },
                ]
                for i in inputs
            ]
        else:
            inputs = [{"input": i["question"]} for i in inputs]

        if get_debug():
            print("_process_inputs:", json.dumps(inputs, indent=4))

        return inputs

    def get_system_message(self, input) -> str:
        # Returns the system prompt text (annotation corrected from Chain).
        return get_system_prompt_and_user_message()[0]

    def get_user_message(self, input) -> str:
        # Returns the raw question text (annotation corrected from Chain).
        return input["question"]
app_modules/llm_inference.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+ import json
3
+ import os
4
+ import re
5
+ import time
6
+ import urllib
7
+ from queue import Queue
8
+ from threading import Thread
9
+ from typing import List, Optional
10
+ from urllib.parse import quote, urlparse, urlunparse
11
+
12
+ from langchain.chains.base import Chain
13
+
14
+ from app_modules.llm_loader import LLMLoader, TextIteratorStreamer
15
+ from app_modules.utils import remove_extra_spaces
16
+
17
+
18
class LLMInference(metaclass=abc.ABCMeta):
    """Base class for inference wrappers around a LangChain chain.

    Subclasses implement create_chain(); this class provides lazy chain
    creation, single vs. batch invocation, result normalization, and — for
    HuggingFace pipelines — token streaming via a background thread.
    """

    def __init__(self, llm_loader):
        self.llm_loader = llm_loader
        self.chain = None  # created lazily by get_chain()
        # Strips a trailing special token (e.g. " <|end|>") from answers.
        self.pattern = re.compile(r"\s*<.+>$")

    @abc.abstractmethod
    def create_chain(self) -> Chain:
        """Build and return the underlying LangChain chain."""
        pass

    def get_chain(self) -> Chain:
        # Lazily create and cache the chain.
        if self.chain is None:
            self.chain = self.create_chain()

        return self.chain

    def reset(self) -> None:
        # Drop the cached chain so the next call rebuilds it.
        self.chain = None

    def _process_inputs(self, inputs):
        # Hook for subclasses to reshape inputs; identity by default.
        return inputs

    def _normalize_result(self, result):
        # print(f"_normalize_result: {result}")
        # HuggingFace batch output nests each result in a one-element list.
        if isinstance(result, list):
            result = result[0]

        # Unify "text"/"generated_text" result keys under "answer".
        key = "text" if "text" in result else "generated_text"
        if key in result:
            result["answer"] = result[key]
            del result[key]

        result["answer"] = self.pattern.sub("", result["answer"])
        return result

    def _process_results(self, results):
        if isinstance(results, list):
            return [self._normalize_result(result) for result in results]

        return self._normalize_result(results)

    def _run_batch(self, chain, inputs):
        # HuggingFace: call the raw pipeline for true batched generation;
        # other backends go through LangChain's batch API.
        if self.llm_loader.llm_model_type == "huggingface":
            results = self.llm_loader.llm.pipeline(inputs)
        else:
            results = chain.batch(inputs)

        return results

    def run_chain(self, chain, inputs, callbacks: Optional[List] = []):
        # NOTE(review): mutable default argument; harmless only while callees
        # never mutate the list — confirm before relying on it.
        inputs = self._process_inputs(inputs)

        # check if inputs is an array
        if isinstance(inputs, list):
            results = self._run_batch(chain, inputs)
        else:
            results = chain.invoke(inputs, {"callbacks": callbacks})

        return self._process_results(results)

    def call_chain(
        self,
        inputs,
        streaming_handler,
        q: Queue = None,
        testing: bool = False,
    ):
        """Run the chain, optionally streaming tokens to streaming_handler.

        When HF streaming is enabled, access is serialized with a lock since
        the loader holds a single shared streamer instance.
        """
        print(json.dumps(inputs, indent=4))
        if self.llm_loader.huggingfaceStreamingEnabled():
            self.llm_loader.lock.acquire()

        try:
            if self.llm_loader.huggingfaceStreamingEnabled():
                # Point the streamer at the caller-supplied queue (if any).
                self.llm_loader.streamer.reset(q)

            chain = self.get_chain()
            result = (
                self._run_chain_with_streaming_handler(
                    chain, inputs, streaming_handler, testing
                )
                if streaming_handler is not None
                else self.run_chain(chain, inputs)
            )

            if "answer" in result:
                result["answer"] = remove_extra_spaces(result["answer"])

            return result
        finally:
            if self.llm_loader.huggingfaceStreamingEnabled():
                self.llm_loader.lock.release()

    def _execute_chain(self, chain, inputs, q, sh):
        # Runs on a worker thread; the result is delivered via the queue.
        q.put(self.run_chain(chain, inputs, callbacks=[sh]))

    def _run_chain_with_streaming_handler(
        self, chain, inputs, streaming_handler, testing
    ):
        que = Queue()

        t = Thread(
            target=self._execute_chain,
            args=(chain, inputs, que, streaming_handler),
        )
        t.start()

        if self.llm_loader.huggingfaceStreamingEnabled():
            # With chat history present the LLM runs twice (condense + answer),
            # so two streamed generations must be consumed.
            count = (
                2
                if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
                else 1
            )

            while count > 0:
                try:
                    for token in self.llm_loader.streamer:
                        if not testing:
                            streaming_handler.on_llm_new_token(token)

                    self.llm_loader.streamer.reset()
                    count -= 1
                except Exception:
                    # Streamer queue timed out before generation started.
                    if not testing:
                        print("nothing generated yet - retry in 0.5s")
                    time.sleep(0.5)

        t.join()
        return que.get()
app_modules/llm_loader.py ADDED
@@ -0,0 +1,579 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import threading
4
+ from queue import Queue
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import torch
8
+ from langchain.callbacks.base import BaseCallbackHandler
9
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
10
+ from langchain_openai.chat_models import ChatOpenAI
11
+ from langchain_openai.llms import OpenAI
12
+ from langchain_google_genai import (
13
+ ChatGoogleGenerativeAI,
14
+ HarmBlockThreshold,
15
+ HarmCategory,
16
+ )
17
+ from langchain_community.llms import (
18
+ HuggingFaceTextGenInference,
19
+ CTransformers,
20
+ GPT4All,
21
+ HuggingFacePipeline,
22
+ LlamaCpp,
23
+ VLLM,
24
+ )
25
+ from langchain_community.chat_models import ChatOllama
26
+ from langchain.schema import LLMResult
27
+ from transformers import (
28
+ AutoConfig,
29
+ AutoModelForCausalLM,
30
+ AutoModelForSeq2SeqLM,
31
+ AutoTokenizer,
32
+ BitsAndBytesConfig,
33
+ StoppingCriteria,
34
+ StoppingCriteriaList,
35
+ T5Tokenizer,
36
+ TextStreamer,
37
+ pipeline,
38
+ )
39
+
40
+ from app_modules.utils import ensure_model_is_downloaded
41
+
42
+
43
class TextIteratorStreamer(TextStreamer, StreamingStdOutCallbackHandler):
    """Token streamer usable both as a transformers TextStreamer and as a
    LangChain callback handler.

    Generated text fragments are pushed into an internal queue; consumers
    iterate the instance to receive fragments until the stop signal (None)
    arrives.
    """

    def __init__(
        self,
        tokenizer: "AutoTokenizer",
        skip_prompt: bool = False,
        timeout: Optional[float] = None,
        for_huggingface: bool = False,
        **decode_kwargs,
    ):
        super().__init__(tokenizer, skip_prompt, **decode_kwargs)
        self.text_queue = Queue()  # fragments awaiting the consumer
        self.stop_signal = None  # sentinel value ending iteration
        self.timeout = timeout
        self.total_tokens = 0
        self.for_huggingface = for_huggingface
        self.end_token = ""  # buffer for a partially-seen "<|im_end|>"

    def on_finalized_text(self, text: str, stream_end: bool = False):
        # transformers TextStreamer path: echo via super(), then enqueue.
        super().on_finalized_text(text, stream_end=stream_end)

        """Put the new text in the queue. If the stream is ending, also put a stop signal in the queue."""
        self.text_queue.put(text, timeout=self.timeout)
        self.total_tokens = self.total_tokens + 1
        if stream_end:
            print("\n")
            self.text_queue.put("\n", timeout=self.timeout)
            self.text_queue.put(self.stop_signal, timeout=self.timeout)

    def check_end_token(self, token):
        # Suppress the "<|im_end|>" marker even when it arrives split across
        # multiple tokens; returns None while a prefix match is pending.
        new_token = self.end_token + token
        if "<|im_end|>".startswith(new_token):
            self.end_token = "" if new_token == "<|im_end|>" else new_token
            return None
        elif self.end_token != "":
            self.end_token = ""

        return new_token

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # LangChain callback path (non-HF backends): filter the end marker,
        # echo to stdout and enqueue.
        token = self.check_end_token(token)
        if token:
            sys.stdout.write(token)
            sys.stdout.flush()
            self.text_queue.put(token, timeout=self.timeout)
            self.total_tokens = self.total_tokens + 1

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        # print("on_llm_start:", serialized, prompts)
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        # LangChain callback path: terminate the consumer's iteration.
        print("\n")
        self.text_queue.put("\n", timeout=self.timeout)
        self.text_queue.put(self.stop_signal, timeout=self.timeout)

    def __iter__(self):
        return self

    def __next__(self):
        # Blocks (up to timeout) for the next fragment; the stop signal ends
        # iteration.
        value = self.text_queue.get(timeout=self.timeout)
        if value == self.stop_signal:
            raise StopIteration()
        else:
            return value

    def reset(self, q: Queue = None):
        # print("resetting TextIteratorStreamer")
        # Swap in a fresh (or caller-supplied) queue for the next generation.
        self.text_queue = q if q is not None else Queue()
        self.end_token = ""

    def empty(self):
        return self.text_queue.empty()
117
+
118
+
119
class LLMLoader:
    """Loads and holds the LangChain LLM selected by ``llm_model_type``.

    The heavy model construction happens in init(); the constructor only
    records configuration and prepares a placeholder streamer.
    """

    def __init__(self, llm_model_type):
        self.llm_model_type = llm_model_type
        self.llm = None  # populated by init()
        # Placeholder streamer; replaced by _init_hf_streamer() once a real
        # tokenizer is available (HuggingFace pipeline only).
        self.streamer = TextIteratorStreamer(
            "",
            for_huggingface=True,
        )
        self.max_tokens_limit = 4096
        self.search_kwargs = {"k": 8}  # retriever top-k
        self.lock = threading.Lock()  # serializes streaming generations
        # Fix: default to "" so a missing HUGGINGFACE_MODEL_NAME_OR_PATH env
        # var no longer raises AttributeError (os.getenv would return None).
        self.model_name = os.getenv("HUGGINGFACE_MODEL_NAME_OR_PATH", "").split("/")[
            -1
        ]
        self.repetition_penalty = ""
        self.batch_size = int(os.getenv("BATCH_SIZE", "1"))

    def _init_hf_streamer(self, tokenizer):
        # Streaming is only supported for single-item batches; with larger
        # batches the streamer is disabled entirely.
        if self.batch_size == 1:
            self.streamer = TextIteratorStreamer(
                tokenizer,
                timeout=10.0,
                skip_prompt=True,
                skip_special_tokens=True,
                for_huggingface=True,
            )
        else:
            self.streamer = None

    def huggingfaceStreamingEnabled(self):
        """Return True when a streamer is available for token streaming."""
        return self.streamer is not None
148
+
149
    def init(
        self,
        custom_handler: Optional[BaseCallbackHandler] = None,
        n_threds: int = 4,
        hf_pipeline_device_type: str = None,
    ):
        """Instantiate self.llm for the configured model type (idempotent).

        n_threds sets CPU thread counts for GPT4All/LlamaCpp backends;
        hf_pipeline_device_type selects cpu/mps/cuda for HF pipelines.
        """
        print("initializing LLM: " + self.llm_model_type)

        if hf_pipeline_device_type is None:
            hf_pipeline_device_type = "cpu"

        using_cuda = hf_pipeline_device_type.startswith("cuda")
        using_mps = hf_pipeline_device_type.startswith("mps")
        # fp16 on GPU/MPS, fp32 on CPU; optional bf16 override (not on MPS).
        torch_dtype = torch.float16 if using_cuda or using_mps else torch.float32
        if not using_mps and os.environ.get("USING_TORCH_BFLOAT16") == "true":
            torch_dtype = torch.bfloat16

        load_quantized_model = os.environ.get("LOAD_QUANTIZED_MODEL")

        print(f" hf_pipeline_device_type: {hf_pipeline_device_type}")
        print(f" load_quantized_model: {load_quantized_model}")
        print(f" torch_dtype: {torch_dtype}")
        print(f" n_threds: {n_threds}")

        torch.set_default_dtype(torch_dtype)

        # 4-bit/8-bit bitsandbytes config; only used by the huggingface branch.
        double_quant_config = BitsAndBytesConfig(
            load_in_4bit=load_quantized_model == "4bit",
            bnb_4bit_use_double_quant=load_quantized_model == "4bit",
            load_in_8bit=load_quantized_model == "8bit",
            bnb_8bit_use_double_quant=load_quantized_model == "8bit",
        )

        callbacks = []
        if self.streamer is not None and self.streamer.for_huggingface:
            callbacks.append(self.streamer)
        if custom_handler is not None:
            callbacks.append(custom_handler)

        # Only construct the model once; subsequent calls are no-ops.
        if self.llm is None:
            if self.llm_model_type == "openai":
                MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME") or "gpt-3.5-turbo"
                print(f" using model: {MODEL_NAME}")
                self.model_name = MODEL_NAME
                # "instruct" models use the completions API, others chat.
                self.llm = (
                    OpenAI(
                        model_name=MODEL_NAME,
                        streaming=True,
                        callbacks=callbacks,
                        verbose=True,
                        temperature=0,
                    )
                    if "instruct" in MODEL_NAME
                    else ChatOpenAI(
                        model_name=MODEL_NAME,
                        streaming=True,
                        callbacks=callbacks,
                        verbose=True,
                        temperature=0,
                    )
                )
            elif self.llm_model_type == "google":
                MODEL_NAME = os.environ.get("GOOGLE_MODEL_NAME") or "gemini-pro"
                print(f" using model: {MODEL_NAME}")
                self.llm = ChatGoogleGenerativeAI(
                    model=MODEL_NAME,
                    convert_system_message_to_human=True,
                    callbacks=callbacks,
                    streaming=True,
                    safety_settings={
                        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                    },
                )
            elif self.llm_model_type.startswith("gpt4all"):
                MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
                self.llm = GPT4All(
                    model=MODEL_PATH,
                    max_tokens=2048,
                    n_threads=n_threds,
                    backend="gptj" if self.llm_model_type == "gpt4all-j" else "llama",
                    callbacks=callbacks,
                    verbose=True,
                    use_mlock=True,
                )
            elif self.llm_model_type == "llamacpp":
                MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
                self.llm = LlamaCpp(
                    model_path=MODEL_PATH,
                    n_ctx=8192,
                    n_threads=n_threds,
                    seed=0,
                    temperature=0,
                    max_tokens=2048,
                    callbacks=callbacks,
                    verbose=True,
                    use_mlock=True,
                )
            elif self.llm_model_type == "ctransformers":
                MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
                config = {
                    "max_new_tokens": self.max_tokens_limit,
                    "repetition_penalty": 1.1,
                }
                self.llm = CTransformers(
                    model=MODEL_PATH,
                    model_type="llama",
                    config=config,
                    callbacks=callbacks,
                )
            elif self.llm_model_type == "hftgi":
                HFTGI_SERVER_URL = os.environ.get("HFTGI_SERVER_URL")
                HFTGI_RP = os.environ.get("HFTGI_RP")
                repetition_penalty = 1.120 if HFTGI_RP is None else float(HFTGI_RP)
                print(f" repetition_penalty: {repetition_penalty}")
                self.repetition_penalty = repetition_penalty
                self.max_tokens_limit = 4096
                self.llm = HuggingFaceTextGenInference(
                    inference_server_url=HFTGI_SERVER_URL,
                    # NOTE(review): true division yields a float (2048.0) where
                    # an int token count is expected — confirm the client
                    # accepts it.
                    max_new_tokens=self.max_tokens_limit / 2,
                    # top_k=0,
                    top_p=0.95,
                    # typical_p=0.95,
                    temperature=0.01,
                    repetition_penalty=repetition_penalty,
                    callbacks=callbacks,
                    timeout=600,
                    streaming=True,
                )
            elif self.llm_model_type == "ollama":
                MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME") or "mistral"
                self.model_name = MODEL_NAME
                print(f" loading model: {MODEL_NAME}")

                OLLAMA_RP = os.getenv("OLLAMA_RP")
                repetition_penalty = float(OLLAMA_RP) if OLLAMA_RP else 1.15
                self.repetition_penalty = repetition_penalty
                print(f" repetition_penalty: {repetition_penalty}")

                self.llm = ChatOllama(
                    model=MODEL_NAME,
                    callbacks=callbacks,
                    temperature=0,
                    repeat_penalty=repetition_penalty,
                    max_new_tokens=2048,
                    max_tokens=8192,
                )
            elif self.llm_model_type == "vllm":
                MODEL_NAME = (
                    os.environ.get("HUGGINGFACE_MODEL_NAME_OR_PATH")
                    or "google/gemma-1.1-2b-it"
                )
                print(f" loading model: {MODEL_NAME}")

                # Reuses the HF_RP env var for the repetition penalty.
                VLLM_RP = os.getenv("HF_RP")
                repetition_penalty = float(VLLM_RP) if VLLM_RP else 1.15
                self.repetition_penalty = repetition_penalty
                print(f" repetition_penalty: {repetition_penalty}")

                vllm_kwargs = {
                    "max_model_len": 4096,
                    "enforce_eager": True,
                }

                quantization = os.getenv("VLLM_QUANTIZATION")
                if quantization:
                    vllm_kwargs["quantization"] = quantization

                self.llm = VLLM(
                    model=MODEL_NAME,
                    callbacks=callbacks,
                    temperature=0,
                    repeat_penalty=repetition_penalty,
                    top_p=0.95,
                    max_new_tokens=2048,
                    max_tokens=8192,
                    tensor_parallel_size=torch.cuda.device_count(),
                    trust_remote_code=True,
                    vllm_kwargs=vllm_kwargs,
                )
            elif self.llm_model_type.startswith("huggingface"):
                MODEL_NAME_OR_PATH = os.environ.get("HUGGINGFACE_MODEL_NAME_OR_PATH")
                print(f" loading model: {MODEL_NAME_OR_PATH}")

                # Gated model families require an auth token.
                hf_auth_token = (
                    os.environ.get("HUGGINGFACE_AUTH_TOKEN")
                    if "Llama-2" in MODEL_NAME_OR_PATH
                    or "gemma" in MODEL_NAME_OR_PATH
                    or "Mistral" in MODEL_NAME_OR_PATH
                    else None
                )
                transformers_offline = os.environ.get("TRANSFORMERS_OFFLINE") == "1"
                token = (
                    hf_auth_token
                    if hf_auth_token is not None
                    and len(hf_auth_token) > 0
                    and not transformers_offline
                    else None
                )
                # Only the last 5 characters of the token are logged.
                print(f" HF auth token: {str(token)[-5:]}")

                if "Llama-2" in MODEL_NAME_OR_PATH:
                    self.max_tokens_limit = 4096
                elif "TinyLlama" in MODEL_NAME_OR_PATH:
                    self.max_tokens_limit = 1024

                class StopOnTokens(StoppingCriteria):
                    # Stops generation when the last token is an EOS-like id.
                    def __call__(
                        self,
                        input_ids: torch.LongTensor,
                        scores: torch.FloatTensor,
                        **kwargs,
                    ) -> bool:
                        stop_ids = [
                            2
                        ]  # IDs of tokens where the generation should stop.
                        for stop_id in stop_ids:
                            if (
                                input_ids[0][-1] == stop_id
                            ):  # Checking if the last generated token is a stop token.
                                return True
                        return False

                # NOTE(review): stopping_criteria appears unused below —
                # confirm whether it should be passed to pipeline().
                stopping_criteria = StoppingCriteriaList([StopOnTokens()])

                is_t5 = "t5" in MODEL_NAME_OR_PATH
                # Near-zero temperature for model families that reject 0.
                temperature = (
                    0.01
                    if "gpt4all-j" in MODEL_NAME_OR_PATH
                    or "dolly" in MODEL_NAME_OR_PATH
                    or "Qwen" in MODEL_NAME_OR_PATH
                    or "Llama" in MODEL_NAME_OR_PATH
                    or "Orca-2" in MODEL_NAME_OR_PATH
                    or "phi-2" in MODEL_NAME_OR_PATH
                    or "Phi-3" in MODEL_NAME_OR_PATH
                    or "Mistral" in MODEL_NAME_OR_PATH
                    or "gemma" in MODEL_NAME_OR_PATH
                    else 0
                )

                use_fast = (
                    "stable" in MODEL_NAME_OR_PATH
                    or "RedPajama" in MODEL_NAME_OR_PATH
                    or "dolly" in MODEL_NAME_OR_PATH
                )
                padding_side = "left"  # if "dolly" in MODEL_NAME_OR_PATH else None

                # Qwen requires explicit fp32/bf16/fp16 flags in its config.
                config = (
                    AutoConfig.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        trust_remote_code=True,
                        token=token,
                        fp32=hf_pipeline_device_type == "cpu",
                        bf16=(
                            hf_pipeline_device_type != "cpu"
                            and torch_dtype == torch.bfloat16
                        ),
                        fp16=(
                            hf_pipeline_device_type != "cpu"
                            and torch_dtype != torch.bfloat16
                        ),
                    )
                    if "Qwen" in MODEL_NAME_OR_PATH
                    else AutoConfig.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        trust_remote_code=True,
                        token=token,
                    )
                )

                # config.attn_config["attn_impl"] = "triton"
                # config.max_seq_len = 4096
                # config.init_device = hf_pipeline_device_type

                tokenizer = (
                    T5Tokenizer.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        token=token,
                    )
                    if is_t5
                    else AutoTokenizer.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        use_fast=use_fast,
                        trust_remote_code=True,
                        padding_side=padding_side,
                        token=token,
                    )
                )

                # Replace the placeholder streamer with a tokenizer-aware one.
                self._init_hf_streamer(tokenizer)

                task = "text2text-generation" if is_t5 else "text-generation"

                return_full_text = True if "dolly" in MODEL_NAME_OR_PATH else False

                repetition_penalty = (
                    1.15
                    if "falcon" in MODEL_NAME_OR_PATH
                    else (1.25 if "dolly" in MODEL_NAME_OR_PATH else 1.1)
                )

                # Env var override for the repetition penalty.
                HF_RP = os.environ.get("HF_RP")
                if HF_RP is not None and len(HF_RP) > 0:
                    repetition_penalty = float(HF_RP)
                print(f" repetition_penalty: {repetition_penalty}")
                self.repetition_penalty = repetition_penalty
                self.model_name = MODEL_NAME_OR_PATH.split("/")[-1]

                if load_quantized_model is not None:
                    # Quantized (4/8-bit) load via bitsandbytes.
                    model = (
                        AutoModelForSeq2SeqLM.from_pretrained(
                            MODEL_NAME_OR_PATH,
                            config=config,
                            quantization_config=double_quant_config,
                            trust_remote_code=True,
                            token=token,
                        )
                        if is_t5
                        else AutoModelForCausalLM.from_pretrained(
                            MODEL_NAME_OR_PATH,
                            config=config,
                            quantization_config=double_quant_config,
                            trust_remote_code=True,
                            token=token,
                        )
                    )

                    print(f"Model memory footprint: {model.get_memory_footprint()}")

                    eos_token_id = -1
                    # starchat-beta uses a special <|end|> token with ID 49155 to denote ends of a turn
                    if "starchat" in MODEL_NAME_OR_PATH:
                        eos_token_id = 49155
                    pad_token_id = eos_token_id

                    pipe = (
                        pipeline(
                            task,
                            model=model,
                            tokenizer=tokenizer,
                            eos_token_id=eos_token_id,
                            pad_token_id=pad_token_id,
                            streamer=self.streamer,
                            return_full_text=return_full_text,  # langchain expects the full text
                            device_map="auto",
                            trust_remote_code=True,
                            max_new_tokens=2048,
                            do_sample=True,
                            temperature=0.01,
                            top_p=0.95,
                            top_k=50,
                            repetition_penalty=repetition_penalty,
                        )
                        if eos_token_id != -1
                        else pipeline(
                            task,
                            model=model,
                            tokenizer=tokenizer,
                            streamer=self.streamer,
                            return_full_text=return_full_text,  # langchain expects the full text
                            device_map="auto",
                            trust_remote_code=True,
                            max_new_tokens=2048,
                            do_sample=True,
                            temperature=temperature,
                            top_p=0.95,
                            top_k=0,  # select from top 0 tokens (because zero, relies on top_p)
                            repetition_penalty=repetition_penalty,
                        )
                    )
                else:
                    # Unquantized load; optionally let pipeline() load the
                    # model lazily (DISABLE_MODEL_PRELOADING=true).
                    if os.environ.get("DISABLE_MODEL_PRELOADING") != "true":
                        model = (
                            AutoModelForSeq2SeqLM.from_pretrained(
                                MODEL_NAME_OR_PATH,
                                config=config,
                                trust_remote_code=True,
                            )
                            if is_t5
                            else (
                                AutoModelForCausalLM.from_pretrained(
                                    MODEL_NAME_OR_PATH,
                                    config=config,
                                    trust_remote_code=True,
                                )
                                if "Qwen" in MODEL_NAME_OR_PATH
                                else (
                                    AutoModelForCausalLM.from_pretrained(
                                        MODEL_NAME_OR_PATH,
                                        config=config,
                                        trust_remote_code=True,
                                    )
                                    if token is None
                                    else AutoModelForCausalLM.from_pretrained(
                                        MODEL_NAME_OR_PATH,
                                        config=config,
                                        trust_remote_code=True,
                                        token=token,
                                    )
                                )
                            )
                        )
                        print(f"Model memory footprint: {model.get_memory_footprint()}")
                        model = model.eval()
                        # print(f"Model memory footprint: {model.get_memory_footprint()}")
                    else:
                        # pipeline() accepts a model name and loads it itself.
                        model = MODEL_NAME_OR_PATH

                    pipe = pipeline(
                        task,
                        model=model,
                        tokenizer=tokenizer,
                        streamer=self.streamer,
                        return_full_text=return_full_text,  # langchain expects the full text
                        device_map="auto",
                        torch_dtype=torch_dtype,
                        max_new_tokens=2048,
                        trust_remote_code=True,
                        do_sample=True,
                        temperature=temperature,
                        top_p=0.95,
                        top_k=0,  # select from top 0 tokens (because zero, relies on top_p)
                        repetition_penalty=repetition_penalty,
                        token=token,
                        batch_size=self.batch_size,
                    )

                # Use EOS as padding so batched generation works.
                pipe.model.config.pad_token_id = pipe.model.config.eos_token_id
                pipe.tokenizer.pad_token_id = pipe.model.config.eos_token_id
                self.llm = HuggingFacePipeline(pipeline=pipe, callbacks=callbacks)

        print("initialization complete")
app_modules/llm_qa_chain.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from typing import List
4
+ import pandas as pd
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain.chains.base import Chain
7
+ from app_modules.llm_inference import LLMInference
8
+ from app_modules.utils import CustomizedConversationSummaryBufferMemory
9
+ from langchain_core.retrievers import BaseRetriever
10
+ from langchain_core.documents import Document
11
+ from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
12
+ from langchain.globals import get_debug
13
+
14
# Retrieval mode: "questions_file" serves contexts from a static JSON dataset
# instead of a vector store.
retrieve_from_questions_file = os.getenv("RETRIEVER_TYPE") == "questions_file"

if retrieve_from_questions_file:
    # Loaded once at import time; read by DatasetRetriever and
    # QAChain.get_prompt.
    questions_file_path = os.getenv("QUESTIONS_FILE_PATH")
    questions_df = pd.read_json(questions_file_path)
    print(f"Questions file loaded: {questions_file_path}", flush=True)
20
+
21
+
22
class DatasetRetriever(BaseRetriever):
    """Retriever that serves contexts from the preloaded questions dataframe
    by exact, case-insensitive question match."""

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant to a query.

        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use
        Returns:
            List of relevant documents
        """
        # Exact case-insensitive match against the dataset's questions.
        matches = questions_df[questions_df["question"].str.lower() == query.lower()]

        docs = [
            Document(
                page_content=row["context"],
                metadata={"source": row["id"]},
            )
            for _, row in matches.iterrows()
        ]

        if not docs:
            print(f"No documents found for query: {query}", flush=True)

        return docs
52
+
53
+
54
class QAChain(LLMInference):
    """Retrieval-augmented QA chain backed by either a vector store or the
    static questions file (see retrieve_from_questions_file)."""

    def __init__(self, vectorstore, llm_loader):
        super().__init__(llm_loader)
        self.vectorstore = vectorstore  # unused when retrieving from the file

    def create_chain(self) -> Chain:
        if retrieve_from_questions_file:
            retriever = DatasetRetriever()
        else:
            retriever = self.vectorstore.as_retriever(
                search_kwargs=self.llm_loader.search_kwargs
            )

        if os.environ.get("CHAT_HISTORY_ENABLED") == "true":
            # Summarizing memory keeps chat history under the token limit.
            memory = CustomizedConversationSummaryBufferMemory(
                llm=self.llm_loader.llm,
                output_key="answer",
                memory_key="chat_history",
                max_token_limit=1024,
                return_messages=True,
            )
            qa = ConversationalRetrievalChain.from_llm(
                self.llm_loader.llm,
                memory=memory,
                chain_type="stuff",
                retriever=retriever,
                get_chat_history=lambda h: h,
                return_source_documents=True,
            )
        else:
            qa = ConversationalRetrievalChain.from_llm(
                self.llm_loader.llm,
                retriever=retriever,
                max_tokens_limit=8192,  # self.llm_loader.max_tokens_limit,
                return_source_documents=True,
            )

        return qa

    def _process_inputs(self, inputs):
        # Batched HF pipeline takes fully-rendered prompt strings.
        if isinstance(inputs, list) and self.llm_loader.llm_model_type == "huggingface":
            inputs = [self.get_prompt(i) for i in inputs]

        if get_debug():
            print("_process_inputs:", json.dumps(inputs, indent=4))

        return inputs

    def get_prompt(self, inputs):
        """Render a stuff-style QA prompt for one input dict with a
        "question" key, using the matching context from the dataset."""
        qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."

        df = questions_df
        query = inputs["question"]

        # find the query in the df
        filtered = df[df["question"].str.lower() == query.lower()]

        # NOTE(review): assumes the question exists in the dataset;
        # filtered.iloc[0] raises IndexError otherwise — confirm callers.
        context = filtered.iloc[0]["context"]

        return (
            f"{qa_system_prompt}\n\n{context}\n\nQuestion: {query}\n\nHelpful Answer:"
        )
app_modules/utils.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ import os
7
+ import platform
8
+ import re
9
+ from pathlib import Path
10
+
11
+ import requests
12
+ import torch
13
+ from tqdm import tqdm
14
+ from langchain.memory import ConversationSummaryBufferMemory
15
+
16
+
17
class LogRecord(logging.LogRecord):
    """LogRecord variant that interpolates arguments with ``str.format``
    instead of the stdlib's ``%``-style formatting."""

    def getMessage(self):
        text = self.msg
        if not self.args:
            return text
        # A mapping feeds keyword placeholders; anything else is positional,
        # mirroring logging's own single-dict special case.
        if isinstance(self.args, dict):
            return text.format(**self.args)
        return text.format(*self.args)
26
+
27
+
28
class Logger(logging.Logger):
    """Logger that emits the brace-formatting ``LogRecord`` defined in this
    module instead of the stdlib record class."""

    def makeRecord(
        self,
        name,
        level,
        fn,
        lno,
        msg,
        args,
        exc_info,
        func=None,
        extra=None,
        sinfo=None,
    ):
        record = LogRecord(name, level, fn, lno, msg, args, exc_info, func, sinfo)
        # Attach caller-supplied context attributes, as logging.Logger does.
        if extra is not None:
            record.__dict__.update(extra)
        return record
47
+
48
+
49
def init_settings():
    """Install the custom Logger class and configure root logging output."""
    logging.setLoggerClass(Logger)
    log_format = "%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s"
    logging.basicConfig(level=logging.WARNING, format=log_format)
55
+
56
+
57
def remove_extra_spaces(text):
    """Trim *text* and collapse runs of spaces (not tabs/newlines) to one."""
    return re.sub(r" {2,}", " ", text.strip())
59
+
60
+
61
+ def print_llm_response(llm_response, debug_retrieval=True):
62
+ answer = llm_response["answer"] if "answer" in llm_response else None
63
+ if answer is None:
64
+ answer = llm_response["response"] if "response" in llm_response else None
65
+
66
+ if answer is not None:
67
+ print("\n\n***Answer:")
68
+ print(answer)
69
+
70
+ source_documents = (
71
+ llm_response["source_documents"] if "source_documents" in llm_response else None
72
+ )
73
+ if source_documents is None:
74
+ source_documents = (
75
+ llm_response["sourceDocs"] if "sourceDocs" in llm_response else None
76
+ )
77
+
78
+ if debug_retrieval and source_documents is not None:
79
+ print("\nSources:")
80
+ for index, source in enumerate(source_documents):
81
+ metadata = source["metadata"] if "metadata" in source else source.metadata
82
+ if "page" in metadata:
83
+ print(f" Page: {metadata['page']}", end="")
84
+
85
+ print(
86
+ f" Source {index + 1}: "
87
+ + str(metadata["url"] if "url" in metadata else metadata["source"])
88
+ )
89
+ print(
90
+ source["page_content"]
91
+ if "page_content" in source
92
+ else source.page_content
93
+ )
94
+
95
+ if "chat_history" in llm_response:
96
+ print("\nChat History:")
97
+ print(llm_response["chat_history"])
98
+
99
+
100
def get_device_types():
    """Detect the best available torch device and report platform details.

    Returns ``(embeddings_device, pipeline_device)``; either entry can be
    overridden via the HF_EMBEDDINGS_DEVICE_TYPE / HF_PIPELINE_DEVICE_TYPE
    environment variables.
    """
    print("Running on: ", platform.platform())
    print("MPS is", "NOT" if not torch.backends.mps.is_available() else "", "available")
    print("CUDA is", "NOT" if not torch.cuda.is_available() else "", "available")

    detected = "cpu"
    if torch.backends.mps.is_available():
        detected = "mps"
    elif not torch.backends.mps.is_built():
        print(
            "MPS not available because the current PyTorch install was not "
            "built with MPS enabled."
        )
    else:
        print(
            "MPS not available because the current MacOS version is not 12.3+ "
            "and/or you do not have an MPS-enabled device on this machine."
        )

    # CUDA takes precedence over MPS when both are somehow reported.
    if torch.cuda.is_available():
        print("CUDA is available, we have found ", torch.cuda.device_count(), " GPU(s)")
        print(torch.cuda.get_device_name(0))
        print("CUDA version: " + torch.version.cuda)
        detected = f"cuda:{torch.cuda.current_device()}"

    return (
        os.environ.get("HF_EMBEDDINGS_DEVICE_TYPE") or detected,
        os.environ.get("HF_PIPELINE_DEVICE_TYPE") or detected,
    )
130
+
131
+
132
def ensure_model_is_downloaded(llm_model_type):
    """Ensure the local model file for *llm_model_type* exists, downloading
    it when missing.

    Supported types: ``gpt4all-j`` / other ``gpt4all*``, ``llamacpp`` and
    ``ctransformers``.  The local path and download URL come from the
    corresponding environment variables.

    Returns the local model path.  Raises ValueError for an unknown type or
    when the model-path environment variable is unset.
    """
    if llm_model_type.startswith("gpt4all"):
        local_path = (
            os.environ.get("GPT4ALL_J_MODEL_PATH")
            if llm_model_type == "gpt4all-j"
            else os.environ.get("GPT4ALL_MODEL_PATH")
        )
        url = (
            os.environ.get("GPT4ALL_J_DOWNLOAD_LINK")
            if llm_model_type == "gpt4all-j"
            else os.environ.get("GPT4ALL_DOWNLOAD_LINK")
        )
    elif llm_model_type == "llamacpp":
        local_path = os.environ.get("LLAMACPP_MODEL_PATH")
        url = os.environ.get("LLAMACPP_DOWNLOAD_LINK")
    elif llm_model_type == "ctransformers":
        local_path = os.environ.get("CTRANSFORMERS_MODEL_PATH")
        url = os.environ.get("CTRANSFORMERS_DOWNLOAD_LINK")
    else:
        # Fixed typo in the original message ("typle" -> "type").
        raise ValueError(f"wrong model type: {llm_model_type}")

    if local_path is None:
        # Fail fast with a clear message instead of TypeError from Path(None).
        raise ValueError(
            f"model path environment variable is not set for {llm_model_type}"
        )

    path = Path(local_path)

    if path.is_file():
        print(f"model: {local_path} exists")
    else:
        print(f"downloading model: {local_path} from {url} ...")
        path.parent.mkdir(parents=True, exist_ok=True)

        # Stream the download: model files are large.
        response = requests.get(url, stream=True)

        # Write the response to disk in chunks to bound memory usage.
        with open(local_path, "wb") as f:
            for chunk in tqdm(response.iter_content(chunk_size=8192)):
                if chunk:
                    f.write(chunk)

    return local_path
172
+
173
+
174
class CustomizedConversationSummaryBufferMemory(ConversationSummaryBufferMemory):
    """Summary-buffer memory that scrubs the ``<|im_end|>`` stop token from
    stored outputs and from generated summaries."""

    def save_context(self, inputs, outputs) -> None:
        # Rewrite string outputs in place so the caller also sees the
        # cleaned values before delegating to the base implementation.
        for key, value in outputs.items():
            if isinstance(value, str):
                outputs[key] = value.replace("<|im_end|>", "")
        return super().save_context(inputs, outputs)

    def predict_new_summary(self, messages, existing_summary) -> str:
        summary = super().predict_new_summary(messages, existing_summary)
        return summary.replace("<|im_end|>", "")
187
+
188
+
189
def CalculateDistance(entry1, entry2, distance_calculator):
    """Return 0 for identical strings, otherwise the pairwise embedding
    distance score produced by *distance_calculator*."""
    if entry1 == entry2:
        return 0
    result = distance_calculator.evaluate_string_pairs(
        prediction=entry1, prediction_b=entry2
    )
    # print(f"entry1: {entry1}, entry2: {entry2}, distance: {result['score']}")
    return result["score"]
197
+
198
+
199
def FindInList(entry, elist, distance_calculator=None, debug=False):
    """Return True when *entry* matches any element of *elist*, either
    exactly or — when a distance calculator is supplied — within the
    module-level ``distance_threshold``."""
    for candidate in elist:
        if distance_calculator is not None:
            score = CalculateDistance(entry, candidate, distance_calculator)
            if score < distance_threshold:
                if debug:
                    print(
                        f"FindInList - matched by distance {score:.3f}: {entry} - {candidate}"
                    )
                return True
        if entry == candidate:
            return True
    return False
212
+
213
+
214
def CalculatePRF1F2(
    goldAnswerList, predAnswerList, distance_calculator=None, debug=False
):
    """Compute ``[precision, recall, f1, f2]`` between gold and predicted
    answer lists, with fuzzy matching when *distance_calculator* is given.

    Empty-list conventions: both empty -> all 1.0 (nothing labeled, nothing
    predicted counts as correct); gold empty -> precision 0, recall 1;
    pred empty -> precision 1, recall 0.
    """
    if not goldAnswerList:
        return (
            [1.0, 1.0, 1.0, 1.0] if not predAnswerList else [0.0, 1.0, 0.0, 0.0]
        )
    if not predAnswerList:
        return [1.0, 0.0, 0.0, 0.0]

    true_pos = 1e-40  # numerical trick: keeps the divisions below non-zero
    false_pos = 0.0
    false_neg = 0.0

    # Recall side: every gold answer should appear among the predictions.
    for gold in goldAnswerList:
        if FindInList(
            gold, predAnswerList, distance_calculator=distance_calculator, debug=True
        ):
            true_pos += 1
        else:
            false_neg += 1

    # Precision side: every prediction should appear among the gold answers.
    for pred in predAnswerList:
        if not FindInList(pred, goldAnswerList, distance_calculator=distance_calculator):
            false_pos += 1

    precision = true_pos / (true_pos + false_pos)
    recall = true_pos / (true_pos + false_neg)

    f1 = (2 * precision * recall) / (precision + recall)
    f2 = (5 * precision * recall) / (4 * precision + recall)
    return [precision, recall, f1, f2]
264
+
265
+
266
# Lazily-initialized spaCy pipeline; populated by load_spacy_model().
nlp = None
# Maximum embedding distance for a fuzzy match in FindInList; refreshed from
# the DISTANCE_THRESHOLD env var inside load_spacy_model().
distance_threshold = 0.05
268
+
269
+
270
def load_spacy_model():
    """Load (once) and return the spaCy pipeline named by SPACY_MODEL_NAME,
    downloading the model on demand.

    Also refreshes the module-level ``distance_threshold`` from the
    DISTANCE_THRESHOLD environment variable on first load.
    """
    import spacy

    global nlp
    if nlp is not None:
        return nlp

    global distance_threshold
    distance_threshold = float(os.getenv("DISTANCE_THRESHOLD", "0.05"))

    model_name = os.getenv("SPACY_MODEL_NAME", "en_core_web_trf")

    # Retry after downloading when the model is not installed yet.
    while True:
        try:
            print(f"loading spacy model from {model_name}")
            nlp = spacy.load(model_name)
            print(f"loaded spacy model from {model_name}")
            return nlp
        except OSError:
            print(f"downloading spacy model {model_name}")
            spacy.cli.download(model_name)
            print(f"downloaded spacy model {model_name}")
292
+
293
+
294
def clean_text(text):
    """Normalize an entity string: lowercase and strip double quotes and
    periods in a single translate pass."""
    return text.lower().translate(str.maketrans("", "", '".'))
300
+
301
+
302
def get_entities_in_text(text, debug=False):
    """Run spaCy NER over *text* and return the cleaned entity strings,
    de-duplicated and sorted alphabetically."""
    pipeline = load_spacy_model()
    doc = pipeline(text)

    unique_entities = []
    for ent in doc.ents:
        if debug:
            print(ent.text, ent.label_)
        cleaned = clean_text(ent.text)
        if cleaned not in unique_entities:
            unique_entities.append(cleaned)

    return sorted(unique_entities)
315
+
316
+
317
def calculate_metrics(question, answer, distance_calculator=None, debug=False):
    """Score *answer* against ``question["answers"]`` via NER entity overlap.

    All spaCy extraction and scoring happen only when ``debug`` is True;
    otherwise zero metrics and empty entity lists are returned.
    NOTE(review): ``question["answers"]`` is sorted in place, mutating the
    caller's dict.

    Returns (precision, recall, f1, f2, entities_in_answer, ground_truth,
    entities_in_question).
    """
    ground_truth = question["answers"]
    ground_truth.sort()

    if debug:
        print(f"question: {question}")
        print(f"answer: {answer}")

        print("entities_in_question ---------------")
        entities_in_question = get_entities_in_text(question["question"], debug)

        print("entities_in_answer -----------------")
        entities_in_answer = get_entities_in_text(answer, debug)

        print("done with NER with spaCy -----------")

        entities_in_answer.sort()

        # Drop predicted entities that merely echo the question itself
        # (exact match only — no distance calculator here).
        predAnswerList = [
            pentry
            for pentry in entities_in_answer
            if not FindInList(pentry, entities_in_question)
        ]

        print(f"entities_in_question: {entities_in_question}")
        print(f"entities_in_answer: {entities_in_answer}")
        print(f"ground_truth: {ground_truth}")
        print(f"pred_answers: {predAnswerList}")

        precision, recall, f1, f2 = CalculatePRF1F2(
            ground_truth,
            predAnswerList,
            debug=debug,
            distance_calculator=distance_calculator,
        )
        print(f"precision: {precision}, recall: {recall}, f1: {f1}, f2: {f2}")
    else:
        precision = 0.0
        recall = 0.0
        f1 = 0.0
        f2 = 0.0
        entities_in_answer = []
        entities_in_question = []

    return (
        precision,
        recall,
        f1,
        f2,
        entities_in_answer,
        ground_truth,
        entities_in_question,
    )
370
+
371
+
372
def calculate_metrics_gemini(question, answer, debug=False):
    """Placeholder Gemini metric: always reports zero precision/recall/F1."""
    return (0.0, 0.0, 0.0)
378
+
379
+
380
if __name__ == "__main__":
    # Smoke test: evaluate three MS MARCO-style QA pairs, using a pairwise
    # embedding-distance evaluator for fuzzy entity matching.
    from langchain_community.embeddings import HuggingFaceInstructEmbeddings
    from langchain.evaluation import load_evaluator

    hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
    print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
    print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")

    hf_embeddings_model_name = "hkunlp/instructor-large"
    print(f"hf_embeddings_model_name: {hf_embeddings_model_name}")
    embeddings = HuggingFaceInstructEmbeddings(
        model_name=hf_embeddings_model_name,
        model_kwargs={"device": hf_embeddings_device_type},
    )

    # Lower score means more similar strings.
    hf_evaluator = load_evaluator("pairwise_embedding_distance", embeddings=embeddings)

    question = {
        "question": "what does jamaican people speak",
        "entities_in_question": ["jamaican"],
        "answers": ["jamaican english", "jamaican creole english language"],
    }
    answer = "Jamaican people primarily speak Jamaican Patois, which is an English-based creole language with significant West African influences. It is spoken as a native language by the majority of Jamaicans and also exists in various forms among Jamaican expatriates and non-Jamaicans in different parts of the world. The phonology of Jamaican Patois includes around 21 consonants (with some dialectal variation regarding the status of /h/ as a phoneme) and between nine and sixteen vowels, some of which are capable of nasalization or lengthening. There are also instances of palatalization in Jamaican Patois, where certain consonants appear to be phonemic in some dialects but may be considered phonetic in others. For example, the palatal stops [c], [ɟ], and [ɲ] may be analyzed as phonemes or as instances of phonetic palatalization depending on the account."
    calculate_metrics(question, answer, distance_calculator=hf_evaluator, debug=True)

    question = {
        "question": "who is governor of ohio 2011",
        "entities_in_question": ["2011"],
        "answers": ["john kasich", "return j. meigs, jr.", "ted strickland"],
    }
    answer = "The lieutenant governor of Ohio in 2011 was Mary Taylor, who served alongside Governor John Kasich. She assumed office on January 10, 2011, after being elected as the lieutenant governor in the 2010 election. During her tenure, she faced criticism for using the state airplane for personal errands and reportedly had high turnover among her staff."
    calculate_metrics(question, answer, distance_calculator=hf_evaluator, debug=True)

    question = {
        "question": "where is the fukushima daiichi nuclear power station",
        "entities_in_question": ["the fukushima daiichi nuclear power station"],
        "answers": ["japan", "okuma"],
    }
    answer = "The Fukushima Daiichi Nuclear Power Station is located in the towns of Ōkuma and Futaba in Fukushima Prefecture, Japan."
    calculate_metrics(question, answer, distance_calculator=hf_evaluator, debug=True)
data/datasets/ms_macro.json ADDED
The diff for this file is too large to render. See raw diff
 
notebook/01_Data_Preprocessing.ipynb ADDED
@@ -0,0 +1,813 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "a6d96660",
7
+ "metadata": {
8
+ "metadata": {}
9
+ },
10
+ "outputs": [
11
+ {
12
+ "name": "stdout",
13
+ "output_type": "stream",
14
+ "text": [
15
+ "workding dir: /Users/inflaton/code/emtech/gpt/llm-qa-bench\n"
16
+ ]
17
+ }
18
+ ],
19
+ "source": [
20
+ "import os\n",
21
+ "import sys\n",
22
+ "from pathlib import Path\n",
23
+ "\n",
24
+ "workding_dir = str(Path.cwd().parent)\n",
25
+ "os.chdir(workding_dir)\n",
26
+ "sys.path.append(workding_dir)\n",
27
+ "print(\"workding dir:\", workding_dir)"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 2,
33
+ "id": "b72bf3f9",
34
+ "metadata": {
35
+ "metadata": {}
36
+ },
37
+ "outputs": [
38
+ {
39
+ "name": "stderr",
40
+ "output_type": "stream",
41
+ "text": [
42
+ "/Users/inflaton/anaconda3/envs/llm-qa-bench/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
43
+ " from .autonotebook import tqdm as notebook_tqdm\n"
44
+ ]
45
+ },
46
+ {
47
+ "data": {
48
+ "text/plain": [
49
+ "Dataset({\n",
50
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
51
+ " num_rows: 500\n",
52
+ "})"
53
+ ]
54
+ },
55
+ "execution_count": 2,
56
+ "metadata": {},
57
+ "output_type": "execute_result"
58
+ }
59
+ ],
60
+ "source": [
61
+ "from datasets import load_from_disk\n",
62
+ "\n",
63
+ "new_ds = load_from_disk(\"./Llama-2-eval/data/datasets/ms_macro/\")\n",
64
+ "new_ds"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 3,
70
+ "id": "051bd771",
71
+ "metadata": {
72
+ "metadata": {}
73
+ },
74
+ "outputs": [
75
+ {
76
+ "data": {
77
+ "text/plain": [
78
+ "({'NUMERIC': 100,\n",
79
+ " 'DESCRIPTION': 100,\n",
80
+ " 'ENTITY': 100,\n",
81
+ " 'PERSON': 100,\n",
82
+ " 'LOCATION': 100},\n",
83
+ " {'NUMERIC': 179,\n",
84
+ " 'DESCRIPTION': 215,\n",
85
+ " 'ENTITY': 443,\n",
86
+ " 'LOCATION': 461,\n",
87
+ " 'PERSON': 499})"
88
+ ]
89
+ },
90
+ "execution_count": 3,
91
+ "metadata": {},
92
+ "output_type": "execute_result"
93
+ }
94
+ ],
95
+ "source": [
96
+ "counts = {}\n",
97
+ "indices = {}\n",
98
+ "size = 100\n",
99
+ "for i in range(new_ds.num_rows):\n",
100
+ " row = new_ds[i]\n",
101
+ " query_type = row[\"query_type\"]\n",
102
+ " if query_type in counts:\n",
103
+ " counts[query_type] += 1\n",
104
+ " else:\n",
105
+ " counts[query_type] = 1\n",
106
+ " if counts[query_type] == size:\n",
107
+ " indices[query_type] = i\n",
108
+ "counts, indices"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 4,
114
+ "id": "db48dcc4",
115
+ "metadata": {
116
+ "metadata": {}
117
+ },
118
+ "outputs": [],
119
+ "source": [
120
+ "df = new_ds.to_pandas()"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 5,
126
+ "id": "a39dea83",
127
+ "metadata": {
128
+ "metadata": {}
129
+ },
130
+ "outputs": [
131
+ {
132
+ "data": {
133
+ "text/html": [
134
+ "<div>\n",
135
+ "<style scoped>\n",
136
+ " .dataframe tbody tr th:only-of-type {\n",
137
+ " vertical-align: middle;\n",
138
+ " }\n",
139
+ "\n",
140
+ " .dataframe tbody tr th {\n",
141
+ " vertical-align: top;\n",
142
+ " }\n",
143
+ "\n",
144
+ " .dataframe thead th {\n",
145
+ " text-align: right;\n",
146
+ " }\n",
147
+ "</style>\n",
148
+ "<table border=\"1\" class=\"dataframe\">\n",
149
+ " <thead>\n",
150
+ " <tr style=\"text-align: right;\">\n",
151
+ " <th></th>\n",
152
+ " <th>answers</th>\n",
153
+ " <th>passages</th>\n",
154
+ " <th>query</th>\n",
155
+ " <th>query_id</th>\n",
156
+ " <th>query_type</th>\n",
157
+ " <th>wellFormedAnswers</th>\n",
158
+ " </tr>\n",
159
+ " </thead>\n",
160
+ " <tbody>\n",
161
+ " <tr>\n",
162
+ " <th>0</th>\n",
163
+ " <td>[2,662]</td>\n",
164
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
165
+ " <td>albany mn population</td>\n",
166
+ " <td>15177</td>\n",
167
+ " <td>NUMERIC</td>\n",
168
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>1</th>\n",
172
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
173
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
174
+ " <td>current weather in volcano, ca</td>\n",
175
+ " <td>114414</td>\n",
176
+ " <td>DESCRIPTION</td>\n",
177
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
178
+ " </tr>\n",
179
+ " <tr>\n",
180
+ " <th>2</th>\n",
181
+ " <td>[Hippocrates]</td>\n",
182
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
183
+ " <td>____________________ is considered the father ...</td>\n",
184
+ " <td>9083</td>\n",
185
+ " <td>DESCRIPTION</td>\n",
186
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
187
+ " </tr>\n",
188
+ " <tr>\n",
189
+ " <th>3</th>\n",
190
+ " <td>[120 days from the date of the Note.]</td>\n",
191
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
192
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
193
+ " <td>281439</td>\n",
194
+ " <td>NUMERIC</td>\n",
195
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>4</th>\n",
199
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
200
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
201
+ " <td>average pharmacy tech salary</td>\n",
202
+ " <td>40287</td>\n",
203
+ " <td>NUMERIC</td>\n",
204
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
205
+ " </tr>\n",
206
+ " </tbody>\n",
207
+ "</table>\n",
208
+ "</div>"
209
+ ],
210
+ "text/plain": [
211
+ " answers \\\n",
212
+ "0 [2,662] \n",
213
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
214
+ "2 [Hippocrates] \n",
215
+ "3 [120 days from the date of the Note.] \n",
216
+ "4 [From $26,000 to $39,000 a year] \n",
217
+ "\n",
218
+ " passages \\\n",
219
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
220
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
221
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
222
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
223
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
224
+ "\n",
225
+ " query query_id query_type \\\n",
226
+ "0 albany mn population 15177 NUMERIC \n",
227
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
228
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
229
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
230
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
231
+ "\n",
232
+ " wellFormedAnswers \n",
233
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
234
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
235
+ "2 [Hippocrates is considered the father of moder... \n",
236
+ "3 [An appraisal is good for 120 days from the da... \n",
237
+ "4 [The average salary for a pharmacy technician ... "
238
+ ]
239
+ },
240
+ "execution_count": 5,
241
+ "metadata": {},
242
+ "output_type": "execute_result"
243
+ }
244
+ ],
245
+ "source": [
246
+ "df.head()"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 6,
252
+ "id": "7f0d2ca1",
253
+ "metadata": {
254
+ "metadata": {}
255
+ },
256
+ "outputs": [],
257
+ "source": [
258
+ "df.rename(columns={\"query\": \"question\", \"query_id\": \"id\"}, inplace=True)"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": 7,
264
+ "id": "e1cde4c1",
265
+ "metadata": {
266
+ "metadata": {}
267
+ },
268
+ "outputs": [
269
+ {
270
+ "data": {
271
+ "text/html": [
272
+ "<div>\n",
273
+ "<style scoped>\n",
274
+ " .dataframe tbody tr th:only-of-type {\n",
275
+ " vertical-align: middle;\n",
276
+ " }\n",
277
+ "\n",
278
+ " .dataframe tbody tr th {\n",
279
+ " vertical-align: top;\n",
280
+ " }\n",
281
+ "\n",
282
+ " .dataframe thead th {\n",
283
+ " text-align: right;\n",
284
+ " }\n",
285
+ "</style>\n",
286
+ "<table border=\"1\" class=\"dataframe\">\n",
287
+ " <thead>\n",
288
+ " <tr style=\"text-align: right;\">\n",
289
+ " <th></th>\n",
290
+ " <th>answers</th>\n",
291
+ " <th>passages</th>\n",
292
+ " <th>question</th>\n",
293
+ " <th>id</th>\n",
294
+ " <th>query_type</th>\n",
295
+ " <th>wellFormedAnswers</th>\n",
296
+ " </tr>\n",
297
+ " </thead>\n",
298
+ " <tbody>\n",
299
+ " <tr>\n",
300
+ " <th>0</th>\n",
301
+ " <td>[2,662]</td>\n",
302
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
303
+ " <td>albany mn population</td>\n",
304
+ " <td>15177</td>\n",
305
+ " <td>NUMERIC</td>\n",
306
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
307
+ " </tr>\n",
308
+ " <tr>\n",
309
+ " <th>1</th>\n",
310
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
311
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
312
+ " <td>current weather in volcano, ca</td>\n",
313
+ " <td>114414</td>\n",
314
+ " <td>DESCRIPTION</td>\n",
315
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
316
+ " </tr>\n",
317
+ " <tr>\n",
318
+ " <th>2</th>\n",
319
+ " <td>[Hippocrates]</td>\n",
320
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
321
+ " <td>____________________ is considered the father ...</td>\n",
322
+ " <td>9083</td>\n",
323
+ " <td>DESCRIPTION</td>\n",
324
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
325
+ " </tr>\n",
326
+ " <tr>\n",
327
+ " <th>3</th>\n",
328
+ " <td>[120 days from the date of the Note.]</td>\n",
329
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
330
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
331
+ " <td>281439</td>\n",
332
+ " <td>NUMERIC</td>\n",
333
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
334
+ " </tr>\n",
335
+ " <tr>\n",
336
+ " <th>4</th>\n",
337
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
338
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
339
+ " <td>average pharmacy tech salary</td>\n",
340
+ " <td>40287</td>\n",
341
+ " <td>NUMERIC</td>\n",
342
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
343
+ " </tr>\n",
344
+ " </tbody>\n",
345
+ "</table>\n",
346
+ "</div>"
347
+ ],
348
+ "text/plain": [
349
+ " answers \\\n",
350
+ "0 [2,662] \n",
351
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
352
+ "2 [Hippocrates] \n",
353
+ "3 [120 days from the date of the Note.] \n",
354
+ "4 [From $26,000 to $39,000 a year] \n",
355
+ "\n",
356
+ " passages \\\n",
357
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
358
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
359
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
360
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
361
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
362
+ "\n",
363
+ " question id query_type \\\n",
364
+ "0 albany mn population 15177 NUMERIC \n",
365
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
366
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
367
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
368
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
369
+ "\n",
370
+ " wellFormedAnswers \n",
371
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
372
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
373
+ "2 [Hippocrates is considered the father of moder... \n",
374
+ "3 [An appraisal is good for 120 days from the da... \n",
375
+ "4 [The average salary for a pharmacy technician ... "
376
+ ]
377
+ },
378
+ "execution_count": 7,
379
+ "metadata": {},
380
+ "output_type": "execute_result"
381
+ }
382
+ ],
383
+ "source": [
384
+ "df.head()"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": 8,
390
+ "id": "89494c3d",
391
+ "metadata": {
392
+ "metadata": {}
393
+ },
394
+ "outputs": [],
395
+ "source": [
396
+ "import numpy as np\n",
397
+ "\n",
398
+ "\n",
399
+ "def generate_context(row, debug=False):\n",
400
+ " passages = row[\"passages\"]\n",
401
+ " if debug:\n",
402
+ " print(\"question:\", row[\"question\"])\n",
403
+ " print(passages)\n",
404
+ "\n",
405
+ " passage_text = passages[\"passage_text\"]\n",
406
+ " context = \"\\n\\n\".join(passage_text)\n",
407
+ "\n",
408
+ " return context"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": 9,
414
+ "id": "0dc959f6",
415
+ "metadata": {
416
+ "metadata": {}
417
+ },
418
+ "outputs": [
419
+ {
420
+ "name": "stdout",
421
+ "output_type": "stream",
422
+ "text": [
423
+ "question: albany mn population\n",
424
+ "{'is_selected': array([0, 0, 0, 1, 0, 0, 0, 0], dtype=int32), 'passage_text': array(['City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.',\n",
425
+ " 'Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.',\n",
426
+ " 'For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
427
+ " 'Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.',\n",
428
+ " 'Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
429
+ " 'Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).',\n",
430
+ " \"For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\",\n",
431
+ " \"For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"],\n",
432
+ " dtype=object), 'url': array(['http://zipcode.org/city/MN/ALBANY',\n",
433
+ " 'http://www.city-data.com/zips/56307.html',\n",
434
+ " 'https://en.wikipedia.org/wiki/Albany,_Minnesota',\n",
435
+ " 'http://ci.albany.mn.us/index.asp?SEC=A8341FEC-6B8C-47D2-926B-75A89ED4C539&Type=B_BASIC',\n",
436
+ " 'https://www.mapquest.com/us/mn/albany-282023394',\n",
437
+ " 'http://www.city-data.com/city/Albany-Minnesota.html',\n",
438
+ " 'http://www.city-data.com/zips/56307.html',\n",
439
+ " 'http://www.city-data.com/city/Albany-Minnesota.html'],\n",
440
+ " dtype=object)}\n",
441
+ "City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n",
442
+ "\n",
443
+ "Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\n",
444
+ "\n",
445
+ "For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
446
+ "\n",
447
+ "Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\n",
448
+ "\n",
449
+ "Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
450
+ "\n",
451
+ "Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\n",
452
+ "\n",
453
+ "For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\n",
454
+ "\n",
455
+ "For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\n",
456
+ "CPU times: user 255 µs, sys: 41 µs, total: 296 µs\n",
457
+ "Wall time: 294 µs\n"
458
+ ]
459
+ }
460
+ ],
461
+ "source": [
462
+ "%%time\n",
463
+ "context = generate_context(df.iloc[0], debug=True)\n",
464
+ "print(context)"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 10,
470
+ "id": "d887d92e",
471
+ "metadata": {
472
+ "metadata": {}
473
+ },
474
+ "outputs": [
475
+ {
476
+ "name": "stderr",
477
+ "output_type": "stream",
478
+ "text": [
479
+ "100%|██████████| 500/500 [00:00<00:00, 213125.20it/s]"
480
+ ]
481
+ },
482
+ {
483
+ "name": "stdout",
484
+ "output_type": "stream",
485
+ "text": [
486
+ "CPU times: user 3.19 ms, sys: 1.47 ms, total: 4.67 ms\n",
487
+ "Wall time: 4.01 ms\n"
488
+ ]
489
+ },
490
+ {
491
+ "name": "stderr",
492
+ "output_type": "stream",
493
+ "text": [
494
+ "\n"
495
+ ]
496
+ }
497
+ ],
498
+ "source": [
499
+ "%%time\n",
500
+ "from tqdm import tqdm\n",
501
+ "\n",
502
+ "tqdm.pandas()\n",
503
+ "\n",
504
+ "df[\"context\"] = df.progress_apply(\n",
505
+ " generate_context, axis=1\n",
506
+ ")"
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "code",
511
+ "execution_count": 11,
512
+ "id": "dfdf1d5a",
513
+ "metadata": {
514
+ "metadata": {}
515
+ },
516
+ "outputs": [
517
+ {
518
+ "data": {
519
+ "text/html": [
520
+ "<div>\n",
521
+ "<style scoped>\n",
522
+ " .dataframe tbody tr th:only-of-type {\n",
523
+ " vertical-align: middle;\n",
524
+ " }\n",
525
+ "\n",
526
+ " .dataframe tbody tr th {\n",
527
+ " vertical-align: top;\n",
528
+ " }\n",
529
+ "\n",
530
+ " .dataframe thead th {\n",
531
+ " text-align: right;\n",
532
+ " }\n",
533
+ "</style>\n",
534
+ "<table border=\"1\" class=\"dataframe\">\n",
535
+ " <thead>\n",
536
+ " <tr style=\"text-align: right;\">\n",
537
+ " <th></th>\n",
538
+ " <th>answers</th>\n",
539
+ " <th>passages</th>\n",
540
+ " <th>question</th>\n",
541
+ " <th>id</th>\n",
542
+ " <th>query_type</th>\n",
543
+ " <th>wellFormedAnswers</th>\n",
544
+ " <th>context</th>\n",
545
+ " </tr>\n",
546
+ " </thead>\n",
547
+ " <tbody>\n",
548
+ " <tr>\n",
549
+ " <th>0</th>\n",
550
+ " <td>[2,662]</td>\n",
551
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
552
+ " <td>albany mn population</td>\n",
553
+ " <td>15177</td>\n",
554
+ " <td>NUMERIC</td>\n",
555
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
556
+ " <td>City of Albany, MN Zip Codes. City of Albany, ...</td>\n",
557
+ " </tr>\n",
558
+ " <tr>\n",
559
+ " <th>1</th>\n",
560
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
561
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
562
+ " <td>current weather in volcano, ca</td>\n",
563
+ " <td>114414</td>\n",
564
+ " <td>DESCRIPTION</td>\n",
565
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
566
+ " <td>Volcano 10 Day Weather. Sunday:The Volcano for...</td>\n",
567
+ " </tr>\n",
568
+ " <tr>\n",
569
+ " <th>2</th>\n",
570
+ " <td>[Hippocrates]</td>\n",
571
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
572
+ " <td>____________________ is considered the father ...</td>\n",
573
+ " <td>9083</td>\n",
574
+ " <td>DESCRIPTION</td>\n",
575
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
576
+ " <td>Hippocrates is widely considered to be the Fat...</td>\n",
577
+ " </tr>\n",
578
+ " <tr>\n",
579
+ " <th>3</th>\n",
580
+ " <td>[120 days from the date of the Note.]</td>\n",
581
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
582
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
583
+ " <td>281439</td>\n",
584
+ " <td>NUMERIC</td>\n",
585
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
586
+ " <td>New and Updated Underwriting and Eligibility P...</td>\n",
587
+ " </tr>\n",
588
+ " <tr>\n",
589
+ " <th>4</th>\n",
590
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
591
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
592
+ " <td>average pharmacy tech salary</td>\n",
593
+ " <td>40287</td>\n",
594
+ " <td>NUMERIC</td>\n",
595
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
596
+ " <td>If you are interested in becoming a pharmacy t...</td>\n",
597
+ " </tr>\n",
598
+ " </tbody>\n",
599
+ "</table>\n",
600
+ "</div>"
601
+ ],
602
+ "text/plain": [
603
+ " answers \\\n",
604
+ "0 [2,662] \n",
605
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
606
+ "2 [Hippocrates] \n",
607
+ "3 [120 days from the date of the Note.] \n",
608
+ "4 [From $26,000 to $39,000 a year] \n",
609
+ "\n",
610
+ " passages \\\n",
611
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
612
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
613
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
614
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
615
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
616
+ "\n",
617
+ " question id query_type \\\n",
618
+ "0 albany mn population 15177 NUMERIC \n",
619
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
620
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
621
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
622
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
623
+ "\n",
624
+ " wellFormedAnswers \\\n",
625
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
626
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
627
+ "2 [Hippocrates is considered the father of moder... \n",
628
+ "3 [An appraisal is good for 120 days from the da... \n",
629
+ "4 [The average salary for a pharmacy technician ... \n",
630
+ "\n",
631
+ " context \n",
632
+ "0 City of Albany, MN Zip Codes. City of Albany, ... \n",
633
+ "1 Volcano 10 Day Weather. Sunday:The Volcano for... \n",
634
+ "2 Hippocrates is widely considered to be the Fat... \n",
635
+ "3 New and Updated Underwriting and Eligibility P... \n",
636
+ "4 If you are interested in becoming a pharmacy t... "
637
+ ]
638
+ },
639
+ "execution_count": 11,
640
+ "metadata": {},
641
+ "output_type": "execute_result"
642
+ }
643
+ ],
644
+ "source": [
645
+ "df.head()"
646
+ ]
647
+ },
648
+ {
649
+ "cell_type": "code",
650
+ "execution_count": 14,
651
+ "id": "8a1050b9",
652
+ "metadata": {
653
+ "metadata": {}
654
+ },
655
+ "outputs": [
656
+ {
657
+ "data": {
658
+ "text/html": [
659
+ "<div>\n",
660
+ "<style scoped>\n",
661
+ " .dataframe tbody tr th:only-of-type {\n",
662
+ " vertical-align: middle;\n",
663
+ " }\n",
664
+ "\n",
665
+ " .dataframe tbody tr th {\n",
666
+ " vertical-align: top;\n",
667
+ " }\n",
668
+ "\n",
669
+ " .dataframe thead th {\n",
670
+ " text-align: right;\n",
671
+ " }\n",
672
+ "</style>\n",
673
+ "<table border=\"1\" class=\"dataframe\">\n",
674
+ " <thead>\n",
675
+ " <tr style=\"text-align: right;\">\n",
676
+ " <th></th>\n",
677
+ " <th>id</th>\n",
678
+ " <th>question</th>\n",
679
+ " <th>answers</th>\n",
680
+ " <th>wellFormedAnswers</th>\n",
681
+ " <th>context</th>\n",
682
+ " <th>query_type</th>\n",
683
+ " </tr>\n",
684
+ " </thead>\n",
685
+ " <tbody>\n",
686
+ " <tr>\n",
687
+ " <th>0</th>\n",
688
+ " <td>15177</td>\n",
689
+ " <td>albany mn population</td>\n",
690
+ " <td>[2,662]</td>\n",
691
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
692
+ " <td>City of Albany, MN Zip Codes. City of Albany, ...</td>\n",
693
+ " <td>NUMERIC</td>\n",
694
+ " </tr>\n",
695
+ " <tr>\n",
696
+ " <th>1</th>\n",
697
+ " <td>114414</td>\n",
698
+ " <td>current weather in volcano, ca</td>\n",
699
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
700
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
701
+ " <td>Volcano 10 Day Weather. Sunday:The Volcano for...</td>\n",
702
+ " <td>DESCRIPTION</td>\n",
703
+ " </tr>\n",
704
+ " <tr>\n",
705
+ " <th>2</th>\n",
706
+ " <td>9083</td>\n",
707
+ " <td>____________________ is considered the father ...</td>\n",
708
+ " <td>[Hippocrates]</td>\n",
709
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
710
+ " <td>Hippocrates is widely considered to be the Fat...</td>\n",
711
+ " <td>DESCRIPTION</td>\n",
712
+ " </tr>\n",
713
+ " <tr>\n",
714
+ " <th>3</th>\n",
715
+ " <td>281439</td>\n",
716
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
717
+ " <td>[120 days from the date of the Note.]</td>\n",
718
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
719
+ " <td>New and Updated Underwriting and Eligibility P...</td>\n",
720
+ " <td>NUMERIC</td>\n",
721
+ " </tr>\n",
722
+ " <tr>\n",
723
+ " <th>4</th>\n",
724
+ " <td>40287</td>\n",
725
+ " <td>average pharmacy tech salary</td>\n",
726
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
727
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
728
+ " <td>If you are interested in becoming a pharmacy t...</td>\n",
729
+ " <td>NUMERIC</td>\n",
730
+ " </tr>\n",
731
+ " </tbody>\n",
732
+ "</table>\n",
733
+ "</div>"
734
+ ],
735
+ "text/plain": [
736
+ " id question \\\n",
737
+ "0 15177 albany mn population \n",
738
+ "1 114414 current weather in volcano, ca \n",
739
+ "2 9083 ____________________ is considered the father ... \n",
740
+ "3 281439 how many days is an appraisal good for a fanni... \n",
741
+ "4 40287 average pharmacy tech salary \n",
742
+ "\n",
743
+ " answers \\\n",
744
+ "0 [2,662] \n",
745
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
746
+ "2 [Hippocrates] \n",
747
+ "3 [120 days from the date of the Note.] \n",
748
+ "4 [From $26,000 to $39,000 a year] \n",
749
+ "\n",
750
+ " wellFormedAnswers \\\n",
751
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
752
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
753
+ "2 [Hippocrates is considered the father of moder... \n",
754
+ "3 [An appraisal is good for 120 days from the da... \n",
755
+ "4 [The average salary for a pharmacy technician ... \n",
756
+ "\n",
757
+ " context query_type \n",
758
+ "0 City of Albany, MN Zip Codes. City of Albany, ... NUMERIC \n",
759
+ "1 Volcano 10 Day Weather. Sunday:The Volcano for... DESCRIPTION \n",
760
+ "2 Hippocrates is widely considered to be the Fat... DESCRIPTION \n",
761
+ "3 New and Updated Underwriting and Eligibility P... NUMERIC \n",
762
+ "4 If you are interested in becoming a pharmacy t... NUMERIC "
763
+ ]
764
+ },
765
+ "execution_count": 14,
766
+ "metadata": {},
767
+ "output_type": "execute_result"
768
+ }
769
+ ],
770
+ "source": [
771
+ "# reordering columns\n",
772
+ "df = df[[\"id\", \"question\", \"answers\", \"wellFormedAnswers\", \"context\", \"query_type\"]]\n",
773
+ "df.head()"
774
+ ]
775
+ },
776
+ {
777
+ "cell_type": "code",
778
+ "execution_count": 15,
779
+ "id": "24a818ba",
780
+ "metadata": {
781
+ "metadata": {}
782
+ },
783
+ "outputs": [],
784
+ "source": [
785
+ "# save df to json with indent=4\n",
786
+ "df.to_json(\n",
787
+ " \"./data/datasets/ms_macro.json\", orient=\"records\", indent=4\n",
788
+ ")"
789
+ ]
790
+ }
791
+ ],
792
+ "metadata": {
793
+ "kernelspec": {
794
+ "display_name": "Python 3 (ipykernel)",
795
+ "language": "python",
796
+ "name": "python3"
797
+ },
798
+ "language_info": {
799
+ "codemirror_mode": {
800
+ "name": "ipython",
801
+ "version": 3
802
+ },
803
+ "file_extension": ".py",
804
+ "mimetype": "text/x-python",
805
+ "name": "python",
806
+ "nbconvert_exporter": "python",
807
+ "pygments_lexer": "ipython3",
808
+ "version": "3.11.9"
809
+ }
810
+ },
811
+ "nbformat": 4,
812
+ "nbformat_minor": 5
813
+ }
qa_chain_test.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import sys
4
+ import pandas as pd
5
+ from timeit import default_timer as timer
6
+ import nltk
7
+
8
+ chatting = len(sys.argv) > 1 and sys.argv[1] == "chat"
9
+
10
+ if chatting:
11
+ os.environ["BATCH_SIZE"] = "1"
12
+
13
+ from app_modules.init import app_init
14
+ from app_modules.llm_qa_chain import QAChain
15
+ from app_modules.utils import print_llm_response
16
+
17
+ llm_loader, qa_chain = app_init()
18
+
19
+ if chatting:
20
+ print("Starting chat mode")
21
+ while True:
22
+ question = input("Please enter your question: ")
23
+ if question.lower() == "exit":
24
+ break
25
+ result = qa_chain.call_chain({"question": question, "chat_history": []}, None)
26
+ print_llm_response(result)
27
+
28
+ sys.exit(0)
29
+
30
+ num_of_questions = 0
31
+
32
+ if len(sys.argv) > 1:
33
+ num_of_questions = int(sys.argv[1])
34
+
35
+ # Create an empty DataFrame with column names
36
+ df = pd.DataFrame(
37
+ columns=[
38
+ "id",
39
+ "question",
40
+ "answer",
41
+ ]
42
+ )
43
+
44
+ batch_size = int(os.getenv("BATCH_SIZE", "1"))
45
+ print(f"Batch size: {batch_size}")
46
+
47
+ questions_file_path = os.environ.get("QUESTIONS_FILE_PATH")
48
+ debug_retrieval = os.getenv("DEBUG_RETRIEVAL", "false").lower() == "true"
49
+
50
+ # Open the file for reading
51
+ print(f"Reading questions from file: {questions_file_path}")
52
+ test_data = json.loads(open(questions_file_path).read())
53
+
54
+ if isinstance(test_data, dict):
55
+ questions = [test_data[key] for key in test_data.keys()]
56
+ ids = [key for key in test_data.keys()]
57
+ else:
58
+ questions = test_data
59
+ ids = [row["id"] for row in questions]
60
+
61
+ if num_of_questions > 0:
62
+ questions = questions[:num_of_questions]
63
+
64
+ print(f"Number of questions: {len(questions)}")
65
+
66
+ if __name__ == "__main__":
67
+ chat_start = timer()
68
+ index = 0
69
+
70
+ while index < len(questions):
71
+ batch_ids = ids[index : index + batch_size]
72
+ batch_questions = [q["question"] for q in questions[index : index + batch_size]]
73
+
74
+ if isinstance(qa_chain, QAChain):
75
+ inputs = [{"question": q, "chat_history": []} for q in batch_questions]
76
+ else:
77
+ inputs = [{"question": q} for q in batch_questions]
78
+
79
+ start = timer()
80
+ result = qa_chain.call_chain(inputs, None)
81
+ end = timer()
82
+ print(f"Completed in {end - start:.3f}s")
83
+
84
+ # print("result:", result)
85
+ batch_answers = [r["answer"] for r in result]
86
+
87
+ for id, question, answer in zip(batch_ids, batch_questions, batch_answers):
88
+ df.loc[len(df)] = {
89
+ "id": id,
90
+ "question": question,
91
+ "answer": answer,
92
+ }
93
+
94
+ index += batch_size
95
+
96
+ for r in result:
97
+ print_llm_response(r, debug_retrieval)
98
+
99
+ chat_end = timer()
100
+ total_time = chat_end - chat_start
101
+ print(f"Total time used: {total_time:.3f} s")
102
+
103
+ df2 = pd.DataFrame(
104
+ columns=[
105
+ "id",
106
+ "question",
107
+ "answer",
108
+ "word_count",
109
+ "ground_truth",
110
+ ]
111
+ )
112
+
113
+ for i in range(len(df)):
114
+ question = questions[i]
115
+ answer = df["answer"][i]
116
+ query = df["question"][i]
117
+ id = df["id"][i]
118
+
119
+ ground_truth = question["answers"]
120
+
121
+ word_count = len(nltk.word_tokenize(answer))
122
+
123
+ df2.loc[len(df2)] = {
124
+ "id": id,
125
+ "question": query,
126
+ "answer": answer,
127
+ "word_count": word_count,
128
+ "ground_truth": ground_truth,
129
+ }
130
+
131
+ pd.options.display.float_format = "{:.3f}".format
132
+ print(df2.describe())
133
+
134
+ word_count = df2["word_count"].sum()
135
+
136
+ csv_file = (
137
+ os.getenv("TEST_RESULTS_CSV_FILE") or f"qa_batch_{batch_size}_test_results.csv"
138
+ )
139
+ with open(csv_file, "w") as f:
140
+ f.write(
141
+ f"# RAG: {isinstance(qa_chain, QAChain)} questions: {questions_file_path}\n"
142
+ )
143
+ f.write(
144
+ f"# model: {llm_loader.model_name} repetition_penalty: {llm_loader.repetition_penalty}\n"
145
+ )
146
+
147
+ df2.to_csv(csv_file, mode="a", index=False, header=True)
148
+ print(f"test results saved to file: {csv_file}")
149
+
150
+ df = pd.DataFrame(
151
+ {
152
+ "model": [llm_loader.model_name],
153
+ "repetition_penalty": [llm_loader.repetition_penalty],
154
+ "word_count": [word_count],
155
+ "inference_time": [total_time],
156
+ "inference_speed": [word_count / total_time],
157
+ }
158
+ )
159
+
160
+ print(f"Number of words generated: {word_count}")
161
+ print(f"Average generation speed: {word_count / total_time:.3f} words/s")
162
+
163
+ csv_file = os.getenv("ALL_RESULTS_CSV_FILE") or "qa_chain_all_results.csv"
164
+ file_existed = os.path.exists(csv_file) and os.path.getsize(csv_file) > 0
165
+ df.to_csv(csv_file, mode="a", index=False, header=not file_existed)
166
+ print(f"all results appended to file: {csv_file}")
requirements.txt CHANGED
@@ -1,8 +1,12 @@
1
-
2
- gradio
3
- spaces
4
- torch==2.2.0
5
- git+https://github.com/huggingface/transformers/
6
- optimum
7
- accelerate
8
- bitsandbytes
 
 
 
 
 
1
+ nltk==3.8.1
2
+ langchain==0.1.16
3
+ langchain-openai==0.1.3
4
+ langchain_google_genai==1.0.2
5
+ transformers==4.40.1
6
+ accelerate==0.29.3
7
+ python-dotenv==1.0.1
8
+ gradio==4.26.0
9
+ spaces==0.27.1
10
+ black==24.4.0
11
+ chardet==5.2.0
12
+ sentencepiece==0.2.0