dh-mc commited on
Commit
32a6937
1 Parent(s): c662eeb

ms macro dataset

Browse files
.env.example ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LLM_MODEL_TYPE=huggingface
2
+ # LLM_MODEL_TYPE=openai
3
+ # LLM_MODEL_TYPE=hftgi
4
+ # LLM_MODEL_TYPE=ollama
5
+ # LLM_MODEL_TYPE=google
6
+ # LLM_MODEL_TYPE=vllm
7
+
8
+ HUGGINGFACE_AUTH_TOKEN=
9
+
10
+ HFTGI_SERVER_URL=
11
+
12
+ OPENAI_API_KEY=
13
+
14
+ GOOGLE_API_KEY=
15
+
16
+ # if unset, default to "gpt-3.5-turbo"
17
+ OPENAI_MODEL_NAME=
18
+
19
+ # GEMINI_MODEL_NAME=gemini-1.5-pro-latest
20
+
21
+ # OLLAMA_MODEL_NAME=orca2:7b
22
+ # OLLAMA_MODEL_NAME=mistral:7b
23
+ # OLLAMA_MODEL_NAME=gemma:7b
24
+ # OLLAMA_MODEL_NAME=llama2:7b
25
+ OLLAMA_MODEL_NAME=llama3:8b
26
+
27
+ OLLAMA_RP=1.15
28
+
29
+
30
+ # cpu, mps or cuda:0 - if unset, use whatever detected
31
+ HF_EMBEDDINGS_DEVICE_TYPE=
32
+ HF_PIPELINE_DEVICE_TYPE=
33
+
34
+ # uncomment one of the below to load corresponding quantized model
35
+ # LOAD_QUANTIZED_MODEL=4bit
36
+ # LOAD_QUANTIZED_MODEL=8bit
37
+
38
+ QA_WITH_RAG=true
39
+ # QA_WITH_RAG=false
40
+
41
+ RETRIEVER_TYPE=questions_file
42
+ # RETRIEVER_TYPE=vectorstore
43
+
44
+ QUESTIONS_FILE_PATH="./data/datasets/ms_macro.json"
45
+
46
+ DISABLE_MODEL_PRELOADING=true
47
+ CHAT_HISTORY_ENABLED=false
48
+ SHOW_PARAM_SETTINGS=false
49
+ SHARE_GRADIO_APP=false
50
+
51
+ # if unset, default to "hkunlp/instructor-xl"
52
+ HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
53
+
54
+ # number of cpu cores - used to set n_threads for GPT4ALL & LlamaCpp models
55
+ NUMBER_OF_CPU_CORES=
56
+
57
+ USING_TORCH_BFLOAT16=true
58
+
59
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-3b"
60
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-7b"
61
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-12b"
62
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/wizardLM-7B-HF"
63
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/vicuna-7B-1.1-HF"
64
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-j"
65
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-falcon"
66
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="lmsys/fastchat-t5-3b-v1.0"
67
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-7b-chat-hf"
68
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-13b-chat-hf"
69
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-70b-chat-hf"
70
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Meta-Llama-3-8B-Instruct"
71
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Meta-Llama-3-70B-Instruct"
72
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Orca-2-7b"
73
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Orca-2-13b"
74
+ HUGGINGFACE_MODEL_NAME_OR_PATH="google/gemma-1.1-2b-it"
75
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="google/gemma-1.1-7b-it"
76
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Phi-3-mini-128k-instruct"
77
+ # HUGGINGFACE_MODEL_NAME_OR_PATH="mistralai/Mistral-7B-Instruct-v0.2"
.gitignore ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.out
2
+ *.log
3
+ pdfs/
4
+ .vscode/
5
+
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ pip-wheel-metadata/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ # *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
+ __pypackages__/
101
+
102
+ # Celery stuff
103
+ celerybeat-schedule
104
+ celerybeat.pid
105
+
106
+ # SageMath parsed files
107
+ *.sage.py
108
+
109
+ # Environments
110
+ .env
111
+ .venv
112
+ env/
113
+ venv/
114
+ ENV/
115
+ env.bak/
116
+ venv.bak/
117
+
118
+ # Spyder project settings
119
+ .spyderproject
120
+ .spyproject
121
+
122
+ # Rope project settings
123
+ .ropeproject
124
+
125
+ # mkdocs documentation
126
+ /site
127
+
128
+ # mypy
129
+ .mypy_cache/
130
+ .dmypy.json
131
+ dmypy.json
132
+
133
+ # Pyre type checker
134
+ .pyre/
135
+
136
+ # JetBrains
137
+ .idea
138
+
139
+ *.db
140
+
141
+ .DS_Store
142
+
143
+ vectorstore.pkl
144
+ langchain.readthedocs.io/
145
+
146
+ models/
147
+ data/logs/hftgi-2024-03-18.txt
148
+ qa_*_all_results.csv
149
+ qa_*_test_results.csv
Llama-2-eval/data/datasets/ms_macro/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef1814438c08fe1bcd56be04a29c7dbe96f09420be471fdfc36d61c1500f13c
3
+ size 2068896
Llama-2-eval/data/datasets/ms_macro/dataset_info.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "ms-macro-wellformed_only",
6
+ "dataset_size": 726469485,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00000-of-00002-0a6f58dc7ee03f61.parquet": {
10
+ "num_bytes": 164629356,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/train-00001-of-00002-5262fd5ec1911156.parquet": {
14
+ "num_bytes": 164721520,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/zhengxuanzenwu/ms-macro-wellformed_only@d6a0dd610474a02e63224176514c0073bb723c7c/data/test-00000-of-00001-f965dd5a841915d3.parquet": {
18
+ "num_bytes": 26541566,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 355892442,
23
+ "features": {
24
+ "answers": {
25
+ "feature": {
26
+ "dtype": "string",
27
+ "_type": "Value"
28
+ },
29
+ "_type": "Sequence"
30
+ },
31
+ "passages": {
32
+ "feature": {
33
+ "is_selected": {
34
+ "dtype": "int32",
35
+ "_type": "Value"
36
+ },
37
+ "passage_text": {
38
+ "dtype": "string",
39
+ "_type": "Value"
40
+ },
41
+ "url": {
42
+ "dtype": "string",
43
+ "_type": "Value"
44
+ }
45
+ },
46
+ "_type": "Sequence"
47
+ },
48
+ "query": {
49
+ "dtype": "string",
50
+ "_type": "Value"
51
+ },
52
+ "query_id": {
53
+ "dtype": "int32",
54
+ "_type": "Value"
55
+ },
56
+ "query_type": {
57
+ "dtype": "string",
58
+ "_type": "Value"
59
+ },
60
+ "wellFormedAnswers": {
61
+ "feature": {
62
+ "dtype": "string",
63
+ "_type": "Value"
64
+ },
65
+ "_type": "Sequence"
66
+ }
67
+ },
68
+ "homepage": "",
69
+ "license": "",
70
+ "size_in_bytes": 1082361927,
71
+ "splits": {
72
+ "train": {
73
+ "name": "train",
74
+ "num_bytes": 674327331,
75
+ "num_examples": 153725,
76
+ "shard_lengths": [
77
+ 116863,
78
+ 36862
79
+ ],
80
+ "dataset_name": "ms-macro-wellformed_only"
81
+ },
82
+ "test": {
83
+ "name": "test",
84
+ "num_bytes": 52142154,
85
+ "num_examples": 12467,
86
+ "dataset_name": "ms-macro-wellformed_only"
87
+ }
88
+ },
89
+ "version": {
90
+ "version_str": "0.0.0",
91
+ "major": 0,
92
+ "minor": 0,
93
+ "patch": 0
94
+ }
95
+ }
Llama-2-eval/data/datasets/ms_macro/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "fe2a26ddba75833a",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "test"
13
+ }
Llama-2-eval/data/results/results_full-a40.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
2
+ gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
3
+ gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
4
+ Llama-2-13b-chat-hf,1.12,2133.992,1.66,33389,24007,15.646,1.391,0.163,0.3345,0.2031,0.3756,0.1632,0.2962,0.1388,0.3045,0.3423,0.5302,0.1846,0.3694,0.719
5
+ vicuna-13b-v1.1,1.095,2212.946,1.682,35308,26456,15.955,1.335,0.1285,0.2319,0.1991,0.2812,0.1556,0.2644,0.2009,0.2768,0.3159,0.5761,0.1853,0.3276,0.749
6
+ Llama-2-7b-chat-hf,1.19,1280.314,1.793,34349,23987,26.829,1.432,0.1274,0.2383,0.1836,0.2621,0.1572,0.2754,0.17,0.2911,0.3631,0.5383,0.1781,0.3209,0.698
7
+ vicuna-7b-v1.1,1.095,975.73,1.574,25932,18714,26.577,1.386,0.1664,0.2838,0.2227,0.3118,0.166,0.2351,0.259,0.2753,0.4542,0.5838,0.2218,0.3379,0.722
8
+ wizardLM-7B-HF,1.095,1265.93,1.667,33570,24003,26.518,1.399,0.1367,0.2584,0.2027,0.2882,0.1358,0.2592,0.1985,0.3085,0.4154,0.5794,0.1866,0.3384,0.715
9
+ mpt-7b-instruct,1.05,2071.066,1.42,12374,9927,5.975,1.246,0.1804,0.285,0.2589,0.2556,0.2383,0.2468,0.2635,0.2571,0.3512,0.4042,0.2509,0.2897,0.802
10
+ gpt4all-j,1.095,5603.316,1.706,31502,27099,5.622,1.162,0.1236,0.2406,0.1708,0.2511,0.143,0.255,0.194,0.2941,0.3721,0.5337,0.1737,0.3153,0.860
Llama-2-eval/data/results/results_full-l40.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,repetition_penalty,generation_time,evaluation_time,total_tokens,total_words,tokens_per_second,tokens_per_word,words_per_token_l40,words_per_second,numeric_bleu,numeric_rougeL,description_bleu,description_rougeL,entity_bleu,entity_rougeL,person_bleu,person_rougeL,location_bleu,location_rougeL,overall_bleu,overall_rougeL,total_words_over_total_tokens
2
+ gpt-4,,2696.407,1.772,34069,29552,12.635,1.153,0.867,,0.1732,0.3337,0.1895,0.3248,0.1654,0.3117,0.1879,0.3286,0.4068,0.6213,0.1969,0.3843,0.867
3
+ gpt-3.5-turbo,,1492.921,1.786,34353,29917,23.011,1.148,0.871,,0.1606,0.3178,0.1623,0.2582,0.1296,0.2939,0.2024,0.3462,0.3632,0.5953,0.1761,0.3623,0.871
4
+ Llama-2-13b-chat-hf,1.12,1687.637,1.785,32808,23575,19.44,1.392,0.718,13.969,0.1612,0.3305,0.2061,0.3701,0.1675,0.3018,0.141,0.305,0.3394,0.5288,0.1866,0.368,0.719
5
+ vicuna-13b-v1.1,1.095,1799.165,2.197,35543,26613,19.755,1.336,0.749,14.792,0.1274,0.2321,0.1994,0.2834,0.154,0.2631,0.1984,0.2773,0.3194,0.5759,0.1844,0.3256,0.749
6
+ Llama-2-7b-chat-hf,1.19,1002.46,6.606,34686,24229,34.601,1.432,0.698,24.170,0.1269,0.2404,0.1824,0.2614,0.157,0.2769,0.1687,0.2896,0.3565,0.5378,0.177,0.3214,0.699
7
+ vicuna-7b-v1.1,1.095,758.227,1.432,25827,18638,34.062,1.386,0.722,24.581,0.1673,0.2859,0.2221,0.3096,0.1655,0.2327,0.2576,0.2717,0.4564,0.5849,0.2216,0.3387,0.722
8
+ wizardLM-7B-HF,1.095,998.702,1.683,33674,23996,33.718,1.403,0.713,24.027,0.1372,0.259,0.2046,0.2878,0.1354,0.2588,0.1982,0.3083,0.4154,0.5769,0.187,0.3383,0.713
9
+ mpt-7b-instruct,1.05,1622.435,1.338,12607,10139,7.77,1.243,0.805,6.249,0.1751,0.2756,0.2569,0.2625,0.2349,0.2456,0.2466,0.2566,0.3522,0.4049,0.2455,0.2889,0.804
10
+ gpt4all-j,1.095,3794.429,1.611,31719,27286,8.359,1.162,0.861,7.191,0.1262,0.2443,0.1669,0.251,0.1394,0.2505,0.1937,0.2968,0.3693,0.5348,0.1719,0.3151,0.860
Llama-2-eval/notebook/baseline.ipynb ADDED
@@ -0,0 +1,1983 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "id": "a6d96660",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "True"
13
+ ]
14
+ },
15
+ "execution_count": 5,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "import os\n",
22
+ "from dotenv import load_dotenv\n",
23
+ "\n",
24
+ "load_dotenv()"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 11,
30
+ "id": "7510ab87",
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "data": {
35
+ "text/plain": [
36
+ "DatasetDict({\n",
37
+ " train: Dataset({\n",
38
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
39
+ " num_rows: 153725\n",
40
+ " })\n",
41
+ " test: Dataset({\n",
42
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
43
+ " num_rows: 12467\n",
44
+ " })\n",
45
+ "})"
46
+ ]
47
+ },
48
+ "execution_count": 11,
49
+ "metadata": {},
50
+ "output_type": "execute_result"
51
+ }
52
+ ],
53
+ "source": [
54
+ "from datasets import load_dataset\n",
55
+ "\n",
56
+ "dataset = load_dataset(\"zhengxuanzenwu/ms-macro-wellformed_only\")\n",
57
+ "dataset"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": 12,
63
+ "id": "1f4f0e76",
64
+ "metadata": {},
65
+ "outputs": [
66
+ {
67
+ "data": {
68
+ "text/html": [
69
+ "<div>\n",
70
+ "<style scoped>\n",
71
+ " .dataframe tbody tr th:only-of-type {\n",
72
+ " vertical-align: middle;\n",
73
+ " }\n",
74
+ "\n",
75
+ " .dataframe tbody tr th {\n",
76
+ " vertical-align: top;\n",
77
+ " }\n",
78
+ "\n",
79
+ " .dataframe thead th {\n",
80
+ " text-align: right;\n",
81
+ " }\n",
82
+ "</style>\n",
83
+ "<table border=\"1\" class=\"dataframe\">\n",
84
+ " <thead>\n",
85
+ " <tr style=\"text-align: right;\">\n",
86
+ " <th></th>\n",
87
+ " <th>answers</th>\n",
88
+ " <th>passages</th>\n",
89
+ " <th>query</th>\n",
90
+ " <th>query_id</th>\n",
91
+ " <th>query_type</th>\n",
92
+ " <th>wellFormedAnswers</th>\n",
93
+ " </tr>\n",
94
+ " </thead>\n",
95
+ " <tbody>\n",
96
+ " <tr>\n",
97
+ " <th>0</th>\n",
98
+ " <td>[2,662]</td>\n",
99
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
100
+ " <td>albany mn population</td>\n",
101
+ " <td>15177</td>\n",
102
+ " <td>NUMERIC</td>\n",
103
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>1</th>\n",
107
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
108
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
109
+ " <td>current weather in volcano, ca</td>\n",
110
+ " <td>114414</td>\n",
111
+ " <td>DESCRIPTION</td>\n",
112
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
113
+ " </tr>\n",
114
+ " <tr>\n",
115
+ " <th>2</th>\n",
116
+ " <td>[Hippocrates]</td>\n",
117
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
118
+ " <td>____________________ is considered the father ...</td>\n",
119
+ " <td>9083</td>\n",
120
+ " <td>DESCRIPTION</td>\n",
121
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>3</th>\n",
125
+ " <td>[120 days from the date of the Note.]</td>\n",
126
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
127
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
128
+ " <td>281439</td>\n",
129
+ " <td>NUMERIC</td>\n",
130
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
131
+ " </tr>\n",
132
+ " <tr>\n",
133
+ " <th>4</th>\n",
134
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
135
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
136
+ " <td>average pharmacy tech salary</td>\n",
137
+ " <td>40287</td>\n",
138
+ " <td>NUMERIC</td>\n",
139
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
140
+ " </tr>\n",
141
+ " </tbody>\n",
142
+ "</table>\n",
143
+ "</div>"
144
+ ],
145
+ "text/plain": [
146
+ " answers \\\n",
147
+ "0 [2,662] \n",
148
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
149
+ "2 [Hippocrates] \n",
150
+ "3 [120 days from the date of the Note.] \n",
151
+ "4 [From $26,000 to $39,000 a year] \n",
152
+ "\n",
153
+ " passages \\\n",
154
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
155
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
156
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
157
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
158
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
159
+ "\n",
160
+ " query query_id query_type \\\n",
161
+ "0 albany mn population 15177 NUMERIC \n",
162
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
163
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
164
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
165
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
166
+ "\n",
167
+ " wellFormedAnswers \n",
168
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
169
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
170
+ "2 [Hippocrates is considered the father of moder... \n",
171
+ "3 [An appraisal is good for 120 days from the da... \n",
172
+ "4 [The average salary for a pharmacy technician ... "
173
+ ]
174
+ },
175
+ "execution_count": 12,
176
+ "metadata": {},
177
+ "output_type": "execute_result"
178
+ }
179
+ ],
180
+ "source": [
181
+ "df = dataset[\"test\"].to_pandas()\n",
182
+ "df.head()"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": 15,
188
+ "id": "3e9b4cef",
189
+ "metadata": {},
190
+ "outputs": [
191
+ {
192
+ "data": {
193
+ "text/plain": [
194
+ "{'answers': ['2,662'],\n",
195
+ " 'passages': {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0],\n",
196
+ " 'passage_text': ['City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.',\n",
197
+ " 'Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.',\n",
198
+ " 'For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
199
+ " 'Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.',\n",
200
+ " 'Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
201
+ " 'Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).',\n",
202
+ " \"For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\",\n",
203
+ " \"For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"],\n",
204
+ " 'url': ['http://zipcode.org/city/MN/ALBANY',\n",
205
+ " 'http://www.city-data.com/zips/56307.html',\n",
206
+ " 'https://en.wikipedia.org/wiki/Albany,_Minnesota',\n",
207
+ " 'http://ci.albany.mn.us/index.asp?SEC=A8341FEC-6B8C-47D2-926B-75A89ED4C539&Type=B_BASIC',\n",
208
+ " 'https://www.mapquest.com/us/mn/albany-282023394',\n",
209
+ " 'http://www.city-data.com/city/Albany-Minnesota.html',\n",
210
+ " 'http://www.city-data.com/zips/56307.html',\n",
211
+ " 'http://www.city-data.com/city/Albany-Minnesota.html']},\n",
212
+ " 'query': 'albany mn population',\n",
213
+ " 'query_id': 15177,\n",
214
+ " 'query_type': 'NUMERIC',\n",
215
+ " 'wellFormedAnswers': ['The population of Albany, Minnesota is 2,662. ']}"
216
+ ]
217
+ },
218
+ "execution_count": 15,
219
+ "metadata": {},
220
+ "output_type": "execute_result"
221
+ }
222
+ ],
223
+ "source": [
224
+ "test = dataset[\"test\"]\n",
225
+ "test[0]"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": 24,
231
+ "id": "104dfbea",
232
+ "metadata": {},
233
+ "outputs": [
234
+ {
235
+ "data": {
236
+ "text/plain": [
237
+ "12467"
238
+ ]
239
+ },
240
+ "execution_count": 24,
241
+ "metadata": {},
242
+ "output_type": "execute_result"
243
+ }
244
+ ],
245
+ "source": [
246
+ "test.num_rows"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 35,
252
+ "id": "01b3a886",
253
+ "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "data": {
257
+ "text/plain": [
258
+ "({'NUMERIC': 3685,\n",
259
+ " 'DESCRIPTION': 5487,\n",
260
+ " 'ENTITY': 1077,\n",
261
+ " 'PERSON': 868,\n",
262
+ " 'LOCATION': 1350},\n",
263
+ " {'NUMERIC': 179,\n",
264
+ " 'DESCRIPTION': 218,\n",
265
+ " 'ENTITY': 2403,\n",
266
+ " 'LOCATION': 2559,\n",
267
+ " 'PERSON': 3966})"
268
+ ]
269
+ },
270
+ "execution_count": 35,
271
+ "metadata": {},
272
+ "output_type": "execute_result"
273
+ }
274
+ ],
275
+ "source": [
276
+ "counts = {}\n",
277
+ "indices = {}\n",
278
+ "size = 100\n",
279
+ "for i in range(test.num_rows):\n",
280
+ " row = test[i]\n",
281
+ " query_type = row[\"query_type\"]\n",
282
+ " if query_type in counts:\n",
283
+ " counts[query_type] += 1\n",
284
+ " else:\n",
285
+ " counts[query_type] = 1\n",
286
+ " if counts[query_type] == size:\n",
287
+ " indices[query_type] = i\n",
288
+ "counts, indices"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": 39,
294
+ "id": "967bc1cd",
295
+ "metadata": {},
296
+ "outputs": [],
297
+ "source": [
298
+ "# create new dataset exluding those idx\n",
299
+ "baseline = test.select(\n",
300
+ " (i for i in range(len(test)) if i <= indices[test[i][\"query_type\"]])\n",
301
+ ")"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 40,
307
+ "id": "9a5fcad5",
308
+ "metadata": {},
309
+ "outputs": [
310
+ {
311
+ "data": {
312
+ "text/plain": [
313
+ "Dataset({\n",
314
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
315
+ " num_rows: 500\n",
316
+ "})"
317
+ ]
318
+ },
319
+ "execution_count": 40,
320
+ "metadata": {},
321
+ "output_type": "execute_result"
322
+ }
323
+ ],
324
+ "source": [
325
+ "baseline"
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": 41,
331
+ "id": "0524a973",
332
+ "metadata": {},
333
+ "outputs": [
334
+ {
335
+ "data": {
336
+ "text/html": [
337
+ "<div>\n",
338
+ "<style scoped>\n",
339
+ " .dataframe tbody tr th:only-of-type {\n",
340
+ " vertical-align: middle;\n",
341
+ " }\n",
342
+ "\n",
343
+ " .dataframe tbody tr th {\n",
344
+ " vertical-align: top;\n",
345
+ " }\n",
346
+ "\n",
347
+ " .dataframe thead th {\n",
348
+ " text-align: right;\n",
349
+ " }\n",
350
+ "</style>\n",
351
+ "<table border=\"1\" class=\"dataframe\">\n",
352
+ " <thead>\n",
353
+ " <tr style=\"text-align: right;\">\n",
354
+ " <th></th>\n",
355
+ " <th>answers</th>\n",
356
+ " <th>passages</th>\n",
357
+ " <th>query</th>\n",
358
+ " <th>query_id</th>\n",
359
+ " <th>query_type</th>\n",
360
+ " <th>wellFormedAnswers</th>\n",
361
+ " </tr>\n",
362
+ " </thead>\n",
363
+ " <tbody>\n",
364
+ " <tr>\n",
365
+ " <th>0</th>\n",
366
+ " <td>[2,662]</td>\n",
367
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
368
+ " <td>albany mn population</td>\n",
369
+ " <td>15177</td>\n",
370
+ " <td>NUMERIC</td>\n",
371
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
372
+ " </tr>\n",
373
+ " <tr>\n",
374
+ " <th>1</th>\n",
375
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
376
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
377
+ " <td>current weather in volcano, ca</td>\n",
378
+ " <td>114414</td>\n",
379
+ " <td>DESCRIPTION</td>\n",
380
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
381
+ " </tr>\n",
382
+ " <tr>\n",
383
+ " <th>2</th>\n",
384
+ " <td>[Hippocrates]</td>\n",
385
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
386
+ " <td>____________________ is considered the father ...</td>\n",
387
+ " <td>9083</td>\n",
388
+ " <td>DESCRIPTION</td>\n",
389
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
390
+ " </tr>\n",
391
+ " <tr>\n",
392
+ " <th>3</th>\n",
393
+ " <td>[120 days from the date of the Note.]</td>\n",
394
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
395
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
396
+ " <td>281439</td>\n",
397
+ " <td>NUMERIC</td>\n",
398
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
399
+ " </tr>\n",
400
+ " <tr>\n",
401
+ " <th>4</th>\n",
402
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
403
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
404
+ " <td>average pharmacy tech salary</td>\n",
405
+ " <td>40287</td>\n",
406
+ " <td>NUMERIC</td>\n",
407
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
408
+ " </tr>\n",
409
+ " <tr>\n",
410
+ " <th>...</th>\n",
411
+ " <td>...</td>\n",
412
+ " <td>...</td>\n",
413
+ " <td>...</td>\n",
414
+ " <td>...</td>\n",
415
+ " <td>...</td>\n",
416
+ " <td>...</td>\n",
417
+ " </tr>\n",
418
+ " <tr>\n",
419
+ " <th>495</th>\n",
420
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
421
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
422
+ " <td>the pool shower company</td>\n",
423
+ " <td>518269</td>\n",
424
+ " <td>PERSON</td>\n",
425
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
426
+ " </tr>\n",
427
+ " <tr>\n",
428
+ " <th>496</th>\n",
429
+ " <td>[Hanson]</td>\n",
430
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
431
+ " <td>longest tenured american football players</td>\n",
432
+ " <td>442806</td>\n",
433
+ " <td>PERSON</td>\n",
434
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
435
+ " </tr>\n",
436
+ " <tr>\n",
437
+ " <th>497</th>\n",
438
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
439
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
440
+ " <td>mt. view baptist in pendleton sc</td>\n",
441
+ " <td>460250</td>\n",
442
+ " <td>PERSON</td>\n",
443
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <th>498</th>\n",
447
+ " <td>[Honeysuckle Weeks]</td>\n",
448
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
449
+ " <td>what actress disappeared for a while</td>\n",
450
+ " <td>549739</td>\n",
451
+ " <td>PERSON</td>\n",
452
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
453
+ " </tr>\n",
454
+ " <tr>\n",
455
+ " <th>499</th>\n",
456
+ " <td>[African-Nguni]</td>\n",
457
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
458
+ " <td>what ethnicity is the surname sabol</td>\n",
459
+ " <td>658265</td>\n",
460
+ " <td>PERSON</td>\n",
461
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
462
+ " </tr>\n",
463
+ " </tbody>\n",
464
+ "</table>\n",
465
+ "<p>500 rows × 6 columns</p>\n",
466
+ "</div>"
467
+ ],
468
+ "text/plain": [
469
+ " answers \\\n",
470
+ "0 [2,662] \n",
471
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
472
+ "2 [Hippocrates] \n",
473
+ "3 [120 days from the date of the Note.] \n",
474
+ "4 [From $26,000 to $39,000 a year] \n",
475
+ ".. ... \n",
476
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
477
+ "496 [Hanson] \n",
478
+ "497 [Mount Able Baptist Church is located at the a... \n",
479
+ "498 [Honeysuckle Weeks] \n",
480
+ "499 [African-Nguni] \n",
481
+ "\n",
482
+ " passages \\\n",
483
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
484
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
485
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
486
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
487
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
488
+ ".. ... \n",
489
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
490
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
491
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
492
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
493
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
494
+ "\n",
495
+ " query query_id query_type \\\n",
496
+ "0 albany mn population 15177 NUMERIC \n",
497
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
498
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
499
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
500
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
501
+ ".. ... ... ... \n",
502
+ "495 the pool shower company 518269 PERSON \n",
503
+ "496 longest tenured american football players 442806 PERSON \n",
504
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
505
+ "498 what actress disappeared for a while 549739 PERSON \n",
506
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
507
+ "\n",
508
+ " wellFormedAnswers \n",
509
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
510
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
511
+ "2 [Hippocrates is considered the father of moder... \n",
512
+ "3 [An appraisal is good for 120 days from the da... \n",
513
+ "4 [The average salary for a pharmacy technician ... \n",
514
+ ".. ... \n",
515
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
516
+ "496 [Hanson is the longest tenured American footba... \n",
517
+ "497 [Mount Able Baptist Church is located at the a... \n",
518
+ "498 [The actress disappeared for a while Honeysuck... \n",
519
+ "499 [The ethnicity of the surname Sabol is African... \n",
520
+ "\n",
521
+ "[500 rows x 6 columns]"
522
+ ]
523
+ },
524
+ "execution_count": 41,
525
+ "metadata": {},
526
+ "output_type": "execute_result"
527
+ }
528
+ ],
529
+ "source": [
530
+ "baseline.to_pandas()"
531
+ ]
532
+ },
533
+ {
534
+ "cell_type": "code",
535
+ "execution_count": 42,
536
+ "id": "57a195e0",
537
+ "metadata": {},
538
+ "outputs": [
539
+ {
540
+ "data": {
541
+ "application/vnd.jupyter.widget-view+json": {
542
+ "model_id": "66abd394cb054cf1b7459e92d4763d02",
543
+ "version_major": 2,
544
+ "version_minor": 0
545
+ },
546
+ "text/plain": [
547
+ "Saving the dataset (0/1 shards): 0%| | 0/500 [00:00<?, ? examples/s]"
548
+ ]
549
+ },
550
+ "metadata": {},
551
+ "output_type": "display_data"
552
+ }
553
+ ],
554
+ "source": [
555
+ "baseline.save_to_disk(\"../data/datasets/ms_macro/\")"
556
+ ]
557
+ },
558
+ {
559
+ "cell_type": "code",
560
+ "execution_count": 44,
561
+ "id": "b72bf3f9",
562
+ "metadata": {},
563
+ "outputs": [
564
+ {
565
+ "data": {
566
+ "text/plain": [
567
+ "Dataset({\n",
568
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
569
+ " num_rows: 500\n",
570
+ "})"
571
+ ]
572
+ },
573
+ "execution_count": 44,
574
+ "metadata": {},
575
+ "output_type": "execute_result"
576
+ }
577
+ ],
578
+ "source": [
579
+ "from datasets import load_from_disk\n",
580
+ "\n",
581
+ "new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
582
+ "new_ds"
583
+ ]
584
+ },
585
+ {
586
+ "cell_type": "code",
587
+ "execution_count": 45,
588
+ "id": "051bd771",
589
+ "metadata": {},
590
+ "outputs": [
591
+ {
592
+ "data": {
593
+ "text/plain": [
594
+ "({'NUMERIC': 100,\n",
595
+ " 'DESCRIPTION': 100,\n",
596
+ " 'ENTITY': 100,\n",
597
+ " 'PERSON': 100,\n",
598
+ " 'LOCATION': 100},\n",
599
+ " {'NUMERIC': 179,\n",
600
+ " 'DESCRIPTION': 215,\n",
601
+ " 'ENTITY': 443,\n",
602
+ " 'LOCATION': 461,\n",
603
+ " 'PERSON': 499})"
604
+ ]
605
+ },
606
+ "execution_count": 45,
607
+ "metadata": {},
608
+ "output_type": "execute_result"
609
+ }
610
+ ],
611
+ "source": [
612
+ "counts = {}\n",
613
+ "indices = {}\n",
614
+ "size = 100\n",
615
+ "for i in range(new_ds.num_rows):\n",
616
+ " row = new_ds[i]\n",
617
+ " query_type = row[\"query_type\"]\n",
618
+ " if query_type in counts:\n",
619
+ " counts[query_type] += 1\n",
620
+ " else:\n",
621
+ " counts[query_type] = 1\n",
622
+ " if counts[query_type] == size:\n",
623
+ " indices[query_type] = i\n",
624
+ "counts, indices"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "code",
629
+ "execution_count": 46,
630
+ "id": "db48dcc4",
631
+ "metadata": {},
632
+ "outputs": [
633
+ {
634
+ "data": {
635
+ "text/html": [
636
+ "<div>\n",
637
+ "<style scoped>\n",
638
+ " .dataframe tbody tr th:only-of-type {\n",
639
+ " vertical-align: middle;\n",
640
+ " }\n",
641
+ "\n",
642
+ " .dataframe tbody tr th {\n",
643
+ " vertical-align: top;\n",
644
+ " }\n",
645
+ "\n",
646
+ " .dataframe thead th {\n",
647
+ " text-align: right;\n",
648
+ " }\n",
649
+ "</style>\n",
650
+ "<table border=\"1\" class=\"dataframe\">\n",
651
+ " <thead>\n",
652
+ " <tr style=\"text-align: right;\">\n",
653
+ " <th></th>\n",
654
+ " <th>answers</th>\n",
655
+ " <th>passages</th>\n",
656
+ " <th>query</th>\n",
657
+ " <th>query_id</th>\n",
658
+ " <th>query_type</th>\n",
659
+ " <th>wellFormedAnswers</th>\n",
660
+ " </tr>\n",
661
+ " </thead>\n",
662
+ " <tbody>\n",
663
+ " <tr>\n",
664
+ " <th>0</th>\n",
665
+ " <td>[2,662]</td>\n",
666
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
667
+ " <td>albany mn population</td>\n",
668
+ " <td>15177</td>\n",
669
+ " <td>NUMERIC</td>\n",
670
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
671
+ " </tr>\n",
672
+ " <tr>\n",
673
+ " <th>1</th>\n",
674
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
675
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
676
+ " <td>current weather in volcano, ca</td>\n",
677
+ " <td>114414</td>\n",
678
+ " <td>DESCRIPTION</td>\n",
679
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
680
+ " </tr>\n",
681
+ " <tr>\n",
682
+ " <th>2</th>\n",
683
+ " <td>[Hippocrates]</td>\n",
684
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
685
+ " <td>____________________ is considered the father ...</td>\n",
686
+ " <td>9083</td>\n",
687
+ " <td>DESCRIPTION</td>\n",
688
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
689
+ " </tr>\n",
690
+ " <tr>\n",
691
+ " <th>3</th>\n",
692
+ " <td>[120 days from the date of the Note.]</td>\n",
693
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
694
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
695
+ " <td>281439</td>\n",
696
+ " <td>NUMERIC</td>\n",
697
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
698
+ " </tr>\n",
699
+ " <tr>\n",
700
+ " <th>4</th>\n",
701
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
702
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
703
+ " <td>average pharmacy tech salary</td>\n",
704
+ " <td>40287</td>\n",
705
+ " <td>NUMERIC</td>\n",
706
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
707
+ " </tr>\n",
708
+ " <tr>\n",
709
+ " <th>...</th>\n",
710
+ " <td>...</td>\n",
711
+ " <td>...</td>\n",
712
+ " <td>...</td>\n",
713
+ " <td>...</td>\n",
714
+ " <td>...</td>\n",
715
+ " <td>...</td>\n",
716
+ " </tr>\n",
717
+ " <tr>\n",
718
+ " <th>495</th>\n",
719
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
720
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
721
+ " <td>the pool shower company</td>\n",
722
+ " <td>518269</td>\n",
723
+ " <td>PERSON</td>\n",
724
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
725
+ " </tr>\n",
726
+ " <tr>\n",
727
+ " <th>496</th>\n",
728
+ " <td>[Hanson]</td>\n",
729
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
730
+ " <td>longest tenured american football players</td>\n",
731
+ " <td>442806</td>\n",
732
+ " <td>PERSON</td>\n",
733
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
734
+ " </tr>\n",
735
+ " <tr>\n",
736
+ " <th>497</th>\n",
737
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
738
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
739
+ " <td>mt. view baptist in pendleton sc</td>\n",
740
+ " <td>460250</td>\n",
741
+ " <td>PERSON</td>\n",
742
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
743
+ " </tr>\n",
744
+ " <tr>\n",
745
+ " <th>498</th>\n",
746
+ " <td>[Honeysuckle Weeks]</td>\n",
747
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
748
+ " <td>what actress disappeared for a while</td>\n",
749
+ " <td>549739</td>\n",
750
+ " <td>PERSON</td>\n",
751
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
752
+ " </tr>\n",
753
+ " <tr>\n",
754
+ " <th>499</th>\n",
755
+ " <td>[African-Nguni]</td>\n",
756
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
757
+ " <td>what ethnicity is the surname sabol</td>\n",
758
+ " <td>658265</td>\n",
759
+ " <td>PERSON</td>\n",
760
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
761
+ " </tr>\n",
762
+ " </tbody>\n",
763
+ "</table>\n",
764
+ "<p>500 rows × 6 columns</p>\n",
765
+ "</div>"
766
+ ],
767
+ "text/plain": [
768
+ " answers \\\n",
769
+ "0 [2,662] \n",
770
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
771
+ "2 [Hippocrates] \n",
772
+ "3 [120 days from the date of the Note.] \n",
773
+ "4 [From $26,000 to $39,000 a year] \n",
774
+ ".. ... \n",
775
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
776
+ "496 [Hanson] \n",
777
+ "497 [Mount Able Baptist Church is located at the a... \n",
778
+ "498 [Honeysuckle Weeks] \n",
779
+ "499 [African-Nguni] \n",
780
+ "\n",
781
+ " passages \\\n",
782
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
783
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
784
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
785
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
786
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
787
+ ".. ... \n",
788
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
789
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
790
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
791
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
792
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
793
+ "\n",
794
+ " query query_id query_type \\\n",
795
+ "0 albany mn population 15177 NUMERIC \n",
796
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
797
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
798
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
799
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
800
+ ".. ... ... ... \n",
801
+ "495 the pool shower company 518269 PERSON \n",
802
+ "496 longest tenured american football players 442806 PERSON \n",
803
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
804
+ "498 what actress disappeared for a while 549739 PERSON \n",
805
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
806
+ "\n",
807
+ " wellFormedAnswers \n",
808
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
809
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
810
+ "2 [Hippocrates is considered the father of moder... \n",
811
+ "3 [An appraisal is good for 120 days from the da... \n",
812
+ "4 [The average salary for a pharmacy technician ... \n",
813
+ ".. ... \n",
814
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
815
+ "496 [Hanson is the longest tenured American footba... \n",
816
+ "497 [Mount Able Baptist Church is located at the a... \n",
817
+ "498 [The actress disappeared for a while Honeysuck... \n",
818
+ "499 [The ethnicity of the surname Sabol is African... \n",
819
+ "\n",
820
+ "[500 rows x 6 columns]"
821
+ ]
822
+ },
823
+ "execution_count": 46,
824
+ "metadata": {},
825
+ "output_type": "execute_result"
826
+ }
827
+ ],
828
+ "source": [
829
+ "new_ds.to_pandas()"
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "code",
834
+ "execution_count": 47,
835
+ "id": "7ed0c22d",
836
+ "metadata": {},
837
+ "outputs": [],
838
+ "source": [
839
+ "\"\"\"\n",
840
+ "Official evaluation script for QAConv, modified from SQuAD 2.0.\n",
841
+ "\n",
842
+ " * Copyright (c) 2021, salesforce.com, inc.\n",
843
+ " * All rights reserved.\n",
844
+ " * SPDX-License-Identifier: BSD-3-Clause\n",
845
+ " * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause\n",
846
+ "\n",
847
+ "\"\"\"\n",
848
+ "\n",
849
+ "import collections\n",
850
+ "import re\n",
851
+ "import string\n",
852
+ "\n",
853
+ "\n",
854
+ "def normalize_answer(s):\n",
855
+ " \"\"\"Lower text and remove punctuation, articles and extra whitespace.\"\"\"\n",
856
+ "\n",
857
+ " def remove_articles(text):\n",
858
+ " regex = re.compile(r\"\\b(a|an|the)\\b\", re.UNICODE)\n",
859
+ " return re.sub(regex, \" \", text)\n",
860
+ "\n",
861
+ " def white_space_fix(text):\n",
862
+ " return \" \".join(text.split())\n",
863
+ "\n",
864
+ " def remove_punc(text):\n",
865
+ " exclude = set(string.punctuation)\n",
866
+ " return \"\".join(ch for ch in text if ch not in exclude)\n",
867
+ "\n",
868
+ " def lower(text):\n",
869
+ " return text.lower()\n",
870
+ "\n",
871
+ " return white_space_fix(remove_articles(remove_punc(lower(s))))\n",
872
+ "\n",
873
+ "\n",
874
+ "def get_tokens(s):\n",
875
+ " if not s:\n",
876
+ " return []\n",
877
+ " return normalize_answer(s).split()\n",
878
+ "\n",
879
+ "\n",
880
+ "def compute_exact(a_gold, a_pred):\n",
881
+ " return int(normalize_answer(a_gold) == normalize_answer(a_pred))\n",
882
+ "\n",
883
+ "\n",
884
+ "def compute_f1(a_gold, a_pred):\n",
885
+ " gold_toks = get_tokens(a_gold)\n",
886
+ " pred_toks = get_tokens(a_pred)\n",
887
+ " common = collections.Counter(gold_toks) & collections.Counter(pred_toks)\n",
888
+ " num_same = sum(common.values())\n",
889
+ " if len(gold_toks) == 0 or len(pred_toks) == 0:\n",
890
+ " # If either is no-answer, then F1 is 1 if they agree, 0 otherwise\n",
891
+ " return int(gold_toks == pred_toks)\n",
892
+ " if num_same == 0:\n",
893
+ " return 0\n",
894
+ " precision = 1.0 * num_same / len(pred_toks)\n",
895
+ " recall = 1.0 * num_same / len(gold_toks)\n",
896
+ " f1 = (2 * precision * recall) / (precision + recall)\n",
897
+ " return f1"
898
+ ]
899
+ },
900
+ {
901
+ "cell_type": "code",
902
+ "execution_count": 49,
903
+ "id": "d9ff4756",
904
+ "metadata": {},
905
+ "outputs": [
906
+ {
907
+ "data": {
908
+ "application/vnd.jupyter.widget-view+json": {
909
+ "model_id": "d8a8d425f60a467eb56f6a13a50ed94b",
910
+ "version_major": 2,
911
+ "version_minor": 0
912
+ },
913
+ "text/plain": [
914
+ "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
915
+ ]
916
+ },
917
+ "metadata": {},
918
+ "output_type": "display_data"
919
+ },
920
+ {
921
+ "data": {
922
+ "text/plain": [
923
+ "Dataset({\n",
924
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers', 'EM', 'F1'],\n",
925
+ " num_rows: 500\n",
926
+ "})"
927
+ ]
928
+ },
929
+ "execution_count": 49,
930
+ "metadata": {},
931
+ "output_type": "execute_result"
932
+ }
933
+ ],
934
+ "source": [
935
+ "result_all = new_ds.map(\n",
936
+ " lambda record, idx: {\n",
937
+ " \"EM\": compute_exact(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
938
+ " \"F1\": compute_f1(record[\"wellFormedAnswers\"][0], record[\"answers\"][0]),\n",
939
+ " },\n",
940
+ " batched=False,\n",
941
+ " with_indices=True,\n",
942
+ ")\n",
943
+ "result_all"
944
+ ]
945
+ },
946
+ {
947
+ "cell_type": "code",
948
+ "execution_count": 50,
949
+ "id": "31402fb2",
950
+ "metadata": {},
951
+ "outputs": [
952
+ {
953
+ "data": {
954
+ "text/html": [
955
+ "<div>\n",
956
+ "<style scoped>\n",
957
+ " .dataframe tbody tr th:only-of-type {\n",
958
+ " vertical-align: middle;\n",
959
+ " }\n",
960
+ "\n",
961
+ " .dataframe tbody tr th {\n",
962
+ " vertical-align: top;\n",
963
+ " }\n",
964
+ "\n",
965
+ " .dataframe thead th {\n",
966
+ " text-align: right;\n",
967
+ " }\n",
968
+ "</style>\n",
969
+ "<table border=\"1\" class=\"dataframe\">\n",
970
+ " <thead>\n",
971
+ " <tr style=\"text-align: right;\">\n",
972
+ " <th></th>\n",
973
+ " <th>answers</th>\n",
974
+ " <th>passages</th>\n",
975
+ " <th>query</th>\n",
976
+ " <th>query_id</th>\n",
977
+ " <th>query_type</th>\n",
978
+ " <th>wellFormedAnswers</th>\n",
979
+ " <th>EM</th>\n",
980
+ " <th>F1</th>\n",
981
+ " </tr>\n",
982
+ " </thead>\n",
983
+ " <tbody>\n",
984
+ " <tr>\n",
985
+ " <th>0</th>\n",
986
+ " <td>[2,662]</td>\n",
987
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
988
+ " <td>albany mn population</td>\n",
989
+ " <td>15177</td>\n",
990
+ " <td>NUMERIC</td>\n",
991
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
992
+ " <td>0</td>\n",
993
+ " <td>0.285714</td>\n",
994
+ " </tr>\n",
995
+ " <tr>\n",
996
+ " <th>1</th>\n",
997
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
998
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
999
+ " <td>current weather in volcano, ca</td>\n",
1000
+ " <td>114414</td>\n",
1001
+ " <td>DESCRIPTION</td>\n",
1002
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1003
+ " <td>1</td>\n",
1004
+ " <td>1.000000</td>\n",
1005
+ " </tr>\n",
1006
+ " <tr>\n",
1007
+ " <th>2</th>\n",
1008
+ " <td>[Hippocrates]</td>\n",
1009
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1010
+ " <td>____________________ is considered the father ...</td>\n",
1011
+ " <td>9083</td>\n",
1012
+ " <td>DESCRIPTION</td>\n",
1013
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
1014
+ " <td>0</td>\n",
1015
+ " <td>0.250000</td>\n",
1016
+ " </tr>\n",
1017
+ " <tr>\n",
1018
+ " <th>3</th>\n",
1019
+ " <td>[120 days from the date of the Note.]</td>\n",
1020
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1021
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
1022
+ " <td>281439</td>\n",
1023
+ " <td>NUMERIC</td>\n",
1024
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
1025
+ " <td>0</td>\n",
1026
+ " <td>0.631579</td>\n",
1027
+ " </tr>\n",
1028
+ " <tr>\n",
1029
+ " <th>4</th>\n",
1030
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
1031
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1032
+ " <td>average pharmacy tech salary</td>\n",
1033
+ " <td>40287</td>\n",
1034
+ " <td>NUMERIC</td>\n",
1035
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
1036
+ " <td>0</td>\n",
1037
+ " <td>0.500000</td>\n",
1038
+ " </tr>\n",
1039
+ " <tr>\n",
1040
+ " <th>...</th>\n",
1041
+ " <td>...</td>\n",
1042
+ " <td>...</td>\n",
1043
+ " <td>...</td>\n",
1044
+ " <td>...</td>\n",
1045
+ " <td>...</td>\n",
1046
+ " <td>...</td>\n",
1047
+ " <td>...</td>\n",
1048
+ " <td>...</td>\n",
1049
+ " </tr>\n",
1050
+ " <tr>\n",
1051
+ " <th>495</th>\n",
1052
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1053
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1054
+ " <td>the pool shower company</td>\n",
1055
+ " <td>518269</td>\n",
1056
+ " <td>PERSON</td>\n",
1057
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1058
+ " <td>1</td>\n",
1059
+ " <td>1.000000</td>\n",
1060
+ " </tr>\n",
1061
+ " <tr>\n",
1062
+ " <th>496</th>\n",
1063
+ " <td>[Hanson]</td>\n",
1064
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1065
+ " <td>longest tenured american football players</td>\n",
1066
+ " <td>442806</td>\n",
1067
+ " <td>PERSON</td>\n",
1068
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
1069
+ " <td>0</td>\n",
1070
+ " <td>0.250000</td>\n",
1071
+ " </tr>\n",
1072
+ " <tr>\n",
1073
+ " <th>497</th>\n",
1074
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1075
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1076
+ " <td>mt. view baptist in pendleton sc</td>\n",
1077
+ " <td>460250</td>\n",
1078
+ " <td>PERSON</td>\n",
1079
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1080
+ " <td>1</td>\n",
1081
+ " <td>1.000000</td>\n",
1082
+ " </tr>\n",
1083
+ " <tr>\n",
1084
+ " <th>498</th>\n",
1085
+ " <td>[Honeysuckle Weeks]</td>\n",
1086
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1087
+ " <td>what actress disappeared for a while</td>\n",
1088
+ " <td>549739</td>\n",
1089
+ " <td>PERSON</td>\n",
1090
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1091
+ " <td>0</td>\n",
1092
+ " <td>0.500000</td>\n",
1093
+ " </tr>\n",
1094
+ " <tr>\n",
1095
+ " <th>499</th>\n",
1096
+ " <td>[African-Nguni]</td>\n",
1097
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1098
+ " <td>what ethnicity is the surname sabol</td>\n",
1099
+ " <td>658265</td>\n",
1100
+ " <td>PERSON</td>\n",
1101
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1102
+ " <td>0</td>\n",
1103
+ " <td>0.285714</td>\n",
1104
+ " </tr>\n",
1105
+ " </tbody>\n",
1106
+ "</table>\n",
1107
+ "<p>500 rows × 8 columns</p>\n",
1108
+ "</div>"
1109
+ ],
1110
+ "text/plain": [
1111
+ " answers \\\n",
1112
+ "0 [2,662] \n",
1113
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1114
+ "2 [Hippocrates] \n",
1115
+ "3 [120 days from the date of the Note.] \n",
1116
+ "4 [From $26,000 to $39,000 a year] \n",
1117
+ ".. ... \n",
1118
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1119
+ "496 [Hanson] \n",
1120
+ "497 [Mount Able Baptist Church is located at the a... \n",
1121
+ "498 [Honeysuckle Weeks] \n",
1122
+ "499 [African-Nguni] \n",
1123
+ "\n",
1124
+ " passages \\\n",
1125
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1126
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1127
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1128
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1129
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1130
+ ".. ... \n",
1131
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1132
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1133
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1134
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1135
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1136
+ "\n",
1137
+ " query query_id query_type \\\n",
1138
+ "0 albany mn population 15177 NUMERIC \n",
1139
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1140
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1141
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1142
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
1143
+ ".. ... ... ... \n",
1144
+ "495 the pool shower company 518269 PERSON \n",
1145
+ "496 longest tenured american football players 442806 PERSON \n",
1146
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1147
+ "498 what actress disappeared for a while 549739 PERSON \n",
1148
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
1149
+ "\n",
1150
+ " wellFormedAnswers EM F1 \n",
1151
+ "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1152
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1153
+ "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1154
+ "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1155
+ "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1156
+ ".. ... .. ... \n",
1157
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1158
+ "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1159
+ "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1160
+ "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1161
+ "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1162
+ "\n",
1163
+ "[500 rows x 8 columns]"
1164
+ ]
1165
+ },
1166
+ "execution_count": 50,
1167
+ "metadata": {},
1168
+ "output_type": "execute_result"
1169
+ }
1170
+ ],
1171
+ "source": [
1172
+ "result_all.to_pandas()"
1173
+ ]
1174
+ },
1175
+ {
1176
+ "cell_type": "code",
1177
+ "execution_count": 53,
1178
+ "id": "af2d4577",
1179
+ "metadata": {},
1180
+ "outputs": [
1181
+ {
1182
+ "name": "stdout",
1183
+ "output_type": "stream",
1184
+ "text": [
1185
+ "Note: you may need to restart the kernel to use updated packages.\n"
1186
+ ]
1187
+ }
1188
+ ],
1189
+ "source": [
1190
+ "%pip install -q evaluate rouge_score"
1191
+ ]
1192
+ },
1193
+ {
1194
+ "cell_type": "code",
1195
+ "execution_count": 54,
1196
+ "id": "89494c3d",
1197
+ "metadata": {},
1198
+ "outputs": [],
1199
+ "source": [
1200
+ "import evaluate\n",
1201
+ "\n",
1202
+ "bleu = evaluate.load(\"bleu\")\n",
1203
+ "rouge = evaluate.load(\"rouge\")"
1204
+ ]
1205
+ },
1206
+ {
1207
+ "cell_type": "code",
1208
+ "execution_count": 56,
1209
+ "id": "e447aa08",
1210
+ "metadata": {},
1211
+ "outputs": [
1212
+ {
1213
+ "data": {
1214
+ "application/vnd.jupyter.widget-view+json": {
1215
+ "model_id": "c87066449ebc44d39a66b1630977f2ac",
1216
+ "version_major": 2,
1217
+ "version_minor": 0
1218
+ },
1219
+ "text/plain": [
1220
+ "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
1221
+ ]
1222
+ },
1223
+ "metadata": {},
1224
+ "output_type": "display_data"
1225
+ },
1226
+ {
1227
+ "data": {
1228
+ "text/html": [
1229
+ "<div>\n",
1230
+ "<style scoped>\n",
1231
+ " .dataframe tbody tr th:only-of-type {\n",
1232
+ " vertical-align: middle;\n",
1233
+ " }\n",
1234
+ "\n",
1235
+ " .dataframe tbody tr th {\n",
1236
+ " vertical-align: top;\n",
1237
+ " }\n",
1238
+ "\n",
1239
+ " .dataframe thead th {\n",
1240
+ " text-align: right;\n",
1241
+ " }\n",
1242
+ "</style>\n",
1243
+ "<table border=\"1\" class=\"dataframe\">\n",
1244
+ " <thead>\n",
1245
+ " <tr style=\"text-align: right;\">\n",
1246
+ " <th></th>\n",
1247
+ " <th>answers</th>\n",
1248
+ " <th>passages</th>\n",
1249
+ " <th>query</th>\n",
1250
+ " <th>query_id</th>\n",
1251
+ " <th>query_type</th>\n",
1252
+ " <th>wellFormedAnswers</th>\n",
1253
+ " <th>EM</th>\n",
1254
+ " <th>F1</th>\n",
1255
+ " <th>bleu</th>\n",
1256
+ " <th>precisions</th>\n",
1257
+ " <th>brevity_penalty</th>\n",
1258
+ " <th>length_ratio</th>\n",
1259
+ " <th>translation_length</th>\n",
1260
+ " <th>reference_length</th>\n",
1261
+ " </tr>\n",
1262
+ " </thead>\n",
1263
+ " <tbody>\n",
1264
+ " <tr>\n",
1265
+ " <th>0</th>\n",
1266
+ " <td>[2,662]</td>\n",
1267
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
1268
+ " <td>albany mn population</td>\n",
1269
+ " <td>15177</td>\n",
1270
+ " <td>NUMERIC</td>\n",
1271
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
1272
+ " <td>0</td>\n",
1273
+ " <td>0.285714</td>\n",
1274
+ " <td>0.000000</td>\n",
1275
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1276
+ " <td>0.000335</td>\n",
1277
+ " <td>0.111111</td>\n",
1278
+ " <td>1</td>\n",
1279
+ " <td>9</td>\n",
1280
+ " </tr>\n",
1281
+ " <tr>\n",
1282
+ " <th>1</th>\n",
1283
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1284
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
1285
+ " <td>current weather in volcano, ca</td>\n",
1286
+ " <td>114414</td>\n",
1287
+ " <td>DESCRIPTION</td>\n",
1288
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1289
+ " <td>1</td>\n",
1290
+ " <td>1.000000</td>\n",
1291
+ " <td>1.000000</td>\n",
1292
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1293
+ " <td>1.000000</td>\n",
1294
+ " <td>1.000000</td>\n",
1295
+ " <td>14</td>\n",
1296
+ " <td>14</td>\n",
1297
+ " </tr>\n",
1298
+ " <tr>\n",
1299
+ " <th>2</th>\n",
1300
+ " <td>[Hippocrates]</td>\n",
1301
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1302
+ " <td>____________________ is considered the father ...</td>\n",
1303
+ " <td>9083</td>\n",
1304
+ " <td>DESCRIPTION</td>\n",
1305
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
1306
+ " <td>0</td>\n",
1307
+ " <td>0.250000</td>\n",
1308
+ " <td>0.000000</td>\n",
1309
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1310
+ " <td>0.000335</td>\n",
1311
+ " <td>0.111111</td>\n",
1312
+ " <td>1</td>\n",
1313
+ " <td>9</td>\n",
1314
+ " </tr>\n",
1315
+ " <tr>\n",
1316
+ " <th>3</th>\n",
1317
+ " <td>[120 days from the date of the Note.]</td>\n",
1318
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1319
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
1320
+ " <td>281439</td>\n",
1321
+ " <td>NUMERIC</td>\n",
1322
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
1323
+ " <td>0</td>\n",
1324
+ " <td>0.631579</td>\n",
1325
+ " <td>0.327096</td>\n",
1326
+ " <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
1327
+ " <td>0.367879</td>\n",
1328
+ " <td>0.500000</td>\n",
1329
+ " <td>9</td>\n",
1330
+ " <td>18</td>\n",
1331
+ " </tr>\n",
1332
+ " <tr>\n",
1333
+ " <th>4</th>\n",
1334
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
1335
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1336
+ " <td>average pharmacy tech salary</td>\n",
1337
+ " <td>40287</td>\n",
1338
+ " <td>NUMERIC</td>\n",
1339
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
1340
+ " <td>0</td>\n",
1341
+ " <td>0.500000</td>\n",
1342
+ " <td>0.193040</td>\n",
1343
+ " <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
1344
+ " <td>0.324652</td>\n",
1345
+ " <td>0.470588</td>\n",
1346
+ " <td>8</td>\n",
1347
+ " <td>17</td>\n",
1348
+ " </tr>\n",
1349
+ " <tr>\n",
1350
+ " <th>...</th>\n",
1351
+ " <td>...</td>\n",
1352
+ " <td>...</td>\n",
1353
+ " <td>...</td>\n",
1354
+ " <td>...</td>\n",
1355
+ " <td>...</td>\n",
1356
+ " <td>...</td>\n",
1357
+ " <td>...</td>\n",
1358
+ " <td>...</td>\n",
1359
+ " <td>...</td>\n",
1360
+ " <td>...</td>\n",
1361
+ " <td>...</td>\n",
1362
+ " <td>...</td>\n",
1363
+ " <td>...</td>\n",
1364
+ " <td>...</td>\n",
1365
+ " </tr>\n",
1366
+ " <tr>\n",
1367
+ " <th>495</th>\n",
1368
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1369
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1370
+ " <td>the pool shower company</td>\n",
1371
+ " <td>518269</td>\n",
1372
+ " <td>PERSON</td>\n",
1373
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1374
+ " <td>1</td>\n",
1375
+ " <td>1.000000</td>\n",
1376
+ " <td>1.000000</td>\n",
1377
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1378
+ " <td>1.000000</td>\n",
1379
+ " <td>1.000000</td>\n",
1380
+ " <td>19</td>\n",
1381
+ " <td>19</td>\n",
1382
+ " </tr>\n",
1383
+ " <tr>\n",
1384
+ " <th>496</th>\n",
1385
+ " <td>[Hanson]</td>\n",
1386
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1387
+ " <td>longest tenured american football players</td>\n",
1388
+ " <td>442806</td>\n",
1389
+ " <td>PERSON</td>\n",
1390
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
1391
+ " <td>0</td>\n",
1392
+ " <td>0.250000</td>\n",
1393
+ " <td>0.000000</td>\n",
1394
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1395
+ " <td>0.000335</td>\n",
1396
+ " <td>0.111111</td>\n",
1397
+ " <td>1</td>\n",
1398
+ " <td>9</td>\n",
1399
+ " </tr>\n",
1400
+ " <tr>\n",
1401
+ " <th>497</th>\n",
1402
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1403
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1404
+ " <td>mt. view baptist in pendleton sc</td>\n",
1405
+ " <td>460250</td>\n",
1406
+ " <td>PERSON</td>\n",
1407
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1408
+ " <td>1</td>\n",
1409
+ " <td>1.000000</td>\n",
1410
+ " <td>1.000000</td>\n",
1411
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1412
+ " <td>1.000000</td>\n",
1413
+ " <td>1.000000</td>\n",
1414
+ " <td>21</td>\n",
1415
+ " <td>21</td>\n",
1416
+ " </tr>\n",
1417
+ " <tr>\n",
1418
+ " <th>498</th>\n",
1419
+ " <td>[Honeysuckle Weeks]</td>\n",
1420
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1421
+ " <td>what actress disappeared for a while</td>\n",
1422
+ " <td>549739</td>\n",
1423
+ " <td>PERSON</td>\n",
1424
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1425
+ " <td>0</td>\n",
1426
+ " <td>0.500000</td>\n",
1427
+ " <td>0.000000</td>\n",
1428
+ " <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
1429
+ " <td>0.030197</td>\n",
1430
+ " <td>0.222222</td>\n",
1431
+ " <td>2</td>\n",
1432
+ " <td>9</td>\n",
1433
+ " </tr>\n",
1434
+ " <tr>\n",
1435
+ " <th>499</th>\n",
1436
+ " <td>[African-Nguni]</td>\n",
1437
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1438
+ " <td>what ethnicity is the surname sabol</td>\n",
1439
+ " <td>658265</td>\n",
1440
+ " <td>PERSON</td>\n",
1441
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1442
+ " <td>0</td>\n",
1443
+ " <td>0.285714</td>\n",
1444
+ " <td>0.000000</td>\n",
1445
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1446
+ " <td>0.000335</td>\n",
1447
+ " <td>0.111111</td>\n",
1448
+ " <td>1</td>\n",
1449
+ " <td>9</td>\n",
1450
+ " </tr>\n",
1451
+ " </tbody>\n",
1452
+ "</table>\n",
1453
+ "<p>500 rows × 14 columns</p>\n",
1454
+ "</div>"
1455
+ ],
1456
+ "text/plain": [
1457
+ " answers \\\n",
1458
+ "0 [2,662] \n",
1459
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1460
+ "2 [Hippocrates] \n",
1461
+ "3 [120 days from the date of the Note.] \n",
1462
+ "4 [From $26,000 to $39,000 a year] \n",
1463
+ ".. ... \n",
1464
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1465
+ "496 [Hanson] \n",
1466
+ "497 [Mount Able Baptist Church is located at the a... \n",
1467
+ "498 [Honeysuckle Weeks] \n",
1468
+ "499 [African-Nguni] \n",
1469
+ "\n",
1470
+ " passages \\\n",
1471
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1472
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1473
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1474
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1475
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1476
+ ".. ... \n",
1477
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1478
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1479
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1480
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1481
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1482
+ "\n",
1483
+ " query query_id query_type \\\n",
1484
+ "0 albany mn population 15177 NUMERIC \n",
1485
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1486
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1487
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1488
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
1489
+ ".. ... ... ... \n",
1490
+ "495 the pool shower company 518269 PERSON \n",
1491
+ "496 longest tenured american football players 442806 PERSON \n",
1492
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1493
+ "498 what actress disappeared for a while 549739 PERSON \n",
1494
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
1495
+ "\n",
1496
+ " wellFormedAnswers EM F1 \\\n",
1497
+ "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1498
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1499
+ "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1500
+ "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1501
+ "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1502
+ ".. ... .. ... \n",
1503
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1504
+ "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1505
+ "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1506
+ "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1507
+ "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1508
+ "\n",
1509
+ " bleu precisions \\\n",
1510
+ "0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1511
+ "1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1512
+ "2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1513
+ "3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
1514
+ "4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
1515
+ ".. ... ... \n",
1516
+ "495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1517
+ "496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1518
+ "497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1519
+ "498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
1520
+ "499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1521
+ "\n",
1522
+ " brevity_penalty length_ratio translation_length reference_length \n",
1523
+ "0 0.000335 0.111111 1 9 \n",
1524
+ "1 1.000000 1.000000 14 14 \n",
1525
+ "2 0.000335 0.111111 1 9 \n",
1526
+ "3 0.367879 0.500000 9 18 \n",
1527
+ "4 0.324652 0.470588 8 17 \n",
1528
+ ".. ... ... ... ... \n",
1529
+ "495 1.000000 1.000000 19 19 \n",
1530
+ "496 0.000335 0.111111 1 9 \n",
1531
+ "497 1.000000 1.000000 21 21 \n",
1532
+ "498 0.030197 0.222222 2 9 \n",
1533
+ "499 0.000335 0.111111 1 9 \n",
1534
+ "\n",
1535
+ "[500 rows x 14 columns]"
1536
+ ]
1537
+ },
1538
+ "execution_count": 56,
1539
+ "metadata": {},
1540
+ "output_type": "execute_result"
1541
+ }
1542
+ ],
1543
+ "source": [
1544
+ "result_all = result_all.map(\n",
1545
+ " lambda record: bleu.compute(\n",
1546
+ " predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
1547
+ " ),\n",
1548
+ " batched=False,\n",
1549
+ ")\n",
1550
+ "result_all.to_pandas()"
1551
+ ]
1552
+ },
1553
+ {
1554
+ "cell_type": "code",
1555
+ "execution_count": 57,
1556
+ "id": "fbbe31fd",
1557
+ "metadata": {},
1558
+ "outputs": [
1559
+ {
1560
+ "data": {
1561
+ "application/vnd.jupyter.widget-view+json": {
1562
+ "model_id": "88f839b74aa54fcd8c95215e22e30472",
1563
+ "version_major": 2,
1564
+ "version_minor": 0
1565
+ },
1566
+ "text/plain": [
1567
+ "Map: 0%| | 0/500 [00:00<?, ? examples/s]"
1568
+ ]
1569
+ },
1570
+ "metadata": {},
1571
+ "output_type": "display_data"
1572
+ },
1573
+ {
1574
+ "data": {
1575
+ "text/html": [
1576
+ "<div>\n",
1577
+ "<style scoped>\n",
1578
+ " .dataframe tbody tr th:only-of-type {\n",
1579
+ " vertical-align: middle;\n",
1580
+ " }\n",
1581
+ "\n",
1582
+ " .dataframe tbody tr th {\n",
1583
+ " vertical-align: top;\n",
1584
+ " }\n",
1585
+ "\n",
1586
+ " .dataframe thead th {\n",
1587
+ " text-align: right;\n",
1588
+ " }\n",
1589
+ "</style>\n",
1590
+ "<table border=\"1\" class=\"dataframe\">\n",
1591
+ " <thead>\n",
1592
+ " <tr style=\"text-align: right;\">\n",
1593
+ " <th></th>\n",
1594
+ " <th>answers</th>\n",
1595
+ " <th>passages</th>\n",
1596
+ " <th>query</th>\n",
1597
+ " <th>query_id</th>\n",
1598
+ " <th>query_type</th>\n",
1599
+ " <th>wellFormedAnswers</th>\n",
1600
+ " <th>EM</th>\n",
1601
+ " <th>F1</th>\n",
1602
+ " <th>bleu</th>\n",
1603
+ " <th>precisions</th>\n",
1604
+ " <th>brevity_penalty</th>\n",
1605
+ " <th>length_ratio</th>\n",
1606
+ " <th>translation_length</th>\n",
1607
+ " <th>reference_length</th>\n",
1608
+ " <th>rouge1</th>\n",
1609
+ " <th>rouge2</th>\n",
1610
+ " <th>rougeL</th>\n",
1611
+ " <th>rougeLsum</th>\n",
1612
+ " </tr>\n",
1613
+ " </thead>\n",
1614
+ " <tbody>\n",
1615
+ " <tr>\n",
1616
+ " <th>0</th>\n",
1617
+ " <td>[2,662]</td>\n",
1618
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
1619
+ " <td>albany mn population</td>\n",
1620
+ " <td>15177</td>\n",
1621
+ " <td>NUMERIC</td>\n",
1622
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
1623
+ " <td>0</td>\n",
1624
+ " <td>0.285714</td>\n",
1625
+ " <td>0.000000</td>\n",
1626
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1627
+ " <td>0.000335</td>\n",
1628
+ " <td>0.111111</td>\n",
1629
+ " <td>1</td>\n",
1630
+ " <td>9</td>\n",
1631
+ " <td>0.400000</td>\n",
1632
+ " <td>0.250000</td>\n",
1633
+ " <td>0.400000</td>\n",
1634
+ " <td>0.400000</td>\n",
1635
+ " </tr>\n",
1636
+ " <tr>\n",
1637
+ " <th>1</th>\n",
1638
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1639
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
1640
+ " <td>current weather in volcano, ca</td>\n",
1641
+ " <td>114414</td>\n",
1642
+ " <td>DESCRIPTION</td>\n",
1643
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
1644
+ " <td>1</td>\n",
1645
+ " <td>1.000000</td>\n",
1646
+ " <td>1.000000</td>\n",
1647
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1648
+ " <td>1.000000</td>\n",
1649
+ " <td>1.000000</td>\n",
1650
+ " <td>14</td>\n",
1651
+ " <td>14</td>\n",
1652
+ " <td>1.000000</td>\n",
1653
+ " <td>1.000000</td>\n",
1654
+ " <td>1.000000</td>\n",
1655
+ " <td>1.000000</td>\n",
1656
+ " </tr>\n",
1657
+ " <tr>\n",
1658
+ " <th>2</th>\n",
1659
+ " <td>[Hippocrates]</td>\n",
1660
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
1661
+ " <td>____________________ is considered the father ...</td>\n",
1662
+ " <td>9083</td>\n",
1663
+ " <td>DESCRIPTION</td>\n",
1664
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
1665
+ " <td>0</td>\n",
1666
+ " <td>0.250000</td>\n",
1667
+ " <td>0.000000</td>\n",
1668
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1669
+ " <td>0.000335</td>\n",
1670
+ " <td>0.111111</td>\n",
1671
+ " <td>1</td>\n",
1672
+ " <td>9</td>\n",
1673
+ " <td>0.222222</td>\n",
1674
+ " <td>0.000000</td>\n",
1675
+ " <td>0.222222</td>\n",
1676
+ " <td>0.222222</td>\n",
1677
+ " </tr>\n",
1678
+ " <tr>\n",
1679
+ " <th>3</th>\n",
1680
+ " <td>[120 days from the date of the Note.]</td>\n",
1681
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1682
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
1683
+ " <td>281439</td>\n",
1684
+ " <td>NUMERIC</td>\n",
1685
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
1686
+ " <td>0</td>\n",
1687
+ " <td>0.631579</td>\n",
1688
+ " <td>0.327096</td>\n",
1689
+ " <td>[1.0, 0.875, 0.8571428571428571, 0.83333333333...</td>\n",
1690
+ " <td>0.367879</td>\n",
1691
+ " <td>0.500000</td>\n",
1692
+ " <td>9</td>\n",
1693
+ " <td>18</td>\n",
1694
+ " <td>0.640000</td>\n",
1695
+ " <td>0.608696</td>\n",
1696
+ " <td>0.640000</td>\n",
1697
+ " <td>0.640000</td>\n",
1698
+ " </tr>\n",
1699
+ " <tr>\n",
1700
+ " <th>4</th>\n",
1701
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
1702
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
1703
+ " <td>average pharmacy tech salary</td>\n",
1704
+ " <td>40287</td>\n",
1705
+ " <td>NUMERIC</td>\n",
1706
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
1707
+ " <td>0</td>\n",
1708
+ " <td>0.500000</td>\n",
1709
+ " <td>0.193040</td>\n",
1710
+ " <td>[0.875, 0.7142857142857143, 0.5, 0.4]</td>\n",
1711
+ " <td>0.324652</td>\n",
1712
+ " <td>0.470588</td>\n",
1713
+ " <td>8</td>\n",
1714
+ " <td>17</td>\n",
1715
+ " <td>0.583333</td>\n",
1716
+ " <td>0.454545</td>\n",
1717
+ " <td>0.583333</td>\n",
1718
+ " <td>0.583333</td>\n",
1719
+ " </tr>\n",
1720
+ " <tr>\n",
1721
+ " <th>...</th>\n",
1722
+ " <td>...</td>\n",
1723
+ " <td>...</td>\n",
1724
+ " <td>...</td>\n",
1725
+ " <td>...</td>\n",
1726
+ " <td>...</td>\n",
1727
+ " <td>...</td>\n",
1728
+ " <td>...</td>\n",
1729
+ " <td>...</td>\n",
1730
+ " <td>...</td>\n",
1731
+ " <td>...</td>\n",
1732
+ " <td>...</td>\n",
1733
+ " <td>...</td>\n",
1734
+ " <td>...</td>\n",
1735
+ " <td>...</td>\n",
1736
+ " <td>...</td>\n",
1737
+ " <td>...</td>\n",
1738
+ " <td>...</td>\n",
1739
+ " <td>...</td>\n",
1740
+ " </tr>\n",
1741
+ " <tr>\n",
1742
+ " <th>495</th>\n",
1743
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1744
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
1745
+ " <td>the pool shower company</td>\n",
1746
+ " <td>518269</td>\n",
1747
+ " <td>PERSON</td>\n",
1748
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
1749
+ " <td>1</td>\n",
1750
+ " <td>1.000000</td>\n",
1751
+ " <td>1.000000</td>\n",
1752
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1753
+ " <td>1.000000</td>\n",
1754
+ " <td>1.000000</td>\n",
1755
+ " <td>19</td>\n",
1756
+ " <td>19</td>\n",
1757
+ " <td>1.000000</td>\n",
1758
+ " <td>1.000000</td>\n",
1759
+ " <td>1.000000</td>\n",
1760
+ " <td>1.000000</td>\n",
1761
+ " </tr>\n",
1762
+ " <tr>\n",
1763
+ " <th>496</th>\n",
1764
+ " <td>[Hanson]</td>\n",
1765
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
1766
+ " <td>longest tenured american football players</td>\n",
1767
+ " <td>442806</td>\n",
1768
+ " <td>PERSON</td>\n",
1769
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
1770
+ " <td>0</td>\n",
1771
+ " <td>0.250000</td>\n",
1772
+ " <td>0.000000</td>\n",
1773
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1774
+ " <td>0.000335</td>\n",
1775
+ " <td>0.111111</td>\n",
1776
+ " <td>1</td>\n",
1777
+ " <td>9</td>\n",
1778
+ " <td>0.222222</td>\n",
1779
+ " <td>0.000000</td>\n",
1780
+ " <td>0.222222</td>\n",
1781
+ " <td>0.222222</td>\n",
1782
+ " </tr>\n",
1783
+ " <tr>\n",
1784
+ " <th>497</th>\n",
1785
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1786
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
1787
+ " <td>mt. view baptist in pendleton sc</td>\n",
1788
+ " <td>460250</td>\n",
1789
+ " <td>PERSON</td>\n",
1790
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
1791
+ " <td>1</td>\n",
1792
+ " <td>1.000000</td>\n",
1793
+ " <td>1.000000</td>\n",
1794
+ " <td>[1.0, 1.0, 1.0, 1.0]</td>\n",
1795
+ " <td>1.000000</td>\n",
1796
+ " <td>1.000000</td>\n",
1797
+ " <td>21</td>\n",
1798
+ " <td>21</td>\n",
1799
+ " <td>1.000000</td>\n",
1800
+ " <td>1.000000</td>\n",
1801
+ " <td>1.000000</td>\n",
1802
+ " <td>1.000000</td>\n",
1803
+ " </tr>\n",
1804
+ " <tr>\n",
1805
+ " <th>498</th>\n",
1806
+ " <td>[Honeysuckle Weeks]</td>\n",
1807
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
1808
+ " <td>what actress disappeared for a while</td>\n",
1809
+ " <td>549739</td>\n",
1810
+ " <td>PERSON</td>\n",
1811
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
1812
+ " <td>0</td>\n",
1813
+ " <td>0.500000</td>\n",
1814
+ " <td>0.000000</td>\n",
1815
+ " <td>[1.0, 1.0, 0.0, 0.0]</td>\n",
1816
+ " <td>0.030197</td>\n",
1817
+ " <td>0.222222</td>\n",
1818
+ " <td>2</td>\n",
1819
+ " <td>9</td>\n",
1820
+ " <td>0.400000</td>\n",
1821
+ " <td>0.250000</td>\n",
1822
+ " <td>0.400000</td>\n",
1823
+ " <td>0.400000</td>\n",
1824
+ " </tr>\n",
1825
+ " <tr>\n",
1826
+ " <th>499</th>\n",
1827
+ " <td>[African-Nguni]</td>\n",
1828
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
1829
+ " <td>what ethnicity is the surname sabol</td>\n",
1830
+ " <td>658265</td>\n",
1831
+ " <td>PERSON</td>\n",
1832
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
1833
+ " <td>0</td>\n",
1834
+ " <td>0.285714</td>\n",
1835
+ " <td>0.000000</td>\n",
1836
+ " <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
1837
+ " <td>0.000335</td>\n",
1838
+ " <td>0.111111</td>\n",
1839
+ " <td>1</td>\n",
1840
+ " <td>9</td>\n",
1841
+ " <td>0.363636</td>\n",
1842
+ " <td>0.222222</td>\n",
1843
+ " <td>0.363636</td>\n",
1844
+ " <td>0.363636</td>\n",
1845
+ " </tr>\n",
1846
+ " </tbody>\n",
1847
+ "</table>\n",
1848
+ "<p>500 rows × 18 columns</p>\n",
1849
+ "</div>"
1850
+ ],
1851
+ "text/plain": [
1852
+ " answers \\\n",
1853
+ "0 [2,662] \n",
1854
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
1855
+ "2 [Hippocrates] \n",
1856
+ "3 [120 days from the date of the Note.] \n",
1857
+ "4 [From $26,000 to $39,000 a year] \n",
1858
+ ".. ... \n",
1859
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
1860
+ "496 [Hanson] \n",
1861
+ "497 [Mount Able Baptist Church is located at the a... \n",
1862
+ "498 [Honeysuckle Weeks] \n",
1863
+ "499 [African-Nguni] \n",
1864
+ "\n",
1865
+ " passages \\\n",
1866
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
1867
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
1868
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
1869
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1870
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
1871
+ ".. ... \n",
1872
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
1873
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
1874
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
1875
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
1876
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
1877
+ "\n",
1878
+ " query query_id query_type \\\n",
1879
+ "0 albany mn population 15177 NUMERIC \n",
1880
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
1881
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
1882
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
1883
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
1884
+ ".. ... ... ... \n",
1885
+ "495 the pool shower company 518269 PERSON \n",
1886
+ "496 longest tenured american football players 442806 PERSON \n",
1887
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
1888
+ "498 what actress disappeared for a while 549739 PERSON \n",
1889
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
1890
+ "\n",
1891
+ " wellFormedAnswers EM F1 \\\n",
1892
+ "0 [The population of Albany, Minnesota is 2,662. ] 0 0.285714 \n",
1893
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... 1 1.000000 \n",
1894
+ "2 [Hippocrates is considered the father of moder... 0 0.250000 \n",
1895
+ "3 [An appraisal is good for 120 days from the da... 0 0.631579 \n",
1896
+ "4 [The average salary for a pharmacy technician ... 0 0.500000 \n",
1897
+ ".. ... .. ... \n",
1898
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... 1 1.000000 \n",
1899
+ "496 [Hanson is the longest tenured American footba... 0 0.250000 \n",
1900
+ "497 [Mount Able Baptist Church is located at the a... 1 1.000000 \n",
1901
+ "498 [The actress disappeared for a while Honeysuck... 0 0.500000 \n",
1902
+ "499 [The ethnicity of the surname Sabol is African... 0 0.285714 \n",
1903
+ "\n",
1904
+ " bleu precisions \\\n",
1905
+ "0 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1906
+ "1 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1907
+ "2 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1908
+ "3 0.327096 [1.0, 0.875, 0.8571428571428571, 0.83333333333... \n",
1909
+ "4 0.193040 [0.875, 0.7142857142857143, 0.5, 0.4] \n",
1910
+ ".. ... ... \n",
1911
+ "495 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1912
+ "496 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1913
+ "497 1.000000 [1.0, 1.0, 1.0, 1.0] \n",
1914
+ "498 0.000000 [1.0, 1.0, 0.0, 0.0] \n",
1915
+ "499 0.000000 [1.0, 0.0, 0.0, 0.0] \n",
1916
+ "\n",
1917
+ " brevity_penalty length_ratio translation_length reference_length \\\n",
1918
+ "0 0.000335 0.111111 1 9 \n",
1919
+ "1 1.000000 1.000000 14 14 \n",
1920
+ "2 0.000335 0.111111 1 9 \n",
1921
+ "3 0.367879 0.500000 9 18 \n",
1922
+ "4 0.324652 0.470588 8 17 \n",
1923
+ ".. ... ... ... ... \n",
1924
+ "495 1.000000 1.000000 19 19 \n",
1925
+ "496 0.000335 0.111111 1 9 \n",
1926
+ "497 1.000000 1.000000 21 21 \n",
1927
+ "498 0.030197 0.222222 2 9 \n",
1928
+ "499 0.000335 0.111111 1 9 \n",
1929
+ "\n",
1930
+ " rouge1 rouge2 rougeL rougeLsum \n",
1931
+ "0 0.400000 0.250000 0.400000 0.400000 \n",
1932
+ "1 1.000000 1.000000 1.000000 1.000000 \n",
1933
+ "2 0.222222 0.000000 0.222222 0.222222 \n",
1934
+ "3 0.640000 0.608696 0.640000 0.640000 \n",
1935
+ "4 0.583333 0.454545 0.583333 0.583333 \n",
1936
+ ".. ... ... ... ... \n",
1937
+ "495 1.000000 1.000000 1.000000 1.000000 \n",
1938
+ "496 0.222222 0.000000 0.222222 0.222222 \n",
1939
+ "497 1.000000 1.000000 1.000000 1.000000 \n",
1940
+ "498 0.400000 0.250000 0.400000 0.400000 \n",
1941
+ "499 0.363636 0.222222 0.363636 0.363636 \n",
1942
+ "\n",
1943
+ "[500 rows x 18 columns]"
1944
+ ]
1945
+ },
1946
+ "execution_count": 57,
1947
+ "metadata": {},
1948
+ "output_type": "execute_result"
1949
+ }
1950
+ ],
1951
+ "source": [
1952
+ "result_all = result_all.map(\n",
1953
+ " lambda record: rouge.compute(\n",
1954
+ " predictions=[record[\"answers\"][0]], references=[record[\"wellFormedAnswers\"][0]]\n",
1955
+ " ),\n",
1956
+ " batched=False,\n",
1957
+ ")\n",
1958
+ "result_all.to_pandas()"
1959
+ ]
1960
+ }
1961
+ ],
1962
+ "metadata": {
1963
+ "kernelspec": {
1964
+ "display_name": "Python 3 (ipykernel)",
1965
+ "language": "python",
1966
+ "name": "python3"
1967
+ },
1968
+ "language_info": {
1969
+ "codemirror_mode": {
1970
+ "name": "ipython",
1971
+ "version": 3
1972
+ },
1973
+ "file_extension": ".py",
1974
+ "mimetype": "text/x-python",
1975
+ "name": "python",
1976
+ "nbconvert_exporter": "python",
1977
+ "pygments_lexer": "ipython3",
1978
+ "version": "3.11.4"
1979
+ }
1980
+ },
1981
+ "nbformat": 4,
1982
+ "nbformat_minor": 5
1983
+ }
Llama-2-eval/notebook/metrics.ipynb ADDED
@@ -0,0 +1,1293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "af2d4577",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Note: you may need to restart the kernel to use updated packages.\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "%pip install -q evaluate rouge_score"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "id": "a6d96660",
25
+ "metadata": {},
26
+ "outputs": [
27
+ {
28
+ "data": {
29
+ "text/plain": [
30
+ "True"
31
+ ]
32
+ },
33
+ "execution_count": 2,
34
+ "metadata": {},
35
+ "output_type": "execute_result"
36
+ }
37
+ ],
38
+ "source": [
39
+ "import os\n",
40
+ "from dotenv import load_dotenv\n",
41
+ "\n",
42
+ "load_dotenv()"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 3,
48
+ "id": "b72bf3f9",
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "text/plain": [
54
+ "Dataset({\n",
55
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
56
+ " num_rows: 500\n",
57
+ "})"
58
+ ]
59
+ },
60
+ "execution_count": 3,
61
+ "metadata": {},
62
+ "output_type": "execute_result"
63
+ }
64
+ ],
65
+ "source": [
66
+ "from datasets import load_from_disk\n",
67
+ "\n",
68
+ "new_ds = load_from_disk(\"../data/datasets/ms_macro/\")\n",
69
+ "new_ds"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 4,
75
+ "id": "051bd771",
76
+ "metadata": {},
77
+ "outputs": [
78
+ {
79
+ "data": {
80
+ "text/plain": [
81
+ "({'NUMERIC': 100,\n",
82
+ " 'DESCRIPTION': 100,\n",
83
+ " 'ENTITY': 100,\n",
84
+ " 'PERSON': 100,\n",
85
+ " 'LOCATION': 100},\n",
86
+ " {'NUMERIC': 179,\n",
87
+ " 'DESCRIPTION': 215,\n",
88
+ " 'ENTITY': 443,\n",
89
+ " 'LOCATION': 461,\n",
90
+ " 'PERSON': 499})"
91
+ ]
92
+ },
93
+ "execution_count": 4,
94
+ "metadata": {},
95
+ "output_type": "execute_result"
96
+ }
97
+ ],
98
+ "source": [
99
+ "counts = {}\n",
100
+ "indices = {}\n",
101
+ "size = 100\n",
102
+ "for i in range(new_ds.num_rows):\n",
103
+ " row = new_ds[i]\n",
104
+ " query_type = row[\"query_type\"]\n",
105
+ " if query_type in counts:\n",
106
+ " counts[query_type] += 1\n",
107
+ " else:\n",
108
+ " counts[query_type] = 1\n",
109
+ " if counts[query_type] == size:\n",
110
+ " indices[query_type] = i\n",
111
+ "counts, indices"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 5,
117
+ "id": "db48dcc4",
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "data": {
122
+ "text/html": [
123
+ "<div>\n",
124
+ "<style scoped>\n",
125
+ " .dataframe tbody tr th:only-of-type {\n",
126
+ " vertical-align: middle;\n",
127
+ " }\n",
128
+ "\n",
129
+ " .dataframe tbody tr th {\n",
130
+ " vertical-align: top;\n",
131
+ " }\n",
132
+ "\n",
133
+ " .dataframe thead th {\n",
134
+ " text-align: right;\n",
135
+ " }\n",
136
+ "</style>\n",
137
+ "<table border=\"1\" class=\"dataframe\">\n",
138
+ " <thead>\n",
139
+ " <tr style=\"text-align: right;\">\n",
140
+ " <th></th>\n",
141
+ " <th>answers</th>\n",
142
+ " <th>passages</th>\n",
143
+ " <th>query</th>\n",
144
+ " <th>query_id</th>\n",
145
+ " <th>query_type</th>\n",
146
+ " <th>wellFormedAnswers</th>\n",
147
+ " </tr>\n",
148
+ " </thead>\n",
149
+ " <tbody>\n",
150
+ " <tr>\n",
151
+ " <th>0</th>\n",
152
+ " <td>[2,662]</td>\n",
153
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
154
+ " <td>albany mn population</td>\n",
155
+ " <td>15177</td>\n",
156
+ " <td>NUMERIC</td>\n",
157
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
158
+ " </tr>\n",
159
+ " <tr>\n",
160
+ " <th>1</th>\n",
161
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
162
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
163
+ " <td>current weather in volcano, ca</td>\n",
164
+ " <td>114414</td>\n",
165
+ " <td>DESCRIPTION</td>\n",
166
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
167
+ " </tr>\n",
168
+ " <tr>\n",
169
+ " <th>2</th>\n",
170
+ " <td>[Hippocrates]</td>\n",
171
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
172
+ " <td>____________________ is considered the father ...</td>\n",
173
+ " <td>9083</td>\n",
174
+ " <td>DESCRIPTION</td>\n",
175
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>3</th>\n",
179
+ " <td>[120 days from the date of the Note.]</td>\n",
180
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
181
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
182
+ " <td>281439</td>\n",
183
+ " <td>NUMERIC</td>\n",
184
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
185
+ " </tr>\n",
186
+ " <tr>\n",
187
+ " <th>4</th>\n",
188
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
189
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
190
+ " <td>average pharmacy tech salary</td>\n",
191
+ " <td>40287</td>\n",
192
+ " <td>NUMERIC</td>\n",
193
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
194
+ " </tr>\n",
195
+ " <tr>\n",
196
+ " <th>...</th>\n",
197
+ " <td>...</td>\n",
198
+ " <td>...</td>\n",
199
+ " <td>...</td>\n",
200
+ " <td>...</td>\n",
201
+ " <td>...</td>\n",
202
+ " <td>...</td>\n",
203
+ " </tr>\n",
204
+ " <tr>\n",
205
+ " <th>495</th>\n",
206
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
207
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...</td>\n",
208
+ " <td>the pool shower company</td>\n",
209
+ " <td>518269</td>\n",
210
+ " <td>PERSON</td>\n",
211
+ " <td>[The Pool Shower, Inc. is a Georgia Domestic P...</td>\n",
212
+ " </tr>\n",
213
+ " <tr>\n",
214
+ " <th>496</th>\n",
215
+ " <td>[Hanson]</td>\n",
216
+ " <td>{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...</td>\n",
217
+ " <td>longest tenured american football players</td>\n",
218
+ " <td>442806</td>\n",
219
+ " <td>PERSON</td>\n",
220
+ " <td>[Hanson is the longest tenured American footba...</td>\n",
221
+ " </tr>\n",
222
+ " <tr>\n",
223
+ " <th>497</th>\n",
224
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
225
+ " <td>{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...</td>\n",
226
+ " <td>mt. view baptist in pendleton sc</td>\n",
227
+ " <td>460250</td>\n",
228
+ " <td>PERSON</td>\n",
229
+ " <td>[Mount Able Baptist Church is located at the a...</td>\n",
230
+ " </tr>\n",
231
+ " <tr>\n",
232
+ " <th>498</th>\n",
233
+ " <td>[Honeysuckle Weeks]</td>\n",
234
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...</td>\n",
235
+ " <td>what actress disappeared for a while</td>\n",
236
+ " <td>549739</td>\n",
237
+ " <td>PERSON</td>\n",
238
+ " <td>[The actress disappeared for a while Honeysuck...</td>\n",
239
+ " </tr>\n",
240
+ " <tr>\n",
241
+ " <th>499</th>\n",
242
+ " <td>[African-Nguni]</td>\n",
243
+ " <td>{'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas...</td>\n",
244
+ " <td>what ethnicity is the surname sabol</td>\n",
245
+ " <td>658265</td>\n",
246
+ " <td>PERSON</td>\n",
247
+ " <td>[The ethnicity of the surname Sabol is African...</td>\n",
248
+ " </tr>\n",
249
+ " </tbody>\n",
250
+ "</table>\n",
251
+ "<p>500 rows × 6 columns</p>\n",
252
+ "</div>"
253
+ ],
254
+ "text/plain": [
255
+ " answers \\\n",
256
+ "0 [2,662] \n",
257
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
258
+ "2 [Hippocrates] \n",
259
+ "3 [120 days from the date of the Note.] \n",
260
+ "4 [From $26,000 to $39,000 a year] \n",
261
+ ".. ... \n",
262
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
263
+ "496 [Hanson] \n",
264
+ "497 [Mount Able Baptist Church is located at the a... \n",
265
+ "498 [Honeysuckle Weeks] \n",
266
+ "499 [African-Nguni] \n",
267
+ "\n",
268
+ " passages \\\n",
269
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
270
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
271
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
272
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
273
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
274
+ ".. ... \n",
275
+ "495 {'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]... \n",
276
+ "496 {'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]... \n",
277
+ "497 {'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '... \n",
278
+ "498 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]... \n",
279
+ "499 {'is_selected': [0, 0, 1, 0, 0, 0, 0, 0], 'pas... \n",
280
+ "\n",
281
+ " query query_id query_type \\\n",
282
+ "0 albany mn population 15177 NUMERIC \n",
283
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
284
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
285
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
286
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
287
+ ".. ... ... ... \n",
288
+ "495 the pool shower company 518269 PERSON \n",
289
+ "496 longest tenured american football players 442806 PERSON \n",
290
+ "497 mt. view baptist in pendleton sc 460250 PERSON \n",
291
+ "498 what actress disappeared for a while 549739 PERSON \n",
292
+ "499 what ethnicity is the surname sabol 658265 PERSON \n",
293
+ "\n",
294
+ " wellFormedAnswers \n",
295
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
296
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
297
+ "2 [Hippocrates is considered the father of moder... \n",
298
+ "3 [An appraisal is good for 120 days from the da... \n",
299
+ "4 [The average salary for a pharmacy technician ... \n",
300
+ ".. ... \n",
301
+ "495 [The Pool Shower, Inc. is a Georgia Domestic P... \n",
302
+ "496 [Hanson is the longest tenured American footba... \n",
303
+ "497 [Mount Able Baptist Church is located at the a... \n",
304
+ "498 [The actress disappeared for a while Honeysuck... \n",
305
+ "499 [The ethnicity of the surname Sabol is African... \n",
306
+ "\n",
307
+ "[500 rows x 6 columns]"
308
+ ]
309
+ },
310
+ "execution_count": 5,
311
+ "metadata": {},
312
+ "output_type": "execute_result"
313
+ }
314
+ ],
315
+ "source": [
316
+ "new_ds.to_pandas()"
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "code",
321
+ "execution_count": 5,
322
+ "id": "89494c3d",
323
+ "metadata": {},
324
+ "outputs": [],
325
+ "source": [
326
+ "import evaluate\n",
327
+ "\n",
328
+ "bleu = evaluate.load(\"bleu\")\n",
329
+ "rouge = evaluate.load(\"rouge\")"
330
+ ]
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": 6,
335
+ "id": "24a818ba",
336
+ "metadata": {},
337
+ "outputs": [],
338
+ "source": [
339
+ "def calc_metrics(ds):\n",
340
+ " predictions = [ds[i][\"answers\"][0] for i in range(ds.num_rows)]\n",
341
+ " references = [ds[i][\"wellFormedAnswers\"][0] for i in range(ds.num_rows)]\n",
342
+ " bleu_scores = bleu.compute(predictions=predictions, references=references)\n",
343
+ " rouge_scores = rouge.compute(predictions=predictions, references=references)\n",
344
+ " return {\"bleu_scores\": bleu_scores, \"rouge_scores\": rouge_scores}"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 8,
350
+ "id": "e447aa08",
351
+ "metadata": {},
352
+ "outputs": [
353
+ {
354
+ "data": {
355
+ "text/plain": [
356
+ "{'bleu_scores': {'bleu': 0.5842479720128682,\n",
357
+ " 'precisions': [0.7814257485940113,\n",
358
+ " 0.7185392334265505,\n",
359
+ " 0.6801561945331913,\n",
360
+ " 0.6543700340522134],\n",
361
+ " 'brevity_penalty': 0.8263321448047812,\n",
362
+ " 'length_ratio': 0.8398008680112331,\n",
363
+ " 'translation_length': 6579,\n",
364
+ " 'reference_length': 7834},\n",
365
+ " 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
366
+ " 'rouge2': 0.5266427189500504,\n",
367
+ " 'rougeL': 0.623467453115133,\n",
368
+ " 'rougeLsum': 0.6239164817179192}}"
369
+ ]
370
+ },
371
+ "execution_count": 8,
372
+ "metadata": {},
373
+ "output_type": "execute_result"
374
+ }
375
+ ],
376
+ "source": [
377
+ "calc_metrics(new_ds)"
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "code",
382
+ "execution_count": 9,
383
+ "id": "b29d1f3e",
384
+ "metadata": {},
385
+ "outputs": [],
386
+ "source": [
387
+ "def calc_all_metrics(ds):\n",
388
+ " result = {}\n",
389
+ " result[\"OVERALL\"] = calc_metrics(ds)\n",
390
+ " for query_type in indices:\n",
391
+ " result[query_type] = calc_metrics(\n",
392
+ " ds.filter(lambda example: example[\"query_type\"] == query_type)\n",
393
+ " )\n",
394
+ "\n",
395
+ " return result"
396
+ ]
397
+ },
398
+ {
399
+ "cell_type": "code",
400
+ "execution_count": 10,
401
+ "id": "1a4273da",
402
+ "metadata": {},
403
+ "outputs": [
404
+ {
405
+ "data": {
406
+ "text/plain": [
407
+ "{'OVERALL': {'bleu_scores': {'bleu': 0.5842479720128682,\n",
408
+ " 'precisions': [0.7814257485940113,\n",
409
+ " 0.7185392334265505,\n",
410
+ " 0.6801561945331913,\n",
411
+ " 0.6543700340522134],\n",
412
+ " 'brevity_penalty': 0.8263321448047812,\n",
413
+ " 'length_ratio': 0.8398008680112331,\n",
414
+ " 'translation_length': 6579,\n",
415
+ " 'reference_length': 7834},\n",
416
+ " 'rouge_scores': {'rouge1': 0.6301946495853493,\n",
417
+ " 'rouge2': 0.5266427189500504,\n",
418
+ " 'rougeL': 0.623467453115133,\n",
419
+ " 'rougeLsum': 0.6239164817179192}},\n",
420
+ " 'NUMERIC': {'bleu_scores': {'bleu': 0.3589193328591513,\n",
421
+ " 'precisions': [0.7536764705882353,\n",
422
+ " 0.6494413407821229,\n",
423
+ " 0.5884244372990354,\n",
424
+ " 0.5657657657657658],\n",
425
+ " 'brevity_penalty': 0.5649158870633492,\n",
426
+ " 'length_ratio': 0.6365054602184087,\n",
427
+ " 'translation_length': 816,\n",
428
+ " 'reference_length': 1282},\n",
429
+ " 'rouge_scores': {'rouge1': 0.5569863096088544,\n",
430
+ " 'rouge2': 0.4262959859853511,\n",
431
+ " 'rougeL': 0.5495190228731732,\n",
432
+ " 'rougeLsum': 0.5502805905003136}},\n",
433
+ " 'DESCRIPTION': {'bleu_scores': {'bleu': 0.7521919521555381,\n",
434
+ " 'precisions': [0.8093238135237295,\n",
435
+ " 0.761946514686541,\n",
436
+ " 0.7335164835164835,\n",
437
+ " 0.7077144226161955],\n",
438
+ " 'brevity_penalty': 1.0,\n",
439
+ " 'length_ratio': 1.0778632865550022,\n",
440
+ " 'translation_length': 2381,\n",
441
+ " 'reference_length': 2209},\n",
442
+ " 'rouge_scores': {'rouge1': 0.8503571429521525,\n",
443
+ " 'rouge2': 0.8009206345153658,\n",
444
+ " 'rougeL': 0.8406066569954856,\n",
445
+ " 'rougeLsum': 0.8405710628479812}},\n",
446
+ " 'ENTITY': {'bleu_scores': {'bleu': 0.5057439480363012,\n",
447
+ " 'precisions': [0.7135050741608119,\n",
448
+ " 0.6375952582557155,\n",
449
+ " 0.5884509624197983,\n",
450
+ " 0.5555555555555556],\n",
451
+ " 'brevity_penalty': 0.8143961563151505,\n",
452
+ " 'length_ratio': 0.8296632124352331,\n",
453
+ " 'translation_length': 1281,\n",
454
+ " 'reference_length': 1544},\n",
455
+ " 'rouge_scores': {'rouge1': 0.5877667231458372,\n",
456
+ " 'rouge2': 0.48898551862814277,\n",
457
+ " 'rougeL': 0.5796676511145928,\n",
458
+ " 'rougeLsum': 0.5784518864116339}},\n",
459
+ " 'LOCATION': {'bleu_scores': {'bleu': 0.4167786604147962,\n",
460
+ " 'precisions': [0.8600583090379009,\n",
461
+ " 0.7986348122866894,\n",
462
+ " 0.7573385518590998,\n",
463
+ " 0.7414529914529915],\n",
464
+ " 'brevity_penalty': 0.5288627994571649,\n",
465
+ " 'length_ratio': 0.6108637577916296,\n",
466
+ " 'translation_length': 686,\n",
467
+ " 'reference_length': 1123},\n",
468
+ " 'rouge_scores': {'rouge1': 0.5405464995752973,\n",
469
+ " 'rouge2': 0.3950940848806123,\n",
470
+ " 'rougeL': 0.5400724136440879,\n",
471
+ " 'rougeLsum': 0.5389556394979822}},\n",
472
+ " 'PERSON': {'bleu_scores': {'bleu': 0.5861084149356606,\n",
473
+ " 'precisions': [0.773851590106007,\n",
474
+ " 0.7178707224334601,\n",
475
+ " 0.6810766721044046,\n",
476
+ " 0.6522864538395168],\n",
477
+ " 'brevity_penalty': 0.8315596069910627,\n",
478
+ " 'length_ratio': 0.844272076372315,\n",
479
+ " 'translation_length': 1415,\n",
480
+ " 'reference_length': 1676},\n",
481
+ " 'rouge_scores': {'rouge1': 0.6119770025611677,\n",
482
+ " 'rouge2': 0.522853938087197,\n",
483
+ " 'rougeL': 0.6096713664231095,\n",
484
+ " 'rougeLsum': 0.6103086543984155}}}"
485
+ ]
486
+ },
487
+ "execution_count": 10,
488
+ "metadata": {},
489
+ "output_type": "execute_result"
490
+ }
491
+ ],
492
+ "source": [
493
+ "calc_all_metrics(new_ds)"
494
+ ]
495
+ },
496
+ {
497
+ "cell_type": "code",
498
+ "execution_count": 11,
499
+ "id": "3698be27",
500
+ "metadata": {},
501
+ "outputs": [
502
+ {
503
+ "name": "stdout",
504
+ "output_type": "stream",
505
+ "text": [
506
+ "loading env vars from: /Users/inflaton/code/emtech/gpt/Llama-2-eval/.env\n",
507
+ "App init started at 2023-10-10 12:04:33.775140\n",
508
+ "Running on: macOS-14.0-arm64-arm-64bit\n",
509
+ "MPS is available\n",
510
+ "CUDA is NOT available\n",
511
+ "hf_embeddings_device_type: mps\n",
512
+ "hf_pipeline_device_type: mps\n",
513
+ "initializing LLM: openai\n",
514
+ " hf_pipeline_device_type: mps\n",
515
+ " load_quantized_model: None\n",
516
+ " torch_dtype: torch.float32\n",
517
+ " n_threds: 24\n",
518
+ " using model: gpt-3.5-turbo\n",
519
+ "initialization complete\n",
520
+ "App init completed in 0.167s\n"
521
+ ]
522
+ }
523
+ ],
524
+ "source": [
525
+ "import json\n",
526
+ "import sys\n",
527
+ "import os\n",
528
+ "\n",
529
+ "os.environ[\"TEST_FIRST_5\"] = \"true\"\n",
530
+ "os.environ[\"LANGCHAIN_DEBUG\"] = \"true\"\n",
531
+ "\n",
532
+ "from pathlib import Path\n",
533
+ "\n",
534
+ "sys.path.append(str(Path.cwd().parent))\n",
535
+ "\n",
536
+ "from evaluate_llm_ms_macro import (\n",
537
+ " QAChainWithMsMacroDataset,\n",
538
+ " llm_loader,\n",
539
+ " calc_all_metrics,\n",
540
+ ")"
541
+ ]
542
+ },
543
+ {
544
+ "cell_type": "code",
545
+ "execution_count": 12,
546
+ "id": "2395804d",
547
+ "metadata": {},
548
+ "outputs": [
549
+ {
550
+ "name": "stdout",
551
+ "output_type": "stream",
552
+ "text": [
553
+ "{'question': 'albany mn population', 'chat_history': []}\n",
554
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
555
+ "\u001b[0m{\n",
556
+ " \"question\": \"albany mn population\",\n",
557
+ " \"chat_history\": []\n",
558
+ "}\n",
559
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
560
+ "\u001b[0m[inputs]\n",
561
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
562
+ "\u001b[0m{\n",
563
+ " \"question\": \"albany mn population\",\n",
564
+ " \"context\": \"City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. 
Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"\n",
565
+ "}\n",
566
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
567
+ "\u001b[0m{\n",
568
+ " \"prompts\": [\n",
569
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
570
+ " ]\n",
571
+ "}\n",
572
+ "The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.23s] Exiting LLM run with output:\n",
573
+ "\u001b[0m{\n",
574
+ " \"generations\": [\n",
575
+ " [\n",
576
+ " {\n",
577
+ " \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
578
+ " \"generation_info\": {\n",
579
+ " \"finish_reason\": \"stop\"\n",
580
+ " },\n",
581
+ " \"message\": {\n",
582
+ " \"lc\": 1,\n",
583
+ " \"type\": \"constructor\",\n",
584
+ " \"id\": [\n",
585
+ " \"langchain\",\n",
586
+ " \"schema\",\n",
587
+ " \"messages\",\n",
588
+ " \"AIMessageChunk\"\n",
589
+ " ],\n",
590
+ " \"kwargs\": {\n",
591
+ " \"example\": false,\n",
592
+ " \"content\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\",\n",
593
+ " \"additional_kwargs\": {}\n",
594
+ " }\n",
595
+ " }\n",
596
+ " }\n",
597
+ " ]\n",
598
+ " ],\n",
599
+ " \"llm_output\": null,\n",
600
+ " \"run\": null\n",
601
+ "}\n",
602
+ "\n",
603
+ "\n",
604
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [3.23s] Exiting Chain run with output:\n",
605
+ "\u001b[0m{\n",
606
+ " \"text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
607
+ "}\n",
608
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [3.23s] Exiting Chain run with output:\n",
609
+ "\u001b[0m{\n",
610
+ " \"output_text\": \"The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\"\n",
611
+ "}\n",
612
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [3.46s] Exiting Chain run with output:\n",
613
+ "\u001b[0m[outputs]\n",
614
+ "{'question': 'current weather in volcano, ca', 'chat_history': []}\n",
615
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
616
+ "\u001b[0m{\n",
617
+ " \"question\": \"current weather in volcano, ca\",\n",
618
+ " \"chat_history\": []\n",
619
+ "}\n",
620
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
621
+ "\u001b[0m[inputs]\n",
622
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
623
+ "\u001b[0m{\n",
624
+ " \"question\": \"current weather in volcano, ca\",\n",
625
+ " \"context\": \"Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. 
There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\"\n",
626
+ "}\n",
627
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
628
+ "\u001b[0m{\n",
629
+ " \"prompts\": [\n",
630
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nVolcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.\\n\\nCurrent U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.\\n\\nVolcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.\\n\\nVolcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.\\n\\nVolcano 7 Day Weather. 
1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.\\n\\nVolcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\\n\\nVolcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.\\n\\nHourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.\\nHuman: current weather in volcano, ca\"\n",
631
+ " ]\n",
632
+ "}\n",
633
+ "I don't have the current weather information for Volcano, CA.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.04s] Exiting LLM run with output:\n",
634
+ "\u001b[0m{\n",
635
+ " \"generations\": [\n",
636
+ " [\n",
637
+ " {\n",
638
+ " \"text\": \"I don't have the current weather information for Volcano, CA.\",\n",
639
+ " \"generation_info\": {\n",
640
+ " \"finish_reason\": \"stop\"\n",
641
+ " },\n",
642
+ " \"message\": {\n",
643
+ " \"lc\": 1,\n",
644
+ " \"type\": \"constructor\",\n",
645
+ " \"id\": [\n",
646
+ " \"langchain\",\n",
647
+ " \"schema\",\n",
648
+ " \"messages\",\n",
649
+ " \"AIMessageChunk\"\n",
650
+ " ],\n",
651
+ " \"kwargs\": {\n",
652
+ " \"example\": false,\n",
653
+ " \"content\": \"I don't have the current weather information for Volcano, CA.\",\n",
654
+ " \"additional_kwargs\": {}\n",
655
+ " }\n",
656
+ " }\n",
657
+ " }\n",
658
+ " ]\n",
659
+ " ],\n",
660
+ " \"llm_output\": null,\n",
661
+ " \"run\": null\n",
662
+ "}\n",
663
+ "\n",
664
+ "\n",
665
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.04s] Exiting Chain run with output:\n",
666
+ "\u001b[0m{\n",
667
+ " \"text\": \"I don't have the current weather information for Volcano, CA.\"\n",
668
+ "}\n",
669
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.04s] Exiting Chain run with output:\n",
670
+ "\u001b[0m{\n",
671
+ " \"output_text\": \"I don't have the current weather information for Volcano, CA.\"\n",
672
+ "}\n",
673
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.04s] Exiting Chain run with output:\n",
674
+ "\u001b[0m[outputs]\n",
675
+ "{'question': '____________________ is considered the father of modern medicine.', 'chat_history': []}\n",
676
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
677
+ "\u001b[0m{\n",
678
+ " \"question\": \"____________________ is considered the father of modern medicine.\",\n",
679
+ " \"chat_history\": []\n",
680
+ "}\n",
681
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
682
+ "\u001b[0m[inputs]\n",
683
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
684
+ "\u001b[0m{\n",
685
+ " \"question\": \"____________________ is considered the father of modern medicine.\",\n",
686
+ " \"context\": \"Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. 
It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\"\n",
687
+ "}\n",
688
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
689
+ "\u001b[0m{\n",
690
+ " \"prompts\": [\n",
691
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
692
+ " ]\n",
693
+ "}\n",
694
+ "Hippocrates is considered the father of modern medicine.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [654ms] Exiting LLM run with output:\n",
695
+ "\u001b[0m{\n",
696
+ " \"generations\": [\n",
697
+ " [\n",
698
+ " {\n",
699
+ " \"text\": \"Hippocrates is considered the father of modern medicine.\",\n",
700
+ " \"generation_info\": {\n",
701
+ " \"finish_reason\": \"stop\"\n",
702
+ " },\n",
703
+ " \"message\": {\n",
704
+ " \"lc\": 1,\n",
705
+ " \"type\": \"constructor\",\n",
706
+ " \"id\": [\n",
707
+ " \"langchain\",\n",
708
+ " \"schema\",\n",
709
+ " \"messages\",\n",
710
+ " \"AIMessageChunk\"\n",
711
+ " ],\n",
712
+ " \"kwargs\": {\n",
713
+ " \"example\": false,\n",
714
+ " \"content\": \"Hippocrates is considered the father of modern medicine.\",\n",
715
+ " \"additional_kwargs\": {}\n",
716
+ " }\n",
717
+ " }\n",
718
+ " }\n",
719
+ " ]\n",
720
+ " ],\n",
721
+ " \"llm_output\": null,\n",
722
+ " \"run\": null\n",
723
+ "}\n",
724
+ "\n",
725
+ "\n",
726
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [655ms] Exiting Chain run with output:\n",
727
+ "\u001b[0m{\n",
728
+ " \"text\": \"Hippocrates is considered the father of modern medicine.\"\n",
729
+ "}\n",
730
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [655ms] Exiting Chain run with output:\n",
731
+ "\u001b[0m{\n",
732
+ " \"output_text\": \"Hippocrates is considered the father of modern medicine.\"\n",
733
+ "}\n",
734
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [657ms] Exiting Chain run with output:\n",
735
+ "\u001b[0m[outputs]\n",
736
+ "{'question': 'how many days is an appraisal good for a fannie loan', 'chat_history': []}\n",
737
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
738
+ "\u001b[0m{\n",
739
+ " \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
740
+ " \"chat_history\": []\n",
741
+ "}\n",
742
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
743
+ "\u001b[0m[inputs]\n",
744
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
745
+ "\u001b[0m{\n",
746
+ " \"question\": \"how many days is an appraisal good for a fannie loan\",\n",
747
+ " \"context\": \"New and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. acceptable, but supporting comps are with an adequate explanation from the Appraiser. 
Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\"\n",
748
+ "}\n",
749
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
750
+ "\u001b[0m{\n",
751
+ " \"prompts\": [\n",
752
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nNew and Updated Underwriting and Eligibility Policies. Age of Credit Documents Selling Guide, B1-1-04, Allowable Age of Credit. Documents. The maximum age of credit documents is reduced from 120 days to 90 days for existing. construction and from 180 days to 120 days for new construction. Credit documents include. credit reports and employment, income, and asset documentation. The age of the documents is.\\n\\nIn no case may the appraisal be dated more than 1 year prior to the date of the Note. Property Inspection Reports/Condition and Marketability Reports (Fannie Mae Form 2070/Freddie Mac Form. 2075 may be dated no earlier than 120 days from the date of the Note. Continued on next page.\\n\\nFannie Mae will allow the use of an origination appraisal for a subsequent transaction if the following requirements are met: 1 The subsequent transaction may only be a Limited Cash-Out Refinance. 2 The appraisal report must not be more than 12 months old on the note date of the subsequent transaction.\\n\\nThe subsequent transaction may only be a Limited Cash-Out Refinance. The appraisal report must not be more than 12 months old on the note date of the subsequent transaction. If the appraisal report is greater than 4 months old on the date of the note and mortgage, then an appraisal update is required.\\n\\nIf they were sold with exposure to the market, listed in. MLS they should be considered. How long is the FHA case # good for (not the appraisal, but the actual case #)? the case number is valid for 6 months unless the appraiser expires prior to the 6 month time frame.\\n\\nNo the borrower can only pay for one appraisal. Your question about Comps is not acceptable, Comps over 1 year old for comps 1-3 are not. 
acceptable, but supporting comps are with an adequate explanation from the Appraiser. Comps, over one year old would be acceptable, onlywith a waiver request by the lender.\\n\\nThe appraisal may be dated no earlier than 120 days from the date of the Note, regardless of whether the. property was appraised as proposed or existing construction. When the appraisal will be more than 120 days old but less than 1 year old on the date of the Note, the.\\n\\nThis inspection and results of the analysis must be reported on the Appraisal Update and/or Completion Report (Form 1004D). 1 If the appraiser indicates on the Form 1004D that the property value has declined, then the lender must obtain a new appraisal for the property.\\n\\nUnfortunately, that is a complete new order (and expense), as one year is a lifetime where property values are concerned. Thanks for the information, however I asked for the PMi to be removed just over 3 months after the appraisal. In fact, the manager at Nationstar said it was 91 days and not valid per Fannie Mae.\\n\\nReputation: 6463. Actually, Fannie Mae and FHA went to 120 days, but at no time were they ever over 6 months for existing construction. Generally, comparables from August 2009 could not be included on an appraisal report today, so there is no way an appraisal issued then would be acceptable.\\nHuman: how many days is an appraisal good for a fannie loan\"\n",
753
+ " ]\n",
754
+ "}\n",
755
+ "According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.02s] Exiting LLM run with output:\n",
756
+ "\u001b[0m{\n",
757
+ " \"generations\": [\n",
758
+ " [\n",
759
+ " {\n",
760
+ " \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
761
+ " \"generation_info\": {\n",
762
+ " \"finish_reason\": \"stop\"\n",
763
+ " },\n",
764
+ " \"message\": {\n",
765
+ " \"lc\": 1,\n",
766
+ " \"type\": \"constructor\",\n",
767
+ " \"id\": [\n",
768
+ " \"langchain\",\n",
769
+ " \"schema\",\n",
770
+ " \"messages\",\n",
771
+ " \"AIMessageChunk\"\n",
772
+ " ],\n",
773
+ " \"kwargs\": {\n",
774
+ " \"example\": false,\n",
775
+ " \"content\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\",\n",
776
+ " \"additional_kwargs\": {}\n",
777
+ " }\n",
778
+ " }\n",
779
+ " }\n",
780
+ " ]\n",
781
+ " ],\n",
782
+ " \"llm_output\": null,\n",
783
+ " \"run\": null\n",
784
+ "}\n",
785
+ "\n",
786
+ "\n",
787
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.02s] Exiting Chain run with output:\n",
788
+ "\u001b[0m{\n",
789
+ " \"text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
790
+ "}\n",
791
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.02s] Exiting Chain run with output:\n",
792
+ "\u001b[0m{\n",
793
+ " \"output_text\": \"According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\"\n",
794
+ "}\n",
795
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.02s] Exiting Chain run with output:\n",
796
+ "\u001b[0m[outputs]\n",
797
+ "{'question': 'average pharmacy tech salary', 'chat_history': []}\n",
798
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:\n",
799
+ "\u001b[0m{\n",
800
+ " \"question\": \"average pharmacy tech salary\",\n",
801
+ " \"chat_history\": []\n",
802
+ "}\n",
803
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:\n",
804
+ "\u001b[0m[inputs]\n",
805
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
806
+ "\u001b[0m{\n",
807
+ " \"question\": \"average pharmacy tech salary\",\n",
808
+ " \"context\": \"If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. 
These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\"\n",
809
+ "}\n",
810
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] Entering LLM run with input:\n",
811
+ "\u001b[0m{\n",
812
+ " \"prompts\": [\n",
813
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nIf you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.\\n\\nWhat can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.\\n\\nThe median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.\\n\\nThe majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.\\n\\nThe pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.\\n\\nPharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. 
$18,722 - $48,714.\\n\\nPopular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.\\n\\nPharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.\\n\\nIt also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.\\n\\nOccupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.\\nHuman: average pharmacy tech salary\"\n",
814
+ " ]\n",
815
+ "}\n",
816
+ "The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.45s] Exiting LLM run with output:\n",
817
+ "\u001b[0m{\n",
818
+ " \"generations\": [\n",
819
+ " [\n",
820
+ " {\n",
821
+ " \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
822
+ " \"generation_info\": {\n",
823
+ " \"finish_reason\": \"stop\"\n",
824
+ " },\n",
825
+ " \"message\": {\n",
826
+ " \"lc\": 1,\n",
827
+ " \"type\": \"constructor\",\n",
828
+ " \"id\": [\n",
829
+ " \"langchain\",\n",
830
+ " \"schema\",\n",
831
+ " \"messages\",\n",
832
+ " \"AIMessageChunk\"\n",
833
+ " ],\n",
834
+ " \"kwargs\": {\n",
835
+ " \"example\": false,\n",
836
+ " \"content\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\",\n",
837
+ " \"additional_kwargs\": {}\n",
838
+ " }\n",
839
+ " }\n",
840
+ " }\n",
841
+ " ]\n",
842
+ " ],\n",
843
+ " \"llm_output\": null,\n",
844
+ " \"run\": null\n",
845
+ "}\n",
846
+ "\n",
847
+ "\n",
848
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] [1.45s] Exiting Chain run with output:\n",
849
+ "\u001b[0m{\n",
850
+ " \"text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
851
+ "}\n",
852
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] [1.46s] Exiting Chain run with output:\n",
853
+ "\u001b[0m{\n",
854
+ " \"output_text\": \"The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\"\n",
855
+ "}\n",
856
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:ConversationalRetrievalChain] [1.46s] Exiting Chain run with output:\n",
857
+ "\u001b[0m[outputs]\n",
858
+ "Q-001: albany mn population\n",
859
+ "A-001: The population of Albany, Minnesota is approximately 2,561 as of the 2010 census. However, according to a 2017 US Census estimate, the community population has increased to 2,662 people.\n",
860
+ "G-001: The population of Albany, Minnesota is 2,662. \n",
861
+ "\n",
862
+ "Q-002: current weather in volcano, ca\n",
863
+ "A-002: I don't have the current weather information for Volcano, CA.\n",
864
+ "G-002: The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.\n",
865
+ "\n",
866
+ "Q-003: ____________________ is considered the father of modern medicine.\n",
867
+ "A-003: Hippocrates is considered the father of modern medicine.\n",
868
+ "G-003: Hippocrates is considered the father of modern medicine.\n",
869
+ "\n",
870
+ "Q-004: how many days is an appraisal good for a fannie loan\n",
871
+ "A-004: According to the provided information, an appraisal for a Fannie Mae loan is typically valid for up to 120 days from the date of the Note.\n",
872
+ "G-004: An appraisal is good for 120 days from the date of the Note for a Fannie loan.\n",
873
+ "\n",
874
+ "Q-005: average pharmacy tech salary\n",
875
+ "A-005: The average salary for a pharmacy technician can vary depending on factors such as location, employer, and experience. However, based on the information provided, the average salary for a pharmacy technician is around $30,000 to $34,000 per year.\n",
876
+ "G-005: The average salary for a pharmacy technician is $26,000 to $39,000 in a year.\n",
877
+ "\n",
878
+ "\n",
879
+ "\n",
880
+ "scores: {\n",
881
+ " \"OVERALL\": {\n",
882
+ " \"bleu_scores\": {\n",
883
+ " \"bleu\": 0.3953488372093023,\n",
884
+ " \"precisions\": [\n",
885
+ " 0.3953488372093023\n",
886
+ " ],\n",
887
+ " \"brevity_penalty\": 1.0,\n",
888
+ " \"length_ratio\": 1.9253731343283582,\n",
889
+ " \"translation_length\": 129,\n",
890
+ " \"reference_length\": 67\n",
891
+ " },\n",
892
+ " \"rouge_scores\": {\n",
893
+ " \"rouge1\": 0.5737456342107505,\n",
894
+ " \"rouge2\": 0.4160794941282746,\n",
895
+ " \"rougeL\": 0.5108953062441435,\n",
896
+ " \"rougeLsum\": 0.4989862850327967\n",
897
+ " }\n",
898
+ " },\n",
899
+ " \"NUMERIC\": {\n",
900
+ " \"bleu_scores\": {\n",
901
+ " \"bleu\": 0.36111111111111116,\n",
902
+ " \"precisions\": [\n",
903
+ " 0.3611111111111111\n",
904
+ " ],\n",
905
+ " \"brevity_penalty\": 1.0,\n",
906
+ " \"length_ratio\": 2.4545454545454546,\n",
907
+ " \"translation_length\": 108,\n",
908
+ " \"reference_length\": 44\n",
909
+ " },\n",
910
+ " \"rouge_scores\": {\n",
911
+ " \"rouge1\": 0.5395760570179174,\n",
912
+ " \"rouge2\": 0.3694751662231337,\n",
913
+ " \"rougeL\": 0.4656557912371866,\n",
914
+ " \"rougeLsum\": 0.4656557912371866\n",
915
+ " }\n",
916
+ " },\n",
917
+ " \"DESCRIPTION\": {\n",
918
+ " \"bleu_scores\": {\n",
919
+ " \"bleu\": 0.5195179673581217,\n",
920
+ " \"precisions\": [\n",
921
+ " 0.5714285714285714\n",
922
+ " ],\n",
923
+ " \"brevity_penalty\": 0.909156442876713,\n",
924
+ " \"length_ratio\": 0.9130434782608695,\n",
925
+ " \"translation_length\": 21,\n",
926
+ " \"reference_length\": 23\n",
927
+ " },\n",
928
+ " \"rouge_scores\": {\n",
929
+ " \"rouge1\": 0.625,\n",
930
+ " \"rouge2\": 0.5,\n",
931
+ " \"rougeL\": 0.5833333333333334,\n",
932
+ " \"rougeLsum\": 0.5833333333333334\n",
933
+ " }\n",
934
+ " }\n",
935
+ "}\n",
936
+ "\n",
937
+ "CPU times: user 512 ms, sys: 63.7 ms, total: 576 ms\n",
938
+ "Wall time: 7.85 s\n"
939
+ ]
940
+ }
941
+ ],
942
+ "source": [
943
+ "%%time\n",
944
+ "\n",
945
+ "eval_ds = new_ds.select(range(5))\n",
946
+ "qa_chain = QAChainWithMsMacroDataset(eval_ds, llm_loader)\n",
947
+ "\n",
948
+ "answers = []\n",
949
+ "for i in range(eval_ds.num_rows):\n",
950
+ " inputs = {\"question\": str(eval_ds[i][\"query\"]), \"chat_history\": []}\n",
951
+ " result = qa_chain.call_chain(\n",
952
+ " inputs,\n",
953
+ " None,\n",
954
+ " None,\n",
955
+ " True,\n",
956
+ " )\n",
957
+ " answers.append(result[\"answer\"])\n",
958
+ "\n",
959
+ "result = calc_all_metrics(eval_ds, answers)\n",
960
+ "\n",
961
+ "for i in range(eval_ds.num_rows):\n",
962
+ " n = i + 1\n",
963
+ " print(f\"Q-{n:03d}: {eval_ds[i]['query']}\")\n",
964
+ " print(f\"A-{n:03d}: {answers[i]}\")\n",
965
+ " print(f\"G-{n:03d}: {eval_ds[i]['wellFormedAnswers'][0]}\\n\")\n",
966
+ "\n",
967
+ "print(f\"\\n\\nscores: {json.dumps(result, indent=2)}\\n\")"
968
+ ]
969
+ },
970
+ {
971
+ "cell_type": "code",
972
+ "execution_count": 13,
973
+ "id": "bae05024",
974
+ "metadata": {},
975
+ "outputs": [
976
+ {
977
+ "name": "stdout",
978
+ "output_type": "stream",
979
+ "text": [
980
+ "System: Use the following pieces of context to answer the users question. \n",
981
+ "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
982
+ "----------------\n",
983
+ "City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n",
984
+ "\n",
985
+ "Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\n",
986
+ "\n",
987
+ "For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
988
+ "\n",
989
+ "Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\n",
990
+ "\n",
991
+ "Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
992
+ "\n",
993
+ "Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\n",
994
+ "\n",
995
+ "For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\n",
996
+ "\n",
997
+ "For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\n",
998
+ "Human: albany mn population\n"
999
+ ]
1000
+ }
1001
+ ],
1002
+ "source": [
1003
+ "print(\n",
1004
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\\n\\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\\n\\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\\n\\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\\n\\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\\n\\nFor population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\\n\\nFor population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\\nHuman: albany mn population\"\n",
1005
+ ")"
1006
+ ]
1007
+ },
1008
+ {
1009
+ "cell_type": "code",
1010
+ "execution_count": 4,
1011
+ "id": "593f574a",
1012
+ "metadata": {},
1013
+ "outputs": [
1014
+ {
1015
+ "name": "stdout",
1016
+ "output_type": "stream",
1017
+ "text": [
1018
+ "System: Use the following pieces of context to answer the users question. \n",
1019
+ "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
1020
+ "----------------\n",
1021
+ "Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\n",
1022
+ "\n",
1023
+ "Many of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\n",
1024
+ "\n",
1025
+ "Despite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\n",
1026
+ "\n",
1027
+ "At least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\n",
1028
+ "\n",
1029
+ "460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\n",
1030
+ "\n",
1031
+ "TRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \n",
1032
+ "\n",
1033
+ "The two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\n",
1034
+ "\n",
1035
+ "Hippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\n",
1036
+ "\n",
1037
+ "Reload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\n",
1038
+ "\n",
1039
+ "However, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\n",
1040
+ "Human: ____________________ is considered the father of modern medicine.\n"
1041
+ ]
1042
+ }
1043
+ ],
1044
+ "source": [
1045
+ "print(\n",
1046
+ " \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\\n\\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\\n\\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\\n\\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\\n\\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\\n\\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \\n\\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\\n\\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\\n\\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\\n\\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\\nHuman: ____________________ is considered the father of modern medicine.\"\n",
1047
+ ")"
1048
+ ]
1049
+ },
1050
+ {
1051
+ "cell_type": "markdown",
1052
+ "id": "5b9204e0",
1053
+ "metadata": {},
1054
+ "source": [
1055
+ "```\n",
1056
+ "Q-003: ____________________ is considered the father of modern medicine.\n",
1057
+ "A-003: Hippocrates is considered the father of modern medicine.\n",
1058
+ "G-003: Hippocrates is considered the father of modern medicine.\n",
1059
+ "```"
1060
+ ]
1061
+ },
1062
+ {
1063
+ "cell_type": "code",
1064
+ "execution_count": 11,
1065
+ "id": "5cfc8320",
1066
+ "metadata": {},
1067
+ "outputs": [
1068
+ {
1069
+ "data": {
1070
+ "text/plain": [
1071
+ "{'answers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.'],\n",
1072
+ " 'passages': {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0],\n",
1073
+ " 'passage_text': ['Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.',\n",
1074
+ " 'Current U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.',\n",
1075
+ " 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.',\n",
1076
+ " 'Volcano, CA Weather Data. 1 Volcano, CA Current Weather Data. 2 Sponsored. 3 Volcano, CA Historical Weather Trends. Volcano, CA area 1 Highlights. Volcano, CA Chance of Sunshine. Volcano, CA Historical 1 Temperature. Volcano, CA Rainfall and Snowfall Average. Volcano, CA Energy Demand.',\n",
1077
+ " 'Volcano Weather. Volcano weather and daily current conditions with summary and 5 Day forecast including humidity, precipitation, high and low temperatures presented in Fahrenheit and Celsius, barometric pressure, heat index, wind chill, hourly forecast, sunrise, sunset, wind speed with direction, and more.',\n",
1078
+ " 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. There is 83 percentage chance of rain and 2 mph winds from the East. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. There is 77 percentage chance of rain and 2 mph winds from the East.',\n",
1079
+ " 'Volcano 7 Day Weather. 1 Monday:The Volcano forecast for Apr 03 is 58 degrees and Sunny. There is 34 percentage chance of rain and 5 mph winds from the West. 2 Tuesday:The Volcano forecast for Apr 04 is 59 degrees and Sunny. There is 33 percentage chance of rain and 5 mph winds from the West-Southwest.',\n",
1080
+ " 'Volcano 10 Day Weather. 1 Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. 2 Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. 3 Tuesday:The Volcano forecast for Apr 11 is 49 degrees and Patchy rain possible. Wednesday:The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.',\n",
1081
+ " 'Volcano, CA weather and traffic updates by locals. Write your own weather report, forecast, or traffic update: Please note by clicking on Post you acknowledge that you have read the Terms of Service and the report and/or forecast you are posting is in compliance with such terms. Be respectful.',\n",
1082
+ " 'Hourly Forecast Detailed. 1 0am:The Volcano, CA forecast for Apr 03 is 48 degrees and Patchy rain possible. 2 3am:The Volcano, CA forecast for Apr 03 is 44 degrees and Clear. 3 6am:The Volcano, CA forecast for Apr 03 is 41 degrees and Clear. 9am:The Volcano, CA forecast for Apr 03 is 48 degrees and Sunny.'],\n",
1083
+ " 'url': ['http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1084
+ " 'http://www.fastweather.com/index.php?city=Volcano_CA&g',\n",
1085
+ " 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1086
+ " 'http://www.homefacts.com/weather/California/Amador-County/Volcano.html',\n",
1087
+ " 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
1088
+ " 'http://www.weatherman.com/us/ca/volcano',\n",
1089
+ " 'http://www.weatherman.com/us/ca/volcano',\n",
1090
+ " 'http://www.weatherman.com/us/ca/zip-codes/95689-10-day-weather',\n",
1091
+ " 'http://www.localconditions.com/weather-volcano-california/95689/',\n",
1092
+ " 'http://www.weatherman.com/us/ca/volcano']},\n",
1093
+ " 'query': 'current weather in volcano, ca',\n",
1094
+ " 'query_id': 114414,\n",
1095
+ " 'query_type': 'DESCRIPTION',\n",
1096
+ " 'wellFormedAnswers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.']}"
1097
+ ]
1098
+ },
1099
+ "execution_count": 11,
1100
+ "metadata": {},
1101
+ "output_type": "execute_result"
1102
+ }
1103
+ ],
1104
+ "source": [
1105
+ "test_ds = new_ds.select([1])\n",
1106
+ "test_ds[0]"
1107
+ ]
1108
+ },
1109
+ {
1110
+ "cell_type": "code",
1111
+ "execution_count": 12,
1112
+ "id": "56b91cae",
1113
+ "metadata": {},
1114
+ "outputs": [
1115
+ {
1116
+ "data": {
1117
+ "text/plain": [
1118
+ "{'bleu_scores': {'bleu': 1.0,\n",
1119
+ " 'precisions': [1.0, 1.0, 1.0, 1.0],\n",
1120
+ " 'brevity_penalty': 1.0,\n",
1121
+ " 'length_ratio': 1.0,\n",
1122
+ " 'translation_length': 14,\n",
1123
+ " 'reference_length': 14},\n",
1124
+ " 'rouge_scores': {'rouge1': 1.0,\n",
1125
+ " 'rouge2': 1.0,\n",
1126
+ " 'rougeL': 1.0,\n",
1127
+ " 'rougeLsum': 1.0}}"
1128
+ ]
1129
+ },
1130
+ "execution_count": 12,
1131
+ "metadata": {},
1132
+ "output_type": "execute_result"
1133
+ }
1134
+ ],
1135
+ "source": [
1136
+ "calc_metrics(test_ds)"
1137
+ ]
1138
+ },
1139
+ {
1140
+ "cell_type": "code",
1141
+ "execution_count": 18,
1142
+ "id": "56c6bf24",
1143
+ "metadata": {},
1144
+ "outputs": [
1145
+ {
1146
+ "data": {
1147
+ "text/plain": [
1148
+ "['The',\n",
1149
+ " 'Volcano',\n",
1150
+ " 'forecast',\n",
1151
+ " 'for',\n",
1152
+ " 'Apr',\n",
1153
+ " '12',\n",
1154
+ " 'is',\n",
1155
+ " '52',\n",
1156
+ " 'degrees',\n",
1157
+ " 'and',\n",
1158
+ " 'Patchy',\n",
1159
+ " 'light',\n",
1160
+ " 'rain.']"
1161
+ ]
1162
+ },
1163
+ "execution_count": 18,
1164
+ "metadata": {},
1165
+ "output_type": "execute_result"
1166
+ }
1167
+ ],
1168
+ "source": [
1169
+ "test_ds[0][\"answers\"][0].split()"
1170
+ ]
1171
+ },
1172
+ {
1173
+ "cell_type": "code",
1174
+ "execution_count": 19,
1175
+ "id": "77d08267",
1176
+ "metadata": {},
1177
+ "outputs": [
1178
+ {
1179
+ "data": {
1180
+ "text/plain": [
1181
+ "13"
1182
+ ]
1183
+ },
1184
+ "execution_count": 19,
1185
+ "metadata": {},
1186
+ "output_type": "execute_result"
1187
+ }
1188
+ ],
1189
+ "source": [
1190
+ "len(test_ds[0][\"answers\"][0].split())"
1191
+ ]
1192
+ },
1193
+ {
1194
+ "cell_type": "code",
1195
+ "execution_count": 22,
1196
+ "id": "8c19694b",
1197
+ "metadata": {},
1198
+ "outputs": [
1199
+ {
1200
+ "data": {
1201
+ "text/plain": [
1202
+ "{'answers': ['From $26,000 to $39,000 a year'],\n",
1203
+ " 'passages': {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
1204
+ " 'passage_text': ['If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.',\n",
1205
+ " 'What can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.',\n",
1206
+ " 'The median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescription medications will lead to more demand for pharmaceutical services.',\n",
1207
+ " 'The majority of pharmacy techs work in drug stores and hospitals, where the average annual salary was $28,940 and $34,410, respectively**. However, a higher salary can be had if you can find employment with outpatient care centers or physicians’ offices, where the annual pay is in the $37,000-$39,000 range.',\n",
1208
+ " 'The pharmacy technician salary** depends on a number of factors, from the area and type of employer, to your educational background. Browse pharmacy tech pay for a comparison between similar careers, geographic location, educational and certification requirements, and more.',\n",
1209
+ " \"Pharmacy Technician Salary. A Pharmacy Technician earns an average wage of $12.68 per hour. The skills that increase pay for this job the most are Mail Order Pharmacy and Long Term Care. People in this job generally don't have more than 20 years' experience. $18,722 - $48,714.\",\n",
1210
+ " 'Popular Companies. * Please note that all salary figures are approximations based upon third party submissions to Simply Hired. These figures are given to Simply Hired users for the purpose of generalized comparison only. Minimum wage may differ by jurisdiction and you should consult the employer for actual salary figures.',\n",
1211
+ " 'Pharmacy Technician average salary is $30,288, median salary is $30,534 with a salary range from $21,570 to $34,320. Pharmacy Technician salaries are collected from government agencies and companies. Each salary is associated with a real job position. Pharmacy Technician salary statistics is not exclusive and is for reference only.',\n",
1212
+ " 'It also states that pharmacy technicians working in an acute care hospital earn an average salary of $37,000 per year, while those working for the military or a pharmaceutical company earn an average salary of $38,000 per year. This represents a difference of more than $10,000, simply due to the health care setting.',\n",
1213
+ " 'Occupational Employment and Wages, May 2016. 29-2052 Pharmacy Technicians. Prepare medications under the direction of a pharmacist. May measure, mix, count out, label, and record amounts and dosages of medications according to prescription orders. National estimates for this occupation. Industry profile for this occupation.'],\n",
1214
+ " 'url': ['http://www.pharmacytechschools.com/salary/',\n",
1215
+ " 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
1216
+ " 'https://www.bls.gov/ooh/healthcare/pharmacy-technicians.htm',\n",
1217
+ " 'http://www.pharmacytechschools.com/salary/',\n",
1218
+ " 'http://www.pharmacytechschools.com/salary/',\n",
1219
+ " 'http://www.payscale.com/research/US/Job=Pharmacy_Technician/Hourly_Rate',\n",
1220
+ " 'http://www.simplyhired.com/salaries-k-certified-pharmacy-technician-jobs.html',\n",
1221
+ " 'https://www.salarylist.com/jobs/Pharmacy-Technician-Salary.htm',\n",
1222
+ " 'http://www.pharmacytimes.com/contributor/alex-barker-pharmd/2015/06/guide-to-pharmacy-technician-salaries',\n",
1223
+ " 'https://www.bls.gov/oes/current/oes292052.htm']},\n",
1224
+ " 'query': 'average pharmacy tech salary',\n",
1225
+ " 'query_id': 40287,\n",
1226
+ " 'query_type': 'NUMERIC',\n",
1227
+ " 'wellFormedAnswers': ['The average salary for a pharmacy technician is $26,000 to $39,000 in a year.',\n",
1228
+ " 'The average salary for a pharmacy technician is from $26,000 to $39,000 a year.']}"
1229
+ ]
1230
+ },
1231
+ "execution_count": 22,
1232
+ "metadata": {},
1233
+ "output_type": "execute_result"
1234
+ }
1235
+ ],
1236
+ "source": [
1237
+ "test_ds = new_ds.select([4])\n",
1238
+ "test_ds[0]"
1239
+ ]
1240
+ },
1241
+ {
1242
+ "cell_type": "code",
1243
+ "execution_count": 23,
1244
+ "id": "34209164",
1245
+ "metadata": {},
1246
+ "outputs": [
1247
+ {
1248
+ "data": {
1249
+ "text/plain": [
1250
+ "{'bleu_scores': {'bleu': 0.19303951204286907,\n",
1251
+ " 'precisions': [0.875, 0.7142857142857143, 0.5, 0.4],\n",
1252
+ " 'brevity_penalty': 0.32465246735834974,\n",
1253
+ " 'length_ratio': 0.47058823529411764,\n",
1254
+ " 'translation_length': 8,\n",
1255
+ " 'reference_length': 17},\n",
1256
+ " 'rouge_scores': {'rouge1': 0.5833333333333334,\n",
1257
+ " 'rouge2': 0.4545454545454545,\n",
1258
+ " 'rougeL': 0.5833333333333334,\n",
1259
+ " 'rougeLsum': 0.5833333333333334}}"
1260
+ ]
1261
+ },
1262
+ "execution_count": 23,
1263
+ "metadata": {},
1264
+ "output_type": "execute_result"
1265
+ }
1266
+ ],
1267
+ "source": [
1268
+ "calc_metrics(test_ds)"
1269
+ ]
1270
+ }
1271
+ ],
1272
+ "metadata": {
1273
+ "kernelspec": {
1274
+ "display_name": "Python 3 (ipykernel)",
1275
+ "language": "python",
1276
+ "name": "python3"
1277
+ },
1278
+ "language_info": {
1279
+ "codemirror_mode": {
1280
+ "name": "ipython",
1281
+ "version": 3
1282
+ },
1283
+ "file_extension": ".py",
1284
+ "mimetype": "text/x-python",
1285
+ "name": "python",
1286
+ "nbconvert_exporter": "python",
1287
+ "pygments_lexer": "ipython3",
1288
+ "version": "3.10.9"
1289
+ }
1290
+ },
1291
+ "nbformat": 4,
1292
+ "nbformat_minor": 5
1293
+ }
README.md CHANGED
@@ -1,13 +1,53 @@
1
  ---
2
- title: Phi-3-mini-128k-instruct
3
  emoji: 🤖💬
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 4.19.1
8
  app_file: app.py
9
  pinned: true
10
  short_description: 'Chat with LLMs'
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Chat with LLMs
3
  emoji: 🤖💬
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.26.0
8
  app_file: app.py
9
  pinned: true
10
  short_description: 'Chat with LLMs'
11
  ---
12
 
13
+ ## Running Locally
14
+
15
+ 1. Check pre-conditions:
16
+
17
+ - [Git Large File Storage (LFS)](https://git-lfs.com/) must have been installed.
18
+ - Run `python --version` to make sure you're running Python version 3.10 or above.
19
+ - The latest PyTorch with GPU support must have been installed. Here is a sample `conda` command:
20
+
21
+ ```
22
+ conda install -y pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
23
+ ```
24
+
25
+
26
+ 2. Clone the repo
27
+
28
+ ```
29
+ git lfs install
30
+ git clone https://huggingface.co/spaces/inflaton-ai/llm-qa-bench
31
+ ```
32
+
33
+ 3. Install packages
34
+
35
+ ```
36
+ pip install -r requirements.txt
37
+ ```
+
38
+ 4. Set up your environment variables
39
+
40
+ - By default, environment variables are loaded from `.env.example` file
41
+ - If you don't want to use the default settings, copy `.env.example` into `.env`. You can then update it for your local runs.
42
+
43
+ 5. Run automated test:
44
+
45
+ ```
46
+ python qa_chain_test.py
47
+ ```
48
+
49
+ 6. Start the local server at `http://localhost:7860`:
50
+
51
+ ```
52
+ python app.py
53
+ ```
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import torch
3
  from transformers import (
@@ -8,7 +9,6 @@ from transformers import (
8
  import os
9
  from threading import Thread
10
  import spaces
11
- import time
12
  import subprocess
13
 
14
  subprocess.run(
@@ -17,20 +17,51 @@ subprocess.run(
17
  shell=True,
18
  )
19
 
20
- token = os.environ["HF_TOKEN"]
 
 
 
21
 
 
 
 
 
 
 
 
 
 
22
 
23
  model = AutoModelForCausalLM.from_pretrained(
24
- "microsoft/Phi-3-mini-128k-instruct",
25
  token=token,
26
  trust_remote_code=True,
27
  )
28
- tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)
29
  terminators = [
30
  tok.eos_token_id,
31
  ]
32
 
33
- if torch.cuda.is_available():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  device = torch.device("cuda")
35
  print(f"Using GPU: {torch.cuda.get_device_name(device)}")
36
  else:
@@ -38,27 +69,34 @@ else:
38
  print("Using CPU")
39
 
40
  model = model.to(device)
41
- # Dispatch Errors
42
 
43
 
44
  @spaces.GPU(duration=60)
45
- def chat(message, history, temperature, do_sample, max_tokens):
 
46
  chat = []
47
  for item in history:
48
  chat.append({"role": "user", "content": item[0]})
49
  if item[1] is not None:
50
  chat.append({"role": "assistant", "content": item[1]})
 
 
 
 
 
 
51
  chat.append({"role": "user", "content": message})
 
52
  messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
53
  model_inputs = tok([messages], return_tensors="pt").to(device)
54
  streamer = TextIteratorStreamer(
55
- tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
56
  )
57
  generate_kwargs = dict(
58
  model_inputs,
59
  streamer=streamer,
60
  max_new_tokens=max_tokens,
61
- do_sample=True,
62
  temperature=temperature,
63
  eos_token_id=terminators,
64
  )
@@ -79,8 +117,7 @@ def chat(message, history, temperature, do_sample, max_tokens):
79
 
80
  demo = gr.ChatInterface(
81
  fn=chat,
82
- examples=[["Write me a poem about Machine Learning."]],
83
- # multimodal=False,
84
  additional_inputs_accordion=gr.Accordion(
85
  label="⚙️ Parameters", open=False, render=False
86
  ),
@@ -88,6 +125,14 @@ demo = gr.ChatInterface(
88
  gr.Slider(
89
  minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
90
  ),
 
 
 
 
 
 
 
 
91
  gr.Checkbox(label="Sampling", value=True),
92
  gr.Slider(
93
  minimum=128,
@@ -100,6 +145,6 @@ demo = gr.ChatInterface(
100
  ],
101
  stop_btn="Stop Generation",
102
  title="Chat With LLMs",
103
- description="Now Running [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct)",
104
  )
105
  demo.launch()
 
1
+ import json
2
  import gradio as gr
3
  import torch
4
  from transformers import (
 
9
  import os
10
  from threading import Thread
11
  import spaces
 
12
  import subprocess
13
 
14
  subprocess.run(
 
17
  shell=True,
18
  )
19
 
20
# Hugging Face access token; None is acceptable for public models.
token = os.getenv("HF_TOKEN")

# Model to serve; override with the MODEL_NAME env var.
model_name = (
    os.getenv("MODEL_NAME") or "google/gemma-1.1-2b-it"
)  # "microsoft/Phi-3-mini-128k-instruct"

# JSON file holding the benchmark questions; each entry is expected to have
# a "question" key (and a "context" key used when building RAG prompts).
questions_file_path = (
    os.getenv("QUESTIONS_FILE_PATH") or "./data/datasets/ms_macro.json"
)

# Load the questions once at startup. A context manager closes the file
# promptly; the original `json.loads(open(...).read())` leaked the handle.
with open(questions_file_path, encoding="utf-8") as questions_file:
    questions = json.load(questions_file)

# One-element lists, the shape gr.ChatInterface expects for `examples`.
examples = [[question["question"].strip()] for question in questions]
print(f"Loaded {len(examples)} examples")

qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
34
 
35
  model = AutoModelForCausalLM.from_pretrained(
36
+ model_name,
37
  token=token,
38
  trust_remote_code=True,
39
  )
40
+ tok = AutoTokenizer.from_pretrained(model_name, token=token)
41
  terminators = [
42
  tok.eos_token_id,
43
  ]
44
 
45
+ # Check that MPS is available
46
+ if not torch.backends.mps.is_available():
47
+ if not torch.backends.mps.is_built():
48
+ print(
49
+ "MPS not available because the current PyTorch install was not "
50
+ "built with MPS enabled."
51
+ )
52
+ else:
53
+ print(
54
+ "MPS not available because the current MacOS version is not 12.3+ "
55
+ "and/or you do not have an MPS-enabled device on this machine."
56
+ )
57
+ mps_device = None
58
+ else:
59
+ mps_device = torch.device("mps")
60
+
61
+ if mps_device is not None:
62
+ device = mps_device
63
+ print("Using MPS")
64
+ elif torch.cuda.is_available():
65
  device = torch.device("cuda")
66
  print(f"Using GPU: {torch.cuda.get_device_name(device)}")
67
  else:
 
69
  print("Using CPU")
70
 
71
  model = model.to(device)
 
72
 
73
 
74
  @spaces.GPU(duration=60)
75
+ def chat(message, history, temperature, repetition_penalty, do_sample, max_tokens):
76
+ print("repetition_penalty:", repetition_penalty)
77
  chat = []
78
  for item in history:
79
  chat.append({"role": "user", "content": item[0]})
80
  if item[1] is not None:
81
  chat.append({"role": "assistant", "content": item[1]})
82
+
83
+ if [message] in examples:
84
+ index = examples.index([message])
85
+ message = f"{qa_system_prompt}\n\n{questions[index]['context']}\n\nQuestion: {message}"
86
+ print(message)
87
+
88
  chat.append({"role": "user", "content": message})
89
+
90
  messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
91
  model_inputs = tok([messages], return_tensors="pt").to(device)
92
  streamer = TextIteratorStreamer(
93
+ tok, timeout=200.0, skip_prompt=True, skip_special_tokens=True
94
  )
95
  generate_kwargs = dict(
96
  model_inputs,
97
  streamer=streamer,
98
  max_new_tokens=max_tokens,
99
+ do_sample=do_sample,
100
  temperature=temperature,
101
  eos_token_id=terminators,
102
  )
 
117
 
118
  demo = gr.ChatInterface(
119
  fn=chat,
120
+ examples=examples,
 
121
  additional_inputs_accordion=gr.Accordion(
122
  label="⚙️ Parameters", open=False, render=False
123
  ),
 
125
  gr.Slider(
126
  minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
127
  ),
128
+ gr.Slider(
129
+ minimum=1.0,
130
+ maximum=1.5,
131
+ step=0.1,
132
+ value=1.2,
133
+ label="Repetition Penalty",
134
+ render=False,
135
+ ),
136
  gr.Checkbox(label="Sampling", value=True),
137
  gr.Slider(
138
  minimum=128,
 
145
  ],
146
  stop_btn="Stop Generation",
147
  title="Chat With LLMs",
148
+ description=f"Now Running [{model_name}](https://huggingface.co/{model_name})",
149
  )
150
  demo.launch()
app_modules/init.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main entrypoint for the app."""
2
+
3
+ import os
4
+ from timeit import default_timer as timer
5
+ from typing import List, Optional
6
+
7
+ from dotenv import find_dotenv, load_dotenv
8
+ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
9
+ from langchain.vectorstores.chroma import Chroma
10
+ from langchain.vectorstores.faiss import FAISS
11
+
12
+ from app_modules.llm_loader import LLMLoader
13
+ from app_modules.utils import get_device_types, init_settings, load_spacy_model
14
+
15
# Load environment variables: prefer a user-supplied .env, falling back to
# the checked-in .env.example when no .env exists.
found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
# override=False: variables already set in the real environment win over
# values from the dotenv file.
load_dotenv(found_dotenv, override=False)

# Constants
init_settings()

# Opt-in verbose tracing of LangChain internals (env: LANGCHAIN_DEBUG).
if os.environ.get("LANGCHAIN_DEBUG") == "true":
    from langchain.globals import set_debug

    set_debug(True)

# Imported only after the env vars are loaded — llm_chat_chain, at least,
# reads configuration (CHAT_HISTORY_ENABLED) at import time.
from app_modules.llm_qa_chain import QAChain
from app_modules.llm_chat_chain import ChatChain
import nltk
33
+
34
+
35
def app_init():
    """Initialize the QA application.

    Downloads NLTK data, optionally loads the embeddings model and the
    FAISS/Chroma vector store (when RAG is enabled and retrieval is not
    served from a questions file), then constructs the LLM loader and the
    QA or chat chain.

    Returns:
        tuple: ``(llm_loader, qa_chain)`` — ``qa_chain`` is a ``QAChain``
        when QA_WITH_RAG is enabled, otherwise a ``ChatChain``.

    Raises:
        ValueError: when a vector store is required but the index path env
        var is unset, or the index directory does not exist on disk.
    """
    # https://github.com/huggingface/transformers/issues/17611
    os.environ["CURL_CA_BUNDLE"] = ""

    nltk.download("punkt")

    hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
    print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
    print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")

    hf_embeddings_model_name = (
        os.environ.get("HF_EMBEDDINGS_MODEL_NAME") or "hkunlp/instructor-xl"
    )

    # NOTE: "n_threds" (sic) matches the keyword expected by LLMLoader.init.
    n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
    index_path = os.environ.get("FAISS_INDEX_PATH") or os.environ.get(
        "CHROMADB_INDEX_PATH"
    )
    using_faiss = os.environ.get("FAISS_INDEX_PATH") is not None
    llm_model_type = os.environ.get("LLM_MODEL_TYPE")

    debug_metrics = os.getenv("DEBUG_METRICS", "false").lower() == "true"

    if debug_metrics:
        start = timer()
        load_spacy_model()
        end = timer()
        print(f"Completed in {end - start:.3f}s")

    qa_with_rag = os.getenv("QA_WITH_RAG", "true").lower() == "true"
    print(f"qa_with_rag: {qa_with_rag}")

    retrieve_from_questions_file = os.getenv("RETRIEVER_TYPE") == "questions_file"
    print(f"retrieve_from_questions_file: {retrieve_from_questions_file}", flush=True)

    # Embeddings are needed both for the vector store and for metric
    # debugging. Named intermediate makes the original ambiguous
    # "a and b or c" precedence explicit (Python already bound it as
    # "(a and b) or c", so behavior is unchanged).
    use_vectorstore = qa_with_rag and not retrieve_from_questions_file
    if use_vectorstore or debug_metrics:
        print(f"hf_embeddings_model_name: {hf_embeddings_model_name}")
        start = timer()
        embeddings = HuggingFaceInstructEmbeddings(
            model_name=hf_embeddings_model_name,
            model_kwargs={"device": hf_embeddings_device_type},
        )
        end = timer()

        print(f"Completed in {end - start:.3f}s")

    vectorstore = None
    if use_vectorstore:
        start = timer()

        print(
            f"Load index from {index_path} with {'FAISS' if using_faiss else 'Chroma'}"
        )

        # Fail fast with a clear message instead of the TypeError that
        # os.path.isdir(None) would raise when neither env var is set.
        if index_path is None:
            raise ValueError(
                "FAISS_INDEX_PATH or CHROMADB_INDEX_PATH must be set when using a vector store"
            )

        if not os.path.isdir(index_path):
            raise ValueError(f"{index_path} does not exist!")
        elif using_faiss:
            vectorstore = FAISS.load_local(
                index_path, embeddings, allow_dangerous_deserialization=True
            )
        else:
            vectorstore = Chroma(
                embedding_function=embeddings, persist_directory=index_path
            )

        end = timer()

        print(f"Completed in {end - start:.3f}s")

    start = timer()
    llm_loader = LLMLoader(llm_model_type)
    llm_loader.init(n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type)
    # With RAG the chain answers from the vector store (or questions file);
    # otherwise it is a plain chat chain.
    if qa_with_rag:
        qa_chain = QAChain(vectorstore, llm_loader)
    else:
        qa_chain = ChatChain(llm_loader)
    end = timer()
    print(f"Completed in {end - start:.3f}s")

    return llm_loader, qa_chain
app_modules/llm_chat_chain.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+
5
+ from langchain.chains import ConversationChain, LLMChain
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain.chains.base import Chain
8
+
9
+ from app_modules.llm_inference import LLMInference
10
+ from app_modules.utils import CustomizedConversationSummaryBufferMemory
11
+ from langchain.chains import LLMChain
12
+ from langchain.globals import get_debug
13
+
14
# Include conversation history in prompts when CHAT_HISTORY_ENABLED=true.
chat_history_enabled = os.getenv("CHAT_HISTORY_ENABLED", "false").lower() == "true"
# Llama/Mistral instruction delimiters.
B_INST, E_INST = "[INST]", "[/INST]"

_ORCA_SYSTEM_PROMPT = (
    "You are Orca, an AI language model created by Microsoft. "
    "You are a cautious assistant. You carefully follow instructions. "
    "You are helpful and harmless and you follow ethical guidelines "
    "and promote positive behavior."
)


def get_system_prompt_and_user_message(orca=False):
    """Return ``(system_prompt, user_message_template)`` for prompt building.

    When ``orca`` is True the Orca-specific system prompt is used. When chat
    history is enabled, the history placeholder is prepended to the user
    message and the system prompt is extended accordingly.
    """
    if orca:
        system_prompt = _ORCA_SYSTEM_PROMPT
    else:
        system_prompt = "You are a chatbot having a conversation with a human."

    user_message = "{input}"
    if chat_history_enabled:
        system_prompt += " Read the chat history to get context."
        user_message = "Chat History:\n\n{history} \n\n" + user_message

    return system_prompt, user_message
33
+
34
+
35
def create_llama_2_prompt_template():
    """Build a Llama-2 style prompt: [INST] <<SYS>>...<</SYS>> user [/INST]."""
    sys_open, sys_close = "<<SYS>>\n", "\n<</SYS>>\n\n"
    system_prompt, user_message = get_system_prompt_and_user_message()
    return f"{B_INST}{sys_open}{system_prompt}{sys_close}{user_message}{E_INST}"
43
+
44
+
45
def create_llama_3_prompt_template():
    # Llama-3 chat format: header/eot special tokens around the system and
    # user turns; the f-string interpolates the prompt texts.
    system_prompt, user_message = get_system_prompt_and_user_message()
    prompt_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{ system_prompt }<|eot_id|><|start_header_id|>user<|end_header_id|>
{ user_message }<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

    return prompt_template
53
+
54
+
55
def create_phi_3_prompt_template():
    # Phi-3 chat format: <|system|>/<|user|>/<|assistant|> turns each closed
    # with <|end|>; the f-string interpolates the prompt texts.
    system_prompt, user_message = get_system_prompt_and_user_message()
    prompt_template = f"""<|system|>
{ system_prompt }<|end|>
<|user|>
{ user_message }<|end|>
<|assistant|>
"""

    return prompt_template
65
+
66
+
67
def create_orca_2_prompt_template():
    # ChatML-style prompt (<|im_start|>/<|im_end|>) used by Orca-2.
    # NOTE(review): despite the function name this passes orca=False, so the
    # generic chatbot system prompt is used instead of the Orca one — confirm
    # whether this is intentional.
    system_prompt, user_message = get_system_prompt_and_user_message(orca=False)

    prompt_template = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
    return prompt_template
72
+
73
+
74
def create_mistral_prompt_template():
    """Build a Mistral [INST] prompt with the system prompt inlined before
    the user turn (Mistral has no separate system-prompt slot)."""
    system_prompt, user_message = get_system_prompt_and_user_message()
    return "".join([B_INST, system_prompt, "\n\n", user_message, E_INST])
79
+
80
+
81
def create_gemma_prompt_template():
    """Return the Gemma chat turn template (Gemma has no system prompt)."""
    turns = ["<start_of_turn>user\n{input}<end_of_turn>", "<start_of_turn>model\n"]
    return "\n".join(turns)
83
+
84
+
85
def create_prompt_template(model_name):
    """Select a model-family-specific chat prompt template by model name.

    Falls back to a generic Human/Chatbot template (with or without history)
    when the model family is not recognized.
    """
    print(f"creating prompt template for model: {model_name}")

    lowered = model_name.lower()
    if re.search(r"llama-?2", model_name, re.IGNORECASE):
        return create_llama_2_prompt_template()
    if re.search(r"llama-?3", model_name, re.IGNORECASE):
        return create_llama_3_prompt_template()
    if re.search(r"phi-?3", model_name, re.IGNORECASE):
        return create_phi_3_prompt_template()
    if lowered.startswith("orca"):
        return create_orca_2_prompt_template()
    if lowered.startswith("mistral"):
        return create_mistral_prompt_template()
    if lowered.startswith("gemma"):
        return create_gemma_prompt_template()

    # Generic fallback templates.
    if chat_history_enabled:
        return """You are a chatbot having a conversation with a human.
{history}
Human: {input}
Chatbot:"""
    return """You are a chatbot having a conversation with a human.
Human: {input}
Chatbot:"""
110
+
111
+
112
class ChatChain(LLMInference):
    """Plain conversational chain (no retrieval).

    Builds a model-family-specific prompt template and wires it into either a
    ConversationChain (with summarizing memory, when chat history is enabled)
    or a simple LLMChain.
    """

    def __init__(self, llm_loader):
        super().__init__(llm_loader)

    def create_chain(self) -> Chain:
        # Prompt template depends on the loaded model's family name.
        template = create_prompt_template(self.llm_loader.model_name)
        print(f"template: {template}")

        if chat_history_enabled:
            prompt = PromptTemplate(
                input_variables=["history", "input"], template=template
            )
            # Summarizing buffer keeps the history under the token limit.
            memory = CustomizedConversationSummaryBufferMemory(
                llm=self.llm_loader.llm, max_token_limit=1024, return_messages=False
            )

            llm_chain = ConversationChain(
                llm=self.llm_loader.llm,
                prompt=prompt,
                verbose=False,
                memory=memory,
            )
        else:
            prompt = PromptTemplate(input_variables=["input"], template=template)
            llm_chain = LLMChain(llm=self.llm_loader.llm, prompt=prompt)

        return llm_chain

    def _process_inputs(self, inputs):
        # Single input: unwrap the question for the prompt template.
        if not isinstance(inputs, list):
            inputs = {"input": inputs["question"]}
        elif self.llm_loader.llm_model_type == "huggingface":
            # Batched HF pipeline expects chat-message dicts per input.
            inputs = [
                [
                    {
                        "role": "system",
                        "content": self.get_system_message(i),
                    },
                    {
                        "role": "user",
                        "content": self.get_user_message(i),
                    },
                ]
                for i in inputs
            ]
        else:
            inputs = [{"input": i["question"]} for i in inputs]

        if get_debug():
            print("_process_inputs:", json.dumps(inputs, indent=4))

        return inputs

    def get_system_message(self, input) -> str:
        # Returns the system prompt text (annotation corrected from Chain).
        return get_system_prompt_and_user_message()[0]

    def get_user_message(self, input) -> str:
        # Returns the raw question text (annotation corrected from Chain).
        return input["question"]
app_modules/llm_inference.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+ import json
3
+ import os
4
+ import re
5
+ import time
6
+ import urllib
7
+ from queue import Queue
8
+ from threading import Thread
9
+ from typing import List, Optional
10
+ from urllib.parse import quote, urlparse, urlunparse
11
+
12
+ from langchain.chains.base import Chain
13
+
14
+ from app_modules.llm_loader import LLMLoader, TextIteratorStreamer
15
+ from app_modules.utils import remove_extra_spaces
16
+
17
+
18
class LLMInference(metaclass=abc.ABCMeta):
    """Base class for inference wrappers around a LangChain chain.

    Subclasses implement create_chain(); this class provides lazy chain
    creation, single vs. batch invocation, result normalization, and — for
    HuggingFace pipelines — token streaming via a background thread.
    """

    def __init__(self, llm_loader):
        self.llm_loader = llm_loader
        self.chain = None  # created lazily by get_chain()
        # Strips a trailing special token (e.g. " <|end|>") from answers.
        self.pattern = re.compile(r"\s*<.+>$")

    @abc.abstractmethod
    def create_chain(self) -> Chain:
        """Build and return the underlying LangChain chain."""
        pass

    def get_chain(self) -> Chain:
        # Lazily create and cache the chain.
        if self.chain is None:
            self.chain = self.create_chain()

        return self.chain

    def reset(self) -> None:
        # Drop the cached chain so the next call rebuilds it.
        self.chain = None

    def _process_inputs(self, inputs):
        # Hook for subclasses to reshape inputs; identity by default.
        return inputs

    def _normalize_result(self, result):
        # print(f"_normalize_result: {result}")
        # HuggingFace batch output nests each result in a one-element list.
        if isinstance(result, list):
            result = result[0]

        # Unify "text"/"generated_text" result keys under "answer".
        key = "text" if "text" in result else "generated_text"
        if key in result:
            result["answer"] = result[key]
            del result[key]

        result["answer"] = self.pattern.sub("", result["answer"])
        return result

    def _process_results(self, results):
        if isinstance(results, list):
            return [self._normalize_result(result) for result in results]

        return self._normalize_result(results)

    def _run_batch(self, chain, inputs):
        # HuggingFace: call the raw pipeline for true batched generation;
        # other backends go through LangChain's batch API.
        if self.llm_loader.llm_model_type == "huggingface":
            results = self.llm_loader.llm.pipeline(inputs)
        else:
            results = chain.batch(inputs)

        return results

    def run_chain(self, chain, inputs, callbacks: Optional[List] = []):
        # NOTE(review): mutable default argument; harmless only while callees
        # never mutate the list — confirm before relying on it.
        inputs = self._process_inputs(inputs)

        # check if inputs is an array
        if isinstance(inputs, list):
            results = self._run_batch(chain, inputs)
        else:
            results = chain.invoke(inputs, {"callbacks": callbacks})

        return self._process_results(results)

    def call_chain(
        self,
        inputs,
        streaming_handler,
        q: Queue = None,
        testing: bool = False,
    ):
        """Run the chain, optionally streaming tokens to streaming_handler.

        When HF streaming is enabled, access is serialized with a lock since
        the loader holds a single shared streamer instance.
        """
        print(json.dumps(inputs, indent=4))
        if self.llm_loader.huggingfaceStreamingEnabled():
            self.llm_loader.lock.acquire()

        try:
            if self.llm_loader.huggingfaceStreamingEnabled():
                # Point the streamer at the caller-supplied queue (if any).
                self.llm_loader.streamer.reset(q)

            chain = self.get_chain()
            result = (
                self._run_chain_with_streaming_handler(
                    chain, inputs, streaming_handler, testing
                )
                if streaming_handler is not None
                else self.run_chain(chain, inputs)
            )

            if "answer" in result:
                result["answer"] = remove_extra_spaces(result["answer"])

            return result
        finally:
            if self.llm_loader.huggingfaceStreamingEnabled():
                self.llm_loader.lock.release()

    def _execute_chain(self, chain, inputs, q, sh):
        # Runs on a worker thread; the result is delivered via the queue.
        q.put(self.run_chain(chain, inputs, callbacks=[sh]))

    def _run_chain_with_streaming_handler(
        self, chain, inputs, streaming_handler, testing
    ):
        que = Queue()

        t = Thread(
            target=self._execute_chain,
            args=(chain, inputs, que, streaming_handler),
        )
        t.start()

        if self.llm_loader.huggingfaceStreamingEnabled():
            # With chat history present the LLM runs twice (condense + answer),
            # so two streamed generations must be consumed.
            count = (
                2
                if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
                else 1
            )

            while count > 0:
                try:
                    for token in self.llm_loader.streamer:
                        if not testing:
                            streaming_handler.on_llm_new_token(token)

                    self.llm_loader.streamer.reset()
                    count -= 1
                except Exception:
                    # Streamer queue timed out before generation started.
                    if not testing:
                        print("nothing generated yet - retry in 0.5s")
                    time.sleep(0.5)

        t.join()
        return que.get()
app_modules/llm_loader.py ADDED
@@ -0,0 +1,579 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import threading
4
+ from queue import Queue
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import torch
8
+ from langchain.callbacks.base import BaseCallbackHandler
9
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
10
+ from langchain_openai.chat_models import ChatOpenAI
11
+ from langchain_openai.llms import OpenAI
12
+ from langchain_google_genai import (
13
+ ChatGoogleGenerativeAI,
14
+ HarmBlockThreshold,
15
+ HarmCategory,
16
+ )
17
+ from langchain_community.llms import (
18
+ HuggingFaceTextGenInference,
19
+ CTransformers,
20
+ GPT4All,
21
+ HuggingFacePipeline,
22
+ LlamaCpp,
23
+ VLLM,
24
+ )
25
+ from langchain_community.chat_models import ChatOllama
26
+ from langchain.schema import LLMResult
27
+ from transformers import (
28
+ AutoConfig,
29
+ AutoModelForCausalLM,
30
+ AutoModelForSeq2SeqLM,
31
+ AutoTokenizer,
32
+ BitsAndBytesConfig,
33
+ StoppingCriteria,
34
+ StoppingCriteriaList,
35
+ T5Tokenizer,
36
+ TextStreamer,
37
+ pipeline,
38
+ )
39
+
40
+ from app_modules.utils import ensure_model_is_downloaded
41
+
42
+
43
class TextIteratorStreamer(TextStreamer, StreamingStdOutCallbackHandler):
    """Token streamer usable both as a transformers TextStreamer and as a
    LangChain callback handler.

    Generated text fragments are pushed into an internal queue; consumers
    iterate the instance to receive fragments until the stop signal (None)
    arrives.
    """

    def __init__(
        self,
        tokenizer: "AutoTokenizer",
        skip_prompt: bool = False,
        timeout: Optional[float] = None,
        for_huggingface: bool = False,
        **decode_kwargs,
    ):
        super().__init__(tokenizer, skip_prompt, **decode_kwargs)
        self.text_queue = Queue()  # fragments awaiting the consumer
        self.stop_signal = None  # sentinel value ending iteration
        self.timeout = timeout
        self.total_tokens = 0
        self.for_huggingface = for_huggingface
        self.end_token = ""  # buffer for a partially-seen "<|im_end|>"

    def on_finalized_text(self, text: str, stream_end: bool = False):
        # transformers TextStreamer path: echo via super(), then enqueue.
        super().on_finalized_text(text, stream_end=stream_end)

        """Put the new text in the queue. If the stream is ending, also put a stop signal in the queue."""
        self.text_queue.put(text, timeout=self.timeout)
        self.total_tokens = self.total_tokens + 1
        if stream_end:
            print("\n")
            self.text_queue.put("\n", timeout=self.timeout)
            self.text_queue.put(self.stop_signal, timeout=self.timeout)

    def check_end_token(self, token):
        # Suppress the "<|im_end|>" marker even when it arrives split across
        # multiple tokens; returns None while a prefix match is pending.
        new_token = self.end_token + token
        if "<|im_end|>".startswith(new_token):
            self.end_token = "" if new_token == "<|im_end|>" else new_token
            return None
        elif self.end_token != "":
            self.end_token = ""

        return new_token

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # LangChain callback path (non-HF backends): filter the end marker,
        # echo to stdout and enqueue.
        token = self.check_end_token(token)
        if token:
            sys.stdout.write(token)
            sys.stdout.flush()
            self.text_queue.put(token, timeout=self.timeout)
            self.total_tokens = self.total_tokens + 1

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        # print("on_llm_start:", serialized, prompts)
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        # LangChain callback path: terminate the consumer's iteration.
        print("\n")
        self.text_queue.put("\n", timeout=self.timeout)
        self.text_queue.put(self.stop_signal, timeout=self.timeout)

    def __iter__(self):
        return self

    def __next__(self):
        # Blocks (up to timeout) for the next fragment; the stop signal ends
        # iteration.
        value = self.text_queue.get(timeout=self.timeout)
        if value == self.stop_signal:
            raise StopIteration()
        else:
            return value

    def reset(self, q: Queue = None):
        # print("resetting TextIteratorStreamer")
        # Swap in a fresh (or caller-supplied) queue for the next generation.
        self.text_queue = q if q is not None else Queue()
        self.end_token = ""

    def empty(self):
        return self.text_queue.empty()
117
+
118
+
119
class LLMLoader:
    """Loads and holds the LangChain LLM selected by ``llm_model_type``.

    The heavy model construction happens in init(); the constructor only
    records configuration and prepares a placeholder streamer.
    """

    def __init__(self, llm_model_type):
        self.llm_model_type = llm_model_type
        self.llm = None  # populated by init()
        # Placeholder streamer; replaced by _init_hf_streamer() once a real
        # tokenizer is available (HuggingFace pipeline only).
        self.streamer = TextIteratorStreamer(
            "",
            for_huggingface=True,
        )
        self.max_tokens_limit = 4096
        self.search_kwargs = {"k": 8}  # retriever top-k
        self.lock = threading.Lock()  # serializes streaming generations
        # Fix: default to "" so a missing HUGGINGFACE_MODEL_NAME_OR_PATH env
        # var no longer raises AttributeError (os.getenv would return None).
        self.model_name = os.getenv("HUGGINGFACE_MODEL_NAME_OR_PATH", "").split("/")[
            -1
        ]
        self.repetition_penalty = ""
        self.batch_size = int(os.getenv("BATCH_SIZE", "1"))

    def _init_hf_streamer(self, tokenizer):
        # Streaming is only supported for single-item batches; with larger
        # batches the streamer is disabled entirely.
        if self.batch_size == 1:
            self.streamer = TextIteratorStreamer(
                tokenizer,
                timeout=10.0,
                skip_prompt=True,
                skip_special_tokens=True,
                for_huggingface=True,
            )
        else:
            self.streamer = None

    def huggingfaceStreamingEnabled(self):
        """Return True when a streamer is available for token streaming."""
        return self.streamer is not None
148
+
149
    def init(
        self,
        custom_handler: Optional[BaseCallbackHandler] = None,
        n_threds: int = 4,
        hf_pipeline_device_type: str = None,
    ):
        """Instantiate self.llm for the configured model type (idempotent).

        n_threds sets CPU thread counts for GPT4All/LlamaCpp backends;
        hf_pipeline_device_type selects cpu/mps/cuda for HF pipelines.
        """
        print("initializing LLM: " + self.llm_model_type)

        if hf_pipeline_device_type is None:
            hf_pipeline_device_type = "cpu"

        using_cuda = hf_pipeline_device_type.startswith("cuda")
        using_mps = hf_pipeline_device_type.startswith("mps")
        # fp16 on GPU/MPS, fp32 on CPU; optional bf16 override (not on MPS).
        torch_dtype = torch.float16 if using_cuda or using_mps else torch.float32
        if not using_mps and os.environ.get("USING_TORCH_BFLOAT16") == "true":
            torch_dtype = torch.bfloat16

        load_quantized_model = os.environ.get("LOAD_QUANTIZED_MODEL")

        print(f" hf_pipeline_device_type: {hf_pipeline_device_type}")
        print(f" load_quantized_model: {load_quantized_model}")
        print(f" torch_dtype: {torch_dtype}")
        print(f" n_threds: {n_threds}")

        torch.set_default_dtype(torch_dtype)

        # 4-bit/8-bit bitsandbytes config; only used by the huggingface branch.
        double_quant_config = BitsAndBytesConfig(
            load_in_4bit=load_quantized_model == "4bit",
            bnb_4bit_use_double_quant=load_quantized_model == "4bit",
            load_in_8bit=load_quantized_model == "8bit",
            bnb_8bit_use_double_quant=load_quantized_model == "8bit",
        )

        callbacks = []
        if self.streamer is not None and self.streamer.for_huggingface:
            callbacks.append(self.streamer)
        if custom_handler is not None:
            callbacks.append(custom_handler)

        # Only construct the model once; subsequent calls are no-ops.
        if self.llm is None:
            if self.llm_model_type == "openai":
                MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME") or "gpt-3.5-turbo"
                print(f" using model: {MODEL_NAME}")
                self.model_name = MODEL_NAME
                # "instruct" models use the completions API, others chat.
                self.llm = (
                    OpenAI(
                        model_name=MODEL_NAME,
                        streaming=True,
                        callbacks=callbacks,
                        verbose=True,
                        temperature=0,
                    )
                    if "instruct" in MODEL_NAME
                    else ChatOpenAI(
                        model_name=MODEL_NAME,
                        streaming=True,
                        callbacks=callbacks,
                        verbose=True,
                        temperature=0,
                    )
                )
            elif self.llm_model_type == "google":
                MODEL_NAME = os.environ.get("GOOGLE_MODEL_NAME") or "gemini-pro"
                print(f" using model: {MODEL_NAME}")
                self.llm = ChatGoogleGenerativeAI(
                    model=MODEL_NAME,
                    convert_system_message_to_human=True,
                    callbacks=callbacks,
                    streaming=True,
                    safety_settings={
                        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                    },
                )
            elif self.llm_model_type.startswith("gpt4all"):
                MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
                self.llm = GPT4All(
                    model=MODEL_PATH,
                    max_tokens=2048,
                    n_threads=n_threds,
                    backend="gptj" if self.llm_model_type == "gpt4all-j" else "llama",
                    callbacks=callbacks,
                    verbose=True,
                    use_mlock=True,
                )
            elif self.llm_model_type == "llamacpp":
                MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
                self.llm = LlamaCpp(
                    model_path=MODEL_PATH,
                    n_ctx=8192,
                    n_threads=n_threds,
                    seed=0,
                    temperature=0,
                    max_tokens=2048,
                    callbacks=callbacks,
                    verbose=True,
                    use_mlock=True,
                )
            elif self.llm_model_type == "ctransformers":
                MODEL_PATH = ensure_model_is_downloaded(self.llm_model_type)
                config = {
                    "max_new_tokens": self.max_tokens_limit,
                    "repetition_penalty": 1.1,
                }
                self.llm = CTransformers(
                    model=MODEL_PATH,
                    model_type="llama",
                    config=config,
                    callbacks=callbacks,
                )
            elif self.llm_model_type == "hftgi":
                HFTGI_SERVER_URL = os.environ.get("HFTGI_SERVER_URL")
                HFTGI_RP = os.environ.get("HFTGI_RP")
                repetition_penalty = 1.120 if HFTGI_RP is None else float(HFTGI_RP)
                print(f" repetition_penalty: {repetition_penalty}")
                self.repetition_penalty = repetition_penalty
                self.max_tokens_limit = 4096
                self.llm = HuggingFaceTextGenInference(
                    inference_server_url=HFTGI_SERVER_URL,
                    # NOTE(review): true division yields a float (2048.0) where
                    # an int token count is expected — confirm the client
                    # accepts it.
                    max_new_tokens=self.max_tokens_limit / 2,
                    # top_k=0,
                    top_p=0.95,
                    # typical_p=0.95,
                    temperature=0.01,
                    repetition_penalty=repetition_penalty,
                    callbacks=callbacks,
                    timeout=600,
                    streaming=True,
                )
            elif self.llm_model_type == "ollama":
                MODEL_NAME = os.environ.get("OLLAMA_MODEL_NAME") or "mistral"
                self.model_name = MODEL_NAME
                print(f" loading model: {MODEL_NAME}")

                OLLAMA_RP = os.getenv("OLLAMA_RP")
                repetition_penalty = float(OLLAMA_RP) if OLLAMA_RP else 1.15
                self.repetition_penalty = repetition_penalty
                print(f" repetition_penalty: {repetition_penalty}")

                self.llm = ChatOllama(
                    model=MODEL_NAME,
                    callbacks=callbacks,
                    temperature=0,
                    repeat_penalty=repetition_penalty,
                    max_new_tokens=2048,
                    max_tokens=8192,
                )
            elif self.llm_model_type == "vllm":
                MODEL_NAME = (
                    os.environ.get("HUGGINGFACE_MODEL_NAME_OR_PATH")
                    or "google/gemma-1.1-2b-it"
                )
                print(f" loading model: {MODEL_NAME}")

                # Reuses the HF_RP env var for the repetition penalty.
                VLLM_RP = os.getenv("HF_RP")
                repetition_penalty = float(VLLM_RP) if VLLM_RP else 1.15
                self.repetition_penalty = repetition_penalty
                print(f" repetition_penalty: {repetition_penalty}")

                vllm_kwargs = {
                    "max_model_len": 4096,
                    "enforce_eager": True,
                }

                quantization = os.getenv("VLLM_QUANTIZATION")
                if quantization:
                    vllm_kwargs["quantization"] = quantization

                self.llm = VLLM(
                    model=MODEL_NAME,
                    callbacks=callbacks,
                    temperature=0,
                    repeat_penalty=repetition_penalty,
                    top_p=0.95,
                    max_new_tokens=2048,
                    max_tokens=8192,
                    tensor_parallel_size=torch.cuda.device_count(),
                    trust_remote_code=True,
                    vllm_kwargs=vllm_kwargs,
                )
            elif self.llm_model_type.startswith("huggingface"):
                MODEL_NAME_OR_PATH = os.environ.get("HUGGINGFACE_MODEL_NAME_OR_PATH")
                print(f" loading model: {MODEL_NAME_OR_PATH}")

                # Gated model families require an auth token.
                hf_auth_token = (
                    os.environ.get("HUGGINGFACE_AUTH_TOKEN")
                    if "Llama-2" in MODEL_NAME_OR_PATH
                    or "gemma" in MODEL_NAME_OR_PATH
                    or "Mistral" in MODEL_NAME_OR_PATH
                    else None
                )
                transformers_offline = os.environ.get("TRANSFORMERS_OFFLINE") == "1"
                token = (
                    hf_auth_token
                    if hf_auth_token is not None
                    and len(hf_auth_token) > 0
                    and not transformers_offline
                    else None
                )
                # Only the last 5 characters of the token are logged.
                print(f" HF auth token: {str(token)[-5:]}")

                if "Llama-2" in MODEL_NAME_OR_PATH:
                    self.max_tokens_limit = 4096
                elif "TinyLlama" in MODEL_NAME_OR_PATH:
                    self.max_tokens_limit = 1024

                class StopOnTokens(StoppingCriteria):
                    # Stops generation when the last token is an EOS-like id.
                    def __call__(
                        self,
                        input_ids: torch.LongTensor,
                        scores: torch.FloatTensor,
                        **kwargs,
                    ) -> bool:
                        stop_ids = [
                            2
                        ]  # IDs of tokens where the generation should stop.
                        for stop_id in stop_ids:
                            if (
                                input_ids[0][-1] == stop_id
                            ):  # Checking if the last generated token is a stop token.
                                return True
                        return False

                # NOTE(review): stopping_criteria appears unused below —
                # confirm whether it should be passed to pipeline().
                stopping_criteria = StoppingCriteriaList([StopOnTokens()])

                is_t5 = "t5" in MODEL_NAME_OR_PATH
                # Near-zero temperature for model families that reject 0.
                temperature = (
                    0.01
                    if "gpt4all-j" in MODEL_NAME_OR_PATH
                    or "dolly" in MODEL_NAME_OR_PATH
                    or "Qwen" in MODEL_NAME_OR_PATH
                    or "Llama" in MODEL_NAME_OR_PATH
                    or "Orca-2" in MODEL_NAME_OR_PATH
                    or "phi-2" in MODEL_NAME_OR_PATH
                    or "Phi-3" in MODEL_NAME_OR_PATH
                    or "Mistral" in MODEL_NAME_OR_PATH
                    or "gemma" in MODEL_NAME_OR_PATH
                    else 0
                )

                use_fast = (
                    "stable" in MODEL_NAME_OR_PATH
                    or "RedPajama" in MODEL_NAME_OR_PATH
                    or "dolly" in MODEL_NAME_OR_PATH
                )
                padding_side = "left"  # if "dolly" in MODEL_NAME_OR_PATH else None

                # Qwen requires explicit fp32/bf16/fp16 flags in its config.
                config = (
                    AutoConfig.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        trust_remote_code=True,
                        token=token,
                        fp32=hf_pipeline_device_type == "cpu",
                        bf16=(
                            hf_pipeline_device_type != "cpu"
                            and torch_dtype == torch.bfloat16
                        ),
                        fp16=(
                            hf_pipeline_device_type != "cpu"
                            and torch_dtype != torch.bfloat16
                        ),
                    )
                    if "Qwen" in MODEL_NAME_OR_PATH
                    else AutoConfig.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        trust_remote_code=True,
                        token=token,
                    )
                )

                # config.attn_config["attn_impl"] = "triton"
                # config.max_seq_len = 4096
                # config.init_device = hf_pipeline_device_type

                tokenizer = (
                    T5Tokenizer.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        token=token,
                    )
                    if is_t5
                    else AutoTokenizer.from_pretrained(
                        MODEL_NAME_OR_PATH,
                        use_fast=use_fast,
                        trust_remote_code=True,
                        padding_side=padding_side,
                        token=token,
                    )
                )

                # Replace the placeholder streamer with a tokenizer-aware one.
                self._init_hf_streamer(tokenizer)

                task = "text2text-generation" if is_t5 else "text-generation"

                return_full_text = True if "dolly" in MODEL_NAME_OR_PATH else False

                repetition_penalty = (
                    1.15
                    if "falcon" in MODEL_NAME_OR_PATH
                    else (1.25 if "dolly" in MODEL_NAME_OR_PATH else 1.1)
                )

                # Env var override for the repetition penalty.
                HF_RP = os.environ.get("HF_RP")
                if HF_RP is not None and len(HF_RP) > 0:
                    repetition_penalty = float(HF_RP)
                print(f" repetition_penalty: {repetition_penalty}")
                self.repetition_penalty = repetition_penalty
                self.model_name = MODEL_NAME_OR_PATH.split("/")[-1]

                if load_quantized_model is not None:
                    # Quantized (4/8-bit) load via bitsandbytes.
                    model = (
                        AutoModelForSeq2SeqLM.from_pretrained(
                            MODEL_NAME_OR_PATH,
                            config=config,
                            quantization_config=double_quant_config,
                            trust_remote_code=True,
                            token=token,
                        )
                        if is_t5
                        else AutoModelForCausalLM.from_pretrained(
                            MODEL_NAME_OR_PATH,
                            config=config,
                            quantization_config=double_quant_config,
                            trust_remote_code=True,
                            token=token,
                        )
                    )

                    print(f"Model memory footprint: {model.get_memory_footprint()}")

                    eos_token_id = -1
                    # starchat-beta uses a special <|end|> token with ID 49155 to denote ends of a turn
                    if "starchat" in MODEL_NAME_OR_PATH:
                        eos_token_id = 49155
                    pad_token_id = eos_token_id

                    pipe = (
                        pipeline(
                            task,
                            model=model,
                            tokenizer=tokenizer,
                            eos_token_id=eos_token_id,
                            pad_token_id=pad_token_id,
                            streamer=self.streamer,
                            return_full_text=return_full_text,  # langchain expects the full text
                            device_map="auto",
                            trust_remote_code=True,
                            max_new_tokens=2048,
                            do_sample=True,
                            temperature=0.01,
                            top_p=0.95,
                            top_k=50,
                            repetition_penalty=repetition_penalty,
                        )
                        if eos_token_id != -1
                        else pipeline(
                            task,
                            model=model,
                            tokenizer=tokenizer,
                            streamer=self.streamer,
                            return_full_text=return_full_text,  # langchain expects the full text
                            device_map="auto",
                            trust_remote_code=True,
                            max_new_tokens=2048,
                            do_sample=True,
                            temperature=temperature,
                            top_p=0.95,
                            top_k=0,  # select from top 0 tokens (because zero, relies on top_p)
                            repetition_penalty=repetition_penalty,
                        )
                    )
                else:
                    # Unquantized load; optionally let pipeline() load the
                    # model lazily (DISABLE_MODEL_PRELOADING=true).
                    if os.environ.get("DISABLE_MODEL_PRELOADING") != "true":
                        model = (
                            AutoModelForSeq2SeqLM.from_pretrained(
                                MODEL_NAME_OR_PATH,
                                config=config,
                                trust_remote_code=True,
                            )
                            if is_t5
                            else (
                                AutoModelForCausalLM.from_pretrained(
                                    MODEL_NAME_OR_PATH,
                                    config=config,
                                    trust_remote_code=True,
                                )
                                if "Qwen" in MODEL_NAME_OR_PATH
                                else (
                                    AutoModelForCausalLM.from_pretrained(
                                        MODEL_NAME_OR_PATH,
                                        config=config,
                                        trust_remote_code=True,
                                    )
                                    if token is None
                                    else AutoModelForCausalLM.from_pretrained(
                                        MODEL_NAME_OR_PATH,
                                        config=config,
                                        trust_remote_code=True,
                                        token=token,
                                    )
                                )
                            )
                        )
                        print(f"Model memory footprint: {model.get_memory_footprint()}")
                        model = model.eval()
                        # print(f"Model memory footprint: {model.get_memory_footprint()}")
                    else:
                        # pipeline() accepts a model name and loads it itself.
                        model = MODEL_NAME_OR_PATH

                    pipe = pipeline(
                        task,
                        model=model,
                        tokenizer=tokenizer,
                        streamer=self.streamer,
                        return_full_text=return_full_text,  # langchain expects the full text
                        device_map="auto",
                        torch_dtype=torch_dtype,
                        max_new_tokens=2048,
                        trust_remote_code=True,
                        do_sample=True,
                        temperature=temperature,
                        top_p=0.95,
                        top_k=0,  # select from top 0 tokens (because zero, relies on top_p)
                        repetition_penalty=repetition_penalty,
                        token=token,
                        batch_size=self.batch_size,
                    )

                # Use EOS as padding so batched generation works.
                pipe.model.config.pad_token_id = pipe.model.config.eos_token_id
                pipe.tokenizer.pad_token_id = pipe.model.config.eos_token_id
                self.llm = HuggingFacePipeline(pipeline=pipe, callbacks=callbacks)

        print("initialization complete")
app_modules/llm_qa_chain.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from typing import List
4
+ import pandas as pd
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain.chains.base import Chain
7
+ from app_modules.llm_inference import LLMInference
8
+ from app_modules.utils import CustomizedConversationSummaryBufferMemory
9
+ from langchain_core.retrievers import BaseRetriever
10
+ from langchain_core.documents import Document
11
+ from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
12
+ from langchain.globals import get_debug
13
+
14
# Retrieval mode: "questions_file" serves contexts from a static JSON dataset
# instead of a vector store.
retrieve_from_questions_file = os.getenv("RETRIEVER_TYPE") == "questions_file"

if retrieve_from_questions_file:
    # Loaded once at import time; read by DatasetRetriever and
    # QAChain.get_prompt.
    questions_file_path = os.getenv("QUESTIONS_FILE_PATH")
    questions_df = pd.read_json(questions_file_path)
    print(f"Questions file loaded: {questions_file_path}", flush=True)
20
+
21
+
22
class DatasetRetriever(BaseRetriever):
    """Retriever that serves contexts from the preloaded questions dataframe
    by exact, case-insensitive question match."""

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant to a query.

        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use
        Returns:
            List of relevant documents
        """
        # Exact case-insensitive match against the dataset's questions.
        matches = questions_df[questions_df["question"].str.lower() == query.lower()]

        docs = [
            Document(
                page_content=row["context"],
                metadata={"source": row["id"]},
            )
            for _, row in matches.iterrows()
        ]

        if not docs:
            print(f"No documents found for query: {query}", flush=True)

        return docs
52
+
53
+
54
class QAChain(LLMInference):
    """Retrieval-augmented QA chain backed by either a vector store or the
    static questions file (see retrieve_from_questions_file)."""

    def __init__(self, vectorstore, llm_loader):
        super().__init__(llm_loader)
        self.vectorstore = vectorstore  # unused when retrieving from the file

    def create_chain(self) -> Chain:
        if retrieve_from_questions_file:
            retriever = DatasetRetriever()
        else:
            retriever = self.vectorstore.as_retriever(
                search_kwargs=self.llm_loader.search_kwargs
            )

        if os.environ.get("CHAT_HISTORY_ENABLED") == "true":
            # Summarizing memory keeps chat history under the token limit.
            memory = CustomizedConversationSummaryBufferMemory(
                llm=self.llm_loader.llm,
                output_key="answer",
                memory_key="chat_history",
                max_token_limit=1024,
                return_messages=True,
            )
            qa = ConversationalRetrievalChain.from_llm(
                self.llm_loader.llm,
                memory=memory,
                chain_type="stuff",
                retriever=retriever,
                get_chat_history=lambda h: h,
                return_source_documents=True,
            )
        else:
            qa = ConversationalRetrievalChain.from_llm(
                self.llm_loader.llm,
                retriever=retriever,
                max_tokens_limit=8192,  # self.llm_loader.max_tokens_limit,
                return_source_documents=True,
            )

        return qa

    def _process_inputs(self, inputs):
        # Batched HF pipeline takes fully-rendered prompt strings.
        if isinstance(inputs, list) and self.llm_loader.llm_model_type == "huggingface":
            inputs = [self.get_prompt(i) for i in inputs]

        if get_debug():
            print("_process_inputs:", json.dumps(inputs, indent=4))

        return inputs

    def get_prompt(self, inputs):
        """Render a stuff-style QA prompt for one input dict with a
        "question" key, using the matching context from the dataset."""
        qa_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."

        df = questions_df
        query = inputs["question"]

        # find the query in the df
        filtered = df[df["question"].str.lower() == query.lower()]

        # NOTE(review): assumes the question exists in the dataset;
        # filtered.iloc[0] raises IndexError otherwise — confirm callers.
        context = filtered.iloc[0]["context"]

        return (
            f"{qa_system_prompt}\n\n{context}\n\nQuestion: {query}\n\nHelpful Answer:"
        )
app_modules/utils.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ import os
7
+ import platform
8
+ import re
9
+ from pathlib import Path
10
+
11
+ import requests
12
+ import torch
13
+ from tqdm import tqdm
14
+ from langchain.memory import ConversationSummaryBufferMemory
15
+
16
+
17
class LogRecord(logging.LogRecord):
    """LogRecord variant that interpolates arguments with ``str.format``
    instead of the stdlib's ``%``-style formatting."""

    def getMessage(self):
        text = self.msg
        if not self.args:
            return text
        # A mapping feeds keyword placeholders; anything else is positional,
        # mirroring logging's own single-dict special case.
        if isinstance(self.args, dict):
            return text.format(**self.args)
        return text.format(*self.args)
26
+
27
+
28
class Logger(logging.Logger):
    """Logger that emits the brace-formatting ``LogRecord`` defined in this
    module instead of the stdlib record class."""

    def makeRecord(
        self,
        name,
        level,
        fn,
        lno,
        msg,
        args,
        exc_info,
        func=None,
        extra=None,
        sinfo=None,
    ):
        record = LogRecord(name, level, fn, lno, msg, args, exc_info, func, sinfo)
        # Attach caller-supplied context attributes, as logging.Logger does.
        if extra is not None:
            record.__dict__.update(extra)
        return record
47
+
48
+
49
def init_settings():
    """Install the custom Logger class and configure root logging output."""
    logging.setLoggerClass(Logger)
    log_format = "%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s"
    logging.basicConfig(level=logging.WARNING, format=log_format)
55
+
56
+
57
def remove_extra_spaces(text):
    """Trim *text* and collapse runs of spaces (not tabs/newlines) to one."""
    return re.sub(r" {2,}", " ", text.strip())
59
+
60
+
61
+ def print_llm_response(llm_response, debug_retrieval=True):
62
+ answer = llm_response["answer"] if "answer" in llm_response else None
63
+ if answer is None:
64
+ answer = llm_response["response"] if "response" in llm_response else None
65
+
66
+ if answer is not None:
67
+ print("\n\n***Answer:")
68
+ print(answer)
69
+
70
+ source_documents = (
71
+ llm_response["source_documents"] if "source_documents" in llm_response else None
72
+ )
73
+ if source_documents is None:
74
+ source_documents = (
75
+ llm_response["sourceDocs"] if "sourceDocs" in llm_response else None
76
+ )
77
+
78
+ if debug_retrieval and source_documents is not None:
79
+ print("\nSources:")
80
+ for index, source in enumerate(source_documents):
81
+ metadata = source["metadata"] if "metadata" in source else source.metadata
82
+ if "page" in metadata:
83
+ print(f" Page: {metadata['page']}", end="")
84
+
85
+ print(
86
+ f" Source {index + 1}: "
87
+ + str(metadata["url"] if "url" in metadata else metadata["source"])
88
+ )
89
+ print(
90
+ source["page_content"]
91
+ if "page_content" in source
92
+ else source.page_content
93
+ )
94
+
95
+ if "chat_history" in llm_response:
96
+ print("\nChat History:")
97
+ print(llm_response["chat_history"])
98
+
99
+
100
def get_device_types():
    """Detect the best available torch device and report platform details.

    Returns ``(embeddings_device, pipeline_device)``; either entry can be
    overridden via the HF_EMBEDDINGS_DEVICE_TYPE / HF_PIPELINE_DEVICE_TYPE
    environment variables.
    """
    print("Running on: ", platform.platform())
    print("MPS is", "NOT" if not torch.backends.mps.is_available() else "", "available")
    print("CUDA is", "NOT" if not torch.cuda.is_available() else "", "available")

    detected = "cpu"
    if torch.backends.mps.is_available():
        detected = "mps"
    elif not torch.backends.mps.is_built():
        print(
            "MPS not available because the current PyTorch install was not "
            "built with MPS enabled."
        )
    else:
        print(
            "MPS not available because the current MacOS version is not 12.3+ "
            "and/or you do not have an MPS-enabled device on this machine."
        )

    # CUDA takes precedence over MPS when both are somehow reported.
    if torch.cuda.is_available():
        print("CUDA is available, we have found ", torch.cuda.device_count(), " GPU(s)")
        print(torch.cuda.get_device_name(0))
        print("CUDA version: " + torch.version.cuda)
        detected = f"cuda:{torch.cuda.current_device()}"

    return (
        os.environ.get("HF_EMBEDDINGS_DEVICE_TYPE") or detected,
        os.environ.get("HF_PIPELINE_DEVICE_TYPE") or detected,
    )
130
+
131
+
132
def ensure_model_is_downloaded(llm_model_type):
    """Ensure the local model file for *llm_model_type* exists, downloading
    it when missing.

    Supported types: ``gpt4all-j`` / other ``gpt4all*``, ``llamacpp`` and
    ``ctransformers``.  The local path and download URL come from the
    corresponding environment variables.

    Returns the local model path.  Raises ValueError for an unknown type or
    when the model-path environment variable is unset.
    """
    if llm_model_type.startswith("gpt4all"):
        local_path = (
            os.environ.get("GPT4ALL_J_MODEL_PATH")
            if llm_model_type == "gpt4all-j"
            else os.environ.get("GPT4ALL_MODEL_PATH")
        )
        url = (
            os.environ.get("GPT4ALL_J_DOWNLOAD_LINK")
            if llm_model_type == "gpt4all-j"
            else os.environ.get("GPT4ALL_DOWNLOAD_LINK")
        )
    elif llm_model_type == "llamacpp":
        local_path = os.environ.get("LLAMACPP_MODEL_PATH")
        url = os.environ.get("LLAMACPP_DOWNLOAD_LINK")
    elif llm_model_type == "ctransformers":
        local_path = os.environ.get("CTRANSFORMERS_MODEL_PATH")
        url = os.environ.get("CTRANSFORMERS_DOWNLOAD_LINK")
    else:
        # Fixed typo in the original message ("typle" -> "type").
        raise ValueError(f"wrong model type: {llm_model_type}")

    if local_path is None:
        # Fail fast with a clear message instead of TypeError from Path(None).
        raise ValueError(
            f"model path environment variable is not set for {llm_model_type}"
        )

    path = Path(local_path)

    if path.is_file():
        print(f"model: {local_path} exists")
    else:
        print(f"downloading model: {local_path} from {url} ...")
        path.parent.mkdir(parents=True, exist_ok=True)

        # Stream the download: model files are large.
        response = requests.get(url, stream=True)

        # Write the response to disk in chunks to bound memory usage.
        with open(local_path, "wb") as f:
            for chunk in tqdm(response.iter_content(chunk_size=8192)):
                if chunk:
                    f.write(chunk)

    return local_path
172
+
173
+
174
class CustomizedConversationSummaryBufferMemory(ConversationSummaryBufferMemory):
    """Summary-buffer memory that scrubs the ``<|im_end|>`` stop token from
    stored outputs and from generated summaries."""

    def save_context(self, inputs, outputs) -> None:
        # Rewrite string outputs in place so the caller also sees the
        # cleaned values before delegating to the base implementation.
        for key, value in outputs.items():
            if isinstance(value, str):
                outputs[key] = value.replace("<|im_end|>", "")
        return super().save_context(inputs, outputs)

    def predict_new_summary(self, messages, existing_summary) -> str:
        summary = super().predict_new_summary(messages, existing_summary)
        return summary.replace("<|im_end|>", "")
187
+
188
+
189
def CalculateDistance(entry1, entry2, distance_calculator):
    """Return 0 for identical strings, otherwise the pairwise embedding
    distance score produced by *distance_calculator*."""
    if entry1 == entry2:
        return 0
    result = distance_calculator.evaluate_string_pairs(
        prediction=entry1, prediction_b=entry2
    )
    # print(f"entry1: {entry1}, entry2: {entry2}, distance: {result['score']}")
    return result["score"]
197
+
198
+
199
def FindInList(entry, elist, distance_calculator=None, debug=False):
    """Return True when *entry* matches any element of *elist*, either
    exactly or — when a distance calculator is supplied — within the
    module-level ``distance_threshold``."""
    for candidate in elist:
        if distance_calculator is not None:
            score = CalculateDistance(entry, candidate, distance_calculator)
            if score < distance_threshold:
                if debug:
                    print(
                        f"FindInList - matched by distance {score:.3f}: {entry} - {candidate}"
                    )
                return True
        if entry == candidate:
            return True
    return False
212
+
213
+
214
def CalculatePRF1F2(
    goldAnswerList, predAnswerList, distance_calculator=None, debug=False
):
    """Compute ``[precision, recall, f1, f2]`` between gold and predicted
    answer lists, with fuzzy matching when *distance_calculator* is given.

    Empty-list conventions: both empty -> all 1.0 (nothing labeled, nothing
    predicted counts as correct); gold empty -> precision 0, recall 1;
    pred empty -> precision 1, recall 0.
    """
    if not goldAnswerList:
        return (
            [1.0, 1.0, 1.0, 1.0] if not predAnswerList else [0.0, 1.0, 0.0, 0.0]
        )
    if not predAnswerList:
        return [1.0, 0.0, 0.0, 0.0]

    true_pos = 1e-40  # numerical trick: keeps the divisions below non-zero
    false_pos = 0.0
    false_neg = 0.0

    # Recall side: every gold answer should appear among the predictions.
    for gold in goldAnswerList:
        if FindInList(
            gold, predAnswerList, distance_calculator=distance_calculator, debug=True
        ):
            true_pos += 1
        else:
            false_neg += 1

    # Precision side: every prediction should appear among the gold answers.
    for pred in predAnswerList:
        if not FindInList(pred, goldAnswerList, distance_calculator=distance_calculator):
            false_pos += 1

    precision = true_pos / (true_pos + false_pos)
    recall = true_pos / (true_pos + false_neg)

    f1 = (2 * precision * recall) / (precision + recall)
    f2 = (5 * precision * recall) / (4 * precision + recall)
    return [precision, recall, f1, f2]
264
+
265
+
266
# Lazily-initialized spaCy pipeline; populated by load_spacy_model().
nlp = None
# Maximum embedding distance for a fuzzy match in FindInList; refreshed from
# the DISTANCE_THRESHOLD env var inside load_spacy_model().
distance_threshold = 0.05
268
+
269
+
270
def load_spacy_model():
    """Load (once) and return the spaCy pipeline named by SPACY_MODEL_NAME,
    downloading the model on demand.

    Also refreshes the module-level ``distance_threshold`` from the
    DISTANCE_THRESHOLD environment variable on first load.
    """
    import spacy

    global nlp
    if nlp is not None:
        return nlp

    global distance_threshold
    distance_threshold = float(os.getenv("DISTANCE_THRESHOLD", "0.05"))

    model_name = os.getenv("SPACY_MODEL_NAME", "en_core_web_trf")

    # Retry after downloading when the model is not installed yet.
    while True:
        try:
            print(f"loading spacy model from {model_name}")
            nlp = spacy.load(model_name)
            print(f"loaded spacy model from {model_name}")
            return nlp
        except OSError:
            print(f"downloading spacy model {model_name}")
            spacy.cli.download(model_name)
            print(f"downloaded spacy model {model_name}")
292
+
293
+
294
def clean_text(text):
    """Normalize an entity string: lowercase and strip double quotes and
    periods in a single translate pass."""
    return text.lower().translate(str.maketrans("", "", '".'))
300
+
301
+
302
def get_entities_in_text(text, debug=False):
    """Run spaCy NER over *text* and return the cleaned entity strings,
    de-duplicated and sorted alphabetically."""
    pipeline = load_spacy_model()
    doc = pipeline(text)

    unique_entities = []
    for ent in doc.ents:
        if debug:
            print(ent.text, ent.label_)
        cleaned = clean_text(ent.text)
        if cleaned not in unique_entities:
            unique_entities.append(cleaned)

    return sorted(unique_entities)
315
+
316
+
317
def calculate_metrics(question, answer, distance_calculator=None, debug=False):
    """Score *answer* against ``question["answers"]`` via NER entity overlap.

    All spaCy extraction and scoring happen only when ``debug`` is True;
    otherwise zero metrics and empty entity lists are returned.
    NOTE(review): ``question["answers"]`` is sorted in place, mutating the
    caller's dict.

    Returns (precision, recall, f1, f2, entities_in_answer, ground_truth,
    entities_in_question).
    """
    ground_truth = question["answers"]
    ground_truth.sort()

    if debug:
        print(f"question: {question}")
        print(f"answer: {answer}")

        print("entities_in_question ---------------")
        entities_in_question = get_entities_in_text(question["question"], debug)

        print("entities_in_answer -----------------")
        entities_in_answer = get_entities_in_text(answer, debug)

        print("done with NER with spaCy -----------")

        entities_in_answer.sort()

        # Drop predicted entities that merely echo the question itself
        # (exact match only — no distance calculator here).
        predAnswerList = [
            pentry
            for pentry in entities_in_answer
            if not FindInList(pentry, entities_in_question)
        ]

        print(f"entities_in_question: {entities_in_question}")
        print(f"entities_in_answer: {entities_in_answer}")
        print(f"ground_truth: {ground_truth}")
        print(f"pred_answers: {predAnswerList}")

        precision, recall, f1, f2 = CalculatePRF1F2(
            ground_truth,
            predAnswerList,
            debug=debug,
            distance_calculator=distance_calculator,
        )
        print(f"precision: {precision}, recall: {recall}, f1: {f1}, f2: {f2}")
    else:
        precision = 0.0
        recall = 0.0
        f1 = 0.0
        f2 = 0.0
        entities_in_answer = []
        entities_in_question = []

    return (
        precision,
        recall,
        f1,
        f2,
        entities_in_answer,
        ground_truth,
        entities_in_question,
    )
370
+
371
+
372
def calculate_metrics_gemini(question, answer, debug=False):
    """Placeholder Gemini metric: always reports zero precision/recall/F1."""
    return (0.0, 0.0, 0.0)
378
+
379
+
380
if __name__ == "__main__":
    # Smoke test: evaluate three MS MARCO-style QA pairs, using a pairwise
    # embedding-distance evaluator for fuzzy entity matching.
    from langchain_community.embeddings import HuggingFaceInstructEmbeddings
    from langchain.evaluation import load_evaluator

    hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
    print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
    print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")

    hf_embeddings_model_name = "hkunlp/instructor-large"
    print(f"hf_embeddings_model_name: {hf_embeddings_model_name}")
    embeddings = HuggingFaceInstructEmbeddings(
        model_name=hf_embeddings_model_name,
        model_kwargs={"device": hf_embeddings_device_type},
    )

    # Lower score means more similar strings.
    hf_evaluator = load_evaluator("pairwise_embedding_distance", embeddings=embeddings)

    question = {
        "question": "what does jamaican people speak",
        "entities_in_question": ["jamaican"],
        "answers": ["jamaican english", "jamaican creole english language"],
    }
    answer = "Jamaican people primarily speak Jamaican Patois, which is an English-based creole language with significant West African influences. It is spoken as a native language by the majority of Jamaicans and also exists in various forms among Jamaican expatriates and non-Jamaicans in different parts of the world. The phonology of Jamaican Patois includes around 21 consonants (with some dialectal variation regarding the status of /h/ as a phoneme) and between nine and sixteen vowels, some of which are capable of nasalization or lengthening. There are also instances of palatalization in Jamaican Patois, where certain consonants appear to be phonemic in some dialects but may be considered phonetic in others. For example, the palatal stops [c], [ɟ], and [ɲ] may be analyzed as phonemes or as instances of phonetic palatalization depending on the account."
    calculate_metrics(question, answer, distance_calculator=hf_evaluator, debug=True)

    question = {
        "question": "who is governor of ohio 2011",
        "entities_in_question": ["2011"],
        "answers": ["john kasich", "return j. meigs, jr.", "ted strickland"],
    }
    answer = "The lieutenant governor of Ohio in 2011 was Mary Taylor, who served alongside Governor John Kasich. She assumed office on January 10, 2011, after being elected as the lieutenant governor in the 2010 election. During her tenure, she faced criticism for using the state airplane for personal errands and reportedly had high turnover among her staff."
    calculate_metrics(question, answer, distance_calculator=hf_evaluator, debug=True)

    question = {
        "question": "where is the fukushima daiichi nuclear power station",
        "entities_in_question": ["the fukushima daiichi nuclear power station"],
        "answers": ["japan", "okuma"],
    }
    answer = "The Fukushima Daiichi Nuclear Power Station is located in the towns of Ōkuma and Futaba in Fukushima Prefecture, Japan."
    calculate_metrics(question, answer, distance_calculator=hf_evaluator, debug=True)
data/datasets/ms_macro.json ADDED
The diff for this file is too large to render. See raw diff
 
notebook/01_Data_Preprocessing.ipynb ADDED
@@ -0,0 +1,813 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "a6d96660",
7
+ "metadata": {
8
+ "metadata": {}
9
+ },
10
+ "outputs": [
11
+ {
12
+ "name": "stdout",
13
+ "output_type": "stream",
14
+ "text": [
15
+ "workding dir: /Users/inflaton/code/emtech/gpt/llm-qa-bench\n"
16
+ ]
17
+ }
18
+ ],
19
+ "source": [
20
+ "import os\n",
21
+ "import sys\n",
22
+ "from pathlib import Path\n",
23
+ "\n",
24
+ "workding_dir = str(Path.cwd().parent)\n",
25
+ "os.chdir(workding_dir)\n",
26
+ "sys.path.append(workding_dir)\n",
27
+ "print(\"workding dir:\", workding_dir)"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 2,
33
+ "id": "b72bf3f9",
34
+ "metadata": {
35
+ "metadata": {}
36
+ },
37
+ "outputs": [
38
+ {
39
+ "name": "stderr",
40
+ "output_type": "stream",
41
+ "text": [
42
+ "/Users/inflaton/anaconda3/envs/llm-qa-bench/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
43
+ " from .autonotebook import tqdm as notebook_tqdm\n"
44
+ ]
45
+ },
46
+ {
47
+ "data": {
48
+ "text/plain": [
49
+ "Dataset({\n",
50
+ " features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],\n",
51
+ " num_rows: 500\n",
52
+ "})"
53
+ ]
54
+ },
55
+ "execution_count": 2,
56
+ "metadata": {},
57
+ "output_type": "execute_result"
58
+ }
59
+ ],
60
+ "source": [
61
+ "from datasets import load_from_disk\n",
62
+ "\n",
63
+ "new_ds = load_from_disk(\"./Llama-2-eval/data/datasets/ms_macro/\")\n",
64
+ "new_ds"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 3,
70
+ "id": "051bd771",
71
+ "metadata": {
72
+ "metadata": {}
73
+ },
74
+ "outputs": [
75
+ {
76
+ "data": {
77
+ "text/plain": [
78
+ "({'NUMERIC': 100,\n",
79
+ " 'DESCRIPTION': 100,\n",
80
+ " 'ENTITY': 100,\n",
81
+ " 'PERSON': 100,\n",
82
+ " 'LOCATION': 100},\n",
83
+ " {'NUMERIC': 179,\n",
84
+ " 'DESCRIPTION': 215,\n",
85
+ " 'ENTITY': 443,\n",
86
+ " 'LOCATION': 461,\n",
87
+ " 'PERSON': 499})"
88
+ ]
89
+ },
90
+ "execution_count": 3,
91
+ "metadata": {},
92
+ "output_type": "execute_result"
93
+ }
94
+ ],
95
+ "source": [
96
+ "counts = {}\n",
97
+ "indices = {}\n",
98
+ "size = 100\n",
99
+ "for i in range(new_ds.num_rows):\n",
100
+ " row = new_ds[i]\n",
101
+ " query_type = row[\"query_type\"]\n",
102
+ " if query_type in counts:\n",
103
+ " counts[query_type] += 1\n",
104
+ " else:\n",
105
+ " counts[query_type] = 1\n",
106
+ " if counts[query_type] == size:\n",
107
+ " indices[query_type] = i\n",
108
+ "counts, indices"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 4,
114
+ "id": "db48dcc4",
115
+ "metadata": {
116
+ "metadata": {}
117
+ },
118
+ "outputs": [],
119
+ "source": [
120
+ "df = new_ds.to_pandas()"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 5,
126
+ "id": "a39dea83",
127
+ "metadata": {
128
+ "metadata": {}
129
+ },
130
+ "outputs": [
131
+ {
132
+ "data": {
133
+ "text/html": [
134
+ "<div>\n",
135
+ "<style scoped>\n",
136
+ " .dataframe tbody tr th:only-of-type {\n",
137
+ " vertical-align: middle;\n",
138
+ " }\n",
139
+ "\n",
140
+ " .dataframe tbody tr th {\n",
141
+ " vertical-align: top;\n",
142
+ " }\n",
143
+ "\n",
144
+ " .dataframe thead th {\n",
145
+ " text-align: right;\n",
146
+ " }\n",
147
+ "</style>\n",
148
+ "<table border=\"1\" class=\"dataframe\">\n",
149
+ " <thead>\n",
150
+ " <tr style=\"text-align: right;\">\n",
151
+ " <th></th>\n",
152
+ " <th>answers</th>\n",
153
+ " <th>passages</th>\n",
154
+ " <th>query</th>\n",
155
+ " <th>query_id</th>\n",
156
+ " <th>query_type</th>\n",
157
+ " <th>wellFormedAnswers</th>\n",
158
+ " </tr>\n",
159
+ " </thead>\n",
160
+ " <tbody>\n",
161
+ " <tr>\n",
162
+ " <th>0</th>\n",
163
+ " <td>[2,662]</td>\n",
164
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
165
+ " <td>albany mn population</td>\n",
166
+ " <td>15177</td>\n",
167
+ " <td>NUMERIC</td>\n",
168
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>1</th>\n",
172
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
173
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
174
+ " <td>current weather in volcano, ca</td>\n",
175
+ " <td>114414</td>\n",
176
+ " <td>DESCRIPTION</td>\n",
177
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
178
+ " </tr>\n",
179
+ " <tr>\n",
180
+ " <th>2</th>\n",
181
+ " <td>[Hippocrates]</td>\n",
182
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
183
+ " <td>____________________ is considered the father ...</td>\n",
184
+ " <td>9083</td>\n",
185
+ " <td>DESCRIPTION</td>\n",
186
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
187
+ " </tr>\n",
188
+ " <tr>\n",
189
+ " <th>3</th>\n",
190
+ " <td>[120 days from the date of the Note.]</td>\n",
191
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
192
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
193
+ " <td>281439</td>\n",
194
+ " <td>NUMERIC</td>\n",
195
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>4</th>\n",
199
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
200
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
201
+ " <td>average pharmacy tech salary</td>\n",
202
+ " <td>40287</td>\n",
203
+ " <td>NUMERIC</td>\n",
204
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
205
+ " </tr>\n",
206
+ " </tbody>\n",
207
+ "</table>\n",
208
+ "</div>"
209
+ ],
210
+ "text/plain": [
211
+ " answers \\\n",
212
+ "0 [2,662] \n",
213
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
214
+ "2 [Hippocrates] \n",
215
+ "3 [120 days from the date of the Note.] \n",
216
+ "4 [From $26,000 to $39,000 a year] \n",
217
+ "\n",
218
+ " passages \\\n",
219
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
220
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
221
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
222
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
223
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
224
+ "\n",
225
+ " query query_id query_type \\\n",
226
+ "0 albany mn population 15177 NUMERIC \n",
227
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
228
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
229
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
230
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
231
+ "\n",
232
+ " wellFormedAnswers \n",
233
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
234
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
235
+ "2 [Hippocrates is considered the father of moder... \n",
236
+ "3 [An appraisal is good for 120 days from the da... \n",
237
+ "4 [The average salary for a pharmacy technician ... "
238
+ ]
239
+ },
240
+ "execution_count": 5,
241
+ "metadata": {},
242
+ "output_type": "execute_result"
243
+ }
244
+ ],
245
+ "source": [
246
+ "df.head()"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 6,
252
+ "id": "7f0d2ca1",
253
+ "metadata": {
254
+ "metadata": {}
255
+ },
256
+ "outputs": [],
257
+ "source": [
258
+ "df.rename(columns={\"query\": \"question\", \"query_id\": \"id\"}, inplace=True)"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": 7,
264
+ "id": "e1cde4c1",
265
+ "metadata": {
266
+ "metadata": {}
267
+ },
268
+ "outputs": [
269
+ {
270
+ "data": {
271
+ "text/html": [
272
+ "<div>\n",
273
+ "<style scoped>\n",
274
+ " .dataframe tbody tr th:only-of-type {\n",
275
+ " vertical-align: middle;\n",
276
+ " }\n",
277
+ "\n",
278
+ " .dataframe tbody tr th {\n",
279
+ " vertical-align: top;\n",
280
+ " }\n",
281
+ "\n",
282
+ " .dataframe thead th {\n",
283
+ " text-align: right;\n",
284
+ " }\n",
285
+ "</style>\n",
286
+ "<table border=\"1\" class=\"dataframe\">\n",
287
+ " <thead>\n",
288
+ " <tr style=\"text-align: right;\">\n",
289
+ " <th></th>\n",
290
+ " <th>answers</th>\n",
291
+ " <th>passages</th>\n",
292
+ " <th>question</th>\n",
293
+ " <th>id</th>\n",
294
+ " <th>query_type</th>\n",
295
+ " <th>wellFormedAnswers</th>\n",
296
+ " </tr>\n",
297
+ " </thead>\n",
298
+ " <tbody>\n",
299
+ " <tr>\n",
300
+ " <th>0</th>\n",
301
+ " <td>[2,662]</td>\n",
302
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
303
+ " <td>albany mn population</td>\n",
304
+ " <td>15177</td>\n",
305
+ " <td>NUMERIC</td>\n",
306
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
307
+ " </tr>\n",
308
+ " <tr>\n",
309
+ " <th>1</th>\n",
310
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
311
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
312
+ " <td>current weather in volcano, ca</td>\n",
313
+ " <td>114414</td>\n",
314
+ " <td>DESCRIPTION</td>\n",
315
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
316
+ " </tr>\n",
317
+ " <tr>\n",
318
+ " <th>2</th>\n",
319
+ " <td>[Hippocrates]</td>\n",
320
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
321
+ " <td>____________________ is considered the father ...</td>\n",
322
+ " <td>9083</td>\n",
323
+ " <td>DESCRIPTION</td>\n",
324
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
325
+ " </tr>\n",
326
+ " <tr>\n",
327
+ " <th>3</th>\n",
328
+ " <td>[120 days from the date of the Note.]</td>\n",
329
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
330
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
331
+ " <td>281439</td>\n",
332
+ " <td>NUMERIC</td>\n",
333
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
334
+ " </tr>\n",
335
+ " <tr>\n",
336
+ " <th>4</th>\n",
337
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
338
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
339
+ " <td>average pharmacy tech salary</td>\n",
340
+ " <td>40287</td>\n",
341
+ " <td>NUMERIC</td>\n",
342
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
343
+ " </tr>\n",
344
+ " </tbody>\n",
345
+ "</table>\n",
346
+ "</div>"
347
+ ],
348
+ "text/plain": [
349
+ " answers \\\n",
350
+ "0 [2,662] \n",
351
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
352
+ "2 [Hippocrates] \n",
353
+ "3 [120 days from the date of the Note.] \n",
354
+ "4 [From $26,000 to $39,000 a year] \n",
355
+ "\n",
356
+ " passages \\\n",
357
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
358
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
359
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
360
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
361
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
362
+ "\n",
363
+ " question id query_type \\\n",
364
+ "0 albany mn population 15177 NUMERIC \n",
365
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
366
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
367
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
368
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
369
+ "\n",
370
+ " wellFormedAnswers \n",
371
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
372
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
373
+ "2 [Hippocrates is considered the father of moder... \n",
374
+ "3 [An appraisal is good for 120 days from the da... \n",
375
+ "4 [The average salary for a pharmacy technician ... "
376
+ ]
377
+ },
378
+ "execution_count": 7,
379
+ "metadata": {},
380
+ "output_type": "execute_result"
381
+ }
382
+ ],
383
+ "source": [
384
+ "df.head()"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": 8,
390
+ "id": "89494c3d",
391
+ "metadata": {
392
+ "metadata": {}
393
+ },
394
+ "outputs": [],
395
+ "source": [
396
+ "import numpy as np\n",
397
+ "\n",
398
+ "\n",
399
+ "def generate_context(row, debug=False):\n",
400
+ " passages = row[\"passages\"]\n",
401
+ " if debug:\n",
402
+ " print(\"question:\", row[\"question\"])\n",
403
+ " print(passages)\n",
404
+ "\n",
405
+ " passage_text = passages[\"passage_text\"]\n",
406
+ " context = \"\\n\\n\".join(passage_text)\n",
407
+ "\n",
408
+ " return context"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": 9,
414
+ "id": "0dc959f6",
415
+ "metadata": {
416
+ "metadata": {}
417
+ },
418
+ "outputs": [
419
+ {
420
+ "name": "stdout",
421
+ "output_type": "stream",
422
+ "text": [
423
+ "question: albany mn population\n",
424
+ "{'is_selected': array([0, 0, 0, 1, 0, 0, 0, 0], dtype=int32), 'passage_text': array(['City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.',\n",
425
+ " 'Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.',\n",
426
+ " 'For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
427
+ " 'Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.',\n",
428
+ " 'Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.',\n",
429
+ " 'Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).',\n",
430
+ " \"For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\",\n",
431
+ " \"For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\"],\n",
432
+ " dtype=object), 'url': array(['http://zipcode.org/city/MN/ALBANY',\n",
433
+ " 'http://www.city-data.com/zips/56307.html',\n",
434
+ " 'https://en.wikipedia.org/wiki/Albany,_Minnesota',\n",
435
+ " 'http://ci.albany.mn.us/index.asp?SEC=A8341FEC-6B8C-47D2-926B-75A89ED4C539&Type=B_BASIC',\n",
436
+ " 'https://www.mapquest.com/us/mn/albany-282023394',\n",
437
+ " 'http://www.city-data.com/city/Albany-Minnesota.html',\n",
438
+ " 'http://www.city-data.com/zips/56307.html',\n",
439
+ " 'http://www.city-data.com/city/Albany-Minnesota.html'],\n",
440
+ " dtype=object)}\n",
441
+ "City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n",
442
+ "\n",
443
+ "Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\n",
444
+ "\n",
445
+ "For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
446
+ "\n",
447
+ "Albany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\n",
448
+ "\n",
449
+ "Sponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n",
450
+ "\n",
451
+ "Recent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\n",
452
+ "\n",
453
+ "For population 25 years and over in 56307: 1 High school or higher: 87.4%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\n",
454
+ "\n",
455
+ "For population 25 years and over in Albany: 1 High school or higher: 86.7%. 2 Bachelor's degree or higher: 15.4%. 3 Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\n",
456
+ "CPU times: user 255 µs, sys: 41 µs, total: 296 µs\n",
457
+ "Wall time: 294 µs\n"
458
+ ]
459
+ }
460
+ ],
461
+ "source": [
462
+ "%%time\n",
463
+ "context = generate_context(df.iloc[0], debug=True)\n",
464
+ "print(context)"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 10,
470
+ "id": "d887d92e",
471
+ "metadata": {
472
+ "metadata": {}
473
+ },
474
+ "outputs": [
475
+ {
476
+ "name": "stderr",
477
+ "output_type": "stream",
478
+ "text": [
479
+ "100%|██████████| 500/500 [00:00<00:00, 213125.20it/s]"
480
+ ]
481
+ },
482
+ {
483
+ "name": "stdout",
484
+ "output_type": "stream",
485
+ "text": [
486
+ "CPU times: user 3.19 ms, sys: 1.47 ms, total: 4.67 ms\n",
487
+ "Wall time: 4.01 ms\n"
488
+ ]
489
+ },
490
+ {
491
+ "name": "stderr",
492
+ "output_type": "stream",
493
+ "text": [
494
+ "\n"
495
+ ]
496
+ }
497
+ ],
498
+ "source": [
499
+ "%%time\n",
500
+ "from tqdm import tqdm\n",
501
+ "\n",
502
+ "tqdm.pandas()\n",
503
+ "\n",
504
+ "df[\"context\"] = df.progress_apply(\n",
505
+ " generate_context, axis=1\n",
506
+ ")"
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "code",
511
+ "execution_count": 11,
512
+ "id": "dfdf1d5a",
513
+ "metadata": {
514
+ "metadata": {}
515
+ },
516
+ "outputs": [
517
+ {
518
+ "data": {
519
+ "text/html": [
520
+ "<div>\n",
521
+ "<style scoped>\n",
522
+ " .dataframe tbody tr th:only-of-type {\n",
523
+ " vertical-align: middle;\n",
524
+ " }\n",
525
+ "\n",
526
+ " .dataframe tbody tr th {\n",
527
+ " vertical-align: top;\n",
528
+ " }\n",
529
+ "\n",
530
+ " .dataframe thead th {\n",
531
+ " text-align: right;\n",
532
+ " }\n",
533
+ "</style>\n",
534
+ "<table border=\"1\" class=\"dataframe\">\n",
535
+ " <thead>\n",
536
+ " <tr style=\"text-align: right;\">\n",
537
+ " <th></th>\n",
538
+ " <th>answers</th>\n",
539
+ " <th>passages</th>\n",
540
+ " <th>question</th>\n",
541
+ " <th>id</th>\n",
542
+ " <th>query_type</th>\n",
543
+ " <th>wellFormedAnswers</th>\n",
544
+ " <th>context</th>\n",
545
+ " </tr>\n",
546
+ " </thead>\n",
547
+ " <tbody>\n",
548
+ " <tr>\n",
549
+ " <th>0</th>\n",
550
+ " <td>[2,662]</td>\n",
551
+ " <td>{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...</td>\n",
552
+ " <td>albany mn population</td>\n",
553
+ " <td>15177</td>\n",
554
+ " <td>NUMERIC</td>\n",
555
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
556
+ " <td>City of Albany, MN Zip Codes. City of Albany, ...</td>\n",
557
+ " </tr>\n",
558
+ " <tr>\n",
559
+ " <th>1</th>\n",
560
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
561
+ " <td>{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...</td>\n",
562
+ " <td>current weather in volcano, ca</td>\n",
563
+ " <td>114414</td>\n",
564
+ " <td>DESCRIPTION</td>\n",
565
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
566
+ " <td>Volcano 10 Day Weather. Sunday:The Volcano for...</td>\n",
567
+ " </tr>\n",
568
+ " <tr>\n",
569
+ " <th>2</th>\n",
570
+ " <td>[Hippocrates]</td>\n",
571
+ " <td>{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...</td>\n",
572
+ " <td>____________________ is considered the father ...</td>\n",
573
+ " <td>9083</td>\n",
574
+ " <td>DESCRIPTION</td>\n",
575
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
576
+ " <td>Hippocrates is widely considered to be the Fat...</td>\n",
577
+ " </tr>\n",
578
+ " <tr>\n",
579
+ " <th>3</th>\n",
580
+ " <td>[120 days from the date of the Note.]</td>\n",
581
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
582
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
583
+ " <td>281439</td>\n",
584
+ " <td>NUMERIC</td>\n",
585
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
586
+ " <td>New and Updated Underwriting and Eligibility P...</td>\n",
587
+ " </tr>\n",
588
+ " <tr>\n",
589
+ " <th>4</th>\n",
590
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
591
+ " <td>{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...</td>\n",
592
+ " <td>average pharmacy tech salary</td>\n",
593
+ " <td>40287</td>\n",
594
+ " <td>NUMERIC</td>\n",
595
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
596
+ " <td>If you are interested in becoming a pharmacy t...</td>\n",
597
+ " </tr>\n",
598
+ " </tbody>\n",
599
+ "</table>\n",
600
+ "</div>"
601
+ ],
602
+ "text/plain": [
603
+ " answers \\\n",
604
+ "0 [2,662] \n",
605
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
606
+ "2 [Hippocrates] \n",
607
+ "3 [120 days from the date of the Note.] \n",
608
+ "4 [From $26,000 to $39,000 a year] \n",
609
+ "\n",
610
+ " passages \\\n",
611
+ "0 {'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas... \n",
612
+ "1 {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]... \n",
613
+ "2 {'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]... \n",
614
+ "3 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
615
+ "4 {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]... \n",
616
+ "\n",
617
+ " question id query_type \\\n",
618
+ "0 albany mn population 15177 NUMERIC \n",
619
+ "1 current weather in volcano, ca 114414 DESCRIPTION \n",
620
+ "2 ____________________ is considered the father ... 9083 DESCRIPTION \n",
621
+ "3 how many days is an appraisal good for a fanni... 281439 NUMERIC \n",
622
+ "4 average pharmacy tech salary 40287 NUMERIC \n",
623
+ "\n",
624
+ " wellFormedAnswers \\\n",
625
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
626
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
627
+ "2 [Hippocrates is considered the father of moder... \n",
628
+ "3 [An appraisal is good for 120 days from the da... \n",
629
+ "4 [The average salary for a pharmacy technician ... \n",
630
+ "\n",
631
+ " context \n",
632
+ "0 City of Albany, MN Zip Codes. City of Albany, ... \n",
633
+ "1 Volcano 10 Day Weather. Sunday:The Volcano for... \n",
634
+ "2 Hippocrates is widely considered to be the Fat... \n",
635
+ "3 New and Updated Underwriting and Eligibility P... \n",
636
+ "4 If you are interested in becoming a pharmacy t... "
637
+ ]
638
+ },
639
+ "execution_count": 11,
640
+ "metadata": {},
641
+ "output_type": "execute_result"
642
+ }
643
+ ],
644
+ "source": [
645
+ "df.head()"
646
+ ]
647
+ },
648
+ {
649
+ "cell_type": "code",
650
+ "execution_count": 14,
651
+ "id": "8a1050b9",
652
+ "metadata": {
653
+ "metadata": {}
654
+ },
655
+ "outputs": [
656
+ {
657
+ "data": {
658
+ "text/html": [
659
+ "<div>\n",
660
+ "<style scoped>\n",
661
+ " .dataframe tbody tr th:only-of-type {\n",
662
+ " vertical-align: middle;\n",
663
+ " }\n",
664
+ "\n",
665
+ " .dataframe tbody tr th {\n",
666
+ " vertical-align: top;\n",
667
+ " }\n",
668
+ "\n",
669
+ " .dataframe thead th {\n",
670
+ " text-align: right;\n",
671
+ " }\n",
672
+ "</style>\n",
673
+ "<table border=\"1\" class=\"dataframe\">\n",
674
+ " <thead>\n",
675
+ " <tr style=\"text-align: right;\">\n",
676
+ " <th></th>\n",
677
+ " <th>id</th>\n",
678
+ " <th>question</th>\n",
679
+ " <th>answers</th>\n",
680
+ " <th>wellFormedAnswers</th>\n",
681
+ " <th>context</th>\n",
682
+ " <th>query_type</th>\n",
683
+ " </tr>\n",
684
+ " </thead>\n",
685
+ " <tbody>\n",
686
+ " <tr>\n",
687
+ " <th>0</th>\n",
688
+ " <td>15177</td>\n",
689
+ " <td>albany mn population</td>\n",
690
+ " <td>[2,662]</td>\n",
691
+ " <td>[The population of Albany, Minnesota is 2,662. ]</td>\n",
692
+ " <td>City of Albany, MN Zip Codes. City of Albany, ...</td>\n",
693
+ " <td>NUMERIC</td>\n",
694
+ " </tr>\n",
695
+ " <tr>\n",
696
+ " <th>1</th>\n",
697
+ " <td>114414</td>\n",
698
+ " <td>current weather in volcano, ca</td>\n",
699
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
700
+ " <td>[The Volcano forecast for Apr 12 is 52 degrees...</td>\n",
701
+ " <td>Volcano 10 Day Weather. Sunday:The Volcano for...</td>\n",
702
+ " <td>DESCRIPTION</td>\n",
703
+ " </tr>\n",
704
+ " <tr>\n",
705
+ " <th>2</th>\n",
706
+ " <td>9083</td>\n",
707
+ " <td>____________________ is considered the father ...</td>\n",
708
+ " <td>[Hippocrates]</td>\n",
709
+ " <td>[Hippocrates is considered the father of moder...</td>\n",
710
+ " <td>Hippocrates is widely considered to be the Fat...</td>\n",
711
+ " <td>DESCRIPTION</td>\n",
712
+ " </tr>\n",
713
+ " <tr>\n",
714
+ " <th>3</th>\n",
715
+ " <td>281439</td>\n",
716
+ " <td>how many days is an appraisal good for a fanni...</td>\n",
717
+ " <td>[120 days from the date of the Note.]</td>\n",
718
+ " <td>[An appraisal is good for 120 days from the da...</td>\n",
719
+ " <td>New and Updated Underwriting and Eligibility P...</td>\n",
720
+ " <td>NUMERIC</td>\n",
721
+ " </tr>\n",
722
+ " <tr>\n",
723
+ " <th>4</th>\n",
724
+ " <td>40287</td>\n",
725
+ " <td>average pharmacy tech salary</td>\n",
726
+ " <td>[From $26,000 to $39,000 a year]</td>\n",
727
+ " <td>[The average salary for a pharmacy technician ...</td>\n",
728
+ " <td>If you are interested in becoming a pharmacy t...</td>\n",
729
+ " <td>NUMERIC</td>\n",
730
+ " </tr>\n",
731
+ " </tbody>\n",
732
+ "</table>\n",
733
+ "</div>"
734
+ ],
735
+ "text/plain": [
736
+ " id question \\\n",
737
+ "0 15177 albany mn population \n",
738
+ "1 114414 current weather in volcano, ca \n",
739
+ "2 9083 ____________________ is considered the father ... \n",
740
+ "3 281439 how many days is an appraisal good for a fanni... \n",
741
+ "4 40287 average pharmacy tech salary \n",
742
+ "\n",
743
+ " answers \\\n",
744
+ "0 [2,662] \n",
745
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
746
+ "2 [Hippocrates] \n",
747
+ "3 [120 days from the date of the Note.] \n",
748
+ "4 [From $26,000 to $39,000 a year] \n",
749
+ "\n",
750
+ " wellFormedAnswers \\\n",
751
+ "0 [The population of Albany, Minnesota is 2,662. ] \n",
752
+ "1 [The Volcano forecast for Apr 12 is 52 degrees... \n",
753
+ "2 [Hippocrates is considered the father of moder... \n",
754
+ "3 [An appraisal is good for 120 days from the da... \n",
755
+ "4 [The average salary for a pharmacy technician ... \n",
756
+ "\n",
757
+ " context query_type \n",
758
+ "0 City of Albany, MN Zip Codes. City of Albany, ... NUMERIC \n",
759
+ "1 Volcano 10 Day Weather. Sunday:The Volcano for... DESCRIPTION \n",
760
+ "2 Hippocrates is widely considered to be the Fat... DESCRIPTION \n",
761
+ "3 New and Updated Underwriting and Eligibility P... NUMERIC \n",
762
+ "4 If you are interested in becoming a pharmacy t... NUMERIC "
763
+ ]
764
+ },
765
+ "execution_count": 14,
766
+ "metadata": {},
767
+ "output_type": "execute_result"
768
+ }
769
+ ],
770
+ "source": [
771
+ "# reordering columns\n",
772
+ "df = df[[\"id\", \"question\", \"answers\", \"wellFormedAnswers\", \"context\", \"query_type\"]]\n",
773
+ "df.head()"
774
+ ]
775
+ },
776
+ {
777
+ "cell_type": "code",
778
+ "execution_count": 15,
779
+ "id": "24a818ba",
780
+ "metadata": {
781
+ "metadata": {}
782
+ },
783
+ "outputs": [],
784
+ "source": [
785
+ "# save df to json with indent=4\n",
786
+ "df.to_json(\n",
787
+ " \"./data/datasets/ms_macro.json\", orient=\"records\", indent=4\n",
788
+ ")"
789
+ ]
790
+ }
791
+ ],
792
+ "metadata": {
793
+ "kernelspec": {
794
+ "display_name": "Python 3 (ipykernel)",
795
+ "language": "python",
796
+ "name": "python3"
797
+ },
798
+ "language_info": {
799
+ "codemirror_mode": {
800
+ "name": "ipython",
801
+ "version": 3
802
+ },
803
+ "file_extension": ".py",
804
+ "mimetype": "text/x-python",
805
+ "name": "python",
806
+ "nbconvert_exporter": "python",
807
+ "pygments_lexer": "ipython3",
808
+ "version": "3.11.9"
809
+ }
810
+ },
811
+ "nbformat": 4,
812
+ "nbformat_minor": 5
813
+ }
qa_chain_test.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import sys
4
+ import pandas as pd
5
+ from timeit import default_timer as timer
6
+ import nltk
7
+
8
+ chatting = len(sys.argv) > 1 and sys.argv[1] == "chat"
9
+
10
+ if chatting:
11
+ os.environ["BATCH_SIZE"] = "1"
12
+
13
+ from app_modules.init import app_init
14
+ from app_modules.llm_qa_chain import QAChain
15
+ from app_modules.utils import print_llm_response
16
+
17
+ llm_loader, qa_chain = app_init()
18
+
19
+ if chatting:
20
+ print("Starting chat mode")
21
+ while True:
22
+ question = input("Please enter your question: ")
23
+ if question.lower() == "exit":
24
+ break
25
+ result = qa_chain.call_chain({"question": question, "chat_history": []}, None)
26
+ print_llm_response(result)
27
+
28
+ sys.exit(0)
29
+
30
+ num_of_questions = 0
31
+
32
+ if len(sys.argv) > 1:
33
+ num_of_questions = int(sys.argv[1])
34
+
35
+ # Create an empty DataFrame with column names
36
+ df = pd.DataFrame(
37
+ columns=[
38
+ "id",
39
+ "question",
40
+ "answer",
41
+ ]
42
+ )
43
+
44
+ batch_size = int(os.getenv("BATCH_SIZE", "1"))
45
+ print(f"Batch size: {batch_size}")
46
+
47
+ questions_file_path = os.environ.get("QUESTIONS_FILE_PATH")
48
+ debug_retrieval = os.getenv("DEBUG_RETRIEVAL", "false").lower() == "true"
49
+
50
+ # Open the file for reading
51
+ print(f"Reading questions from file: {questions_file_path}")
52
+ test_data = json.loads(open(questions_file_path).read())
53
+
54
+ if isinstance(test_data, dict):
55
+ questions = [test_data[key] for key in test_data.keys()]
56
+ ids = [key for key in test_data.keys()]
57
+ else:
58
+ questions = test_data
59
+ ids = [row["id"] for row in questions]
60
+
61
+ if num_of_questions > 0:
62
+ questions = questions[:num_of_questions]
63
+
64
+ print(f"Number of questions: {len(questions)}")
65
+
66
+ if __name__ == "__main__":
67
+ chat_start = timer()
68
+ index = 0
69
+
70
+ while index < len(questions):
71
+ batch_ids = ids[index : index + batch_size]
72
+ batch_questions = [q["question"] for q in questions[index : index + batch_size]]
73
+
74
+ if isinstance(qa_chain, QAChain):
75
+ inputs = [{"question": q, "chat_history": []} for q in batch_questions]
76
+ else:
77
+ inputs = [{"question": q} for q in batch_questions]
78
+
79
+ start = timer()
80
+ result = qa_chain.call_chain(inputs, None)
81
+ end = timer()
82
+ print(f"Completed in {end - start:.3f}s")
83
+
84
+ # print("result:", result)
85
+ batch_answers = [r["answer"] for r in result]
86
+
87
+ for id, question, answer in zip(batch_ids, batch_questions, batch_answers):
88
+ df.loc[len(df)] = {
89
+ "id": id,
90
+ "question": question,
91
+ "answer": answer,
92
+ }
93
+
94
+ index += batch_size
95
+
96
+ for r in result:
97
+ print_llm_response(r, debug_retrieval)
98
+
99
+ chat_end = timer()
100
+ total_time = chat_end - chat_start
101
+ print(f"Total time used: {total_time:.3f} s")
102
+
103
+ df2 = pd.DataFrame(
104
+ columns=[
105
+ "id",
106
+ "question",
107
+ "answer",
108
+ "word_count",
109
+ "ground_truth",
110
+ ]
111
+ )
112
+
113
+ for i in range(len(df)):
114
+ question = questions[i]
115
+ answer = df["answer"][i]
116
+ query = df["question"][i]
117
+ id = df["id"][i]
118
+
119
+ ground_truth = question["answers"]
120
+
121
+ word_count = len(nltk.word_tokenize(answer))
122
+
123
+ df2.loc[len(df2)] = {
124
+ "id": id,
125
+ "question": query,
126
+ "answer": answer,
127
+ "word_count": word_count,
128
+ "ground_truth": ground_truth,
129
+ }
130
+
131
+ pd.options.display.float_format = "{:.3f}".format
132
+ print(df2.describe())
133
+
134
+ word_count = df2["word_count"].sum()
135
+
136
+ csv_file = (
137
+ os.getenv("TEST_RESULTS_CSV_FILE") or f"qa_batch_{batch_size}_test_results.csv"
138
+ )
139
+ with open(csv_file, "w") as f:
140
+ f.write(
141
+ f"# RAG: {isinstance(qa_chain, QAChain)} questions: {questions_file_path}\n"
142
+ )
143
+ f.write(
144
+ f"# model: {llm_loader.model_name} repetition_penalty: {llm_loader.repetition_penalty}\n"
145
+ )
146
+
147
+ df2.to_csv(csv_file, mode="a", index=False, header=True)
148
+ print(f"test results saved to file: {csv_file}")
149
+
150
+ df = pd.DataFrame(
151
+ {
152
+ "model": [llm_loader.model_name],
153
+ "repetition_penalty": [llm_loader.repetition_penalty],
154
+ "word_count": [word_count],
155
+ "inference_time": [total_time],
156
+ "inference_speed": [word_count / total_time],
157
+ }
158
+ )
159
+
160
+ print(f"Number of words generated: {word_count}")
161
+ print(f"Average generation speed: {word_count / total_time:.3f} words/s")
162
+
163
+ csv_file = os.getenv("ALL_RESULTS_CSV_FILE") or "qa_chain_all_results.csv"
164
+ file_existed = os.path.exists(csv_file) and os.path.getsize(csv_file) > 0
165
+ df.to_csv(csv_file, mode="a", index=False, header=not file_existed)
166
+ print(f"all results appended to file: {csv_file}")
requirements.txt CHANGED
@@ -1,8 +1,12 @@
1
-
2
- gradio
3
- spaces
4
- torch==2.2.0
5
- git+https://github.com/huggingface/transformers/
6
- optimum
7
- accelerate
8
- bitsandbytes
 
 
 
 
 
1
+ nltk==3.8.1
2
+ langchain==0.1.16
3
+ langchain-openai==0.1.3
4
+ langchain_google_genai==1.0.2
5
+ transformers==4.40.1
6
+ accelerate==0.29.3
7
+ python-dotenv==1.0.1
8
+ gradio==4.26.0
9
+ spaces==0.27.1
10
+ black==24.4.0
11
+ chardet==5.2.0
12
+ sentencepiece==0.2.0