{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# h2oGPT API call example\n",
"\n",
"Documentation: https://github.com/h2oai/h2ogpt/blob/main/docs/README_CLIENT.md\n",
"\n",
"Good summary of many of the parameters can be found in the [`grclient.py`](https://github.com/h2oai/h2ogpt/blob/main/gradio_utils/grclient.py) \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"One can interact with Gradio Client by using either native client or h2oGPT wrapper: \n",
"\n",
"- Using Gradio \\'s native client:\n",
"\n",
" ```python\n",
" from gradio_client import Client\n",
" import ast\n",
" \n",
" HOST_URL = \"http://localhost:7860\"\n",
" client = Client(HOST_URL)\n",
" \n",
" # string of dict for input\n",
" kwargs = dict(instruction_nochat='Who are you?')\n",
" res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
" \n",
" # string of dict for output\n",
" response = ast.literal_eval(res)['response']\n",
" print(response)\n",
" ```\n",
"\n",
"- Using [h2oGPT wrapper for Gradio Native Client](https://github.com/h2oai/h2ogpt/blob/main/docs/README_CLIENT.md#h2ogpt-gradio-wrapper)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded h2oGPT details\n"
]
}
],
"source": [
"from gradio_client import Client\n",
"import ast\n",
"from pprint import pprint\n",
"import json\n",
"from tqdm import tqdm\n",
"from enum import Enum\n",
"\n",
"class LangChainAction(Enum):\n",
" \"\"\"LangChain action\"\"\"\n",
" QUERY = \"Query\"\n",
" SUMMARIZE_MAP = \"Summarize\"\n",
" \n",
"\n",
"with open('../tokens/h2oGPT_details.txt') as f:\n",
" gpt_details = json.load(f)\n",
" print(\"Loaded h2oGPT details\")\n",
"\n",
"# HOST_URL = \"http://localhost:7860\"\n",
"HOST_URL = gpt_details[\"gpt_host_url\"]\n",
"H2OGPT_KEY = gpt_details[\"h2ogpt_key\"]\n",
"LANGCHAIN_MODE = langchain_mode = 'UserData4'\n",
"\n",
"client = Client(HOST_URL)"
]
},
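{
"cell_type": "markdown",
"metadata": {},
"source": [
"`h2oGPT_details.txt` is expected to be a small JSON file providing the two keys read above (values here are placeholders):\n",
"\n",
"```json\n",
"{\n",
"  \"gpt_host_url\": \"http://localhost:7860\",\n",
"  \"h2ogpt_key\": \"<your key>\"\n",
"}\n",
"```"
]
},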
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Utility functions"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"import uuid\n",
"import requests\n",
"from requests.exceptions import HTTPError\n",
"import contextlib\n",
"\n",
"\n",
"def print_full_model_response(response):\n",
" '''\n",
" Helper function to print full response from the h2oGPT call, including all parameters.\n",
" Important keys/parameters:\n",
" - `base_model` - model that used to answer the API call\n",
" - `extra_dict` - model parameters that were used to answer the API call\n",
" - `prompt` - actual prompt sent to LLM\n",
" - `where_from` - how hosted model is running: vLLM , tensor, ....\n",
" '''\n",
" print(\"Model Response with Parameters:\\n\")\n",
" save_dict = ast.literal_eval(res)['save_dict']\n",
" # Remove key from extra_dict\n",
" save_dict.pop('h2ogpt_key', None)\n",
" pprint(save_dict)\n",
" print(\"\\n\")\n",
" try:\n",
" sources = ast.literal_eval(response)['sources']\n",
" print(\"Sources:\\n\")\n",
" pprint(sources)\n",
" print(\"\\n\")\n",
" except:\n",
" print(\"No sources\\n\")\n",
"\n",
"\n",
"def makedirs(path, exist_ok=True, tmp_ok=False, use_base=False):\n",
" \"\"\"\n",
" Avoid some inefficiency in os.makedirs()\n",
" :param path:\n",
" :param exist_ok:\n",
" :param tmp_ok: use /tmp if can't write locally\n",
" :param use_base:\n",
" :return:\n",
" \"\"\"\n",
" if path is None:\n",
" return path\n",
" # if base path set, make relative to that, unless user_path absolute path\n",
" if use_base:\n",
" if os.path.normpath(path) == os.path.normpath(os.path.abspath(path)):\n",
" pass\n",
" else:\n",
" if os.getenv('H2OGPT_BASE_PATH') is not None:\n",
" base_dir = os.path.normpath(os.getenv('H2OGPT_BASE_PATH'))\n",
" path = os.path.normpath(path)\n",
" if not path.startswith(base_dir):\n",
" path = os.path.join(os.getenv('H2OGPT_BASE_PATH', ''), path)\n",
" path = os.path.normpath(path)\n",
"\n",
" if os.path.isdir(path) and os.path.exists(path):\n",
" assert exist_ok, \"Path already exists\"\n",
" return path\n",
" try:\n",
" os.makedirs(path, exist_ok=exist_ok)\n",
" return path\n",
" except FileExistsError:\n",
" # e.g. soft link\n",
" return path\n",
" except PermissionError:\n",
" if tmp_ok:\n",
" path0 = path\n",
" path = os.path.join('/tmp/', path)\n",
" print(\"Permission denied to %s, using %s instead\" % (path0, path), flush=True)\n",
" os.makedirs(path, exist_ok=exist_ok)\n",
" return path\n",
" else:\n",
" raise\n",
"\n",
" \n",
"def shutil_rmtree(*args, **kwargs):\n",
" return shutil.rmtree(*args, **kwargs)\n",
"\n",
"\n",
"def remove(path: str):\n",
" try:\n",
" if path is not None and os.path.exists(path):\n",
" if os.path.isdir(path):\n",
" shutil_rmtree(path, ignore_errors=True)\n",
" else:\n",
" with contextlib.suppress(FileNotFoundError):\n",
" os.remove(path)\n",
" except:\n",
" pass\n",
"\n",
"\n",
"def atomic_move_simple(src, dst):\n",
" try:\n",
" shutil.move(src, dst)\n",
" except (shutil.Error, FileExistsError):\n",
" pass\n",
" remove(src)\n",
"\n",
"\n",
"def download_simple(url, dest=None, overwrite=False, verbose=False):\n",
" if dest is None:\n",
" dest = os.path.basename(url)\n",
" base_path = os.path.dirname(dest)\n",
" if base_path: # else local path\n",
" base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True)\n",
" dest = os.path.join(base_path, os.path.basename(dest))\n",
"\n",
" if os.path.isfile(dest):\n",
" if not overwrite:\n",
" print(\"Already have %s from url %s, delete file if invalid\" % (dest, str(url)), flush=True)\n",
" return dest\n",
" else:\n",
" remove(dest)\n",
"\n",
" if verbose:\n",
" print(\"BEGIN get url %s\" % str(url), flush=True)\n",
" if url.startswith(\"file://\"):\n",
" from requests_file import FileAdapter\n",
" s = requests.Session()\n",
" s.mount('file://', FileAdapter())\n",
" url_data = s.get(url, stream=True)\n",
" else:\n",
" url_data = requests.get(url, stream=True)\n",
" if verbose:\n",
" print(\"GOT url %s\" % str(url), flush=True)\n",
"\n",
" if url_data.status_code != requests.codes.ok:\n",
" msg = \"Cannot get url %s, code: %s, reason: %s\" % (\n",
" str(url),\n",
" str(url_data.status_code),\n",
" str(url_data.reason),\n",
" )\n",
" raise requests.exceptions.RequestException(msg)\n",
" url_data.raw.decode_content = True\n",
"\n",
" uuid_tmp = str(uuid.uuid4())[:6]\n",
" dest_tmp = dest + \"_dl_\" + uuid_tmp + \".tmp\"\n",
" with open(dest_tmp, \"wb\") as f:\n",
" shutil.copyfileobj(url_data.raw, f)\n",
" atomic_move_simple(dest_tmp, dest)\n",
" if verbose:\n",
" print(\"DONE url %s\" % str(url), flush=True)\n",
" return dest"
]
},
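{
"cell_type": "markdown",
"metadata": {},
"source": [
"For example, `download_simple` can fetch a document into a local folder before it is uploaded to a collection (illustrative sketch; the sample PDF URL is the one that shows up in the source listings later in this notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative: fetch a sample PDF into user_path; skipped if the file already exists\n",
"dest = download_simple('https://www.africau.edu/images/default/sample.pdf',\n",
"                       dest='user_path/sample.pdf', verbose=True)\n",
"print(dest)"
]
},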
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hello World example"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response:\n",
"\n",
"(\" Hello! My name is LLaMA, I'm a large language model trained by a team of \"\n",
" 'researcher at Meta AI. My primary function is to understand and respond to '\n",
" 'human input in a helpful and engaging manner. I can answer questions, '\n",
" 'provide information, and even generate creative content such as stories or '\n",
" 'dialogue. Is there anything specific you would like to know or talk about?')\n"
]
}
],
"source": [
"# string of dict for input\n",
"kwargs = dict(instruction_nochat='Who are you?',\n",
" h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"\n",
"# string of dict for output\n",
"response = ast.literal_eval(res)['response']\n",
"print(\"Model Response:\\n\")\n",
"pprint(response)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response with Parameters:\n",
"\n",
"{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',\n",
" 'error': '',\n",
" 'extra_dict': {'frequency_penalty': 0,\n",
" 'inference_server': 'vllm:192.176.243.12:5000',\n",
" 'max_tokens': 1024,\n",
" 'n': 1,\n",
" 'ntokens': None,\n",
" 'num_prompt_tokens': 13,\n",
" 'presence_penalty': 0.6,\n",
" 't_generate': 4.012332916259766,\n",
" 'temperature': 0,\n",
" 'tokens_persecond': None,\n",
" 'top_p': 1,\n",
" 'username': 'NO_REQUEST'},\n",
" 'output': \" Hello! My name is LLaMA, I'm a large language model trained by a \"\n",
" 'team of researcher at Meta AI. My primary function is to '\n",
" 'understand and respond to human input in a helpful and engaging '\n",
" 'manner. I can answer questions, provide information, and even '\n",
" 'generate creative content such as stories or dialogue. Is there '\n",
" 'anything specific you would like to know or talk about?',\n",
" 'prompt': '[INST] Who are you? [/INST]',\n",
" 'save_dir': 'saveall_docs',\n",
" 'sources': [],\n",
" 'valid_key': True,\n",
" 'where_from': 'vllm',\n",
" 'which_api': 'str_api'}\n",
"\n",
"\n",
"Sources:\n",
"\n",
"[]\n",
"\n",
"\n"
]
}
],
"source": [
"print_full_model_response(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Setting `temperature` parameter requires setting `do_sample` to `True`. For best reproducibility, set `do_sample` to `False`.\n",
"\n",
"```python"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response:\n",
"\n",
"(\" Hello! I'm LLaMA, an AI assistant developed by Meta AI that can understand \"\n",
" \"and respond to human input in a conversational manner. I'm trained on a \"\n",
" 'massive dataset of text from the internet and can generate human-like '\n",
" 'responses to a wide range of topics and questions. I can be used to create '\n",
" 'chatbots, virtual assistants, and other applications that require natural '\n",
" 'language understanding and generation capabilities.')\n"
]
}
],
"source": [
"# string of dict for input\n",
"kwargs = dict(instruction_nochat='Who are you?',\n",
" seed=123,\n",
" temperature=0.5,\n",
" do_sample=True,\n",
" h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"\n",
"# string of dict for output\n",
"response = ast.literal_eval(res)['response']\n",
"print(\"Model Response:\\n\")\n",
"pprint(response)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response with Parameters:\n",
"\n",
"{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',\n",
" 'error': '',\n",
" 'extra_dict': {'frequency_penalty': 0,\n",
" 'inference_server': 'vllm:192.176.243.12:5000',\n",
" 'max_tokens': 1024,\n",
" 'n': 1,\n",
" 'ntokens': None,\n",
" 'num_prompt_tokens': 13,\n",
" 'presence_penalty': 0.6,\n",
" 't_generate': 3.7804932594299316,\n",
" 'temperature': 0.5,\n",
" 'tokens_persecond': None,\n",
" 'top_p': 0.75,\n",
" 'username': 'NO_REQUEST'},\n",
" 'output': \" Hello! I'm LLaMA, an AI assistant developed by Meta AI that can \"\n",
" 'understand and respond to human input in a conversational manner. '\n",
" \"I'm trained on a massive dataset of text from the internet and can \"\n",
" 'generate human-like responses to a wide range of topics and '\n",
" 'questions. I can be used to create chatbots, virtual assistants, '\n",
" 'and other applications that require natural language understanding '\n",
" 'and generation capabilities.',\n",
" 'prompt': '[INST] Who are you? [/INST]',\n",
" 'save_dir': 'saveall_docs',\n",
" 'sources': [],\n",
" 'valid_key': True,\n",
" 'where_from': 'vllm',\n",
" 'which_api': 'str_api'}\n",
"\n",
"\n",
"Sources:\n",
"\n",
"[]\n",
"\n",
"\n"
]
}
],
"source": [
"print_full_model_response(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example of Context only call with parameters\n",
"\n",
"Good summary of many of the parameters can be found in the [`grclient.py`](https://github.com/h2oai/h2ogpt/blob/main/gradio_utils/grclient.py) \n",
"\n",
"In the below example, we will set LLM model to use as well as some parameters."
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response:\n",
"\n",
"(\" Hello! My name is LLaMA, I'm a large language model trained by a team of \"\n",
" 'researcher at Meta AI. My primary function is to assist with tasks such as '\n",
" 'answering questions, providing information, and generating text. I am '\n",
" 'capable of understanding and responding to human input in a conversational '\n",
" 'manner. I am here to help and provide information to the best of my ability. '\n",
" 'Is there something specific you would like to know or discuss?')\n"
]
}
],
"source": [
"# string of dict for input\n",
"kwargs = dict(instruction_nochat='Who are you?',\n",
" visible_models=['h2oai/h2ogpt-4096-llama2-13b-chat'],\n",
" langchain_mode='LLM',\n",
" max_new_tokens=512,\n",
" max_time=360,\n",
" repetition_penalty=1.07,\n",
" do_sample=True,\n",
" temperature=0.1,\n",
" top_p=0.75,\n",
" penalty_alpha=0,\n",
" h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"\n",
"# string of dict for output\n",
"response = ast.literal_eval(res)['response']\n",
"print(\"Model Response:\\n\")\n",
"pprint(response)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response with Parameters:\n",
"\n",
"{'base_model': 'h2oai/h2ogpt-4096-llama2-13b-chat',\n",
" 'error': '',\n",
" 'extra_dict': {'frequency_penalty': 0,\n",
" 'inference_server': 'vllm:192.176.243.12:5001',\n",
" 'max_tokens': 512,\n",
" 'n': 1,\n",
" 'ntokens': None,\n",
" 'num_prompt_tokens': 13,\n",
" 'presence_penalty': 0.6,\n",
" 't_generate': 2.1190145015716553,\n",
" 'temperature': 0.1,\n",
" 'tokens_persecond': None,\n",
" 'top_p': 0.75,\n",
" 'username': 'NO_REQUEST'},\n",
" 'output': \" Hello! My name is LLaMA, I'm a large language model trained by a \"\n",
" 'team of researcher at Meta AI. My primary function is to assist '\n",
" 'with tasks such as answering questions, providing information, and '\n",
" 'generating text. I am capable of understanding and responding to '\n",
" 'human input in a conversational manner. I am here to help and '\n",
" 'provide information to the best of my ability. Is there something '\n",
" 'specific you would like to know or discuss?',\n",
" 'prompt': '[INST] Who are you? [/INST]',\n",
" 'save_dir': 'saveall_docs',\n",
" 'sources': [],\n",
" 'valid_key': True,\n",
" 'where_from': 'vllm',\n",
" 'which_api': 'str_api'}\n",
"\n",
"\n",
"Sources:\n",
"\n",
"[]\n",
"\n",
"\n"
]
}
],
"source": [
"print_full_model_response(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summarize Document with mode \"Summarize\"\n",
"\n",
"This approach is useful for the following scenarios:\n",
"- Summarize a given document\n",
"- Ask question about given document. \n",
"\n",
"This is different from asking question (searching) full collection of documents"
]
},
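{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before the step-by-step walkthrough, here is the shape of the eventual summarization call (a minimal sketch, not executed here; it assumes `/submit_nochat_api` accepts `langchain_action`, as exercised in h2ogpt's client tests). `LangChainAction.QUERY.value` is passed the same way to ask a question against the whole collection:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch only: run after the collection below is created and filled (Steps 1-2).\n",
"# `langchain_action` selects Query vs. Summarize behavior on the server.\n",
"kwargs = dict(langchain_mode=LANGCHAIN_MODE,\n",
"              langchain_action=LangChainAction.SUMMARIZE_MAP.value,\n",
"              max_new_tokens=512,\n",
"              h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"print(ast.literal_eval(res)['response'])"
]
},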
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 1 - create shared Collection and upload documents\n",
"\n",
"Currently there is no way to authenticate with Gradio Client, therefore we will use shared collection. \n",
"\n",
"The additional examples of Client use can be found in the `test_client_chat_stream_langchain_steps3` function located in the `test_client_calls.py` file. \n",
"\n",
"**Create Shared folder**:"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"user_path = 'user_path'\n",
"new_langchain_mode_text = '%s, %s, %s' % (langchain_mode, 'shared', user_path)\n",
"res = client.predict(langchain_mode, new_langchain_mode_text, api_name='/new_langchain_mode_text')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"({'__type__': 'update',\n",
" 'choices': [['UserData', 'UserData'],\n",
" ['MyData', 'MyData'],\n",
" ['LLM', 'LLM'],\n",
" ['UserData4', 'UserData4']],\n",
" 'value': 'UserData4'},\n",
" '',\n",
" '/var/folders/_z/jf3ghwdx1kg905xm5p1nktlh0000gp/T/gradio/tmpplv8021u.json')\n"
]
}
],
"source": [
"pprint(res)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"text = \"Yufuu is a wonderful place and you should really visit because there is lots of sun.\"\n",
"loaders = tuple([None, None, None, None])\n",
"res = client.predict(text, langchain_mode, True, 512, True,\n",
" *loaders,\n",
" H2OGPT_KEY,\n",
" api_name='/add_text')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(None,\n",
" 'UserData4',\n",
" ' \\n'\n",
" ' \\n'\n",
" '
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt | Yufuu is a '\n", " 'wonderful place and you should really v |
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt '\n", " ' | Yufuu is a wonderful place and you should really v |
2 | https://www.africau.edu/images/default/sample.pdf | Simple '\n", " 'PDF File 2\\n'\n", " '...continued from page 1. Yet '\n", " 'mo |
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt '\n", " ' | Yufuu is a wonderful place and you should really v |
2 | https://www.africau.edu/images/default/sample.pdf | Simple '\n", " 'PDF File 2\\n'\n", " '...continued from page 1. Yet '\n", " 'mo |
3 | user_path/terms-and-conditions.pdf '\n", " ' | August 2017\\n'\n", " '11 I agree to reimburse the dealer '\n", " 'the |
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt '\n", " ' | Yufuu is a wonderful place and you should really v |
2 | https://www.africau.edu/images/default/sample.pdf | Simple '\n", " 'PDF File 2\\n'\n", " '...continued from page 1. Yet '\n", " 'mo |
3 | user_path/terms-and-conditions.pdf '\n", " ' | August 2017\\n'\n", " '11 I agree to reimburse the dealer '\n", " 'the |
4 | user_path/CVRP-Implementation-Manual.pdf '\n", " ' | This page intentionally blank. |