{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# h2oGPT API call example\n",
"\n",
"Documentation: https://github.com/h2oai/h2ogpt/blob/main/docs/README_CLIENT.md\n",
"\n",
"Good summary of many of the parameters can be found in the [`grclient.py`](https://github.com/h2oai/h2ogpt/blob/main/gradio_utils/grclient.py) \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"One can interact with Gradio Client by using either native client or h2oGPT wrapper: \n",
"\n",
"- Using Gradio \\'s native client:\n",
"\n",
" ```python\n",
" from gradio_client import Client\n",
" import ast\n",
" \n",
" HOST_URL = \"http://localhost:7860\"\n",
" client = Client(HOST_URL)\n",
" \n",
" # string of dict for input\n",
" kwargs = dict(instruction_nochat='Who are you?')\n",
" res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
" \n",
" # string of dict for output\n",
" response = ast.literal_eval(res)['response']\n",
" print(response)\n",
" ```\n",
"\n",
"- Using [h2oGPT wrapper for Gradio Native Client](https://github.com/h2oai/h2ogpt/blob/main/docs/README_CLIENT.md#h2ogpt-gradio-wrapper)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded h2oGPT details\n"
]
}
],
"source": [
"from gradio_client import Client\n",
"import ast\n",
"from pprint import pprint\n",
"import json\n",
"from tqdm import tqdm\n",
"from enum import Enum\n",
"\n",
"class LangChainAction(Enum):\n",
" \"\"\"LangChain action\"\"\"\n",
" QUERY = \"Query\"\n",
" SUMMARIZE_MAP = \"Summarize\"\n",
" \n",
"\n",
"with open('../tokens/h2oGPT_details.txt') as f:\n",
" gpt_details = json.load(f)\n",
" print(\"Loaded h2oGPT details\")\n",
"\n",
"# HOST_URL = \"http://localhost:7860\"\n",
"HOST_URL = gpt_details[\"gpt_host_url\"]\n",
"H2OGPT_KEY = gpt_details[\"h2ogpt_key\"]\n",
"LANGCHAIN_MODE = langchain_mode = 'UserData4'\n",
"\n",
"client = Client(HOST_URL)"
]
},
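{
"cell_type": "markdown",
"metadata": {},
"source": [
"`h2oGPT_details.txt` is expected to be a small JSON file providing the two keys read above (values here are placeholders):\n",
"\n",
"```json\n",
"{\n",
"  \"gpt_host_url\": \"http://localhost:7860\",\n",
"  \"h2ogpt_key\": \"<your key>\"\n",
"}\n",
"```"
]
},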
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Utility functions"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"import uuid\n",
"import requests\n",
"from requests.exceptions import HTTPError\n",
"import contextlib\n",
"\n",
"\n",
"def print_full_model_response(response):\n",
" '''\n",
" Helper function to print full response from the h2oGPT call, including all parameters.\n",
" Important keys/parameters:\n",
" - `base_model` - model that used to answer the API call\n",
" - `extra_dict` - model parameters that were used to answer the API call\n",
" - `prompt` - actual prompt sent to LLM\n",
" - `where_from` - how hosted model is running: vLLM , tensor, ....\n",
" '''\n",
" print(\"Model Response with Parameters:\\n\")\n",
" save_dict = ast.literal_eval(res)['save_dict']\n",
" # Remove key from extra_dict\n",
" save_dict.pop('h2ogpt_key', None)\n",
" pprint(save_dict)\n",
" print(\"\\n\")\n",
" try:\n",
" sources = ast.literal_eval(response)['sources']\n",
" print(\"Sources:\\n\")\n",
" pprint(sources)\n",
" print(\"\\n\")\n",
" except:\n",
" print(\"No sources\\n\")\n",
"\n",
"\n",
"def makedirs(path, exist_ok=True, tmp_ok=False, use_base=False):\n",
" \"\"\"\n",
" Avoid some inefficiency in os.makedirs()\n",
" :param path:\n",
" :param exist_ok:\n",
" :param tmp_ok: use /tmp if can't write locally\n",
" :param use_base:\n",
" :return:\n",
" \"\"\"\n",
" if path is None:\n",
" return path\n",
" # if base path set, make relative to that, unless user_path absolute path\n",
" if use_base:\n",
" if os.path.normpath(path) == os.path.normpath(os.path.abspath(path)):\n",
" pass\n",
" else:\n",
" if os.getenv('H2OGPT_BASE_PATH') is not None:\n",
" base_dir = os.path.normpath(os.getenv('H2OGPT_BASE_PATH'))\n",
" path = os.path.normpath(path)\n",
" if not path.startswith(base_dir):\n",
" path = os.path.join(os.getenv('H2OGPT_BASE_PATH', ''), path)\n",
" path = os.path.normpath(path)\n",
"\n",
" if os.path.isdir(path) and os.path.exists(path):\n",
" assert exist_ok, \"Path already exists\"\n",
" return path\n",
" try:\n",
" os.makedirs(path, exist_ok=exist_ok)\n",
" return path\n",
" except FileExistsError:\n",
" # e.g. soft link\n",
" return path\n",
" except PermissionError:\n",
" if tmp_ok:\n",
" path0 = path\n",
" path = os.path.join('/tmp/', path)\n",
" print(\"Permission denied to %s, using %s instead\" % (path0, path), flush=True)\n",
" os.makedirs(path, exist_ok=exist_ok)\n",
" return path\n",
" else:\n",
" raise\n",
"\n",
" \n",
"def shutil_rmtree(*args, **kwargs):\n",
" return shutil.rmtree(*args, **kwargs)\n",
"\n",
"\n",
"def remove(path: str):\n",
" try:\n",
" if path is not None and os.path.exists(path):\n",
" if os.path.isdir(path):\n",
" shutil_rmtree(path, ignore_errors=True)\n",
" else:\n",
" with contextlib.suppress(FileNotFoundError):\n",
" os.remove(path)\n",
" except:\n",
" pass\n",
"\n",
"\n",
"def atomic_move_simple(src, dst):\n",
" try:\n",
" shutil.move(src, dst)\n",
" except (shutil.Error, FileExistsError):\n",
" pass\n",
" remove(src)\n",
"\n",
"\n",
"def download_simple(url, dest=None, overwrite=False, verbose=False):\n",
" if dest is None:\n",
" dest = os.path.basename(url)\n",
" base_path = os.path.dirname(dest)\n",
" if base_path: # else local path\n",
" base_path = makedirs(base_path, exist_ok=True, tmp_ok=True, use_base=True)\n",
" dest = os.path.join(base_path, os.path.basename(dest))\n",
"\n",
" if os.path.isfile(dest):\n",
" if not overwrite:\n",
" print(\"Already have %s from url %s, delete file if invalid\" % (dest, str(url)), flush=True)\n",
" return dest\n",
" else:\n",
" remove(dest)\n",
"\n",
" if verbose:\n",
" print(\"BEGIN get url %s\" % str(url), flush=True)\n",
" if url.startswith(\"file://\"):\n",
" from requests_file import FileAdapter\n",
" s = requests.Session()\n",
" s.mount('file://', FileAdapter())\n",
" url_data = s.get(url, stream=True)\n",
" else:\n",
" url_data = requests.get(url, stream=True)\n",
" if verbose:\n",
" print(\"GOT url %s\" % str(url), flush=True)\n",
"\n",
" if url_data.status_code != requests.codes.ok:\n",
" msg = \"Cannot get url %s, code: %s, reason: %s\" % (\n",
" str(url),\n",
" str(url_data.status_code),\n",
" str(url_data.reason),\n",
" )\n",
" raise requests.exceptions.RequestException(msg)\n",
" url_data.raw.decode_content = True\n",
"\n",
" uuid_tmp = str(uuid.uuid4())[:6]\n",
" dest_tmp = dest + \"_dl_\" + uuid_tmp + \".tmp\"\n",
" with open(dest_tmp, \"wb\") as f:\n",
" shutil.copyfileobj(url_data.raw, f)\n",
" atomic_move_simple(dest_tmp, dest)\n",
" if verbose:\n",
" print(\"DONE url %s\" % str(url), flush=True)\n",
" return dest"
]
},
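{
"cell_type": "markdown",
"metadata": {},
"source": [
"For example, `download_simple` can fetch a document into a local folder before it is uploaded to a collection (illustrative sketch; the sample PDF URL is the one that shows up in the source listings later in this notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative: fetch a sample PDF into user_path; skipped if the file already exists\n",
"dest = download_simple('https://www.africau.edu/images/default/sample.pdf',\n",
"                       dest='user_path/sample.pdf', verbose=True)\n",
"print(dest)"
]
},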
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hello World example"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response:\n",
"\n",
"(\" Hello! My name is LLaMA, I'm a large language model trained by a team of \"\n",
" 'researcher at Meta AI. My primary function is to understand and respond to '\n",
" 'human input in a helpful and engaging manner. I can answer questions, '\n",
" 'provide information, and even generate creative content such as stories or '\n",
" 'dialogue. Is there anything specific you would like to know or talk about?')\n"
]
}
],
"source": [
"# string of dict for input\n",
"kwargs = dict(instruction_nochat='Who are you?',\n",
" h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"\n",
"# string of dict for output\n",
"response = ast.literal_eval(res)['response']\n",
"print(\"Model Response:\\n\")\n",
"pprint(response)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response with Parameters:\n",
"\n",
"{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',\n",
" 'error': '',\n",
" 'extra_dict': {'frequency_penalty': 0,\n",
" 'inference_server': 'vllm:192.176.243.12:5000',\n",
" 'max_tokens': 1024,\n",
" 'n': 1,\n",
" 'ntokens': None,\n",
" 'num_prompt_tokens': 13,\n",
" 'presence_penalty': 0.6,\n",
" 't_generate': 4.012332916259766,\n",
" 'temperature': 0,\n",
" 'tokens_persecond': None,\n",
" 'top_p': 1,\n",
" 'username': 'NO_REQUEST'},\n",
" 'output': \" Hello! My name is LLaMA, I'm a large language model trained by a \"\n",
" 'team of researcher at Meta AI. My primary function is to '\n",
" 'understand and respond to human input in a helpful and engaging '\n",
" 'manner. I can answer questions, provide information, and even '\n",
" 'generate creative content such as stories or dialogue. Is there '\n",
" 'anything specific you would like to know or talk about?',\n",
" 'prompt': '[INST] Who are you? [/INST]',\n",
" 'save_dir': 'saveall_docs',\n",
" 'sources': [],\n",
" 'valid_key': True,\n",
" 'where_from': 'vllm',\n",
" 'which_api': 'str_api'}\n",
"\n",
"\n",
"Sources:\n",
"\n",
"[]\n",
"\n",
"\n"
]
}
],
"source": [
"print_full_model_response(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Setting `temperature` parameter requires setting `do_sample` to `True`. For best reproducibility, set `do_sample` to `False`.\n",
"\n",
"```python"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response:\n",
"\n",
"(\" Hello! I'm LLaMA, an AI assistant developed by Meta AI that can understand \"\n",
" \"and respond to human input in a conversational manner. I'm trained on a \"\n",
" 'massive dataset of text from the internet and can generate human-like '\n",
" 'responses to a wide range of topics and questions. I can be used to create '\n",
" 'chatbots, virtual assistants, and other applications that require natural '\n",
" 'language understanding and generation capabilities.')\n"
]
}
],
"source": [
"# string of dict for input\n",
"kwargs = dict(instruction_nochat='Who are you?',\n",
" seed=123,\n",
" temperature=0.5,\n",
" do_sample=True,\n",
" h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"\n",
"# string of dict for output\n",
"response = ast.literal_eval(res)['response']\n",
"print(\"Model Response:\\n\")\n",
"pprint(response)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response with Parameters:\n",
"\n",
"{'base_model': 'h2oai/h2ogpt-4096-llama2-70b-chat',\n",
" 'error': '',\n",
" 'extra_dict': {'frequency_penalty': 0,\n",
" 'inference_server': 'vllm:192.176.243.12:5000',\n",
" 'max_tokens': 1024,\n",
" 'n': 1,\n",
" 'ntokens': None,\n",
" 'num_prompt_tokens': 13,\n",
" 'presence_penalty': 0.6,\n",
" 't_generate': 3.7804932594299316,\n",
" 'temperature': 0.5,\n",
" 'tokens_persecond': None,\n",
" 'top_p': 0.75,\n",
" 'username': 'NO_REQUEST'},\n",
" 'output': \" Hello! I'm LLaMA, an AI assistant developed by Meta AI that can \"\n",
" 'understand and respond to human input in a conversational manner. '\n",
" \"I'm trained on a massive dataset of text from the internet and can \"\n",
" 'generate human-like responses to a wide range of topics and '\n",
" 'questions. I can be used to create chatbots, virtual assistants, '\n",
" 'and other applications that require natural language understanding '\n",
" 'and generation capabilities.',\n",
" 'prompt': '[INST] Who are you? [/INST]',\n",
" 'save_dir': 'saveall_docs',\n",
" 'sources': [],\n",
" 'valid_key': True,\n",
" 'where_from': 'vllm',\n",
" 'which_api': 'str_api'}\n",
"\n",
"\n",
"Sources:\n",
"\n",
"[]\n",
"\n",
"\n"
]
}
],
"source": [
"print_full_model_response(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example of Context only call with parameters\n",
"\n",
"Good summary of many of the parameters can be found in the [`grclient.py`](https://github.com/h2oai/h2ogpt/blob/main/gradio_utils/grclient.py) \n",
"\n",
"In the below example, we will set LLM model to use as well as some parameters."
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response:\n",
"\n",
"(\" Hello! My name is LLaMA, I'm a large language model trained by a team of \"\n",
" 'researcher at Meta AI. My primary function is to assist with tasks such as '\n",
" 'answering questions, providing information, and generating text. I am '\n",
" 'capable of understanding and responding to human input in a conversational '\n",
" 'manner. I am here to help and provide information to the best of my ability. '\n",
" 'Is there something specific you would like to know or discuss?')\n"
]
}
],
"source": [
"# string of dict for input\n",
"kwargs = dict(instruction_nochat='Who are you?',\n",
" visible_models=['h2oai/h2ogpt-4096-llama2-13b-chat'],\n",
" langchain_mode='LLM',\n",
" max_new_tokens=512,\n",
" max_time=360,\n",
" repetition_penalty=1.07,\n",
" do_sample=True,\n",
" temperature=0.1,\n",
" top_p=0.75,\n",
" penalty_alpha=0,\n",
" h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"\n",
"# string of dict for output\n",
"response = ast.literal_eval(res)['response']\n",
"print(\"Model Response:\\n\")\n",
"pprint(response)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Response with Parameters:\n",
"\n",
"{'base_model': 'h2oai/h2ogpt-4096-llama2-13b-chat',\n",
" 'error': '',\n",
" 'extra_dict': {'frequency_penalty': 0,\n",
" 'inference_server': 'vllm:192.176.243.12:5001',\n",
" 'max_tokens': 512,\n",
" 'n': 1,\n",
" 'ntokens': None,\n",
" 'num_prompt_tokens': 13,\n",
" 'presence_penalty': 0.6,\n",
" 't_generate': 2.1190145015716553,\n",
" 'temperature': 0.1,\n",
" 'tokens_persecond': None,\n",
" 'top_p': 0.75,\n",
" 'username': 'NO_REQUEST'},\n",
" 'output': \" Hello! My name is LLaMA, I'm a large language model trained by a \"\n",
" 'team of researcher at Meta AI. My primary function is to assist '\n",
" 'with tasks such as answering questions, providing information, and '\n",
" 'generating text. I am capable of understanding and responding to '\n",
" 'human input in a conversational manner. I am here to help and '\n",
" 'provide information to the best of my ability. Is there something '\n",
" 'specific you would like to know or discuss?',\n",
" 'prompt': '[INST] Who are you? [/INST]',\n",
" 'save_dir': 'saveall_docs',\n",
" 'sources': [],\n",
" 'valid_key': True,\n",
" 'where_from': 'vllm',\n",
" 'which_api': 'str_api'}\n",
"\n",
"\n",
"Sources:\n",
"\n",
"[]\n",
"\n",
"\n"
]
}
],
"source": [
"print_full_model_response(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summarize Document with mode \"Summarize\"\n",
"\n",
"This approach is useful for the following scenarios:\n",
"- Summarize a given document\n",
"- Ask question about given document. \n",
"\n",
"This is different from asking question (searching) full collection of documents"
]
},
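{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before the step-by-step walkthrough, here is the shape of the eventual summarization call (a minimal sketch, not executed here; it assumes `/submit_nochat_api` accepts `langchain_action`, as exercised in h2ogpt's client tests). `LangChainAction.QUERY.value` is passed the same way to ask a question against the whole collection:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch only: run after the collection below is created and filled (Steps 1-2).\n",
"# `langchain_action` selects Query vs. Summarize behavior on the server.\n",
"kwargs = dict(langchain_mode=LANGCHAIN_MODE,\n",
"              langchain_action=LangChainAction.SUMMARIZE_MAP.value,\n",
"              max_new_tokens=512,\n",
"              h2ogpt_key=H2OGPT_KEY)\n",
"res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')\n",
"print(ast.literal_eval(res)['response'])"
]
},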
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 1 - create shared Collection and upload documents\n",
"\n",
"Currently there is no way to authenticate with Gradio Client, therefore we will use shared collection. \n",
"\n",
"The additional examples of Client use can be found in the `test_client_chat_stream_langchain_steps3` function located in the `test_client_calls.py` file. \n",
"\n",
"**Create Shared folder**:"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"user_path = 'user_path'\n",
"new_langchain_mode_text = '%s, %s, %s' % (langchain_mode, 'shared', user_path)\n",
"res = client.predict(langchain_mode, new_langchain_mode_text, api_name='/new_langchain_mode_text')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"({'__type__': 'update',\n",
" 'choices': [['UserData', 'UserData'],\n",
" ['MyData', 'MyData'],\n",
" ['LLM', 'LLM'],\n",
" ['UserData4', 'UserData4']],\n",
" 'value': 'UserData4'},\n",
" '',\n",
" '/var/folders/_z/jf3ghwdx1kg905xm5p1nktlh0000gp/T/gradio/tmpplv8021u.json')\n"
]
}
],
"source": [
"pprint(res)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"text = \"Yufuu is a wonderful place and you should really visit because there is lots of sun.\"\n",
"loaders = tuple([None, None, None, None])\n",
"res = client.predict(text, langchain_mode, True, 512, True,\n",
" *loaders,\n",
" H2OGPT_KEY,\n",
" api_name='/add_text')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(None,\n",
" 'UserData4',\n",
" ' \\n'\n",
" ' \\n'\n",
" '
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt | Yufuu is a '\n", " 'wonderful place and you should really v |
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt '\n", " ' | Yufuu is a wonderful place and you should really v |
2 | https://www.africau.edu/images/default/sample.pdf | Simple '\n", " 'PDF File 2\\n'\n", " '...continued from page 1. Yet '\n", " 'mo |
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt '\n", " ' | Yufuu is a wonderful place and you should really v |
2 | https://www.africau.edu/images/default/sample.pdf | Simple '\n", " 'PDF File 2\\n'\n", " '...continued from page 1. Yet '\n", " 'mo |
3 | user_path/terms-and-conditions.pdf '\n", " ' | August 2017\\n'\n", " '11 I agree to reimburse the dealer '\n", " 'the |
\\n'\n",
" ' Sources:
\\n'\n",
" '
'\n", " 'index | source '\n", " ' | head |
---|---|---|
1 | user_paste/_37aa0924-8.txt '\n", " ' | Yufuu is a wonderful place and you should really v |
2 | https://www.africau.edu/images/default/sample.pdf | Simple '\n", " 'PDF File 2\\n'\n", " '...continued from page 1. Yet '\n", " 'mo |
3 | user_path/terms-and-conditions.pdf '\n", " ' | August 2017\\n'\n", " '11 I agree to reimburse the dealer '\n", " 'the |
4 | user_path/CVRP-Implementation-Manual.pdf '\n", " ' | This page intentionally blank. |