Spaces:

iblfe
/

test

Runtime error

App Files Files Community

iblfe committed on Jan 19, 2024

Commit

b585c7f

verified ·

1 Parent(s): 23490d1

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore +17 -0
.env +16 -0
.gitattributes +4 -0
.github/workflows/python-package-publish.yml +57 -0
.github/workflows/snyk-scan.yml +76 -0
.gitignore +44 -0
.ipynb_checkpoints/generate-checkpoint.py +16 -0
.ipynb_checkpoints/requirements-checkpoint.txt +77 -0
Dockerfile +35 -0
LICENSE +201 -0
Makefile +116 -0
README.md +313 -8
auth.json.lock +0 -0
benchmarks/llm_gpu_benchmark.py +123 -0
benchmarks/llm_gpu_benchmark_text-generation-inference.html +7 -0
benchmarks/llm_gpu_benchmark_transformers.html +7 -0
benchmarks/llm_gpu_benchmarks.json +2790 -0
benchmarks/perf.json +136 -0
benchmarks/perf.md +200 -0
blog/README.md +81 -0
ci/jenkinsfile +158 -0
client/.gitignore +168 -0
client/Makefile +58 -0
client/README.md +107 -0
client/h2ogpt_client/__init__.py +4 -0
client/h2ogpt_client/_completion.py +507 -0
client/h2ogpt_client/_core.py +50 -0
client/h2ogpt_client/_gradio_client.py +54 -0
client/h2ogpt_client/_models.py +35 -0
client/h2ogpt_client/_server.py +18 -0
client/poetry.lock +856 -0
client/poetry.toml +1 -0
client/pyproject.toml +41 -0
client/tests/__init__.py +0 -0
client/tests/conftest.py +57 -0
client/tests/test_client.py +156 -0
cloud/packer/Jenkinsfile +80 -0
cloud/packer/README.md +22 -0
cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh +11 -0
cloud/packer/h2ogpt-azure.json +123 -0
cloud/packer/h2ogpt-gcp.json +107 -0
cloud/packer/install_h2ogpt.sh +19 -0
cloud/packer/setup_environment.sh +46 -0
cloud/packer/startup-scripts/h2ogpt.service +12 -0
cloud/packer/startup-scripts/h2ogpt_nginx.service +12 -0
cloud/packer/startup-scripts/run_h2ogpt.sh +26 -0
cloud/packer/startup-scripts/run_nginx.sh +23 -0
cloud/packer/startup-scripts/run_vllm.sh +10 -0
cloud/packer/startup-scripts/temp.conf +14 -0
cloud/packer/startup-scripts/vllm.service +12 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,17 @@

+.git
+.npm
+.dockerignore
+.pytest_cache
+.cache
+.local
+.github
+.nv
+.benchmarks
+.bash_history
+.gitignore
+h2ogpt.egg-info
+venv
+build
+dist
+prebuilt_deps
+Dockerfile

.env ADDED Viewed

	@@ -0,0 +1,16 @@

+# H2OGPT
+H2OGPT_PORT=7860
+H2OGPT_BASE_MODEL=h2oai/h2ogpt-4096-llama2-7b-chat
+H2OGPT_ARGS="/workspace/generate.py --base_model=${H2OGPT_BASE_MODEL} --use_safetensors=True --prompt_type=llama2 --save_dir=/workspace/save/ --use_gpu_id=False --score_model=None --max_max_new_tokens=2048 --max_new_tokens=1024"
+# VLLM
+VLLM_TOKENIZER=hf-internal-testing/llama-tokenizer
+H2OGPT_VLLM_ARGS="--model=${H2OGPT_BASE_MODEL} --tokenizer=${VLLM_TOKENIZER} --tensor-parallel-size=2 --seed=1234 --trust-remote-code --download-dir=/workspace/.cache/huggingface/hub"
+# CPU models
+MODEL_PATH_LLAMA=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf
+H2OGPT_CPU_ARGS="/workspace/generate.py --base_model=llama --model_path_llama=${MODEL_PATH_LLAMA} --max_seq_len=4096"

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/demo.png filter=lfs diff=lfs merge=lfs -text
+docs/aws_sagemaker_endpoint_setup.pdf filter=lfs diff=lfs merge=lfs -text
+tests/CityofTshwaneWater.pdf filter=lfs diff=lfs merge=lfs -text
+tests/ocr2.png filter=lfs diff=lfs merge=lfs -text

.github/workflows/python-package-publish.yml ADDED Viewed

	@@ -0,0 +1,57 @@

+name: Build & Publish h2oGPT Python wheel to PYPI
+on:
+  workflow_dispatch:
+    inputs:
+      pypi-index:
+        type: choice
+        description: PyPI index that needed to be published
+        required: true
+        default: Test-PyPI
+        options:
+          - PyPI
+          - Test-PyPI
+      version:
+        description: |
+          Override the current version for the python package for dev purposes when uploading to Test-PyPI
+        type: string
+jobs:
+  build_and_upload:
+    runs-on: ubuntu-latest
+    steps:
+        - uses: actions/checkout@v3.5.3
+        - uses: actions/setup-python@v4
+          with:
+            python-version: '3.10'
+        - name: Install Dependencies
+          run: |
+            python3.10 -m pip install --upgrade pip
+            python3.10 -m pip install setuptools wheel twine --upgrade
+        - name: Modify Version
+          if: ${{ inputs.version != '' }}
+          # Hand the workflow input to the script through an environment variable
+          # rather than expanding it inside the script text, so the value is
+          # treated as data and never parsed as shell code.
+          env:
+            VERSION_OVERRIDE: ${{ inputs.version }}
+          run: |
+            printf '%s\n' "$VERSION_OVERRIDE" > version.txt
+            echo "h2ogpt-wheel-version  = $(cat version.txt)"
+        - name: Build Wheel
+          run: make clean dist
+        - name: Publish to Test-PyPI
+          if: ${{ inputs.pypi-index == 'Test-PyPI' }}
+          run: |
+            twine upload -r testpypi dist/*
+          env:
+            TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+            TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        - name: Publish to PyPI
+          if: ${{ inputs.pypi-index == 'PyPI' }}
+          run: |
+            twine upload dist/*
+          env:
+            TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+            TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}

.github/workflows/snyk-scan.yml ADDED Viewed

	@@ -0,0 +1,76 @@

+name: Snyk Security Vulnerability Scan
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+'
+    branches:
+      - main
+jobs:
+  snyk_scan_test:
+    if: ${{ github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@master
+      - uses: snyk/actions/setup@master
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Check changed Deps files
+        uses: tj-actions/changed-files@v35
+        id: changed-files
+        with:
+          files: | # This will match all the files with below patterns
+            requirements.txt
+      - name: Scan python dependencies
+        if: contains(steps.changed-files.outputs.all_changed_and_modified_files, 'requirements.txt')
+        env:
+          SNYK_TOKEN: '${{ secrets.SNYK_TOKEN }}'
+        run: |
+          head -n 41 requirements.txt > temp-requirements.txt #remove test deps
+          python3.10 -m pip install -r temp-requirements.txt
+          snyk test \
+            -d \
+            --file=temp-requirements.txt \
+            --package-manager=pip \
+            --command=python3.10 \
+            --skip-unresolved \
+            --severity-threshold=high
+  snyk_scan_monitor:
+    if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch'}}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@master
+      - uses: snyk/actions/setup@master
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Extract github branch/tag name
+        shell: bash
+        run: echo "ref=$(echo ${GITHUB_REF##*/})" >> $GITHUB_OUTPUT
+        id: extract_ref
+      - name: Monitor python dependencies
+        env:
+          SNYK_TOKEN: '${{ secrets.SNYK_TOKEN }}'
+        run: |
+          head -n 41 requirements.txt > temp-requirements.txt #remove test deps
+          python3.10 -m pip install -r temp-requirements.txt
+          snyk monitor \
+            -d \
+            --file=temp-requirements.txt \
+            --command=python3.10 \
+            --package-manager=pip \
+            --skip-unresolved \
+            --remote-repo-url=h2ogpt/${{ steps.extract_ref.outputs.ref }} \
+            --org=h2o-gpt \
+            --project-name=H2O-GPT/h2ogpt/${{ steps.extract_ref.outputs.ref }}/requirements.txt

.gitignore ADDED Viewed

	@@ -0,0 +1,44 @@

+out/
+7B/
+13B/
+__pycache__/
+checkpoint**
+minimal-llama**
+upload.py
+lora-**
+*ckpt
+wandb
+evaluate.py
+test_data.json
+todo.txt
+.neptune/
+*.bin
+db_dir_UserData
+temp_path_do_doc1
+offline_folder
+flagged_data_points
+.pytest_cache
+user_path
+user_path_test
+build
+h2ogpt.egg-info
+dist
+.idea
+.cache
+.local
+.bash_history
+.benchmarks
+Dockerfile-runner.dockerfile
+build_info.txt
+prebuilt_deps
+Dockerfile_deps
+# IDEs
+.idea/
+# virtual envs
+venv
+# Mac one click installer
+Tesseract-OCR/
+poppler/

.ipynb_checkpoints/generate-checkpoint.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import os
+import sys
+if os.path.dirname(os.path.abspath(__file__)) not in sys.path:
+    sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from src.gen import main
+from src.utils import H2O_Fire
+def entrypoint_main():
+    H2O_Fire(main)
+if __name__ == "__main__":
+    entrypoint_main()

.ipynb_checkpoints/requirements-checkpoint.txt ADDED Viewed

	@@ -0,0 +1,77 @@

+# for generate (gradio server) and finetune
+datasets==2.13.0
+sentencepiece==0.1.99
+gradio==3.50.2
+sse_starlette==1.8.2
+huggingface_hub==0.19.4
+appdirs==1.4.4
+fire==0.5.0
+docutils==0.20.1
+torch==2.1.2; sys_platform != "darwin" and platform_machine != "arm64"
+torch==2.1.2; sys_platform == "darwin" and platform_machine == "arm64"
+evaluate==0.4.0
+rouge_score==0.1.2
+sacrebleu==2.3.1
+scikit-learn==1.2.2
+# optional (need to uncomment code in gradio_runner.py for import of better_profanity)
+# alt-profanity-check==1.2.2
+# better-profanity==0.7.0
+numpy==1.24.3
+pandas==2.0.2
+matplotlib==3.7.1
+loralib==0.1.1
+bitsandbytes==0.41.3
+accelerate==0.25.0
+peft==0.7.1
+transformers==4.36.2
+tokenizers==0.15.0
+APScheduler==3.10.1
+# optional for generate
+pynvml==11.5.0
+psutil==5.9.5
+boto3==1.26.101
+botocore==1.29.101
+# optional for finetune
+tensorboard==2.13.0
+neptune==1.2.0
+# for gradio client
+gradio_client==0.6.1
+beautifulsoup4==4.12.2
+markdown==3.4.3
+# data and testing
+pytest==7.2.2
+pytest-xdist==3.2.1
+nltk==3.8.1
+textstat==0.7.3
+# pandoc==2.3
+pypandoc==1.11; sys_platform == "darwin" and platform_machine == "arm64"
+pypandoc_binary==1.11; platform_machine == "x86_64"
+pypandoc_binary==1.11; platform_system == "Windows"
+python-magic-bin==0.4.14; platform_system == "Windows"
+openpyxl==3.1.2
+lm_dataformat==0.0.20
+bioc==2.0
+# falcon
+einops==0.6.1
+instructorembedding==1.0.1
+# for gpt4all .env file, but avoid worrying about imports
+python-dotenv==1.0.0
+text-generation==0.6.1
+# for tokenization when don't have HF tokenizer
+tiktoken==0.5.2
+requests>=2.31.0
+httpx==0.24.1
+urllib3>=1.26.16
+filelock>=3.12.2
+joblib>=1.3.1
+tqdm>=4.65.0
+tabulate>=0.9.0
+packaging>=23.1

Dockerfile ADDED Viewed

	@@ -0,0 +1,35 @@

+# devel needed for bitsandbytes requirement of libcudart.so, otherwise runtime sufficient
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PATH="/h2ogpt_conda/bin:${PATH}"
+ARG PATH="/h2ogpt_conda/bin:${PATH}"
+ENV HOME=/workspace
+ENV CUDA_HOME=/usr/local/cuda-11.8
+ENV VLLM_CACHE=/workspace/.vllm_cache
+ENV TIKTOKEN_CACHE_DIR=/workspace/tiktoken_cache
+WORKDIR /workspace
+COPY . /workspace/
+RUN cd /workspace && ./docker_build_script_ubuntu.sh
+RUN chmod -R a+rwx /workspace
+# Account the container runs as; each value can be overridden with --build-arg.
+ARG user=h2ogpt
+ARG group=h2ogpt
+ARG uid=1000
+ARG gid=1000
+RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/bash ${user}
+EXPOSE 8888
+EXPOSE 7860
+EXPOSE 5000
+# Switch to the account created above; reference the build argument so that
+# overriding `user` does not leave USER pointing at a non-existent account.
+USER ${user}
+ENTRYPOINT ["python3.10"]

LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Makefile ADDED Viewed

	@@ -0,0 +1,116 @@

+all: clean dist
+PACKAGE_VERSION       := `cat version.txt | tr -d '\n'`
+BUILD_TAG             := $(shell git describe --always --dirty)
+DOCKER_TEST_IMAGE     := harbor.h2o.ai/h2ogpt/test-image:$(BUILD_TAG)
+PYTHON_BINARY         ?= `which python`
+DEFAULT_MARKERS       ?= "not need_tokens and not need_gpu"
+# Command targets: declared phony so they still run when a file or directory
+# with the same name exists. build_info.txt is listed so it is always regenerated.
+.PHONY: all clean venv install dist test test_imports publish build_info.txt \
+        docker_build_deps docker_build just_docker_build docker_build_runner
+clean:
+	rm -rf dist build h2ogpt.egg-info
+venv:
+	$(PYTHON_BINARY) -m virtualenv -p $(PYTHON_BINARY) venv
+install:
+	$(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl
+install-%:
+	$(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl[$*]
+dist:
+	$(PYTHON_BINARY) setup.py bdist_wheel
+test:
+	$(PYTHON_BINARY) -m pip install requirements-parser
+	$(PYTHON_BINARY) -m pytest tests --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"
+test_imports:
+	$(PYTHON_BINARY) -m pytest tests/test_imports.py --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"
+publish:
+	echo "Publishing not implemented yet."
+build_info.txt:
+	@rm -rf build_info.txt
+	@echo "commit=\"$(shell git rev-parse HEAD)\"" >> $@
+	@echo "branch=\"`git rev-parse HEAD | git branch -a --contains | grep -v detached | sed -e 's~remotes/origin/~~g' -e 's~^ *~~' | sort | uniq | tr '*\n' ' '`\"" >> $@
+	@echo "describe=\"`git describe --always --dirty`\"" >> $@
+	@echo "build_os=\"`uname -a`\"" >> $@
+	@echo "build_machine=\"`hostname`\"" >> $@
+	@echo "build_date=\"$(shell date "+%Y%m%d")\"" >> $@
+	@echo "build_user=\"`id -u -n`\"" >> $@
+	@echo "base_version=\"$(PACKAGE_VERSION)\"" >> $@
+git_hash.txt:
+	@echo "$(shell git rev-parse HEAD)" >> $@
+# Deprecated for now, no 0.4.1 on pypi, use release binary wheel that has no CUDA errors anymore
+docker_build_deps:
+	@cp docker_build_script_ubuntu.sh docker_build_script_ubuntu.sh.back
+	@sed -i '/# Install prebuilt dependencies/,$$d' docker_build_script_ubuntu.sh
+	@docker build -t h2ogpt-deps-builder -f Dockerfile .
+	@mv docker_build_script_ubuntu.sh.back docker_build_script_ubuntu.sh
+	@mkdir -p prebuilt_deps
+	@docker run \
+		--rm \
+		-it \
+		--entrypoint bash \
+		--runtime nvidia \
+		-v `pwd`:/dot \
+		-v /etc/passwd:/etc/passwd:ro \
+		-v /etc/group:/etc/group:ro \
+		-u `id -u`:`id -g` \
+		h2ogpt-deps-builder  -c " \
+			mkdir -p /dot/prebuilt_deps && cd /dot/prebuilt_deps && \
+			GITHUB_ACTIONS=true python3.10 -m pip install auto-gptq==0.4.2 --no-cache-dir --use-deprecated=legacy-resolver && \
+			python3.10 -m pip wheel auto-gptq==0.4.2 \
+		"
+	@docker run \
+		--rm \
+		-it \
+		--entrypoint bash \
+		-v `pwd`:/dot \
+		quay.io/pypa/manylinux2014_x86_64 -c " \
+			ln -s /usr/local/bin/python3.10 /usr/local/bin/python3 && cd /tmp && \
+			git clone https://github.com/h2oai/duckdb.git && \
+			cd duckdb && \
+			git checkout dcd8c1ffc53dd020623630efb99ba6a3a4cbc5ad && \
+			BUILD_PYTHON=1 make release && \
+			cd tools/pythonpkg  && \
+			python3.10 setup.py bdist_wheel  && \
+			cp dist/duckdb-0.*.whl /dot/prebuilt_deps \
+		"
+	s3cmd put prebuilt_deps/auto_gptq-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl s3://artifacts.h2o.ai/deps/h2ogpt/ && \
+	s3cmd setacl s3://artifacts.h2o.ai/deps/h2ogpt/auto_gptq-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl --acl-public
+	s3cmd put prebuilt_deps/duckdb-0.8.2.dev4026+gdcd8c1ffc5-cp310-cp310-linux_x86_64.whl s3://artifacts.h2o.ai/deps/h2ogpt/ && \
+	s3cmd setacl s3://artifacts.h2o.ai/deps/h2ogpt/duckdb-0.8.2.dev4026+gdcd8c1ffc5-cp310-cp310-linux_x86_64.whl --acl-public
+docker_build: build_info.txt
+ifeq ($(shell curl --connect-timeout 4 --write-out %{http_code} -sS --output /dev/null -X GET http://harbor.h2o.ai/api/v2.0/projects/h2ogpt/repositories/test-image/artifacts/$(BUILD_TAG)/tags),200)
+	@echo "Image already pushed to Harbor: $(DOCKER_TEST_IMAGE)"
+else
+	DOCKER_BUILDKIT=1 docker build -t $(DOCKER_TEST_IMAGE) -f Dockerfile .
+	docker push $(DOCKER_TEST_IMAGE)
+endif
+just_docker_build: build_info.txt
+	DOCKER_BUILDKIT=1 docker build -t $(DOCKER_TEST_IMAGE) -f Dockerfile .
+docker_build_runner: docker_build
+	-docker pull $(DOCKER_TEST_IMAGE)
+	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(BUILD_TAG)
+	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)
+	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:latest
+	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(BUILD_TAG)
+	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)
+	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:latest
+ifdef BUILD_ID
+	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)
+	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)
+endif
+print-%:
+	@echo $($*)

README.md CHANGED Viewed

@@ -1,12 +1,317 @@
 ---
-title: Test
-emoji: ⚡
-colorFrom: yellow
-colorTo: yellow
 sdk: gradio
-sdk_version: 4.15.0
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: test
+app_file: generate.py
 sdk: gradio
+sdk_version: 3.50.2
 ---
+# h2oGPT
+Turn ★ into ⭐ (top-right corner) if you like the project!
+Query and summarize your documents or just chat with local private GPT LLMs using h2oGPT, an Apache V2 open-source project.
+- **Private** offline database of any documents [(PDFs, Excel, Word, Images, Video Frames, Youtube, Audio, Code, Text, MarkDown, etc.)](docs/README_LangChain.md#supported-datatypes)
+  - **Persistent** database (Chroma, Weaviate, or in-memory FAISS) using accurate embeddings (instructor-large, all-MiniLM-L6-v2, etc.)
+  - **Efficient** use of context using instruct-tuned LLMs (no need for LangChain's few-shot approach)
+  - **Parallel** summarization and extraction, reaching an output of 80 tokens per second with the 13B LLaMa2 model
+  - **HYDE** (Hypothetical Document Embeddings) for enhanced retrieval based upon LLM responses
+- **Variety** of models supported (LLaMa2, Mistral, Falcon, Vicuna, WizardLM, etc.), with AutoGPTQ, 4-bit/8-bit quantization, LoRA, etc.
+  - **GPU** support from HF and LLaMa.cpp GGML models, and **CPU** support using HF, LLaMa.cpp, and GPT4ALL models
+  - **Attention Sinks** for [arbitrarily long](https://github.com/tomaarsen/attention_sinks) generation (LLaMa-2, Mistral, MPT, Pythia, Falcon, etc.)
+- **UI** or CLI with streaming of all models
+  - **Upload** and **View** documents through the UI (control multiple collaborative or personal collections)
+  - **Vision LLaVa** Model and **Stable Diffusion** Image Generation
+  - **Voice STT** using Whisper with streaming audio conversion
+  - **Voice TTS** using MIT-Licensed Microsoft Speech T5 with multiple voices and Streaming audio conversion
+  - **Voice TTS** using MPL2-Licensed TTS including Voice Cloning and Streaming audio conversion
+  - **AI Assistant Voice Control Mode** for hands-free control of h2oGPT chat
+  - **Bake-off** UI mode against many models at the same time
+  - **Easy Download** of model artifacts and control over models like LLaMa.cpp through the UI
+  - **Authentication** in the UI by user/password
+  - **State Preservation** in the UI by user/password
+- **Linux, Docker, macOS, and Windows** support
+  - [**Easy Windows Installer**](#windows-1011-64-bit-with-full-document-qa-capability) for Windows 10/11 64-bit (CPU/CUDA)
+  - [**Easy macOS Installer**](#macos-cpum1m2-with-full-document-qa-capability) for macOS (CPU/M1/M2)
+- **Inference Servers** support (HF TGI server, vLLM, Gradio, ExLLaMa, Replicate, OpenAI, Azure OpenAI, Anthropic)
+- **OpenAI-compliant**
+  - Server Proxy API (h2oGPT acts as drop-in-replacement to OpenAI server)
+  - Python client API (to talk to Gradio server)
+- **Web-Search** integration with Chat and Document Q/A
+- **Agents** for Search, Document Q/A, Python Code, CSV frames (Experimental, best with OpenAI currently)
+- **Evaluate** performance using reward models
+- **Quality** maintained with over 1000 unit and integration tests taking over 4 GPU-hours
+### Get Started
+[![GitHub license](https://img.shields.io/github/license/h2oai/h2ogpt?style=flat-square)](https://raw.githubusercontent.com/h2oai/h2ogpt/main/LICENSE)
+[![Linux](https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_LINUX.md)
+[![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_MACOS.md)
+[![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_WINDOWS.md)
+[![Docker](https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_DOCKER.md)
+To quickly try out h2oGPT with limited document Q/A capability, create a fresh Python 3.10 environment and run:
+* CPU or MAC (M1/M2):
+   ```bash
+   # for windows/mac use "set" or relevant environment setting mechanism
+   export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+   ```
+* Linux/Windows CUDA:
+   ```bash
+   # for windows/mac use "set" or relevant environment setting mechanism
+   export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu118"
+   ```
+Then run the following commands on any system:
+   ```bash
+   git clone https://github.com/h2oai/h2ogpt.git
+   cd h2ogpt
+   pip install -r requirements.txt
+   pip install -r reqs_optional/requirements_optional_langchain.txt
+   pip install -r reqs_optional/requirements_optional_gpt4all.txt
+   pip install -r reqs_optional/requirements_optional_langchain.urls.txt
+   # GPL, only run next line if that is ok:
+   # pip install -r reqs_optional/requirements_optional_langchain.gpllike.txt
+   python generate.py --base_model=TheBloke/zephyr-7B-beta-GGUF --prompt_type=zephyr --max_seq_len=4096
+   ```
+Next, go to your browser by visiting [http://127.0.0.1:7860](http://127.0.0.1:7860) or [http://localhost:7860](http://localhost:7860).  Choose 13B for a better model than 7B.
+If you encounter issues with `llama-cpp-python` or other packages that try to compile and fail, try binary wheels for your platform as linked in the detailed instructions below.  For AVX1 or AMD ROC systems, edit `reqs_optional/requirements_optional_gpt4all.txt` to choose valid packages.
+We recommend quantized models for most small-GPU systems, e.g. [LLaMa-2-7B-Chat-GGUF](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf) for 9GB+ GPU memory or larger models like [LLaMa-2-13B-Chat-GGUF](https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF/resolve/main/llama-2-13b-chat.Q6_K.gguf) if you have 16GB+ GPU memory.
+---
+Note that for all platforms, some packages such as DocTR, Unstructured, BLIP, Stable Diffusion, etc. download models at runtime, which can appear to delay operations in the UI. The progress appears in the console logs.
+#### Windows 10/11 64-bit with full document Q/A capability
+  * One-Click Installer
+    * CPU or GPU: Download [h2oGPT Windows Installer](https://h2o-release.s3.amazonaws.com/h2ogpt/Jan2024/h2oGPT_0.0.1.exe) (1.3GB file)
+      * Once installed, feel free to change start directory for icon from `%HOMEDRIVE%\%HOMEPATH%` to (e.g.) `%HOMEDRIVE%\%HOMEPATH%\h2ogpt_data` so all created files (like database) go there.  All paths saved are relative to this path.
+    * CPU: Click the h2oGPT icon in the Start menu.  Give it about 15 seconds to open in a browser if many optional packages are included.  By default, the browser will launch with the actual local IP address, not localhost.
+    * GPU: Before starting, run the following commands (replace `pseud` with your user):
+      ```
+      C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe -m pip uninstall -y torch
+      C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/torch-2.1.2%2Bcu118-cp310-cp310-win_amd64.whl
+      ```
+      Now click the h2oGPT icon in the Start menu.  Give it about 20 seconds to open in a browser if many optional packages are included.  By default, the browser will launch with the actual local IP address, not localhost.
+    * To debug any issues, run the following (replace `pseud` with your user):
+      ```
+      C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe "C:\Users\pseud\AppData\Local\Programs\h2oGPT\h2oGPT.launch.pyw"
+      ```
+      Any start-up exceptions are appended to log, e.g. `C:\Users\pseud\h2ogpt_exception.log`.
+  * To control startup, tweak the python startup file, e.g. for user `pseud`: `C:\Users\pseud\AppData\Local\Programs\h2oGPT\pkgs\win_run_app.py`
+    * In this Python code, set ENVs anywhere before main_h2ogpt() is called
+      * E.g. `os.environ['name'] = 'value'`, e.g. `os.environ['n_jobs'] = '10'` (the value must always be a string).
+    * Environment variables can be changed, e.g.:
+      * `n_jobs`: number of cores for various tasks
+      * `OMP_NUM_THREADS` thread count for LLaMa
+      * `CUDA_VISIBLE_DEVICES` which GPUs are used.  If you have multiple GPUs, we recommend setting this to a single fast GPU, e.g. `CUDA_VISIBLE_DEVICES=0`.  Note that the UI cannot control which GPUs (or CPU mode) are used for LLaMa models.
+      * Any CLI argument from `python generate.py --help` with environment variable set as `h2ogpt_x`, e.g. `h2ogpt_h2ocolors` to `False`.
+      * Set env `h2ogpt_server_name` to the machine's actual IP address so the app is visible on the LAN, e.g. `h2ogpt_server_name` to `192.168.1.172`, and allow access through the firewall if Windows Defender is activated.
+  * One can tweak installed h2oGPT code at, e.g. `C:\Users\pseud\AppData\Local\Programs\h2oGPT`.
+  * To terminate the app, go to System Tab and click Admin and click Shutdown h2oGPT.
+    * If startup fails, run as console and check for errors, and kill any old Python processes.
+  * [Full Windows 10/11 Manual Installation Script](docs/README_WINDOWS.md)
+    * Single `.bat` file for installation (if you do not skip any optional packages, it takes about 9GB of disk space).
+    * A base Conda env is recommended: it allows for DocTR, which requires pygobject, which otherwise has no support (except via `msys2`, which cannot be used by h2oGPT).
+    * Also allows for the TTS package by Coqui, which is otherwise not currently enabled in the one-click installer.
+---
+#### Linux (CPU/CUDA) with full document Q/A capability
+  * [Docker Build and Run Docs](docs/README_DOCKER.md)
+  * [Linux Manual Install and Run Docs](docs/README_LINUX.md)
+---
+#### macOS (CPU/M1/M2) with full document Q/A capability
+* One-click Installers (Experimental and subject to changes)
+  Nov 08, 2023
+  - [h2ogpt-osx-m1-cpu](https://h2o-release.s3.amazonaws.com/h2ogpt/Nov2023/h2ogpt-osx-m1-cpu)
+  - [h2ogpt-osx-m1-gpu](https://h2o-release.s3.amazonaws.com/h2ogpt/Nov2023/h2ogpt-osx-m1-gpu)
+  Download the runnable file and open it from the Finder. It will take a few minutes to unpack and run the application.
+  These one-click installers are experimental. Report any issues with steps to reproduce at https://github.com/h2oai/h2ogpt/issues.
+  **Note:** The app bundle is unsigned. If you experience any issues with running the app, run the following commands:
+  ```bash
+  $ xattr -dr com.apple.quarantine {file-path}/h2ogpt-osx-m1-gpu
+  $ chmod +x {file-path}/h2ogpt-osx-m1-gpu
+  ```
+* [macOS Manual Install and Run Docs](docs/README_MACOS.md)
+---
+#### Example Models
+* [Highest accuracy and speed](https://huggingface.co/h2oai/h2ogpt-4096-llama2-70b-chat) on 16-bit with TGI/vLLM using ~48GB/GPU when in use (4xA100 high concurrency, 2xA100 for low concurrency)
+* [Middle-range accuracy](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2) on 16-bit with TGI/vLLM using ~45GB/GPU when in use (2xA100)
+* [Small memory profile with ok accuracy](https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF) 16GB GPU if full GPU offloading
+* [Balanced accuracy and size](https://huggingface.co/h2oai/h2ogpt-4096-llama2-13b-chat) on 16-bit with TGI/vLLM using ~45GB/GPU when in use (1xA100)
+* [Smallest or CPU friendly](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF) 32GB system ram or 9GB GPU if full GPU offloading
+* [Best for 4*A10G using g5.12xlarge](https://huggingface.co/TheBloke/Llama-2-70B-chat-AWQ) AWQ LLaMa 70B using 4*A10G using vLLM
+**GPU** mode requires CUDA support via torch and transformers. A 7B/13B model in 16-bit uses 14GB/26GB of GPU memory to store the weights (2 bytes per weight). Compression such as 4-bit precision (bitsandbytes, AWQ, GPTQ, etc.) can further reduce memory requirements down to less than 6GB when asking a question about your documents. (For more information, see [low-memory mode](docs/FAQ.md#low-memory-mode).)
+**CPU** mode uses GPT4ALL and LLaMa.cpp, e.g. gpt4all-j, requiring about 14GB of system RAM in typical use.
+---
+### Live Demos
+- [![img-small.png](docs/img-small.png) Live h2oGPT Document Q/A Demo](https://gpt.h2o.ai/)
+- [🤗 Live h2oGPT Chat Demo 1](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot)
+- [🤗 Live h2oGPT Chat Demo 2](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot2)
+- [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT CPU](https://colab.research.google.com/drive/13RiBdAFZ6xqDwDKfW6BG_-tXfXiqPNQe?usp=sharing)
+- [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT GPU](https://colab.research.google.com/drive/143-KFHs2iCqXTQLI2pFCDiR69z0dR8iE?usp=sharing)
+### Inference Benchmarks for Summarization & Generation
+* [Benchmark results for Llama2](https://github.com/h2oai/h2ogpt/blob/main/benchmarks/perf.md)
+* [pytest to create benchmark results](https://github.com/h2oai/h2ogpt/blob/main/tests/test_perf_benchmarks.py)
+* [Raw benchmark results (JSON)](https://github.com/h2oai/h2ogpt/blob/main/benchmarks/perf.json)
+### Resources
+- [Discord](https://discord.gg/WKhYMWcVbq)
+- [Models (LLaMa-2, Falcon 40, etc.) at 🤗](https://huggingface.co/h2oai/)
+- [YouTube: 100% Offline ChatGPT Alternative?](https://www.youtube.com/watch?v=Coj72EzmX20)
+- [YouTube: Ultimate Open-Source LLM Showdown (6 Models Tested) - Surprising Results!](https://www.youtube.com/watch?v=FTm5C_vV_EY)
+- [YouTube: Blazing Fast Falcon 40b 🚀 Uncensored, Open-Source, Fully Hosted, Chat With Your Docs](https://www.youtube.com/watch?v=H8Dx-iUY49s)
+- [Technical Paper: https://arxiv.org/pdf/2306.08161.pdf](https://arxiv.org/pdf/2306.08161.pdf)
+### Partners
+- [Live Leaderboard](https://evalgpt.ai/) for GPT-4 Elo Evaluation of Instruct/Chat models with [h2o-LLM-eval](https://github.com/h2oai/h2o-LLM-eval).
+- Advanced fine-tuning with [H2O LLM Studio](https://github.com/h2oai/h2o-llmstudio)
+### Video Demo
+https://github.com/h2oai/h2ogpt/assets/2249614/2f805035-2c85-42fb-807f-fd0bca79abc6
+YouTube 4K version: https://www.youtube.com/watch?v=_iktbj4obAI
+### Docs Guide
+<!--  cat README.md | ./gh-md-toc  -  But Help is heavily processed -->
+* [Get Started](#get-started)
+   * [Linux (CPU or CUDA)](docs/README_LINUX.md)
+   * [macOS (CPU or M1/M2)](docs/README_MACOS.md)
+   * [Windows 10/11 (CPU or CUDA)](docs/README_WINDOWS.md)
+   * [GPU (CUDA, AutoGPTQ, exllama) Running Details](docs/README_GPU.md)
+   * [CPU Running Details](docs/README_CPU.md)
+   * [CLI chat](docs/README_CLI.md)
+   * [Gradio UI](docs/README_ui.md)
+   * [Client API (Gradio, OpenAI-Compliant)](docs/README_CLIENT.md)
+   * [Inference Servers (HF TGI server, vLLM, Gradio, ExLLaMa, Replicate, OpenAI, Azure OpenAI)](docs/README_InferenceServers.md)
+   * [Python Wheel](docs/README_WHEEL.md)
+   * [Offline Installation](docs/README_offline.md)
+   * [Low Memory](docs/FAQ.md#low-memory-mode)
+   * [Docker](docs/README_DOCKER.md)
+* [LangChain Document Support](docs/README_LangChain.md)
+* [Compare to PrivateGPT et al.](docs/README_LangChain.md#what-is-h2ogpts-langchain-integration-like)
+* [Roadmap](#roadmap)
+* [Development](#development)
+* [Help](#help)
+   * [LangChain file types supported](docs/README_LangChain.md#supported-datatypes)
+   * [CLI Database control](docs/README_LangChain.md#database-creation)
+   * [FAQ](docs/FAQ.md)
+     * [Model Usage Notes](docs/FAQ.md#model-usage-notes)
+     * [Adding LLM Models (including using GGUF and Attention Sinks)](docs/FAQ.md#adding-models)
+     * [Adding Embedding Models](docs/FAQ.md#add-new-embedding-model)
+     * [Adding Prompts](docs/FAQ.md#adding-prompt-templates)
+     * [In-Context Learning](docs/FAQ.md#in-context-learning-via-prompt-engineering)
+     * [Multiple GPUs](docs/FAQ.md#multiple-gpus)
+     * [Low-Memory Usage](docs/FAQ.md#low-memory-mode)
+     * [Environment Variables](docs/FAQ.md#what-envs-can-i-pass-to-control-h2ogpt)
+     * [HTTPS access for server and client](docs/FAQ.md#https-access-for-server-and-client)
+   * [Useful Links](docs/LINKS.md)
+   * [Fine-Tuning](docs/FINETUNE.md)
+   * [Triton](docs/TRITON.md)
+   * [Commercial viability](docs/FAQ.md#commercial-viability)
+* [Acknowledgements](#acknowledgements)
+* [Why H2O.ai?](#why-h2oai)
+* [Disclaimer](#disclaimer)
+### Experimental features
+These are not part of normal installation instructions and are experimental.
+* [Agents](docs/README_Agents.md) -- in Alpha testing.  Optimal for OpenAI, but that also fails sometimes.
+### Roadmap
+- Integration of code and resulting LLMs with downstream applications and low/no-code platforms
+- Complement h2oGPT chatbot with other APIs like [ToolBench](https://github.com/OpenBMB/ToolBench)
+- Enhance the model's code completion, reasoning, and mathematical capabilities, ensure factual correctness, minimize hallucinations, and avoid repetitive output
+- Add better agents for SQL and CSV question/answer
+### Development
+- To create a development environment for training and generation, follow the [installation instructions](docs/INSTALL.md).
+- To fine-tune any LLM models on your data, follow the [fine-tuning instructions](docs/FINETUNE.md).
+- To run h2oGPT tests:
+    ```bash
+    pip install requirements-parser pytest-instafail pytest-random-order
+    pip install playsound==1.3.0
+    pytest --instafail -s -v tests
+    # for client tests
+    make -C client setup
+    make -C client build
+    pytest --instafail -s -v client/tests
+    # for openai server test on already-running local server
+    pytest -s -v -n 4 openai_server/test_openai_server.py::test_openai_client
+    ```
+  or tweak/run `tests/test4gpus.sh` to run tests in parallel.
+### Help
+- [FAQs](docs/FAQ.md)
+- [README for LangChain](docs/README_LangChain.md)
+- Useful [links](docs/LINKS.md) for additional context and information on competitors, models, and datasets
+### Acknowledgements
+* Some training code was based upon March 24 version of [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/).
+* Used high-quality data created by [OpenAssistant](https://open-assistant.io/).
+* Used base models by [EleutherAI](https://www.eleuther.ai/).
+* Used OIG data created by [LAION](https://laion.ai/blog/oig-dataset/).
+### Why H2O.ai?
+Our [Makers](https://h2o.ai/company/team/) at [H2O.ai](https://h2o.ai) have built several world-class Machine Learning, Deep Learning and AI platforms:
+- #1 open-source machine learning platform for the enterprise [H2O-3](https://github.com/h2oai/h2o-3)
+- The world's best AutoML (Automatic Machine Learning) with [H2O Driverless AI](https://h2o.ai/platform/ai-cloud/make/h2o-driverless-ai/)
+- No-Code Deep Learning with [H2O Hydrogen Torch](https://h2o.ai/platform/ai-cloud/make/hydrogen-torch/)
+- Document Processing with Deep Learning in [Document AI](https://h2o.ai/platform/ai-cloud/make/document-ai/)
+We also built platforms for deployment and monitoring, and for data wrangling and governance:
+- [H2O MLOps](https://h2o.ai/platform/ai-cloud/operate/h2o-mlops/) to deploy and monitor models at scale
+- [H2O Feature Store](https://h2o.ai/platform/ai-cloud/make/feature-store/) in collaboration with AT&T
+- Open-source Low-Code AI App Development Frameworks [Wave](https://wave.h2o.ai/) and [Nitro](https://nitro.h2o.ai/)
+- Open-source Python [datatable](https://github.com/h2oai/datatable/) (the engine for H2O Driverless AI feature engineering)
+Many of our customers are creating models and deploying them enterprise-wide and at scale in the [H2O AI Cloud](https://h2o.ai/platform/ai-cloud/):
+- Multi-Cloud or on Premises
+- [Managed Cloud (SaaS)](https://h2o.ai/platform/ai-cloud/managed)
+- [Hybrid Cloud](https://h2o.ai/platform/ai-cloud/hybrid)
+- [AI Appstore](https://docs.h2o.ai/h2o-ai-cloud/)
+We are proud to have over 25 (of the world's 280) [Kaggle Grandmasters](https://h2o.ai/company/team/kaggle-grandmasters/) call H2O home, including three Kaggle Grandmasters who have made it to world #1.
+### Disclaimer
+Please read this disclaimer carefully before using the large language model provided in this repository. Your use of the model signifies your agreement to the following terms and conditions.
+- Biases and Offensiveness: The large language model is trained on a diverse range of internet text data, which may contain biased, racist, offensive, or otherwise inappropriate content. By using this model, you acknowledge and accept that the generated content may sometimes exhibit biases or produce content that is offensive or inappropriate. The developers of this repository do not endorse, support, or promote any such content or viewpoints.
+- Limitations: The large language model is an AI-based tool and not a human. It may produce incorrect, nonsensical, or irrelevant responses. It is the user's responsibility to critically evaluate the generated content and use it at their discretion.
+- Use at Your Own Risk: Users of this large language model must assume full responsibility for any consequences that may arise from their use of the tool. The developers and contributors of this repository shall not be held liable for any damages, losses, or harm resulting from the use or misuse of the provided model.
+- Ethical Considerations: Users are encouraged to use the large language model responsibly and ethically. By using this model, you agree not to use it for purposes that promote hate speech, discrimination, harassment, or any form of illegal or harmful activities.
+- Reporting Issues: If you encounter any biased, offensive, or otherwise inappropriate content generated by the large language model, please report it to the repository maintainers through the provided channels. Your feedback will help improve the model and mitigate potential issues.
+- Changes to this Disclaimer: The developers of this repository reserve the right to modify or update this disclaimer at any time without prior notice. It is the user's responsibility to periodically review the disclaimer to stay informed about any changes.
+By using the large language model provided in this repository, you agree to accept and comply with the terms and conditions outlined in this disclaimer. If you do not agree with any part of this disclaimer, you should refrain from using the model and any content generated by it.
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=h2oai/h2ogpt&type=Timeline)](https://star-history.com/#h2oai/h2ogpt&Timeline)

auth.json.lock ADDED Viewed

File without changes

benchmarks/llm_gpu_benchmark.py ADDED Viewed

	@@ -0,0 +1,123 @@

+# %%
+import json
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import plotly.io as pio
+from plotly.subplots import make_subplots
+# %%
+# Load the benchmark results
+# This script reads llm_gpu_benchmarks.json from the current working directory
+# The file is generated using the command
+# curl  -sSL https://raw.githubusercontent.com/h2oai/h2ogpt/main/benchmarks/perf.json | jq -s '.' > llm_gpu_benchmarks.json
+with open('llm_gpu_benchmarks.json') as f:
+    data = json.load(f)
+del f  # the file is already closed by the with-block; only the leftover name is removed
+# %%
+# Convert the loaded JSON content into a dataframe
+df = pd.json_normalize(data)
+del data  # the raw JSON content is not used again after the dataframe is built
+# %%
+# Process the dataframe: clean up columns, derive GPU name/memory, and add throughput columns
+# Drop columns that are not needed for the plots
+df.drop(columns=['task', 'ngpus', 'reps', 'date', 'git_sha', 'transformers', 'bitsandbytes', 'cuda', 'hostname',
+                 'summarize_input_len_bytes'], inplace=True)
+# Rename columns
+df.rename(columns={'n_gpus': 'gpu_count'}, inplace=True)
+# Split the gpus column into gpu_name and gpu_memory_gb
+df["gpu_name"] = df.gpus.str.extract(r'[1-9] x ([\w\- ]+) .+')  # text captured after the leading "<digit> x "
+df["gpu_memory_gb"] = round(
+    pd.to_numeric(df.gpus.str.extract(r'[\w ]+ \(([\d]+) .+', expand=False), errors='coerce') / 1024)  # NOTE(review): presumably the parenthesized number is in MiB - confirm
+df["gpu_memory_gb"] = df["gpu_memory_gb"].astype('Int64')  # nullable integer dtype, so rows without a parsed value stay missing
+df.drop(columns=['gpus'], inplace=True)
+# Shorten gpu names and prefix them with the memory size (e.g. "24-RTX 3090")
+df.gpu_name = df.gpu_name.str.replace('NVIDIA ', '')
+df.gpu_name = df.gpu_name.str.replace('GeForce ', '')
+df.gpu_name = df.gpu_name.str.replace('A100-SXM4-80GB', 'A100 SXM4')
+df.gpu_name = df.gpu_memory_gb.astype(str) + "-" + df.gpu_name
+# Remove CPU runs, i.e. rows for which no gpu_name could be extracted
+df.drop(df[df.gpu_name.isnull()].index, inplace=True)
+# %%
+# Remove duplicate rows: keep one row per (backend, base_model, bits, gpu_count, gpu_name) combination
+df.drop_duplicates(['backend', 'base_model', 'bits', 'gpu_count', 'gpu_name'], inplace=True)
+# %% Add baseline comparison columns
+# The CPU results for the 4, 8, and 16 bit quantization benchmarks are simplified to a single baseline value
+# per task (summarization and generation)
+cpu_summary_out_throughput = 1353 / 1216  # bytes/second  (calculated from summarize_output_len_bytes / summarize_time)
+cpu_generate_out_throughput = 849 / 180  # bytes/second   (calculated from generate_output_len_bytes / generate_time)
+# add GPU throughput columns (output bytes per unit of time)
+df["summary_out_throughput"] = df.summarize_output_len_bytes / df.summarize_time
+df["generate_out_throughput"] = df.generate_output_len_bytes / df.generate_time
+# add GPU throughput boost columns (GPU throughput divided by the CPU baseline above)
+df["summary_out_throughput_normalize"] = df.summary_out_throughput / cpu_summary_out_throughput
+df["generate_out_throughput_normalize"] = df.generate_out_throughput / cpu_generate_out_throughput
+# %%
+# df.to_excel('tmp/scratchpad/output/llm_gpu_benchmarks.xlsx', index=False)
+# %%
+pio.renderers.default = "browser"  # default renderer used if a figure is shown interactively
+# %%
+bits_bar_colors = {'4': px.colors.qualitative.D3[0],  # bar color per bit width; keys are the bit width as a string
+                   '8': px.colors.qualitative.D3[1],
+                   '16': px.colors.qualitative.D3[2]}
+backends = list(df.backend.unique())  # one output figure is written per backend
+base_models = list(df.base_model.unique())  # each base model gets two subplot columns
+n_gpus = list(df.gpu_count.unique())  # each GPU count gets one subplot row
+# %%
+for backend in backends:
+    # for backend in ['transformers']:
+    fig_bar = make_subplots(rows=len(n_gpus),
+                            cols=len(base_models) * 2,
+                            shared_xaxes='all',
+                            shared_yaxes='columns',
+                            start_cell="top-left",
+                            vertical_spacing=0.1,
+                            print_grid=False,
+                            row_titles=[f'{gpu_count} GPUs' for gpu_count in n_gpus],
+                            column_titles=['llama2-7b-chat Summarization', 'llama2-7b-chat Generation',
+                                           'llama2-13b-chat Summarization', 'llama2-13b-chat Generation',
+                                           'llama2-70b-chat Summarization', 'llama2-70b-chat Generation'],)
+    # for base_model in ['h2oai/h2ogpt-4096-llama2-7b-chat']:
+    for base_model in base_models:
+        for gpu_count in n_gpus:
+            for bits in sorted(df.bits.unique()):
+                sub_df = df[(df.backend == backend) &
+                            (df.base_model == base_model) &
+                            (df.gpu_count == gpu_count) &
+                            (df.bits == bits)].sort_values(by='gpu_name')
+                fig_bar.add_trace(go.Bar(x=sub_df.summary_out_throughput_normalize,
+                                         y=sub_df.gpu_name,
+                                         name=f'sum-{bits} bits',
+                                         legendgroup=f'sum-{bits} bits',
+                                         marker=dict(color=bits_bar_colors[f'{bits}']),
+                                         orientation='h'),
+                                  row=n_gpus.index(gpu_count) + 1,
+                                  col=base_models.index(base_model) * 2 + 1)
+                fig_bar.add_trace(go.Bar(x=sub_df.generate_out_throughput_normalize,
+                                         y=sub_df.gpu_name,
+                                         name=f'gen-{bits} bits',
+                                         legendgroup=f'gen-{bits} bits',
+                                         marker=dict(color=bits_bar_colors[f'{bits}']),
+                                         orientation='h'),
+                                  row=list(n_gpus).index(gpu_count) + 1,
+                                  col=list(base_models).index(base_model) * 2 + 2)
+    fig_bar.update_layout(plot_bgcolor='rgb(250,250,250)',
+                          showlegend=True,
+                          barmode="group")
+    # fig_bar.show()
+    fig_bar.write_html(f'llm_gpu_benchmark_{backend}.html', include_plotlyjs='cdn')

benchmarks/llm_gpu_benchmark_text-generation-inference.html ADDED Viewed

	@@ -0,0 +1,7 @@

+<html>
+<head><meta charset="utf-8" /></head>
+<body>
+    <div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
+        <script src="https://cdn.plot.ly/plotly-2.2.0.min.js"></script>                <div id="8d98303e-9d8d-4a86-9ab9-85be1f565ba7" class="plotly-graph-div" style="height:100%; width:100%;"></div>            <script type="text/javascript">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("8d98303e-9d8d-4a86-9ab9-85be1f565ba7")) {                    Plotly.newPlot(                        "8d98303e-9d8d-4a86-9ab9-85be1f565ba7",                        [{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x","y":[],"yaxis":"y"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x2","y":[],"yaxis":"y2"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x","y":[],"yaxis":"y"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x2","y":[],"yaxis":"y2"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[31.964670378460696,40.07702972093452,28.212217062134258,24.76324507950772,29.383143217889106],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[38.97113273835895,37.81293817302825,25.418311714688866,46.82453047975238,25.870047557539163],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x7","y":[],"yaxis":"y7"},{"legendgroup":"gen-4 
bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x8","y":[],"yaxis":"y8"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x7","y":[],"yaxis":"y7"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x8","y":[],"yaxis":"y8"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[27.742149283479364,131.11372927692716,27.756812705358207],"xaxis":"x7","y":["45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[25.757641294033732,60.88036130542081,24.89894321470165],"xaxis":"x8","y":["45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x13","y":[],"yaxis":"y13"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x14","y":[],"yaxis":"y14"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x13","y":[],"yaxis":"y13"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x14","y":[],"yaxis":"y14"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[26.58192050074467,27.706125039541696],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[24.92264927072723,24.11901127583454],"xaxis":"x14","y":["45-RTX A6000","80-A100 
SXM4"],"yaxis":"y14"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x19","y":[],"yaxis":"y19"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x20","y":[],"yaxis":"y20"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x19","y":[],"yaxis":"y19"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x20","y":[],"yaxis":"y20"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[26.56845022740626],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[23.63055816163121],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x3","y":[],"yaxis":"y3"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x4","y":[],"yaxis":"y4"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x3","y":[],"yaxis":"y3"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x4","y":[],"yaxis":"y4"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,null,38.784585018023556,18.13337657657005],"xaxis":"x3","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 
bits","orientation":"h","type":"bar","x":[null,null,28.590730184060984,16.18347618092991],"xaxis":"x4","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x9","y":[],"yaxis":"y9"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x10","y":[],"yaxis":"y10"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x9","y":[],"yaxis":"y9"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x10","y":[],"yaxis":"y10"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[20.929693801547206,12.694114023867758,85.02391911717123,17.23203722663425],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[26.649908731325855,18.11013971401145,49.03779902422664,18.7070327239283],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x15","y":[],"yaxis":"y15"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x16","y":[],"yaxis":"y16"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x15","y":[],"yaxis":"y15"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 
bits","orientation":"h","type":"bar","x":[],"xaxis":"x16","y":[],"yaxis":"y16"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[12.361580993407348,16.12018834278174],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[17.620036315851138,17.885323649884445],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x21","y":[],"yaxis":"y21"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x22","y":[],"yaxis":"y22"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x21","y":[],"yaxis":"y21"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x22","y":[],"yaxis":"y22"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[17.333509386436194],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[17.907476788430102],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-8 
bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x17","y":[],"yaxis":"y17"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x18","y":[],"yaxis":"y18"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x17","y":[],"yaxis":"y17"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x18","y":[],"yaxis":"y18"},{"legendgroup":"sum-16 
bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,6.337898874140187],"xaxis":"x17","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y17"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,8.157040216950774],"xaxis":"x18","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y18"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[6.239297143818297],"xaxis":"x23","y":["80-A100 SXM4"],"yaxis":"y23"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[8.082069511295837],"xaxis":"x24","y":["80-A100 SXM4"],"yaxis":"y24"}],                        {"annotations":[{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Summarization","x":0.06777777777777778,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Generation","x":0.2366666666666667,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Summarization","x":0.40555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat 
Generation","x":0.5744444444444445,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Summarization","x":0.7433333333333334,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Generation","x":0.9122222222222223,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"1 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.9125,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"2 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.6375000000000001,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"4 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.36250000000000004,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8 
GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.0875,"yanchor":"middle","yref":"paper"}],"barmode":"group","plot_bgcolor":"rgb(250,250,250)","showlegend":true,"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778
,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"
scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers":"strict","coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis
":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"xaxis":{"anchor":"y","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis10":{"anchor":"y10","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis11":{"anchor":"y11","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis12":{"anchor":"y12","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis13":{"anchor":"y13","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis14":{"anchor":"y14","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis15":{"anchor":"y15","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis16":{"anchor":"y16","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis17":{"anchor":"y17","domain":[0.675
5555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis18":{"anchor":"y18","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis19":{"anchor":"y19","domain":[0.0,0.13555555555555557]},"xaxis2":{"anchor":"y2","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis20":{"anchor":"y20","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19"},"xaxis21":{"anchor":"y21","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19"},"xaxis22":{"anchor":"y22","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19"},"xaxis23":{"anchor":"y23","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19"},"xaxis24":{"anchor":"y24","domain":[0.8444444444444444,0.98],"matches":"x19"},"xaxis3":{"anchor":"y3","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis4":{"anchor":"y4","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis5":{"anchor":"y5","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis6":{"anchor":"y6","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis7":{"anchor":"y7","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis8":{"anchor":"y8","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis9":{"anchor":"y9","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"yaxis":{"anchor":"x","domain":[0.825,1.0],"matches":"y19"},"yaxis10":{"anchor":"x10","domain":[0.55,0.7250000000000001],"matches":"y22"},"yaxis11":{"anchor":"x11","domain":[0.55,0.7250000000000001],"matches":"y23"},"yaxis12":{"anchor":"x12","domain":[0.55,0.7250000000000001],"matches":"y24"},"yaxis13":{"anchor":"x13","domain":[0.275,0.45],"matches":"y19"},"yaxis14":{"anchor":"x14","domain":[0.275,0.45],"matches":"y20"},"yaxis15":{"an
chor":"x15","domain":[0.275,0.45],"matches":"y21"},"yaxis16":{"anchor":"x16","domain":[0.275,0.45],"matches":"y22"},"yaxis17":{"anchor":"x17","domain":[0.275,0.45],"matches":"y23"},"yaxis18":{"anchor":"x18","domain":[0.275,0.45],"matches":"y24"},"yaxis19":{"anchor":"x19","domain":[0.0,0.175]},"yaxis2":{"anchor":"x2","domain":[0.825,1.0],"matches":"y20"},"yaxis20":{"anchor":"x20","domain":[0.0,0.175]},"yaxis21":{"anchor":"x21","domain":[0.0,0.175]},"yaxis22":{"anchor":"x22","domain":[0.0,0.175]},"yaxis23":{"anchor":"x23","domain":[0.0,0.175]},"yaxis24":{"anchor":"x24","domain":[0.0,0.175]},"yaxis3":{"anchor":"x3","domain":[0.825,1.0],"matches":"y21"},"yaxis4":{"anchor":"x4","domain":[0.825,1.0],"matches":"y22"},"yaxis5":{"anchor":"x5","domain":[0.825,1.0],"matches":"y23"},"yaxis6":{"anchor":"x6","domain":[0.825,1.0],"matches":"y24"},"yaxis7":{"anchor":"x7","domain":[0.55,0.7250000000000001],"matches":"y19"},"yaxis8":{"anchor":"x8","domain":[0.55,0.7250000000000001],"matches":"y20"},"yaxis9":{"anchor":"x9","domain":[0.55,0.7250000000000001],"matches":"y21"}},                        {"responsive": true}                    )                };                            </script>        </div>
+</body>
+</html>

benchmarks/llm_gpu_benchmark_transformers.html ADDED Viewed

	@@ -0,0 +1,7 @@

+<html>
+<head><meta charset="utf-8" /></head>
+<body>
+    <div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
+        <script src="https://cdn.plot.ly/plotly-2.2.0.min.js"></script>                <div id="4671500e-e030-484c-8d8f-02c9ef28c439" class="plotly-graph-div" style="height:100%; width:100%;"></div>            <script type="text/javascript">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("4671500e-e030-484c-8d8f-02c9ef28c439")) {                    Plotly.newPlot(                        "4671500e-e030-484c-8d8f-02c9ef28c439",                        [{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[9.839381644193974,19.682153353799034,14.47651674912018,26.790154000919145,16.85058557689085],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.67469844085007,27.622051912134882,19.374373797474846,27.42684895928983,20.2526752952322],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[4.417365201244467,11.290925144038532,6.08976919051411,9.56217317275004,5.9263976593415855],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.695887145541112,7.812688672567852,5.614002693550519,7.59461596844275,6.252509885345299],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 
bits","orientation":"h","type":"bar","x":[18.73507454097704,39.43429532784967,27.07453064626594,39.96998450085984,29.3453161508673],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.313436327725622,34.706856549443415,25.316661797353536,35.57028809081909,26.27458999671037],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[9.423935993931764,13.777794033942168,26.52473854898931,15.828182317775882],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.395401201017949,18.633481353508632,27.185836623669307,19.299187279602062],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[4.376286144153169,5.93295870509821,9.48124590639799,5.974715789431367],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.689148081304866,5.460311898298637,7.664435463393246,6.406802687346095],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[17.81624239176298,26.86157274268731,39.624799784757535,27.909081799152222],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 
6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.05454520400735,24.464037234597612,34.25052506253877,25.495156728837525],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[13.394795492541103,15.210707499507597],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[18.15606381072783,18.661753478727857],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[5.899421336969099,5.767145178389089],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[5.482425931352881,6.192523296540574],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[25.9430839554289,27.46244144955532],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[23.520372312313448,25.20924356998125],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[14.764927656045513],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[18.07719847124392],"xaxis":"x20","y":["80-A100 
SXM4"],"yaxis":"y20"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[5.718961706449293],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[6.177879854004683],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[27.054106396318144],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[25.138719102309768],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[8.083390907285379,12.174340676118161,11.076606608131389,16.98095523506584,12.1008725506651],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[8.287678373962581,13.674114390829141,13.308822531004934,17.365713991091738,12.794482361704157],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.1403647823510736,4.962801741500335,3.5348819482865093,5.3562909858984185,4.1213135763128905],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.18815245154689,5.105728547922034,4.718240806380357,6.509024089959697,4.827719089783637],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX 
A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,null,11.435609485285738,17.906931325335666,18.878279411581737],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,null,15.63493452970772,22.260343102292754,21.142120495293863],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[7.931395602652238,10.626258179366356,16.319110879759947,11.241866660596408],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[8.215795602873966,12.833338647314658,16.943446615015436,12.165800832662722],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.1362935762237645,3.499943275803895,5.402452917863267,3.9771491776646073],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.156100924190738,4.674808411970743,6.638529207897594,4.611620121814299],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 
bits","orientation":"h","type":"bar","x":[9.178059918412773,11.287265701494618,18.203631997182082,18.325614335569053],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[10.880162241524287,15.437944210820223,22.20571335065674,21.096027375985646],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[10.252856056970655,11.390035634842294],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.44766998737035,12.445574043628245],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.432161213004653,4.005435712274412],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[4.577172738204334,4.623478053690466],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[11.047490604822276,16.97583795634349],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.140929085583872,18.40904684710705],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[10.82198892665345],"xaxis":"x21","y":["80-A100 
SXM4"],"yaxis":"y21"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[11.846523539191672],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.8795801184687786],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[4.568029810459134],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[16.97013525520682],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[18.913362098572737],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[null,5.161890396610965,6.976123395155549],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y5"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[null,5.887611768925055,9.031399021823733],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y6"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[null,null,null],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y5"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[null,null,null],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y6"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 
bits","orientation":"h","type":"bar","x":[null,null,null,null],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y5"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,null,null,null],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y6"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[null,4.96092701086689,7.068376492905629],"xaxis":"x11","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y11"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[null,5.693355665703394,8.905280446876153],"xaxis":"x12","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y12"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[1.9856691832414866],"xaxis":"x11","y":["45-RTX A6000"],"yaxis":"y11"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[1.913951722547195],"xaxis":"x12","y":["45-RTX A6000"],"yaxis":"y12"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null],"xaxis":"x11","y":["45-RTX A6000"],"yaxis":"y11"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null],"xaxis":"x12","y":["45-RTX A6000"],"yaxis":"y12"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[4.8550061015042685],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[5.58004075989967],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-8 
bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[1.9670200139619358],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[1.8873606277914459],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[2.665381007576966],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[3.597816249219273],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"}],                        {"annotations":[{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Summarization","x":0.06777777777777778,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat 
Generation","x":0.2366666666666667,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Summarization","x":0.40555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Generation","x":0.5744444444444445,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Summarization","x":0.7433333333333334,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Generation","x":0.9122222222222223,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"1 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.9125,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"2 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.6375000000000001,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"4 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.36250000000000004,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8 
GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.0875,"yanchor":"middle","yref":"paper"}],"barmode":"group","plot_bgcolor":"rgb(250,250,250)","showlegend":true,"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778
,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"
scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers":"strict","coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis
":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"xaxis":{"anchor":"y","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis10":{"anchor":"y10","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis11":{"anchor":"y11","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis12":{"anchor":"y12","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis13":{"anchor":"y13","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis14":{"anchor":"y14","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis15":{"anchor":"y15","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis16":{"anchor":"y16","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis17":{"anchor":"y17","domain":[0.675
5555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis18":{"anchor":"y18","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis19":{"anchor":"y19","domain":[0.0,0.13555555555555557]},"xaxis2":{"anchor":"y2","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis20":{"anchor":"y20","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19"},"xaxis21":{"anchor":"y21","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19"},"xaxis22":{"anchor":"y22","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19"},"xaxis23":{"anchor":"y23","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19"},"xaxis24":{"anchor":"y24","domain":[0.8444444444444444,0.98],"matches":"x19"},"xaxis3":{"anchor":"y3","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis4":{"anchor":"y4","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis5":{"anchor":"y5","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis6":{"anchor":"y6","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis7":{"anchor":"y7","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis8":{"anchor":"y8","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis9":{"anchor":"y9","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"yaxis":{"anchor":"x","domain":[0.825,1.0],"matches":"y19"},"yaxis10":{"anchor":"x10","domain":[0.55,0.7250000000000001],"matches":"y22"},"yaxis11":{"anchor":"x11","domain":[0.55,0.7250000000000001],"matches":"y23"},"yaxis12":{"anchor":"x12","domain":[0.55,0.7250000000000001],"matches":"y24"},"yaxis13":{"anchor":"x13","domain":[0.275,0.45],"matches":"y19"},"yaxis14":{"anchor":"x14","domain":[0.275,0.45],"matches":"y20"},"yaxis15":{"an
chor":"x15","domain":[0.275,0.45],"matches":"y21"},"yaxis16":{"anchor":"x16","domain":[0.275,0.45],"matches":"y22"},"yaxis17":{"anchor":"x17","domain":[0.275,0.45],"matches":"y23"},"yaxis18":{"anchor":"x18","domain":[0.275,0.45],"matches":"y24"},"yaxis19":{"anchor":"x19","domain":[0.0,0.175]},"yaxis2":{"anchor":"x2","domain":[0.825,1.0],"matches":"y20"},"yaxis20":{"anchor":"x20","domain":[0.0,0.175]},"yaxis21":{"anchor":"x21","domain":[0.0,0.175]},"yaxis22":{"anchor":"x22","domain":[0.0,0.175]},"yaxis23":{"anchor":"x23","domain":[0.0,0.175]},"yaxis24":{"anchor":"x24","domain":[0.0,0.175]},"yaxis3":{"anchor":"x3","domain":[0.825,1.0],"matches":"y21"},"yaxis4":{"anchor":"x4","domain":[0.825,1.0],"matches":"y22"},"yaxis5":{"anchor":"x5","domain":[0.825,1.0],"matches":"y23"},"yaxis6":{"anchor":"x6","domain":[0.825,1.0],"matches":"y24"},"yaxis7":{"anchor":"x7","domain":[0.55,0.7250000000000001],"matches":"y19"},"yaxis8":{"anchor":"x8","domain":[0.55,0.7250000000000001],"matches":"y20"},"yaxis9":{"anchor":"x9","domain":[0.55,0.7250000000000001],"matches":"y21"}},                        {"responsive": true}                    )                };                            </script>        </div>
+</body>
+</html>

benchmarks/llm_gpu_benchmarks.json ADDED Viewed

	@@ -0,0 +1,2790 @@

+[
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 10:46:19",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 32.29472152392069,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 14.563165505727133
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 10:48:55",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 67.97515447934468,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 33.00641902287801
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 10:48:58",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1440,
+    "summarize_time": 114.62220064798991,
+    "generate_output_len_bytes": 2619,
+    "generate_time": 71.0722058614095
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 10:58:34",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 866,
+    "summarize_time": 39.54404203097025,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 22.466302394866943
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 11:01:59",
+    "git_sha": "55d3b55b",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 32.1394579410553,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 14.757195552190145
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 10:54:29",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 910,
+    "summarize_time": 185.14580019315085,
+    "generate_output_len_bytes": 2042,
+    "generate_time": 117.13909141222636
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 11:04:37",
+    "git_sha": "55d3b55b",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 94.98129558563232,
+    "generate_output_len_bytes": 2512,
+    "generate_time": 69.4871145884196
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 11:13:08",
+    "git_sha": "55d3b55b",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1276,
+    "summarize_time": 43.23498781522115,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 22.826789538065594
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 11:10:08",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 991,
+    "summarize_time": 90.51939169565837,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 48.96095744768778
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 11:16:48",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 31.86189842224121,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 14.209659894307455
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 11:17:39",
+    "git_sha": "55d3b55b",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 71.48081835110982,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 33.5740262667338
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 11:19:24",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 94.17744310696919,
+    "generate_output_len_bytes": 2512,
+    "generate_time": 70.12592967351277
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 11:27:57",
+    "git_sha": "55d3b55b",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1276,
+    "summarize_time": 42.8066500822703,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 22.626200040181477
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 11:23:22",
+    "git_sha": "55d3b55b",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 910,
+    "summarize_time": 186.88371555010477,
+    "generate_output_len_bytes": 2042,
+    "generate_time": 117.3530724843343
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 11:39:03",
+    "git_sha": "55d3b55b",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 991,
+    "summarize_time": 94.50985678037007,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 50.06416177749634
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 21:08:31",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 38.80374129613241,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 19.23690136273702
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 21:11:49",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 178.79640992482504,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 93.99476226170857
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 21:25:53",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 53.44271365801493,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 30.641155401865642
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 21:30:30",
+    "git_sha": "fc4826f2",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 40.80062770843506,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 19.825008392333984
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 21:35:29",
+    "git_sha": "fc4826f2",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 177.35046529769897,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 91.73111907641093
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 21:49:20",
+    "git_sha": "fc4826f2",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 56.894784371058144,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 32.15500020980835
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/18/2023 21:54:11",
+    "git_sha": "fc4826f2",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 41.46419604619344,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 20.049855709075928
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/18/2023 21:57:39",
+    "git_sha": "fc4826f2",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 183.73364853858948,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 94.9052836894989
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/18/2023 22:11:59",
+    "git_sha": "fc4826f2",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 59.204413731892906,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 33.25332593917847
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/18/2023 22:17:00",
+    "git_sha": "fc4826f2",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 42.09002653757731,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 20.106103817621868
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/18/2023 22:20:31",
+    "git_sha": "fc4826f2",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 185.28164370854697,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 95.13023789723714
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/18/2023 22:34:58",
+    "git_sha": "fc4826f2",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 60.9919019540151,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 34.328625202178955
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 13:31:34",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 52.49842747052511,
+    "generate_output_len_bytes": 2172,
+    "generate_time": 20.686774571736652
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 13:31:55",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 13:35:38",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1007,
+    "summarize_time": 168.9666860898336,
+    "generate_output_len_bytes": 2249,
+    "generate_time": 73.25518870353699
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 13:48:09",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 856,
+    "summarize_time": 45.30513469378153,
+    "generate_output_len_bytes": 1802,
+    "generate_time": 22.000216643015545
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 13:51:56",
+    "git_sha": "fc4826f2",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 51.64275654157003,
+    "generate_output_len_bytes": 2172,
+    "generate_time": 20.737667481104534
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 13:35:47",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 980,
+    "summarize_time": 280.4669913450877,
+    "generate_output_len_bytes": 2132,
+    "generate_time": 141.7793349424998
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 13:57:35",
+    "git_sha": "fc4826f2",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 869,
+    "summarize_time": 96.61887431144714,
+    "generate_output_len_bytes": 3244,
+    "generate_time": 82.98751719792683
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 13:55:51",
+    "git_sha": "fc4826f2",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1007,
+    "summarize_time": 167.52292919158936,
+    "generate_output_len_bytes": 2249,
+    "generate_time": 71.82611886660258
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 14:08:08",
+    "git_sha": "fc4826f2",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 856,
+    "summarize_time": 47.14254776636759,
+    "generate_output_len_bytes": 1802,
+    "generate_time": 22.54850967725118
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 14:15:15",
+    "git_sha": "d13230ee",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 14:07:15",
+    "git_sha": "fc4826f2",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 915,
+    "summarize_time": 89.59958203633626,
+    "generate_output_len_bytes": 2172,
+    "generate_time": 42.32424934705099
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 14:15:30",
+    "git_sha": "d13230ee",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1024,
+    "summarize_time": 185.44230167071024,
+    "generate_output_len_bytes": 2122,
+    "generate_time": 88.11553311347961
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 14:29:36",
+    "git_sha": "d13230ee",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 922,
+    "summarize_time": 68.06459252039592,
+    "generate_output_len_bytes": 1802,
+    "generate_time": 27.939613421758015
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 14:26:29",
+    "git_sha": "d13230ee",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 980,
+    "summarize_time": 280.8310640652974,
+    "generate_output_len_bytes": 2132,
+    "generate_time": 143.21916349728903
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 14:48:17",
+    "git_sha": "d13230ee",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 869,
+    "summarize_time": 98.47045453389485,
+    "generate_output_len_bytes": 3244,
+    "generate_time": 83.71360301971436
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 15:35:13",
+    "git_sha": "0dec0f52",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 15:49:33",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 16:26:53",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 16:27:32",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 16:29:03",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 17:26:02",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 18:59:16",
+    "git_sha": "5691db4a",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1075,
+    "summarize_time": 39.01545596122742,
+    "generate_output_len_bytes": 2242,
+    "generate_time": 10.151424566904703
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 19:03:13",
+    "git_sha": "5691db4a",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 940,
+    "summarize_time": 21.78233750661214,
+    "generate_output_len_bytes": 2130,
+    "generate_time": 15.794983307520548
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 19:38:40",
+    "git_sha": "6f05e8f1",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1114,
+    "summarize_time": 7.636120955149333,
+    "generate_output_len_bytes": 2275,
+    "generate_time": 7.922623078028361
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 19:41:02",
+    "git_sha": "6f05e8f1",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1024,
+    "summarize_time": 10.824170271555582,
+    "generate_output_len_bytes": 2130,
+    "generate_time": 9.209020694096884
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 19:55:17",
+    "git_sha": "2c548f21",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1088,
+    "summarize_time": 24.39883820215861,
+    "generate_output_len_bytes": 2275,
+    "generate_time": 12.755743900934855
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 00:57:21",
+    "git_sha": "a227be4f",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 37.113919814427696,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 18.36507821083069
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:00:31",
+    "git_sha": "a227be4f",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 49.79721482594808,
+    "generate_output_len_bytes": 2172,
+    "generate_time": 21.780913591384888
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:04:36",
+    "git_sha": "a227be4f",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:05:26",
+    "git_sha": "a227be4f",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 181.2461258570353,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 92.64811905225118
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:19:33",
+    "git_sha": "a227be4f",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 800,
+    "summarize_time": 174.4576851526896,
+    "generate_output_len_bytes": 2713,
+    "generate_time": 119.14412077267964
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:36:14",
+    "git_sha": "a227be4f",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 53.39731526374817,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 31.369641542434692
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:40:53",
+    "git_sha": "a227be4f",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1000,
+    "summarize_time": 74.27096923192342,
+    "generate_output_len_bytes": 1802,
+    "generate_time": 29.860486666361492
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 01:48:09",
+    "git_sha": "a227be4f",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 39.926851193110146,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 18.481745958328247
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 01:51:27",
+    "git_sha": "a227be4f",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 51.299002488454185,
+    "generate_output_len_bytes": 2172,
+    "generate_time": 21.828503131866455
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 01:56:20",
+    "git_sha": "a227be4f",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 178.19972308476767,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 91.73426882425944
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 02:10:13",
+    "git_sha": "a227be4f",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 800,
+    "summarize_time": 180.7814578215281,
+    "generate_output_len_bytes": 2713,
+    "generate_time": 124.72717420260112
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 02:26:43",
+    "git_sha": "a227be4f",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 57.08081785837809,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 32.26534946759542
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 02:31:36",
+    "git_sha": "a227be4f",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1000,
+    "summarize_time": 79.9461121559143,
+    "generate_output_len_bytes": 1802,
+    "generate_time": 31.403561115264893
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 02:38:23",
+    "git_sha": "a227be4f",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 42.33977222442627,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 19.723278522491455
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 02:41:52",
+    "git_sha": "a227be4f",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 55.377869288126625,
+    "generate_output_len_bytes": 2172,
+    "generate_time": 25.01458676656087
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 02:47:05",
+    "git_sha": "a227be4f",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 180.53432401021323,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 91.93375285466512
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 03:01:07",
+    "git_sha": "a227be4f",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 800,
+    "summarize_time": 179.50477250417075,
+    "generate_output_len_bytes": 2713,
+    "generate_time": 124.40728378295898
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 03:17:36",
+    "git_sha": "a227be4f",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 58.62867816289266,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 33.394495725631714
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 03:22:37",
+    "git_sha": "a227be4f",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1000,
+    "summarize_time": 78.90612125396729,
+    "generate_output_len_bytes": 1802,
+    "generate_time": 30.697617371877033
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/19/2023 03:29:20",
+    "git_sha": "a227be4f",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 40.498607873916626,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 19.509677171707153
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/19/2023 03:32:44",
+    "git_sha": "a227be4f",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 55.3964786529541,
+    "generate_output_len_bytes": 2172,
+    "generate_time": 24.347585439682007
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/19/2023 03:37:55",
+    "git_sha": "a227be4f",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1179,
+    "summarize_time": 186.71331850687662,
+    "generate_output_len_bytes": 2772,
+    "generate_time": 95.784650405248
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/19/2023 03:52:28",
+    "git_sha": "a227be4f",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 800,
+    "summarize_time": 185.3280005455017,
+    "generate_output_len_bytes": 2713,
+    "generate_time": 125.91738017400105
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/19/2023 04:09:18",
+    "git_sha": "a227be4f",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1002,
+    "summarize_time": 60.18280680974325,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 33.386961142222084
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/19/2023 04:14:25",
+    "git_sha": "a227be4f",
+    "n_gpus": 8,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1000,
+    "summarize_time": 83.04790727297465,
+    "generate_output_len_bytes": 1802,
+    "generate_time": 32.24992283185323
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 23:26:19",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 47.03754989306132,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 19.964784463246662
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 23:33:09",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 915,
+    "summarize_time": 71.91136892636617,
+    "generate_output_len_bytes": 2480,
+    "generate_time": 33.6295014222463
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 23:44:08",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 00:45:42",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1007,
+    "summarize_time": 148.61560583114624,
+    "generate_output_len_bytes": 2357,
+    "generate_time": 89.01266026496887
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 00:58:00",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 763,
+    "summarize_time": 193.99270629882812,
+    "generate_output_len_bytes": 2129,
+    "generate_time": 95.66660761833191
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:13:01",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:13:55",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 991,
+    "summarize_time": 61.52411222457886,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 32.030215660730995
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 01:19:00",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 1,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1000,
+    "summarize_time": 81.13888708750407,
+    "generate_output_len_bytes": 3486,
+    "generate_time": 55.5331826210022
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 01:27:49",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 47.41046245892843,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 20.660600344340008
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 01:34:28",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 915,
+    "summarize_time": 72.85646979014079,
+    "generate_output_len_bytes": 2480,
+    "generate_time": 34.05861854553223
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 02:39:22",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1007,
+    "summarize_time": 152.54357608159384,
+    "generate_output_len_bytes": 2357,
+    "generate_time": 91.51808977127075
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 02:52:58",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 763,
+    "summarize_time": 195.92926557858786,
+    "generate_output_len_bytes": 2129,
+    "generate_time": 96.55542047818501
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 03:15:01",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 991,
+    "summarize_time": 64.64422671000163,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 33.30378039677938
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 03:20:19",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 2,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1000,
+    "summarize_time": 84.57761120796204,
+    "generate_output_len_bytes": 3486,
+    "generate_time": 57.59072462717692
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 03:28:44",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1417,
+    "summarize_time": 49.08898218472799,
+    "generate_output_len_bytes": 2384,
+    "generate_time": 21.489527861277264
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 03:32:39",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 915,
+    "summarize_time": 74.43774898846944,
+    "generate_output_len_bytes": 2480,
+    "generate_time": 34.72673638661703
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 03:39:21",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1007,
+    "summarize_time": 153.41076453526816,
+    "generate_output_len_bytes": 2357,
+    "generate_time": 91.14894040425618
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 03:52:00",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 763,
+    "summarize_time": 199.79869039853415,
+    "generate_output_len_bytes": 2129,
+    "generate_time": 98.61504419644673
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 04:08:12",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 991,
+    "summarize_time": 66.49260465304057,
+    "generate_output_len_bytes": 2927,
+    "generate_time": 34.17951035499573
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 04:13:39",
+    "git_sha": "0cdb75ef",
+    "n_gpus": 4,
+    "transformers": "4.30.2",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1000,
+    "summarize_time": 87.65787092844646,
+    "generate_output_len_bytes": 3486,
+    "generate_time": 59.3750696182251
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 22:22:24",
+    "git_sha": "b63768c6",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 948,
+    "summarize_time": 122.13213857014973,
+    "generate_output_len_bytes": 2826,
+    "generate_time": 66.34098903338115
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/18/2023 22:33:33",
+    "git_sha": "c1348fb3",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 948,
+    "summarize_time": 120.53812781969707,
+    "generate_output_len_bytes": 2826,
+    "generate_time": 67.28052496910095
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 22:56:52",
+    "git_sha": "fb84de76",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1036,
+    "summarize_time": 29.128981749216717,
+    "generate_output_len_bytes": 2242,
+    "generate_time": 12.197122732798258
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/18/2023 23:00:33",
+    "git_sha": "fb84de76",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 05:47:43",
+    "git_sha": "22352acd",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 05:48:58",
+    "git_sha": "22352acd",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/19/2023 05:50:40",
+    "git_sha": "22352acd",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 948,
+    "summarize_time": 165.05752809842429,
+    "generate_output_len_bytes": 2605,
+    "generate_time": 93.80659619967143
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 06:05:51",
+    "git_sha": "22352acd",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 06:10:05",
+    "git_sha": "22352acd",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 906,
+    "summarize_time": 410.0691332022349,
+    "generate_output_len_bytes": 521,
+    "generate_time": 57.71272214253744
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 06:36:58",
+    "git_sha": "22352acd",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 948,
+    "summarize_time": 171.74388321240744,
+    "generate_output_len_bytes": 2605,
+    "generate_time": 97.00725762049358
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 06:51:13",
+    "git_sha": "22352acd",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 792,
+    "summarize_time": 267.0555826822917,
+    "generate_output_len_bytes": 2783,
+    "generate_time": 163.99818523724875
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 07:13:35",
+    "git_sha": "22352acd",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 906,
+    "summarize_time": 413.9569679101308,
+    "generate_output_len_bytes": 521,
+    "generate_time": 58.52583885192871
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/19/2023 07:38:02",
+    "git_sha": "22352acd",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 948,
+    "summarize_time": 175.4907926718394,
+    "generate_output_len_bytes": 2605,
+    "generate_time": 98.97720170021057
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/19/2023 12:35:08",
+    "git_sha": "29a002e5",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "timemachine",
+    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 983,
+    "summarize_time": 42.21107586224874,
+    "generate_output_len_bytes": 2130,
+    "generate_time": 16.94527777036031
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/21/2023 20:03:36",
+    "git_sha": "51318f44",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 41.0461368560791,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 19.614749511082966
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/21/2023 20:07:35",
+    "git_sha": "51318f44",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 42.8376894791921,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 20.2719091574351
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/21/2023 20:42:46",
+    "git_sha": "2f4bb620",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/21/2023 20:50:19",
+    "git_sha": "2f4bb620",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 915,
+    "summarize_time": 66.52468911806743,
+    "generate_output_len_bytes": 2479,
+    "generate_time": 29.828714847564697
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/21/2023 20:56:04",
+    "git_sha": "2f4bb620",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+    "exception": "OOM"
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/21/2023 19:55:35",
+    "git_sha": "51318f44",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 38.753786404927574,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 19.529522736867268
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/21/2023 20:36:13",
+    "git_sha": "51318f44",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 41.024452924728394,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 20.29120985666911
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/21/2023 20:40:08",
+    "git_sha": "51318f44",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 54.554532527923584,
+    "generate_output_len_bytes": 2171,
+    "generate_time": 24.604793945948284
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/21/2023 20:50:05",
+    "git_sha": "51318f44",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 41.09950613975525,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 20.947362899780273
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/21/2023 20:54:08",
+    "git_sha": "51318f44",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 58.3172922929128,
+    "generate_output_len_bytes": 2171,
+    "generate_time": 25.735217014948528
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/21/2023 21:01:04",
+    "git_sha": "51318f44",
+    "n_gpus": 8,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 42.85940829912821,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 21.380353291829426
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/21/2023 21:05:24",
+    "git_sha": "51318f44",
+    "n_gpus": 8,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 54.235164642333984,
+    "generate_output_len_bytes": 2171,
+    "generate_time": 25.70338026682536
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/21/2023 21:10:37",
+    "git_sha": "51318f44",
+    "n_gpus": 8,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 927,
+    "summarize_time": 133.53030570348105,
+    "generate_output_len_bytes": 2782,
+    "generate_time": 72.97924383481343
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/21/2023 22:18:17",
+    "git_sha": "51318f44",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 927,
+    "summarize_time": 131.45291074117026,
+    "generate_output_len_bytes": 2782,
+    "generate_time": 72.30849742889404
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/21/2023 22:51:09",
+    "git_sha": "383b6bbc",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 39.269713958104454,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 19.65731406211853
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/21/2023 22:54:54",
+    "git_sha": "383b6bbc",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 51.84283971786499,
+    "generate_output_len_bytes": 2171,
+    "generate_time": 28.441521485646565
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/21/2023 23:13:10",
+    "git_sha": "383b6bbc",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 53.383726040522255,
+    "generate_output_len_bytes": 2171,
+    "generate_time": 24.422890504201252
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 4,
+    "reps": 3,
+    "date": "08/21/2023 23:18:04",
+    "git_sha": "383b6bbc",
+    "n_gpus": 4,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 52.791220347086586,
+    "generate_output_len_bytes": 2171,
+    "generate_time": 25.378511508305866
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 8,
+    "reps": 3,
+    "date": "08/21/2023 23:23:11",
+    "git_sha": "383b6bbc",
+    "n_gpus": 8,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.8",
+    "hostname": "cloudvm",
+    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1046,
+    "summarize_time": 56.3846542040507,
+    "generate_output_len_bytes": 2171,
+    "generate_time": 26.636192480723064
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 1,
+    "reps": 3,
+    "date": "08/21/2023 23:52:44",
+    "git_sha": "da69b822",
+    "n_gpus": 1,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1267,
+    "summarize_time": 40.36223220825195,
+    "generate_output_len_bytes": 2383,
+    "generate_time": 19.87660264968872
+  },
+  {
+    "backend": "text-generation-inference",
+    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 2,
+    "reps": 3,
+    "date": "08/22/2023 00:15:05",
+    "git_sha": "e843e8c3",
+    "n_gpus": 2,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "recypabaszmhhmuae",
+    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 915,
+    "summarize_time": 64.78201874097188,
+    "generate_output_len_bytes": 2479,
+    "generate_time": 29.02147897084554
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 16,
+    "ngpus": 0,
+    "reps": 3,
+    "date": "08/22/2023 19:01:15",
+    "git_sha": "855b7d15",
+    "n_gpus": 0,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "CPU",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1351,
+    "summarize_time": 1215.5185990333557,
+    "generate_output_len_bytes": 849,
+    "generate_time": 180.56836318969727
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 8,
+    "ngpus": 0,
+    "reps": 3,
+    "date": "08/22/2023 20:11:16",
+    "git_sha": "855b7d15",
+    "n_gpus": 0,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "CPU",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1353,
+    "summarize_time": 1216.9783231417339,
+    "generate_output_len_bytes": 849,
+    "generate_time": 180.42225472132364
+  },
+  {
+    "backend": "transformers",
+    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+    "task": "summary_and_generate",
+    "bits": 4,
+    "ngpus": 0,
+    "reps": 3,
+    "date": "08/22/2023 21:21:20",
+    "git_sha": "855b7d15",
+    "n_gpus": 0,
+    "transformers": "4.31.0",
+    "bitsandbytes": "0.41.1",
+    "cuda": "11.7",
+    "hostname": "rippa",
+    "gpus": "CPU",
+    "summarize_input_len_bytes": 857252,
+    "summarize_output_len_bytes": 1354,
+    "summarize_time": 1217.1687794526417,
+    "generate_output_len_bytes": 843,
+    "generate_time": 180.78463260332742
+  }
+]

benchmarks/perf.json ADDED Viewed

	@@ -0,0 +1,136 @@

+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:46:19", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.29472152392069, "generate_output_len_bytes": 2384, "generate_time": 14.563165505727133}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:55", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 67.97515447934468, "generate_output_len_bytes": 2384, "generate_time": 33.00641902287801}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:58", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1440, "summarize_time": 114.62220064798991, "generate_output_len_bytes": 2619, "generate_time": 71.0722058614095}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:58:34", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 866, "summarize_time": 39.54404203097025, "generate_output_len_bytes": 2927, "generate_time": 22.466302394866943}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:01:59", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.1394579410553, "generate_output_len_bytes": 2384, "generate_time": 14.757195552190145}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:54:29", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 185.14580019315085, "generate_output_len_bytes": 2042, "generate_time": 117.13909141222636}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:04:37", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.98129558563232, "generate_output_len_bytes": 2512, "generate_time": 69.4871145884196}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:13:08", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 43.23498781522115, "generate_output_len_bytes": 2927, "generate_time": 22.826789538065594}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:10:08", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 90.51939169565837, "generate_output_len_bytes": 2927, "generate_time": 48.96095744768778}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:16:48", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 31.86189842224121, "generate_output_len_bytes": 2384, "generate_time": 14.209659894307455}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:17:39", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 71.48081835110982, "generate_output_len_bytes": 2384, "generate_time": 33.5740262667338}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:19:24", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.17744310696919, "generate_output_len_bytes": 2512, "generate_time": 70.12592967351277}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:27:57", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 42.8066500822703, "generate_output_len_bytes": 2927, "generate_time": 22.626200040181477}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:23:22", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 186.88371555010477, "generate_output_len_bytes": 2042, "generate_time": 117.3530724843343}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:39:03", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 94.50985678037007, "generate_output_len_bytes": 2927, "generate_time": 50.06416177749634}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:08:31", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.80374129613241, "generate_output_len_bytes": 2384, "generate_time": 19.23690136273702}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:11:49", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.79640992482504, "generate_output_len_bytes": 2772, "generate_time": 93.99476226170857}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:25:53", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.44271365801493, "generate_output_len_bytes": 2927, "generate_time": 30.641155401865642}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:30:30", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.80062770843506, "generate_output_len_bytes": 2384, "generate_time": 19.825008392333984}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:35:29", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 177.35046529769897, "generate_output_len_bytes": 2772, "generate_time": 91.73111907641093}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:49:20", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 56.894784371058144, "generate_output_len_bytes": 2927, "generate_time": 32.15500020980835}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:54:11", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.46419604619344, "generate_output_len_bytes": 2384, "generate_time": 20.049855709075928}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:57:39", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 183.73364853858948, "generate_output_len_bytes": 2772, "generate_time": 94.9052836894989}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/18/2023 22:11:59", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 59.204413731892906, "generate_output_len_bytes": 2927, "generate_time": 33.25332593917847}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:17:00", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.09002653757731, "generate_output_len_bytes": 2384, "generate_time": 20.106103817621868}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:20:31", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 185.28164370854697, "generate_output_len_bytes": 2772, "generate_time": 95.13023789723714}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:34:58", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.9919019540151, "generate_output_len_bytes": 2927, "generate_time": 34.328625202178955}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:34", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.49842747052511, "generate_output_len_bytes": 2172, "generate_time": 20.686774571736652}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:55", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:38", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 168.9666860898336, "generate_output_len_bytes": 2249, "generate_time": 73.25518870353699}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:48:09", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 45.30513469378153, "generate_output_len_bytes": 1802, "generate_time": 22.000216643015545}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:51:56", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.64275654157003, "generate_output_len_bytes": 2172, "generate_time": 20.737667481104534}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:47", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.4669913450877, "generate_output_len_bytes": 2132, "generate_time": 141.7793349424998}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:57:35", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 96.61887431144714, "generate_output_len_bytes": 3244, "generate_time": 82.98751719792683}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:55:51", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 167.52292919158936, "generate_output_len_bytes": 2249, "generate_time": 71.82611886660258}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:08:08", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 47.14254776636759, "generate_output_len_bytes": 1802, "generate_time": 22.54850967725118}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:15", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:07:15", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 89.59958203633626, "generate_output_len_bytes": 2172, "generate_time": 42.32424934705099}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:30", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 185.44230167071024, "generate_output_len_bytes": 2122, "generate_time": 88.11553311347961}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:29:36", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 922, "summarize_time": 68.06459252039592, "generate_output_len_bytes": 1802, "generate_time": 27.939613421758015}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:26:29", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.8310640652974, "generate_output_len_bytes": 2132, "generate_time": 143.21916349728903}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:48:17", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 98.47045453389485, "generate_output_len_bytes": 3244, "generate_time": 83.71360301971436}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:35:13", "git_sha": "0dec0f52", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:49:33", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:26:53", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:27:32", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:29:03", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 17:26:02", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 18:59:16", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1075, "summarize_time": 39.01545596122742, "generate_output_len_bytes": 2242, "generate_time": 10.151424566904703}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:03:13", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 940, "summarize_time": 21.78233750661214, "generate_output_len_bytes": 2130, "generate_time": 15.794983307520548}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:38:40", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1114, "summarize_time": 7.636120955149333, "generate_output_len_bytes": 2275, "generate_time": 7.922623078028361}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:41:02", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 10.824170271555582, "generate_output_len_bytes": 2130, "generate_time": 9.209020694096884}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:55:17", "git_sha": "2c548f21", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1088, "summarize_time": 24.39883820215861, "generate_output_len_bytes": 2275, "generate_time": 12.755743900934855}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:57:21", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 37.113919814427696, "generate_output_len_bytes": 2384, "generate_time": 18.36507821083069}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:00:31", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 49.79721482594808, "generate_output_len_bytes": 2172, "generate_time": 21.780913591384888}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:04:36", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:05:26", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 181.2461258570353, "generate_output_len_bytes": 2772, "generate_time": 92.64811905225118}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:33", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 174.4576851526896, "generate_output_len_bytes": 2713, "generate_time": 119.14412077267964}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:36:14", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.39731526374817, "generate_output_len_bytes": 2927, "generate_time": 31.369641542434692}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:40:53", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 74.27096923192342, "generate_output_len_bytes": 1802, "generate_time": 29.860486666361492}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:48:09", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.926851193110146, "generate_output_len_bytes": 2384, "generate_time": 18.481745958328247}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:51:27", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.299002488454185, "generate_output_len_bytes": 2172, "generate_time": 21.828503131866455}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:56:20", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.19972308476767, "generate_output_len_bytes": 2772, "generate_time": 91.73426882425944}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:10:13", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 180.7814578215281, "generate_output_len_bytes": 2713, "generate_time": 124.72717420260112}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:26:43", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 57.08081785837809, "generate_output_len_bytes": 2927, "generate_time": 32.26534946759542}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:31:36", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 79.9461121559143, "generate_output_len_bytes": 1802, "generate_time": 31.403561115264893}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:38:23", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.33977222442627, "generate_output_len_bytes": 2384, "generate_time": 19.723278522491455}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:41:52", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.377869288126625, "generate_output_len_bytes": 2172, "generate_time": 25.01458676656087}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:47:05", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 180.53432401021323, "generate_output_len_bytes": 2772, "generate_time": 91.93375285466512}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:01:07", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 179.50477250417075, "generate_output_len_bytes": 2713, "generate_time": 124.40728378295898}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:17:36", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 58.62867816289266, "generate_output_len_bytes": 2927, "generate_time": 33.394495725631714}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:22:37", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 78.90612125396729, "generate_output_len_bytes": 1802, "generate_time": 30.697617371877033}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:29:20", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.498607873916626, "generate_output_len_bytes": 2384, "generate_time": 19.509677171707153}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:32:44", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.3964786529541, "generate_output_len_bytes": 2172, "generate_time": 24.347585439682007}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:37:55", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 186.71331850687662, "generate_output_len_bytes": 2772, "generate_time": 95.784650405248}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:52:28", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 185.3280005455017, "generate_output_len_bytes": 2713, "generate_time": 125.91738017400105}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:09:18", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.18280680974325, "generate_output_len_bytes": 2927, "generate_time": 33.386961142222084}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:14:25", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 83.04790727297465, "generate_output_len_bytes": 1802, "generate_time": 32.24992283185323}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:26:19", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.03754989306132, "generate_output_len_bytes": 2384, "generate_time": 19.964784463246662}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:33:09", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 71.91136892636617, "generate_output_len_bytes": 2480, "generate_time": 33.6295014222463}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:44:08", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:45:42", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 148.61560583114624, "generate_output_len_bytes": 2357, "generate_time": 89.01266026496887}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:58:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 193.99270629882812, "generate_output_len_bytes": 2129, "generate_time": 95.66660761833191}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:01", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:55", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 61.52411222457886, "generate_output_len_bytes": 2927, "generate_time": 32.030215660730995}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 81.13888708750407, "generate_output_len_bytes": 3486, "generate_time": 55.5331826210022}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:27:49", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.41046245892843, "generate_output_len_bytes": 2384, "generate_time": 20.660600344340008}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:34:28", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 72.85646979014079, "generate_output_len_bytes": 2480, "generate_time": 34.05861854553223}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:39:22", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 152.54357608159384, "generate_output_len_bytes": 2357, "generate_time": 91.51808977127075}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:52:58", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 195.92926557858786, "generate_output_len_bytes": 2129, "generate_time": 96.55542047818501}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:15:01", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 64.64422671000163, "generate_output_len_bytes": 2927, "generate_time": 33.30378039677938}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:20:19", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 84.57761120796204, "generate_output_len_bytes": 3486, "generate_time": 57.59072462717692}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:28:44", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 49.08898218472799, "generate_output_len_bytes": 2384, "generate_time": 21.489527861277264}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:32:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 74.43774898846944, "generate_output_len_bytes": 2480, "generate_time": 34.72673638661703}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:39:21", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 153.41076453526816, "generate_output_len_bytes": 2357, "generate_time": 91.14894040425618}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:52:00", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 199.79869039853415, "generate_output_len_bytes": 2129, "generate_time": 98.61504419644673}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:08:12", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 66.49260465304057, "generate_output_len_bytes": 2927, "generate_time": 34.17951035499573}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:13:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 87.65787092844646, "generate_output_len_bytes": 3486, "generate_time": 59.3750696182251}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:22:24", "git_sha": "b63768c6", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 122.13213857014973, "generate_output_len_bytes": 2826, "generate_time": 66.34098903338115}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 22:33:33", "git_sha": "c1348fb3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 120.53812781969707, "generate_output_len_bytes": 2826, "generate_time": 67.28052496910095}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:56:52", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1036, "summarize_time": 29.128981749216717, "generate_output_len_bytes": 2242, "generate_time": 12.197122732798258}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:00:33", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:47:43", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:48:58", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:50:40", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 165.05752809842429, "generate_output_len_bytes": 2605, "generate_time": 93.80659619967143}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:05:51", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:10:05", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 410.0691332022349, "generate_output_len_bytes": 521, "generate_time": 57.71272214253744}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:36:58", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 171.74388321240744, "generate_output_len_bytes": 2605, "generate_time": 97.00725762049358}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 06:51:13", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 792, "summarize_time": 267.0555826822917, "generate_output_len_bytes": 2783, "generate_time": 163.99818523724875}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:13:35", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 413.9569679101308, "generate_output_len_bytes": 521, "generate_time": 58.52583885192871}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:38:02", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 175.4907926718394, "generate_output_len_bytes": 2605, "generate_time": 98.97720170021057}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 12:35:08", "git_sha": "29a002e5", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 983, "summarize_time": 42.21107586224874, "generate_output_len_bytes": 2130, "generate_time": 16.94527777036031}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:03:36", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.0461368560791, "generate_output_len_bytes": 2383, "generate_time": 19.614749511082966}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:07:35", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.8376894791921, "generate_output_len_bytes": 2383, "generate_time": 20.2719091574351}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 20:42:46", "git_sha": "2f4bb620", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:19", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 66.52468911806743, "generate_output_len_bytes": 2479, "generate_time": 29.828714847564697}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:56:04", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 19:55:35", "git_sha": "51318f44", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.753786404927574, "generate_output_len_bytes": 2383, "generate_time": 19.529522736867268}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:36:13", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.024452924728394, "generate_output_len_bytes": 2383, "generate_time": 20.29120985666911}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:40:08", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.554532527923584, "generate_output_len_bytes": 2171, "generate_time": 24.604793945948284}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:05", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.09950613975525, "generate_output_len_bytes": 2383, "generate_time": 20.947362899780273}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:54:08", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 58.3172922929128, "generate_output_len_bytes": 2171, "generate_time": 25.735217014948528}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:01:04", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.85940829912821, "generate_output_len_bytes": 2383, "generate_time": 21.380353291829426}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:05:24", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.235164642333984, "generate_output_len_bytes": 2171, "generate_time": 25.70338026682536}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:10:37", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 133.53030570348105, "generate_output_len_bytes": 2782, "generate_time": 72.97924383481343}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 22:18:17", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 131.45291074117026, "generate_output_len_bytes": 2782, "generate_time": 72.30849742889404}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:51:09", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.269713958104454, "generate_output_len_bytes": 2383, "generate_time": 19.65731406211853}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:54:54", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.84283971786499, "generate_output_len_bytes": 2171, "generate_time": 28.441521485646565}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 23:13:10", "git_sha": "383b6bbc", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 53.383726040522255, "generate_output_len_bytes": 2171, "generate_time": 24.422890504201252}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 23:18:04", "git_sha": "383b6bbc", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.791220347086586, "generate_output_len_bytes": 2171, "generate_time": 25.378511508305866}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 23:23:11", "git_sha": "383b6bbc", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 56.3846542040507, "generate_output_len_bytes": 2171, "generate_time": 26.636192480723064}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 23:52:44", "git_sha": "da69b822", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.36223220825195, "generate_output_len_bytes": 2383, "generate_time": 19.87660264968872}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/22/2023 00:15:05", "git_sha": "e843e8c3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 64.78201874097188, "generate_output_len_bytes": 2479, "generate_time": 29.02147897084554}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 0, "reps": 3, "date": "08/22/2023 19:01:15", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1351, "summarize_time": 1215.5185990333557, "generate_output_len_bytes": 849, "generate_time": 180.56836318969727}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 0, "reps": 3, "date": "08/22/2023 20:11:16", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1353, "summarize_time": 1216.9783231417339, "generate_output_len_bytes": 849, "generate_time": 180.42225472132364}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 0, "reps": 3, "date": "08/22/2023 21:21:20", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1354, "summarize_time": 1217.1687794526417, "generate_output_len_bytes": 843, "generate_time": 180.78463260332742}

benchmarks/perf.md ADDED Viewed

	@@ -0,0 +1,200 @@

+# Backend: transformers
+For an [interactive visualization of the results](https://raw.githubusercontent.com/h2oai/h2ogpt/main/benchmarks/llm_gpu_benchmark_transformers.html), save the linked file as HTML on your machine and open it in a browser.
+## Model: h2oai/h2ogpt-4096-llama2-7b-chat (transformers)
+### Number of GPUs: 0
+|   bits | gpus   |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-------|---------------------------:|--------------------------------:|:------------|
+|     16 | CPU    |                    1215.52 |                         1.17546 |             |
+|      8 | CPU    |                    1216.98 |                         1.17641 |             |
+|      4 | CPU    |                    1217.17 |                         1.16575 |             |
+### Number of GPUs: 1
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    31.8619 |                        41.9433  |             |
+|     16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB)        |                    32.2947 |                        40.9252  |             |
+|     16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    37.1139 |                        32.4529  |             |
+|     16 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    47.0375 |                        29.8526  |             |
+|     16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    67.9752 |                        18.0571  |             |
+|      8 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB)        |                   114.622  |                         9.21246 |             |
+|      8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    94.1774 |                         8.95532 |             |
+|      8 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                   181.246  |                         7.47991 |             |
+|      8 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                   148.616  |                         6.61984 |             |
+|      8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                   185.146  |                         4.35807 |             |
+|      4 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB)        |                    39.544  |                        32.571   |             |
+|      4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    42.8067 |                        32.3408  |             |
+|      4 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    53.3973 |                        23.3267  |             |
+|      4 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    61.5241 |                        22.8456  |             |
+|      4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    90.5194 |                        14.9456  |             |
+### Number of GPUs: 2
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    32.1395 |                        40.3871  |             |
+|     16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    39.9269 |                        32.248   |             |
+|     16 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    47.4105 |                        28.8472  |             |
+|     16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    71.4808 |                        17.7518  |             |
+|      8 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    94.9813 |                         9.03765 |             |
+|      8 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                   178.2    |                         7.55443 |             |
+|      8 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                   152.544  |                         6.43862 |             |
+|      8 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                   186.884  |                         4.35012 |             |
+|      4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    43.235  |                        32.0566  |             |
+|      4 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    57.0808 |                        22.6791  |             |
+|      4 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    64.6442 |                        21.972   |             |
+|      4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    94.5099 |                        14.6162  |             |
+### Number of GPUs: 4
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    42.3398 |                        30.2181  |             |
+|     16 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                    49.089  |                        27.7344  |             |
+|      8 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                   180.534  |                         7.53804 |             |
+|      8 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                   153.411  |                         6.46469 |             |
+|      4 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    58.6287 |                        21.9123  |             |
+|      4 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                    66.4926 |                        21.409   |             |
+### Number of GPUs: 8
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    40.4986 |                        30.5489  |             |
+|      8 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                   186.713  |                         7.23498 |             |
+|      4 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    60.1828 |                        21.9172  |             |
+## Model: h2oai/h2ogpt-4096-llama2-13b-chat (transformers)
+### Number of GPUs: 1
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    52.4984 |                        26.2487  |             |
+|     16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    49.7972 |                        24.9301  |             |
+|     16 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    71.9114 |                        18.4362  |             |
+|     16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                   nan      |                       nan       | OOM         |
+|     16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB)        |                   nan      |                       nan       | OOM         |
+|      8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                   168.967  |                         7.67522 |             |
+|      8 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB)        |                   185.442  |                         6.0205  |             |
+|      8 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                   174.458  |                         5.69269 |             |
+|      8 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                   193.993  |                         5.56359 |             |
+|      8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                   280.467  |                         3.75936 |             |
+|      4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    45.3051 |                        20.4771  |             |
+|      4 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB)        |                    68.0646 |                        16.1241  |             |
+|      4 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    81.1389 |                        15.6933  |             |
+|      4 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    74.271  |                        15.0868  |             |
+|      4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    96.6189 |                         9.77255 |             |
+### Number of GPUs: 2
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    51.6428 |                        26.1842  |             |
+|     16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    51.299  |                        24.8757  |             |
+|     16 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    72.8565 |                        18.2039  |             |
+|     16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    89.5996 |                        12.8295  |             |
+|      8 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                   167.523  |                         7.82793 |             |
+|      8 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                   195.929  |                         5.51238 |             |
+|      8 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                   180.781  |                         5.43787 |             |
+|      8 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                   280.831  |                         3.72157 |             |
+|      4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    47.1425 |                        19.9791  |             |
+|      4 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    84.5776 |                        15.1326  |             |
+|      4 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    79.9461 |                        14.3455  |             |
+|      4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    98.4705 |                         9.68779 |             |
+### Number of GPUs: 4
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    55.3779 |                        21.7073  |             |
+|     16 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                    74.4377 |                        17.8537  |             |
+|      8 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                   179.505  |                         5.45185 |             |
+|      8 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                   199.799  |                         5.39725 |             |
+|      4 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                    87.6579 |                        14.6779  |             |
+|      4 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    78.9061 |                        14.6754  |             |
+### Number of GPUs: 8
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    55.3965 |                        22.302   |             |
+|      8 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                   185.328  |                         5.38647 |             |
+|      4 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    83.0479 |                        13.969   |             |
+## Model: h2oai/h2ogpt-4096-llama2-70b-chat (transformers)
+### Number of GPUs: 1
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    nan     |                       nan       | OOM         |
+|     16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    nan     |                       nan       | OOM         |
+|     16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    nan     |                       nan       | OOM         |
+|     16 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    nan     |                       nan       | OOM         |
+|      8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    nan     |                       nan       | OOM         |
+|      8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    nan     |                       nan       | OOM         |
+|      8 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    nan     |                       nan       | OOM         |
+|      4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    122.132 |                        10.6495  |             |
+|      4 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    165.058 |                         6.94248 |             |
+|      4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    nan     |                       nan       | OOM         |
+### Number of GPUs: 2
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    nan     |                       nan       | OOM         |
+|      8 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    410.069 |                         2.25687 |             |
+|      4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    120.538 |                        10.5008  |             |
+|      4 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    171.744 |                         6.71342 |             |
+|      4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    nan     |                       nan       | OOM         |
+### Number of GPUs: 4
+|   bits | gpus                             |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:---------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 4 x NVIDIA RTX A6000 (46068 MiB) |                    267.056 |                         4.24242 |             |
+|      8 | 4 x NVIDIA RTX A6000 (46068 MiB) |                    413.957 |                         2.22551 |             |
+|      4 | 4 x NVIDIA RTX A6000 (46068 MiB) |                    175.491 |                         6.5798  |             |
+# Backend: text-generation-inference
+For an [interactive visualization of the results](https://raw.githubusercontent.com/h2oai/h2ogpt/main/benchmarks/llm_gpu_benchmark_text-generation-inference.html), save the linked file as HTML on your machine and open it in a browser.
+## Model: h2oai/h2ogpt-4096-llama2-7b-chat (text-generation-inference)
+### Number of GPUs: 1
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    39.0155 |                         55.2139 |             |
+|     16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    29.129  |                         45.9535 |             |
+|     16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB)        |                    24.3988 |                         44.5878 |             |
+|     16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    39.2697 |                         30.3068 |             |
+|     16 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                    40.3622 |                         29.9724 |             |
+### Number of GPUs: 2
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    7.63612 |                         71.7881 |             |
+|     16 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                   41.0461  |                         30.3726 |             |
+|     16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                   41.0245  |                         29.36   |             |
+### Number of GPUs: 4
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                    42.8377 |                         29.388  |             |
+|     16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    41.0995 |                         28.4403 |             |
+### Number of GPUs: 8
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    42.8594 |                         27.8644 |             |
+## Model: h2oai/h2ogpt-4096-llama2-13b-chat (text-generation-inference)
+### Number of GPUs: 1
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    21.7823 |                         33.7132 |             |
+|     16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    51.8428 |                         19.083  |             |
+|     16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                   nan      |                        nan      | OOM         |
+|     16 | 1 x NVIDIA RTX A6000 (46068 MiB)               |                   nan      |                        nan      | OOM         |
+### Number of GPUs: 2
+|   bits | gpus                                           |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) |                    10.8242 |                         57.8237 |             |
+|     16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB)        |                    42.2111 |                         31.4247 |             |
+|     16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB)          |                    53.3837 |                         22.223  |             |
+|     16 | 2 x NVIDIA RTX A6000 (46068 MiB)               |                    64.782  |                         21.3549 |             |
+### Number of GPUs: 4
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    52.7912 |                         21.3862 |             |
+|     16 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                    66.5247 |                         20.777  |             |
+### Number of GPUs: 8
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    56.3847 |                         20.3764 |             |
+## Model: h2oai/h2ogpt-4096-llama2-70b-chat (text-generation-inference)
+### Number of GPUs: 4
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) |                    131.453 |                         9.61851 |             |
+|     16 | 4 x NVIDIA RTX A6000 (46068 MiB)      |                    nan     |                       nan       | OOM         |
+### Number of GPUs: 8
+|   bits | gpus                                  |   summarization time [sec] |   generation speed [tokens/sec] | exception   |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+|     16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) |                     133.53 |                         9.53011 |             |

blog/README.md ADDED Viewed

	@@ -0,0 +1,81 @@

+# Building the World's Best Open-Source Large Language Model: H2O.ai's Journey
+by Arno Candel, PhD, CTO H2O.ai, April 19 2023
+At H2O.ai, we pride ourselves on developing world-class Machine Learning, Deep Learning, and AI platforms. We released H2O, the most widely used open-source distributed and scalable machine learning platform, before XGBoost, TensorFlow and PyTorch existed. H2O.ai is home to over 25 Kaggle grandmasters, including the current #1. In 2017, we used GPUs to create the world's best AutoML in H2O Driverless AI. We have witnessed first-hand how Large Language Models (LLMs) have taken the world by storm.
+We are proud to announce that we are building h2oGPT, an LLM that not only excels in performance but is also fully open-source and commercially usable, providing a valuable resource for developers, researchers, and organizations worldwide.
+In this blog, we'll explore our journey in building h2oGPT in our effort to further democratize AI.
+## Why Open-Source LLMs?
+While LLMs like OpenAI's ChatGPT/GPT-4, Anthropic's Claude, Microsoft's Bing AI Chat, Google's Bard, and Cohere are powerful and effective, they have certain limitations compared to open-source LLMs:
+1. **Data Privacy and Security**: Using hosted LLMs requires sending data to external servers. This can raise concerns about data privacy, security, and compliance, especially for sensitive information or industries with strict regulations.
+2. **Dependency and Customization**: Hosted LLMs often limit the extent of customization and control, as users rely on the service provider's infrastructure and predefined models. Open-source LLMs allow users to tailor the models to their specific needs, deploy on their own infrastructure, and even modify the underlying code.
+3. **Cost and Scalability**: Hosted LLMs usually come with usage fees, which can increase significantly with large-scale applications. Open-source LLMs can be more cost-effective, as users can scale the models on their own infrastructure without incurring additional costs from the service provider.
+4. **Access and Availability**: Hosted LLMs may be subject to downtime or limited availability, affecting users' access to the models. Open-source LLMs can be deployed on-premises or on private clouds, ensuring uninterrupted access and reducing reliance on external providers.
+Overall, open-source LLMs offer greater flexibility, control, and cost-effectiveness, while addressing data privacy and security concerns. They foster a competitive landscape in the AI industry and empower users to innovate and customize models to suit their specific needs.
+## The H2O.ai LLM Ecosystem
+Our open-source LLM ecosystem currently includes the following components:
+1. **Code, data, and models**: Fully permissive, commercially usable [code](https://github.com/h2oai/h2ogpt), curated fine-tuning [data](https://huggingface.co/h2oai), and fine-tuned [models](https://huggingface.co/h2oai) ranging from 7 to 20 billion parameters.
+2. **State-of-the-art fine-tuning**: We provide code for highly efficient fine-tuning, including targeted data preparation, prompt engineering, and computational optimizations to fine-tune LLMs with up to 20 billion parameters (even larger models expected soon) in hours on commodity hardware or enterprise servers. Techniques like low-rank approximations (LoRA) and data compression allow computational savings of several orders of magnitude.
+3. **Chatbot**: We provide code to run a multi-tenant chatbot on GPU servers, with an easily shareable end-point and a Python client API, allowing you to evaluate and compare the performance of fine-tuned LLMs.
+4. **H2O LLM Studio**: Our no-code LLM fine-tuning framework created by the world's top Kaggle grandmasters makes it even easier to fine-tune and evaluate LLMs.
+Everything we release is based on fully permissive data and models, with all code open-sourced, enabling broader access for businesses and commercial products without legal concerns, thus expanding access to cutting-edge AI while adhering to licensing requirements.
+## Roadmap and Future Plans
+We have an ambitious roadmap for our LLM ecosystem, including:
+1. Integration with downstream applications and low/no-code platforms (H2O Document AI, H2O LLM Studio, etc.)
+2. Improved validation and benchmarking frameworks of LLMs
+3. Complementing our chatbot with search and other APIs (LangChain, etc.)
+4. Contributions to large-scale data cleaning efforts (Open Assistant, Stability AI, RedPajama, etc.)
+5. High-performance distributed training of larger models on a trillion tokens
+6. High-performance scalable on-premises hosting for high-throughput endpoints
+7. Improvements in code completion, reasoning, mathematics, factual correctness, hallucinations, and reducing repetitions
+## Getting Started with H2O.ai's LLMs
+You can [Chat with h2oGPT](https://gpt.h2o.ai/) right now!
+https://user-images.githubusercontent.com/6147661/232924684-6c0e2dfb-2f24-4098-848a-c3e4396f29f6.mov
+![](https://user-images.githubusercontent.com/6147661/233239878-de3b0fce-5425-4189-8095-5313c7817d58.png)
+![](https://user-images.githubusercontent.com/6147661/233239861-e99f238c-dd5d-4dd7-ac17-6367f91f86ac.png)
+To start using our LLM as a developer, follow the steps below:
+1. Clone the repository: `git clone https://github.com/h2oai/h2ogpt.git`
+2. Change to the repository directory: `cd h2ogpt`
+3. Install the requirements: `pip install -r requirements.txt`
+4. Run the chatbot: `python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-256-6_9b`
+5. Open your browser at `http://0.0.0.0:7860` or the public live URL printed by the server.
+For more information, visit [h2oGPT GitHub page](https://github.com/h2oai/h2ogpt), [H2O.ai's Hugging Face page](https://huggingface.co/h2oai) and [H2O LLM Studio GitHub page](https://github.com/h2oai/h2o-llmstudio).
+Join us on this exciting journey as we continue to improve and expand the capabilities of our open-source LLM ecosystem!
+## Acknowledgements
+We appreciate the work by many open-source contributors, especially:
+* [H2O.ai makers](https://h2o.ai/company/team/)
+* [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/)
+* [LoRA](https://github.com/microsoft/LoRA/)
+* [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca/)
+* [Hugging Face](https://huggingface.co/)
+* [OpenAssistant](https://open-assistant.io/)
+* [EleutherAI](https://www.eleuther.ai/)
+* [LAION](https://laion.ai/blog/oig-dataset/)
+* [BigScience](https://github.com/bigscience-workshop/bigscience/)
+* [LLaMa](https://github.com/facebookresearch/llama/)
+* [StableLM](https://github.com/Stability-AI/StableLM/)
+* [Vicuna](https://github.com/lm-sys/FastChat/)

ci/jenkinsfile ADDED Viewed

	@@ -0,0 +1,158 @@

+#!/usr/bin/groovy
+@Library('test-shared-library@dai_pipeline') _
+import ai.h2o.ci.buildsummary.StagesSummary
+import groovy.json.JsonOutput
+buildSummary('https://github.com/h2oai/h2ogpt', true)
+buildSummary.get().addStagesSummary(this, new StagesSummary())
+// Test suites selectable through the `testTargets` build parameter, keyed by suite name.
+// Each entry is consumed by the 'Tests' stage of the pipeline:
+//   install_deps - suffix of the `make install-<...>` target run after `make install`
+//   test_target  - make target that runs the tests
+//   node         - Jenkins node label expression the suite is scheduled on
+//   test_markers - value exported as DEFAULT_MARKERS when the test target runs
+//   timeout      - time limit for the suite, in minutes
+//   use_docker   - true: run inside the DOCKER_TEST_IMAGE container; false: run in a venv made by `make venv`
+//   env          - extra environment entries handed to withEnv
+def ALL_TESTS = [
+        "test_osx": [
+            install_deps: "TRAINING",
+            test_target: "test_imports",
+            node: "osx",
+            test_markers: "not need_tokens and not need_gpu",
+            timeout: 90,
+            use_docker: false,
+            env: ['PYTHON_BINARY=/Users/jenkins/anaconda/envs/h2ogpt-py3.10/bin/python']
+        ],
+        "test_all": [
+            install_deps: "TRAINING,WIKI_EXTRA",
+            test_target: "test",
+            test_markers: "not need_tokens and not need_gpu",
+            node: "DAIDEV-GPU || DAIDEV-2GPU",
+            timeout: 90,
+            use_docker: true,
+            env: []
+        ],
+]
+// Declarative pipeline: build the wheel, run the selected test suites in parallel, optionally publish.
+pipeline {
+    agent none
+    parameters {
+        booleanParam(name: 'skipTesting', defaultValue: false, description: 'Skip testing')
+        text(name: "testTargets", defaultValue: "${ALL_TESTS.keySet().join('\n')}", description: "A select set of tests to run")
+        booleanParam(name: 'publish', defaultValue: false, description: 'Upload to HF')
+    }
+    options {
+        ansiColor('xterm')
+        timestamps()
+    }
+    stages {
+        // Builds the docker image and the wheel; the wheel is stashed so the test nodes can unstash it.
+        stage('Build') {
+            agent {
+                label "linux && docker"
+            }
+            steps {
+                script {
+                    def shortHash = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim()
+                    def commitMsg = sh(returnStdout: true, script: 'git log -1 --pretty=format:"[%an] %s"').trim()
+                    currentBuild.displayName = "${env.BUILD_ID} - [${shortHash}]"
+                    currentBuild.description = "${commitMsg}"
+                    sh "make docker_build"
+                    docker.image("harbor.h2o.ai/library/python:3.10").inside("--entrypoint='' --security-opt seccomp=unconfined -e USE_WHEEL=1 -e HOME=${WORKSPACE}") {
+                        sh "make clean dist"
+                    }
+                    archiveArtifacts allowEmptyArchive: true, artifacts: "dist/h2ogpt-*.whl"
+                    stash includes: "dist/h2ogpt-*.whl", name: "wheel_file"
+                }
+            }
+        }
+        // Runs every suite named in the `testTargets` parameter on its own node, in parallel.
+        stage('Tests') {
+            when {
+                anyOf {
+                    expression { return !params.skipTesting }
+                }
+                beforeAgent true
+            }
+            agent {
+                label "linux && docker"
+            }
+            steps {
+                script {
+                    // Map of suite name -> closure, handed to parallel() below.
+                    def testTargets = [:]
+                    params.testTargets.split('\n').findAll{ it.contains("test_") }.each { testName ->
+                        testTargets[testName] = {
+                            node("${ALL_TESTS[testName].node}") {
+                                buildSummary.stageWithSummary("${testName}", "${testName}") {
+                                    buildSummary.setStageUrl("${testName}")
+                                    timeout(time: ALL_TESTS[testName].timeout, unit: 'MINUTES') {
+                                        script {
+                                            try {
+                                                dir("${testName}") {
+                                                    withEnv(ALL_TESTS[testName].env + ["PYTEST_TEST_NAME=_${testName}", "IS_PR_BUILD=${isPrBranch()}", "USE_WHEEL=1"]) {
+                                                        // cleanup and force the use of the installed wheel
+                                                        deleteDir()
+                                                        checkout scm
+                                                        unstash "wheel_file"
+                                                        sh "rm -rf *.py spaces models"
+                                                        // pull runtime details
+                                                        def dockerImage = sh(returnStdout: true, script: "make print-DOCKER_TEST_IMAGE").trim()
+                                                        def nvidiaSmiExitCode = sh(returnStdout: false, returnStatus: true, script: "nvidia-smi")
+                                                        // def dockerRuntime = "${nvidiaSmiExitCode}" == "0" ? "--runtime nvidia" : ""
+                                                        def dockerRuntime = ""  // TODO: keep until lab machines are upgraded
+                                                        if (ALL_TESTS[testName].use_docker) {
+                                                            docker.image("${dockerImage}").inside("--entrypoint='' --security-opt seccomp=unconfined --ulimit core=-1 --init --pid=host -e USE_WHEEL=1 -e HOME=${WORKSPACE}/${testName} ${dockerRuntime}") {
+                                                                sh "nvidia-smi || true"
+                                                                sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make install"
+                                                                sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make install-${ALL_TESTS[testName].install_deps}"
+                                                                sh """DEFAULT_MARKERS="${ALL_TESTS[testName].test_markers}" SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make ${ALL_TESTS[testName].test_target}"""
+                                                            }
+                                                        } else {
+                                                            sh "make venv"
+                                                            sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make install"
+                                                            sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make install-${ALL_TESTS[testName].install_deps}"
+                                                            sh """DEFAULT_MARKERS="${ALL_TESTS[testName].test_markers}" SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make ${ALL_TESTS[testName].test_target}"""
+                                                        }
+                                                    }
+                                                }
+                                            } finally {
+                                                // Best effort: the report does not exist when the run failed before the test
+                                                // target produced it. An unguarded `mv` would then fail inside `finally` and
+                                                // replace the original error. The two steps below already tolerate a missing report.
+                                                sh "mv ${testName}/test_report.xml ${testName}/${testName}_report.xml || true"
+                                                archiveArtifacts allowEmptyArchive: true, artifacts: "${testName}/${testName}_report.xml"
+                                                junit testResults: "${testName}/${testName}_report.xml", keepLongStdio: true, allowEmptyResults: true
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    parallel(testTargets)
+                }
+            }
+        }
+        // Runs `make publish` only when the `publish` parameter is set.
+        stage('Publish') {
+            when {
+                anyOf {
+                    expression { return params.publish }
+                }
+                beforeAgent true
+            }
+            agent {
+                label "linux && docker"
+            }
+            steps {
+                script {
+                    sh "make IS_PR_BUILD=${isPrBranch()} BUILD_NUMBER=${env.BUILD_ID} BUILD_BASE_NAME=${env.JOB_BASE_NAME} publish"
+                }
+            }
+        }
+    }
+}
+// True when the build is for a pull request: either CHANGE_BRANCH is set to a
+// non-empty value, or BRANCH_NAME starts with "PR-".
+def isPrBranch() {
+    def changeBranch = env.CHANGE_BRANCH
+    if (changeBranch != null && changeBranch != '') {
+        return true
+    }
+    def branchName = env.BRANCH_NAME
+    return branchName != null && branchName.startsWith("PR-")
+}

client/.gitignore ADDED Viewed

	@@ -0,0 +1,168 @@

+### Copied files ###
+h2ogpt_client/_h2ogpt_*.py
+### Poetry ###
+.poetry
+poetry
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/

client/Makefile ADDED Viewed

	@@ -0,0 +1,58 @@

+POETRY_INSTALL_DIR := $(abspath ./.poetry)
+POETRY_BIN         := $(POETRY_INSTALL_DIR)/bin/poetry
+# Package metadata is read from Poetry. These are recursively expanded (=), so Poetry
+# is only invoked when one of the values is actually used.
+PACKAGE_NAME    = $(firstword $(shell $(POETRY_BIN) version))
+PACKAGE_DIR     = $(subst -,_,$(PACKAGE_NAME))
+PACKAGE_VERSION = $(shell $(POETRY_BIN) version --short)
+# Space separated list of file paths (relative to ../src) that need to be copied from h2oGPT.
+FILES_FROM_H2OGPT := enums.py
+NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT = _h2ogpt_
+# Install a pinned Poetry release into the project-local install directory.
+$(POETRY_BIN):
+	@echo "Installing Poetry into '$(POETRY_INSTALL_DIR)' ..."
+	curl -sSL https://install.python-poetry.org | POETRY_HOME="$(POETRY_INSTALL_DIR)" python3 - --force --version 1.5.1
+# Copy the shared h2oGPT sources into the package directory as <prefix><basename>.
+# The basename is taken by the shell: make's $(notdir ...) would be expanded before the
+# loop runs, on the literal text "$file", and would not strip any directory part.
+# A failed copy aborts the recipe instead of being hidden by later loop iterations.
+.PHONY: copy_files_from_h2ogpt
+copy_files_from_h2ogpt:
+	for file in $(FILES_FROM_H2OGPT); do \
+		dst="$(PACKAGE_DIR)/$(NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT)$$(basename "$$file")"; \
+		echo "Copying '$$file' to '$$dst' ..."; \
+		cp -f "./../src/$$file" "$$dst" || exit 1; \
+	done
+# Remove build output and the files copied from h2oGPT.
+.PHONY: clean
+clean:
+	rm -rf dist
+	find "$(PACKAGE_DIR)" -name "$(NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT)*" -delete
+# Additionally remove the local Poetry install and the virtual environment.
+.PHONY: clean_deep
+clean_deep: clean
+	rm -rf "$(POETRY_INSTALL_DIR)"
+	rm -rf ".venv"
+.PHONY: setup
+setup: $(POETRY_BIN)
+	$(POETRY_BIN) install
+# Depends on $(POETRY_BIN) like `setup`, so it also works on a fresh checkout.
+.PHONY: setup_test
+setup_test: $(POETRY_BIN)
+	$(POETRY_BIN) install --only=test
+# flake8 findings are reported but do not fail the target (note the `|| true`).
+.PHONY: lint
+lint: copy_files_from_h2ogpt
+	$(POETRY_BIN) run black .
+	$(POETRY_BIN) run isort .
+	$(POETRY_BIN) run flake8 "$(PACKAGE_DIR)" "tests" || true
+	$(POETRY_BIN) run mypy --show-error-codes --pretty .
+.PHONY: test
+test: copy_files_from_h2ogpt
+	$(POETRY_BIN) run pytest -r=A
+.PHONY: build
+build: copy_files_from_h2ogpt
+	$(POETRY_BIN) build
+# `make print-NAME` echoes the value of the make variable NAME.
+print-%:
+	@echo $($*)

client/README.md ADDED Viewed

	@@ -0,0 +1,107 @@

+# h2oGPT Client
+A Python thin-client for h2oGPT.
+## Prerequisites
+- Python 3.8+
+If you don't have Python 3.8 or later on your system, you can use [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html).
+```bash
+conda create -n h2ogpt_client_build -y
+conda activate h2ogpt_client_build
+conda install python=3.8 -y
+```
+## Download Client Wheel
+Install the latest nightly wheel from S3.
+```bash
+pip install https://s3.amazonaws.com/artifacts.h2o.ai/snapshots/ai/h2o/h2ogpt_client/latest-nightly/h2ogpt_client-0.1.0-py3-none-any.whl
+```
+Nightly releases can also be found [here](https://github.com/h2oai/h2ogpt/releases)
+## Build Client Wheel
+If you want to build a fresh wheel from the main branch instead of using the nightly wheel, follow these instructions.
+### Setup
+:information_source: [Poetry](https://python-poetry.org) is used as the build tool.
+```shell
+rm -rf client/.poetry/
+make -C client setup
+```
+### Build
+```shell
+make -C client build
+```
+Distribution wheel file can be found in the `client/dist` directory.  This wheel can be installed in the primary h2oGPT environment or any other environment, e.g.
+```bash
+pip uninstall -y h2ogpt_client
+pip install client/dist/h2ogpt_client-*-py3-none-any.whl
+```
+## Usage
+Based on the `test_readme_example` test in the [test code](tests/test_client.py):
+```python
+def test_readme_example(local_server):
+    import os
+    import asyncio
+    from h2ogpt_client import Client
+    if local_server:
+        client = Client("http://0.0.0.0:7860")
+    else:
+        h2ogpt_key = os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
+        if h2ogpt_key is None:
+            return
+        # if you have API key for public instance:
+        client = Client("https://gpt.h2o.ai", h2ogpt_key=h2ogpt_key)
+    # Text completion
+    text_completion = client.text_completion.create()
+    response = asyncio.run(text_completion.complete("Hello world"))
+    print("asyncio text completion response: %s" % response)
+    # Text completion: synchronous
+    response = text_completion.complete_sync("Hello world")
+    print("sync text completion response: %s" % response)
+    # Chat completion
+    chat_completion = client.chat_completion.create()
+    reply = asyncio.run(chat_completion.chat("Hey!"))
+    print("asyncio text completion user: %s gpt: %s" % (reply["user"], reply["gpt"]))
+    chat_history = chat_completion.chat_history()
+    print("chat_history: %s" % chat_history)
+    # Chat completion: synchronous
+    reply = chat_completion.chat_sync("Hey!")
+    print("sync chat completion gpt: %s" % reply["gpt"])
+test_readme_example(local_server=True)
+```
+:warning: **Note**: Client APIs are still evolving. Hence, APIs can be changed without prior warnings.
+## Development Guide
+### Test
+In an h2oGPT environment with the client installed, you can run tests that exercise both the client and the server.
+#### Test with h2oGPT env
+1. Install test dependencies of the Client into the h2oGPT Python environment.
+```shell
+make -C client setup_test
+```
+2. Run the tests with h2oGPT.
+```shell
+pytest client/tests/
+```
+#### Test with an existing h2oGPT server
+If you already have a running h2oGPT server, then set the `H2OGPT_SERVER` environment variable to use it for testing.
+```shell
+make H2OGPT_SERVER="http://0.0.0.0:7860" -C client test
+```

client/h2ogpt_client/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from h2ogpt_client._core import Client
+from h2ogpt_client._h2ogpt_enums import LangChainMode, PromptType
+__all__ = ["Client", "PromptType", "LangChainMode"]

client/h2ogpt_client/_completion.py ADDED Viewed

	@@ -0,0 +1,507 @@

+import abc
+import ast
+import collections
+from typing import (
+    Any,
+    AsyncGenerator,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    OrderedDict,
+    Union,
+)
+from h2ogpt_client._gradio_client import GradioClientWrapper
+from h2ogpt_client._h2ogpt_enums import (
+    DocumentSubset,
+    LangChainAction,
+    LangChainMode,
+    PromptType,
+)
+from h2ogpt_client._models import Model
+# Maps h2oGPT parameter names (keys) to the names the client uses for them (values).
+# Insertion order is significant: `_to_h2ogpt_params` walks this mapping to emit
+# parameters in the order of params in h2oGPT.
+_H2OGPT_PARAMETERS_TO_CLIENT = collections.OrderedDict(
+    instruction="instruction",
+    iinput="input",
+    context="system_pre_context",
+    stream_output="stream_output",
+    prompt_type="prompt_type",
+    prompt_dict="prompt_dict",
+    temperature="temperature",
+    top_p="top_p",
+    top_k="top_k",
+    penalty_alpha="penalty_alpha",
+    num_beams="beams",
+    max_new_tokens="max_output_length",
+    min_new_tokens="min_output_length",
+    early_stopping="early_stopping",
+    max_time="max_time",
+    repetition_penalty="repetition_penalty",
+    num_return_sequences="number_returns",
+    do_sample="enable_sampler",
+    chat="chat",
+    instruction_nochat="instruction_nochat",
+    iinput_nochat="input_context_for_instruction",
+    langchain_mode="langchain_mode",
+    add_chat_history_to_context="add_chat_history_to_context",
+    langchain_action="langchain_action",
+    langchain_agents="langchain_agents",
+    top_k_docs="langchain_top_k_docs",
+    chunk="langchain_enable_chunk",
+    chunk_size="langchain_chunk_size",
+    document_subset="langchain_document_subset",
+    document_choice="langchain_document_choice",
+    document_source_substrings="langchain_document_source_substrings",
+    document_source_substrings_op="langchain_document_source_substrings_op",
+    document_content_substrings="langchain_document_content_substrings",
+    document_content_substrings_op="langchain_document_content_substrings_op",
+    pre_prompt_query="pre_prompt_query",
+    prompt_query="prompt_query",
+    pre_prompt_summary="pre_prompt_summary",
+    prompt_summary="prompt_summary",
+    hyde_llm_prompt="hyde_llm_prompt",
+    system_prompt="system_prompt",
+    image_audio_loaders="image_audio_loaders",
+    pdf_loaders="pdf_loaders",
+    url_loaders="url_loaders",
+    jq_schema="jq_schema",
+    visible_models="model",
+    h2ogpt_key="h2ogpt_key",
+    add_search_to_context="add_search_to_context",
+    chat_conversation="chat_conversation",
+    text_context_list="text_context_list",
+    docs_ordering_type="docs_ordering_type",
+    min_max_new_tokens="min_max_new_tokens",
+    max_input_tokens="max_input_tokens",
+    max_total_input_tokens="max_total_input_tokens",
+    docs_token_handling="docs_token_handling",
+    docs_joiner="docs_joiner",
+    hyde_level="hyde_level",
+    hyde_template="hyde_template",
+    hyde_show_only_final="hyde_show_only_final",
+    doc_json_mode="doc_json_mode",
+    chatbot_role="chatbot_role",
+    speaker="speaker",
+    tts_language="tts_language",
+    tts_speed="tts_speed",
+)
+def _to_h2ogpt_params(client_params: Dict[str, Any]) -> OrderedDict[str, Any]:
+    """Convert given params to the order of params in h2oGPT."""
+    h2ogpt_params: OrderedDict[str, Any] = collections.OrderedDict()
+    for h2ogpt_param_name, client_param_name in _H2OGPT_PARAMETERS_TO_CLIENT.items():
+        if client_param_name in client_params:
+            h2ogpt_params[h2ogpt_param_name] = client_params[client_param_name]
+    return h2ogpt_params
+# Default value for every client-side parameter, keyed by the client name (the
+# values of `_H2OGPT_PARAMETERS_TO_CLIENT`). The creators' `create()` methods
+# overlay their own arguments on top of this dict before translating the
+# names to h2oGPT's.
+# NOTE(review): `pre_prompt_query` defaults to a list while the neighbouring
+# prompt fields default to "" - confirm which type the server expects.
+# NOTE(review): `hyde_show_only_final` is None here but False in the
+# `create()` signatures; the signature value wins whenever `create()` is used.
+_DEFAULT_PARAMETERS: Dict[str, Any] = dict(
+    instruction="",
+    input="",
+    system_pre_context="",
+    stream_output=False,
+    prompt_type=PromptType.plain.value,
+    prompt_dict="",  # empty as prompt_type cannot be 'custom'
+    temperature=0.1,
+    top_p=1.0,
+    top_k=40,
+    penalty_alpha=0.0,
+    beams=1.0,
+    max_output_length=1024,
+    min_output_length=0,
+    early_stopping=False,
+    max_time=360,
+    repetition_penalty=1.07,
+    number_returns=1,
+    enable_sampler=False,
+    chat=False,
+    instruction_nochat="",
+    input_context_for_instruction="",
+    langchain_mode=LangChainMode.DISABLED.value,
+    add_chat_history_to_context=False,  # relevant only for the UI
+    langchain_action=LangChainAction.QUERY.value,
+    langchain_agents=[],
+    langchain_top_k_docs=4,  # langchain: number of document chunks
+    langchain_enable_chunk=True,  # langchain: whether to chunk documents
+    langchain_chunk_size=512,  # langchain: chunk size for document chunking
+    langchain_document_subset=DocumentSubset.Relevant.name,
+    langchain_document_choice=[],
+    langchain_document_source_substrings=[],
+    langchain_document_source_substrings_op='and',
+    langchain_document_content_substrings=[],
+    langchain_document_content_substrings_op='and',
+    pre_prompt_query=[],
+    prompt_query="",
+    pre_prompt_summary="",
+    prompt_summary="",
+    hyde_llm_prompt="",
+    system_prompt="",
+    image_audio_loaders=[],
+    pdf_loaders=[],
+    url_loaders=[],
+    jq_schema=".[]",
+    model=None,
+    h2ogpt_key=None,
+    add_search_to_context=False,
+    chat_conversation=None,
+    text_context_list=[],
+    docs_ordering_type="reverse_ucurve_sort",
+    min_max_new_tokens=256,
+    max_input_tokens=-1,
+    max_total_input_tokens=-1,
+    docs_token_handling="split_or_merge",
+    docs_joiner="\n\n",
+    hyde_level=0,
+    hyde_template=None,
+    hyde_show_only_final=None,
+    doc_json_mode=False,
+    chatbot_role="None",
+    speaker="None",
+    tts_language="autodetect",
+    tts_speed=1.0,
+)
+class _Completion(abc.ABC):
+    _API_NAME = "/submit_nochat_api"
+    def __init__(self, client: GradioClientWrapper, parameters: OrderedDict[str, Any]):
+        self._client = client
+        self._parameters = dict(parameters)
+    def _get_parameters(self, prompt: str) -> Dict[str, Any]:
+        self._parameters["instruction_nochat"] = prompt
+        return self._parameters
+    @staticmethod
+    def _get_reply(response: str) -> str:
+        return ast.literal_eval(response)["response"]
+    def _predict(self, prompt: str) -> str:
+        response = self._client.predict(
+            str(self._get_parameters(prompt)), api_name=self._API_NAME
+        )
+        return self._get_reply(response)
+    def _predict_and_stream(self, prompt: str) -> Generator[str, None, None]:
+        generator = self._client.predict_and_stream(
+            str(self._get_parameters(prompt)), api_name=self._API_NAME
+        )
+        reply_size_so_far = 0
+        for response in generator:
+            current_reply = self._get_reply(response)
+            new_reply_chunk = current_reply[reply_size_so_far:]
+            if not new_reply_chunk:
+                continue
+            reply_size_so_far += len(new_reply_chunk)
+            yield new_reply_chunk
+    async def _submit(self, prompt: str) -> str:
+        response = await self._client.submit(
+            str(self._get_parameters(prompt)), api_name=self._API_NAME
+        )
+        return self._get_reply(response)
+    async def _submit_and_stream(self, prompt: str) -> AsyncGenerator[str, None]:
+        generator = self._client.submit_and_stream(
+            str(self._get_parameters(prompt)), api_name=self._API_NAME
+        )
+        reply_size_so_far = 0
+        async for response in generator:
+            current_reply = self._get_reply(response)
+            new_reply_chunk = current_reply[reply_size_so_far:]
+            if not new_reply_chunk:
+                continue
+            reply_size_so_far += len(new_reply_chunk)
+            yield new_reply_chunk
+class TextCompletionCreator:
+    """Builder that can create text completions."""
+    def __init__(self, client: GradioClientWrapper):
+        self._client = client
+    def create(
+        self,
+        model: Union[None, Model, str] = None,
+        prompt_type: PromptType = PromptType.plain,
+        input_context_for_instruction: str = "",
+        enable_sampler=False,
+        temperature: float = 0.1,
+        top_p: float = 1.0,
+        top_k: int = 40,
+        penalty_alpha: float = 0.0,
+        beams: float = 1.0,
+        early_stopping: bool = False,
+        min_output_length: int = 0,
+        max_output_length: int = 1024,
+        max_time: int = 360,
+        repetition_penalty: float = 1.07,
+        number_returns: int = 1,
+        system_pre_context: str = "",
+        langchain_mode: LangChainMode = LangChainMode.DISABLED,
+        system_prompt: str = "",
+        add_search_to_context: bool = False,
+        text_context_list: List[str] = [],
+        docs_ordering_type: str = "reverse_ucurve_sort",
+        min_max_new_tokens: int = 256,
+        max_input_tokens: int = -1,
+        max_total_input_tokens: int = -1,
+        docs_token_handling: str = "split_or_merge",
+        docs_joiner: str = "\n\n",
+        hyde_level: int = 0,
+        hyde_template: Optional[str] = None,
+        hyde_show_only_final: bool = False,
+        doc_json_mode: bool = False,
+        chatbot_role="None",
+        speaker="None",
+        tts_language="autodetect",
+        tts_speed=1.0,
+    ) -> "TextCompletion":
+        """
+        Creates a new text completion.
+        :param model: model to be used, `None` means used the default model.
+        :param prompt_type: type of the prompt
+        :param input_context_for_instruction: input context for instruction
+        :param enable_sampler: enable or disable the sampler, required for use of
+                temperature, top_p, top_k
+        :param temperature: What sampling temperature to use, between 0 and 3.
+                Lower values will make it more focused and deterministic, but may lead
+                to repeat. Higher values will make the output more creative, but may
+                lead to hallucinations.
+        :param top_p: cumulative probability of tokens to sample from
+        :param top_k: number of tokens to sample from
+        :param penalty_alpha: >0 and top_k>1 enable contrastive search (not all models support)
+        :param beams: Number of searches for optimal overall probability.
+                Higher values uses more GPU memory and compute.
+        :param early_stopping: whether to stop early or not in beam search
+        :param min_output_length: minimum output length
+        :param max_output_length: maximum output length
+        :param max_time: maximum time to search optimal output
+        :param repetition_penalty: penalty for repetition
+        :param number_returns:
+        :param system_pre_context: directly pre-appended without prompt processing
+        :param langchain_mode: LangChain mode
+        :param system_prompt: Universal system prompt to override prompt_type's system
+                              prompt
+                              If pass 'None' or 'auto' or None, then automatic per-model value used
+        :param add_search_to_context: Whether to add web search of query to context
+        :param text_context_list: list of strings to use as context (up to allowed max_seq_len of model)
+        :param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
+        :param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+        :param max_input_tokens: Max input tokens to place into model context for each LLM call
+                                 -1 means auto, fully fill context for query, and fill by original document chunk for summarization
+                                 >=0 means use that to limit context filling to that many tokens
+        :param max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
+        :param docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
+                                                                         or top_k_docs original document chunks summarization
+                                    None or 'split_or_merge' means same as 'chunk' for query, while for summarization merges documents to fill up to max_input_tokens or model_max_len tokens
+        :param docs_joiner: string to join lists of text when doing split_or_merge.  None means '\n\n'
+        :param hyde_level: HYDE level for HYDE approach (https://arxiv.org/abs/2212.10496)
+                     0: No HYDE
+                     1: Use non-document-based LLM response and original query for embedding query
+                     2: Use document-based LLM response and original query for embedding query
+                     3+: Continue iterations of embedding prior answer and getting new response
+        :param hyde_template:
+                     None, 'None', 'auto' uses internal value and enable
+                     '{query}' is minimal template one can pass
+        :param hyde_show_only_final: See h2oGPT server docs
+        :param doc_json_mode: whether to give JSON to LLM and get JSON response back
+        :param chatbot_role: See h2oGPT server docs
+        :param speaker: See h2oGPT server docs
+        :param tts_language: See h2oGPT server docs
+        :param tts_speed: See h2oGPT server docs
+        """
+        args = locals().copy()
+        args["prompt_type"] = prompt_type.value  # convert to serializable type
+        args["langchain_mode"] = langchain_mode.value  # convert to serializable type
+        params = _to_h2ogpt_params({**_DEFAULT_PARAMETERS, **args})
+        params["instruction_nochat"] = None  # future prompt
+        params["h2ogpt_key"] = self._client.h2ogpt_key
+        return TextCompletion(self._client, params)
+class TextCompletion(_Completion):
+    """Text completion."""
+    async def complete(
+        self, prompt: str, enable_streaming: bool = False
+    ) -> Union[str, AsyncGenerator[str, None]]:
+        """
+        Complete this text completion.
+        :param prompt: text prompt to generate completion for
+        :param enable_streaming: whether to enable or disable streaming the response
+        :return: response from the model
+        """
+        if enable_streaming:
+            params = self._get_parameters(prompt)
+            params["stream_output"] = True
+            return self._submit_and_stream(prompt)
+        else:
+            return await self._submit(prompt)
+    def complete_sync(
+        self, prompt: str, enable_streaming: bool = False
+    ) -> Union[str, Generator[str, None, None]]:
+        """
+        Complete this text completion synchronously.
+        :param prompt: text prompt to generate completion for
+        :param enable_streaming: whether to enable or disable streaming the response
+        :return: response from the model
+        """
+        if enable_streaming:
+            params = self._get_parameters(prompt)
+            params["stream_output"] = True
+            return self._predict_and_stream(prompt)
+        else:
+            return self._predict(prompt)
+class ChatCompletionCreator:
+    """Chat completion."""
+    def __init__(self, client: GradioClientWrapper):
+        self._client = client
+    def create(
+        self,
+        model: Union[None, Model, str] = None,
+        prompt_type: PromptType = PromptType.plain,
+        input_context_for_instruction: str = "",
+        enable_sampler=False,
+        temperature: float = 0.1,
+        top_p: float = 1.0,
+        top_k: int = 40,
+        penalty_alpha: float = 0.0,
+        beams: float = 1.0,
+        early_stopping: bool = False,
+        min_output_length: int = 0,
+        max_output_length: int = 1024,
+        max_time: int = 360,
+        repetition_penalty: float = 1.07,
+        number_returns: int = 1,
+        system_pre_context: str = "",
+        langchain_mode: LangChainMode = LangChainMode.DISABLED,
+        system_prompt: str = "",
+        add_search_to_context: bool = False,
+        text_context_list: List[str] = [],
+        docs_ordering_type: str = "reverse_ucurve_sort",
+        min_max_new_tokens: int = 256,
+        max_input_tokens: int = -1,
+        max_total_input_tokens: int = -1,
+        docs_token_handling: str = "split_or_merge",
+        docs_joiner: str = "\n\n",
+        hyde_level: int = 0,
+        hyde_template: Optional[str] = None,
+        hyde_show_only_final: bool = False,
+        doc_json_mode: bool = False,
+        chatbot_role="None",
+        speaker="None",
+        tts_language="autodetect",
+        tts_speed=1.0,
+    ) -> "ChatCompletion":
+        """
+        Creates a new chat completion.
+        :param model: model to be used, `None` means used the default model.
+        :param prompt_type: type of the prompt
+        :param input_context_for_instruction: input context for instruction
+        :param enable_sampler: enable or disable the sampler, required for use of
+                temperature, top_p, top_k
+        :param temperature: What sampling temperature to use, between 0 and 3.
+                Lower values will make it more focused and deterministic, but may lead
+                to repeat. Higher values will make the output more creative, but may
+                lead to hallucinations.
+        :param top_p: cumulative probability of tokens to sample from
+        :param top_k: number of tokens to sample from
+        :param penalty_alpha: >0 and top_k>1 enable contrastive search (not all models support)
+        :param beams: Number of searches for optimal overall probability.
+                Higher values uses more GPU memory and compute.
+        :param early_stopping: whether to stop early or not in beam search
+        :param min_output_length: minimum output length
+        :param max_output_length: maximum output length
+        :param max_time: maximum time to search optimal output
+        :param repetition_penalty: penalty for repetition
+        :param number_returns:
+        :param system_pre_context: directly pre-appended without prompt processing
+        :param langchain_mode: LangChain mode
+        :param system_prompt: Universal system prompt to override prompt_type's system
+                              prompt
+        :param add_search_to_context: Whether to add web search of query to context
+        :param text_context_list: list of strings to use as context (up to allowed max_seq_len of model)
+        :param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
+        :param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+        :param max_input_tokens: Max input tokens to place into model context for each LLM call
+                                 -1 means auto, fully fill context for query, and fill by original document chunk for summarization
+                                 >=0 means use that to limit context filling to that many tokens
+        :param max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
+        :param docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
+                                                                         or top_k_docs original document chunks summarization
+                                    None or 'split_or_merge' means same as 'chunk' for query, while for summarization merges documents to fill up to max_input_tokens or model_max_len tokens
+        :param docs_joiner: string to join lists of text when doing split_or_merge.  None means '\n\n'
+        :param hyde_level: HYDE level for HYDE approach (https://arxiv.org/abs/2212.10496)
+                     0: No HYDE
+                     1: Use non-document-based LLM response and original query for embedding query
+                     2: Use document-based LLM response and original query for embedding query
+                     3+: Continue iterations of embedding prior answer and getting new response
+        :param hyde_template:
+                     None, 'None', 'auto' uses internal value and enable
+                     '{query}' is minimal template one can pass
+        :param hyde_show_only_final: See h2oGPT server docs
+        :param doc_json_mode: whether to give JSON to LLM and get JSON response back
+        :param chatbot_role: See h2oGPT server docs
+        :param speaker: See h2oGPT server docs
+        :param tts_language: See h2oGPT server docs
+        :param tts_speed: See h2oGPT server docs
+        """
+        args = locals().copy()
+        args["prompt_type"] = prompt_type.value  # convert to serializable type
+        args["langchain_mode"] = langchain_mode.value  # convert to serializable type
+        params = _to_h2ogpt_params({**_DEFAULT_PARAMETERS, **args})
+        params["instruction_nochat"] = None  # future prompts
+        params["add_chat_history_to_context"] = True
+        params["h2ogpt_key"] = self._client.h2ogpt_key
+        params["chat_conversation"] = []  # chat history (FIXME: Only works if 1 model?)
+        return ChatCompletion(self._client, params)
+class ChatCompletion(_Completion):
+    """Chat completion."""
+    def _update_history(self, prompt: str, reply: str) -> None:
+        self._parameters["chat_conversation"].append((prompt, reply))
+    async def chat(self, prompt: str) -> Dict[str, str]:
+        """
+        Complete this chat completion.
+        :param prompt: text prompt to generate completions for
+        :returns chat reply
+        """
+        reply = await self._submit(prompt)
+        self._update_history(prompt, reply)
+        return {"user": prompt, "gpt": reply}
+    def chat_sync(self, prompt: str) -> Dict[str, str]:
+        """
+        Complete this chat completion.
+        :param prompt: text prompt to generate completions for
+        :returns chat reply
+        """
+        reply = self._predict(prompt)
+        self._update_history(prompt, reply)
+        return {"user": prompt, "gpt": reply}
+    def chat_history(self) -> List[Dict[str, str]]:
+        """Returns the full chat history."""
+        return [
+            {"user": i[0], "gpt": i[1]} for i in self._parameters["chat_conversation"]
+        ]

client/h2ogpt_client/_core.py ADDED Viewed

	@@ -0,0 +1,50 @@

+from typing import Optional
+from h2ogpt_client._completion import ChatCompletionCreator, TextCompletionCreator
+from h2ogpt_client._gradio_client import GradioClientWrapper
+from h2ogpt_client._models import Models
+from h2ogpt_client._server import Server
+class Client:
+    """h2oGPT Client."""
+    def __init__(
+        self,
+        src: str,
+        h2ogpt_key: Optional[str] = None,
+        huggingface_token: Optional[str] = None,
+    ):
+        """
+        Creates a GPT client.
+        :param src: either the full URL to the hosted h2oGPT
+            (e.g. "http://0.0.0.0:7860", "https://fc752f297207f01c32.gradio.live")
+            or name of the Hugging Face Space to load, (e.g. "h2oai/h2ogpt-chatbot")
+        :param h2ogpt_key: access key to connect with a h2oGPT server
+        :param huggingface_token: Hugging Face token to use to access private Spaces
+        """
+        self._client = GradioClientWrapper(src, h2ogpt_key, huggingface_token)
+        self._text_completion = TextCompletionCreator(self._client)
+        self._chat_completion = ChatCompletionCreator(self._client)
+        self._models = Models(self._client)
+        self._server = Server(self._client)
+    @property
+    def text_completion(self) -> TextCompletionCreator:
+        """Text completion."""
+        return self._text_completion
+    @property
+    def chat_completion(self) -> ChatCompletionCreator:
+        """Chat completion."""
+        return self._chat_completion
+    @property
+    def models(self) -> Models:
+        """LL models."""
+        return self._models
+    @property
+    def server(self) -> Server:
+        """h2oGPT server."""
+        return self._server

client/h2ogpt_client/_gradio_client.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import asyncio
+import time
+from typing import Any, AsyncGenerator, Generator, List, Optional
+import gradio_client  # type: ignore
+class GradioClientWrapper:
+    def __init__(
+        self,
+        src: str,
+        h2ogpt_key: Optional[str] = None,
+        huggingface_token: Optional[str] = None,
+    ):
+        self._client = gradio_client.Client(
+            src=src, hf_token=huggingface_token, serialize=False, verbose=False
+        )
+        self.h2ogpt_key = h2ogpt_key
+    def predict(self, *args, api_name: str) -> Any:
+        return self._client.predict(*args, api_name=api_name)
+    def predict_and_stream(self, *args, api_name: str) -> Generator[str, None, None]:
+        job = self._client.submit(*args, api_name=api_name)
+        while not job.done():
+            outputs: List[str] = job.outputs()
+            if not len(outputs):
+                time.sleep(0.1)
+                continue
+            newest_response = outputs[-1]
+            yield newest_response
+        e = job.exception()
+        if e and isinstance(e, BaseException):
+            raise RuntimeError from e
+    async def submit(self, *args, api_name: str) -> Any:
+        return await asyncio.wrap_future(self._client.submit(*args, api_name=api_name))
+    async def submit_and_stream(
+        self, *args, api_name: str
+    ) -> AsyncGenerator[Any, None]:
+        job = self._client.submit(*args, api_name=api_name)
+        while not job.done():
+            outputs: List[str] = job.outputs()
+            if not len(outputs):
+                await asyncio.sleep(0.1)
+                continue
+            newest_response = outputs[-1]
+            yield newest_response
+        e = job.exception()
+        if e and isinstance(e, BaseException):
+            raise RuntimeError from e

client/h2ogpt_client/_models.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import ast
+from typing import Any, Dict, List
+from h2ogpt_client._gradio_client import GradioClientWrapper
+class Model:
+    """Large language model in the h2oGPT server."""
+    def __init__(self, raw_info: Dict[str, Any]):
+        self._name = raw_info["base_model"]
+        self._raw_info = raw_info
+    @property
+    def name(self) -> str:
+        """Name of the model."""
+        return self._name
+    def __repr__(self) -> str:
+        return self.name.__repr__()
+    def __str__(self) -> str:
+        return self.name.__str__()
+class Models:
+    """Interact with LL Models in h2oGPT."""
+    def __init__(self, client: GradioClientWrapper):
+        self._client = client
+    def list(self) -> List[Model]:
+        """List all models available in the h2oGPT server."""
+        models = ast.literal_eval(self._client.predict(api_name="/model_names"))
+        return [Model(m) for m in models]

client/h2ogpt_client/_server.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from h2ogpt_client._gradio_client import GradioClientWrapper
+class Server:
+    """h2oGPT server."""
+    def __init__(self, client: GradioClientWrapper):
+        self._client = client
+    @property
+    def address(self) -> str:
+        """h2oGPT server address."""
+        return self._client._client.src
+    @property
+    def hash(self) -> str:
+        """h2oGPT server system hash."""
+        return str(self._client.predict(api_name="/system_hash"))

client/poetry.lock ADDED Viewed

	@@ -0,0 +1,856 @@

+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+[[package]]
+name = "anyio"
+version = "3.6.2"
+description = "High level compatibility layer for multiple asynchronous event loop implementations"
+optional = false
+python-versions = ">=3.6.2"
+files = [
+    {file = "anyio-3.6.2-py3-none-any.whl", hash = "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"},
+    {file = "anyio-3.6.2.tar.gz", hash = "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421"},
+]
+[package.dependencies]
+idna = ">=2.8"
+sniffio = ">=1.1"
+[package.extras]
+doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
+test = ["contextlib2", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (<0.15)", "uvloop (>=0.15)"]
+trio = ["trio (>=0.16,<0.22)"]
+[[package]]
+name = "attrs"
+version = "23.1.0"
+description = "Classes Without Boilerplate"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
+    {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
+]
+[package.extras]
+cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
+dev = ["attrs[docs,tests]", "pre-commit"]
+docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
+tests = ["attrs[tests-no-zope]", "zope-interface"]
+tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+[[package]]
+name = "black"
+version = "23.3.0"
+description = "The uncompromising code formatter."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"},
+    {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"},
+    {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"},
+    {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"},
+    {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"},
+    {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"},
+    {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"},
+    {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"},
+    {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"},
+    {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"},
+    {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"},
+    {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"},
+    {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"},
+    {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"},
+    {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"},
+    {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"},
+    {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"},
+    {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"},
+    {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"},
+    {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"},
+    {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"},
+    {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"},
+    {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"},
+    {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"},
+    {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"},
+]
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.7.4)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+[[package]]
+name = "certifi"
+version = "2023.5.7"
+description = "Python package for providing Mozilla's CA Bundle."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"},
+    {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"},
+]
+[[package]]
+name = "charset-normalizer"
+version = "3.1.0"
+description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+optional = false
+python-versions = ">=3.7.0"
+files = [
+    {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"},
+    {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"},
+]
+[[package]]
+name = "click"
+version = "8.1.3"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
+    {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
+]
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+[[package]]
+name = "exceptiongroup"
+version = "1.1.1"
+description = "Backport of PEP 654 (exception groups)"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"},
+    {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"},
+]
+[package.extras]
+test = ["pytest (>=6)"]
+[[package]]
+name = "filelock"
+version = "3.12.0"
+description = "A platform independent file lock."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "filelock-3.12.0-py3-none-any.whl", hash = "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9"},
+    {file = "filelock-3.12.0.tar.gz", hash = "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"},
+]
+[package.extras]
+docs = ["furo (>=2023.3.27)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"]
+[[package]]
+name = "flake8"
+version = "5.0.4"
+description = "the modular source code checker: pep8 pyflakes and co"
+optional = false
+python-versions = ">=3.6.1"
+files = [
+    {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"},
+    {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"},
+]
+[package.dependencies]
+mccabe = ">=0.7.0,<0.8.0"
+pycodestyle = ">=2.9.0,<2.10.0"
+pyflakes = ">=2.5.0,<2.6.0"
+[[package]]
+name = "flake8-pyproject"
+version = "1.2.3"
+description = "Flake8 plug-in loading the configuration from pyproject.toml"
+optional = false
+python-versions = ">= 3.6"
+files = [
+    {file = "flake8_pyproject-1.2.3-py3-none-any.whl", hash = "sha256:6249fe53545205af5e76837644dc80b4c10037e73a0e5db87ff562d75fb5bd4a"},
+]
+[package.dependencies]
+Flake8 = ">=5"
+TOMLi = {version = "*", markers = "python_version < \"3.11\""}
+[package.extras]
+dev = ["pyTest", "pyTest-cov"]
+[[package]]
+name = "fsspec"
+version = "2023.5.0"
+description = "File-system specification"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "fsspec-2023.5.0-py3-none-any.whl", hash = "sha256:51a4ad01a5bb66fcc58036e288c0d53d3975a0df2a5dc59a93b59bade0391f2a"},
+    {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"},
+]
+[package.extras]
+abfs = ["adlfs"]
+adl = ["adlfs"]
+arrow = ["pyarrow (>=1)"]
+dask = ["dask", "distributed"]
+devel = ["pytest", "pytest-cov"]
+dropbox = ["dropbox", "dropboxdrivefs", "requests"]
+full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
+fuse = ["fusepy"]
+gcs = ["gcsfs"]
+git = ["pygit2"]
+github = ["requests"]
+gs = ["gcsfs"]
+gui = ["panel"]
+hdfs = ["pyarrow (>=1)"]
+http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"]
+libarchive = ["libarchive-c"]
+oci = ["ocifs"]
+s3 = ["s3fs"]
+sftp = ["paramiko"]
+smb = ["smbprotocol"]
+ssh = ["paramiko"]
+tqdm = ["tqdm"]
+[[package]]
+name = "gradio-client"
+version = "0.6.1"
+description = "Python library for easily interacting with trained machine learning models"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "gradio_client-0.6.1-py3-none-any.whl", hash = "sha256:2f36a8467622f748539c84d142f1a71df6b83666d017ee4fb6ef8a2696f70f66"},
+    {file = "gradio_client-0.6.1.tar.gz", hash = "sha256:2f36a8467622f748539c84d142f1a71df6b83666d017ee4fb6ef8a2696f70f66"},
+]
+[package.dependencies]
+fsspec = "*"
+httpx = "*"
+huggingface-hub = ">=0.13.0"
+packaging = "*"
+requests = ">=2.0,<3.0"
+typing-extensions = ">=4.0,<5.0"
+websockets = ">=10.0,<12.0"
+[[package]]
+name = "h11"
+version = "0.14.0"
+description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
+    {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
+]
+[[package]]
+name = "httpcore"
+version = "0.17.0"
+description = "A minimal low-level HTTP client."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "httpcore-0.17.0-py3-none-any.whl", hash = "sha256:0fdfea45e94f0c9fd96eab9286077f9ff788dd186635ae61b312693e4d943599"},
+    {file = "httpcore-0.17.0.tar.gz", hash = "sha256:cc045a3241afbf60ce056202301b4d8b6af08845e3294055eb26b09913ef903c"},
+]
+[package.dependencies]
+anyio = ">=3.0,<5.0"
+certifi = "*"
+h11 = ">=0.13,<0.15"
+sniffio = "==1.*"
+[package.extras]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+[[package]]
+name = "httpx"
+version = "0.24.0"
+description = "The next generation HTTP client."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "httpx-0.24.0-py3-none-any.whl", hash = "sha256:447556b50c1921c351ea54b4fe79d91b724ed2b027462ab9a329465d147d5a4e"},
+    {file = "httpx-0.24.0.tar.gz", hash = "sha256:507d676fc3e26110d41df7d35ebd8b3b8585052450f4097401c9be59d928c63e"},
+]
+[package.dependencies]
+certifi = "*"
+httpcore = ">=0.15.0,<0.18.0"
+idna = "*"
+sniffio = "*"
+[package.extras]
+brotli = ["brotli", "brotlicffi"]
+cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+[[package]]
+name = "huggingface-hub"
+version = "0.16.4"
+description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
+optional = false
+python-versions = ">=3.7.0"
+files = [
+    {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"},
+    {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"},
+]
+[package.dependencies]
+filelock = "*"
+fsspec = "*"
+packaging = ">=20.9"
+pyyaml = ">=5.1"
+requests = "*"
+tqdm = ">=4.42.1"
+typing-extensions = ">=3.7.4.3"
+[package.extras]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+cli = ["InquirerPy (==0.3.4)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
+quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"]
+tensorflow = ["graphviz", "pydot", "tensorflow"]
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "gradio", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"]
+torch = ["torch"]
+typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+[[package]]
+name = "idna"
+version = "3.4"
+description = "Internationalized Domain Names in Applications (IDNA)"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
+    {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
+]
+[[package]]
+name = "iniconfig"
+version = "2.0.0"
+description = "brain-dead simple config-ini parsing"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
+    {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
+]
+[[package]]
+name = "isort"
+version = "5.12.0"
+description = "A Python utility / library to sort Python imports."
+optional = false
+python-versions = ">=3.8.0"
+files = [
+    {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"},
+    {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"},
+]
+[package.extras]
+colors = ["colorama (>=0.4.3)"]
+pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"]
+plugins = ["setuptools"]
+requirements-deprecated-finder = ["pip-api", "pipreqs"]
+[[package]]
+name = "mccabe"
+version = "0.7.0"
+description = "McCabe checker, plugin for flake8"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
+    {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
+]
+[[package]]
+name = "mypy"
+version = "1.3.0"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"},
+    {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"},
+    {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"},
+    {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"},
+    {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"},
+    {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"},
+    {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"},
+    {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"},
+    {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"},
+    {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"},
+    {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"},
+    {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"},
+    {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"},
+    {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"},
+    {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"},
+    {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"},
+    {file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"},
+    {file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"},
+    {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"},
+    {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"},
+    {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"},
+    {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"},
+    {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"},
+    {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"},
+    {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"},
+    {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"},
+]
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = ">=3.10"
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+install-types = ["pip"]
+python2 = ["typed-ast (>=1.4.0,<2)"]
+reports = ["lxml"]
+[[package]]
+name = "mypy-extensions"
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+    {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
+]
+[[package]]
+name = "packaging"
+version = "23.1"
+description = "Core utilities for Python packages"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
+    {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
+]
+[[package]]
+name = "pathspec"
+version = "0.11.1"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"},
+    {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"},
+]
+[[package]]
+name = "platformdirs"
+version = "3.5.0"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "platformdirs-3.5.0-py3-none-any.whl", hash = "sha256:47692bc24c1958e8b0f13dd727307cff1db103fca36399f457da8e05f222fdc4"},
+    {file = "platformdirs-3.5.0.tar.gz", hash = "sha256:7954a68d0ba23558d753f73437c55f89027cf8f5108c19844d4b82e5af396335"},
+]
+[package.extras]
+docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
+[[package]]
+name = "pluggy"
+version = "1.0.0"
+description = "plugin and hook calling mechanisms for python"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
+    {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
+]
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["pytest", "pytest-benchmark"]
+[[package]]
+name = "pycodestyle"
+version = "2.9.1"
+description = "Python style guide checker"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"},
+    {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"},
+]
+[[package]]
+name = "pyflakes"
+version = "2.5.0"
+description = "passive checker of Python programs"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"},
+    {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"},
+]
+[[package]]
+name = "pytest"
+version = "7.2.2"
+description = "pytest: simple powerful testing with Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"},
+    {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"},
+]
+[package.dependencies]
+attrs = ">=19.2.0"
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=0.12,<2.0"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
+[package.extras]
+testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
+[[package]]
+name = "pytest-asyncio"
+version = "0.21.0"
+description = "Pytest support for asyncio"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pytest-asyncio-0.21.0.tar.gz", hash = "sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b"},
+    {file = "pytest_asyncio-0.21.0-py3-none-any.whl", hash = "sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"},
+]
+[package.dependencies]
+pytest = ">=7.0.0"
+[package.extras]
+docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
+testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"]
+[[package]]
+name = "pyyaml"
+version = "6.0"
+description = "YAML parser and emitter for Python"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
+    {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
+    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
+    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
+    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
+    {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
+    {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
+    {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
+    {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
+    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
+    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
+    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
+    {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
+    {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
+    {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
+    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
+    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
+    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
+    {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
+    {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
+    {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
+    {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
+    {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
+    {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
+    {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
+    {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
+    {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
+    {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
+    {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
+    {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
+    {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
+    {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
+    {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
+    {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
+    {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
+    {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
+    {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
+    {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
+    {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
+    {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
+]
+[[package]]
+name = "requests"
+version = "2.30.0"
+description = "Python HTTP for Humans."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "requests-2.30.0-py3-none-any.whl", hash = "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294"},
+    {file = "requests-2.30.0.tar.gz", hash = "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4"},
+]
+[package.dependencies]
+certifi = ">=2017.4.17"
+charset-normalizer = ">=2,<4"
+idna = ">=2.5,<4"
+urllib3 = ">=1.21.1,<3"
+[package.extras]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
+[[package]]
+name = "sniffio"
+version = "1.3.0"
+description = "Sniff out which async library your code is running under"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
+    {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
+]
+[[package]]
+name = "tomli"
+version = "2.0.1"
+description = "A lil' TOML parser"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
+    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+]
+[[package]]
+name = "tqdm"
+version = "4.65.0"
+description = "Fast, Extensible Progress Meter"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"},
+    {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"},
+]
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+[package.extras]
+dev = ["py-make (>=0.1.0)", "twine", "wheel"]
+notebook = ["ipywidgets (>=6)"]
+slack = ["slack-sdk"]
+telegram = ["requests"]
+[[package]]
+name = "typing-extensions"
+version = "4.5.0"
+description = "Backported and Experimental Type Hints for Python 3.7+"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"},
+    {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"},
+]
+[[package]]
+name = "urllib3"
+version = "1.26.16"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+files = [
+    {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"},
+    {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"},
+]
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
+secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
+socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
+[[package]]
+name = "websockets"
+version = "11.0.3"
+description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"},
+    {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"},
+    {file = "websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d27a4832cc1a0ee07cdcf2b0629a8a72db73f4cf6de6f0904f6661227f256f"},
+    {file = "websockets-11.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffd7dcaf744f25f82190856bc26ed81721508fc5cbf2a330751e135ff1283564"},
+    {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7622a89d696fc87af8e8d280d9b421db5133ef5b29d3f7a1ce9f1a7bf7fcfa11"},
+    {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bceab846bac555aff6427d060f2fcfff71042dba6f5fca7dc4f75cac815e57ca"},
+    {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:54c6e5b3d3a8936a4ab6870d46bdd6ec500ad62bde9e44462c32d18f1e9a8e54"},
+    {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:41f696ba95cd92dc047e46b41b26dd24518384749ed0d99bea0a941ca87404c4"},
+    {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:86d2a77fd490ae3ff6fae1c6ceaecad063d3cc2320b44377efdde79880e11526"},
+    {file = "websockets-11.0.3-cp310-cp310-win32.whl", hash = "sha256:2d903ad4419f5b472de90cd2d40384573b25da71e33519a67797de17ef849b69"},
+    {file = "websockets-11.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:1d2256283fa4b7f4c7d7d3e84dc2ece74d341bce57d5b9bf385df109c2a1a82f"},
+    {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e848f46a58b9fcf3d06061d17be388caf70ea5b8cc3466251963c8345e13f7eb"},
+    {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa5003845cdd21ac0dc6c9bf661c5beddd01116f6eb9eb3c8e272353d45b3288"},
+    {file = "websockets-11.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b58cbf0697721120866820b89f93659abc31c1e876bf20d0b3d03cef14faf84d"},
+    {file = "websockets-11.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:660e2d9068d2bedc0912af508f30bbeb505bbbf9774d98def45f68278cea20d3"},
+    {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1f0524f203e3bd35149f12157438f406eff2e4fb30f71221c8a5eceb3617b6b"},
+    {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:def07915168ac8f7853812cc593c71185a16216e9e4fa886358a17ed0fd9fcf6"},
+    {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b30c6590146e53149f04e85a6e4fcae068df4289e31e4aee1fdf56a0dead8f97"},
+    {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:619d9f06372b3a42bc29d0cd0354c9bb9fb39c2cbc1a9c5025b4538738dbffaf"},
+    {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:01f5567d9cf6f502d655151645d4e8b72b453413d3819d2b6f1185abc23e82dd"},
+    {file = "websockets-11.0.3-cp311-cp311-win32.whl", hash = "sha256:e1459677e5d12be8bbc7584c35b992eea142911a6236a3278b9b5ce3326f282c"},
+    {file = "websockets-11.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:e7837cb169eca3b3ae94cc5787c4fed99eef74c0ab9506756eea335e0d6f3ed8"},
+    {file = "websockets-11.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f59a3c656fef341a99e3d63189852be7084c0e54b75734cde571182c087b152"},
+    {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2529338a6ff0eb0b50c7be33dc3d0e456381157a31eefc561771ee431134a97f"},
+    {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34fd59a4ac42dff6d4681d8843217137f6bc85ed29722f2f7222bd619d15e95b"},
+    {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:332d126167ddddec94597c2365537baf9ff62dfcc9db4266f263d455f2f031cb"},
+    {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6505c1b31274723ccaf5f515c1824a4ad2f0d191cec942666b3d0f3aa4cb4007"},
+    {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f467ba0050b7de85016b43f5a22b46383ef004c4f672148a8abf32bc999a87f0"},
+    {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9d9acd80072abcc98bd2c86c3c9cd4ac2347b5a5a0cae7ed5c0ee5675f86d9af"},
+    {file = "websockets-11.0.3-cp37-cp37m-win32.whl", hash = "sha256:e590228200fcfc7e9109509e4d9125eace2042fd52b595dd22bbc34bb282307f"},
+    {file = "websockets-11.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b16fff62b45eccb9c7abb18e60e7e446998093cdcb50fed33134b9b6878836de"},
+    {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fb06eea71a00a7af0ae6aefbb932fb8a7df3cb390cc217d51a9ad7343de1b8d0"},
+    {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8a34e13a62a59c871064dfd8ffb150867e54291e46d4a7cf11d02c94a5275bae"},
+    {file = "websockets-11.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4841ed00f1026dfbced6fca7d963c4e7043aa832648671b5138008dc5a8f6d99"},
+    {file = "websockets-11.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a073fc9ab1c8aff37c99f11f1641e16da517770e31a37265d2755282a5d28aa"},
+    {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68b977f21ce443d6d378dbd5ca38621755f2063d6fdb3335bda981d552cfff86"},
+    {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a99a7a71631f0efe727c10edfba09ea6bee4166a6f9c19aafb6c0b5917d09c"},
+    {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bee9fcb41db2a23bed96c6b6ead6489702c12334ea20a297aa095ce6d31370d0"},
+    {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4b253869ea05a5a073ebfdcb5cb3b0266a57c3764cf6fe114e4cd90f4bfa5f5e"},
+    {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1553cb82942b2a74dd9b15a018dce645d4e68674de2ca31ff13ebc2d9f283788"},
+    {file = "websockets-11.0.3-cp38-cp38-win32.whl", hash = "sha256:f61bdb1df43dc9c131791fbc2355535f9024b9a04398d3bd0684fc16ab07df74"},
+    {file = "websockets-11.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:03aae4edc0b1c68498f41a6772d80ac7c1e33c06c6ffa2ac1c27a07653e79d6f"},
+    {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:777354ee16f02f643a4c7f2b3eff8027a33c9861edc691a2003531f5da4f6bc8"},
+    {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c82f11964f010053e13daafdc7154ce7385ecc538989a354ccc7067fd7028fd"},
+    {file = "websockets-11.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3580dd9c1ad0701169e4d6fc41e878ffe05e6bdcaf3c412f9d559389d0c9e016"},
+    {file = "websockets-11.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f1a3f10f836fab6ca6efa97bb952300b20ae56b409414ca85bff2ad241d2a61"},
+    {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df41b9bc27c2c25b486bae7cf42fccdc52ff181c8c387bfd026624a491c2671b"},
+    {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279e5de4671e79a9ac877427f4ac4ce93751b8823f276b681d04b2156713b9dd"},
+    {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1fdf26fa8a6a592f8f9235285b8affa72748dc12e964a5518c6c5e8f916716f7"},
+    {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:69269f3a0b472e91125b503d3c0b3566bda26da0a3261c49f0027eb6075086d1"},
+    {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:97b52894d948d2f6ea480171a27122d77af14ced35f62e5c892ca2fae9344311"},
+    {file = "websockets-11.0.3-cp39-cp39-win32.whl", hash = "sha256:c7f3cb904cce8e1be667c7e6fef4516b98d1a6a0635a58a57528d577ac18a128"},
+    {file = "websockets-11.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c792ea4eabc0159535608fc5658a74d1a81020eb35195dd63214dcf07556f67e"},
+    {file = "websockets-11.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e58f2c36cc52d41f2659e4c0cbf7353e28c8c9e63e30d8c6d3494dc9fdedcf"},
+    {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de36fe9c02995c7e6ae6efe2e205816f5f00c22fd1fbf343d4d18c3d5ceac2f5"},
+    {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ac56b661e60edd453585f4bd68eb6a29ae25b5184fd5ba51e97652580458998"},
+    {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e052b8467dd07d4943936009f46ae5ce7b908ddcac3fda581656b1b19c083d9b"},
+    {file = "websockets-11.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:42cc5452a54a8e46a032521d7365da775823e21bfba2895fb7b77633cce031bb"},
+    {file = "websockets-11.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e6316827e3e79b7b8e7d8e3b08f4e331af91a48e794d5d8b099928b6f0b85f20"},
+    {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8531fdcad636d82c517b26a448dcfe62f720e1922b33c81ce695d0edb91eb931"},
+    {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c114e8da9b475739dde229fd3bc6b05a6537a88a578358bc8eb29b4030fac9c9"},
+    {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e063b1865974611313a3849d43f2c3f5368093691349cf3c7c8f8f75ad7cb280"},
+    {file = "websockets-11.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:92b2065d642bf8c0a82d59e59053dd2fdde64d4ed44efe4870fa816c1232647b"},
+    {file = "websockets-11.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0ee68fe502f9031f19d495dae2c268830df2760c0524cbac5d759921ba8c8e82"},
+    {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcacf2c7a6c3a84e720d1bb2b543c675bf6c40e460300b628bab1b1efc7c034c"},
+    {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b67c6f5e5a401fc56394f191f00f9b3811fe843ee93f4a70df3c389d1adf857d"},
+    {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5023a4b6a5b183dc838808087033ec5df77580485fc533e7dab2567851b0a4"},
+    {file = "websockets-11.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ed058398f55163a79bb9f06a90ef9ccc063b204bb346c4de78efc5d15abfe602"},
+    {file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"},
+    {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"},
+]
+[metadata]
+lock-version = "2.0"
+python-versions = "^3.8"
+content-hash = "80634bedd72b53e96d00fe6cbad0d9bfbbdda1e017c24f19d6de41d046f566c7"

client/poetry.toml ADDED Viewed

	@@ -0,0 +1 @@


1	+ virtualenvs.in-project = true

client/pyproject.toml ADDED Viewed

	@@ -0,0 +1,41 @@

+[tool.poetry]
+name = "h2ogpt-client"
+version = "0.1.0"
+description = ""
+authors = []
+readme = "README.md"
+include = ["h2ogpt_client/_h2ogpt*"]
+[tool.poetry.dependencies]
+python = "^3.8"
+gradio-client = "^0.6.1"
+[tool.poetry.group.test.dependencies]
+pytest = "7.2.2"
+pytest-asyncio = "^0.21.0"
+[tool.poetry.group.dev.dependencies]
+mypy = "^1.3.0"
+black = "^23.3.0"
+flake8 = "5.0.4"
+isort = "^5.12.0"
+flake8-pyproject = "^1.2.3"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+[tool.isort]
+profile = "black"
+py_version = "auto"
+[tool.flake8]
+max-line-length = 88
+[tool.mypy]
+python_version = "3.8"
+[tool.pytest.ini_options]
+pythonpath = "h2ogpt_client"
+log_cli = true
+log_cli_level = "INFO"

client/tests/__init__.py ADDED Viewed

File without changes

client/tests/conftest.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import importlib.util
+import logging
+import os
+import sys
+from pathlib import Path
+from types import ModuleType
+import pytest
+LOGGER = logging.getLogger(__name__)
+@pytest.fixture(scope="module")
+def server_url():
+    server_url = os.getenv("H2OGPT_SERVER")
+    if not server_url:
+        LOGGER.info("Couldn't find a running h2oGPT server. Hence starting a one.")
+        generate = _import_module_from_h2ogpt("generate.py")
+        generate.main(
+            base_model="h2oai/h2ogpt-oig-oasst1-512-6_9b",
+            prompt_type="human_bot",
+            chat=False,
+            stream_output=False,
+            gradio=True,
+            num_beams=1,
+            block_gradio_exit=False,
+        )
+        server_url = "http://0.0.0.0:7860"  # assume server started
+        LOGGER.info(f"h2oGPT server started at '{server_url}'.")
+    return server_url
+@pytest.fixture(scope="module")
+def h2ogpt_key():
+    return os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
+@pytest.fixture(scope="module")
+def eval_func_param_names():
+    parameters = _import_module_from_h2ogpt("src/evaluate_params.py")
+    return parameters.eval_func_param_names
+def _import_module_from_h2ogpt(file_name: str) -> ModuleType:
+    h2ogpt_dir = Path(__file__).parent.parent.parent
+    file_path = (h2ogpt_dir / file_name).absolute()
+    module_name = file_path.stem
+    LOGGER.info(f"Loading module '{module_name}' from '{file_path}'.")
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if not spec:
+        raise Exception(f"Couldn't load module '{module_name}' from '{file_path}'.")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module

client/tests/test_client.py ADDED Viewed

	@@ -0,0 +1,156 @@

+import platform
+import pytest
+from h2ogpt_client import Client
+platform.python_version()
+@pytest.fixture
+def client(server_url, h2ogpt_key) -> Client:
+    return Client(server_url, h2ogpt_key=h2ogpt_key)
+def _create_text_completion(client):
+    model = client.models.list()[-1]
+    return client.text_completion.create(model=model)
+@pytest.mark.asyncio
+async def test_text_completion(client):
+    text_completion = _create_text_completion(client)
+    response = await text_completion.complete(prompt="Hello world")
+    assert response
+    print(response)
+@pytest.mark.asyncio
+async def test_text_completion_stream(client):
+    text_completion = _create_text_completion(client)
+    response = await text_completion.complete(
+        prompt="Write a poem about the Amazon rainforest. End it with an emoji.",
+        enable_streaming=True,
+    )
+    async for token in response:
+        assert token
+        print(token, end="")
+def test_text_completion_sync(client):
+    text_completion = _create_text_completion(client)
+    response = text_completion.complete_sync(prompt="Hello world")
+    assert response
+    print(response)
+def test_text_completion_sync_stream(client):
+    text_completion = _create_text_completion(client)
+    response = text_completion.complete_sync(
+        prompt="Write a poem about the Amazon rainforest. End it with an emoji.",
+        enable_streaming=True,
+    )
+    for token in response:
+        assert token
+        print(token, end="")
+def _create_chat_completion(client):
+    model = client.models.list()[-1]
+    return client.chat_completion.create(model=model)
+@pytest.mark.asyncio
+async def test_chat_completion(client):
+    chat_completion = _create_chat_completion(client)
+    chat1 = await chat_completion.chat(prompt="Hey!")
+    assert chat1["user"] == "Hey!"
+    assert chat1["gpt"]
+    chat2 = await chat_completion.chat(prompt="What is the capital of USA?")
+    assert chat2["user"] == "What is the capital of USA?"
+    assert chat2["gpt"]
+    chat3 = await chat_completion.chat(prompt="What is the population in there?")
+    assert chat3["user"] == "What is the population in there?"
+    assert chat3["gpt"]
+    chat_history = chat_completion.chat_history()
+    assert chat_history == [chat1, chat2, chat3]
+    print(chat_history)
+def test_chat_completion_sync(client):
+    chat_completion = _create_chat_completion(client)
+    chat1 = chat_completion.chat_sync(prompt="What is UNESCO?")
+    assert chat1["user"] == "What is UNESCO?"
+    assert chat1["gpt"]
+    chat2 = chat_completion.chat_sync(prompt="Is it a part of the UN?")
+    assert chat2["user"] == "Is it a part of the UN?"
+    assert chat2["gpt"]
+    chat3 = chat_completion.chat_sync(prompt="Where is the headquarters?")
+    assert chat3["user"] == "Where is the headquarters?"
+    assert chat3["gpt"]
+    chat_history = chat_completion.chat_history()
+    assert chat_history == [chat1, chat2, chat3]
+    print(chat_history)
+def test_available_models(client):
+    models = client.models.list()
+    assert len(models)
+    print(models)
+def test_server_properties(client, server_url):
+    assert client.server.address.startswith(server_url)
+    assert client.server.hash
+def test_parameters_order(client, eval_func_param_names):
+    text_completion = client.text_completion.create()
+    assert eval_func_param_names == list(text_completion._parameters.keys())
+    chat_completion = client.chat_completion.create()
+    assert eval_func_param_names == list(chat_completion._parameters.keys())
+# Runs the README walkthrough twice: once against a local server at
+# http://0.0.0.0:7860 (local_server=True) and once against https://gpt.h2o.ai
+# (local_server=False). The second variant returns early, and so passes without
+# exercising anything, when neither H2OGPT_KEY nor H2OGPT_H2OGPT_KEY is set.
+# The body is kept free of fixtures and shared helpers because it is copied
+# into client/README.md.
+@pytest.mark.parametrize("local_server", [True, False])
+def test_readme_example(local_server):
+    # self-contained example used for readme,
+    # to be copied to client/README.md if changed, setting local_server = True at first
+    import asyncio
+    import os
+    from h2ogpt_client import Client
+    if local_server:
+        client = Client("http://0.0.0.0:7860")
+    else:
+        h2ogpt_key = os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
+        if h2ogpt_key is None:
+            return
+        # if you have API key for public instance:
+        client = Client("https://gpt.h2o.ai", h2ogpt_key=h2ogpt_key)
+    # Text completion
+    text_completion = client.text_completion.create()
+    response = asyncio.run(text_completion.complete("Hello world"))
+    print("asyncio text completion response: %s" % response)
+    # Text completion: synchronous
+    response = text_completion.complete_sync("Hello world")
+    print("sync text completion response: %s" % response)
+    # Chat completion
+    chat_completion = client.chat_completion.create()
+    reply = asyncio.run(chat_completion.chat("Hey!"))
+    print("asyncio text completion user: %s gpt: %s" % (reply["user"], reply["gpt"]))
+    chat_history = chat_completion.chat_history()
+    print("chat_history: %s" % chat_history)
+    # Chat completion: synchronous
+    reply = chat_completion.chat_sync("Hey!")
+    print("sync chat completion gpt: %s" % reply["gpt"])

cloud/packer/Jenkinsfile ADDED Viewed

	@@ -0,0 +1,80 @@

+// Scripted pipeline that builds the h2oGPT machine images for GCP and Azure
+// with Packer, inside the h2oai-packer-build container.
+//
+// Parameters:
+//   BRANCH_TAG      - passed to Packer as the 'branch_tag' variable.
+//   AZURE / GCP     - toggle the respective image build.
+//   H2OGPT_VERSION  - passed to Packer as 'h2ogpt_version' and shown in the build name.
+import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
+properties(
+    [
+        parameters(
+            [
+                string(name: 'BRANCH_TAG', defaultValue: 'origin/main'),
+                booleanParam(name: 'AZURE', defaultValue: true, description: 'Make Azure Machine Image/Not?'),
+                booleanParam(name: 'GCP', defaultValue: true, description: 'Make GCP Image/Not?'),
+                string(name: 'H2OGPT_VERSION', defaultValue: "010", description: 'Example: for version 1.10.5 use 1105')
+            ]
+        )
+    ]
+)
+node('linux && docker') {
+    stage('Init') {
+        cleanWs()
+        currentBuild.displayName = "#${BUILD_NUMBER} - Rel:${H2OGPT_VERSION}"
+        checkout scm
+        sh('ls -al')
+    }
+    stage('Build Images') {
+        try {
+            docker.image('harbor.h2o.ai/opsh2oai/h2oai-packer-build:2').inside {
+                parallel([
+                        "GCP Ubuntu 20.04": {
+                            withCredentials([file(credentialsId: 'GCP_MARKETPLACE_SERVICE_ACCOUNT', variable: 'GCP_ACCOUNT_FILE')]) {
+                                dir('cloud/packer') {
+                                    if (params.GCP) {
+                                        // Triple-single-quoted string: Groovy does NOT interpolate it, so the
+                                        // credential and the user-supplied parameters are expanded by the shell
+                                        // from environment variables. Double quotes keep each value one argument.
+                                        sh('''packer build \
+                                            --force \
+                                            -var 'project_id=h2o-gce' \
+                                            -var "account_file=$GCP_ACCOUNT_FILE" \
+                                            -var "h2ogpt_version=$H2OGPT_VERSION" \
+                                            -var "branch_tag=$BRANCH_TAG" \
+                                            h2ogpt-gcp.json'''
+                                        )
+                                        archiveArtifacts artifacts: '*-image-info.json'
+                                    }else {
+                                        Utils.markStageSkippedForConditional('GCP Ubuntu 20.04')
+                                    }
+                                }
+                            }
+                        },
+                         "AZURE Ubuntu 20.04": {
+                            withCredentials([string(credentialsId: "AZURE_MARKETPLACE_CLIENT_ID", variable: "AZURE_CLIENT_ID"),
+                                             string(credentialsId: "AZURE_MARKETPLACE_CLIENT_SECRET", variable: "AZURE_CLIENT_SECRET"),
+                                             string(credentialsId: "AZURE_MARKETPLACE_SUBSCRIPTION_ID", variable: "AZURE_SUBSCRIPTION_ID"),
+                                             string(credentialsId: "AZURE_MARKETPLACE_TENANT_ID", variable: "AZURE_TENANT_ID")]) {
+                                dir('cloud/packer') {
+                                    if (params.AZURE) {
+                                        // Same as above: secrets are resolved by the shell, never by Groovy,
+                                        // so they are not embedded in the command string Jenkins records.
+                                        sh('''packer build \
+                                            --force \
+                                            -var "client_id=$AZURE_CLIENT_ID" \
+                                            -var "client_secret=$AZURE_CLIENT_SECRET" \
+                                            -var 'managed_image_resource_group_name=H2OIMAGES' \
+                                            -var "subscription_id=$AZURE_SUBSCRIPTION_ID" \
+                                            -var "tenant_id=$AZURE_TENANT_ID" \
+                                            -var "h2ogpt_version=$H2OGPT_VERSION" \
+                                            -var "branch_tag=$BRANCH_TAG" \
+                                            h2ogpt-azure.json'''
+                                        )
+                                        archiveArtifacts artifacts: '*-image-info.json'
+                                    }else {
+                                        Utils.markStageSkippedForConditional('AZURE Ubuntu 20.04')
+                                    }
+                                }
+                            }
+                        },
+                ])
+            }
+        } finally {
+            // Always wipe the workspace, even when a Packer build fails.
+            cleanWs()
+        }
+    }
+}

cloud/packer/README.md ADDED Viewed

	@@ -0,0 +1,22 @@

+# h2oGPT Packer Templates
+These scripts help create images in public clouds that can then be submitted to Azure/GCP Marketplace for commercial use.
+### Packer Scripts
+- Azure - `h2ogpt-azure.json`
+- GCP - `h2ogpt-gcp.json`
+### Provisioning Scripts
+ - `setup_environment.sh`
+    - Responsible for setting up CUDA, GCC, Nginx, Python
+- `install_h2ogpt.sh`
+    - Responsible for setting up h2oGPT with its dependencies
+- `h2oai-h2ogpt-4096-llama2-13b-chat.sh`
+    - Responsible for setting up the default model h2oai-h2ogpt-4096-llama2-13b-chat with vLLM, exposed on port 80 via Nginx
+    - vLLM, h2oGPT and Nginx are executed through services
+    - The model is downloaded at runtime
+__Jenkins Pipeline__: http://jenkins.h2o.local:8080/job/build-h2ogpt-cloud-images/
+### Notes:
+ - Since the model is downloaded at runtime after the VM is provisioned, it takes around 5-10 minutes for h2oGPT to start correctly

cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh ADDED Viewed

	@@ -0,0 +1,11 @@

+#!/bin/bash -e
+sudo systemctl daemon-reload
+sudo systemctl enable h2ogpt_nginx.service
+sudo systemctl enable vllm.service
+sudo systemctl enable h2ogpt.service
+cd "$HOME"
+# sudo rm -rf "$HOME"/.cache/huggingface/hub/
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y autoremove
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y clean

cloud/packer/h2ogpt-azure.json ADDED Viewed

	@@ -0,0 +1,123 @@

+{
+    "variables": {
+        "client_id": "<AZURE CLIENT ID>",
+        "client_secret": "<AZURE CLIENT SECRET>",
+        "subscription_id": "92429150-401a-431f-8955-e69c0c119e68",
+        "tenant_id": "840229f2-c911-49e6-a73d-5b3a4311835a",
+        "managed_image_resource_group_name": "H2OIMAGES",
+        "h2ogpt_version": "010",
+        "branch_tag": "main",
+        "base_model": "h2oai-h2ogpt-4096-llama2-13b-chat"
+    },
+    "builders": [
+        {
+            "type": "azure-arm",
+            "client_id": "{{user `client_id`}}",
+            "client_secret": "{{user `client_secret`}}",
+            "subscription_id": "{{user `subscription_id`}}",
+            "tenant_id": "{{user `tenant_id`}}",
+            "capture_container_name": "h2ovhdimages",
+            "capture_name_prefix": "h2ogpt-{{user `h2ogpt_version`}}",
+            "resource_group_name": "{{user `managed_image_resource_group_name`}}",
+            "temp_resource_group_name": "Engineering_DevOps_h2oGPT-Ubuntu",
+            "storage_account": "h2ovhdimages",
+            "os_type": "Linux",
+            "image_publisher": "Canonical",
+            "image_offer": "0001-com-ubuntu-server-focal",
+            "image_sku": "20_04-lts",
+            "os_disk_size_gb": 512,
+            "azure_tags": {
+                "dept": "Engineering",
+                "task": "Image deployment",
+                "Name": "H2OGPT-CLOUD-IMAGES",
+                "Owner": "ops@h2o.ai",
+                "Project": "DevOps",
+                "Department": "Engineering",
+                "Environment": "Dev",
+                "Scheduling": "self-managed"
+            },
+            "location": "East US",
+            "vm_size": "Standard_NC24s_v3",
+            "ssh_username": "ubuntu"
+        }
+    ],
+    "post-processors": [
+        {
+            "type": "manifest",
+            "output": "azure-ubuntu-image-info.json",
+            "strip_path": true,
+            "custom_data": {
+                "base_image": "AZURE Ubuntu 20.04",
+                "h2ogpt_version": "{{user `h2ogpt_version`}}"
+            }
+        }
+    ],
+    "provisioners": [
+      {
+        "type": "shell",
+        "script": "setup_environment.sh",
+        "pause_before": "10s",
+        "pause_after": "10s"
+      },
+      {
+        "type": "shell",
+        "inline": ["sudo reboot now"],
+        "pause_after": "10s",
+        "expect_disconnect": true
+      },
+      {
+        "type": "shell",
+        "environment_vars": ["BRANCH_TAG={{user `branch_tag`}}"],
+        "script": "install_h2ogpt.sh",
+        "pause_after": "10s"
+      },
+      {
+        "type": "shell",
+        "inline": [
+          "sudo chown -R ubuntu:ubuntu /etc/nginx/conf.d",
+          "sudo chown -R ubuntu:ubuntu /etc/systemd/system/"
+        ],
+        "pause_before": "10s"
+      },
+      {
+        "type":  "file",
+        "source": "./startup-scripts/run_nginx.sh",
+        "destination": "/workspace/run_nginx.sh"
+      },
+      {
+        "type":  "file",
+        "source": "./startup-scripts/run_vllm.sh",
+        "destination": "/workspace/run_vllm.sh"
+      },
+      {
+        "type":  "file",
+        "source": "./startup-scripts/run_h2ogpt.sh",
+        "destination": "/workspace/run_h2ogpt.sh"
+      },
+      {
+        "type":  "file",
+        "source": "./startup-scripts/h2ogpt_nginx.service",
+        "destination": "/etc/systemd/system/h2ogpt_nginx.service"
+      },
+      {
+        "type":  "file",
+        "source": "./startup-scripts/vllm.service",
+        "destination": "/etc/systemd/system/vllm.service"
+      },
+      {
+        "type":  "file",
+        "source": "./startup-scripts/h2ogpt.service",
+        "destination": "/etc/systemd/system/h2ogpt.service"
+      },
+      {
+        "type":  "file",
+        "source": "./startup-scripts/temp.conf",
+        "destination": "/workspace/temp.conf"
+      },
+      {
+        "type": "shell",
+        "script": "{{user `base_model`}}.sh",
+        "pause_after": "10s"
+      }
+      ]
+}

cloud/packer/h2ogpt-gcp.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "variables": {
+    "project_id": "eng-llm",
+    "account_file": "<NAME OF GCP CREDENTIALS JSON FILE>",
+    "h2ogpt_version": "010",
+    "branch_tag": "main",
+    "base_model": "h2oai-h2ogpt-4096-llama2-13b-chat"
+  },
+  "builders": [
+    {
+      "type": "googlecompute",
+      "project_id": "{{user `project_id`}}",
+      "account_file": "{{user `account_file`}}",
+      "machine_type": "n1-standard-8",
+      "on_host_maintenance": "TERMINATE",
+      "accelerator_type": "projects/{{user `project_id`}}/zones/us-west1-b/acceleratorTypes/nvidia-tesla-t4",
+      "accelerator_count": "4",
+      "source_image_family": "ubuntu-2004-lts",
+      "zone": "us-west1-b",
+      "image_description": "h2ogpt using Packer",
+      "image_name": "h2ogpt-{{user `h2ogpt_version`}}",
+      "disk_size": 512,
+      "disk_type": "pd-ssd",
+      "ssh_username": "ubuntu",
+      "tags": ["h2ogpt"]
+    }
+  ],
+  "post-processors": [
+    {
+      "type": "manifest",
+      "output": "gcp-image-info.json",
+      "strip_path": true,
+      "custom_data": {
+        "base_image": "GCP Ubuntu 20.04",
+        "h2ogpt_version": "{{user `h2ogpt_version`}}"
+      }
+    }
+  ],
+  "provisioners": [
+    {
+      "type": "shell",
+      "script": "setup_environment.sh",
+      "pause_before": "10s",
+      "pause_after": "10s"
+    },
+    {
+      "type": "shell",
+      "inline": ["sudo reboot now"],
+      "pause_after": "10s",
+      "expect_disconnect": true
+    },
+    {
+      "type": "shell",
+      "environment_vars": ["BRANCH_TAG={{user `branch_tag`}}"],
+      "script": "install_h2ogpt.sh",
+      "pause_after": "10s"
+    },
+    {
+      "type": "shell",
+      "inline": [
+        "sudo chown -R ubuntu:ubuntu /etc/nginx/conf.d",
+        "sudo chown -R ubuntu:ubuntu /etc/systemd/system/"
+      ],
+      "pause_before": "10s"
+    },
+    {
+      "type":  "file",
+      "source": "./startup-scripts/run_nginx.sh",
+      "destination": "/workspace/run_nginx.sh"
+    },
+    {
+      "type":  "file",
+      "source": "./startup-scripts/run_vllm.sh",
+      "destination": "/workspace/run_vllm.sh"
+    },
+    {
+      "type":  "file",
+      "source": "./startup-scripts/run_h2ogpt.sh",
+      "destination": "/workspace/run_h2ogpt.sh"
+    },
+    {
+      "type":  "file",
+      "source": "./startup-scripts/h2ogpt_nginx.service",
+      "destination": "/etc/systemd/system/h2ogpt_nginx.service"
+    },
+    {
+      "type":  "file",
+      "source": "./startup-scripts/vllm.service",
+      "destination": "/etc/systemd/system/vllm.service"
+    },
+    {
+      "type":  "file",
+      "source": "./startup-scripts/h2ogpt.service",
+      "destination": "/etc/systemd/system/h2ogpt.service"
+    },
+    {
+      "type":  "file",
+      "source": "./startup-scripts/temp.conf",
+      "destination": "/workspace/temp.conf"
+    },
+    {
+      "type": "shell",
+      "script": "{{user `base_model`}}.sh",
+      "pause_after": "10s"
+    }
+  ]
+}

cloud/packer/install_h2ogpt.sh ADDED Viewed

	@@ -0,0 +1,19 @@

+#!/bin/bash -e
+export PATH=$PATH:/home/ubuntu/.local/bin
+sudo mkdir -p /workspace && cd /workspace
+sudo chmod a+rwx .
+git config --global --add safe.directory /workspace
+git config --global advice.detachedHead false
+git clone https://github.com/h2oai/h2ogpt.git .
+if [ -z "$BRANCH_TAG" ]; then
+  echo "BRANCH_TAG environment variable is not set."
+  exit 1
+fi
+git checkout $BRANCH_TAG
+ls -la
+sudo ./docker_build_script_ubuntu.sh

cloud/packer/setup_environment.sh ADDED Viewed

	@@ -0,0 +1,46 @@

+#!/bin/bash -e
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y update
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y --no-install-recommends install \
+  git \
+  software-properties-common \
+  pandoc \
+  curl \
+  apt-utils \
+  make \
+  build-essential \
+  wget \
+  gnupg2 \
+  ca-certificates \
+  lsb-release \
+  ubuntu-keyring
+curl https://nginx.org/keys/nginx_signing.key | gpg --dearmor | sudo tee /usr/share/keyrings/nginx-archive-keyring.gpg >/dev/null
+gpg --dry-run --quiet --no-keyring --import --import-options import-show /usr/share/keyrings/nginx-archive-keyring.gpg
+echo "deb [signed-by=/usr/share/keyrings/nginx-archive-keyring.gpg] http://nginx.org/packages/ubuntu `lsb_release -cs` nginx" sudo tee /etc/apt/sources.list.d/nginx.list
+echo -e "Package: *\nPin: origin nginx.org\nPin: release o=nginx\nPin-Priority: 900\n" sudo tee /etc/apt/preferences.d/99nginx
+sudo DEBIAN_FRONTEND=noninteractive apt -y update
+sudo DEBIAN_FRONTEND=noninteractive apt -y install nginx
+MAX_GCC_VERSION=11
+sudo DEBIAN_FRONTEND=noninteractive add-apt-repository -y ppa:ubuntu-toolchain-r/test
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y install gcc-$MAX_GCC_VERSION g++-$MAX_GCC_VERSION
+sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-$MAX_GCC_VERSION 100
+sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-$MAX_GCC_VERSION 100
+sudo update-alternatives --set gcc /usr/bin/gcc-$MAX_GCC_VERSION
+sudo update-alternatives --set g++ /usr/bin/g++-$MAX_GCC_VERSION
+wget --quiet https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+wget --quiet https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb
+sudo dpkg -i cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb
+sudo cp /var/cuda-repo-ubuntu2004-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y update
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y install cuda
+sudo rm -rf "*.deb"
+sudo echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64/" >> ~/.bashrc
+sudo echo "export CUDA_HOME=/usr/local/cuda-11.8" >> ~/.bashrc
+sudo echo "export PATH=$PATH:/h2ogpt_conda/bin:/usr/local/cuda-11.8/bin/" >> ~/.bashrc

cloud/packer/startup-scripts/h2ogpt.service ADDED Viewed

	@@ -0,0 +1,12 @@

+# systemd unit that starts the h2oGPT server by running /workspace/run_h2ogpt.sh
+# as user "ubuntu" from /workspace.
+[Unit]
+Description=h2oGPT Server
+After=network.target
+[Service]
+Type=simple
+User=ubuntu
+WorkingDirectory=/workspace
+# The script is passed to bash as an argument, so the "-e" option in the
+# script's shebang line is not applied.
+ExecStart=/usr/bin/bash /workspace/run_h2ogpt.sh
+[Install]
+WantedBy=multi-user.target

cloud/packer/startup-scripts/h2ogpt_nginx.service ADDED Viewed

	@@ -0,0 +1,12 @@

+# systemd unit that runs /workspace/run_nginx.sh as user "ubuntu"; that script
+# writes the Nginx site config for h2oGPT and restarts nginx.service.
+[Unit]
+Description=h2oGPT Nginx Server
+After=network.target
+[Service]
+Type=simple
+User=ubuntu
+WorkingDirectory=/workspace
+# The script is passed to bash as an argument, so the "-e" option in the
+# script's shebang line is not applied.
+ExecStart=/usr/bin/bash /workspace/run_nginx.sh
+[Install]
+WantedBy=multi-user.target

cloud/packer/startup-scripts/run_h2ogpt.sh ADDED Viewed

	@@ -0,0 +1,26 @@

+#!/bin/bash -e
+while true; do
+  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+      "model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+      "prompt": "San Francisco is a",
+      "max_tokens": 7,
+      "temperature": 0
+    }')
+  if [ "$http_code" -eq 200 ]; then
+    echo "Received HTTP 200 status code. Starting h2ogpt service"
+    CUDA_VISIBLE_DEVICES=$(seq -s, $(($(nvidia-smi -L | wc -l) / 2)) $(($(nvidia-smi -L | wc -l) - 1))) /h2ogpt_conda/bin/python3.10 \
+      /workspace/generate.py \
+      --inference_server="vllm:0.0.0.0:5000" \
+      --base_model=h2oai/h2ogpt-4096-llama2-13b-chat \
+      --langchain_mode=UserData
+    break
+  else
+    echo "Received HTTP $http_code status code. Retrying in 5 seconds..."
+    sleep 5
+  fi
+done

cloud/packer/startup-scripts/run_nginx.sh ADDED Viewed

	@@ -0,0 +1,23 @@

+#!/bin/bash -e
+while true; do
+  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+      "model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+      "prompt": "San Francisco is a",
+      "max_tokens": 7,
+      "temperature": 0
+    }')
+  if [ "$http_code" -eq 200 ]; then
+    echo "Received HTTP 200 status code. Restarting Nginx for h2oGPT"
+    ip=$(dig +short myip.opendns.com @resolver1.opendns.com)
+    sed "s/<|_SUBST_PUBLIC_IP|>;/$ip;/g" /workspace/temp.conf  > /etc/nginx/conf.d/h2ogpt.conf
+    sudo systemctl restart nginx.service
+    break
+  else
+    echo "Received HTTP $http_code status code. Retrying in 5 seconds..."
+    sleep 5
+  fi
+done

cloud/packer/startup-scripts/run_vllm.sh ADDED Viewed

	@@ -0,0 +1,10 @@

+#!/bin/bash -e
+tps=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l | awk '{if ($1 > 1) print int($1/2); else print 1}')
+NCCL_IGNORE_DISABLED_P2P=1 CUDA_VISIBLE_DEVICES=$(seq -s, 0 $(($(nvidia-smi -L | wc -l) > 1 ? $(nvidia-smi -L | wc -l) / 2 - 1 : 0))) \
+/h2ogpt_conda/vllm_env/bin/python3.10 -m vllm.entrypoints.openai.api_server \
+    --port=5000 \
+    --host=0.0.0.0 \
+    --model h2oai/h2ogpt-4096-llama2-13b-chat \
+    --tokenizer=hf-internal-testing/llama-tokenizer \
+    --tensor-parallel-size=$tps --seed 1234

cloud/packer/startup-scripts/temp.conf ADDED Viewed

	@@ -0,0 +1,14 @@

+# Template for the Nginx reverse proxy in front of the Gradio app on port 7860.
+# run_nginx.sh fills in the server_name placeholder with the machine's public IP
+# and writes the result to /etc/nginx/conf.d/h2ogpt.conf.
+server {
+    listen 80;
+    listen [::]:80;
+    server_name <|_SUBST_PUBLIC_IP|>;  # Change this to your domain name
+    location / {  # Change this if you'd like to serve your Gradio app on a different path
+        proxy_pass http://0.0.0.0:7860/; # Change this if your Gradio app will be running on a different port
+        proxy_redirect off;
+        # HTTP/1.1 plus the Upgrade/Connection headers below let WebSocket
+        # connections be proxied to the backend.
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection "upgrade";
+        # Forward the original Host header to the backend.
+        proxy_set_header Host $host;
+    }
+}

cloud/packer/startup-scripts/vllm.service ADDED Viewed

	@@ -0,0 +1,12 @@

+# systemd unit that starts the vLLM API server by running /workspace/run_vllm.sh
+# as user "ubuntu" from /workspace.
+[Unit]
+Description=vLLM Server
+After=network.target
+[Service]
+Type=simple
+User=ubuntu
+WorkingDirectory=/workspace
+# The script is passed to bash as an argument, so the "-e" option in the
+# script's shebang line is not applied.
+ExecStart=/usr/bin/bash /workspace/run_vllm.sh
+[Install]
+WantedBy=multi-user.target