Commit
·
2195fa8
1
Parent(s):
93d4ebf
Downloading instead of hardcoding llmperf
Browse files- llmperf/.gitignore +0 -247
- llmperf/LICENSE.txt +0 -202
- llmperf/NOTICE.txt +0 -14
- llmperf/README.md +0 -415
- llmperf/analyze-token-benchmark-results.ipynb +0 -327
- llmperf/llm_correctness.py +0 -309
- llmperf/pre-commit.sh +0 -5
- llmperf/pyproject.toml +0 -23
- llmperf/requirements-dev.txt +0 -2
- llmperf/src/llmperf/__init__.py +0 -1
- llmperf/src/llmperf/common.py +0 -38
- llmperf/src/llmperf/common_metrics.py +0 -17
- llmperf/src/llmperf/models.py +0 -21
- llmperf/src/llmperf/ray_clients/__init__.py +0 -0
- llmperf/src/llmperf/ray_clients/litellm_client.py +0 -100
- llmperf/src/llmperf/ray_clients/openai_chat_completions_client.py +0 -120
- llmperf/src/llmperf/ray_clients/sagemaker_client.py +0 -158
- llmperf/src/llmperf/ray_clients/vertexai_client.py +0 -135
- llmperf/src/llmperf/ray_llm_client.py +0 -22
- llmperf/src/llmperf/requests_launcher.py +0 -48
- llmperf/src/llmperf/sonnet.txt +0 -84
- llmperf/src/llmperf/utils.py +0 -147
- llmperf/token_benchmark_ray.py +0 -469
- on_startup.sh +6 -0
- requirements.txt +7 -6
llmperf/.gitignore
DELETED
@@ -1,247 +0,0 @@
|
|
1 |
-
# The build output should clearly not be checked in
|
2 |
-
*test-output.xml
|
3 |
-
/bazel-*
|
4 |
-
/python/ray/core
|
5 |
-
/python/ray/pickle5_files/
|
6 |
-
/python/ray/thirdparty_files/
|
7 |
-
/python/ray/pyarrow_files/
|
8 |
-
/python/ray/jars/
|
9 |
-
/python/ray/cpp/
|
10 |
-
/python/build
|
11 |
-
/python/dist
|
12 |
-
/python/python-driver-*
|
13 |
-
/python/ray/serve/generated
|
14 |
-
/thirdparty/pkg/
|
15 |
-
/build/java
|
16 |
-
.jar
|
17 |
-
/dashboard/client/build
|
18 |
-
|
19 |
-
# Files generated by flatc should be ignored
|
20 |
-
/src/ray/gcs/format/*_generated.h
|
21 |
-
/src/ray/object_manager/format/*_generated.h
|
22 |
-
/src/ray/raylet/format/*_generated.h
|
23 |
-
/java/runtime/src/main/java/io/ray/runtime/generated/*
|
24 |
-
/java/serve/src/main/java/io/ray/serve/generated/*
|
25 |
-
|
26 |
-
# Files generated by c++ worker should be ignored.
|
27 |
-
/cpp/example/thirdparty/
|
28 |
-
/cpp/example/bazel-*
|
29 |
-
/python/ray/cpp
|
30 |
-
|
31 |
-
# Redis temporary files
|
32 |
-
*dump.rdb
|
33 |
-
|
34 |
-
# Python byte code files
|
35 |
-
*.pyc
|
36 |
-
python/.eggs
|
37 |
-
*.egg-info
|
38 |
-
|
39 |
-
# Backup files
|
40 |
-
*.bak
|
41 |
-
|
42 |
-
# Emacs temporary files
|
43 |
-
*~
|
44 |
-
*#
|
45 |
-
|
46 |
-
# Compiled Object files
|
47 |
-
*.slo
|
48 |
-
*.lo
|
49 |
-
*.o
|
50 |
-
*.xo
|
51 |
-
*.obj
|
52 |
-
|
53 |
-
# Precompiled Headers
|
54 |
-
*.gch
|
55 |
-
*.pch
|
56 |
-
|
57 |
-
# Compiled Dynamic libraries
|
58 |
-
*.so
|
59 |
-
*.dylib
|
60 |
-
*.dll
|
61 |
-
python/ray/_raylet.pyd
|
62 |
-
|
63 |
-
# Incremental linking files
|
64 |
-
*.ilk
|
65 |
-
|
66 |
-
# Library export files
|
67 |
-
*.exp
|
68 |
-
|
69 |
-
# Debug symbols
|
70 |
-
*.pdb
|
71 |
-
|
72 |
-
# Fortran module files
|
73 |
-
*.mod
|
74 |
-
!deploy/ray-operator/go.mod
|
75 |
-
|
76 |
-
# Compiled Static libraries
|
77 |
-
*.lai
|
78 |
-
*.la
|
79 |
-
*.a
|
80 |
-
*.lib
|
81 |
-
|
82 |
-
# Executables
|
83 |
-
*.exe
|
84 |
-
*.out
|
85 |
-
*.app
|
86 |
-
|
87 |
-
# Visual Studio files
|
88 |
-
/packages
|
89 |
-
*.suo
|
90 |
-
*.user
|
91 |
-
*.VC.db
|
92 |
-
*.VC.opendb
|
93 |
-
|
94 |
-
# Protobuf-generated files
|
95 |
-
*_pb2.py
|
96 |
-
*.pb.h
|
97 |
-
*.pb.cc
|
98 |
-
|
99 |
-
# Ray cluster configuration
|
100 |
-
scripts/nodes.txt
|
101 |
-
|
102 |
-
# OS X folder attributes
|
103 |
-
.DS_Store
|
104 |
-
|
105 |
-
# Debug files
|
106 |
-
*.dSYM/
|
107 |
-
*.su
|
108 |
-
|
109 |
-
# Python setup files
|
110 |
-
*.egg-info
|
111 |
-
|
112 |
-
# Compressed files
|
113 |
-
*.gz
|
114 |
-
|
115 |
-
# Datasets from examples
|
116 |
-
**/MNIST_data/
|
117 |
-
**/cifar-10-batches-bin/
|
118 |
-
|
119 |
-
# Generated documentation files
|
120 |
-
/doc/_build
|
121 |
-
/doc/source/_static/thumbs
|
122 |
-
/doc/source/tune/generated_guides/
|
123 |
-
/doc/source/**/doc/
|
124 |
-
|
125 |
-
# User-specific stuff:
|
126 |
-
.idea/**/workspace.xml
|
127 |
-
.idea/**/tasks.xml
|
128 |
-
.idea/dictionaries
|
129 |
-
.llvm-local.bazelrc
|
130 |
-
|
131 |
-
# Sensitive or high-churn files:
|
132 |
-
.idea/**/dataSources/
|
133 |
-
.idea/**/dataSources.ids
|
134 |
-
.idea/**/dataSources.xml
|
135 |
-
.idea/**/dataSources.local.xml
|
136 |
-
.idea/**/sqlDataSources.xml
|
137 |
-
.idea/**/dynamic.xml
|
138 |
-
.idea/**/uiDesigner.xml
|
139 |
-
|
140 |
-
# Gradle:
|
141 |
-
.idea/**/gradle.xml
|
142 |
-
.idea/**/libraries
|
143 |
-
.idea
|
144 |
-
|
145 |
-
# Website
|
146 |
-
/site/Gemfile.lock
|
147 |
-
/site/.sass-cache
|
148 |
-
/site/_site
|
149 |
-
|
150 |
-
# Pytest Cache
|
151 |
-
**/.pytest_cache
|
152 |
-
**/.cache
|
153 |
-
.benchmarks
|
154 |
-
python-driver-*
|
155 |
-
|
156 |
-
# Vscode
|
157 |
-
.vscode/
|
158 |
-
|
159 |
-
*.iml
|
160 |
-
|
161 |
-
# Java
|
162 |
-
java/**/target
|
163 |
-
java/**/lib
|
164 |
-
java/**/.settings
|
165 |
-
java/**/.classpath
|
166 |
-
java/**/.project
|
167 |
-
java/runtime/native_dependencies/
|
168 |
-
java/testng_custom.xml
|
169 |
-
|
170 |
-
dependency-reduced-pom.xml
|
171 |
-
|
172 |
-
# Cpp
|
173 |
-
cpp/example/thirdparty/
|
174 |
-
|
175 |
-
.clwb
|
176 |
-
|
177 |
-
# pom.xml files generated from pom_template.xml
|
178 |
-
java/**/pom.xml
|
179 |
-
|
180 |
-
# python virtual env
|
181 |
-
venv
|
182 |
-
|
183 |
-
# pyenv version file
|
184 |
-
.python-version
|
185 |
-
|
186 |
-
# Vim
|
187 |
-
.*.swp
|
188 |
-
*.swp
|
189 |
-
.*.swo
|
190 |
-
*.swo
|
191 |
-
tags
|
192 |
-
tags.lock
|
193 |
-
tags.temp
|
194 |
-
*.vim
|
195 |
-
|
196 |
-
# Emacs
|
197 |
-
.#*
|
198 |
-
|
199 |
-
# tools
|
200 |
-
tools/prometheus*
|
201 |
-
|
202 |
-
# ray project files
|
203 |
-
project-id
|
204 |
-
.mypy_cache/
|
205 |
-
|
206 |
-
# release test related
|
207 |
-
.anyscale.yaml
|
208 |
-
test_state.json
|
209 |
-
|
210 |
-
# workflow storage
|
211 |
-
workflow_data/
|
212 |
-
|
213 |
-
# vscode java extension generated
|
214 |
-
.factorypath
|
215 |
-
|
216 |
-
# Jupyter Notebooks
|
217 |
-
**/.ipynb_checkpoints/
|
218 |
-
|
219 |
-
### Added by Hedron's Bazel Compile Commands Extractor: https://github.com/hedronvision/bazel-compile-commands-extractor
|
220 |
-
# The external link: Differs on Windows vs macOS/Linux, so we can't check it in. The pattern needs to not have a trailing / because it's a symlink on macOS/Linux.
|
221 |
-
/external
|
222 |
-
# Compiled output -> don't check in
|
223 |
-
/compile_commands.json
|
224 |
-
# Directory where clangd puts its indexing work
|
225 |
-
/.cache/
|
226 |
-
|
227 |
-
# Auto-generated tag mapping
|
228 |
-
tag-mapping.json
|
229 |
-
|
230 |
-
.bazeliskrc
|
231 |
-
|
232 |
-
# ignore tmp files
|
233 |
-
*.tmp
|
234 |
-
out
|
235 |
-
temp*
|
236 |
-
|
237 |
-
# build output
|
238 |
-
build/
|
239 |
-
dist/
|
240 |
-
|
241 |
-
# results
|
242 |
-
output/
|
243 |
-
*.json
|
244 |
-
result_outputs/
|
245 |
-
|
246 |
-
__pycache__
|
247 |
-
**/__pycache__/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/LICENSE.txt
DELETED
@@ -1,202 +0,0 @@
|
|
1 |
-
|
2 |
-
Apache License
|
3 |
-
Version 2.0, January 2004
|
4 |
-
http://www.apache.org/licenses/
|
5 |
-
|
6 |
-
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7 |
-
|
8 |
-
1. Definitions.
|
9 |
-
|
10 |
-
"License" shall mean the terms and conditions for use, reproduction,
|
11 |
-
and distribution as defined by Sections 1 through 9 of this document.
|
12 |
-
|
13 |
-
"Licensor" shall mean the copyright owner or entity authorized by
|
14 |
-
the copyright owner that is granting the License.
|
15 |
-
|
16 |
-
"Legal Entity" shall mean the union of the acting entity and all
|
17 |
-
other entities that control, are controlled by, or are under common
|
18 |
-
control with that entity. For the purposes of this definition,
|
19 |
-
"control" means (i) the power, direct or indirect, to cause the
|
20 |
-
direction or management of such entity, whether by contract or
|
21 |
-
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22 |
-
outstanding shares, or (iii) beneficial ownership of such entity.
|
23 |
-
|
24 |
-
"You" (or "Your") shall mean an individual or Legal Entity
|
25 |
-
exercising permissions granted by this License.
|
26 |
-
|
27 |
-
"Source" form shall mean the preferred form for making modifications,
|
28 |
-
including but not limited to software source code, documentation
|
29 |
-
source, and configuration files.
|
30 |
-
|
31 |
-
"Object" form shall mean any form resulting from mechanical
|
32 |
-
transformation or translation of a Source form, including but
|
33 |
-
not limited to compiled object code, generated documentation,
|
34 |
-
and conversions to other media types.
|
35 |
-
|
36 |
-
"Work" shall mean the work of authorship, whether in Source or
|
37 |
-
Object form, made available under the License, as indicated by a
|
38 |
-
copyright notice that is included in or attached to the work
|
39 |
-
(an example is provided in the Appendix below).
|
40 |
-
|
41 |
-
"Derivative Works" shall mean any work, whether in Source or Object
|
42 |
-
form, that is based on (or derived from) the Work and for which the
|
43 |
-
editorial revisions, annotations, elaborations, or other modifications
|
44 |
-
represent, as a whole, an original work of authorship. For the purposes
|
45 |
-
of this License, Derivative Works shall not include works that remain
|
46 |
-
separable from, or merely link (or bind by name) to the interfaces of,
|
47 |
-
the Work and Derivative Works thereof.
|
48 |
-
|
49 |
-
"Contribution" shall mean any work of authorship, including
|
50 |
-
the original version of the Work and any modifications or additions
|
51 |
-
to that Work or Derivative Works thereof, that is intentionally
|
52 |
-
submitted to Licensor for inclusion in the Work by the copyright owner
|
53 |
-
or by an individual or Legal Entity authorized to submit on behalf of
|
54 |
-
the copyright owner. For the purposes of this definition, "submitted"
|
55 |
-
means any form of electronic, verbal, or written communication sent
|
56 |
-
to the Licensor or its representatives, including but not limited to
|
57 |
-
communication on electronic mailing lists, source code control systems,
|
58 |
-
and issue tracking systems that are managed by, or on behalf of, the
|
59 |
-
Licensor for the purpose of discussing and improving the Work, but
|
60 |
-
excluding communication that is conspicuously marked or otherwise
|
61 |
-
designated in writing by the copyright owner as "Not a Contribution."
|
62 |
-
|
63 |
-
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64 |
-
on behalf of whom a Contribution has been received by Licensor and
|
65 |
-
subsequently incorporated within the Work.
|
66 |
-
|
67 |
-
2. Grant of Copyright License. Subject to the terms and conditions of
|
68 |
-
this License, each Contributor hereby grants to You a perpetual,
|
69 |
-
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70 |
-
copyright license to reproduce, prepare Derivative Works of,
|
71 |
-
publicly display, publicly perform, sublicense, and distribute the
|
72 |
-
Work and such Derivative Works in Source or Object form.
|
73 |
-
|
74 |
-
3. Grant of Patent License. Subject to the terms and conditions of
|
75 |
-
this License, each Contributor hereby grants to You a perpetual,
|
76 |
-
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77 |
-
(except as stated in this section) patent license to make, have made,
|
78 |
-
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79 |
-
where such license applies only to those patent claims licensable
|
80 |
-
by such Contributor that are necessarily infringed by their
|
81 |
-
Contribution(s) alone or by combination of their Contribution(s)
|
82 |
-
with the Work to which such Contribution(s) was submitted. If You
|
83 |
-
institute patent litigation against any entity (including a
|
84 |
-
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85 |
-
or a Contribution incorporated within the Work constitutes direct
|
86 |
-
or contributory patent infringement, then any patent licenses
|
87 |
-
granted to You under this License for that Work shall terminate
|
88 |
-
as of the date such litigation is filed.
|
89 |
-
|
90 |
-
4. Redistribution. You may reproduce and distribute copies of the
|
91 |
-
Work or Derivative Works thereof in any medium, with or without
|
92 |
-
modifications, and in Source or Object form, provided that You
|
93 |
-
meet the following conditions:
|
94 |
-
|
95 |
-
(a) You must give any other recipients of the Work or
|
96 |
-
Derivative Works a copy of this License; and
|
97 |
-
|
98 |
-
(b) You must cause any modified files to carry prominent notices
|
99 |
-
stating that You changed the files; and
|
100 |
-
|
101 |
-
(c) You must retain, in the Source form of any Derivative Works
|
102 |
-
that You distribute, all copyright, patent, trademark, and
|
103 |
-
attribution notices from the Source form of the Work,
|
104 |
-
excluding those notices that do not pertain to any part of
|
105 |
-
the Derivative Works; and
|
106 |
-
|
107 |
-
(d) If the Work includes a "NOTICE" text file as part of its
|
108 |
-
distribution, then any Derivative Works that You distribute must
|
109 |
-
include a readable copy of the attribution notices contained
|
110 |
-
within such NOTICE file, excluding those notices that do not
|
111 |
-
pertain to any part of the Derivative Works, in at least one
|
112 |
-
of the following places: within a NOTICE text file distributed
|
113 |
-
as part of the Derivative Works; within the Source form or
|
114 |
-
documentation, if provided along with the Derivative Works; or,
|
115 |
-
within a display generated by the Derivative Works, if and
|
116 |
-
wherever such third-party notices normally appear. The contents
|
117 |
-
of the NOTICE file are for informational purposes only and
|
118 |
-
do not modify the License. You may add Your own attribution
|
119 |
-
notices within Derivative Works that You distribute, alongside
|
120 |
-
or as an addendum to the NOTICE text from the Work, provided
|
121 |
-
that such additional attribution notices cannot be construed
|
122 |
-
as modifying the License.
|
123 |
-
|
124 |
-
You may add Your own copyright statement to Your modifications and
|
125 |
-
may provide additional or different license terms and conditions
|
126 |
-
for use, reproduction, or distribution of Your modifications, or
|
127 |
-
for any such Derivative Works as a whole, provided Your use,
|
128 |
-
reproduction, and distribution of the Work otherwise complies with
|
129 |
-
the conditions stated in this License.
|
130 |
-
|
131 |
-
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132 |
-
any Contribution intentionally submitted for inclusion in the Work
|
133 |
-
by You to the Licensor shall be under the terms and conditions of
|
134 |
-
this License, without any additional terms or conditions.
|
135 |
-
Notwithstanding the above, nothing herein shall supersede or modify
|
136 |
-
the terms of any separate license agreement you may have executed
|
137 |
-
with Licensor regarding such Contributions.
|
138 |
-
|
139 |
-
6. Trademarks. This License does not grant permission to use the trade
|
140 |
-
names, trademarks, service marks, or product names of the Licensor,
|
141 |
-
except as required for reasonable and customary use in describing the
|
142 |
-
origin of the Work and reproducing the content of the NOTICE file.
|
143 |
-
|
144 |
-
7. Disclaimer of Warranty. Unless required by applicable law or
|
145 |
-
agreed to in writing, Licensor provides the Work (and each
|
146 |
-
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147 |
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148 |
-
implied, including, without limitation, any warranties or conditions
|
149 |
-
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150 |
-
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151 |
-
appropriateness of using or redistributing the Work and assume any
|
152 |
-
risks associated with Your exercise of permissions under this License.
|
153 |
-
|
154 |
-
8. Limitation of Liability. In no event and under no legal theory,
|
155 |
-
whether in tort (including negligence), contract, or otherwise,
|
156 |
-
unless required by applicable law (such as deliberate and grossly
|
157 |
-
negligent acts) or agreed to in writing, shall any Contributor be
|
158 |
-
liable to You for damages, including any direct, indirect, special,
|
159 |
-
incidental, or consequential damages of any character arising as a
|
160 |
-
result of this License or out of the use or inability to use the
|
161 |
-
Work (including but not limited to damages for loss of goodwill,
|
162 |
-
work stoppage, computer failure or malfunction, or any and all
|
163 |
-
other commercial damages or losses), even if such Contributor
|
164 |
-
has been advised of the possibility of such damages.
|
165 |
-
|
166 |
-
9. Accepting Warranty or Additional Liability. While redistributing
|
167 |
-
the Work or Derivative Works thereof, You may choose to offer,
|
168 |
-
and charge a fee for, acceptance of support, warranty, indemnity,
|
169 |
-
or other liability obligations and/or rights consistent with this
|
170 |
-
License. However, in accepting such obligations, You may act only
|
171 |
-
on Your own behalf and on Your sole responsibility, not on behalf
|
172 |
-
of any other Contributor, and only if You agree to indemnify,
|
173 |
-
defend, and hold each Contributor harmless for any liability
|
174 |
-
incurred by, or claims asserted against, such Contributor by reason
|
175 |
-
of your accepting any such warranty or additional liability.
|
176 |
-
|
177 |
-
END OF TERMS AND CONDITIONS
|
178 |
-
|
179 |
-
APPENDIX: How to apply the Apache License to your work.
|
180 |
-
|
181 |
-
To apply the Apache License to your work, attach the following
|
182 |
-
boilerplate notice, with the fields enclosed by brackets "[]"
|
183 |
-
replaced with your own identifying information. (Don't include
|
184 |
-
the brackets!) The text should be enclosed in the appropriate
|
185 |
-
comment syntax for the file format. We also recommend that a
|
186 |
-
file or class name and description of purpose be included on the
|
187 |
-
same "printed page" as the copyright notice for easier
|
188 |
-
identification within third-party archives.
|
189 |
-
|
190 |
-
Copyright [yyyy] [name of copyright owner]
|
191 |
-
|
192 |
-
Licensed under the Apache License, Version 2.0 (the "License");
|
193 |
-
you may not use this file except in compliance with the License.
|
194 |
-
You may obtain a copy of the License at
|
195 |
-
|
196 |
-
http://www.apache.org/licenses/LICENSE-2.0
|
197 |
-
|
198 |
-
Unless required by applicable law or agreed to in writing, software
|
199 |
-
distributed under the License is distributed on an "AS IS" BASIS,
|
200 |
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201 |
-
See the License for the specific language governing permissions and
|
202 |
-
limitations under the License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/NOTICE.txt
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
[Project Name]
|
2 |
-
Copyright 2023-onwards Anyscale, Inc.
|
3 |
-
|
4 |
-
Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
-
you may not use this file except in compliance with the License.
|
6 |
-
You may obtain a copy of the License at
|
7 |
-
|
8 |
-
http://www.apache.org/licenses/LICENSE-2.0
|
9 |
-
|
10 |
-
Unless required by applicable law or agreed to in writing, software
|
11 |
-
distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
-
See the License for the specific language governing permissions and
|
14 |
-
limitations under the License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/README.md
DELETED
@@ -1,415 +0,0 @@
|
|
1 |
-
# LLMPerf
|
2 |
-
|
3 |
-
A tool for evaluating the performance of LLM APIs.
|
4 |
-
|
5 |
-
# Installation
|
6 |
-
```bash
|
7 |
-
git clone https://github.com/ray-project/llmperf.git
|
8 |
-
cd llmperf
|
9 |
-
pip install -e .
|
10 |
-
```
|
11 |
-
|
12 |
-
# Basic Usage
|
13 |
-
|
14 |
-
We implement 2 tests for evaluating LLMs: a load test to check for performance and a correctness test to check for correctness.
|
15 |
-
|
16 |
-
## Load test
|
17 |
-
|
18 |
-
The load test spawns a number of concurrent requests to the LLM API and measures the inter-token latency and generation throughput per request and across concurrent requests. The prompt that is sent with each request is of the format:
|
19 |
-
|
20 |
-
```
|
21 |
-
Randomly stream lines from the following text. Don't generate eos tokens:
|
22 |
-
LINE 1,
|
23 |
-
LINE 2,
|
24 |
-
LINE 3,
|
25 |
-
...
|
26 |
-
```
|
27 |
-
|
28 |
-
Where the lines are randomly sampled from a collection of lines from Shakespeare sonnets. Tokens are counted using the `LlamaTokenizer` regardless of which LLM API is being tested. This is to ensure that the prompts are consistent across different LLM APIs.
|
29 |
-
|
30 |
-
To run the most basic load test you can run the token_benchmark_ray script.
|
31 |
-
|
32 |
-
|
33 |
-
### Caveats and Disclaimers
|
34 |
-
|
35 |
-
- The endpoints provider backend might vary widely, so this is not a reflection on how the software runs on a particular hardware.
|
36 |
-
- The results may vary with time of day.
|
37 |
-
- The results may vary with the load.
|
38 |
-
- The results may not correlate with users’ workloads.
|
39 |
-
|
40 |
-
### OpenAI Compatible APIs
|
41 |
-
```bash
|
42 |
-
export OPENAI_API_KEY=secret_abcdefg
|
43 |
-
export OPENAI_API_BASE="https://api.endpoints.anyscale.com/v1"
|
44 |
-
|
45 |
-
python token_benchmark_ray.py \
|
46 |
-
--model "meta-llama/Llama-2-7b-chat-hf" \
|
47 |
-
--mean-input-tokens 550 \
|
48 |
-
--stddev-input-tokens 150 \
|
49 |
-
--mean-output-tokens 150 \
|
50 |
-
--stddev-output-tokens 10 \
|
51 |
-
--max-num-completed-requests 2 \
|
52 |
-
--timeout 600 \
|
53 |
-
--num-concurrent-requests 1 \
|
54 |
-
--results-dir "result_outputs" \
|
55 |
-
--llm-api openai \
|
56 |
-
--additional-sampling-params '{}'
|
57 |
-
|
58 |
-
```
|
59 |
-
|
60 |
-
### Anthropic
|
61 |
-
```bash
|
62 |
-
export ANTHROPIC_API_KEY=secret_abcdefg
|
63 |
-
|
64 |
-
python token_benchmark_ray.py \
|
65 |
-
--model "claude-2" \
|
66 |
-
--mean-input-tokens 550 \
|
67 |
-
--stddev-input-tokens 150 \
|
68 |
-
--mean-output-tokens 150 \
|
69 |
-
--stddev-output-tokens 10 \
|
70 |
-
--max-num-completed-requests 2 \
|
71 |
-
--timeout 600 \
|
72 |
-
--num-concurrent-requests 1 \
|
73 |
-
--results-dir "result_outputs" \
|
74 |
-
--llm-api anthropic \
|
75 |
-
--additional-sampling-params '{}'
|
76 |
-
|
77 |
-
```
|
78 |
-
|
79 |
-
### TogetherAI
|
80 |
-
|
81 |
-
```bash
|
82 |
-
export TOGETHERAI_API_KEY="YOUR_TOGETHER_KEY"
|
83 |
-
|
84 |
-
python token_benchmark_ray.py \
|
85 |
-
--model "together_ai/togethercomputer/CodeLlama-7b-Instruct" \
|
86 |
-
--mean-input-tokens 550 \
|
87 |
-
--stddev-input-tokens 150 \
|
88 |
-
--mean-output-tokens 150 \
|
89 |
-
--stddev-output-tokens 10 \
|
90 |
-
--max-num-completed-requests 2 \
|
91 |
-
--timeout 600 \
|
92 |
-
--num-concurrent-requests 1 \
|
93 |
-
--results-dir "result_outputs" \
|
94 |
-
--llm-api "litellm" \
|
95 |
-
--additional-sampling-params '{}'
|
96 |
-
|
97 |
-
```
|
98 |
-
|
99 |
-
### Hugging Face
|
100 |
-
|
101 |
-
```bash
|
102 |
-
export HUGGINGFACE_API_KEY="YOUR_HUGGINGFACE_API_KEY"
|
103 |
-
export HUGGINGFACE_API_BASE="YOUR_HUGGINGFACE_API_ENDPOINT"
|
104 |
-
|
105 |
-
python token_benchmark_ray.py \
|
106 |
-
--model "huggingface/meta-llama/Llama-2-7b-chat-hf" \
|
107 |
-
--mean-input-tokens 550 \
|
108 |
-
--stddev-input-tokens 150 \
|
109 |
-
--mean-output-tokens 150 \
|
110 |
-
--stddev-output-tokens 10 \
|
111 |
-
--max-num-completed-requests 2 \
|
112 |
-
--timeout 600 \
|
113 |
-
--num-concurrent-requests 1 \
|
114 |
-
--results-dir "result_outputs" \
|
115 |
-
--llm-api "litellm" \
|
116 |
-
--additional-sampling-params '{}'
|
117 |
-
|
118 |
-
```
|
119 |
-
|
120 |
-
### LiteLLM
|
121 |
-
|
122 |
-
LLMPerf can use LiteLLM to send prompts to LLM APIs. The environment variables to set for the provider, and the arguments that one should set for model and additional-sampling-params, depend on the provider:
|
123 |
-
|
124 |
-
see the [LiteLLM Provider Documentation](https://docs.litellm.ai/docs/providers).
|
125 |
-
|
126 |
-
```bash
|
127 |
-
python token_benchmark_ray.py \
|
128 |
-
--model "meta-llama/Llama-2-7b-chat-hf" \
|
129 |
-
--mean-input-tokens 550 \
|
130 |
-
--stddev-input-tokens 150 \
|
131 |
-
--mean-output-tokens 150 \
|
132 |
-
--stddev-output-tokens 10 \
|
133 |
-
--max-num-completed-requests 2 \
|
134 |
-
--timeout 600 \
|
135 |
-
--num-concurrent-requests 1 \
|
136 |
-
--results-dir "result_outputs" \
|
137 |
-
--llm-api "litellm" \
|
138 |
-
--additional-sampling-params '{}'
|
139 |
-
|
140 |
-
```
|
141 |
-
|
142 |
-
### Vertex AI
|
143 |
-
|
144 |
-
Here, --model is used for logging, not for selecting the model. The model is specified in the Vertex AI Endpoint ID.
|
145 |
-
|
146 |
-
The GCLOUD_ACCESS_TOKEN needs to be somewhat regularly set, as the token generated by `gcloud auth print-access-token` expires after 15 minutes or so.
|
147 |
-
|
148 |
-
Vertex AI doesn't return the total number of tokens that are generated by their endpoint, so tokens are counted using the Llama tokenizer.
|
149 |
-
|
150 |
-
```bash
|
151 |
-
|
152 |
-
gcloud auth application-default login
|
153 |
-
gcloud config set project YOUR_PROJECT_ID
|
154 |
-
|
155 |
-
export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token)
|
156 |
-
export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID
|
157 |
-
export GCLOUD_REGION=YOUR_REGION
|
158 |
-
export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID
|
159 |
-
|
160 |
-
python token_benchmark_ray.py \
|
161 |
-
--model "meta-llama/Llama-2-7b-chat-hf" \
|
162 |
-
--mean-input-tokens 550 \
|
163 |
-
--stddev-input-tokens 150 \
|
164 |
-
--mean-output-tokens 150 \
|
165 |
-
--stddev-output-tokens 10 \
|
166 |
-
--max-num-completed-requests 2 \
|
167 |
-
--timeout 600 \
|
168 |
-
--num-concurrent-requests 1 \
|
169 |
-
--results-dir "result_outputs" \
|
170 |
-
--llm-api "vertexai" \
|
171 |
-
--additional-sampling-params '{}'
|
172 |
-
|
173 |
-
```
|
174 |
-
|
175 |
-
### SageMaker
|
176 |
-
|
177 |
-
SageMaker doesn't return the total number of tokens that are generated by their endpoint, so tokens are counted using the Llama tokenizer.
|
178 |
-
|
179 |
-
```bash
|
180 |
-
|
181 |
-
export AWS_ACCESS_KEY_ID="YOUR_ACCESS_KEY_ID"
|
182 |
-
export AWS_SECRET_ACCESS_KEY="YOUR_SECRET_ACCESS_KEY"
|
183 |
-
export AWS_SESSION_TOKEN="YOUR_SESSION_TOKEN"
|
184 |
-
export AWS_REGION_NAME="YOUR_ENDPOINTS_REGION_NAME"
|
185 |
-
|
186 |
-
python llm_correctness.py \
|
187 |
-
--model "llama-2-7b" \
|
188 |
-
--llm-api "sagemaker" \
|
189 |
-
--max-num-completed-requests 2 \
|
190 |
-
--timeout 600 \
|
191 |
-
--num-concurrent-requests 1 \
|
192 |
-
--results-dir "result_outputs" \
|
193 |
-
|
194 |
-
```
|
195 |
-
|
196 |
-
see `python token_benchmark_ray.py --help` for more details on the arguments.
|
197 |
-
|
198 |
-
## Correctness Test
|
199 |
-
|
200 |
-
The correctness test spawns a number of concurrent requests to the LLM API with the following format:
|
201 |
-
|
202 |
-
```
|
203 |
-
Convert the following sequence of words into a number: {random_number_in_word_format}. Output just your final answer.
|
204 |
-
```
|
205 |
-
|
206 |
-
where random_number_in_word_format could be for example "one hundred and twenty three". The test then checks that the response contains that number in digit format which in this case would be 123.
|
207 |
-
|
208 |
-
The test does this for a number of randomly generated numbers and reports the number of responses that contain a mismatch.
|
209 |
-
|
210 |
-
To run the most basic correctness test you can run the llm_correctness.py script.
|
211 |
-
|
212 |
-
### OpenAI Compatible APIs
|
213 |
-
|
214 |
-
```bash
|
215 |
-
export OPENAI_API_KEY=secret_abcdefg
|
216 |
-
export OPENAI_API_BASE=https://console.endpoints.anyscale.com/m/v1
|
217 |
-
|
218 |
-
python llm_correctness.py \
|
219 |
-
--model "meta-llama/Llama-2-7b-chat-hf" \
|
220 |
-
--max-num-completed-requests 150 \
|
221 |
-
--timeout 600 \
|
222 |
-
--num-concurrent-requests 10 \
|
223 |
-
--results-dir "result_outputs"
|
224 |
-
```
|
225 |
-
|
226 |
-
### Anthropic
|
227 |
-
|
228 |
-
```bash
|
229 |
-
export ANTHROPIC_API_KEY=secret_abcdefg
|
230 |
-
|
231 |
-
python llm_correctness.py \
|
232 |
-
--model "claude-2" \
|
233 |
-
--llm-api "anthropic" \
|
234 |
-
--max-num-completed-requests 5 \
|
235 |
-
--timeout 600 \
|
236 |
-
--num-concurrent-requests 1 \
|
237 |
-
--results-dir "result_outputs"
|
238 |
-
```
|
239 |
-
|
240 |
-
### TogetherAI
|
241 |
-
|
242 |
-
```bash
|
243 |
-
export TOGETHERAI_API_KEY="YOUR_TOGETHER_KEY"
|
244 |
-
|
245 |
-
python llm_correctness.py \
|
246 |
-
--model "together_ai/togethercomputer/CodeLlama-7b-Instruct" \
|
247 |
-
--llm-api "litellm" \
|
248 |
-
--max-num-completed-requests 2 \
|
249 |
-
--timeout 600 \
|
250 |
-
--num-concurrent-requests 1 \
|
251 |
-
--results-dir "result_outputs" \
|
252 |
-
|
253 |
-
```
|
254 |
-
|
255 |
-
### Hugging Face
|
256 |
-
|
257 |
-
```bash
|
258 |
-
export HUGGINGFACE_API_KEY="YOUR_HUGGINGFACE_API_KEY"
|
259 |
-
export HUGGINGFACE_API_BASE="YOUR_HUGGINGFACE_API_ENDPOINT"
|
260 |
-
|
261 |
-
python llm_correctness.py \
|
262 |
-
--model "huggingface/meta-llama/Llama-2-7b-chat-hf" \
|
263 |
-
--llm-api "litellm" \
|
264 |
-
--max-num-completed-requests 2 \
|
265 |
-
--timeout 600 \
|
266 |
-
--num-concurrent-requests 1 \
|
267 |
-
--results-dir "result_outputs" \
|
268 |
-
|
269 |
-
```
|
270 |
-
|
271 |
-
### LiteLLM
|
272 |
-
|
273 |
-
LLMPerf can use LiteLLM to send prompts to LLM APIs. To find the environment variables to set for your provider, and the arguments to pass for model and additional-sampling-params,
|
274 |
-
|
275 |
-
see the [LiteLLM Provider Documentation](https://docs.litellm.ai/docs/providers).
|
276 |
-
|
277 |
-
```bash
|
278 |
-
python llm_correctness.py \
|
279 |
-
--model "meta-llama/Llama-2-7b-chat-hf" \
|
280 |
-
--llm-api "litellm" \
|
281 |
-
--max-num-completed-requests 2 \
|
282 |
-
--timeout 600 \
|
283 |
-
--num-concurrent-requests 1 \
|
284 |
-
--results-dir "result_outputs" \
|
285 |
-
|
286 |
-
```
|
287 |
-
|
288 |
-
see `python llm_correctness.py --help` for more details on the arguments.
|
289 |
-
|
290 |
-
|
291 |
-
### Vertex AI
|
292 |
-
|
293 |
-
Here, --model is used for logging, not for selecting the model. The model is specified in the Vertex AI Endpoint ID.
|
294 |
-
|
295 |
-
The GCLOUD_ACCESS_TOKEN needs to be refreshed regularly, as the token generated by `gcloud auth print-access-token` expires after 15 minutes or so.
|
296 |
-
|
297 |
-
Vertex AI doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
|
298 |
-
|
299 |
-
|
300 |
-
```bash
|
301 |
-
|
302 |
-
gcloud auth application-default login
|
303 |
-
gcloud config set project YOUR_PROJECT_ID
|
304 |
-
|
305 |
-
export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token)
|
306 |
-
export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID
|
307 |
-
export GCLOUD_REGION=YOUR_REGION
|
308 |
-
export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID
|
309 |
-
|
310 |
-
python llm_correctness.py \
|
311 |
-
--model "meta-llama/Llama-2-7b-chat-hf" \
|
312 |
-
--llm-api "vertexai" \
|
313 |
-
--max-num-completed-requests 2 \
|
314 |
-
--timeout 600 \
|
315 |
-
--num-concurrent-requests 1 \
|
316 |
-
--results-dir "result_outputs" \
|
317 |
-
|
318 |
-
```
|
319 |
-
|
320 |
-
### SageMaker
|
321 |
-
|
322 |
-
SageMaker doesn't return the total number of tokens generated by its endpoint, so tokens are counted using the Llama tokenizer.
|
323 |
-
|
324 |
-
```bash
|
325 |
-
|
326 |
-
export AWS_ACCESS_KEY_ID="YOUR_ACCESS_KEY_ID"
|
327 |
-
export AWS_SECRET_ACCESS_KEY="YOUR_SECRET_ACCESS_KEY"
|
328 |
-
export AWS_SESSION_TOKEN="YOUR_SESSION_TOKEN"
|
329 |
-
export AWS_REGION_NAME="YOUR_ENDPOINTS_REGION_NAME"
|
330 |
-
|
331 |
-
python llm_correctness.py \
|
332 |
-
--model "llama-2-7b" \
|
333 |
-
--llm-api "sagemaker" \
|
334 |
-
--max-num-completed-requests 2 \
|
335 |
-
--timeout 600 \
|
336 |
-
--num-concurrent-requests 1 \
|
337 |
-
--results-dir "result_outputs" \
|
338 |
-
|
339 |
-
```
|
340 |
-
|
341 |
-
## Saving Results
|
342 |
-
|
343 |
-
The results of the load test and correctness test are saved in the results directory specified by the `--results-dir` argument. The results are saved in 2 files, one with the summary metrics of the test, and one with metrics from each individual request that is returned.
|
344 |
-
|
345 |
-
# Advanced Usage
|
346 |
-
|
347 |
-
The correctness tests were implemented with the following workflow in mind:
|
348 |
-
|
349 |
-
```python
|
350 |
-
import ray
|
351 |
-
from transformers import LlamaTokenizerFast
|
352 |
-
|
353 |
-
from llmperf.ray_clients.openai_chat_completions_client import (
|
354 |
-
OpenAIChatCompletionsClient,
|
355 |
-
)
|
356 |
-
from llmperf.models import RequestConfig
|
357 |
-
from llmperf.requests_launcher import RequestsLauncher
|
358 |
-
|
359 |
-
|
360 |
-
# Copying the environment variables and passing them to ray.init() is necessary
|
361 |
-
# for any of the clients to work.
|
362 |
-
ray.init(runtime_env={"env_vars": {"OPENAI_API_BASE" : "https://api.endpoints.anyscale.com/v1",
|
363 |
-
"OPENAI_API_KEY" : "YOUR_API_KEY"}})
|
364 |
-
|
365 |
-
base_prompt = "hello_world"
|
366 |
-
tokenizer = LlamaTokenizerFast.from_pretrained(
|
367 |
-
"hf-internal-testing/llama-tokenizer"
|
368 |
-
)
|
369 |
-
base_prompt_len = len(tokenizer.encode(base_prompt))
|
370 |
-
prompt = (base_prompt, base_prompt_len)
|
371 |
-
|
372 |
-
# Create a client for spawning requests
|
373 |
-
clients = [OpenAIChatCompletionsClient.remote()]
|
374 |
-
|
375 |
-
req_launcher = RequestsLauncher(clients)
|
376 |
-
|
377 |
-
req_config = RequestConfig(
|
378 |
-
model="meta-llama/Llama-2-7b-chat-hf",
|
379 |
-
prompt=prompt
|
380 |
-
)
|
381 |
-
|
382 |
-
req_launcher.launch_requests(req_config)
|
383 |
-
result = req_launcher.get_next_ready(block=True)
|
384 |
-
print(result)
|
385 |
-
|
386 |
-
```
|
387 |
-
|
388 |
-
# Implementing New LLM Clients
|
389 |
-
|
390 |
-
To implement a new LLM client, you need to subclass the base class `llmperf.ray_llm_client.LLMClient` and decorate the subclass as a Ray actor.
|
391 |
-
|
392 |
-
```python
|
393 |
-
|
394 |
-
from llmperf.ray_llm_client import LLMClient
|
395 |
-
import ray
|
396 |
-
|
397 |
-
|
398 |
-
@ray.remote
|
399 |
-
class CustomLLMClient(LLMClient):
|
400 |
-
|
401 |
-
def llm_request(self, request_config: RequestConfig) -> Tuple[Metrics, str, RequestConfig]:
|
402 |
-
"""Make a single completion request to a LLM API
|
403 |
-
|
404 |
-
Returns:
|
405 |
-
Metrics about the performance characteristics of the request.
|
406 |
-
The text generated by the request to the LLM API.
|
407 |
-
The request_config used to make the request. This is mainly for logging purposes.
|
408 |
-
|
409 |
-
"""
|
410 |
-
...
|
411 |
-
|
412 |
-
```
|
413 |
-
|
414 |
-
# Legacy Codebase
|
415 |
-
The old LLMPerf code base can be found in the [llmperf-legacy](https://github.com/ray-project/llmval-legacy) repo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/analyze-token-benchmark-results.ipynb
DELETED
@@ -1,327 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"id": "56950450",
|
6 |
-
"metadata": {},
|
7 |
-
"source": [
|
8 |
-
"# Token Benchmark Example Analysis\n",
|
9 |
-
"The following is an example of the analysis that can be done on individual responses that are saved when running `token_benchmark_ray.py` with the flag `--results-dir` which enables the saving of all responses."
|
10 |
-
]
|
11 |
-
},
|
12 |
-
{
|
13 |
-
"cell_type": "code",
|
14 |
-
"execution_count": 1,
|
15 |
-
"id": "dacfe98a-e81b-4089-9506-97a652993b5b",
|
16 |
-
"metadata": {
|
17 |
-
"tags": []
|
18 |
-
},
|
19 |
-
"outputs": [],
|
20 |
-
"source": [
|
21 |
-
"import pandas as pd"
|
22 |
-
]
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"cell_type": "code",
|
26 |
-
"execution_count": 6,
|
27 |
-
"id": "17f7abe9-ed9e-466c-b034-577489aaf98b",
|
28 |
-
"metadata": {
|
29 |
-
"tags": []
|
30 |
-
},
|
31 |
-
"outputs": [
|
32 |
-
{
|
33 |
-
"data": {
|
34 |
-
"text/html": [
|
35 |
-
"<div>\n",
|
36 |
-
"<style scoped>\n",
|
37 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
38 |
-
" vertical-align: middle;\n",
|
39 |
-
" }\n",
|
40 |
-
"\n",
|
41 |
-
" .dataframe tbody tr th {\n",
|
42 |
-
" vertical-align: top;\n",
|
43 |
-
" }\n",
|
44 |
-
"\n",
|
45 |
-
" .dataframe thead th {\n",
|
46 |
-
" text-align: right;\n",
|
47 |
-
" }\n",
|
48 |
-
"</style>\n",
|
49 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
50 |
-
" <thead>\n",
|
51 |
-
" <tr style=\"text-align: right;\">\n",
|
52 |
-
" <th></th>\n",
|
53 |
-
" <th>error_code</th>\n",
|
54 |
-
" <th>error_msg</th>\n",
|
55 |
-
" <th>inter_token_latency_s</th>\n",
|
56 |
-
" <th>ttft_s</th>\n",
|
57 |
-
" <th>end_to_end_latency_s</th>\n",
|
58 |
-
" <th>request_output_throughput_token_per_s</th>\n",
|
59 |
-
" <th>number_total_tokens</th>\n",
|
60 |
-
" <th>number_output_tokens</th>\n",
|
61 |
-
" <th>number_input_tokens</th>\n",
|
62 |
-
" </tr>\n",
|
63 |
-
" </thead>\n",
|
64 |
-
" <tbody>\n",
|
65 |
-
" <tr>\n",
|
66 |
-
" <th>0</th>\n",
|
67 |
-
" <td>NaN</td>\n",
|
68 |
-
" <td></td>\n",
|
69 |
-
" <td>[0.5549881670012831, 0.0009654169989510001, 0....</td>\n",
|
70 |
-
" <td>0.554988</td>\n",
|
71 |
-
" <td>1.610734</td>\n",
|
72 |
-
" <td>44.079272</td>\n",
|
73 |
-
" <td>706</td>\n",
|
74 |
-
" <td>71</td>\n",
|
75 |
-
" <td>635</td>\n",
|
76 |
-
" </tr>\n",
|
77 |
-
" <tr>\n",
|
78 |
-
" <th>1</th>\n",
|
79 |
-
" <td>NaN</td>\n",
|
80 |
-
" <td></td>\n",
|
81 |
-
" <td>[0.6019128750049271, 0.007011749999946, 0.0144...</td>\n",
|
82 |
-
" <td>0.601913</td>\n",
|
83 |
-
" <td>1.725729</td>\n",
|
84 |
-
" <td>44.039357</td>\n",
|
85 |
-
" <td>730</td>\n",
|
86 |
-
" <td>76</td>\n",
|
87 |
-
" <td>654</td>\n",
|
88 |
-
" </tr>\n",
|
89 |
-
" </tbody>\n",
|
90 |
-
"</table>\n",
|
91 |
-
"</div>"
|
92 |
-
],
|
93 |
-
"text/plain": [
|
94 |
-
" error_code error_msg inter_token_latency_s \\\n",
|
95 |
-
"0 NaN [0.5549881670012831, 0.0009654169989510001, 0.... \n",
|
96 |
-
"1 NaN [0.6019128750049271, 0.007011749999946, 0.0144... \n",
|
97 |
-
"\n",
|
98 |
-
" ttft_s end_to_end_latency_s request_output_throughput_token_per_s \\\n",
|
99 |
-
"0 0.554988 1.610734 44.079272 \n",
|
100 |
-
"1 0.601913 1.725729 44.039357 \n",
|
101 |
-
"\n",
|
102 |
-
" number_total_tokens number_output_tokens number_input_tokens \n",
|
103 |
-
"0 706 71 635 \n",
|
104 |
-
"1 730 76 654 "
|
105 |
-
]
|
106 |
-
},
|
107 |
-
"execution_count": 6,
|
108 |
-
"metadata": {},
|
109 |
-
"output_type": "execute_result"
|
110 |
-
}
|
111 |
-
],
|
112 |
-
"source": [
|
113 |
-
"# path to the individual responses json file\n",
|
114 |
-
"df = pd.read_json('/home/ray/default/llmperf/result_outputs/550_150_individual_responses.json')\n"
|
115 |
-
]
|
116 |
-
},
|
117 |
-
{
|
118 |
-
"cell_type": "code",
|
119 |
-
"execution_count": 12,
|
120 |
-
"id": "565a59e4",
|
121 |
-
"metadata": {},
|
122 |
-
"outputs": [],
|
123 |
-
"source": [
|
124 |
-
"valid_df = df[(df[\"error_code\"] != \"\")]"
|
125 |
-
]
|
126 |
-
},
|
127 |
-
{
|
128 |
-
"cell_type": "code",
|
129 |
-
"execution_count": 13,
|
130 |
-
"id": "102894bc",
|
131 |
-
"metadata": {},
|
132 |
-
"outputs": [
|
133 |
-
{
|
134 |
-
"data": {
|
135 |
-
"text/html": [
|
136 |
-
"<div>\n",
|
137 |
-
"<style scoped>\n",
|
138 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
139 |
-
" vertical-align: middle;\n",
|
140 |
-
" }\n",
|
141 |
-
"\n",
|
142 |
-
" .dataframe tbody tr th {\n",
|
143 |
-
" vertical-align: top;\n",
|
144 |
-
" }\n",
|
145 |
-
"\n",
|
146 |
-
" .dataframe thead th {\n",
|
147 |
-
" text-align: right;\n",
|
148 |
-
" }\n",
|
149 |
-
"</style>\n",
|
150 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
151 |
-
" <thead>\n",
|
152 |
-
" <tr style=\"text-align: right;\">\n",
|
153 |
-
" <th></th>\n",
|
154 |
-
" <th>error_code</th>\n",
|
155 |
-
" <th>error_msg</th>\n",
|
156 |
-
" <th>inter_token_latency_s</th>\n",
|
157 |
-
" <th>ttft_s</th>\n",
|
158 |
-
" <th>end_to_end_latency_s</th>\n",
|
159 |
-
" <th>request_output_throughput_token_per_s</th>\n",
|
160 |
-
" <th>number_total_tokens</th>\n",
|
161 |
-
" <th>number_output_tokens</th>\n",
|
162 |
-
" <th>number_input_tokens</th>\n",
|
163 |
-
" </tr>\n",
|
164 |
-
" </thead>\n",
|
165 |
-
" <tbody>\n",
|
166 |
-
" <tr>\n",
|
167 |
-
" <th>0</th>\n",
|
168 |
-
" <td>NaN</td>\n",
|
169 |
-
" <td></td>\n",
|
170 |
-
" <td>[0.5549881670012831, 0.0009654169989510001, 0....</td>\n",
|
171 |
-
" <td>0.554988</td>\n",
|
172 |
-
" <td>1.610734</td>\n",
|
173 |
-
" <td>44.079272</td>\n",
|
174 |
-
" <td>706</td>\n",
|
175 |
-
" <td>71</td>\n",
|
176 |
-
" <td>635</td>\n",
|
177 |
-
" </tr>\n",
|
178 |
-
" <tr>\n",
|
179 |
-
" <th>1</th>\n",
|
180 |
-
" <td>NaN</td>\n",
|
181 |
-
" <td></td>\n",
|
182 |
-
" <td>[0.6019128750049271, 0.007011749999946, 0.0144...</td>\n",
|
183 |
-
" <td>0.601913</td>\n",
|
184 |
-
" <td>1.725729</td>\n",
|
185 |
-
" <td>44.039357</td>\n",
|
186 |
-
" <td>730</td>\n",
|
187 |
-
" <td>76</td>\n",
|
188 |
-
" <td>654</td>\n",
|
189 |
-
" </tr>\n",
|
190 |
-
" </tbody>\n",
|
191 |
-
"</table>\n",
|
192 |
-
"</div>"
|
193 |
-
],
|
194 |
-
"text/plain": [
|
195 |
-
" error_code error_msg inter_token_latency_s \\\n",
|
196 |
-
"0 NaN [0.5549881670012831, 0.0009654169989510001, 0.... \n",
|
197 |
-
"1 NaN [0.6019128750049271, 0.007011749999946, 0.0144... \n",
|
198 |
-
"\n",
|
199 |
-
" ttft_s end_to_end_latency_s request_output_throughput_token_per_s \\\n",
|
200 |
-
"0 0.554988 1.610734 44.079272 \n",
|
201 |
-
"1 0.601913 1.725729 44.039357 \n",
|
202 |
-
"\n",
|
203 |
-
" number_total_tokens number_output_tokens number_input_tokens \n",
|
204 |
-
"0 706 71 635 \n",
|
205 |
-
"1 730 76 654 "
|
206 |
-
]
|
207 |
-
},
|
208 |
-
"execution_count": 13,
|
209 |
-
"metadata": {},
|
210 |
-
"output_type": "execute_result"
|
211 |
-
}
|
212 |
-
],
|
213 |
-
"source": [
|
214 |
-
"valid_df"
|
215 |
-
]
|
216 |
-
},
|
217 |
-
{
|
218 |
-
"cell_type": "code",
|
219 |
-
"execution_count": 14,
|
220 |
-
"id": "c7519fc9",
|
221 |
-
"metadata": {},
|
222 |
-
"outputs": [
|
223 |
-
{
|
224 |
-
"name": "stdout",
|
225 |
-
"output_type": "stream",
|
226 |
-
"text": [
|
227 |
-
"Mean number of input tokens: 644.5. Mean number of output tokens: 73.5\n"
|
228 |
-
]
|
229 |
-
},
|
230 |
-
{
|
231 |
-
"data": {
|
232 |
-
"text/plain": [
|
233 |
-
"<Axes: title={'center': 'Number of Input Tokens vs. TTFT'}, xlabel='number_input_tokens', ylabel='ttft_s'>"
|
234 |
-
]
|
235 |
-
},
|
236 |
-
"execution_count": 14,
|
237 |
-
"metadata": {},
|
238 |
-
"output_type": "execute_result"
|
239 |
-
},
|
240 |
-
{
|
241 |
-
"data": {
|
242 |
-
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+u0lEQVR4nO3deVgW9f7/8dcNsqrgAgIqgop7uYSKYIkVbp1TVp4yWzBOmpWmRllRuWSLmll2mSeXcknLXKqv+tP0JLlUmpZmaZnghkuCogKiBgmf3x9d3MdbFsFYnefjuua6vD8z85nPe4aBlzNz37fNGGMEAABgIU4VPQAAAIDyRgACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACKsCGDRtks9m0bNmyih5KsaSkpOhf//qX6tatK5vNpqlTp1b0kCq1cePGyWazKTU1taKHAqAQBCBcs+bNmyebzSZ3d3cdO3Ys3/zu3bvruuuuq4CRVT1PPfWU1q5dq7i4OC1YsEC9e/cudFmbzaZhw4aV4+iK9vHHHxcrsOWFlitN3bt3L/MxX+uKs59LMm3YsEGHDh0qdH6XLl04vsinWkUPAChrWVlZmjhxoqZNm1bRQ6myvvrqK/Xt21fPPPNMRQ+lxD7++GPt3r1bI0eOLHK5u+++WyEhIfbXmZmZevzxx3XXXXfp7rvvtrf7+fmV1VAtY8GCBQ6vP/zwQ3355Zf52nNycuTs7HzF5Vq1aqULFy5IkgYMGKDbbrvNYb6vr68CAgI4vnBAAMI1r3379po9e7bi4uJUv379ih5OuTp37pyqV6/+t/s5ceKEatWq9fcHVIm1bdtWbdu2tb9OTU3V448/rrZt2+rBBx+swJFdey7fn999952+/PLLK+7nopY7dOiQJOmGG24otB+OLy7FLTBc81544QXl5ORo4sSJRS6Xdwl93rx5+ebZbDaNGzfO/jrvcnpCQoIefPBBeXt7y9fXV6NHj5YxRkeOHFHfvn3l5eUlf39/TZkypcBt5uTk6IUXXpC/v7+qV6+uO+64Q0eOHMm33NatW9W7d295e3vL09NTkZGR+vbbbx2WyRvTr7/+qvvvv1+1a9fWjTfeWGTNBw4c0D333KM6derI09NTXbp00apVq+zz824jGmM0ffp0+22Cksh73mnJkiV67bXX1LBhQ7m7u+vWW2/Vvn37HJbNuy25fft2RUREyMPDQ40bN9aMGTMclssbV94fvcu3tWHDBnt/q1atUlJSkn3swcHBJRr/5b766ivddNNNql69umrVqqW+fftqz549V1wvKSlJISEhuu6665SSkiJJSktL08iRIxUYGCg3NzeFhIRo0qRJys3Nta+X93P55ptvatasWWratKnc3NzUqVMnff/99w7bSE5OVkxMjBo2bCg3NzcFBASob9+++fbTpd58803ZbDYlJSXlmxcXFydXV1edOXNGkpSYmKh+/frJ399f7u7uatiwoe677z6lp6cXZ9cBlQpXgHDNa9y4saKjozV79mw9//zzpXoVqH///mrVqpUmTpyoVatW6dVXX1WdOnU0c+ZM3XLLLZo0aZI++ugjPfPMM+rUqZO6devmsP5rr70mm82m5557TidOnNDUqVMVFRWlnTt3ysPDQ9Jff3D79Omj0NBQjR07Vk5OTpo7d65uueUWff311+rcubNDn/fcc4+aNWum119/XcaYQseekpKiiIgInT9/XsOHD1fdunU1f/583XHHHVq2bJnuuusudevWTQsWLNBDDz2kHj16KDo6+qr31cSJE+Xk5KRnnnlG6enpeuONN/TAAw9o69atDsudOXNGt912m+69914NGDBAS5Ys0eOPPy5XV1f9+9//LtE2X3zxRaWnp+vo0aN6++23JUk1atS46hrWrVunPn36qEmTJho3bpwuXLigadOmqWvXrtqxY0eh4Wr//v265ZZbVKdOHX
355Zfy8fHR+fPnFRkZqWPHjmnIkCFq1KiRNm/erLi4OB0/fjzfc0sff/yxzp49qyFDhshms+mNN97Q3XffrQMHDsjFxUWS1K9fP/3yyy968sknFRwcrBMnTujLL7/U4cOHCx3bvffeq2effVZLlizRqFGjHOYtWbJEPXv2VO3atZWdna1evXopKytLTz75pPz9/XXs2DH9v//3/5SWliZvb++r3q+l6fz58/kePvf29rbvI8DOANeouXPnGknm+++/N/v37zfVqlUzw4cPt8+PjIw0bdq0sb8+ePCgkWTmzp2bry9JZuzYsfbXY8eONZLMo48+am+7ePGiadiwobHZbGbixIn29jNnzhgPDw8zcOBAe9v69euNJNOgQQOTkZFhb1+yZImRZN555x1jjDG5ubmmWbNmplevXiY3N9e+3Pnz503jxo1Njx498o1pwIABxdo/I0eONJLM119/bW87e/asady4sQkODjY5OTkO9Q8dOrRY/V6+bF6trVq1MllZWfb2d955x0gyu3btsrdFRkYaSWbKlCn2tqysLNO+fXtTr149k52dbYz537E9ePCgw7bztrV+/Xp72z/+8Q8TFBRUrLFf6uTJk/mOe944Tp06ZW/76aefjJOTk4mOjra35R2LkydPmj179pj69eubTp06mdOnT9uXeeWVV0z16tVNQkKCw3aff/554+zsbA4fPmyM+d/PZd26dR3WX758uZFkVq5caYz56+dMkpk8eXKJaw0PDzehoaEObdu2bTOSzIcffmiMMebHH380kszSpUtL3P+VDB061BTnz1FRy+Xtp4KmS38e8hR0fGEt3AKDJTRp0kQPPfSQZs2apePHj5dav4MGDbL/29nZWR07dpQxRo888oi9vVatWmrRooUOHDiQb/3o6GjVrFnT/vpf//qXAgICtHr1aknSzp07lZiYqPvvv1+nTp1SamqqUlNTde7cOd16663atGmTw+0SSXrssceKNfbVq1erc+fODrfJatSooUcffVSHDh3Sr7/+WrydUEwxMTFydXW1v77pppskKd9+qVatmoYMGWJ/7erqqiFDhujEiRPavn17qY6pJI4fP66dO3fq4YcfVp06deztbdu2VY8ePezH7FK7d+9WZGSkgoODtW7dOtWuXds+b+nSpbrppptUu3Zt+3FNTU1VVFSUcnJytGnTJoe++vfv77D+5fvPw8NDrq6u2rBhg/2WVXH1799f27dv1/79++1tixcvlpubm/r27StJ9is8a9eu1fnz50vUf3l69NFH9eWXXzpM7dq1q+hhoRIiAMEyXnrpJV28ePGKzwKVRKNGjRxee3t7y93dXT4+PvnaC/qj1KxZM4fXNptNISEh9mc2EhMTJUkDBw6Ur6+vw/T+++8rKysr3/MXjRs3LtbYk5KS1KJFi3ztrVq1ss8vTZfvq7w/5pfvl/r16+d7cLt58+aSVOSzLGUtb38Uts/ygumlbr/9dtWsWVNr166Vl5eXw7zExEStWbMm33GNioqS9NeD55e60v5zc3PTpEmT9MUXX8jPz0/dunXTG2+8oeTk5CvWds8998jJyUmLFy+WJBljtHTpUvXp08c+7saNGys2Nlbvv/++fHx81KtXL02fPr3SPf/TrFkzRUVFOUyXBkcgDwEIltGkSRM9+OCDhV4FKuzh3pycnEL7vPQtukW1SSryeZzC5F3dmTx5cr7/1eZNlz/TkvfsUGVTmvvlao5VRejXr5/279+vjz76KN+83Nxc9ejRo9Dj2q9fP4fli7P/Ro4cqYSEBE2YMEHu7u4aPXq0WrVqpR9//LHIcdavX1833XSTlixZIumvd1sdPnxY/fv3d1huypQp+vnnn/XCCy/owoULGj58uNq0aaOjR48Wa38AlQkPQcNSXnrpJS1cuFCTJk3KNy/vf4lpaWkO7aV9JeRSeVd48hhjtG/fPvvbdZs2bSpJ8vLysl8ZKC1BQUHau3dvvvbffvvNPr8i/P777/nevp
+QkCBJ9gd5S3KsSvqutcLk7Y/C9pmPj0++K1eTJ09WtWrV9MQTT6hmzZq6//777fOaNm2qzMzMUj+uTZs21dNPP62nn35aiYmJat++vaZMmaKFCxcWuV7//v31xBNPaO/evVq8eLE8PT11++2351vu+uuv1/XXX6+XXnpJmzdvVteuXTVjxgy9+uqrpVoHUNa4AgRLadq0qR588EHNnDkz360BLy8v+fj45Hv24j//+U+ZjefDDz/U2bNn7a+XLVum48ePq0+fPpKk0NBQNW3aVG+++aYyMzPzrX/y5Mmr3vZtt92mbdu2acuWLfa2c+fOadasWQoODlbr1q2vuu+/4+LFi5o5c6b9dXZ2tmbOnClfX1+FhoZK+l8wvPRY5eTkaNasWfn6q169eqncpgkICFD79u01f/58h+C1e/du/fe//8334XvSX+Fr1qxZ+te//qWBAwdqxYoV9nn33nuvtmzZorVr1+ZbLy0tTRcvXizR+M6fP68//vjDoa1p06aqWbOmsrKyrrh+v3795OzsrEWLFmnp0qX65z//6RDoMjIy8o3p+uuvl5OTk0P/hw8ftodooDLjChAs58UXX9SCBQu0d+9etWnTxmHeoEGDNHHiRA0aNEgdO3bUpk2b7FcfykKdOnV04403KiYmRikpKZo6dapCQkI0ePBgSZKTk5Pef/999enTR23atFFMTIwaNGigY8eOaf369fLy8tLKlSuvatvPP/+8Fi1apD59+mj48OGqU6eO5s+fr4MHD+rTTz+Vk1PF/P+ofv36mjRpkg4dOqTmzZtr8eLF2rlzp2bNmmV/K3ObNm3UpUsXxcXF6fTp06pTp44++eSTAkNDaGioFi9erNjYWHXq1Ek1atQo8MpGcUyePFl9+vRReHi4HnnkEfvb4L29vR0+J+pSTk5OWrhwoe68807de++9Wr16tW655RaNGjVKK1as0D//+U89/PDDCg0N1blz57Rr1y4tW7ZMhw4dyvcsWVESEhJ066236t5771Xr1q1VrVo1ff7550pJSdF99913xfXr1aunm2++WW+99ZbOnj2b7/bXV199pWHDhumee+5R8+bNdfHiRS1YsEDOzs4Ot+uio6O1cePGq7q1CZQnAhAsJyQkRA8++KDmz5+fb96YMWN08uRJLVu2TEuWLFGfPn30xRdfqF69emUylhdeeEE///yzJkyYoLNnz+rWW2/Vf/7zH3l6etqX6d69u7Zs2aJXXnlF7777rjIzM+Xv76+wsDCHd0uVlJ+fnzZv3qznnntO06ZN0x9//KG2bdtq5cqV+sc//lEa5V2V2rVra/78+XryySc1e/Zs+fn56d1337WHwjwfffSRhgwZookTJ6pWrVp65JFHdPPNN6tHjx4Oyz3xxBPauXOn5s6dq7fffltBQUFXHYCioqK0Zs0ajR07VmPGjJGLi4siIyM1adKkIh8+d3Fx0bJly9SnTx/17dtX69atU1hYmDZu3KjXX39dS5cu1YcffigvLy81b95cL7/8cok/VycwMFADBgxQfHy8FixYoGrVqqlly5ZasmRJvueJCtO/f3+tW7dONWvWzHdFq127durVq5dWrlypY8eOydPTU+3atdMXX3yhLl26lGisQGVgM8R0AJVE9+7dlZqaqt27d1f0UABc43gGCAAAWA4BCAAAWA4BCAAAWA7PAAEAAMvhChAAALAcAhAAALAcPgeoALm5ufr9999Vs2bNUvsYfQAAULaMMTp79qzq169/xQ9zJQAV4Pfff1dgYGBFDwMAAFyFI0eOqGHDhkUuQwAqQM2aNSX9tQO9vLwqeDQAAKA4MjIyFBgYaP87XhQCUAHybnt5eXkRgAAAqGKK8/gKD0EDAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADLIQABAADL4aswAABAuTpwMlNJp88ruG51NfapXiFjIAABAIBykXY+W8MX7dSmxJP2tm
7NfDVtQAd5e7qU61i4BQYAAMrF8EU79e2+VIe2b/el6slFP5b7WAhAAACgzB04malNiSeVY4xDe44x2pR4UgdTz5XreAhAAACgzCWdPl/k/EOnCEAAAOAaE1THs8j5wXXL92FoAhAAAChzTXxrqFszXznbbA7tzjabujXzLfd3gxGAAABAuZg2oIO6hvg4tHUN8dG0AR3KfSy8DR4AAJQLb08XffhIZx1MPadDp87xOUAAAMA6GvtUXPDJwy0wAABgOQQgAABgOQQgAABgOZUiAE2fPl3BwcFyd3dXWFiYtm3bVuTyaWlpGjp0qAICAuTm5qbmzZtr9erVf6tPAABgHRUegBYvXqzY2FiNHTtWO3bsULt27dSrVy+dOHGiwOWzs7PVo0cPHTp0SMuWLdPevXs1e/ZsNWjQ4Kr7BAAA1mIz5rIv5ShnYWFh6tSpk959911JUm5urgIDA/Xkk0/q+eefz7f8jBkzNHnyZP32229ycSn4m2NL2uflMjIy5O3trfT0dHl5ef2N6gAAQHkpyd/vCr0ClJ2dre3btysqKsre5uTkpKioKG3ZsqXAdVasWKHw8HANHTpUfn5+uu666/T6668rJyfnqvvMyspSRkaGwwQAAK5dFRqAUlNTlZOTIz8/P4d2Pz8/JScnF7jOgQMHtGzZMuXk5Gj16tUaPXq0pkyZoldfffWq+5wwYYK8vb3tU2BgYClUBwAAKqsKfwaopHJzc1WvXj3NmjVLoaGh6t+/v1588UXNmDHjqvuMi4tTenq6fTpy5EgpjhgAAFQ2FfpJ0D4+PnJ2dlZKSopDe0pKivz9/QtcJyAgQC4uLnJ2dra3tWrVSsnJycrOzr6qPt3c3OTm5vY3qwEAAFVFhV4BcnV1VWhoqOLj4+1tubm5io+PV3h4eIHrdO3aVfv27VNubq69LSEhQQEBAXJ1db2qPgEAgLVU+C2w2NhYzZ49W/Pnz9eePXv0+OOP69y5c4qJiZEkRUdHKy4uzr78448/rtOnT2vEiBFKSEjQqlWr9Prrr2vo0KHF7hMAAFhbhX8Zav/+/XXy5EmNGTNGycnJat++vdasWWN/iPnw4cNycvpfTgsMDNTatWv11FNPqW3btmrQoIFGjBih5557rth9AgAAa6vwzwGqjPgcIAAAqp4q8zlAAAAAFYEABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALKdSBKDp06crODhY7u7uCgsL07Zt2wpddt68ebLZbA6Tu7u7wzIpKSl6+OGHVb9+fXl6eqp3795KTEws6zIAAEAVUeEBaPHixYqNjdXYsWO1Y8cOtWvXTr169dKJEycKXcfLy0vHjx+3T0lJSfZ5xhjdeeedOnDggJYvX64ff/xRQUFBioqK0rlz58qjJAAAUMlVeAB66623NHjwYMXExKh169aaMWOGPD09NWfOnELXsdls8vf3t09+fn72eYmJifruu+/03nvvqVOnTmrRooXee+89XbhwQYsWLSqPkgAAQCVXoQEoOztb27dvV1RUlL3NyclJUVFR2rJlS6HrZWZmKigoSI
GBgerbt69++eUX+7ysrCxJcrgt5uTkJDc3N33zzTcF9peVlaWMjAyHCQAAXLsqNAClpqYqJyfH4QqOJPn5+Sk5ObnAdVq0aKE5c+Zo+fLlWrhwoXJzcxUREaGjR49Kklq2bKlGjRopLi5OZ86cUXZ2tiZNmqSjR4/q+PHjBfY5YcIEeXt726fAwMDSLRQAAFQqFX4LrKTCw8MVHR2t9u3bKzIyUp999pl8fX01c+ZMSZKLi4s+++wzJSQkqE6dOvL09NT69evVp08fOTkVXG5cXJzS09Pt05EjR8qzJAAAUM6qVeTGfXx85OzsrJSUFIf2lJQU+fv7F6sPFxcXdejQQfv27bO3hYaGaufOnUpPT1d2drZ8fX0VFhamjh07FtiHm5ub3Nzcrr4QAABQpVToFSBXV1eFhoYqPj7e3pabm6v4+HiFh4cXq4+cnBzt2rVLAQEB+eZ5e3vL19dXiYmJ+uGHH9S3b99SGzsAAKi6KvQKkCTFxsZq4MCB6tixozp37qypU6fq3LlziomJkSRFR0erQYMGmjBhgiRp/Pjx6tKli0JCQpSWlqbJkycrKSlJgwYNsve5dOlS+fr6qlGjRtq1a5dGjBihO++8Uz179qyQGgEAQOVS4QGof//+OnnypMaMGaPk5GS1b99ea9assT8YffjwYYdnd86cOaPBgwcrOTlZtWvXVmhoqDZv3qzWrVvblzl+/LhiY2OVkpKigIAARUdHa/To0eVeGwAAqJxsxhhT0YOobDIyMuTt7a309HR5eXlV9HAAAEAxlOTvd5V7FxgAAMDfRQACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWQwACAACWc1UBaMeOHdq1a5f99fLly3XnnXfqhRdeUHZ2don7mz59uoKDg+Xu7q6wsDBt27at0GXnzZsnm83mMLm7uzssk5mZqWHDhqlhw4by8PBQ69atNWPGjBKPCwAAXJuuKgANGTJECQkJkqQDBw7ovvvuk6enp5YuXapnn322RH0tXrxYsbGxGjt2rHbs2KF27dqpV69eOnHiRKHreHl56fjx4/YpKSnJYX5sbKzWrFmjhQsXas+ePRo5cqSGDRumFStWlLxYAABwzbmqAJSQkKD27dtLkpYuXapu3brp448/1rx58/Tpp5+WqK+33npLgwcPVkxMjP1Kjaenp+bMmVPoOjabTf7+/vbJz8/PYf7mzZs1cOBAde/eXcHBwXr00UfVrl27Iq8sAQAA67iqAGSMUW5uriRp3bp1uu222yRJgYGBSk1NLXY/2dnZ2r59u6Kiov43ICcnRUVFacuWLYWul5mZqaCgIAUGBqpv37765ZdfHOZHRERoxYoVOnbsmIwxWr9+vRISEtSzZ88C+8vKylJGRobDBAAArl1XFYA6duyoV199VQsWLNDGjRv1j3/8Q5J08ODBfFdjipKamqqcnJx86/j5+Sk5ObnAdVq0aKE5c+Zo+fLlWrhwoXJzcxUREaGjR4/al5k2bZpat26thg0bytXVVb1799b06dPVrVu3AvucMGGCvL297VNgYGCxawAAAFXPVQWgqV
OnaseOHRo2bJhefPFFhYSESJKWLVumiIiIUh3g5cLDwxUdHa327dsrMjJSn332mXx9fTVz5kz7MtOmTdN3332nFStWaPv27ZoyZYqGDh2qdevWFdhnXFyc0tPT7dORI0fKtAYAAFCxql3NSm3btnV4F1ieyZMny9nZ2f560aJFuuOOO1S9evUC+/Hx8ZGzs7NSUlIc2lNSUuTv71+ssbi4uKhDhw7at2+fJOnChQt64YUX9Pnnn9uvTLVt21Y7d+7Um2++6XC7LY+bm5vc3NyKtT0AAFD1lernALm7u8vFxcX+esiQIfnCzaVcXV0VGhqq+Ph4e1tubq7i4+MVHh5erG3m5ORo165dCggIkCT9+eef+vPPP+Xk5Fias7Oz/bklAABgbVd1Bai4jDFXXCY2NlYDBw5Ux44d1blzZ02dOlXnzp1TTEyMJCk6OloNGjTQhAkTJEnjx49Xly5dFBISorS0NE2ePFlJSUkaNGiQpL/eIh8ZGalRo0bJw8NDQUFB2rhxoz788EO99dZbZVcsAACoMso0ABVH//79dfLkSY0ZM0bJyclq37691qxZY38w+vDhww5Xc86cOaPBgwcrOTlZtWvXVmhoqDZv3qzWrVvbl/nkk08UFxenBx54QKdPn1ZQUJBee+01PfbYY+VeHwAAqHxspjiXaa5SzZo19dNPP6lJkyZltYkykZGRIW9vb6Wnp8vLy6uihwMAAIqhJH+/+S4wAABgOQQgAABgOWUagIKCghzeFQYAAFAZXFUAatKkiU6dOpWvPS0tzeF5n927d/OpygAAoNK5qgB06NAh5eTk5GvPysrSsWPH/vagAAAAylKJ3ga/YsUK+7/Xrl0rb29v++ucnBzFx8crODi41AYHAABQFkoUgO688077vwcOHOgwz8XFRcHBwZoyZUqpDAwAAKCsFDsA/fzzz/rzzz/l7Oysxo0b6/vvv5ePj09Zjg0AAKBMFPsZoA4dOuj06dOSJJvNJpvNVmaDAgAAKEvFDkC1atXSgQMHJElJSUl8sSgAAKiyin0LrF+/foqMjLR/63rHjh3l7Oxc4LJ5QQkAAKAyKnYAmjVrlu6++27t27dPw4cP1+DBg1WzZs2yHBsAAECZKNG7wHr37i1J2r59u0aMGEEAAgAAVdJVfRBiYQ9Anzt3Tv/+97//1oAAAADK2lUFoPnz5+vChQv52i9cuKAPP/zwbw8KAACgLJXoFlhGRoaMMTLG6OzZs3J3d7fPy8nJ0erVq1WvXr1SHyQAAEBpKlEAqlWrlv0zgJo3b55vvs1m08svv1xqgwMAACgLJQpA69evlzFGt9xyi5YtW6a6deva57m6uiooKEgXL14s9UECAACUphIFoMjISPu/w8PD7Z8JlOfUqVMKDAws8JviAQAAKoureghakqpVy5+dMjMzHZ4LAgAAqIxKdAUoNjZW0l/P+owePVqenp72eTk5Odq6davat29fqgMEAAAobSUKQD/++KMkyRijXbt2ydXV1T7P1dVV7dq10zPPPFO6IwQAAChlJX4IWpJiYmL0zjvvyMvLq0wGBQAAUJZKFIDyzJ07t7THAQAAUG6u+iFoAACAqooABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALIcABAAALKdSBKDp06crODhY7u7uCgsL07Zt2wpddt68ebLZbA6Tu7u7wzKXz8+bJk+eXNalAACAKqDCA9DixYsVGxursWPHaseOHWrXrp169eqlEydOFLqOl5eXjh8/bp+Skp
Ic5l867/jx45ozZ45sNpv69etX1uUAAIAqoMID0FtvvaXBgwcrJiZGrVu31owZM+Tp6ak5c+YUuo7NZpO/v7998vPzc5h/6Tx/f38tX75cN998s5o0aVLW5QAAgCqgQgNQdna2tm/frqioKHubk5OToqKitGXLlkLXy8zMVFBQkAIDA9W3b1/98ssvhS6bkpKiVatW6ZFHHil0maysLGVkZDhMAADg2lWhASg1NVU5OTn5ruD4+fkpOTm5wHVatGihOXPmaPny5Vq4cKFyc3MVERGho0ePFrj8/PnzVbNmTd19992FjmPChAny9va2T4GBgVdfFAAAqPQq/BZYSYWHhys6Olrt27dXZGSkPvvsM/n6+mrmzJkFLj9nzhw98MAD+R6UvlRcXJzS09Pt05EjR8pq+AAAoBKoVpEb9/HxkbOzs1JSUhzaU1JS5O/vX6w+XFxc1KFDB+3bty/fvK+//lp79+7V4sWLi+zDzc1Nbm5uxR84AACo0ir0CpCrq6tCQ0MVHx9vb8vNzVV8fLzCw8OL1UdOTo527dqlgICAfPM++OADhYaGql27dqU2ZgAAUPVV6BUgSYqNjdXAgQPVsWNHde7cWVOnTtW5c+cUExMjSYqOjlaDBg00YcIESdL48ePVpUsXhYSEKC0tTZMnT1ZSUpIGDRrk0G9GRoaWLl2qKVOmlHtNAACgcqvwANS/f3+dPHlSY8aMUXJystq3b681a9bYH4w+fPiwnJz+d6HqzJkzGjx4sJKTk1W7dm2FhoZq8+bNat26tUO/n3zyiYwxGjBgQLnWAwAAKj+bMcZU9CAqm4yMDHl7eys9PV1eXl4VPRwAAFAMJfn7XeXeBQYAAPB3EYAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlEIAAAIDlVIoANH36dAUHB8vd3V1hYWHatm1bocvOmzdPNpvNYXJ3d8+33J49e3THHXfI29tb1atXV6dOnXT48OGyLAMAAFQRFR6AFi9erNjYWI0dO1Y7duxQu3bt1KtXL504caLQdby8vHT8+HH7lJSU5DB///79uvHGG9WyZUtt2LBBP//8s0aPHl1gUAIAANZjM8aYihxAWFiYOnXqpHfffVeSlJubq8DAQD355JN6/vnn8y0/b948jRw5UmlpaYX2ed9998nFxUULFiy4qjFlZGTI29tb6enp8vLyuqo+AABA+SrJ3+8KvQKUnZ2t7du3Kyoqyt7m5OSkqKgobdmypdD1MjMzFRQUpMDAQPXt21e//PKLfV5ubq5WrVql5s2bq1evXqpXr57CwsL0f//3f4X2l5WVpYyMDIcJAABcuyo0AKWmpionJ0d+fn4O7X5+fkpOTi5wnRYtWmjOnDlavny5Fi5cqNzcXEVEROjo0aOSpBMnTigzM1MTJ05U79699d///ld33XWX7r77bm3cuLHAPidMmCBvb2/7FBgYWLqFAgCASqVaRQ+gpMLDwxUeHm5/HRERoVatWmnmzJl65ZVXlJubK0nq27evnnrqKUlS+/bttXnzZs2YMUORkZH5+oyLi1NsbKz9dUZGBiEIAI
BrWIUGIB8fHzk7OyslJcWhPSUlRf7+/sXqw8XFRR06dNC+ffvsfVarVk2tW7d2WK5Vq1b65ptvCuzDzc1Nbm5uV1EBAACoiir0Fpirq6tCQ0MVHx9vb8vNzVV8fLzDVZ6i5OTkaNeuXQoICLD32alTJ+3du9dhuYSEBAUFBZXe4AEAQJVV4bfAYmNjNXDgQHXs2FGdO3fW1KlTde7cOcXExEiSoqOj1aBBA02YMEGSNH78eHXp0kUhISFKS0vT5MmTlZSUpEGDBtn7HDVqlPr3769u3brp5ptv1po1a7Ry5Upt2LChIkoEAACVTIUHoP79++vkyZMaM2aMkpOT1b59e61Zs8b+YPThw4fl5PS/C1VnzpzR4MGDlZycrNq1ays0NFSbN292uOV11113acaMGZowYYKGDx+uFi1a6NNPP9WNN95Y7vUBAIDKp8I/B6gy4nOAAACoeqrM5wABAABUBAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwnGoVPQCrOXAyU0mnzyu4bnU19qle0cMBAMCSCEDlJO18toYv2qlNiSftbd2a+WragA7y9nSpwJEBAGA93AIrJ8MX7dS3+1Id2r7dl6onF/1YQSMCAMC6CEDl4MDJTG1KPKkcYxzac4zRpsSTOph6roJGBgCANRGAykHS6fNFzj90igAEAEB5IgCVg6A6nkXOD67Lw9AAAJQnAlA5aOJbQ92a+crZZnNod7bZ1K2ZL+8GAwCgnBGAysm0AR3UNcTHoa1riI+mDehQQSMCAMC6eBt8OfH2dNGHj3TWwdRzOnTqHJ8DBABABSIAlbPGPgQfAAAqGrfAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5RCAAACA5fBVGAUwxkiSMjIyKngkAACguPL+buf9HS8KAagAZ8+elSQFBgZW8EgAAEBJnT17Vt7e3kUuYzPFiUkWk5ubq99//101a9aUzWYr1b4zMjIUGBioI0eOyMvLq1T7rmyo9dplpXqp9dplpXqtUqsxRmfPnlX9+vXl5FT0Uz5cASqAk5OTGjZsWKbb8PLyuqZ/CC9FrdcuK9VLrdcuK9VrhVqvdOUnDw9BAwAAyyEAAQAAyyEAlTM3NzeNHTtWbm5uFT2UMket1y4r1Uut1y4r1WulWouLh6ABAIDlcAUIAABYDgEIAABYDgEIAABYDgEIAABYDgGoBI4dO6YHH3xQdevWlYeHh66//nr98MMP9vnjxo1Ty5YtVb16ddWuXVtRUVHaunWrQx/BwcGy2WwO08SJE4vc7h9//KGhQ4eqbt26qlGjhvr166eUlJQyqTHP3611w4YN+erMm77//vtCt9u9e/d8yz/22GNlWqt05Xov9dhjj8lms2nq1KkO7adPn9YDDzwgLy8v1apVS4888ogyMzOL3G5lPLaXKqjWQ4cO6ZFHHlHjxo3l4eGhpk2bauzYscrOzi5yuxVxbEvjuFaVc1b6+/VWpfP2SrU+/PDD+cbUu3dvhz6ulXP2SrVWpXO2PPFJ0MV05swZde3aVTfffLO++OIL+fr6KjExUbVr17Yv07x5c7377rtq0qSJLly4oLfffls9e/bUvn375Ovra19u/PjxGjx4sP11zZo1i9z2U089pVWrVmnp0qXy9vbWsGHDdPfdd+vbb78t/UJVOrVGRETo+PHjDv2OHj1a8fHx6tixY5HbHzx4sMaPH29/7enpWboFXqY49eb5/PPP9d1336l+/fr55j3wwAM6fvy4vvzyS/3555+KiYnRo48+qo8//rjQbVfGY5unsFp/++035ebmaubMmQ
oJCdHu3bs1ePBgnTt3Tm+++WaR2y/PY1tax1Wq/OesVDr1VpXztri19u7dW3PnzrW/vvwt4NfSOVtUrVXlnC13BsXy3HPPmRtvvLFE66SnpxtJZt26dfa2oKAg8/bbbxe7j7S0NOPi4mKWLl1qb9uzZ4+RZLZs2VKi8RRXadV6qezsbOPr62vGjx9fZD+RkZFmxIgRJdr231Xceo8ePWoaNGhgdu/ene84/vrrr0aS+f777+1tX3zxhbHZbObYsWMF9leZj21RtRbkjTfeMI0bNy5ymfI+tqVVa1U4Z40pm2NbWc/b4tQ6cOBA07dv30LnX0vn7JVqLUhlPGfLG7fAimnFihXq2LGj7rnnHtWrV08dOnTQ7NmzC10+Oztbs2bNkre3t9q1a+cwb+LEiapbt646dOigyZMn6+LFi4X2s337dv3555+Kioqyt7Vs2VKNGjXSli1b/n5hBSjNWi/t89SpU4qJibni9j/66CP5+PjouuuuU1xcnM6fP3/VtRRHcerNzc3VQw89pFGjRqlNmzb5+tiyZYtq1arl8L/kqKgoOTk55bsNmqeyHtsr1VqQ9PR01alT54rLleexLc1aK/s5K5XNsa2s521xf0dt2LBB9erVU4sWLfT444/r1KlT9nnX0jkrFV1rQSrjOVvuKjqBVRVubm7Gzc3NxMXFmR07dpiZM2cad3d3M2/ePIflVq5caapXr25sNpupX7++2bZtm8P8KVOmmPXr15uffvrJvPfee6ZWrVrmqaeeKnS7H330kXF1dc3X3qlTJ/Pss8+WTnGXKa1aL9WnTx/Tp0+fK2575syZZs2aNebnn382CxcuNA0aNDB33XXX366pKMWp9/XXXzc9evQwubm5xpj8VwVee+0107x583x9+/r6mv/85z8FbreyHtsr1Xq5xMRE4+XlZWbNmlXktsv72JZWrVXhnDWmbI5tZT1vi1ProkWLzPLly83PP/9sPv/8c9OqVSvTqVMnc/HiRWPMtXXOXqnWy1XWc7a8EYCKycXFxYSHhzu0Pfnkk6ZLly4ObZmZmSYxMdFs2bLF/Pvf/zbBwcEmJSWl0H4/+OADU61aNfPHH38UOL8iTrjSrvXIkSPGycnJLFu2rMRjiY+PN5LMvn37SrxucV2p3h9++MH4+fk5XBavqgGoNGq91NGjR03Tpk3NI488UuKxlPWxLe1a81TGc9aY0q+3Mp+3xf0ddan9+/c73Ka/Vs7Zglxe66Uq8zlb3rgFVkwBAQFq3bq1Q1urVq10+PBhh7bq1asrJCREXbp00QcffKBq1arpgw8+KLTfsLAwXbx4UYcOHSpwvr+/v7Kzs5WWlubQnpKSIn9//6uq5UpKu9a5c+eqbt26uuOOO0o8lrCwMEnSvn37SrxucV2p3q+//lonTpxQo0aNVK1aNVWrVk1JSUl6+umnFRwcLOmv43TixAmHPi5evKjTp08Xepwq47EtTq15fv/9d918882KiIjQrFmzSjyWsj62pVnr5eOubOesVPr1Vubztri/oy7VpEkT+fj42Md0rZyzBbm81jyV/ZwtbwSgYuratav27t3r0JaQkKCgoKAi18vNzVVWVlah83fu3CknJyfVq1evwPmhoaFycXFRfHy8vW3v3r06fPiwwsPDS1BB8ZVmrcYYzZ07V9HR0XJxcSnxWHbu3Cnpr18CZeVK9T700EP6+eeftXPnTvtUv359jRo1SmvXrpUkhYeHKy0tTdu3b7f38dVXXyk3N9f+S+NylfHYFqdW6a+35Xbv3l2hoaGaO3eunJxK/qukrI9tadVa0Lgr2zkrlW69lf28vZrfUUePHtWpU6fsY7pWztmCXF6rVDXO2XJX0Zegqopt27aZatWqmddee80kJiaajz76yHh6epqFCxcaY/66HRQXF2e2bNliDh06ZH744QcTExNj3NzczO7du40xxmzevNm8/fbbZufOnWb//v1m4cKFxtfX10RHR9u3c/ToUdOiRQuzdetWe9
tjjz1mGjVqZL766ivzww8/mPDw8HyXRCtbrXnWrVtnJJk9e/bk287lte7bt8+MHz/e/PDDD+bgwYNm+fLlpkmTJqZbt25lVmtx6i1IQbcOevfubTp06GC2bt1qvvnmG9OsWTMzYMAA+/yqcGwLcnmtR48eNSEhIebWW281R48eNcePH7dPly5T0ce2NGqtKuesMaX3c2xM5T9vr1Tr2bNnzTPPPGO2bNliDh48aNatW2duuOEG06xZM4dbl9fCOVucWqvKOVveCEAlsHLlSnPdddcZNzc307JlS4cHyC5cuGDuuusuU79+fePq6moCAgLMHXfc4fBg8Pbt201YWJjx9vY27u7uplWrVub11193OCEPHjxoJJn169c79P3EE0+Y2rVrG09PT3PXXXc5/OBWxlrzDBgwwERERBS4jctrPXz4sOnWrZupU6eOcXNzMyEhIWbUqFEmPT29TGq8VFH1FqSgPxynTp0yAwYMMDVq1DBeXl4mJibGnD171j6/Khzbglxe69y5c42kAqc8leXY/t1aq9I5a0zp/BwbUzXO26JqPX/+vOnZs6fx9fU1Li4uJigoyAwePNgkJyc79HEtnLPFqbUqnbPlyWaMMeV5xQkAAKCi8QwQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQAACwHAIQgL+te/fuGjlyZLlv9+GHH9add95Z7tstT8HBwZo6dWpFDwO45hCAAFRZ77zzjubNm1fu2503b55q1apVonUIMkDlUq2iBwAABcnJyZHNZivySxu9vb3LcUQAriVcAQKuId27d9fw4cP17LPPqk6dOvL399e4ceMkSYcOHZLNZrN/o7MkpaWlyWazacOGDZKkDRs2yGazae3aterQoYM8PDx0yy236MSJE/riiy/UqlUreXl56f7779f58+cdtn3x4kUNGzZM3t7e8vHx0ejRo3XpN+1kZWXpmWeeUYMGDVS9enWFhYXZtyv976rKihUr1Lp1a7m5uenw4cNF1nv5LbCi6s9js9n03nvvqU+fPvLw8FCTJk20bNky+/y8fZCWlmZv27lzp2w2mw4dOqQNGzYoJiZG6enpstlsstls+bZxue7duyspKUlPPfWUfZ08n376qdq0aSM3NzcFBwdrypQpRfb1/vvvq1atWvZvJN+9e7f69OmjGjVqyM/PTw899JBSU1OLvU+MMRo3bpwaNWokNzc31a9fX8OHDy9yDMA1oWK/igxAaYqMjDReXl5m3LhxJiEhwcyfP9/YbDbz3//+1/5lhz/++KN9+TNnzjh8AeL69euNJNOlSxfzzTffmB07dpiQkBATGRlpevbsaXbs2GE2bdpk6tatayZOnOiw3Ro1apgRI0aY3377zSxcuNB4eno6fGnjoEGDTEREhNm0aZPZt2+fmTx5snFzczMJCQnGmL++sNHFxcVERESYb7/91vz222/m3LlzRdY7cOBA07dv32LVn0eSqVu3rpk9e7bZu3eveemll4yzs7P59ddfHfbBmTNn7Ov8+OOPRpI5ePCgycrKMlOnTjVeXl72b9S+9As0C3Lq1CnTsGFDM378eIdv4f7hhx+Mk5OTGT9+vNm7d6+ZO3eu8fDwMHPnzrWve+kXlk6aNMnUrVvX/o3dZ86cMb6+viYuLs7s2bPH7Nixw/To0cPcfPPNxd4nS5cuNV5eXmb16tUmKSnJbN269YpfogpcCwhAwDUkMjLS3HjjjQ5tnTp1Ms8991yJAtC6devsy0yYMMFIMvv377e3DRkyxPTq1cthu61atTK5ubn2tueee860atXKGGNMUlKScXZ2NseOHXMY26233mri4uKMMf/7xuqdO3cWu96CAlBh9eeRZB577DGHZcLCwszjjz/usA8KC0B5Y/X29i72OI0p+JvX77//ftOjRw+HtlGjRpnWrVvnW+/ZZ581AQEBZvfu3fZ5r7zyiunZs6fD+keOHDGSzN69e40xV94nU6ZMMc2bNz
fZ2dklqgeo6rgFBlxj2rZt6/A6ICBAJ06cuOo+/Pz85OnpqSZNmji0Xd5nly5dHG7thIeHKzExUTk5Odq1a5dycnLUvHlz1ahRwz5t3LhR+/fvt6/j6uqab/wlVZz6w8PD873es2fP39ru1dizZ4+6du3q0Na1a1f7fsszZcoUzZ49W998843atGljb//pp5+0fv16h33asmVLSXLYr0Xtk3vuuUcXLlxQkyZNNHjwYH3++ee6ePFiqdcKVDY8BA1cY1xcXBxe22w25ebm2h8mNpc8l/Pnn39esQ+bzVZon8WVmZkpZ2dnbd++Xc7Ozg7zatSoYf+3h4eHQ4i6Gn93rCXZT+Xlpptu0qpVq7RkyRI9//zz9vbMzEzdfvvtmjRpUr51AgIC7P8uap8EBgZq7969Wrdunb788ks98cQTmjx5sjZu3JhvPeBaQgACLMLX11eSdPz4cXXo0EGSHB6I/ru2bt3q8Pq7775Ts2bN5OzsrA4dOignJ0cnTpzQTTfdVGrbvFrfffedoqOjHV7n7ZNL91Pt2rUl5d9Prq6uDldoiqOgdVq1aqVvv/3Woe3bb79V8+bNHYJi586dNWzYMPXu3VvVqlXTM888I0m64YYb9Omnnyo4OFjVql39r3MPDw/dfvvtuv322zV06FC1bNlSu3bt0g033HDVfQKVHbfAAIvw8PBQly5dNHHiRO3Zs0cbN27USy+9VGr9Hz58WLGxsdq7d68WLVqkadOmacSIEZKk5s2b64EHHlB0dLQ+++wzHTx4UNu2bdOECRO0atWqUhtDcS1dulRz5sxRQkKCxo4dq23btmnYsGGSpJCQEAUGBmrcuHFKTEzUqlWr8r0zKzg4WJmZmYqPj1dqamq+d8QVJDg4WJs2bdKxY8fs79J6+umnFR8fr1deeUUJCQmaP3++3n33XXvAuVRERIRWr16tl19+2f55QkOHDtXp06c1YMAAff/999q/f7/Wrl2rmJiYYge0efPm6YMPPtDu3bt14MABLVy4UB4eHgoKCirW+kBVRQACLGTOnDm6ePGiQkNDNXLkSL366qul1nd0dLQuXLigzp07a+jQoRoxYoQeffRR+/y5c+cqOjpaTz/9tFq0aKE777xT33//vRo1alRqYyiul19+WZ988onatm2rDz/8UIsWLVLr1q0l/XW7aNGiRfrtt9/Utm1bTZo0Kd9+ioiI0GOPPab+/fvL19dXb7zxxhW3OX78eB06dEhNmza1X2W64YYbtGTJEn3yySe67rrrNGbMGI0fP14PP/xwgX3ceOONWrVqlV566SVNmzZN9evX17fffqucnBz17NlT119/vUaOHKlatWoV+flJl6pVq5Zmz56trl27qm3btlq3bp1WrlypunXrFmt9oKqymUtvdAPANc5ms+nzzz+/5r9CA0DRuAIEAAAshwAEoNK69O3dl09ff/11RQ/P7uuvvy5yrAAqH26BAai09u3bV+i8Bg0ayMPDoxxHU7gLFy7o2LFjhc4PCQkpx9EAKA4CEAAAsBxugQEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMshAAEAAMv5//KcgIuL/M9kAAAAAElFTkSuQmCC",
|
243 |
-
"text/plain": [
|
244 |
-
"<Figure size 640x480 with 1 Axes>"
|
245 |
-
]
|
246 |
-
},
|
247 |
-
"metadata": {},
|
248 |
-
"output_type": "display_data"
|
249 |
-
}
|
250 |
-
],
|
251 |
-
"source": [
|
252 |
-
"final_df = pd.DataFrame()\n",
|
253 |
-
"final_df[\"number_input_tokens\"] = valid_df[\"number_input_tokens\"]\n",
|
254 |
-
"final_df[\"number_output_tokens\"] = valid_df[\"number_output_tokens\"]\n",
|
255 |
-
"final_df[\"ttft_s\"] = valid_df[\"ttft_s\"]\n",
|
256 |
-
"final_df[\"end_to_end_latency_s\"] = valid_df[\"end_to_end_latency_s\"]\n",
|
257 |
-
"final_df[\"generation_throughput\"] = valid_df[\"request_output_throughput_token_per_s\"]\n",
|
258 |
-
"\n",
|
259 |
-
"mean_tokens_in = final_df[\"number_input_tokens\"].mean()\n",
|
260 |
-
"mean_tokens_out = valid_df[\"number_output_tokens\"].mean()\n",
|
261 |
-
"print(f\"Mean number of input tokens: {mean_tokens_in}. Mean number of output tokens: {mean_tokens_out}\")\n",
|
262 |
-
"final_df.plot.scatter(x=\"number_input_tokens\", y=\"ttft_s\", title=\"Number of Input Tokens vs. TTFT\")"
|
263 |
-
]
|
264 |
-
},
|
265 |
-
{
|
266 |
-
"cell_type": "code",
|
267 |
-
"execution_count": 15,
|
268 |
-
"id": "a14de79c",
|
269 |
-
"metadata": {},
|
270 |
-
"outputs": [
|
271 |
-
{
|
272 |
-
"data": {
|
273 |
-
"text/plain": [
|
274 |
-
"<Axes: title={'center': 'Token Latencies'}, ylabel='Frequency'>"
|
275 |
-
]
|
276 |
-
},
|
277 |
-
"execution_count": 15,
|
278 |
-
"metadata": {},
|
279 |
-
"output_type": "execute_result"
|
280 |
-
},
|
281 |
-
{
|
282 |
-
"data": {
|
283 |
-
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGzCAYAAADT4Tb9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvAklEQVR4nO3de1TUVb/H8c+AAl7wUioocsRILTO19EiEVhaJ6aHMfLyVIpkdE08m6VNmiaaJWZKdMk3zkqdj+pSXWqWWktbpaI8nL1k9XlIjvIGQFxQTEPb5w+U8TWDCODCwfb/WmrWaPXv/5rt31HzW77d/Mw5jjBEAAIAlfLxdAAAAgCcRbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuABTjcDg0cuRIb5dxVXA4HJo4caK3ywCsQrgBLOFwOEr12Lhxo7dLLZO77rpLbdq08cixNm3apIkTJ+rkyZMeOR6AyqmatwsA4Bn/9V//5fJ88eLFWrduXbH2G2+8sSLLqlQ2bdqkSZMmaciQIapXr563y5Ek/fbbb6pWjf8VA57Ef1GAJR555BGX5998843WrVtXrB2VS0BAgLdLAKzDZSngKpKbm6unn35aoaGh8vf3V6tWrfTqq6/KGHPZsVOmTJGPj4/eeOMNZ9uaNWvUpUsX1apVS4GBgerZs6d+/PFHl3FDhgxR7dq1dfjwYfXq1Uu1a9dWw4YNNWbMGBUWFnpkXjt37tSQIUN03XXXKSAgQMHBwXr00Uf166+/OvtMnDhRY8eOlSQ1b97ceZkuLS3N2ee9995Thw4dVKNGDV1zzTXq37+/Dh486PJeFy+T/eMf/1DXrl1Vs2ZNhYSEaPr06cXqOnfunCZOnKiWLVsqICBAjRs3Vu/evbV//35nn5L23Bw+fFiPPvqogoKC5O/vr5tuukkLFiwodvw33nhDN910k2rWrKn69eurY8eOWrJkiTtLCFiFMzfAVcIYo/vvv18bNmzQ0KFD1b59e3322WcaO3asDh8+rNdee+2SY59//nlNnTpVb7/9toYNGybpwmWwuLg4xcTE6OWXX9bZs2c1e/Zsde7cWdu3b1dYWJhzfGFhoWJiYhQREaFXX31V69ev14wZMxQeHq4nnnjiiue2bt06HThwQPHx8QoODtaPP/6ouXPn6scff9Q333wjh8Oh3r17a+/evXr//ff12muvqUGDBpKkhg0bSpJeeuklvfDCC+rbt68ee+wxZWVl6Y033tAdd9yh7du3u1zGOnHihLp3767evXurb9+++vDDD/XMM8/o5ptv1n333eec87/9278pNTVV/fv316hRo3T69GmtW7dOP/zwg8LDw0ucS2Zmpm677Tbnpu6GDRtqzZo1Gjp0qHJycvTUU09JkubNm6cnn3xSffr00ahRo3Tu3Dnt3LlTf//73zVw4MArXlOgSjMArJSQkGB+/5/4qlWrjCQzZcoUl359+vQxDofD7Nu3z9kmySQkJBhjjHn66aeNj4+PWbRokfP106dPm3r16plhw4a5HCsjI8PUrVvXpT0uLs5IMi+++KJL31tuucV06NDhsvO48847zU033fSnfc6ePVus7f333zeSzFdffeVse+WVV4wk8/PPP7v0TUtLM76+vuall15yaf/+++9NtWrVXNrvvPNOI8ksXrzY2ZaXl2eCg4PNQw895GxbsGCBkWRSUlKK1VZUVOT8Z0kmKSnJ+Xzo0KGmcePGJjs722VM//79Td26dZ1zfeCBBy67LsDVistSwFVi9erV8vX11ZNPPunS/vTTT8sYozVr1ri0G2M0cuRIvf7663rvvfcUFxfnfG3dunU6efKkBgwYoOzsbOfD19dXERER2rBhQ7H3Hz58uMvzLl266MCBAx6ZW40aNZz/fO7cOWVnZ+u2226TJG3btu2y41esWKGioiL17dvXZT7BwcFq0aJFsfnUrl3bZS+Tn5
+fOnXq5DKf5cuXq0GDBvqP//iPYu/ncDhKrMMYo+XLlys2NlbGGJdaYmJidOrUKed86tWrp0OHDun//u//Ljs/4GrDZSngKvHLL7+oSZMmCgwMdGm/ePfUL7/84tK+ePFinTlzRrNnz9aAAQNcXvvpp58kSXfffXeJ71WnTh2X5wEBAc7LPxfVr19fJ06cKPtESnD8+HFNmjRJS5cu1bFjx1xeO3Xq1GXH//TTTzLGqEWLFiW+Xr16dZfnTZs2LRZQ6tevr507dzqf79+/X61atSrTnVBZWVk6efKk5s6dq7lz55bY5+L8nnnmGa1fv16dOnXS9ddfr27dumngwIGKiooq9fsBtiLcAChRVFSUduzYoTfffFN9+/bVNddc43ytqKhI0oV9N8HBwcXG/vED3dfXt1xr7du3rzZt2qSxY8eqffv2ql27toqKitS9e3dnrX+mqKhIDodDa9asKbHW2rVruzy/1HxMKTZmX64O6cKdb78/U/Z7bdu2lXQhlO7Zs0effPKJ1q5dq+XLl+utt97ShAkTNGnSpCuqA6jqCDfAVaJZs2Zav369Tp8+7XL2Zvfu3c7Xf+/666/X9OnTddddd6l79+5KTU11jru4GbZRo0aKjo6uoBmU7MSJE0pNTdWkSZM0YcIEZ/vFs0u/d6nLQeHh4TLGqHnz5mrZsqVH6goPD9ff//53FRQUFDvzcykNGzZUYGCgCgsLS7WutWrVUr9+/dSvXz/l5+erd+/eeumllzRu3DhuMcdVjT03wFWiR48eKiws1JtvvunS/tprr8nhcDjv8vm9tm3bavXq1dq1a5diY2P122+/SZJiYmJUp04dTZ06VQUFBcXGZWVllc8kSnDxLMofz5rMnDmzWN9atWpJUrFvKO7du7d8fX01adKkYscxxrjcUl5aDz30kLKzs4utd0m1XuTr66uHHnpIy5cv1w8//FDs9d+v6x9r8vPzU+vWrWWMKfHfCXA14cwNcJWIjY1V165dNX78eKWlpaldu3b6/PPP9dFHH+mpp5665K3Jt912mz766CP16NFDffr00apVq1SnTh3Nnj1bgwYN0q233qr+/furYcOGSk9P16effqqoqKgSP9TdlZWVpSlTphRrb968uR5++GHdcccdmj59ugoKChQSEqLPP/9cP//8c7H+HTp0kCSNHz9e/fv3V/Xq1RUbG6vw8HBNmTJF48aNU1pamnr16qXAwED9/PPPWrlypR5//HGNGTOmTDUPHjxYixcvVmJiorZs2aIuXbooNzdX69ev14gRI/TAAw+UOG7atGnasGGDIiIiNGzYMLVu3VrHjx/Xtm3btH79eh0/flyS1K1bNwUHBysqKkpBQUHatWuX3nzzTfXs2bPYvirgquOdm7QAlLc/3gpuzIVbuEePHm2aNGliqlevblq0aGFeeeUVl1uTjXG9Ffyijz76yFSrVs3069fPFBYWGmOM2bBhg4mJiTF169Y1AQEBJjw83AwZMsR8++23znFxcXGmVq1axepLSkoqVl9JLt56XdLjnnvuMcYYc+jQIfPggw+aevXqmbp165q//OUv5siRI8VuszbGmMmTJ5uQkBDj4+NT7Lbw5cuXm86dO5tatWqZWrVqmRtuuMEkJCSYPXv2uNRT0i3YcXFxplmzZi5tZ8+eNePHjzfNmzc31atXN8HBwaZPnz5m//79zj4l1ZiZmWkSEhJMaGioc9w999xj5s6d6+zz9ttvmzvuuMNce+21xt/f34SHh5uxY8eaU6dOXXZNAds5jLnCHXAAAACVCHtuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsctV9iV9RUZGOHDmiwMDAS34VOwAAqFyMMTp9+rSaNGkiH58/Pzdz1YWbI0eOKDQ01NtlAAAANxw8eFBNmzb90z5XXbi5+LXkBw8eVJ06dbxcDQAAKI2cnByFhoaW6udFrrpwc/FSVJ06dQg3AABUMaXZUsKGYgAAYBXCDQ
AAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwilfDzVdffaXY2Fg1adJEDodDq1atuuyYjRs36tZbb5W/v7+uv/56LVq0qNzrBAAAVYdXw01ubq7atWunWbNmlar/zz//rJ49e6pr167asWOHnnrqKT322GP67LPPyrlSAABQVXj1hzPvu+8+3XfffaXuP2fOHDVv3lwzZsyQJN144436+uuv9dprrykmJqa8ygQAAFVIldpzs3nzZkVHR7u0xcTEaPPmzZcck5eXp5ycHJcHAACwl1fP3JRVRkaGgoKCXNqCgoKUk5Oj3377TTVq1Cg2Jjk5WZMmTaqoEhX27KcV9l6ekjatp7dLAABcAp8rZVelzty4Y9y4cTp16pTzcfDgQW+XBAAAylGVOnMTHByszMxMl7bMzEzVqVOnxLM2kuTv7y9/f/+KKA8AAFQCVerMTWRkpFJTU13a1q1bp8jISC9VBAAAKhuvhpszZ85ox44d2rFjh6QLt3rv2LFD6enpki5cUho8eLCz//Dhw3XgwAH99a9/1e7du/XWW2/pb3/7m0aPHu2N8gEAQCXk1XDz7bff6pZbbtEtt9wiSUpMTNQtt9yiCRMmSJKOHj3qDDqS1Lx5c3366adat26d2rVrpxkzZuidd97hNnAAAODk1T03d911l4wxl3y9pG8fvuuuu7R9+/ZyrAoAAFRlVWrPDQAAwOUQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFa+Hm1mzZiksLEwBAQGKiIjQli1b/rT/zJkz1apVK9WoUUOhoaEaPXq0zp07V0HVAgCAys6r4WbZsmVKTExUUlKStm3bpnbt2ikmJkbHjh0rsf+SJUv07LPPKikpSbt27dL8+fO1bNkyPffccxVcOQAAqKy8Gm5SUlI0bNgwxcfHq3Xr1pozZ45q1qypBQsWlNh/06ZNioqK0sCBAxUWFqZu3bppwIABlz3bAwAArh5eCzf5+fnaunWroqOj/1mMj4+io6O1efPmEsfcfvvt2rp1qzPMHDhwQKtXr1aPHj0u+T55eXnKyclxeQAAAHtV89YbZ2dnq7CwUEFBQS7tQUFB2r17d4ljBg4cqOzsbHXu3FnGGJ0/f17Dhw//08tSycnJmjRpkkdrBwAAlZfXNxSXxcaNGzV16lS99dZb2rZtm1asWKFPP/1UkydPvuSYcePG6dSpU87HwYMHK7BiAABQ0bx25qZBgwby9fVVZmamS3tmZqaCg4NLHPPCCy9o0KBBeuyxxyRJN998s3Jzc/X4449r/Pjx8vEpntX8/f3l7+/v+QkAAIBKyWtnbvz8/NShQwelpqY624qKipSamqrIyMgSx5w9e7ZYgPH19ZUkGWPKr1gAAFBleO3MjSQlJiYqLi5OHTt2VKdOnTRz5kzl5uYqPj5ekjR48GCFhIQoOTlZkhQbG6uUlBTdcsstioiI0L59+/TCCy8oNjbWGXIAAMDVzavhpl+/fsrKytKECROUkZGh9u3ba+3atc5Nxunp6S5nap5//nk5HA49//zzOnz4sB
o2bKjY2Fi99NJL3poCAACoZBzmKruek5OTo7p16+rUqVOqU6eOx48f9uynHj9meUub1tPbJQAALoHPlQvK8vldpe6WAgAAuBzCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFbfCzYEDBzxdBwAAgEe4FW6uv/56de3aVe+9957OnTvn6ZoAAADc5la42bZtm9q2bavExEQFBwfr3//937Vlyxa3Cpg1a5bCwsIUEBCgiIiIyx7n5MmTSkhIUOPGjeXv76+WLVtq9erVbr03AACwj1vhpn379nr99dd15MgRLViwQEePHlXnzp3Vpk0bpaSkKCsrq1THWbZsmRITE5WUlKRt27apXbt2iomJ0bFjx0rsn5+fr3vvvVdpaWn68MMPtWfPHs2bN08hISHuTAMAAFjoijYUV6tWTb1799YHH3ygl19+Wfv27dOYMWMUGhqqwYMH6+jRo386PiUlRcOGDVN8fLxat26tOXPmqGbNmlqwYEGJ/RcsWKDjx49r1apVioqKUlhYmO688061a9fuSqYBAAAsckXh5ttvv9WIESPUuHFjpaSkaMyYMdq/f7/WrVunI0eO6IEHHrjk2Pz8fG3dulXR0dH/LMbHR9HR0dq8eXOJYz7++GNFRkYqISFBQUFBatOmjaZOnarCwsJLvk9eXp5ycnJcHgAAwF7V3BmUkpKihQsXas+ePerRo4cWL16sHj16yMfnQlZq3ry5Fi1apLCwsEseIzs7W4WFhQoKCnJpDwoK0u7du0scc+DAAX3xxRd6+OGHtXr1au3bt08jRoxQQUGBkpKSShyTnJysSZMmuTNNAABQBbkVbmbPnq1HH31UQ4YMUePGjUvs06hRI82fP/+KivujoqIiNWrUSHPnzpWvr686dOigw4cP65VXXrlkuBk3bpwSExOdz3NychQaGurRugAAQOXhVrj56aefLtvHz89PcXFxl3y9QYMG8vX1VWZmpkt7ZmamgoODSxzTuHFjVa9eXb6+vs62G2+8URkZGcrPz5efn1+xMf7+/vL3979svQAAwA5u7blZuHChPvjgg2LtH3zwgd59991SHcPPz08dOnRQamqqs62oqEipqamKjIwscUxUVJT27dunoqIiZ9vevXvVuHHjEoMNAAC4+rgVbpKTk9WgQYNi7Y0aNdLUqVNLfZzExETNmzdP7777rnbt2qUnnnhCubm5io+PlyQNHjxY48aNc/Z/4okndPz4cY0aNUp79+7Vp59+qqlTpyohIcGdaQAAAAu5dVkqPT1dzZs3L9berFkzpaenl/o4/fr1U1ZWliZMmKCMjAy1b99ea9eudW4yTk9Pd25SlqTQ0FB99tlnGj16tNq2bauQkBCNGjVKzzzzjDvTAAAAFnIr3DRq1Eg7d+4sdjfUd999p2uvvbZMxxo5cqRGjhxZ4msbN24s1hYZGalvvvmmTO8BAACuHm5dlhowYICefPJJbdiwQYWFhSosLNQXX3yhUaNGqX///p6uEQAAoNTcOnMzefJkpaWl6Z577lG1ahcOUVRUpMGDB5dpzw0AAICnuRVu/Pz8tGzZMk2ePFnfffedatSooZtvvlnNmjXzdH0AAA
Bl4la4uahly5Zq2bKlp2oBAAC4Ym6Fm8LCQi1atEipqak6duyYy/fOSNIXX3zhkeIAAADKyq1wM2rUKC1atEg9e/ZUmzZt5HA4PF0XAACAW9wKN0uXLtXf/vY39ejRw9P1AAAAXBG3bgX38/PT9ddf7+laAAAArphb4ebpp5/W66+/LmOMp+sBAAC4Im5dlvr666+1YcMGrVmzRjfddJOqV6/u8vqKFSs8UhwAAEBZuRVu6tWrpwcffNDTtQAAAFwxt8LNwoULPV0HAACAR7i150aSzp8/r/Xr1+vtt9/W6dOnJUlHjhzRmTNnPFYcAABAWbl15uaXX35R9+7dlZ6erry8PN17770KDAzUyy+/rLy8PM2ZM8fTdQIAAJSKW2duRo0apY4dO+rEiROqUaOGs/3BBx9Uamqqx4oDAAAoK7fO3PzP//yPNm3aJD8/P5f2sLAwHT582COFAQAAuMOtMzdFRUUqLCws1n7o0CEFBgZecVEAAADucivcdOvWTTNnznQ+dzgcOnPmjJKSkvhJBgAA4FVuXZaaMWOGYmJi1Lp1a507d04DBw7UTz/9pAYNGuj999/3dI0AAACl5la4adq0qb777jstXbpUO3fu1JkzZzR06FA9/PDDLhuMAQAAKppb4UaSqlWrpkceecSTtQAAAFwxt8LN4sWL//T1wYMHu1UMAADAlXIr3IwaNcrleUFBgc6ePSs/Pz/VrFmTcAMAALzGrbulTpw44fI4c+aM9uzZo86dO7OhGAAAeJXbvy31Ry1atNC0adOKndUBAACoSB4LN9KFTcZHjhzx5CEBAADKxK09Nx9//LHLc2OMjh49qjfffFNRUVEeKQwAAMAdboWbXr16uTx3OBxq2LCh7r77bs2YMcMTdQEAALjFrXBTVFTk6ToAAAA8wqN7bgAAALzNrTM3iYmJpe6bkpLizlsAAAC4xa1ws337dm3fvl0FBQVq1aqVJGnv3r3y9fXVrbfe6uzncDg8UyUAAEApuRVuYmNjFRgYqHfffVf169eXdOGL/eLj49WlSxc9/fTTHi0SAACgtNzaczNjxgwlJyc7g40k1a9fX1OmTOFuKQAA4FVuhZucnBxlZWUVa8/KytLp06evuCgAAAB3uRVuHnzwQcXHx2vFihU6dOiQDh06pOXLl2vo0KHq3bu3p2sEAAAoNbf23MyZM0djxozRwIEDVVBQcOFA1app6NCheuWVVzxaIAAAQFm4FW5q1qypt956S6+88or2798vSQoPD1etWrU8WhwAAEBZXdGX+B09elRHjx5VixYtVKtWLRljPFUXAACAW9wKN7/++qvuuecetWzZUj169NDRo0clSUOHDuU2cAAA4FVuhZvRo0erevXqSk9PV82aNZ3t/fr109q1az1WHAAAQFm5tefm888/12effaamTZu6tLdo0UK//PKLRwoDAABwh1tnbnJzc13O2Fx0/Phx+fv7X3FRAAAA7nIr3HTp0kWLFy92Pnc4HCoqKtL06dPVtWtXjxUHAABQVm5dlpo+fbruueceffvtt8rPz9df//pX/fjjjzp+/Lj+93//19M1AgAAlJpbZ27atGmjvXv3qnPnznrggQeUm5ur3r17a/v27QoPD/d0jQAAAKVW5jM3BQUF6t69u+bMmaPx48eXR00AAABuK/OZm+rVq2vnzp3lUQsAAMAVc+uy1COPPKL58+d7uhYAAIAr5taG4vPnz2vBggVav369OnToUOw3pVJSUjxSHAAAQFmVKdwcOHBAYWFh+uGHH3TrrbdKkvbu3evSx+FweK46AACAMipTuGnRooWOHj2qDRs2SLrwcwv/+Z//qaCgoHIpDgAAoKzKtOfmj7/6vWbNGuXm5nq0IAAAgCvh1obii/4YdgAAALytTOHG4XAU21PDHhsAAFCZlGnPjTFGQ4YMcf445rlz5zR8+PBid0utWLHCcxUCAACUQZnCTVxcnMvzRx55xKPFAAAAXKkyhZuFCxeWVx0AAA
AecUUbigEAACobwg0AALBKpQg3s2bNUlhYmAICAhQREaEtW7aUatzSpUvlcDjUq1ev8i0QAABUGV4PN8uWLVNiYqKSkpK0bds2tWvXTjExMTp27NifjktLS9OYMWPUpUuXCqoUAABUBV4PNykpKRo2bJji4+PVunVrzZkzRzVr1tSCBQsuOaawsFAPP/ywJk2apOuuu+5Pj5+Xl6ecnByXBwAAsJdXw01+fr62bt2q6OhoZ5uPj4+io6O1efPmS4578cUX1ahRIw0dOvSy75GcnKy6des6H6GhoR6pHQAAVE5eDTfZ2dkqLCws9sObQUFBysjIKHHM119/rfnz52vevHmleo9x48bp1KlTzsfBgwevuG4AAFB5lel7brzt9OnTGjRokObNm6cGDRqUaoy/v7/zG5UBAID9vBpuGjRoIF9fX2VmZrq0Z2ZmKjg4uFj//fv3Ky0tTbGxsc62oqIiSVK1atW0Z88ehYeHl2/RAACgUvPqZSk/Pz916NBBqampzraioiKlpqYqMjKyWP8bbrhB33//vXbs2OF83H///eratat27NjBfhoAAOD9y1KJiYmKi4tTx44d1alTJ82cOVO5ubmKj4+XJA0ePFghISFKTk5WQECA2rRp4zK+Xr16klSsHQAAXJ28Hm769eunrKwsTZgwQRkZGWrfvr3Wrl3r3GScnp4uHx+v37EOAACqCK+HG0kaOXKkRo4cWeJrGzdu/NOxixYt8nxBAACgyuKUCAAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsUinCzaxZsxQWFqaAgABFRERoy5Ytl+w7b948denSRfXr11f9+vUVHR39p/0BAMDVxevhZtmyZUpMTFRSUpK2bdumdu3aKSYmRseOHSux/8aNGzVgwABt2LBBmzdvVmhoqLp166bDhw9XcOUAAKAy8nq4SUlJ0bBhwxQfH6/WrVtrzpw5qlmzphYsWFBi///+7//WiBEj1L59e91www165513VFRUpNTU1AquHAAAVEZeDTf5+fnaunWroqOjnW0+Pj6Kjo7W5s2bS3WMs2fPqqCgQNdcc02Jr+fl5SknJ8flAQAA7OXVcJOdna3CwkIFBQW5tAcFBSkjI6NUx3jmmWfUpEkTl4D0e8nJyapbt67zERoaesV1AwCAysvrl6WuxLRp07R06VKtXLlSAQEBJfYZN26cTp065XwcPHiwgqsEAAAVqZo337xBgwby9fVVZmamS3tmZqaCg4P/dOyrr76qadOmaf369Wrbtu0l+/n7+8vf398j9QIAgMrPq2du/Pz81KFDB5fNwBc3B0dGRl5y3PTp0zV58mStXbtWHTt2rIhSAQBAFeHVMzeSlJiYqLi4OHXs2FGdOnXSzJkzlZubq/j4eEnS4MGDFRISouTkZEnSyy+/rAkTJmjJkiUKCwtz7s2pXbu2ateu7bV5AACAysHr4aZfv37KysrShAkTlJGRofbt22vt2rXOTcbp6eny8fnnCabZs2crPz9fffr0cTlOUlKSJk6cWJGlAwCASsjr4UaSRo4cqZEjR5b42saNG12ep6WllX9BAACgyq
rSd0sBAAD8EeEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxCuAEAAFYh3AAAAKsQbgAAgFUINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACrEG4AAIBVCDcAAMAqhBsAAGAVwg0AALAK4QYAAFiFcAMAAKxSKcLNrFmzFBYWpoCAAEVERGjLli1/2v+DDz7QDTfcoICAAN18881avXp1BVUKAAAqO6+Hm2XLlikxMVFJSUnatm2b2rVrp5iYGB07dqzE/ps2bdKAAQM0dOhQbd++Xb169VKvXr30ww8/VHDlAACgMvJ6uElJSdGwYcMUHx+v1q1ba86cOapZs6YWLFhQYv/XX39d3bt319ixY3XjjTdq8uTJuvXWW/Xmm29WcOUAAKAyqubNN8/Pz9fWrVs1btw4Z5uPj4+io6O1efPmEsds3rxZiYmJLm0xMTFatWpVif3z8vKUl5fnfH7q1ClJUk5OzhVWX7KivLPlctzyVF5rAQC4cnyuuB7TGHPZvl4NN9nZ2SosLFRQUJBLe1BQkHbv3l3imIyMjBL7Z2RklNg/OTlZkyZNKtYeGhrqZtX2qTvT2xUAAGxSnp8rp0+fVt26df+0j1fDTUUYN26cy5meoqIiHT9+XNdee60cDocXKyt/OTk5Cg0N1cGDB1WnTh1vl1OlsHbuYd3cw7q5j7VzT1VcN2OMTp8+rSZNmly2r1fDTYMGDeTr66vMzEyX9szMTAUHB5c4Jjg4uEz9/f395e/v79JWr14994uugurUqVNl/ngrG9bOPaybe1g397F27qlq63a5MzYXeXVDsZ+fnzp06KDU1FRnW1FRkVJTUxUZGVnimMjISJf+krRu3bpL9gcAAFcXr1+WSkxMVFxcnDp27KhOnTpp5syZys3NVXx8vCRp8ODBCgkJUXJysiRp1KhRuvPOOzVjxgz17NlTS5cu1bfffqu5c+d6cxoAAKCS8Hq46devn7KysjRhwgRlZGSoffv2Wrt2rXPTcHp6unx8/nmC6fbbb9eSJUv0/PPP67nnnlOLFi20atUqtWnTxltTqLT8/f2VlJRU7LIcLo+1cw/r5h7WzX2snXtsXzeHKc09VQAAAFWE17/EDwAAwJMINwAAwCqEGwAAYBXCDQAAsArhBgAAWIVwU0V89dVXio2NVZMmTeRwOC75Q6G/l5eXp/Hjx6tZs2by9/dXWFiYy6+tz5s3T126dFH9+vVVv359RUdHa8uWLeU4C+8oj7X7vaVLl8rhcKhXr16eLdzLymvdTp48qYSEBDVu3Fj+/v5q2bKlVq9eXU6zqHjltW4zZ85Uq1atVKNGDYWGhmr06NE6d+5cOc3CO8q6dkOGDJHD4Sj2uOmmm1z6zZo1S2FhYQoICFBERIR1/58rj3VLTk7Wv/7rvyowMFCNGjVSr169tGfPnnKeiecQbqqI3NxctWvXTrNmzSr1mL59+yo1NVXz58/Xnj179P7776tVq1bO1zdu3KgBAwZow4YN2rx5s0JDQ9WtWzcdPny4PKbgNeWxdhelpaVpzJgx6tKliydLrhTKY93y8/N17733Ki0tTR9++KH27NmjefPmKSQkpDym4BXlsW5LlizRs88+q6SkJO3atUvz58/XsmXL9Nxzz5XHFLymrGv3+uuv6+jRo87HwYMHdc011+gvf/mLs8+yZcuUmJiopKQkbd
u2Te3atVNMTIyOHTtWXtOocOWxbl9++aUSEhL0zTffaN26dSooKFC3bt2Um5tbXtPwLIMqR5JZuXLln/ZZs2aNqVu3rvn1119Lfdzz58+bwMBA8+67715hhZWXJ9fu/Pnz5vbbbzfvvPOOiYuLMw888IDnCq1kPLVus2fPNtddd53Jz8/3cIWVk6fWLSEhwdx9990ubYmJiSYqKsoTZVZKpVm7P1q5cqVxOBwmLS3N2dapUyeTkJDgfF5YWGiaNGlikpOTPVVqpeKpdfujY8eOGUnmyy+/vMIKKwZnbiz18ccfq2PHjpo+fbpCQkLUsmVLjRkzRr/99tslx5w9e1YFBQW65pprKrDSyqe0a/fiiy+qUaNGGjp0qJcqrVxKs24ff/yxIiMjlZCQoKCgILVp00ZTp05VYWGhFyv3rtKs2+23366tW7c6L6ccOHBAq1evVo8ePbxVdqU0f/58RUdHq1mzZpIunCncunWroqOjnX18fHwUHR2tzZs3e6vMSueP61aSU6dOSVKV+Xzw+s8voHwcOHBAX3/9tQICArRy5UplZ2drxIgR+vXXX7Vw4cISxzzzzDNq0qSJy/8IrkalWbuvv/5a8+fP144dO7xbbCVSmnU7cOCAvvjiCz388MNavXq19u3bpxEjRqigoEBJSUlenoF3lGbdBg4cqOzsbHXu3FnGGJ0/f17Dhw+37rLUlThy5IjWrFmjJUuWONuys7NVWFjo/Dmfi4KCgrR79+6KLrFSKmnd/qioqEhPPfWUoqKiqs5PHXn71BHKTqU47XjvvfeagIAAc/LkSWfb8uXLjcPhMGfPni3WPzk52dSvX9989913ni63UvHE2uXk5JiwsDCzevVq5+tclird31yLFi1MaGioOX/+vLPPjBkzTHBwcLnU7W2eWrcNGzaYoKAgM2/ePLNz506zYsUKExoaal588cXyLN+rSrN2vzd16lRz7bXXmry8PGfb4cOHjSSzadMml75jx441nTp18lSplYon1u2Phg8fbpo1a2YOHjzogQorBmduLNW4cWOFhISobt26zrYbb7xRxhgdOnRILVq0cLa/+uqrmjZtmtavX6+2bdt6o9xK5XJrl5ubq7S0NMXGxjpfLyoqkiRVq1ZNe/bsUXh4eIXX7W2l+Ztr3LixqlevLl9fX5c+GRkZys/Pl5+fnzdK96rSrNsLL7ygQYMG6bHHHpMk3XzzzcrNzdXjjz+u8ePHu/y48NXIGKMFCxZo0KBBLn9DDRo0kK+vrzIzM136Z2ZmKjg4uKLLrHQutW6/N3LkSH3yySf66quv1LRp0wqu0H1X938RFouKitKRI0d05swZZ9vevXvl4+Pj8gc6ffp0TZ48WWvXrlXHjh29UWqlc7m1u+GGG/T9999rx44dzsf999+vrl27aseOHQoNDfVi9d5Tmr+5qKgo7du3zxkGL/Zp3LjxVRlspNKt29mzZ4sFmIsB0fDbx/ryyy+1b9++Yvvf/Pz81KFDB6WmpjrbioqKlJqaqsjIyIous9K51LpJF/6uRo4cqZUrV+qLL75Q8+bNvVDhFfDeSSOUxenTp8327dvN9u3bjSSTkpJitm/fbn755RdjjDHPPvusGTRokEv/pk2bmj59+pgff/zRfPnll6ZFixbmsccec/aZNm2a8fPzMx9++KE5evSo83H69OkKn195Ko+1+yMbL0uVx7qlp6ebwMBAM3LkSLNnzx7zySefmEaNGpkpU6ZU+PzKS3msW1JSkgkMDDTvv/++OXDggPn8889NeHi46du3b4XPrzyVde0ueuSRR0xERESJx1y6dKnx9/c3ixYtMv/4xz/M448/burVq2cyMjLKdS4VqTzW7YknnjB169Y1GzdudPl8KGlbQ2VEuKkiNmzYYCQVe8TFxRljLny43nnnnS5jdu3aZaKjo02NGjVM06ZNTWJiossfZrNmzUo8ZlJSUsVNrAKUx9r9kY3hprzWbdOmTSYiIsL4+/ub6667zrz00k
sue3CquvJYt4KCAjNx4kQTHh5uAgICTGhoqBkxYoQ5ceJExU2sArizdidPnjQ1atQwc+fOveRx33jjDfMv//Ivxs/Pz3Tq1Ml888035TiLilce61bS8SSZhQsXlu9kPMRhDOc0AQCAPdhzAwAArEK4AQAAViHcAAAAqxBuAACAVQg3AADAKoQbAABgFcINAACwCuEGAABYhXADAACsQrgBAABWIdwAAACr/D/vevnJpwE9FgAAAABJRU5ErkJggg==",
|
284 |
-
"text/plain": [
|
285 |
-
"<Figure size 640x480 with 1 Axes>"
|
286 |
-
]
|
287 |
-
},
|
288 |
-
"metadata": {},
|
289 |
-
"output_type": "display_data"
|
290 |
-
}
|
291 |
-
],
|
292 |
-
"source": [
|
293 |
-
"all_token_latencies = valid_df['end_to_end_latency_s'].apply(pd.Series).stack()\n",
|
294 |
-
"all_token_latencies = all_token_latencies.reset_index(drop=True)\n",
|
295 |
-
"all_token_latencies.plot.hist(title=\"Token Latencies\")\n"
|
296 |
-
]
|
297 |
-
},
|
298 |
-
{
|
299 |
-
"cell_type": "code",
|
300 |
-
"execution_count": null,
|
301 |
-
"metadata": {},
|
302 |
-
"outputs": [],
|
303 |
-
"source": []
|
304 |
-
}
|
305 |
-
],
|
306 |
-
"metadata": {
|
307 |
-
"kernelspec": {
|
308 |
-
"display_name": "Python 3 (ipykernel)",
|
309 |
-
"language": "python",
|
310 |
-
"name": "python3"
|
311 |
-
},
|
312 |
-
"language_info": {
|
313 |
-
"codemirror_mode": {
|
314 |
-
"name": "ipython",
|
315 |
-
"version": 3
|
316 |
-
},
|
317 |
-
"file_extension": ".py",
|
318 |
-
"mimetype": "text/x-python",
|
319 |
-
"name": "python",
|
320 |
-
"nbconvert_exporter": "python",
|
321 |
-
"pygments_lexer": "ipython3",
|
322 |
-
"version": "3.10.13"
|
323 |
-
}
|
324 |
-
},
|
325 |
-
"nbformat": 4,
|
326 |
-
"nbformat_minor": 5
|
327 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/llm_correctness.py
DELETED
@@ -1,309 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
from pathlib import Path
|
5 |
-
import random
|
6 |
-
import re
|
7 |
-
import time
|
8 |
-
from typing import Any, Dict, List, Optional, Tuple
|
9 |
-
|
10 |
-
import num2words
|
11 |
-
import ray
|
12 |
-
from tqdm import tqdm
|
13 |
-
|
14 |
-
from llmperf import common_metrics
|
15 |
-
from llmperf.common import SUPPORTED_APIS, construct_clients
|
16 |
-
from llmperf.models import RequestConfig
|
17 |
-
from llmperf.requests_launcher import RequestsLauncher
|
18 |
-
from llmperf.utils import (
|
19 |
-
LLMPerfResults,
|
20 |
-
)
|
21 |
-
|
22 |
-
MAX_RANDOM_NUMBER = 10000
|
23 |
-
|
24 |
-
|
25 |
-
def llm_correctness(
    model: str,
    additional_sampling_params: Optional[Dict[str, Any]] = None,
    num_concurrent_requests: int = 1,
    max_num_completed_requests: int = 500,
    test_timeout_s=90,
    llm_api="chat",
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """Measure how often the model correctly converts number words to digits.

    Each request asks the model to turn a randomly chosen number, spelled out
    in words, back into digits. A response counts as a mismatch when the
    expected number does not appear among the digit runs of the generated
    text (commas between digits are ignored, so "1,234" matches 1234).

    Args:
        model: The name of the model to query.
        additional_sampling_params: Additional sampling parameters to send with
            the request. They are merged over the default ``temperature=0.0``.
            For more information see the LLM APIs documentation for the
            completions.
        num_concurrent_requests: The number of concurrent requests to make.
            Increase this to increase the amount of load and vice versa.
        max_num_completed_requests: The number of requests to complete before
            finishing the test.
        test_timeout_s: The amount of time to run the test for before
            reporting results.
        llm_api: The type of request to make. Either "chat" or "litellm".

    Returns:
        A tuple containing summary metrics and raw results from the test.
        ``error_rate`` and ``mismatch_rate`` are reported as 0.0 when no
        request completed.
    """
    if not additional_sampling_params:
        additional_sampling_params = {}

    clients = construct_clients(llm_api=llm_api, num_clients=num_concurrent_requests)
    req_launcher = RequestsLauncher(clients)
    start_time = time.monotonic()

    num_errored_requests = 0
    num_mismatched_requests = 0
    num_completed_requests = 0

    sampling_params = {"temperature": 0.0}
    sampling_params.update(additional_sampling_params)
    completed_requests = []
    num_launched = 0

    # The context manager closes the progress bar even if a request raises.
    with tqdm(total=max_num_completed_requests) as pbar:
        while (
            time.monotonic() - start_time < test_timeout_s
            and num_completed_requests < max_num_completed_requests
        ):
            num_launched += 1
            rnd_number = random.randint(0, MAX_RANDOM_NUMBER)
            rnd_num_words = num2words.num2words(rnd_number)

            prompt = f"Convert the following sequence of words into a number: {rnd_num_words}.\nPrint the number first."

            request_config = RequestConfig(
                model=model,
                prompt=(prompt, 0),
                sampling_params=sampling_params,
                metadata={"rnd_number": rnd_number},
                llm_api=llm_api,
            )
            req_launcher.launch_requests(request_config)

            # Only collect results once a full batch of concurrent requests
            # has been launched.
            if not (num_launched % num_concurrent_requests):
                completed_requests.extend(req_launcher.get_next_ready())
            pbar.update(len(completed_requests) - num_completed_requests)
            num_completed_requests = len(completed_requests)

    end_time = time.monotonic()
    if end_time - start_time >= test_timeout_s:
        print("Test timed out before all requests could be completed.")

    raw_results = []

    # Compiled once: strips thousands separators such as the comma in "1,234".
    commas_between_numbers_re = re.compile(r"(\d+),(?=\d)")
    digits_re = re.compile(r"\d+")

    print("Mismatched and errored requests.")
    for metrics, generated_text, completed_request_config in completed_requests:
        raw_results.append(
            {
                "metrics": metrics,
                "generated_text": generated_text,
                "request_config": dict(completed_request_config),
            }
        )

        # A truthy error code means the request itself failed.
        if metrics[common_metrics.ERROR_CODE]:
            num_errored_requests += 1
            print(
                f" The request errored: {metrics[common_metrics.ERROR_CODE]}, "
                f"{metrics[common_metrics.ERROR_MSG]} "
            )
            continue

        expected = str(completed_request_config.metadata["rnd_number"])
        try:
            gen_text_commas_removed = commas_between_numbers_re.sub(
                r"\1", generated_text
            )
            nums = digits_re.findall(gen_text_commas_removed)
            generated_text = gen_text_commas_removed.replace("\n", " ")
            # Explicit check rather than `assert`, which is stripped under -O.
            is_match = expected in nums
        except TypeError:
            # Non-string output (e.g. None) cannot contain the number.
            is_match = False

        if not is_match:
            num_mismatched_requests += 1
            print(
                f" mismatched request: {generated_text}, expected: {completed_request_config.metadata['rnd_number']}"
            )
    print()

    # Avoid ZeroDivisionError when the test ended before any request completed.
    if num_completed_requests:
        error_rate = num_errored_requests / num_completed_requests
        mismatch_rate = num_mismatched_requests / num_completed_requests
    else:
        error_rate = 0.0
        mismatch_rate = 0.0
    num_non_errored_requests = num_completed_requests - num_errored_requests

    summary_metrics = {
        common_metrics.NUM_ERRORS: num_errored_requests,
        "num_mismatched_requests": num_mismatched_requests,
        "error_rate": error_rate,
        "mismatch_rate": mismatch_rate,
        common_metrics.NUM_COMPLETED_REQUESTS: num_completed_requests,
        "num_non_errored_requests": num_non_errored_requests,
        # Metadata
        "model": model,
        "num_concurrent_requests": num_concurrent_requests,
        "additional_sampling_params": additional_sampling_params,
        "llm_api": llm_api,
    }

    return summary_metrics, raw_results
|
150 |
-
|
151 |
-
|
152 |
-
def run(
    llm_api: str,
    model: str,
    test_timeout_s: int,
    max_num_completed_requests: int,
    num_concurrent_requests: int,
    additional_sampling_params: str,
    results_dir: str,
    user_metadata: Dict[str, str],
):
    """Run the correctness test, print a summary and optionally save the results.

    Args:
        llm_api: The name of the LLM API to query (forwarded to
            ``llm_correctness``).
        model: The name of the model to query.
        test_timeout_s: The amount of time to run the test for before
            reporting results.
        max_num_completed_requests: The number of requests to complete before
            finishing the test.
        num_concurrent_requests: The number of concurrent requests to make.
            Increase this to increase the amount of load and vice versa.
        additional_sampling_params: A JSON-encoded object of additional
            sampling parameters to send with each request. It is decoded with
            ``json.loads`` before being forwarded.
        results_dir: The directory to save the results to. When empty, nothing
            is written to disk.
        user_metadata: Extra key/value pairs merged into the summary metrics
            right before they are saved (only used when ``results_dir`` is set).

    Raises:
        ValueError: If ``results_dir`` exists but is not a directory, or if
            ``additional_sampling_params`` is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    summary_metrics, raw_results = llm_correctness(
        model=model,
        llm_api=llm_api,
        test_timeout_s=test_timeout_s,
        max_num_completed_requests=max_num_completed_requests,
        num_concurrent_requests=num_concurrent_requests,
        additional_sampling_params=json.loads(additional_sampling_params),
    )

    # NOTE(review): presumably gives the remote workers' output time to flush
    # before the summary is printed -- confirm before removing.
    time.sleep(2)

    print(
        f"Results for llm correctness test for {model} queried with the {llm_api} api."
    )
    print(
        f"Errors: {summary_metrics[common_metrics.NUM_ERRORS]}, "
        f"Error rate: {summary_metrics['error_rate']}"
    )

    print(
        f"Mismatched: {summary_metrics['num_mismatched_requests']}, "
        f"Mismatch rate: {summary_metrics['mismatch_rate']}"
    )
    print(f"Completed: {summary_metrics[common_metrics.NUM_COMPLETED_REQUESTS]}")
    print(f"Completed without errors: {summary_metrics['num_non_errored_requests']}")

    if results_dir:
        # Build a filesystem-friendly stem from the model name: every run of
        # characters other than word characters and '-' becomes a single '-'.
        file_name = f"{model}_correctness"
        file_name = re.sub(r"[^\w\d-]+", "-", file_name)
        file_name = re.sub(r"-{2,}", "-", file_name)
        summary_file_name = f"{file_name}_summary"
        individual_responses_filename = f"{file_name}_individual_responses"
        summary_metrics.update(user_metadata)
        results = LLMPerfResults(name=summary_file_name, metadata=summary_metrics)
        results_dir = Path(results_dir)
        if results_dir.exists() and not results_dir.is_dir():
            raise ValueError(f"{results_dir} is not a directory")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        results_dir.mkdir(parents=True, exist_ok=True)
        with open(results_dir / f"{summary_file_name}.json", "w") as f:
            json.dump(results.to_dict(), f, indent=4)
        with open(results_dir / f"{individual_responses_filename}.json", "w") as f:
            json.dump(raw_results, f, indent=4)
|
223 |
-
|
224 |
-
|
225 |
-
# Command-line interface of the correctness test. The parser is built at
# import time under the module-level name ``args``; it is only evaluated when
# the file is executed as a script.
args = argparse.ArgumentParser(description="Run a correctness test for a given model.")

args.add_argument(
    "--model", type=str, required=True, help="The model to use for this load test."
)
args.add_argument(
    "--num-concurrent-requests",
    type=int,
    default=10,
    help=("The number of concurrent requests to send. (default: %(default)s)"),
)
args.add_argument(
    "--timeout",
    type=int,
    default=90,
    help="The amount of time to run the load test for. (default: %(default)s)",
)
args.add_argument(
    "--max-num-completed-requests",
    type=int,
    default=50,
    help=(
        "The number of requests to complete before finishing the test. Note "
        "that its possible for the test to timeout first. (default: %(default)s)"
    ),
)
args.add_argument(
    "--additional-sampling-params",
    type=str,
    default="{}",
    help=(
        "Additional sampling params to send with the each request to the LLM API. "
        "(default: %(default)s) No additional sampling params are sent."
    ),
)
args.add_argument(
    "--results-dir",
    type=str,
    default="",
    help=(
        "The directory to save the results to. "
        "(`default: %(default)s`) No results are saved)"
    ),
)
args.add_argument(
    "--llm-api",
    type=str,
    default="openai",
    help=(
        f"The type of request to make. The supported llm apis are {SUPPORTED_APIS} "
        " (`default: %(default)s`)"
    ),
)
args.add_argument(
    "--metadata",
    type=str,
    default="",
    help=(
        "A comma separated list of metadata to include in the results, e.g. "
        "name=foo,bar=1. These will be added to the metadata field of the results. "
    ),
)

if __name__ == "__main__":
    # Keep the parser and the parsed namespace under different names instead
    # of rebinding ``args``.
    cli_args = args.parse_args()

    # Parse user metadata before starting Ray so malformed input fails fast.
    user_metadata = {}
    if cli_args.metadata:
        for item in cli_args.metadata.split(","):
            # Split on the first '=' only, so values may themselves contain '='.
            key, separator, value = item.partition("=")
            if not separator:
                raise ValueError(
                    f"Invalid --metadata item {item!r}: expected the form key=value."
                )
            user_metadata[key] = value

    # Forward the current environment to the Ray workers; the client classes
    # read their credentials and endpoints from environment variables.
    env_vars = dict(os.environ)
    ray.init(runtime_env={"env_vars": env_vars})

    run(
        llm_api=cli_args.llm_api,
        model=cli_args.model,
        test_timeout_s=cli_args.timeout,
        max_num_completed_requests=cli_args.max_num_completed_requests,
        num_concurrent_requests=cli_args.num_concurrent_requests,
        additional_sampling_params=cli_args.additional_sampling_params,
        results_dir=cli_args.results_dir,
        user_metadata=user_metadata,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/pre-commit.sh
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
#!/bin/bash
# Pre-commit helper: announces itself, then formats the working tree with black.
echo "Running pre-hooks before committing..."

echo "======FORMAT====="
# Run black on the current directory; -q is black's quiet flag. This is the
# last command, so its exit status is the script's exit status.
black . -q
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/pyproject.toml
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
[build-system]
|
2 |
-
requires = ["setuptools>=43.0.0", "wheel"]
|
3 |
-
build-backend = "setuptools.build_meta"
|
4 |
-
|
5 |
-
[project]
|
6 |
-
name = "LLMPerf"
|
7 |
-
version = "0.1.0"
|
8 |
-
description = "A framework for load testing LLM APIs"
|
9 |
-
authors = [{name="Avnish Narayan", email="avnish@anyscale.com"}]
|
10 |
-
license = {text= "Apache-2.0"}
|
11 |
-
requires-python = ">=3.8, <3.11"
|
12 |
-
dependencies = ["pydantic<2.5",
|
13 |
-
"ray",
|
14 |
-
"pytest>=6.0",
|
15 |
-
"seaborn>=0.11",
|
16 |
-
"awscli>=1.22",
|
17 |
-
"typer>=0.4",
|
18 |
-
"litellm>=0.1.738",
|
19 |
-
"num2words",
|
20 |
-
"transformers",
|
21 |
-
"tqdm",
|
22 |
-
"boto3",
|
23 |
-
"google-cloud-aiplatform"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/requirements-dev.txt
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
# For lints
|
2 |
-
black
|
|
|
|
|
|
llmperf/src/llmperf/__init__.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
|
|
|
|
llmperf/src/llmperf/common.py
DELETED
@@ -1,38 +0,0 @@
|
|
1 |
-
from typing import List
|
2 |
-
from llmperf.ray_clients.litellm_client import LiteLLMClient
|
3 |
-
from llmperf.ray_clients.openai_chat_completions_client import (
|
4 |
-
OpenAIChatCompletionsClient,
|
5 |
-
)
|
6 |
-
from llmperf.ray_clients.sagemaker_client import SageMakerClient
|
7 |
-
from llmperf.ray_clients.vertexai_client import VertexAIClient
|
8 |
-
from llmperf.ray_llm_client import LLMClient
|
9 |
-
|
10 |
-
|
11 |
-
# API names advertised to users (e.g. in CLI help text). Any of these that has
# no dedicated client in construct_clients() is served by LiteLLMClient.
SUPPORTED_APIS = ["openai", "anthropic", "litellm"]


def construct_clients(llm_api: str, num_clients: int) -> List[LLMClient]:
    """Construct LLMClients that will be used to make requests to the LLM API.

    ``"openai"``, ``"sagemaker"`` and ``"vertexai"`` use their dedicated client
    classes; every other name listed in ``SUPPORTED_APIS`` uses ``LiteLLMClient``.

    Args:
        llm_api: The name of the LLM API to use.
        num_clients: The number of clients (Ray actors) to create.

    Returns:
        The constructed LLMClients, one ``.remote()`` handle per client.

    Raises:
        ValueError: If ``llm_api`` is not an accepted API name.

    """
    dedicated_clients = {
        "openai": OpenAIChatCompletionsClient,
        "sagemaker": SageMakerClient,
        "vertexai": VertexAIClient,
    }
    client_cls = dedicated_clients.get(llm_api)
    if client_cls is None:
        if llm_api not in SUPPORTED_APIS:
            # List every accepted value, including the dedicated clients that
            # are not part of SUPPORTED_APIS.
            accepted = sorted(set(SUPPORTED_APIS) | set(dedicated_clients))
            raise ValueError(
                f"llm_api must be one of the supported LLM APIs: {accepted}"
            )
        client_cls = LiteLLMClient

    return [client_cls.remote() for _ in range(num_clients)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/common_metrics.py
DELETED
@@ -1,17 +0,0 @@
|
|
1 |
-
# TODO (Avnishn): compute metrics in class

# String keys for the metrics dictionaries. The client classes fill the
# per-request keys below in the dict returned by their llm_request() methods.
INTER_TOKEN_LAT = "inter_token_latency_s"
TTFT = "ttft_s"  # time to first token
E2E_LAT = "end_to_end_latency_s"
NUM_INPUT_TOKENS = "number_input_tokens"
NUM_OUTPUT_TOKENS = "number_output_tokens"
NUM_TOTAL_TOKENS = "number_total_tokens"
REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s"
ERROR_MSG = "error_msg"
ERROR_CODE = "error_code"  # clients leave this as None when a request succeeds
# The keys below are not written by the clients; llm_correctness.py stores
# NUM_ERRORS and NUM_COMPLETED_REQUESTS in its summary dict. The others
# presumably hold aggregate values as well -- verify against the benchmark
# scripts.
ERROR_CODE_FREQ = "error_code_frequency"
NUM_ERRORS = "number_errors"
OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s"
NUM_COMPLETED_REQUESTS = "num_completed_requests"
COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min"
ERROR_RATE = "error_rate"
NUM_REQ_STARTED = "num_requests_started"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/models.py
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
from typing import Any, Dict, List, Optional, Tuple
|
2 |
-
from pydantic import BaseModel
|
3 |
-
|
4 |
-
|
5 |
-
class RequestConfig(BaseModel):
    """The configuration for a request to the LLM API.

    Args:
        model: The model to use.
        prompt: The prompt to provide to the LLM API, as a ``(text, length)``
            pair. The client classes unpack it as ``prompt, prompt_len`` and
            report ``prompt_len`` as the number of input tokens.
        sampling_params: Additional sampling parameters to send with the request.
            For more information see the Router app's documentation for the
            completions.
        llm_api: The name of the LLM API to send the request to.
        metadata: Additional metadata to attach to the request for logging or validation purposes.
    """

    model: str
    prompt: Tuple[str, int]
    sampling_params: Optional[Dict[str, Any]] = None
    llm_api: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/ray_clients/__init__.py
DELETED
File without changes
|
llmperf/src/llmperf/ray_clients/litellm_client.py
DELETED
@@ -1,100 +0,0 @@
|
|
1 |
-
import time
|
2 |
-
from typing import Any, Dict
|
3 |
-
import ray
|
4 |
-
|
5 |
-
from llmperf.ray_llm_client import LLMClient
|
6 |
-
from llmperf.models import RequestConfig
|
7 |
-
from llmperf import common_metrics
|
8 |
-
|
9 |
-
|
10 |
-
@ray.remote
class LiteLLMClient(LLMClient):
    """Client for LiteLLM Completions API."""

    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
        """Send one streaming completion request through LiteLLM and time it.

        Args:
            request_config: The model, prompt ``(text, length)`` pair, optional
                sampling parameters and the ``llm_api`` name. ``llm_api`` must
                be set; unless it is ``"litellm"`` it is prefixed to the model
                name as ``"<llm_api>/<model>"``.

        Returns:
            A ``(metrics, generated_text, request_config)`` tuple. ``metrics``
            is keyed by the ``common_metrics`` constants; its error code is
            ``None`` on success and ``-1`` when the request raised.

        Raises:
            ValueError: If LiteLLM reports missing environment variables for
                the model.
        """
        # litellm package isn't serializable, so we import it within the function
        # to maintain compatibility with ray.
        from litellm import completion, validate_environment

        prompt, prompt_len = request_config.prompt

        message = [
            {"role": "system", "content": ""},
            {"role": "user", "content": prompt},
        ]
        assert (
            request_config.llm_api is not None
        ), "the request config's llm_api must be set."
        if request_config.llm_api == "litellm":
            model = request_config.model
        else:
            model = request_config.llm_api + "/" + request_config.model
        validation_result = validate_environment(model)
        if validation_result["missing_keys"]:
            raise ValueError(
                f"The following environment vars weren't found but were necessary for "
                f"the model {request_config.model}: {validation_result['missing_keys']}"
            )
        body = {
            "model": model,
            "messages": message,
            "stream": True,
        }
        sampling_params = request_config.sampling_params
        body.update(sampling_params or {})

        time_to_next_token = []
        tokens_received = 0
        ttft = 0
        error_response_code = -1
        generated_text = ""
        error_msg = ""
        output_throughput = 0
        total_request_time = 0

        metrics = {}

        metrics[common_metrics.ERROR_CODE] = None
        metrics[common_metrics.ERROR_MSG] = ""

        try:
            start_time = time.monotonic()
            most_recent_received_token_time = time.monotonic()

            response = completion(**body)
            ttft = 0
            for tok in response:
                if tok.choices[0].delta:
                    delta = tok.choices[0].delta
                    if delta.get("content", None):
                        if ttft == 0:
                            # First content chunk: its latency is the TTFT.
                            ttft = time.monotonic() - start_time
                            time_to_next_token.append(ttft)
                        else:
                            time_to_next_token.append(
                                time.monotonic() - most_recent_received_token_time
                            )
                        generated_text += delta["content"]
                        most_recent_received_token_time = time.monotonic()
                        # Every content-bearing chunk is counted as one token.
                        tokens_received += 1

            total_request_time = time.monotonic() - start_time

            output_throughput = tokens_received / total_request_time

        except Exception as e:
            # Record the exception text; previously the message was never
            # filled in, so failed requests reported an empty error message.
            error_msg = str(e)
            metrics[common_metrics.ERROR_MSG] = error_msg
            metrics[common_metrics.ERROR_CODE] = error_response_code

            print(f"Warning Or Error: {e}")
            print(error_response_code)

        metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token)
        metrics[common_metrics.TTFT] = ttft
        metrics[common_metrics.E2E_LAT] = total_request_time
        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len
        return metrics, generated_text, request_config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/ray_clients/openai_chat_completions_client.py
DELETED
@@ -1,120 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
-
import time
|
4 |
-
from typing import Any, Dict
|
5 |
-
|
6 |
-
import ray
|
7 |
-
import requests
|
8 |
-
|
9 |
-
from llmperf.ray_llm_client import LLMClient
|
10 |
-
from llmperf.models import RequestConfig
|
11 |
-
from llmperf import common_metrics
|
12 |
-
|
13 |
-
|
14 |
-
@ray.remote
class OpenAIChatCompletionsClient(LLMClient):
    """Client for OpenAI Chat Completions API."""

    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
        """Send one streaming chat-completions request and time it.

        The endpoint is ``$OPENAI_API_BASE/chat/completions`` and the bearer
        token comes from ``$OPENAI_API_KEY``.

        Args:
            request_config: The model, prompt ``(text, length)`` pair and
                optional sampling parameters (merged into the request body).

        Returns:
            A ``(metrics, generated_text, request_config)`` tuple. ``metrics``
            is keyed by the ``common_metrics`` constants; its error code is
            ``None`` on success.

        Raises:
            ValueError: If ``OPENAI_API_BASE`` or ``OPENAI_API_KEY`` is unset.
        """
        prompt, prompt_len = request_config.prompt

        message = [
            {"role": "system", "content": ""},
            {"role": "user", "content": prompt},
        ]
        model = request_config.model
        body = {
            "model": model,
            "messages": message,
            "stream": True,
        }
        sampling_params = request_config.sampling_params
        body.update(sampling_params or {})
        time_to_next_token = []
        tokens_received = 0
        ttft = 0
        error_response_code = -1
        generated_text = ""
        error_msg = ""
        output_throughput = 0
        total_request_time = 0

        metrics = {}

        metrics[common_metrics.ERROR_CODE] = None
        metrics[common_metrics.ERROR_MSG] = ""

        start_time = time.monotonic()
        most_recent_received_token_time = time.monotonic()
        address = os.environ.get("OPENAI_API_BASE")
        if not address:
            raise ValueError("the environment variable OPENAI_API_BASE must be set.")
        key = os.environ.get("OPENAI_API_KEY")
        if not key:
            raise ValueError("the environment variable OPENAI_API_KEY must be set.")
        headers = {"Authorization": f"Bearer {key}"}
        if not address.endswith("/"):
            address = address + "/"
        address += "chat/completions"

        data_prefix = b"data:"
        try:
            with requests.post(
                address,
                json=body,
                stream=True,
                timeout=180,
                headers=headers,
            ) as response:
                if response.status_code != 200:
                    error_msg = response.text
                    error_response_code = response.status_code
                    response.raise_for_status()
                for chunk in response.iter_lines(chunk_size=None):
                    chunk = chunk.strip()

                    if not chunk:
                        continue
                    # Lines starting with ':' are server-sent-event comments
                    # (e.g. keep-alives) and carry no payload.
                    if chunk.startswith(b":"):
                        continue
                    # Strip the SSE field name only when it is actually there
                    # instead of blindly slicing off six bytes.
                    if chunk.startswith(data_prefix):
                        chunk = chunk[len(data_prefix) :].strip()
                    if chunk == b"[DONE]":
                        continue
                    # Every data chunk is counted as one token, including
                    # chunks whose delta has no content.
                    tokens_received += 1
                    data = json.loads(chunk)

                    if "error" in data:
                        error_msg = data["error"]["message"]
                        error_response_code = data["error"]["code"]
                        raise RuntimeError(data["error"]["message"])

                    delta = data["choices"][0]["delta"]
                    if delta.get("content", None):
                        if not ttft:
                            # First content chunk: its latency is the TTFT.
                            ttft = time.monotonic() - start_time
                            time_to_next_token.append(ttft)
                        else:
                            time_to_next_token.append(
                                time.monotonic() - most_recent_received_token_time
                            )
                        most_recent_received_token_time = time.monotonic()
                        generated_text += delta["content"]

            total_request_time = time.monotonic() - start_time
            output_throughput = tokens_received / total_request_time

        except Exception as e:
            # Fall back to the exception text so that timeouts, connection
            # failures and malformed chunks do not report an empty message.
            metrics[common_metrics.ERROR_MSG] = error_msg or str(e)
            metrics[common_metrics.ERROR_CODE] = error_response_code
            print(f"Warning Or Error: {e}")
            print(error_response_code)

        # This should be the same as metrics[common_metrics.E2E_LAT]. Left here for now.
        metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token)
        metrics[common_metrics.TTFT] = ttft
        metrics[common_metrics.E2E_LAT] = total_request_time
        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len

        return metrics, generated_text, request_config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/ray_clients/sagemaker_client.py
DELETED
@@ -1,158 +0,0 @@
|
|
1 |
-
import io
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
import time
|
5 |
-
from typing import Any, Dict
|
6 |
-
|
7 |
-
import boto3
|
8 |
-
import ray
|
9 |
-
from transformers import LlamaTokenizerFast
|
10 |
-
|
11 |
-
from llmperf.ray_llm_client import LLMClient
|
12 |
-
from llmperf.models import RequestConfig
|
13 |
-
from llmperf import common_metrics
|
14 |
-
|
15 |
-
|
16 |
-
@ray.remote
class SageMakerClient(LLMClient):
    """Client for AWS SageMaker endpoints that stream their response."""

    def __init__(self):
        # Sagemaker doesn't return the number of tokens that are generated so we approximate it by
        # using the llama tokenizer.
        self.tokenizer = LlamaTokenizerFast.from_pretrained(
            "hf-internal-testing/llama-tokenizer"
        )

    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
        """Invoke the SageMaker endpoint named by the model and time the response.

        Args:
            request_config: The endpoint name (``model``), prompt
                ``(text, length)`` pair and optional sampling parameters. A
                ``max_tokens`` parameter is sent as ``max_new_tokens``.

        Returns:
            A ``(metrics, generated_text, request_config)`` tuple. ``metrics``
            is keyed by the ``common_metrics`` constants; its error code is
            ``None`` on success and ``500`` when the request raised.

        Raises:
            ValueError: If ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` or
                ``AWS_REGION_NAME`` is unset.
        """
        if not os.environ.get("AWS_ACCESS_KEY_ID"):
            raise ValueError("AWS_ACCESS_KEY_ID must be set.")
        if not os.environ.get("AWS_SECRET_ACCESS_KEY"):
            raise ValueError("AWS_SECRET_ACCESS_KEY must be set.")
        if not os.environ.get("AWS_REGION_NAME"):
            raise ValueError("AWS_REGION_NAME must be set.")

        prompt, prompt_len = request_config.prompt

        model = request_config.model
        sm_runtime = boto3.client(
            "sagemaker-runtime", region_name=os.environ.get("AWS_REGION_NAME")
        )

        # Work on a copy: sampling_params may be None, and renaming the key
        # below must not modify the request config's own dict.
        sampling_params = dict(request_config.sampling_params or {})
        if "max_tokens" in sampling_params:
            sampling_params["max_new_tokens"] = sampling_params.pop("max_tokens")

        message = {
            "inputs": [
                [
                    {"role": "system", "content": ""},
                    {"role": "user", "content": prompt},
                ]
            ],
            "parameters": sampling_params,
        }

        time_to_next_token = []
        tokens_received = 0
        ttft = 0
        error_response_code = None
        generated_text = ""
        error_msg = ""
        output_throughput = 0
        total_request_time = 0
        metrics = {}

        start_time = time.monotonic()
        most_recent_received_token_time = time.monotonic()

        try:
            response = sm_runtime.invoke_endpoint_with_response_stream(
                EndpointName=model,
                ContentType="application/json",
                Body=json.dumps(message),
                CustomAttributes="accept_eula=true",
            )

            event_stream = response["Body"]
            json_byte = b""
            # LineIterator yields the absolute time.monotonic() timestamp of
            # the first line with every item (0 if it has not stamped one).
            first_line_time = 0
            for line, first_line_time, _ in LineIterator(event_stream):
                json_byte += line
                time_to_next_token.append(
                    time.monotonic() - most_recent_received_token_time
                )
                most_recent_received_token_time = time.monotonic()
            # Only convert to a duration when a timestamp was recorded;
            # otherwise ttft would become a large negative number.
            if first_line_time:
                ttft = first_line_time - start_time
            resp = json.loads(json_byte)
            total_request_time = time.monotonic() - start_time
            generated_text = resp[0]["generation"]["content"]
            tokens_received = len(self.tokenizer.encode(generated_text))
            output_throughput = tokens_received / total_request_time

        except Exception as e:
            # Assign before printing so the log shows the reported code.
            error_msg = str(e)
            error_response_code = 500
            print(f"Warning Or Error: {e}")
            print(error_response_code)

        metrics[common_metrics.ERROR_MSG] = error_msg
        metrics[common_metrics.ERROR_CODE] = error_response_code
        # NOTE(review): this stores the list of per-line gaps, whereas the
        # OpenAI and LiteLLM clients store sum(time_to_next_token). Kept as-is
        # for compatibility -- confirm what the consumers expect.
        metrics[common_metrics.INTER_TOKEN_LAT] = time_to_next_token
        metrics[common_metrics.TTFT] = ttft
        metrics[common_metrics.E2E_LAT] = total_request_time
        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len

        return metrics, generated_text, request_config
|
118 |
-
|
119 |
-
|
120 |
-
class LineIterator:
    """
    A helper class for parsing the byte stream input.

    It buffers the ``PayloadPart`` bytes of the events in ``stream`` and yields
    ``(line, first_line_time, now)`` tuples: the line's bytes without the
    trailing newline, the ``time.monotonic()`` timestamp at which the first
    line was yielded, and the current ``time.monotonic()`` timestamp.

    Reference: https://aws.amazon.com/blogs/machine-learning/elevating-the-generative-ai-experience-introducing-streaming-support-in-amazon-sagemaker-hosting/
    """

    def __init__(self, stream):
        self.byte_iterator = iter(stream)
        self.buffer = io.BytesIO()
        # Offset of the first buffered byte that has not been yielded yet.
        self.read_pos = 0
        # Timestamp of the first yielded line; 0 until one has been yielded.
        self.ttft = 0

    def __iter__(self):
        return self

    def _emit(self, line):
        """Stamp the first-line time if needed and build the yielded tuple."""
        if self.ttft == 0:
            self.ttft = time.monotonic()
        return line, self.ttft, time.monotonic()

    def __next__(self):
        while True:
            self.buffer.seek(self.read_pos)
            line = self.buffer.readline()
            if line and line[-1] == ord("\n"):
                self.read_pos += len(line)
                return self._emit(line[:-1])
            # kyle: dealing with last ']' for chat output
            if line and self.read_pos == self.buffer.getbuffer().nbytes - 1:
                self.read_pos += 1
                return self._emit(line)
            try:
                chunk = next(self.byte_iterator)
            except StopIteration:
                if line:
                    # The stream ended without a final newline: flush the
                    # remainder once instead of retrying the exhausted stream
                    # forever.
                    self.read_pos += len(line)
                    return self._emit(line)
                raise
            if "PayloadPart" not in chunk:
                # chunk is an event mapping, so format it rather than
                # concatenating it to a str (which raises TypeError).
                print(f"Unknown event type: {chunk}")
                continue
            self.buffer.seek(0, io.SEEK_END)
            self.buffer.write(chunk["PayloadPart"]["Bytes"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/ray_clients/vertexai_client.py
DELETED
@@ -1,135 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
-
import time
|
4 |
-
from typing import Any, Dict
|
5 |
-
|
6 |
-
import ray
|
7 |
-
import requests
|
8 |
-
from transformers import LlamaTokenizerFast
|
9 |
-
|
10 |
-
from llmperf.ray_llm_client import LLMClient
|
11 |
-
from llmperf.models import RequestConfig
|
12 |
-
from llmperf import common_metrics
|
13 |
-
|
14 |
-
|
15 |
-
@ray.remote
class VertexAIClient(LLMClient):
    """Ray actor that sends non-streaming prediction requests to a VertexAI endpoint.

    The endpoint is configured through the environment variables
    ``GCLOUD_PROJECT_ID``, ``GCLOUD_REGION``, ``VERTEXAI_ENDPOINT_ID`` and
    ``GCLOUD_ACCESS_TOKEN``, which are read on every request.
    """

    def __init__(self):
        # VertexAI doesn't return the number of tokens that are generated so we
        # approximate it by using the llama tokenizer.
        self.tokenizer = LlamaTokenizerFast.from_pretrained(
            "hf-internal-testing/llama-tokenizer"
        )

    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
        """Send one prediction request and collect timing metrics for it.

        Args:
            request_config: Request description. ``prompt`` must be a
                ``(text, token_count)`` pair; ``sampling_params`` is forwarded
                as the ``parameters`` payload, with ``max_new_tokens`` renamed
                to ``maxOutputTokens``.

        Returns:
            A ``(metrics, generated_text, request_config)`` tuple. (The return
            annotation is kept as in the original signature even though a
            tuple is returned.)

        Raises:
            ValueError: If one of the required environment variables is unset
                or empty.
        """
        project_id = os.environ.get("GCLOUD_PROJECT_ID")
        region = os.environ.get("GCLOUD_REGION")
        endpoint_id = os.environ.get("VERTEXAI_ENDPOINT_ID")
        # Default to "" so that a missing variable reaches the ValueError below
        # instead of failing with AttributeError on None.strip().
        access_token = os.environ.get("GCLOUD_ACCESS_TOKEN", "").strip()
        if not project_id:
            raise ValueError("the environment variable GCLOUD_PROJECT_ID must be set.")
        if not region:
            raise ValueError("the environment variable GCLOUD_REGION must be set.")
        if not endpoint_id:
            raise ValueError(
                "the environment variable VERTEXAI_ENDPOINT_ID must be set."
            )
        if not access_token:
            raise ValueError(
                "the environment variable GCLOUD_ACCESS_TOKEN must be set."
            )
        prompt, prompt_len = request_config.prompt

        time_to_next_token = []
        tokens_received = 0
        ttft = 0
        generated_text = ""
        output_throughput = 0
        total_request_time = 0
        # Sentinel used when the request fails before any HTTP status exists
        # (e.g. a connection error). It must not be None, otherwise the failure
        # would be recorded without an error code.
        response_code = -1

        metrics = {}

        metrics[common_metrics.ERROR_CODE] = None
        metrics[common_metrics.ERROR_MSG] = ""

        try:
            url = (
                f"https://{region}-aiplatform.googleapis.com/v1/projects/"
                f"{project_id}/locations/{region}/endpoints/{endpoint_id}:predict"
            )

            headers = {
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json",
            }

            # Work on a copy so the caller's request_config is left untouched.
            sampling_params = dict(request_config.sampling_params or {})
            if "max_new_tokens" in sampling_params:
                sampling_params["maxOutputTokens"] = sampling_params.pop(
                    "max_new_tokens"
                )

            data = {"instances": [{"prompt": prompt}], "parameters": sampling_params}

            start_time = time.monotonic()
            response = requests.post(url, headers=headers, data=json.dumps(data))
            total_request_time = time.monotonic() - start_time
            response_code = response.status_code
            response.raise_for_status()
            # output from the endpoint is in the form:
            # {"predictions": ["Input: ... \nOutput:\n ..."]}
            generated_text = response.json()["predictions"][0].split("\nOutput:\n")[1]
            tokens_received = len(self.tokenizer.encode(generated_text))
            # The response is not streamed, so time-to-first-token is unknown.
            ttft = -1
            output_throughput = tokens_received / total_request_time
            # Spread the total time evenly over the tokens. Nothing is divided
            # when no tokens were received.
            if tokens_received:
                time_to_next_token = [
                    total_request_time / tokens_received
                ] * tokens_received

        except Exception as e:
            metrics[common_metrics.ERROR_MSG] = str(e)
            metrics[common_metrics.ERROR_CODE] = response_code
            print(f"Warning Or Error: {e}")
            print(response_code)

        # NOTE(review): INTER_TOKEN_LAT is a per-token list here; confirm that
        # consumers of these metrics expect a list rather than a scalar.
        metrics[common_metrics.INTER_TOKEN_LAT] = time_to_next_token
        metrics[common_metrics.TTFT] = ttft
        metrics[common_metrics.E2E_LAT] = total_request_time
        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len

        return metrics, generated_text, request_config
|
112 |
-
|
113 |
-
|
114 |
-
if __name__ == "__main__":
    # Manual smoke test: sends a single request through a VertexAIClient actor.
    #
    # Run these beforehand:
    #   gcloud auth application-default login
    #   gcloud config set project YOUR_PROJECT_ID
    #   export GCLOUD_ACCESS_TOKEN=$(gcloud auth print-access-token)
    #   export GCLOUD_PROJECT_ID=YOUR_PROJECT_ID
    #   export GCLOUD_REGION=YOUR_REGION
    #   export VERTEXAI_ENDPOINT_ID=YOUR_ENDPOINT_ID

    client = VertexAIClient.remote()
    smoke_prompt = (
        "Give me ten interview questions for the role of program manager.",
        10,
    )
    smoke_sampling_params = {
        "temperature": 0.2,
        "max_new_tokens": 256,
        "top_k": 40,
        "top_p": 0.95,
    }
    request_config = RequestConfig(
        prompt=smoke_prompt,
        model="gpt3",
        sampling_params=smoke_sampling_params,
    )
    pending = client.llm_request.remote(request_config)
    ray.get(pending)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/ray_llm_client.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
import abc
|
2 |
-
from typing import Any, Dict, Tuple
|
3 |
-
|
4 |
-
from llmperf.models import RequestConfig
|
5 |
-
|
6 |
-
|
7 |
-
class LLMClient:
    """Base interface for clients that send requests to an LLM API, e.g. Anyscale Endpoints.

    The class does not derive from ``abc.ABC``, so the ``abstractmethod``
    marker below documents intent and is not enforced at instantiation time.
    """

    @abc.abstractmethod
    def llm_request(
        self, request_config: RequestConfig
    ) -> Tuple[Dict[str, Any], str, RequestConfig]:
        """Issue a single completion request to an LLM API.

        Args:
            request_config: Description of the request to send.

        Returns:
            A three-element tuple:
              * metrics describing the performance characteristics of the request,
              * the text generated by the LLM API,
              * the ``request_config`` that was used, mainly for logging purposes.
        """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/requests_launcher.py
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
from typing import Any, List
|
2 |
-
|
3 |
-
from llmperf.ray_llm_client import LLMClient
|
4 |
-
from llmperf.models import RequestConfig
|
5 |
-
from ray.util import ActorPool
|
6 |
-
|
7 |
-
|
8 |
-
class RequestsLauncher:
    """Launch requests from LLMClients to their respective LLM APIs."""

    def __init__(self, llm_clients: List[LLMClient]):
        """Wrap the given client actors in a Ray ``ActorPool``.

        Args:
            llm_clients: Ray actor handles used to issue requests.
        """
        self._llm_client_pool = ActorPool(llm_clients)

    def launch_requests(self, request_config: RequestConfig) -> None:
        """Launch requests to the LLM API.

        The request is submitted only when the pool reports a free client;
        otherwise the call returns without doing anything.

        Args:
            request_config: The configuration for the request.

        """
        if self._llm_client_pool.has_free():
            self._llm_client_pool.submit(
                lambda client, _request_config: client.llm_request.remote(
                    _request_config
                ),
                request_config,
            )

    def get_next_ready(self, block: bool = False) -> List[Any]:
        """Return results from completed requests.

        Args:
            block: Kept for backward compatibility. Previously ``block=True``
                spun in a ``while ...: pass`` loop until the pool had a
                pending result, which burns CPU and never ends when nothing
                is in flight. Both modes now drain the pool the same way and
                return an empty list when nothing is pending.

        Returns:
            A list of the results retrieved from the pool.

        """
        pool = self._llm_client_pool
        results = []
        # NOTE(review): per the Ray ActorPool docs, has_next() reports pending
        # results and get_next_unordered() waits for one to finish; confirm
        # against the Ray version in use.
        while pool.has_next():
            results.append(pool.get_next_unordered())
        return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/sonnet.txt
DELETED
@@ -1,84 +0,0 @@
|
|
1 |
-
Shall I compare thee to a summer's day?
|
2 |
-
Thou art more lovely and more temperate:
|
3 |
-
Rough winds do shake the darling buds of May,
|
4 |
-
And summer's lease hath all too short a date:
|
5 |
-
Sometime too hot the eye of heaven shines,
|
6 |
-
And often is his gold complexion dimm'd;
|
7 |
-
And every fair from fair sometime declines,
|
8 |
-
By chance or nature's changing course untrimm'd;
|
9 |
-
But thy eternal summer shall not fade
|
10 |
-
Nor lose possession of that fair thou owest;
|
11 |
-
Nor shall Death brag thou wander'st in his shade,
|
12 |
-
When in eternal lines to time thou growest:
|
13 |
-
So long as men can breathe or eyes can see,
|
14 |
-
So long lives this and this gives life to thee.
|
15 |
-
Then let not winter's ragged hand deface
|
16 |
-
In thee thy summer, ere thou be distill'd:
|
17 |
-
Make sweet some vial; treasure thou some place
|
18 |
-
With beauty's treasure, ere it be self-kill'd.
|
19 |
-
That use is not forbidden usury,
|
20 |
-
Which happies those that pay the willing loan;
|
21 |
-
That's for thyself to breed another thee,
|
22 |
-
Or ten times happier, be it ten for one;
|
23 |
-
Ten times thyself were happier than thou art,
|
24 |
-
If ten of thine ten times refigured thee:
|
25 |
-
Then what could death do, if thou shouldst depart,
|
26 |
-
Leaving thee living in posterity?
|
27 |
-
Be not self-will'd, for thou art much too fair
|
28 |
-
To be death's conquest and make worms thine heir.
|
29 |
-
Where art thou, Muse, that thou forget'st so long
|
30 |
-
To speak of that which gives thee all thy might?
|
31 |
-
Spend'st thou thy fury on some worthless song,
|
32 |
-
Darkening thy power to lend base subjects light?
|
33 |
-
Return, forgetful Muse, and straight redeem
|
34 |
-
In gentle numbers time so idly spent;
|
35 |
-
Sing to the ear that doth thy lays esteem
|
36 |
-
And gives thy pen both skill and argument.
|
37 |
-
Rise, resty Muse, my love's sweet face survey,
|
38 |
-
If Time have any wrinkle graven there;
|
39 |
-
If any, be a satire to decay,
|
40 |
-
And make Time's spoils despised every where.
|
41 |
-
Give my love fame faster than Time wastes life;
|
42 |
-
So thou prevent'st his scythe and crooked knife.
|
43 |
-
My glass shall not persuade me I am old,
|
44 |
-
So long as youth and thou are of one date;
|
45 |
-
But when in thee time's furrows I behold,
|
46 |
-
Then look I death my days should expiate.
|
47 |
-
For all that beauty that doth cover thee
|
48 |
-
Is but the seemly raiment of my heart,
|
49 |
-
Which in thy breast doth live, as thine in me:
|
50 |
-
How can I then be elder than thou art?
|
51 |
-
O, therefore, love, be of thyself so wary
|
52 |
-
As I, not for myself, but for thee will;
|
53 |
-
Bearing thy heart, which I will keep so chary
|
54 |
-
As tender nurse her babe from faring ill.
|
55 |
-
Presume not on thy heart when mine is slain;
|
56 |
-
Thou gavest me thine, not to give back again.
|
57 |
-
So am I as the rich, whose blessed key
|
58 |
-
Can bring him to his sweet up-locked treasure,
|
59 |
-
The which he will not every hour survey,
|
60 |
-
For blunting the fine point of seldom pleasure.
|
61 |
-
Therefore are feasts so solemn and so rare,
|
62 |
-
Since, seldom coming, in the long year set,
|
63 |
-
Like stones of worth they thinly placed are,
|
64 |
-
Or captain jewels in the carcanet.
|
65 |
-
So is the time that keeps you as my chest,
|
66 |
-
Or as the wardrobe which the robe doth hide,
|
67 |
-
To make some special instant special blest,
|
68 |
-
By new unfolding his imprison'd pride.
|
69 |
-
Blessed are you, whose worthiness gives scope,
|
70 |
-
Being had, to triumph, being lack'd, to hope.
|
71 |
-
If there be nothing new, but that which is
|
72 |
-
Hath been before, how are our brains beguiled,
|
73 |
-
Which, labouring for invention, bear amiss
|
74 |
-
The second burden of a former child!
|
75 |
-
O, that record could with a backward look,
|
76 |
-
Even of five hundred courses of the sun,
|
77 |
-
Show me your image in some antique book,
|
78 |
-
Since mind at first in character was done!
|
79 |
-
That I might see what the old world could say
|
80 |
-
To this composed wonder of your frame;
|
81 |
-
Whether we are mended, or whether better they,
|
82 |
-
Or whether revolution be the same.
|
83 |
-
O, sure I am, the wits of former days
|
84 |
-
To subjects worse have given admiring praise.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/src/llmperf/utils.py
DELETED
@@ -1,147 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import math
|
3 |
-
import pathlib
|
4 |
-
import random
|
5 |
-
import subprocess
|
6 |
-
import time
|
7 |
-
from typing import Any, Dict, Tuple
|
8 |
-
|
9 |
-
from transformers import LlamaTokenizerFast
|
10 |
-
|
11 |
-
|
12 |
-
RESULTS_VERSION = "2023-08-31"


class LLMPerfResults:
    """Named container for benchmark results plus metadata, serialisable to flat JSON."""

    def __init__(
        self,
        name: str,
        metadata: Dict[str, Any] = None,
    ):
        """Create a result record stamped with the current time.

        Args:
            name: Name stored under the ``"name"`` key of the output.
            metadata: Optional extra fields. The mapping is copied, so the
                caller's dict is not modified when the timestamp is added.
        """
        self.name = name
        # Shallow copy: adding "timestamp" below must not leak into the
        # dictionary owned by the caller.
        self.metadata = dict(metadata) if metadata else {}
        self.timestamp = int(time.time())
        self.metadata["timestamp"] = self.timestamp
        self.version = RESULTS_VERSION

    def to_dict(self):
        """Return version, name and metadata merged into one flattened dict."""
        data = {
            "version": self.version,
            "name": self.name,
        }
        data.update(self.metadata)
        data = flatten_dict(data)
        return data

    def json(self):
        """Return the flattened result dictionary encoded as a JSON string."""
        data = self.to_dict()
        return json.dumps(data)
|
39 |
-
|
40 |
-
|
41 |
-
def upload_to_s3(results_path: str, s3_path: str) -> None:
    """Upload the results to s3 by running ``aws s3 sync``.

    Args:
        results_path: The path to the results file.
        s3_path: The s3 path to upload the results to.

    """

    command = ["aws", "s3", "sync", results_path, f"{s3_path}/"]
    # Capture stderr as text; without this ``result.stderr`` is always None and
    # the failure branch would print "None" instead of the error message.
    result = subprocess.run(command, stderr=subprocess.PIPE, text=True)
    if result.returncode == 0:
        print("Files uploaded successfully!")
    else:
        print("An error occurred:")
        print(result.stderr)
|
57 |
-
|
58 |
-
|
59 |
-
def randomly_sample_sonnet_lines_prompt(
    prompt_tokens_mean: int = 550,
    prompt_tokens_stddev: int = 250,
    expect_output_tokens: int = 150,
) -> Tuple[str, int]:
    """Generate a prompt that randomly samples lines from the Shakespeare sonnets in sonnet.txt.

    Args:
        prompt_tokens_mean: The mean length, in tokens, of the prompt to generate.
        prompt_tokens_stddev: The standard deviation of the prompt length in tokens.
        expect_output_tokens: The number of output tokens asked for in the
            instruction text placed at the start of the prompt.

    Note:
        Tokens are counted using the Llama tokenizer. Using one tokenizer
        ensures a fairer comparison across different LLMs. For example, if gpt 3.5
        tokenizes a prompt in less tokens than Llama2, then this will be reflected
        in the results since they will be fed identical prompts.

    Returns:
        A tuple of the prompt and the sampled target length of the prompt in tokens.

    Raises:
        ValueError: If sonnet.txt contains no lines (the sampling loop could
            otherwise never finish).
    """

    tokenizer = LlamaTokenizerFast.from_pretrained(
        "hf-internal-testing/llama-tokenizer"
    )

    def get_token_length(text):
        return len(tokenizer.encode(text))

    prompt = (
        "Randomly stream lines from the following text "
        f"with {expect_output_tokens} output tokens. "
        "Don't generate eos tokens:\n\n"
    )
    # The instruction text is fixed, so tokenize it once instead of on every
    # iteration of the resampling loop.
    base_prompt_tokens = get_token_length(prompt)

    # get a prompt length that is at least as long as the base
    num_prompt_tokens = sample_random_positive_int(
        prompt_tokens_mean, prompt_tokens_stddev
    )
    while num_prompt_tokens < base_prompt_tokens:
        num_prompt_tokens = sample_random_positive_int(
            prompt_tokens_mean, prompt_tokens_stddev
        )
    remaining_prompt_tokens = num_prompt_tokens - base_prompt_tokens

    sonnet_path = pathlib.Path(__file__).parent.resolve() / "sonnet.txt"
    with open(sonnet_path, "r", encoding="utf-8") as f:
        sonnet_lines = f.readlines()
    if not sonnet_lines:
        raise ValueError(f"no lines available to sample from in {sonnet_path}")
    random.shuffle(sonnet_lines)

    # Cycle through the shuffled lines until the token budget is used up.
    sampling_lines = True
    while sampling_lines:
        for line in sonnet_lines:
            line_tokens = get_token_length(line)
            if remaining_prompt_tokens - line_tokens < 0:
                # This will cut off a line in the middle of a word, but that's ok
                # since an llm should be able to handle that. The slice is in
                # characters, so the remaining budget is only approximated.
                prompt += line[:remaining_prompt_tokens]
                sampling_lines = False
                break
            prompt += line
            remaining_prompt_tokens -= line_tokens
    return (prompt, num_prompt_tokens)
|
121 |
-
|
122 |
-
|
123 |
-
def sample_random_positive_int(mean: int, stddev: int) -> int:
    """Draw from a gaussian distribution until the truncated draw is positive.

    Args:
        mean: The mean of the gaussian distribution to sample from.
        stddev: The standard deviation of the gaussian distribution to sample from.

    Returns:
        The first draw whose integer truncation is strictly greater than zero.
    """
    while True:
        candidate = int(random.gauss(mean, stddev))
        if candidate > 0:
            return candidate
|
137 |
-
|
138 |
-
|
139 |
-
def flatten_dict(d, parent_key="", sep="_"):
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their parent key with ``sep``.
    Empty nested dictionaries contribute no entries.

    Args:
        d: The (possibly nested) dictionary to flatten.
        parent_key: Prefix applied to every key of ``d``; no prefix is added
            when it is falsy.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new flat dictionary; ``d`` itself is not modified.
    """
    flat = {}
    for key, value in d.items():
        full_key = key if not parent_key else f"{parent_key}{sep}{key}"
        if isinstance(value, dict):
            flat.update(flatten_dict(value, full_key, sep=sep))
        else:
            flat[full_key] = value
    return flat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmperf/token_benchmark_ray.py
DELETED
@@ -1,469 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
from collections.abc import Iterable
|
3 |
-
import json
|
4 |
-
import os
|
5 |
-
from pathlib import Path
|
6 |
-
import re
|
7 |
-
import time
|
8 |
-
import random
|
9 |
-
from typing import Any, Dict, List, Optional, Tuple
|
10 |
-
|
11 |
-
import pandas as pd
|
12 |
-
import ray
|
13 |
-
|
14 |
-
from llmperf import common_metrics
|
15 |
-
from llmperf.common import SUPPORTED_APIS, construct_clients
|
16 |
-
|
17 |
-
from llmperf.models import RequestConfig
|
18 |
-
from llmperf.requests_launcher import RequestsLauncher
|
19 |
-
from llmperf.utils import (
|
20 |
-
randomly_sample_sonnet_lines_prompt,
|
21 |
-
LLMPerfResults,
|
22 |
-
sample_random_positive_int,
|
23 |
-
)
|
24 |
-
from tqdm import tqdm
|
25 |
-
|
26 |
-
from transformers import LlamaTokenizerFast
|
27 |
-
|
28 |
-
def get_token_throughput_latencies(
    model: str,
    mean_input_tokens: int,
    stddev_input_tokens: int,
    mean_output_tokens: int,
    stddev_output_tokens: int,
    additional_sampling_params: Optional[Dict[str, Any]] = None,
    num_concurrent_requests: int = 1,
    max_num_completed_requests: int = 500,
    test_timeout_s=90,
    llm_api="openai",
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """Get the token throughput and latencies for the given model.

    Args:
        model: The name of the model to query.
        mean_input_tokens: The mean number of tokens to send in the prompt for the request.
        stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request.
        mean_output_tokens: The mean number of tokens to generate per request.
        stddev_output_tokens: The standard deviation of the number of tokens to generate per request.
        additional_sampling_params: Additional sampling parameters to send with the request.
            For more information see the LLM APIs documentation for the completions
        num_concurrent_requests: The number of concurrent requests to make. Increase
            this to increase the amount of load and vice versa.
        max_num_completed_requests: Stop launching new requests once this many
            requests have completed.
        test_timeout_s: The amount of time to run the test for before reporting results.
        llm_api: The name of the llm api to use. Either "openai" or "litellm".

    Returns:
        A summary of the performance metrics collected across all completed requests
        (e.g. throughput, latencies, etc.)
        The individual metrics for each request.
    """
    random.seed(11111)

    tokenizer = LlamaTokenizerFast.from_pretrained(
        "hf-internal-testing/llama-tokenizer"
    )

    def get_token_length(text):
        return len(tokenizer.encode(text))

    if not additional_sampling_params:
        additional_sampling_params = {}

    clients = construct_clients(llm_api=llm_api, num_clients=num_concurrent_requests)
    req_launcher = RequestsLauncher(clients)
    completed_requests = []

    def collect_ready_results():
        """Fetch finished requests, recompute their token metrics and record them."""
        for request_metrics, gen_text, _ in req_launcher.get_next_ready():
            # Output tokens are re-counted with the local tokenizer rather than
            # taken from the client-reported value.
            num_output_tokens = get_token_length(gen_text)
            if num_output_tokens:
                request_metrics[common_metrics.INTER_TOKEN_LAT] /= num_output_tokens
            else:
                request_metrics[common_metrics.INTER_TOKEN_LAT] = 0
            request_metrics[common_metrics.NUM_OUTPUT_TOKENS] = num_output_tokens
            request_metrics[common_metrics.NUM_TOTAL_TOKENS] = (
                request_metrics[common_metrics.NUM_INPUT_TOKENS] + num_output_tokens
            )
            # A request may report a latency of 0 (e.g. when it failed before
            # timing finished); avoid a ZeroDivisionError in that case.
            e2e_latency = request_metrics[common_metrics.E2E_LAT]
            request_metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = (
                num_output_tokens / e2e_latency if e2e_latency else 0
            )
            completed_requests.append(request_metrics)

    num_completed_requests = 0
    start_time = time.monotonic()
    iteration = 0
    # The context manager closes the progress bar even if a request raises.
    with tqdm(total=max_num_completed_requests) as pbar:
        while (
            time.monotonic() - start_time < test_timeout_s
            and len(completed_requests) < max_num_completed_requests
        ):
            iteration += 1
            num_output_tokens = sample_random_positive_int(
                mean_output_tokens, stddev_output_tokens
            )

            prompt = randomly_sample_sonnet_lines_prompt(
                prompt_tokens_mean=mean_input_tokens,
                prompt_tokens_stddev=stddev_input_tokens,
                expect_output_tokens=num_output_tokens,
            )

            default_sampling_params = {"max_tokens": num_output_tokens}
            default_sampling_params.update(additional_sampling_params)
            request_config = RequestConfig(
                model=model,
                prompt=prompt,
                sampling_params=default_sampling_params,
                llm_api=llm_api,
            )
            req_launcher.launch_requests(request_config)
            # Retrieving results less frequently allows for more concurrent requests
            # to be launched. This will overall reduce the amount of time it takes
            # for the test to run.
            if not (iteration % num_concurrent_requests):
                collect_ready_results()
            pbar.update(len(completed_requests) - num_completed_requests)
            num_completed_requests = len(completed_requests)

    end_time = time.monotonic()
    if end_time - start_time >= test_timeout_s:
        print("Test timed out before all requests could be completed.")

    # check one last time that there are no remaining results to collect.
    collect_ready_results()

    # The original literal began with "\R", an invalid escape sequence that
    # printed a stray backslash; a leading newline is the evident intent.
    print(f"\nResults for token benchmark for {model} queried with the {llm_api} api.\n")
    ret = metrics_summary(completed_requests, start_time, end_time)

    metadata = {
        "model": model,
        "mean_input_tokens": mean_input_tokens,
        "stddev_input_tokens": stddev_input_tokens,
        "mean_output_tokens": mean_output_tokens,
        "stddev_output_tokens": stddev_output_tokens,
        "num_concurrent_requests": num_concurrent_requests,
        "additional_sampling_params": additional_sampling_params,
    }

    metadata["results"] = ret

    return metadata, completed_requests
|
160 |
-
|
161 |
-
|
162 |
-
def metrics_summary(
    metrics: List[Dict[str, Any]], start_time: int, end_time: int
) -> Dict[str, Any]:
    """Summarize per-request metrics, printing each figure as it is computed.

    Args:
        metrics: The metrics to summarize, one dictionary per request.
        start_time: The time the test started.
        end_time: The time the test ended.

    Returns:
        A summary with the following information:
            - Overall throughput (generated tokens / total test time)
            - Number of completed requests and completed requests per minute
            - Number of started requests
            - Error rate, number of errors and error code frequency
            - Quantiles (p25-p99), mean, min, max and stddev, computed over
              requests without an error code, for:
                - Inter token latency
                - Time to first token
                - User total request time
                - User throughput (tokens / s)
                - Number of input tokens per request
                - Number of tokens generated per request
    """

    def iter_leaves(values):
        # Recursively yield scalars from arbitrarily nested iterables; strings
        # are treated as scalars.
        for element in values:
            if not isinstance(element, Iterable) or isinstance(element, str):
                yield element
            else:
                yield from iter_leaves(element)

    summary = {}
    elapsed = end_time - start_time

    frame = pd.DataFrame(metrics)
    # Rows whose error code is missing are the successful requests.
    ok_frame = frame[frame[common_metrics.ERROR_CODE].isna()]

    summarized_keys = (
        common_metrics.INTER_TOKEN_LAT,
        common_metrics.TTFT,
        common_metrics.E2E_LAT,
        common_metrics.REQ_OUTPUT_THROUGHPUT,
        common_metrics.NUM_INPUT_TOKENS,
        common_metrics.NUM_OUTPUT_TOKENS,
    )
    for key in summarized_keys:
        print(key)
        values = pd.Series(list(iter_leaves(ok_frame[key]))).dropna()

        percentiles = {}
        raw_quantiles = values.quantile([0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).to_dict()
        for quantile, value in raw_quantiles.items():
            label = f"p{int(quantile * 100)}"
            print(f" {label} = {value}")
            percentiles[label] = value

        mean = values.mean()
        print(f" mean = {mean}")
        minimum = values.min()
        print(f" min = {minimum}")
        maximum = values.max()
        print(f" max = {maximum}")
        stddev = values.std()
        print(f" stddev = {stddev}")

        summary[key] = {
            "quantiles": percentiles,
            "mean": mean,
            "min": minimum,
            "max": maximum,
            "stddev": stddev,
        }

    total_requests = len(metrics)
    summary[common_metrics.NUM_REQ_STARTED] = total_requests

    error_codes = frame[common_metrics.ERROR_CODE].dropna()
    num_errors = len(error_codes)
    if total_requests:
        summary[common_metrics.ERROR_RATE] = num_errors / total_requests
    else:
        summary[common_metrics.ERROR_RATE] = 0
    summary[common_metrics.NUM_ERRORS] = num_errors
    print(f"Number Of Errored Requests: {num_errors}")
    error_code_frequency = dict(error_codes.value_counts())
    if num_errors:
        print("Error Code Frequency")
        print(error_code_frequency)
    summary[common_metrics.ERROR_CODE_FREQ] = str(error_code_frequency)

    overall_output_throughput = (
        ok_frame[common_metrics.NUM_OUTPUT_TOKENS].sum() / elapsed
    )
    print(f"Overall Output Throughput: {overall_output_throughput}")
    summary[common_metrics.OUTPUT_THROUGHPUT] = overall_output_throughput

    num_completed_requests = len(ok_frame)
    num_completed_requests_per_min = num_completed_requests / elapsed * 60
    print(f"Number Of Completed Requests: {num_completed_requests}")
    print(f"Completed Requests Per Minute: {num_completed_requests_per_min}")

    summary[common_metrics.NUM_COMPLETED_REQUESTS] = num_completed_requests
    summary[common_metrics.COMPLETED_REQUESTS_PER_MIN] = num_completed_requests_per_min

    return summary
|
258 |
-
|
259 |
-
|
260 |
-
def run_token_benchmark(
    llm_api: str,
    model: str,
    test_timeout_s: int,
    max_num_completed_requests: int,
    num_concurrent_requests: int,
    mean_input_tokens: int,
    stddev_input_tokens: int,
    mean_output_tokens: int,
    stddev_output_tokens: int,
    additional_sampling_params: str,
    results_dir: str,
    user_metadata: Dict[str, Any],
) -> None:
    """Run the token benchmark and, if requested, save its results as JSON.

    The measurement itself is delegated to ``get_token_throughput_latencies``.
    When ``results_dir`` is non-empty, two files are written into it:
    ``<name>_summary.json`` and ``<name>_individual_responses.json``, where
    ``<name>`` is built from the model name and the mean input/output token
    counts, with every run of characters other than word characters and
    hyphens collapsed into a single ``-``.

    Args:
        llm_api: The name of the llm api to use.
        model: The name of the model to query.
        test_timeout_s: The amount of time to run the test for before
            reporting results.
        max_num_completed_requests: The number of requests to complete before
            finishing the test.
        num_concurrent_requests: The number of concurrent requests to make.
            Increase this to increase the amount of load and vice versa.
        mean_input_tokens: The mean number of tokens to send in the prompt
            for the request.
        stddev_input_tokens: The standard deviation of the number of tokens
            to send in the prompt for the request.
        mean_output_tokens: The mean number of tokens to generate per request.
        stddev_output_tokens: The standard deviation of the number of tokens
            to generate per request.
        additional_sampling_params: Additional sampling parameters to send
            with the request, as a JSON string (it is decoded with
            ``json.loads``). For more information see the LLM APIs
            documentation for the completions.
        results_dir: The directory to save the results to. An empty string
            disables saving.
        user_metadata: Additional metadata to include in the results. It is
            merged into the summary only when results are saved.

    Raises:
        ValueError: If ``results_dir`` exists but is not a directory.
        json.JSONDecodeError: If ``additional_sampling_params`` is not valid
            JSON.
    """
    # The message states a 41-token floor, so warn for every mean below it
    # (the earlier `< 40` check stayed silent for a mean of exactly 40).
    if mean_input_tokens < 41:
        print(
            "the minimum number of input tokens that will be sent is 41"
            " because of the prompting logic right now"
        )

    summary, individual_responses = get_token_throughput_latencies(
        model=model,
        llm_api=llm_api,
        test_timeout_s=test_timeout_s,
        max_num_completed_requests=max_num_completed_requests,
        mean_input_tokens=mean_input_tokens,
        stddev_input_tokens=stddev_input_tokens,
        mean_output_tokens=mean_output_tokens,
        stddev_output_tokens=stddev_output_tokens,
        num_concurrent_requests=num_concurrent_requests,
        additional_sampling_params=json.loads(additional_sampling_params),
    )

    if not results_dir:
        return

    # Build a filesystem-friendly base name for both output files.
    filename = f"{model}_{mean_input_tokens}_{mean_output_tokens}"
    filename = re.sub(r"[^\w\d-]+", "-", filename)
    filename = re.sub(r"-{2,}", "-", filename)
    summary_filename = f"{filename}_summary"
    individual_responses_filename = f"{filename}_individual_responses"

    # Update to metadata.
    summary.update(user_metadata)

    results = LLMPerfResults(name=summary_filename, metadata=summary)

    output_dir = Path(results_dir)
    if output_dir.exists() and not output_dir.is_dir():
        raise ValueError(f"{output_dir} is not a directory")
    # exist_ok avoids failing when another run creates the directory between
    # the check above and this call.
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        with open(output_dir / f"{summary_filename}.json", "w") as f:
            json.dump(results.to_dict(), f, indent=4, default=str)
    except Exception:
        # Echo the payload so the benchmark output is not lost, then re-raise
        # with the original traceback.
        print(results.to_dict())
        raise

    try:
        with open(output_dir / f"{individual_responses_filename}.json", "w") as f:
            json.dump(individual_responses, f, indent=4)
    except Exception:
        # Same fallback as above for the per-request records.
        print(individual_responses)
        raise
|
340 |
-
|
341 |
-
|
342 |
-
# Command-line interface for the benchmark. The parser is bound to the
# module-level name `args`; the entry point below rebinds that name to the
# parsed namespace.
args = argparse.ArgumentParser(
    description="Run a token throughput and latency benchmark."
)

args.add_argument(
    "--model", type=str, required=True, help="The model to use for this load test."
)
args.add_argument(
    "--mean-input-tokens",
    type=int,
    default=550,
    help=(
        "The mean number of tokens to send in the prompt for the request. "
        "(default: %(default)s)"
    ),
)
args.add_argument(
    "--stddev-input-tokens",
    type=int,
    default=150,
    help=(
        "The standard deviation of number of tokens to send in the prompt for the request. "
        "(default: %(default)s)"
    ),
)
args.add_argument(
    "--mean-output-tokens",
    type=int,
    default=150,
    help=(
        "The mean number of tokens to generate from each llm request. This is the max_tokens param "
        "for the completions API. Note that this is not always the number of tokens returned. "
        "(default: %(default)s)"
    ),
)
args.add_argument(
    "--stddev-output-tokens",
    type=int,
    default=80,
    help=(
        "The standard deviation on the number of tokens to generate per llm request. "
        "(default: %(default)s)"
    ),
)
args.add_argument(
    "--num-concurrent-requests",
    type=int,
    default=10,
    help=("The number of concurrent requests to send (default: %(default)s)"),
)
# Forwarded to run_token_benchmark as `test_timeout_s`, hence seconds.
args.add_argument(
    "--timeout",
    type=int,
    default=90,
    help=(
        "The amount of time, in seconds, to run the load test for. "
        "(default: %(default)s)"
    ),
)
args.add_argument(
    "--max-num-completed-requests",
    type=int,
    default=10,
    help=(
        "The number of requests to complete before finishing the test. Note "
        "that it's possible for the test to timeout first. (default: %(default)s)"
    ),
)
# The value is decoded with json.loads by run_token_benchmark.
args.add_argument(
    "--additional-sampling-params",
    type=str,
    default="{}",
    help=(
        "Additional sampling params, as a JSON object string, to send with "
        "each request to the LLM API. "
        "(default: %(default)s) No additional sampling params are sent."
    ),
)
args.add_argument(
    "--results-dir",
    type=str,
    default="",
    help=(
        "The directory to save the results to. "
        "If empty (the default), no results are saved."
    ),
)
args.add_argument(
    "--llm-api",
    type=str,
    default="openai",
    help=(
        f"The name of the llm api to use. Can select from {SUPPORTED_APIS}"
        " (default: %(default)s)"
    ),
)
args.add_argument(
    "--metadata",
    type=str,
    default="",
    help=(
        "A comma separated list of metadata to include in the results, e.g. "
        "name=foo,bar=1. These will be added to the metadata field of the results."
    ),
)
|
443 |
-
|
444 |
-
def parse_user_metadata(raw: str) -> Dict[str, str]:
    """Parse a ``key=value,key=value`` string into a dictionary.

    Each comma-separated item is split on its first ``=`` only, so values may
    themselves contain ``=``. Later duplicates of a key overwrite earlier ones.

    Args:
        raw: The raw ``--metadata`` argument. An empty string yields ``{}``.

    Returns:
        The parsed key/value pairs; both keys and values are strings.

    Raises:
        ValueError: If an item contains no ``=``.
    """
    parsed: Dict[str, str] = {}
    if not raw:
        return parsed
    for item in raw.split(","):
        key, separator, value = item.partition("=")
        if not separator:
            raise ValueError(
                f"Invalid metadata item {item!r}: expected the form key=value"
            )
        parsed[key] = value
    return parsed


if __name__ == "__main__":
    # Parse the command line before starting Ray so that `--help` and usage
    # errors exit without first initializing Ray.
    args = args.parse_args()

    # Parse user metadata.
    user_metadata = parse_user_metadata(args.metadata)

    # Pass the current environment through Ray's runtime_env.
    env_vars = dict(os.environ)
    ray.init(runtime_env={"env_vars": env_vars})

    run_token_benchmark(
        llm_api=args.llm_api,
        model=args.model,
        test_timeout_s=args.timeout,
        max_num_completed_requests=args.max_num_completed_requests,
        mean_input_tokens=args.mean_input_tokens,
        stddev_input_tokens=args.stddev_input_tokens,
        mean_output_tokens=args.mean_output_tokens,
        stddev_output_tokens=args.stddev_output_tokens,
        num_concurrent_requests=args.num_concurrent_requests,
        additional_sampling_params=args.additional_sampling_params,
        results_dir=args.results_dir,
        user_metadata=user_metadata,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
on_startup.sh
CHANGED
@@ -14,6 +14,12 @@ git config --global credential.helper store
|
|
14 |
## Remove the temporary clone directory
|
15 |
#rm -rf /tmp/tgi-benchmark-notebooks
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Add dark theme
|
18 |
mkdir -p ~/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/ && \
|
19 |
echo '{ "theme":"JupyterLab Dark" }' > ~/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings
|
|
|
14 |
## Remove the temporary clone directory
|
15 |
#rm -rf /tmp/tgi-benchmark-notebooks
|
16 |
|
17 |
+
# Install llmperf at a pinned commit.
# Skip the clone when a checkout already exists so that re-running this
# script does not fail on "destination path already exists".
cd ~/app
if [ ! -d llmperf/.git ]; then
    git clone https://github.com/ray-project/llmperf.git
fi
cd llmperf
git checkout afd137a
|
22 |
+
|
23 |
# Add dark theme
|
24 |
mkdir -p ~/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/ && \
|
25 |
echo '{ "theme":"JupyterLab Dark" }' > ~/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings
|
requirements.txt
CHANGED
@@ -3,9 +3,10 @@ jupyterlab-vim==0.15.1
|
|
3 |
jupyterlab-vimrc==0.5.2
|
4 |
jupyter-server==2.3.0
|
5 |
tornado==6.2
|
6 |
-
ipywidgets
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
3 |
jupyterlab-vimrc==0.5.2
|
4 |
jupyter-server==2.3.0
|
5 |
tornado==6.2
|
6 |
+
ipywidgets==8.1.3
|
7 |
+
huggingface-hub==0.23.2
|
8 |
+
transformers==4.41.2
|
9 |
+
pandas==2.2.2
|
10 |
+
datasets==2.19.1
|
11 |
+
plotly==5.22.0
|
12 |
+
ray[default]==2.23.0
|