Spaces:
Runtime error
Runtime error
Soutrik
committed on
Commit
•
16d3463
1
Parent(s):
4f7a99d
Add the correct set of test code for LitServe: server side, client side, and benchmark code
Browse files- client.py +18 -0
- notebooks/training_lightning_tests.ipynb +122 -0
- poetry.lock +0 -0
- pyproject.toml +3 -0
- src/litserve_api_test.py +200 -0
- src/litserve_test_client.py +86 -0
- src/litserve_test_server.py +116 -0
client.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Copyright The Lightning AI team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
import requests
|
16 |
+
|
17 |
+
# Send one sample prediction request to the server on 127.0.0.1:8080 and print
# the HTTP status code together with the raw response body.
# The timeout keeps the script from hanging forever if the server is down.
response = requests.post(
    "http://127.0.0.1:8080/predict", json={"input": 4.0}, timeout=30
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
|
notebooks/training_lightning_tests.ipynb
CHANGED
@@ -829,6 +829,128 @@
|
|
829 |
" return loggers"
|
830 |
]
|
831 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
832 |
{
|
833 |
"cell_type": "code",
|
834 |
"execution_count": null,
|
|
|
829 |
" return loggers"
|
830 |
]
|
831 |
},
|
832 |
+
{
|
833 |
+
"cell_type": "code",
|
834 |
+
"execution_count": 1,
|
835 |
+
"metadata": {},
|
836 |
+
"outputs": [
|
837 |
+
{
|
838 |
+
"name": "stderr",
|
839 |
+
"output_type": "stream",
|
840 |
+
"text": [
|
841 |
+
"/anaconda/envs/emlo_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
842 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
843 |
+
]
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"name": "stdout",
|
847 |
+
"output_type": "stream",
|
848 |
+
"text": [
|
849 |
+
"['bat_resnext26ts', 'beit_base_patch16_224', 'beit_base_patch16_384', 'beit_large_patch16_224', 'beit_large_patch16_384', 'beit_large_patch16_512', 'beitv2_base_patch16_224', 'beitv2_large_patch16_224', 'botnet26t_256', 'botnet50ts_256', 'caformer_b36', 'caformer_m36', 'caformer_s18', 'caformer_s36', 'cait_m36_384', 'cait_m48_448', 'cait_s24_224', 'cait_s24_384', 'cait_s36_384', 'cait_xs24_384', 'cait_xxs24_224', 'cait_xxs24_384', 'cait_xxs36_224', 'cait_xxs36_384', 'coat_lite_medium', 'coat_lite_medium_384', 'coat_lite_mini', 'coat_lite_small', 'coat_lite_tiny', 'coat_mini', 'coat_small', 'coat_tiny', 'coatnet_0_224', 'coatnet_0_rw_224', 'coatnet_1_224', 'coatnet_1_rw_224', 'coatnet_2_224', 'coatnet_2_rw_224', 'coatnet_3_224', 'coatnet_3_rw_224', 'coatnet_4_224', 'coatnet_5_224', 'coatnet_bn_0_rw_224', 'coatnet_nano_cc_224', 'coatnet_nano_rw_224', 'coatnet_pico_rw_224', 'coatnet_rmlp_0_rw_224', 'coatnet_rmlp_1_rw2_224', 'coatnet_rmlp_1_rw_224', 'coatnet_rmlp_2_rw_224', 'coatnet_rmlp_2_rw_384', 'coatnet_rmlp_3_rw_224', 'coatnet_rmlp_nano_rw_224', 'coatnext_nano_rw_224', 'convformer_b36', 'convformer_m36', 'convformer_s18', 'convformer_s36', 'convit_base', 'convit_small', 'convit_tiny', 'convmixer_768_32', 'convmixer_1024_20_ks9_p14', 'convmixer_1536_20', 'convnext_atto', 'convnext_atto_ols', 'convnext_base', 'convnext_femto', 'convnext_femto_ols', 'convnext_large', 'convnext_large_mlp', 'convnext_nano', 'convnext_nano_ols', 'convnext_pico', 'convnext_pico_ols', 'convnext_small', 'convnext_tiny', 'convnext_tiny_hnf', 'convnext_xlarge', 'convnext_xxlarge', 'convnextv2_atto', 'convnextv2_base', 'convnextv2_femto', 'convnextv2_huge', 'convnextv2_large', 'convnextv2_nano', 'convnextv2_pico', 'convnextv2_small', 'convnextv2_tiny', 'crossvit_9_240', 'crossvit_9_dagger_240', 'crossvit_15_240', 'crossvit_15_dagger_240', 'crossvit_15_dagger_408', 'crossvit_18_240', 'crossvit_18_dagger_240', 'crossvit_18_dagger_408', 'crossvit_base_240', 'crossvit_small_240', 
'crossvit_tiny_240', 'cs3darknet_focus_l', 'cs3darknet_focus_m', 'cs3darknet_focus_s', 'cs3darknet_focus_x', 'cs3darknet_l', 'cs3darknet_m', 'cs3darknet_s', 'cs3darknet_x', 'cs3edgenet_x', 'cs3se_edgenet_x', 'cs3sedarknet_l', 'cs3sedarknet_x', 'cs3sedarknet_xdw', 'cspdarknet53', 'cspresnet50', 'cspresnet50d', 'cspresnet50w', 'cspresnext50', 'darknet17', 'darknet21', 'darknet53', 'darknetaa53', 'davit_base', 'davit_base_fl', 'davit_giant', 'davit_huge', 'davit_huge_fl', 'davit_large', 'davit_small', 'davit_tiny', 'deit3_base_patch16_224', 'deit3_base_patch16_384', 'deit3_huge_patch14_224', 'deit3_large_patch16_224', 'deit3_large_patch16_384', 'deit3_medium_patch16_224', 'deit3_small_patch16_224', 'deit3_small_patch16_384', 'deit_base_distilled_patch16_224', 'deit_base_distilled_patch16_384', 'deit_base_patch16_224', 'deit_base_patch16_384', 'deit_small_distilled_patch16_224', 'deit_small_patch16_224', 'deit_tiny_distilled_patch16_224', 'deit_tiny_patch16_224', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'densenet264d', 'densenetblur121d', 'dla34', 'dla46_c', 'dla46x_c', 'dla60', 'dla60_res2net', 'dla60_res2next', 'dla60x', 'dla60x_c', 'dla102', 'dla102x', 'dla102x2', 'dla169', 'dm_nfnet_f0', 'dm_nfnet_f1', 'dm_nfnet_f2', 'dm_nfnet_f3', 'dm_nfnet_f4', 'dm_nfnet_f5', 'dm_nfnet_f6', 'dpn48b', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn107', 'dpn131', 'eca_botnext26ts_256', 'eca_halonext26ts', 'eca_nfnet_l0', 'eca_nfnet_l1', 'eca_nfnet_l2', 'eca_nfnet_l3', 'eca_resnet33ts', 'eca_resnext26ts', 'eca_vovnet39b', 'ecaresnet26t', 'ecaresnet50d', 'ecaresnet50d_pruned', 'ecaresnet50t', 'ecaresnet101d', 'ecaresnet101d_pruned', 'ecaresnet200d', 'ecaresnet269d', 'ecaresnetlight', 'ecaresnext26t_32x4d', 'ecaresnext50t_32x4d', 'edgenext_base', 'edgenext_small', 'edgenext_small_rw', 'edgenext_x_small', 'edgenext_xx_small', 'efficientformer_l1', 'efficientformer_l3', 'efficientformer_l7', 'efficientformerv2_l', 'efficientformerv2_s0', 'efficientformerv2_s1', 
'efficientformerv2_s2', 'efficientnet_b0', 'efficientnet_b0_g8_gn', 'efficientnet_b0_g16_evos', 'efficientnet_b0_gn', 'efficientnet_b1', 'efficientnet_b1_pruned', 'efficientnet_b2', 'efficientnet_b2_pruned', 'efficientnet_b3', 'efficientnet_b3_g8_gn', 'efficientnet_b3_gn', 'efficientnet_b3_pruned', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_b8', 'efficientnet_blur_b0', 'efficientnet_cc_b0_4e', 'efficientnet_cc_b0_8e', 'efficientnet_cc_b1_8e', 'efficientnet_el', 'efficientnet_el_pruned', 'efficientnet_em', 'efficientnet_es', 'efficientnet_es_pruned', 'efficientnet_h_b5', 'efficientnet_l2', 'efficientnet_lite0', 'efficientnet_lite1', 'efficientnet_lite2', 'efficientnet_lite3', 'efficientnet_lite4', 'efficientnet_x_b3', 'efficientnet_x_b5', 'efficientnetv2_l', 'efficientnetv2_m', 'efficientnetv2_rw_m', 'efficientnetv2_rw_s', 'efficientnetv2_rw_t', 'efficientnetv2_s', 'efficientnetv2_xl', 'efficientvit_b0', 'efficientvit_b1', 'efficientvit_b2', 'efficientvit_b3', 'efficientvit_l1', 'efficientvit_l2', 'efficientvit_l3', 'efficientvit_m0', 'efficientvit_m1', 'efficientvit_m2', 'efficientvit_m3', 'efficientvit_m4', 'efficientvit_m5', 'ese_vovnet19b_dw', 'ese_vovnet19b_slim', 'ese_vovnet19b_slim_dw', 'ese_vovnet39b', 'ese_vovnet39b_evos', 'ese_vovnet57b', 'ese_vovnet99b', 'eva02_base_patch14_224', 'eva02_base_patch14_448', 'eva02_base_patch16_clip_224', 'eva02_enormous_patch14_clip_224', 'eva02_large_patch14_224', 'eva02_large_patch14_448', 'eva02_large_patch14_clip_224', 'eva02_large_patch14_clip_336', 'eva02_small_patch14_224', 'eva02_small_patch14_336', 'eva02_tiny_patch14_224', 'eva02_tiny_patch14_336', 'eva_giant_patch14_224', 'eva_giant_patch14_336', 'eva_giant_patch14_560', 'eva_giant_patch14_clip_224', 'eva_large_patch14_196', 'eva_large_patch14_336', 'fastvit_ma36', 'fastvit_mci0', 'fastvit_mci1', 'fastvit_mci2', 'fastvit_s12', 'fastvit_sa12', 'fastvit_sa24', 'fastvit_sa36', 'fastvit_t8', 'fastvit_t12', 'fbnetc_100', 
'fbnetv3_b', 'fbnetv3_d', 'fbnetv3_g', 'flexivit_base', 'flexivit_large', 'flexivit_small', 'focalnet_base_lrf', 'focalnet_base_srf', 'focalnet_huge_fl3', 'focalnet_huge_fl4', 'focalnet_large_fl3', 'focalnet_large_fl4', 'focalnet_small_lrf', 'focalnet_small_srf', 'focalnet_tiny_lrf', 'focalnet_tiny_srf', 'focalnet_xlarge_fl3', 'focalnet_xlarge_fl4', 'gc_efficientnetv2_rw_t', 'gcresnet33ts', 'gcresnet50t', 'gcresnext26ts', 'gcresnext50ts', 'gcvit_base', 'gcvit_small', 'gcvit_tiny', 'gcvit_xtiny', 'gcvit_xxtiny', 'gernet_l', 'gernet_m', 'gernet_s', 'ghostnet_050', 'ghostnet_100', 'ghostnet_130', 'ghostnetv2_100', 'ghostnetv2_130', 'ghostnetv2_160', 'gmixer_12_224', 'gmixer_24_224', 'gmlp_b16_224', 'gmlp_s16_224', 'gmlp_ti16_224', 'halo2botnet50ts_256', 'halonet26t', 'halonet50ts', 'halonet_h1', 'haloregnetz_b', 'hardcorenas_a', 'hardcorenas_b', 'hardcorenas_c', 'hardcorenas_d', 'hardcorenas_e', 'hardcorenas_f', 'hgnet_base', 'hgnet_small', 'hgnet_tiny', 'hgnetv2_b0', 'hgnetv2_b1', 'hgnetv2_b2', 'hgnetv2_b3', 'hgnetv2_b4', 'hgnetv2_b5', 'hgnetv2_b6', 'hiera_base_224', 'hiera_base_abswin_256', 'hiera_base_plus_224', 'hiera_huge_224', 'hiera_large_224', 'hiera_small_224', 'hiera_small_abswin_256', 'hiera_tiny_224', 'hieradet_small', 'hrnet_w18', 'hrnet_w18_small', 'hrnet_w18_small_v2', 'hrnet_w18_ssld', 'hrnet_w30', 'hrnet_w32', 'hrnet_w40', 'hrnet_w44', 'hrnet_w48', 'hrnet_w48_ssld', 'hrnet_w64', 'inception_next_base', 'inception_next_small', 'inception_next_tiny', 'inception_resnet_v2', 'inception_v3', 'inception_v4', 'lambda_resnet26rpt_256', 'lambda_resnet26t', 'lambda_resnet50ts', 'lamhalobotnet50ts_256', 'lcnet_035', 'lcnet_050', 'lcnet_075', 'lcnet_100', 'lcnet_150', 'legacy_senet154', 'legacy_seresnet18', 'legacy_seresnet34', 'legacy_seresnet50', 'legacy_seresnet101', 'legacy_seresnet152', 'legacy_seresnext26_32x4d', 'legacy_seresnext50_32x4d', 'legacy_seresnext101_32x4d', 'legacy_xception', 'levit_128', 'levit_128s', 'levit_192', 'levit_256', 'levit_256d', 
'levit_384', 'levit_384_s8', 'levit_512', 'levit_512_s8', 'levit_512d', 'levit_conv_128', 'levit_conv_128s', 'levit_conv_192', 'levit_conv_256', 'levit_conv_256d', 'levit_conv_384', 'levit_conv_384_s8', 'levit_conv_512', 'levit_conv_512_s8', 'levit_conv_512d', 'maxvit_base_tf_224', 'maxvit_base_tf_384', 'maxvit_base_tf_512', 'maxvit_large_tf_224', 'maxvit_large_tf_384', 'maxvit_large_tf_512', 'maxvit_nano_rw_256', 'maxvit_pico_rw_256', 'maxvit_rmlp_base_rw_224', 'maxvit_rmlp_base_rw_384', 'maxvit_rmlp_nano_rw_256', 'maxvit_rmlp_pico_rw_256', 'maxvit_rmlp_small_rw_224', 'maxvit_rmlp_small_rw_256', 'maxvit_rmlp_tiny_rw_256', 'maxvit_small_tf_224', 'maxvit_small_tf_384', 'maxvit_small_tf_512', 'maxvit_tiny_pm_256', 'maxvit_tiny_rw_224', 'maxvit_tiny_rw_256', 'maxvit_tiny_tf_224', 'maxvit_tiny_tf_384', 'maxvit_tiny_tf_512', 'maxvit_xlarge_tf_224', 'maxvit_xlarge_tf_384', 'maxvit_xlarge_tf_512', 'maxxvit_rmlp_nano_rw_256', 'maxxvit_rmlp_small_rw_256', 'maxxvit_rmlp_tiny_rw_256', 'maxxvitv2_nano_rw_256', 'maxxvitv2_rmlp_base_rw_224', 'maxxvitv2_rmlp_base_rw_384', 'maxxvitv2_rmlp_large_rw_224', 'mixer_b16_224', 'mixer_b32_224', 'mixer_l16_224', 'mixer_l32_224', 'mixer_s16_224', 'mixer_s32_224', 'mixnet_l', 'mixnet_m', 'mixnet_s', 'mixnet_xl', 'mixnet_xxl', 'mnasnet_050', 'mnasnet_075', 'mnasnet_100', 'mnasnet_140', 'mnasnet_small', 'mobilenet_edgetpu_100', 'mobilenet_edgetpu_v2_l', 'mobilenet_edgetpu_v2_m', 'mobilenet_edgetpu_v2_s', 'mobilenet_edgetpu_v2_xs', 'mobilenetv1_100', 'mobilenetv1_100h', 'mobilenetv1_125', 'mobilenetv2_035', 'mobilenetv2_050', 'mobilenetv2_075', 'mobilenetv2_100', 'mobilenetv2_110d', 'mobilenetv2_120d', 'mobilenetv2_140', 'mobilenetv3_large_075', 'mobilenetv3_large_100', 'mobilenetv3_large_150d', 'mobilenetv3_rw', 'mobilenetv3_small_050', 'mobilenetv3_small_075', 'mobilenetv3_small_100', 'mobilenetv4_conv_aa_large', 'mobilenetv4_conv_aa_medium', 'mobilenetv4_conv_blur_medium', 'mobilenetv4_conv_large', 'mobilenetv4_conv_medium', 
'mobilenetv4_conv_small', 'mobilenetv4_hybrid_large', 'mobilenetv4_hybrid_large_075', 'mobilenetv4_hybrid_medium', 'mobilenetv4_hybrid_medium_075', 'mobileone_s0', 'mobileone_s1', 'mobileone_s2', 'mobileone_s3', 'mobileone_s4', 'mobilevit_s', 'mobilevit_xs', 'mobilevit_xxs', 'mobilevitv2_050', 'mobilevitv2_075', 'mobilevitv2_100', 'mobilevitv2_125', 'mobilevitv2_150', 'mobilevitv2_175', 'mobilevitv2_200', 'mvitv2_base', 'mvitv2_base_cls', 'mvitv2_huge_cls', 'mvitv2_large', 'mvitv2_large_cls', 'mvitv2_small', 'mvitv2_small_cls', 'mvitv2_tiny', 'nasnetalarge', 'nest_base', 'nest_base_jx', 'nest_small', 'nest_small_jx', 'nest_tiny', 'nest_tiny_jx', 'nextvit_base', 'nextvit_large', 'nextvit_small', 'nf_ecaresnet26', 'nf_ecaresnet50', 'nf_ecaresnet101', 'nf_regnet_b0', 'nf_regnet_b1', 'nf_regnet_b2', 'nf_regnet_b3', 'nf_regnet_b4', 'nf_regnet_b5', 'nf_resnet26', 'nf_resnet50', 'nf_resnet101', 'nf_seresnet26', 'nf_seresnet50', 'nf_seresnet101', 'nfnet_f0', 'nfnet_f1', 'nfnet_f2', 'nfnet_f3', 'nfnet_f4', 'nfnet_f5', 'nfnet_f6', 'nfnet_f7', 'nfnet_l0', 'pit_b_224', 'pit_b_distilled_224', 'pit_s_224', 'pit_s_distilled_224', 'pit_ti_224', 'pit_ti_distilled_224', 'pit_xs_224', 'pit_xs_distilled_224', 'pnasnet5large', 'poolformer_m36', 'poolformer_m48', 'poolformer_s12', 'poolformer_s24', 'poolformer_s36', 'poolformerv2_m36', 'poolformerv2_m48', 'poolformerv2_s12', 'poolformerv2_s24', 'poolformerv2_s36', 'pvt_v2_b0', 'pvt_v2_b1', 'pvt_v2_b2', 'pvt_v2_b2_li', 'pvt_v2_b3', 'pvt_v2_b4', 'pvt_v2_b5', 'rdnet_base', 'rdnet_large', 'rdnet_small', 'rdnet_tiny', 'regnetv_040', 'regnetv_064', 'regnetx_002', 'regnetx_004', 'regnetx_004_tv', 'regnetx_006', 'regnetx_008', 'regnetx_016', 'regnetx_032', 'regnetx_040', 'regnetx_064', 'regnetx_080', 'regnetx_120', 'regnetx_160', 'regnetx_320', 'regnety_002', 'regnety_004', 'regnety_006', 'regnety_008', 'regnety_008_tv', 'regnety_016', 'regnety_032', 'regnety_040', 'regnety_040_sgn', 'regnety_064', 'regnety_080', 'regnety_080_tv', 
'regnety_120', 'regnety_160', 'regnety_320', 'regnety_640', 'regnety_1280', 'regnety_2560', 'regnetz_005', 'regnetz_040', 'regnetz_040_h', 'regnetz_b16', 'regnetz_b16_evos', 'regnetz_c16', 'regnetz_c16_evos', 'regnetz_d8', 'regnetz_d8_evos', 'regnetz_d32', 'regnetz_e8', 'repghostnet_050', 'repghostnet_058', 'repghostnet_080', 'repghostnet_100', 'repghostnet_111', 'repghostnet_130', 'repghostnet_150', 'repghostnet_200', 'repvgg_a0', 'repvgg_a1', 'repvgg_a2', 'repvgg_b0', 'repvgg_b1', 'repvgg_b1g4', 'repvgg_b2', 'repvgg_b2g4', 'repvgg_b3', 'repvgg_b3g4', 'repvgg_d2se', 'repvit_m0_9', 'repvit_m1', 'repvit_m1_0', 'repvit_m1_1', 'repvit_m1_5', 'repvit_m2', 'repvit_m2_3', 'repvit_m3', 'res2net50_14w_8s', 'res2net50_26w_4s', 'res2net50_26w_6s', 'res2net50_26w_8s', 'res2net50_48w_2s', 'res2net50d', 'res2net101_26w_4s', 'res2net101d', 'res2next50', 'resmlp_12_224', 'resmlp_24_224', 'resmlp_36_224', 'resmlp_big_24_224', 'resnest14d', 'resnest26d', 'resnest50d', 'resnest50d_1s4x24d', 'resnest50d_4s2x40d', 'resnest101e', 'resnest200e', 'resnest269e', 'resnet10t', 'resnet14t', 'resnet18', 'resnet18d', 'resnet26', 'resnet26d', 'resnet26t', 'resnet32ts', 'resnet33ts', 'resnet34', 'resnet34d', 'resnet50', 'resnet50_clip', 'resnet50_clip_gap', 'resnet50_gn', 'resnet50_mlp', 'resnet50c', 'resnet50d', 'resnet50s', 'resnet50t', 'resnet50x4_clip', 'resnet50x4_clip_gap', 'resnet50x16_clip', 'resnet50x16_clip_gap', 'resnet50x64_clip', 'resnet50x64_clip_gap', 'resnet51q', 'resnet61q', 'resnet101', 'resnet101_clip', 'resnet101_clip_gap', 'resnet101c', 'resnet101d', 'resnet101s', 'resnet152', 'resnet152c', 'resnet152d', 'resnet152s', 'resnet200', 'resnet200d', 'resnetaa34d', 'resnetaa50', 'resnetaa50d', 'resnetaa101d', 'resnetblur18', 'resnetblur50', 'resnetblur50d', 'resnetblur101d', 'resnetrs50', 'resnetrs101', 'resnetrs152', 'resnetrs200', 'resnetrs270', 'resnetrs350', 'resnetrs420', 'resnetv2_50', 'resnetv2_50d', 'resnetv2_50d_evos', 'resnetv2_50d_frn', 'resnetv2_50d_gn', 
'resnetv2_50t', 'resnetv2_50x1_bit', 'resnetv2_50x3_bit', 'resnetv2_101', 'resnetv2_101d', 'resnetv2_101x1_bit', 'resnetv2_101x3_bit', 'resnetv2_152', 'resnetv2_152d', 'resnetv2_152x2_bit', 'resnetv2_152x4_bit', 'resnext26ts', 'resnext50_32x4d', 'resnext50d_32x4d', 'resnext101_32x4d', 'resnext101_32x8d', 'resnext101_32x16d', 'resnext101_32x32d', 'resnext101_64x4d', 'rexnet_100', 'rexnet_130', 'rexnet_150', 'rexnet_200', 'rexnet_300', 'rexnetr_100', 'rexnetr_130', 'rexnetr_150', 'rexnetr_200', 'rexnetr_300', 'sam2_hiera_base_plus', 'sam2_hiera_large', 'sam2_hiera_small', 'sam2_hiera_tiny', 'samvit_base_patch16', 'samvit_base_patch16_224', 'samvit_huge_patch16', 'samvit_large_patch16', 'sebotnet33ts_256', 'sedarknet21', 'sehalonet33ts', 'selecsls42', 'selecsls42b', 'selecsls60', 'selecsls60b', 'selecsls84', 'semnasnet_050', 'semnasnet_075', 'semnasnet_100', 'semnasnet_140', 'senet154', 'sequencer2d_l', 'sequencer2d_m', 'sequencer2d_s', 'seresnet18', 'seresnet33ts', 'seresnet34', 'seresnet50', 'seresnet50t', 'seresnet101', 'seresnet152', 'seresnet152d', 'seresnet200d', 'seresnet269d', 'seresnetaa50d', 'seresnext26d_32x4d', 'seresnext26t_32x4d', 'seresnext26ts', 'seresnext50_32x4d', 'seresnext101_32x4d', 'seresnext101_32x8d', 'seresnext101_64x4d', 'seresnext101d_32x8d', 'seresnextaa101d_32x8d', 'seresnextaa201d_32x8d', 'skresnet18', 'skresnet34', 'skresnet50', 'skresnet50d', 'skresnext50_32x4d', 'spnasnet_100', 'swin_base_patch4_window7_224', 'swin_base_patch4_window12_384', 'swin_large_patch4_window7_224', 'swin_large_patch4_window12_384', 'swin_s3_base_224', 'swin_s3_small_224', 'swin_s3_tiny_224', 'swin_small_patch4_window7_224', 'swin_tiny_patch4_window7_224', 'swinv2_base_window8_256', 'swinv2_base_window12_192', 'swinv2_base_window12to16_192to256', 'swinv2_base_window12to24_192to384', 'swinv2_base_window16_256', 'swinv2_cr_base_224', 'swinv2_cr_base_384', 'swinv2_cr_base_ns_224', 'swinv2_cr_giant_224', 'swinv2_cr_giant_384', 'swinv2_cr_huge_224', 
'swinv2_cr_huge_384', 'swinv2_cr_large_224', 'swinv2_cr_large_384', 'swinv2_cr_small_224', 'swinv2_cr_small_384', 'swinv2_cr_small_ns_224', 'swinv2_cr_small_ns_256', 'swinv2_cr_tiny_224', 'swinv2_cr_tiny_384', 'swinv2_cr_tiny_ns_224', 'swinv2_large_window12_192', 'swinv2_large_window12to16_192to256', 'swinv2_large_window12to24_192to384', 'swinv2_small_window8_256', 'swinv2_small_window16_256', 'swinv2_tiny_window8_256', 'swinv2_tiny_window16_256', 'test_byobnet', 'test_efficientnet', 'test_vit', 'tf_efficientnet_b0', 'tf_efficientnet_b1', 'tf_efficientnet_b2', 'tf_efficientnet_b3', 'tf_efficientnet_b4', 'tf_efficientnet_b5', 'tf_efficientnet_b6', 'tf_efficientnet_b7', 'tf_efficientnet_b8', 'tf_efficientnet_cc_b0_4e', 'tf_efficientnet_cc_b0_8e', 'tf_efficientnet_cc_b1_8e', 'tf_efficientnet_el', 'tf_efficientnet_em', 'tf_efficientnet_es', 'tf_efficientnet_l2', 'tf_efficientnet_lite0', 'tf_efficientnet_lite1', 'tf_efficientnet_lite2', 'tf_efficientnet_lite3', 'tf_efficientnet_lite4', 'tf_efficientnetv2_b0', 'tf_efficientnetv2_b1', 'tf_efficientnetv2_b2', 'tf_efficientnetv2_b3', 'tf_efficientnetv2_l', 'tf_efficientnetv2_m', 'tf_efficientnetv2_s', 'tf_efficientnetv2_xl', 'tf_mixnet_l', 'tf_mixnet_m', 'tf_mixnet_s', 'tf_mobilenetv3_large_075', 'tf_mobilenetv3_large_100', 'tf_mobilenetv3_large_minimal_100', 'tf_mobilenetv3_small_075', 'tf_mobilenetv3_small_100', 'tf_mobilenetv3_small_minimal_100', 'tiny_vit_5m_224', 'tiny_vit_11m_224', 'tiny_vit_21m_224', 'tiny_vit_21m_384', 'tiny_vit_21m_512', 'tinynet_a', 'tinynet_b', 'tinynet_c', 'tinynet_d', 'tinynet_e', 'tnt_b_patch16_224', 'tnt_s_patch16_224', 'tresnet_l', 'tresnet_m', 'tresnet_v2_l', 'tresnet_xl', 'twins_pcpvt_base', 'twins_pcpvt_large', 'twins_pcpvt_small', 'twins_svt_base', 'twins_svt_large', 'twins_svt_small', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 'visformer_small', 'visformer_tiny', 'vit_base_mci_224', 'vit_base_patch8_224', 'vit_base_patch14_dinov2', 
'vit_base_patch14_reg4_dinov2', 'vit_base_patch16_18x2_224', 'vit_base_patch16_224', 'vit_base_patch16_224_miil', 'vit_base_patch16_384', 'vit_base_patch16_clip_224', 'vit_base_patch16_clip_384', 'vit_base_patch16_clip_quickgelu_224', 'vit_base_patch16_gap_224', 'vit_base_patch16_plus_240', 'vit_base_patch16_reg4_gap_256', 'vit_base_patch16_rope_reg1_gap_256', 'vit_base_patch16_rpn_224', 'vit_base_patch16_siglip_224', 'vit_base_patch16_siglip_256', 'vit_base_patch16_siglip_384', 'vit_base_patch16_siglip_512', 'vit_base_patch16_siglip_gap_224', 'vit_base_patch16_siglip_gap_256', 'vit_base_patch16_siglip_gap_384', 'vit_base_patch16_siglip_gap_512', 'vit_base_patch16_xp_224', 'vit_base_patch32_224', 'vit_base_patch32_384', 'vit_base_patch32_clip_224', 'vit_base_patch32_clip_256', 'vit_base_patch32_clip_384', 'vit_base_patch32_clip_448', 'vit_base_patch32_clip_quickgelu_224', 'vit_base_patch32_plus_256', 'vit_base_r26_s32_224', 'vit_base_r50_s16_224', 'vit_base_r50_s16_384', 'vit_base_resnet26d_224', 'vit_base_resnet50d_224', 'vit_betwixt_patch16_gap_256', 'vit_betwixt_patch16_reg1_gap_256', 'vit_betwixt_patch16_reg4_gap_256', 'vit_betwixt_patch16_reg4_gap_384', 'vit_betwixt_patch16_rope_reg4_gap_256', 'vit_betwixt_patch32_clip_224', 'vit_giant_patch14_224', 'vit_giant_patch14_clip_224', 'vit_giant_patch14_dinov2', 'vit_giant_patch14_reg4_dinov2', 'vit_giant_patch16_gap_224', 'vit_gigantic_patch14_224', 'vit_gigantic_patch14_clip_224', 'vit_huge_patch14_224', 'vit_huge_patch14_clip_224', 'vit_huge_patch14_clip_336', 'vit_huge_patch14_clip_378', 'vit_huge_patch14_clip_quickgelu_224', 'vit_huge_patch14_clip_quickgelu_378', 'vit_huge_patch14_gap_224', 'vit_huge_patch14_xp_224', 'vit_huge_patch16_gap_448', 'vit_large_patch14_224', 'vit_large_patch14_clip_224', 'vit_large_patch14_clip_336', 'vit_large_patch14_clip_quickgelu_224', 'vit_large_patch14_clip_quickgelu_336', 'vit_large_patch14_dinov2', 'vit_large_patch14_reg4_dinov2', 'vit_large_patch14_xp_224', 
'vit_large_patch16_224', 'vit_large_patch16_384', 'vit_large_patch16_siglip_256', 'vit_large_patch16_siglip_384', 'vit_large_patch16_siglip_gap_256', 'vit_large_patch16_siglip_gap_384', 'vit_large_patch32_224', 'vit_large_patch32_384', 'vit_large_r50_s32_224', 'vit_large_r50_s32_384', 'vit_little_patch16_reg1_gap_256', 'vit_little_patch16_reg4_gap_256', 'vit_medium_patch16_clip_224', 'vit_medium_patch16_gap_240', 'vit_medium_patch16_gap_256', 'vit_medium_patch16_gap_384', 'vit_medium_patch16_reg1_gap_256', 'vit_medium_patch16_reg4_gap_256', 'vit_medium_patch16_rope_reg1_gap_256', 'vit_medium_patch32_clip_224', 'vit_mediumd_patch16_reg4_gap_256', 'vit_mediumd_patch16_reg4_gap_384', 'vit_mediumd_patch16_rope_reg1_gap_256', 'vit_pwee_patch16_reg1_gap_256', 'vit_relpos_base_patch16_224', 'vit_relpos_base_patch16_cls_224', 'vit_relpos_base_patch16_clsgap_224', 'vit_relpos_base_patch16_plus_240', 'vit_relpos_base_patch16_rpn_224', 'vit_relpos_base_patch32_plus_rpn_256', 'vit_relpos_medium_patch16_224', 'vit_relpos_medium_patch16_cls_224', 'vit_relpos_medium_patch16_rpn_224', 'vit_relpos_small_patch16_224', 'vit_relpos_small_patch16_rpn_224', 'vit_small_patch8_224', 'vit_small_patch14_dinov2', 'vit_small_patch14_reg4_dinov2', 'vit_small_patch16_18x2_224', 'vit_small_patch16_36x1_224', 'vit_small_patch16_224', 'vit_small_patch16_384', 'vit_small_patch32_224', 'vit_small_patch32_384', 'vit_small_r26_s32_224', 'vit_small_r26_s32_384', 'vit_small_resnet26d_224', 'vit_small_resnet50d_s16_224', 'vit_so150m_patch16_reg4_gap_256', 'vit_so150m_patch16_reg4_map_256', 'vit_so400m_patch14_siglip_224', 'vit_so400m_patch14_siglip_384', 'vit_so400m_patch14_siglip_gap_224', 'vit_so400m_patch14_siglip_gap_384', 'vit_so400m_patch14_siglip_gap_448', 'vit_so400m_patch14_siglip_gap_896', 'vit_srelpos_medium_patch16_224', 'vit_srelpos_small_patch16_224', 'vit_tiny_patch16_224', 'vit_tiny_patch16_384', 'vit_tiny_r_s16_p8_224', 'vit_tiny_r_s16_p8_384', 'vit_wee_patch16_reg1_gap_256', 
'vit_xsmall_patch16_clip_224', 'vitamin_base_224', 'vitamin_large2_224', 'vitamin_large2_256', 'vitamin_large2_336', 'vitamin_large2_384', 'vitamin_large_224', 'vitamin_large_256', 'vitamin_large_336', 'vitamin_large_384', 'vitamin_small_224', 'vitamin_xlarge_256', 'vitamin_xlarge_336', 'vitamin_xlarge_384', 'volo_d1_224', 'volo_d1_384', 'volo_d2_224', 'volo_d2_384', 'volo_d3_224', 'volo_d3_448', 'volo_d4_224', 'volo_d4_448', 'volo_d5_224', 'volo_d5_448', 'volo_d5_512', 'vovnet39a', 'vovnet57a', 'wide_resnet50_2', 'wide_resnet101_2', 'xception41', 'xception41p', 'xception65', 'xception65p', 'xception71', 'xcit_large_24_p8_224', 'xcit_large_24_p8_384', 'xcit_large_24_p16_224', 'xcit_large_24_p16_384', 'xcit_medium_24_p8_224', 'xcit_medium_24_p8_384', 'xcit_medium_24_p16_224', 'xcit_medium_24_p16_384', 'xcit_nano_12_p8_224', 'xcit_nano_12_p8_384', 'xcit_nano_12_p16_224', 'xcit_nano_12_p16_384', 'xcit_small_12_p8_224', 'xcit_small_12_p8_384', 'xcit_small_12_p16_224', 'xcit_small_12_p16_384', 'xcit_small_24_p8_224', 'xcit_small_24_p8_384', 'xcit_small_24_p16_224', 'xcit_small_24_p16_384', 'xcit_tiny_12_p8_224', 'xcit_tiny_12_p8_384', 'xcit_tiny_12_p16_224', 'xcit_tiny_12_p16_384', 'xcit_tiny_24_p8_224', 'xcit_tiny_24_p8_384', 'xcit_tiny_24_p16_224', 'xcit_tiny_24_p16_384']\n"
|
850 |
+
]
|
851 |
+
}
|
852 |
+
],
|
853 |
+
"source": [
|
854 |
+
"import timm\n",
|
855 |
+
"print(timm.list_models())"
|
856 |
+
]
|
857 |
+
},
|
858 |
+
{
|
859 |
+
"cell_type": "markdown",
|
860 |
+
"metadata": {},
|
861 |
+
"source": [
|
862 |
+
"##### testing the litserve model"
|
863 |
+
]
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"cell_type": "code",
|
867 |
+
"execution_count": 2,
|
868 |
+
"metadata": {},
|
869 |
+
"outputs": [],
|
870 |
+
"source": [
|
871 |
+
"import requests\n",
|
872 |
+
"from urllib.request import urlopen\n",
|
873 |
+
"import base64"
|
874 |
+
]
|
875 |
+
},
|
876 |
+
{
|
877 |
+
"cell_type": "code",
|
878 |
+
"execution_count": 33,
|
879 |
+
"metadata": {},
|
880 |
+
"outputs": [
|
881 |
+
{
|
882 |
+
"name": "stdout",
|
883 |
+
"output_type": "stream",
|
884 |
+
"text": [
|
885 |
+
"<class 'bytes'>\n"
|
886 |
+
]
|
887 |
+
}
|
888 |
+
],
|
889 |
+
"source": [
|
890 |
+
"url = \"https://media.istockphoto.com/id/541844008/photo/portland-grand-floral-parade-2016.jpg?s=2048x2048&w=is&k=20&c=ZuvR6oDv5WxwL5dhXKAbevysEXhXV47shJdpzkqen5Y=\"\n",
|
891 |
+
"img_data = urlopen(url).read()\n",
|
892 |
+
"print(type(img_data))"
|
893 |
+
]
|
894 |
+
},
|
895 |
+
{
|
896 |
+
"cell_type": "code",
|
897 |
+
"execution_count": 34,
|
898 |
+
"metadata": {},
|
899 |
+
"outputs": [
|
900 |
+
{
|
901 |
+
"name": "stdout",
|
902 |
+
"output_type": "stream",
|
903 |
+
"text": [
|
904 |
+
"<class 'str'>\n"
|
905 |
+
]
|
906 |
+
}
|
907 |
+
],
|
908 |
+
"source": [
|
909 |
+
"# Convert to base64 string\n",
|
910 |
+
"img_bytes = base64.b64encode(img_data).decode('utf-8')\n",
|
911 |
+
"print(type(img_bytes))"
|
912 |
+
]
|
913 |
+
},
|
914 |
+
{
|
915 |
+
"cell_type": "code",
|
916 |
+
"execution_count": 35,
|
917 |
+
"metadata": {},
|
918 |
+
"outputs": [],
|
919 |
+
"source": [
|
920 |
+
"response = requests.post(\n",
|
921 |
+
" \"http://localhost:8080/predict\", json={\"image\": img_bytes} # image is the key\n",
|
922 |
+
")"
|
923 |
+
]
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"cell_type": "code",
|
927 |
+
"execution_count": 36,
|
928 |
+
"metadata": {},
|
929 |
+
"outputs": [
|
930 |
+
{
|
931 |
+
"name": "stdout",
|
932 |
+
"output_type": "stream",
|
933 |
+
"text": [
|
934 |
+
"\\nTop 5 Predictions:\n",
|
935 |
+
"mountain_bike, all-terrain_bike, off-roader: 82.13%\n",
|
936 |
+
"maillot: 5.09%\n",
|
937 |
+
"crash_helmet: 1.84%\n",
|
938 |
+
"bicycle-built-for-two, tandem_bicycle, tandem: 1.83%\n",
|
939 |
+
"alp: 0.69%\n"
|
940 |
+
]
|
941 |
+
}
|
942 |
+
],
|
943 |
+
"source": [
|
944 |
+
"if response.status_code == 200:\n",
|
945 |
+
" predictions = response.json()[\"predictions\"]\n",
|
946 |
+
" print(\"\\\\nTop 5 Predictions:\")\n",
|
947 |
+
" for pred in predictions:\n",
|
948 |
+
" print(f\"{pred['label']}: {pred['probability']:.2%}\")\n",
|
949 |
+
"else:\n",
|
950 |
+
" print(f\"Error: {response.status_code}\")\n",
|
951 |
+
" print(response.text)"
|
952 |
+
]
|
953 |
+
},
|
954 |
{
|
955 |
"cell_type": "code",
|
956 |
"execution_count": null,
|
poetry.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
CHANGED
@@ -70,6 +70,9 @@ celery = "^5.4.0"
|
|
70 |
fastapi-cache2 = "^0.2.2"
|
71 |
aiocache = "^0.12.3"
|
72 |
dvc-s3 = "^3.2.0"
|
|
|
|
|
|
|
73 |
|
74 |
[tool.poetry.dev-dependencies]
|
75 |
pytest-asyncio = "^0.20.3"
|
|
|
70 |
fastapi-cache2 = "^0.2.2"
|
71 |
aiocache = "^0.12.3"
|
72 |
dvc-s3 = "^3.2.0"
|
73 |
+
litserve = "^0.2.4"
|
74 |
+
gpustat = "^1.1.1"
|
75 |
+
nvitop = "^1.3.2"
|
76 |
|
77 |
[tool.poetry.dev-dependencies]
|
78 |
pytest-asyncio = "^0.20.3"
|
src/litserve_api_test.py
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import concurrent.futures
|
3 |
+
import time
|
4 |
+
import numpy as np
|
5 |
+
import requests
|
6 |
+
import psutil
|
7 |
+
from urllib.request import urlopen
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
|
10 |
+
# Try importing `gpustat` for GPU monitoring
|
11 |
+
try:
|
12 |
+
import gpustat
|
13 |
+
|
14 |
+
GPU_AVAILABLE = True
|
15 |
+
except ImportError:
|
16 |
+
GPU_AVAILABLE = False
|
17 |
+
|
18 |
+
# Constants
|
19 |
+
SERVER_URL = "http://localhost:8080" # Base server URL
|
20 |
+
TEST_IMAGE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png"
|
21 |
+
|
22 |
+
|
23 |
+
def fetch_and_prepare_payload(timeout=30):
    """Download the test image and return it as a base64-encoded string.

    Args:
        timeout: Seconds to wait on the download before giving up.

    Returns:
        The bytes fetched from ``TEST_IMAGE_URL`` encoded as a base64 ``str``,
        or ``None`` if fetching or encoding fails (the error is printed).
    """
    try:
        # Close the HTTP response deterministically and never block forever
        # on an unresponsive host.
        with urlopen(TEST_IMAGE_URL, timeout=timeout) as response:
            img_data = response.read()
        return base64.b64encode(img_data).decode("utf-8")
    except Exception as e:
        # Best-effort by design: report the problem and signal failure with
        # None instead of raising.
        print(f"Error fetching the image: {e}")
        return None
|
33 |
+
|
34 |
+
|
35 |
+
def send_request(payload, batch=False, timeout=60):
    """Send a single or batch request to ``/predict`` and time the response.

    Args:
        payload: The value sent under the ``"image"`` key. When ``batch`` is
            true it must be an iterable; each item becomes its own
            ``{"image": item}`` entry in a JSON list.
        batch: Send a JSON list of image objects instead of a single object.
        timeout: Seconds to wait for the server before the request is
            treated as failed.

    Returns:
        A ``(response_time, status_code, predictions)`` tuple. ``predictions``
        is the decoded JSON body when the status code is 200, otherwise
        ``None``. If the request fails for any reason the error is printed
        and ``(None, None, None)`` is returned.
    """
    endpoint = f"{SERVER_URL}/predict"
    try:
        if batch:
            body = [{"image": img} for img in payload]
        else:
            body = {"image": payload}
        # perf_counter is monotonic, so the measured latency cannot be skewed
        # by wall-clock adjustments.
        start_time = time.perf_counter()
        # Without a timeout a stalled server would block this call forever.
        response = requests.post(endpoint, json=body, timeout=timeout)
        response_time = time.perf_counter() - start_time
        predictions = response.json() if response.status_code == 200 else None
        return response_time, response.status_code, predictions
    except Exception as e:
        # Best-effort by design: a failed request is reported and signalled
        # with a tuple of Nones rather than propagated to the caller.
        print(f"Error sending request: {e}")
        return None, None, None
|
52 |
+
|
53 |
+
|
54 |
+
def get_system_metrics():
    """
    Get current CPU and GPU usage.

    Returns:
        A dict with ``"cpu_usage"`` (the value of ``psutil.cpu_percent(0.1)``)
        and ``"gpu_usage"`` (the sum of ``utilization`` over all GPUs that
        gpustat reports, or ``-1`` when gpustat is not importable or the
        query raises).
    """
    cpu_usage = psutil.cpu_percent(0.1)

    # -1 is the sentinel for "no GPU reading available".
    gpu_usage = -1
    if GPU_AVAILABLE:
        try:
            query = gpustat.GPUStatCollection.new_query()
            gpu_usage = sum(device.utilization for device in query.gpus)
        except Exception:
            gpu_usage = -1

    return {"cpu_usage": cpu_usage, "gpu_usage": gpu_usage}
|
68 |
+
|
69 |
+
|
70 |
+
def benchmark_api(num_requests=100, concurrency_level=10, batch=False):
    """
    Benchmark the API server.

    Submits ``num_requests`` calls to ``send_request`` through a thread pool
    of ``concurrency_level`` workers, sampling system metrics while any
    request is still pending.

    Args:
        num_requests: Number of requests to submit.
        concurrency_level: Maximum number of worker threads.
        batch: When true, every request carries a list of ``num_requests``
            copies of the test image instead of a single image.

    Returns:
        A dict with total requests, concurrency level, total time (seconds),
        average response time (milliseconds), success rate (percent),
        requests per second and average CPU/GPU usage, or ``None`` when the
        test payload could not be prepared.
    """
    payload = fetch_and_prepare_payload()
    if not payload:
        print("Error preparing payload. Benchmark aborted.")
        return None

    request_payload = [payload] * num_requests if batch else payload
    system_metrics = []

    # Start benchmark timer
    start_benchmark_time = time.perf_counter()

    with concurrent.futures.ThreadPoolExecutor(
        max_workers=concurrency_level
    ) as executor:
        futures = [
            executor.submit(send_request, request_payload, batch)
            for _ in range(num_requests)
        ]
        while any(not f.done() for f in futures):
            system_metrics.append(get_system_metrics())
            time.sleep(0.1)

    # Stop benchmark timer
    total_benchmark_time = time.perf_counter() - start_benchmark_time

    response_times = []
    status_codes = []
    predictions = []
    for future in futures:
        response_time, status_code, prediction = future.result()
        # send_request reports a failed request as (None, None, None). That
        # tuple is truthy, so test the field itself to keep None out of the
        # averages below.
        if response_time is None:
            continue
        response_times.append(response_time)
        status_codes.append(status_code)
        predictions.append(prediction)

    # Very fast runs can finish before the sampling loop executes once;
    # take one sample so the averages are never computed over an empty list.
    if not system_metrics:
        system_metrics.append(get_system_metrics())

    avg_cpu = np.mean([m["cpu_usage"] for m in system_metrics])
    # Exclude the -1 "unavailable" sentinel so it does not drag the mean down.
    gpu_samples = [m["gpu_usage"] for m in system_metrics if m["gpu_usage"] >= 0]
    avg_gpu = np.mean(gpu_samples) if GPU_AVAILABLE and gpu_samples else -1

    success_rate = (status_codes.count(200) / num_requests) * 100 if status_codes else 0
    avg_response_time = np.mean(response_times) * 1000 if response_times else 0  # ms
    requests_per_second = num_requests / total_benchmark_time

    print("\n--- Sample Predictions ---")
    # Show predictions for the first 5 requests
    for i, prediction in enumerate(predictions[:5]):
        print(f"Request {i + 1}: {prediction}")

    return {
        "total_requests": num_requests,
        "concurrency_level": concurrency_level,
        "total_time": total_benchmark_time,
        "avg_response_time": avg_response_time,
        "success_rate": success_rate,
        "requests_per_second": requests_per_second,
        "avg_cpu_usage": avg_cpu,
        "avg_gpu_usage": avg_gpu,
    }
|
133 |
+
|
134 |
+
|
135 |
+
def run_benchmarks():
    """
    Run comprehensive benchmarks and create plots.

    Benchmarks the API at several concurrency levels (50 single-image
    requests each), prints a one-line summary per level, and saves a
    two-panel figure (throughput and resource usage) to
    ``benchmark_results.png``. Nothing is plotted when every benchmark
    aborted.
    """
    concurrency_levels = [1, 8, 16, 32]
    metrics = []

    print("Running API benchmarks...")
    for concurrency in concurrency_levels:
        print(f"\nTesting concurrency level: {concurrency}")
        result = benchmark_api(
            num_requests=50, concurrency_level=concurrency, batch=False
        )
        if result:
            metrics.append(result)
            print(
                f"Concurrency {concurrency}: "
                f"{result['requests_per_second']:.2f} reqs/sec, "
                f"CPU: {result['avg_cpu_usage']:.1f}%, "
                f"GPU: {result['avg_gpu_usage']:.1f}%"
            )

    if not metrics:
        print("No benchmark results collected; skipping plots.")
        return

    # Plot against the levels that actually produced a result. Using the full
    # concurrency_levels list would give x and y different lengths whenever a
    # benchmark aborted.
    measured_levels = [m["concurrency_level"] for m in metrics]

    # Generate plots
    plt.figure(figsize=(12, 6))

    # Throughput
    plt.subplot(1, 2, 1)
    plt.plot(
        measured_levels,
        [m["requests_per_second"] for m in metrics],
        "r-o",
        label="Throughput",
    )
    plt.xlabel("Concurrency Level")
    plt.ylabel("Requests per Second")
    plt.title("API Throughput")
    plt.grid(True)

    # Resource Usage
    plt.subplot(1, 2, 2)
    plt.plot(
        measured_levels,
        [m["avg_cpu_usage"] for m in metrics],
        "b-o",
        label="CPU Usage",
    )
    if GPU_AVAILABLE:
        plt.plot(
            measured_levels,
            [m["avg_gpu_usage"] for m in metrics],
            "g-o",
            label="GPU Usage",
        )
    plt.xlabel("Concurrency Level")
    plt.ylabel("Resource Usage (%)")
    plt.title("Resource Usage")
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.savefig("benchmark_results.png")
    print("Benchmark results saved as 'benchmark_results.png'.")
|
197 |
+
|
198 |
+
|
199 |
+
if __name__ == "__main__":
|
200 |
+
run_benchmarks()
|
src/litserve_test_client.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from urllib.request import urlopen
|
3 |
+
import base64
|
4 |
+
import os
|
5 |
+
|
6 |
+
|
7 |
+
def fetch_image(url, timeout=30):
    """
    Fetch image data from a URL.

    Args:
        url: Location of the image; anything ``urlopen`` accepts.
        timeout: Seconds to wait on blocking network operations.

    Returns:
        The raw bytes read from the response.
    """
    # Close the response deterministically instead of leaving the connection
    # open until garbage collection.
    with urlopen(url, timeout=timeout) as response:
        return response.read()
|
12 |
+
|
13 |
+
|
14 |
+
def encode_image_to_base64(img_data):
    """
    Turn raw image bytes into their base64 text representation.

    Returns the base64 encoding of ``img_data`` decoded as a UTF-8 string.
    """
    encoded_bytes = base64.b64encode(img_data)
    return str(encoded_bytes, "utf-8")
|
19 |
+
|
20 |
+
|
21 |
+
def send_prediction_request(base64_image, server_url, timeout=60):
    """
    Send a single base64 image to the prediction API and retrieve predictions.

    Args:
        base64_image: Base64-encoded image string.
        server_url: Base URL of the server; ``/predict`` is appended.
        timeout: Seconds handed to ``requests.post`` so an unresponsive
            server cannot block the client indefinitely.

    Returns:
        The ``requests`` response object, or ``None`` (after printing the
        error) when the request fails.
    """
    try:
        return requests.post(
            f"{server_url}/predict",
            json={"image": base64_image},
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        print(f"Error connecting to the server: {e}")
        return None
|
31 |
+
|
32 |
+
|
33 |
+
def send_batch_prediction_request(base64_images, server_url, timeout=60):
    """
    Send a batch of base64 images to the prediction API and retrieve predictions.

    Args:
        base64_images: Iterable of base64-encoded image strings; each becomes
            one ``{"image": ...}`` entry of the posted JSON list.
        server_url: Base URL of the server; ``/predict`` is appended.
        timeout: Seconds handed to ``requests.post`` so an unresponsive
            server cannot block the client indefinitely.

    Returns:
        The ``requests`` response object, or ``None`` (after printing the
        error) when the request fails.
    """
    try:
        return requests.post(
            f"{server_url}/predict",
            json=[{"image": img} for img in base64_images],
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        print(f"Error connecting to the server: {e}")
        return None
|
45 |
+
|
46 |
+
|
47 |
+
def main():
    """
    Fetch the sample images, send one to the server and print its predictions.

    The server base URL comes from the ``SERVER_URL`` environment variable
    and defaults to ``http://localhost:8080``.
    """
    # Server URL (default or from environment)
    server_url = os.getenv("SERVER_URL", "http://localhost:8080")

    # Example URLs for testing
    image_urls = [
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png",
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png",
    ]

    # Fetch and encode images
    try:
        print("Fetching and encoding images...")
        base64_images = [encode_image_to_base64(fetch_image(url)) for url in image_urls]
        print("Images fetched and encoded successfully.")
    except Exception as e:
        print(f"Error fetching or encoding images: {e}")
        return

    # Test single image prediction
    try:
        print("\n--- Single Image Prediction ---")
        single_response = send_prediction_request(base64_images[0], server_url)

        # Compare against None explicitly: a requests Response is falsy for
        # 4xx/5xx statuses, so a plain truthiness test would silently skip
        # the error report below for exactly the responses it is meant for.
        if single_response is None:
            # send_prediction_request has already printed the connection error.
            return

        if single_response.status_code == 200:
            predictions = single_response.json().get("predictions", [])
            if predictions:
                print("Top 5 Predictions:")
                for pred in predictions:
                    print(f"{pred['label']}: {pred['probability']:.2%}")
            else:
                print("No predictions returned.")
        else:
            print(f"Error: {single_response.status_code}")
            print(single_response.text)
    except Exception as e:
        print(f"Error sending single prediction request: {e}")
|
83 |
+
|
84 |
+
|
85 |
+
if __name__ == "__main__":
|
86 |
+
main()
|
src/litserve_test_server.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import timm
|
3 |
+
from PIL import Image
|
4 |
+
import io
|
5 |
+
import litserve as lit
|
6 |
+
import base64
|
7 |
+
import requests
|
8 |
+
import logging
|
9 |
+
|
10 |
+
|
11 |
+
class ImageClassifierAPI(lit.LitAPI):
    """LitServe API that classifies base64-encoded images with a timm ResNet-50.

    Each request is a JSON object with an ``"image"`` key holding a
    base64-encoded image. The response holds the five most probable labels.
    """

    def setup(self, device):
        """Initialize the model and necessary components.

        Loads the pretrained model onto ``device``, builds the matching
        preprocessing transforms and downloads the class labels. A failed
        label download is logged and leaves ``self.labels`` empty.
        """
        self.device = device
        logging.info("Setting up the model and components.")

        # Create and load the model
        self.model = timm.create_model("resnet50.a1_in1k", pretrained=True)
        self.model = self.model.to(device).eval()

        # Preprocessing pipeline derived from the model's own data config.
        data_config = timm.data.resolve_model_data_config(self.model)
        self.transforms = timm.data.create_transform(**data_config, is_training=False)

        # Load labels
        url = "https://storage.googleapis.com/bit_models/ilsvrc2012_wordnet_lemmas.txt"
        try:
            response = requests.get(url, timeout=30)
            # Without this an HTTP error page would be split into "labels".
            response.raise_for_status()
            self.labels = response.text.strip().split("\n")
            logging.info("Labels loaded successfully.")
        except Exception as e:
            logging.error(f"Failed to load labels: {e}")
            self.labels = []

    def decode_request(self, request):
        """Extract the base64 image string from one request body.

        Raises:
            ValueError: If ``request`` is not a dict containing ``"image"``.
        """
        # Log only the payload type: the body is a whole base64 image and
        # would bloat the log on every request.
        logging.info("decode_request received a %s payload", type(request).__name__)
        if not isinstance(request, dict) or "image" not in request:
            raise ValueError('Request must be a JSON object with an "image" key.')
        return request["image"]

    def batch(self, inputs):
        """Decode a list of base64 images into one tensor on ``self.device``.

        Raises:
            ValueError: If ``inputs`` is not a list, or if any element cannot
                be decoded and transformed.
        """
        if not isinstance(inputs, list):
            raise ValueError("Input to batch must be a list.")
        logging.info("batch received %d inputs", len(inputs))

        batch_tensors = []
        try:
            for encoded_image in inputs:
                if not isinstance(encoded_image, str):  # Ensure input is a base64 string
                    raise ValueError(
                        f"Input must be a base64-encoded string, got: {type(encoded_image)}"
                    )

                # Decode base64 string to bytes
                img_bytes = base64.b64decode(encoded_image)

                # Convert bytes to PIL Image
                image = Image.open(io.BytesIO(img_bytes)).convert("RGB")

                # Apply transforms and add to batch
                batch_tensors.append(self.transforms(image))

            return torch.stack(batch_tensors).to(self.device)
        except Exception as e:
            logging.error(f"Error decoding image: {e}")
            # Chain the cause so the traceback still shows what went wrong.
            raise ValueError("Failed to decode and process the images.") from e

    @torch.no_grad()
    def predict(self, x):
        """Run the model on the batch ``x`` and return softmax probabilities."""
        outputs = self.model(x)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        logging.info("Prediction completed.")
        return probabilities

    def unbatch(self, output):
        """Split the batched output into a list with one entry per row."""
        return [output[i] for i in range(output.size(0))]

    def encode_response(self, output):
        """Convert one probability vector into the top-5 predictions payload.

        Returns:
            ``{"predictions": [{"label": ..., "probability": ...}, ...]}``
            with five entries taken from ``torch.topk``.

        Raises:
            ValueError: If the response cannot be built.
        """
        try:
            probs, indices = torch.topk(output, k=5)
            predictions = []
            for prob, idx in zip(probs.tolist(), indices.tolist()):
                # setup() leaves the label list empty when the download
                # fails; fall back to the class index instead of failing
                # every response.
                label = self.labels[idx] if idx < len(self.labels) else f"class_{idx}"
                predictions.append({"label": label, "probability": prob})
            logging.info("Batch response successfully encoded.")
            return {"predictions": predictions}
        except Exception as e:
            logging.error(f"Error encoding batch response: {e}")
            raise ValueError("Failed to encode the batch response.") from e
|
102 |
+
|
103 |
+
|
104 |
+
if __name__ == "__main__":
|
105 |
+
logging.basicConfig(
|
106 |
+
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
107 |
+
)
|
108 |
+
logging.info("Starting the Image Classifier API server.")
|
109 |
+
|
110 |
+
api = ImageClassifierAPI()
|
111 |
+
|
112 |
+
# Configure server with optimal settings
|
113 |
+
server = lit.LitServer(
|
114 |
+
api, accelerator="auto", max_batch_size=16, batch_timeout=0.01
|
115 |
+
)
|
116 |
+
server.run(port=8080)
|