test / benchmarks /perf.json
iblfe's picture
Upload folder using huggingface_hub
b585c7f verified
raw
history blame
70.5 kB
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:46:19", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.29472152392069, "generate_output_len_bytes": 2384, "generate_time": 14.563165505727133}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:55", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 67.97515447934468, "generate_output_len_bytes": 2384, "generate_time": 33.00641902287801}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:58", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1440, "summarize_time": 114.62220064798991, "generate_output_len_bytes": 2619, "generate_time": 71.0722058614095}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:58:34", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 866, "summarize_time": 39.54404203097025, "generate_output_len_bytes": 2927, "generate_time": 22.466302394866943}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:01:59", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.1394579410553, "generate_output_len_bytes": 2384, "generate_time": 14.757195552190145}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:54:29", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 185.14580019315085, "generate_output_len_bytes": 2042, "generate_time": 117.13909141222636}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:04:37", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.98129558563232, "generate_output_len_bytes": 2512, "generate_time": 69.4871145884196}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:13:08", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 43.23498781522115, "generate_output_len_bytes": 2927, "generate_time": 22.826789538065594}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:10:08", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 90.51939169565837, "generate_output_len_bytes": 2927, "generate_time": 48.96095744768778}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:16:48", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 31.86189842224121, "generate_output_len_bytes": 2384, "generate_time": 14.209659894307455}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:17:39", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 71.48081835110982, "generate_output_len_bytes": 2384, "generate_time": 33.5740262667338}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:19:24", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.17744310696919, "generate_output_len_bytes": 2512, "generate_time": 70.12592967351277}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:27:57", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 42.8066500822703, "generate_output_len_bytes": 2927, "generate_time": 22.626200040181477}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:23:22", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 186.88371555010477, "generate_output_len_bytes": 2042, "generate_time": 117.3530724843343}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:39:03", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 94.50985678037007, "generate_output_len_bytes": 2927, "generate_time": 50.06416177749634}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:08:31", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.80374129613241, "generate_output_len_bytes": 2384, "generate_time": 19.23690136273702}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:11:49", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.79640992482504, "generate_output_len_bytes": 2772, "generate_time": 93.99476226170857}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:25:53", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.44271365801493, "generate_output_len_bytes": 2927, "generate_time": 30.641155401865642}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:30:30", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.80062770843506, "generate_output_len_bytes": 2384, "generate_time": 19.825008392333984}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:35:29", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 177.35046529769897, "generate_output_len_bytes": 2772, "generate_time": 91.73111907641093}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:49:20", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 56.894784371058144, "generate_output_len_bytes": 2927, "generate_time": 32.15500020980835}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:54:11", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.46419604619344, "generate_output_len_bytes": 2384, "generate_time": 20.049855709075928}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:57:39", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 183.73364853858948, "generate_output_len_bytes": 2772, "generate_time": 94.9052836894989}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/18/2023 22:11:59", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 59.204413731892906, "generate_output_len_bytes": 2927, "generate_time": 33.25332593917847}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:17:00", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.09002653757731, "generate_output_len_bytes": 2384, "generate_time": 20.106103817621868}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:20:31", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 185.28164370854697, "generate_output_len_bytes": 2772, "generate_time": 95.13023789723714}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:34:58", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.9919019540151, "generate_output_len_bytes": 2927, "generate_time": 34.328625202178955}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:34", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.49842747052511, "generate_output_len_bytes": 2172, "generate_time": 20.686774571736652}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:55", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:38", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 168.9666860898336, "generate_output_len_bytes": 2249, "generate_time": 73.25518870353699}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:48:09", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 45.30513469378153, "generate_output_len_bytes": 1802, "generate_time": 22.000216643015545}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:51:56", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.64275654157003, "generate_output_len_bytes": 2172, "generate_time": 20.737667481104534}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:47", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.4669913450877, "generate_output_len_bytes": 2132, "generate_time": 141.7793349424998}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:57:35", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 96.61887431144714, "generate_output_len_bytes": 3244, "generate_time": 82.98751719792683}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:55:51", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 167.52292919158936, "generate_output_len_bytes": 2249, "generate_time": 71.82611886660258}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:08:08", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 47.14254776636759, "generate_output_len_bytes": 1802, "generate_time": 22.54850967725118}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:15", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:07:15", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 89.59958203633626, "generate_output_len_bytes": 2172, "generate_time": 42.32424934705099}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:30", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 185.44230167071024, "generate_output_len_bytes": 2122, "generate_time": 88.11553311347961}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:29:36", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 922, "summarize_time": 68.06459252039592, "generate_output_len_bytes": 1802, "generate_time": 27.939613421758015}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:26:29", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.8310640652974, "generate_output_len_bytes": 2132, "generate_time": 143.21916349728903}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:48:17", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 98.47045453389485, "generate_output_len_bytes": 3244, "generate_time": 83.71360301971436}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:35:13", "git_sha": "0dec0f52", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:49:33", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:26:53", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:27:32", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:29:03", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 17:26:02", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 18:59:16", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1075, "summarize_time": 39.01545596122742, "generate_output_len_bytes": 2242, "generate_time": 10.151424566904703}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:03:13", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 940, "summarize_time": 21.78233750661214, "generate_output_len_bytes": 2130, "generate_time": 15.794983307520548}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:38:40", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1114, "summarize_time": 7.636120955149333, "generate_output_len_bytes": 2275, "generate_time": 7.922623078028361}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:41:02", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 10.824170271555582, "generate_output_len_bytes": 2130, "generate_time": 9.209020694096884}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:55:17", "git_sha": "2c548f21", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1088, "summarize_time": 24.39883820215861, "generate_output_len_bytes": 2275, "generate_time": 12.755743900934855}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:57:21", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 37.113919814427696, "generate_output_len_bytes": 2384, "generate_time": 18.36507821083069}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:00:31", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 49.79721482594808, "generate_output_len_bytes": 2172, "generate_time": 21.780913591384888}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:04:36", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:05:26", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 181.2461258570353, "generate_output_len_bytes": 2772, "generate_time": 92.64811905225118}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:33", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 174.4576851526896, "generate_output_len_bytes": 2713, "generate_time": 119.14412077267964}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:36:14", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.39731526374817, "generate_output_len_bytes": 2927, "generate_time": 31.369641542434692}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:40:53", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 74.27096923192342, "generate_output_len_bytes": 1802, "generate_time": 29.860486666361492}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:48:09", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.926851193110146, "generate_output_len_bytes": 2384, "generate_time": 18.481745958328247}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:51:27", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.299002488454185, "generate_output_len_bytes": 2172, "generate_time": 21.828503131866455}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:56:20", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.19972308476767, "generate_output_len_bytes": 2772, "generate_time": 91.73426882425944}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:10:13", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 180.7814578215281, "generate_output_len_bytes": 2713, "generate_time": 124.72717420260112}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:26:43", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 57.08081785837809, "generate_output_len_bytes": 2927, "generate_time": 32.26534946759542}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:31:36", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 79.9461121559143, "generate_output_len_bytes": 1802, "generate_time": 31.403561115264893}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:38:23", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.33977222442627, "generate_output_len_bytes": 2384, "generate_time": 19.723278522491455}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:41:52", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.377869288126625, "generate_output_len_bytes": 2172, "generate_time": 25.01458676656087}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:47:05", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 180.53432401021323, "generate_output_len_bytes": 2772, "generate_time": 91.93375285466512}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:01:07", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 179.50477250417075, "generate_output_len_bytes": 2713, "generate_time": 124.40728378295898}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:17:36", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 58.62867816289266, "generate_output_len_bytes": 2927, "generate_time": 33.394495725631714}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:22:37", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 78.90612125396729, "generate_output_len_bytes": 1802, "generate_time": 30.697617371877033}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:29:20", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.498607873916626, "generate_output_len_bytes": 2384, "generate_time": 19.509677171707153}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:32:44", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.3964786529541, "generate_output_len_bytes": 2172, "generate_time": 24.347585439682007}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:37:55", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 186.71331850687662, "generate_output_len_bytes": 2772, "generate_time": 95.784650405248}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:52:28", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 185.3280005455017, "generate_output_len_bytes": 2713, "generate_time": 125.91738017400105}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:09:18", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.18280680974325, "generate_output_len_bytes": 2927, "generate_time": 33.386961142222084}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:14:25", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 83.04790727297465, "generate_output_len_bytes": 1802, "generate_time": 32.24992283185323}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:26:19", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.03754989306132, "generate_output_len_bytes": 2384, "generate_time": 19.964784463246662}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:33:09", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 71.91136892636617, "generate_output_len_bytes": 2480, "generate_time": 33.6295014222463}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:44:08", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:45:42", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 148.61560583114624, "generate_output_len_bytes": 2357, "generate_time": 89.01266026496887}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:58:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 193.99270629882812, "generate_output_len_bytes": 2129, "generate_time": 95.66660761833191}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:01", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:55", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 61.52411222457886, "generate_output_len_bytes": 2927, "generate_time": 32.030215660730995}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 81.13888708750407, "generate_output_len_bytes": 3486, "generate_time": 55.5331826210022}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:27:49", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.41046245892843, "generate_output_len_bytes": 2384, "generate_time": 20.660600344340008}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:34:28", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 72.85646979014079, "generate_output_len_bytes": 2480, "generate_time": 34.05861854553223}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:39:22", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 152.54357608159384, "generate_output_len_bytes": 2357, "generate_time": 91.51808977127075}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:52:58", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 195.92926557858786, "generate_output_len_bytes": 2129, "generate_time": 96.55542047818501}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:15:01", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 64.64422671000163, "generate_output_len_bytes": 2927, "generate_time": 33.30378039677938}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:20:19", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 84.57761120796204, "generate_output_len_bytes": 3486, "generate_time": 57.59072462717692}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:28:44", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 49.08898218472799, "generate_output_len_bytes": 2384, "generate_time": 21.489527861277264}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:32:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 74.43774898846944, "generate_output_len_bytes": 2480, "generate_time": 34.72673638661703}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:39:21", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 153.41076453526816, "generate_output_len_bytes": 2357, "generate_time": 91.14894040425618}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:52:00", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 199.79869039853415, "generate_output_len_bytes": 2129, "generate_time": 98.61504419644673}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:08:12", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 66.49260465304057, "generate_output_len_bytes": 2927, "generate_time": 34.17951035499573}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:13:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 87.65787092844646, "generate_output_len_bytes": 3486, "generate_time": 59.3750696182251}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:22:24", "git_sha": "b63768c6", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 122.13213857014973, "generate_output_len_bytes": 2826, "generate_time": 66.34098903338115}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 22:33:33", "git_sha": "c1348fb3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 120.53812781969707, "generate_output_len_bytes": 2826, "generate_time": 67.28052496910095}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:56:52", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1036, "summarize_time": 29.128981749216717, "generate_output_len_bytes": 2242, "generate_time": 12.197122732798258}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:00:33", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:47:43", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:48:58", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:50:40", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 165.05752809842429, "generate_output_len_bytes": 2605, "generate_time": 93.80659619967143}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:05:51", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:10:05", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 410.0691332022349, "generate_output_len_bytes": 521, "generate_time": 57.71272214253744}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:36:58", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 171.74388321240744, "generate_output_len_bytes": 2605, "generate_time": 97.00725762049358}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 06:51:13", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 792, "summarize_time": 267.0555826822917, "generate_output_len_bytes": 2783, "generate_time": 163.99818523724875}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:13:35", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 413.9569679101308, "generate_output_len_bytes": 521, "generate_time": 58.52583885192871}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:38:02", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 175.4907926718394, "generate_output_len_bytes": 2605, "generate_time": 98.97720170021057}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 12:35:08", "git_sha": "29a002e5", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 983, "summarize_time": 42.21107586224874, "generate_output_len_bytes": 2130, "generate_time": 16.94527777036031}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:03:36", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.0461368560791, "generate_output_len_bytes": 2383, "generate_time": 19.614749511082966}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:07:35", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.8376894791921, "generate_output_len_bytes": 2383, "generate_time": 20.2719091574351}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 20:42:46", "git_sha": "2f4bb620", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:19", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 66.52468911806743, "generate_output_len_bytes": 2479, "generate_time": 29.828714847564697}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:56:04", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 19:55:35", "git_sha": "51318f44", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.753786404927574, "generate_output_len_bytes": 2383, "generate_time": 19.529522736867268}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:36:13", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.024452924728394, "generate_output_len_bytes": 2383, "generate_time": 20.29120985666911}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:40:08", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.554532527923584, "generate_output_len_bytes": 2171, "generate_time": 24.604793945948284}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:05", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.09950613975525, "generate_output_len_bytes": 2383, "generate_time": 20.947362899780273}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:54:08", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 58.3172922929128, "generate_output_len_bytes": 2171, "generate_time": 25.735217014948528}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:01:04", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.85940829912821, "generate_output_len_bytes": 2383, "generate_time": 21.380353291829426}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:05:24", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.235164642333984, "generate_output_len_bytes": 2171, "generate_time": 25.70338026682536}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:10:37", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 133.53030570348105, "generate_output_len_bytes": 2782, "generate_time": 72.97924383481343}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 22:18:17", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 131.45291074117026, "generate_output_len_bytes": 2782, "generate_time": 72.30849742889404}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:51:09", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.269713958104454, "generate_output_len_bytes": 2383, "generate_time": 19.65731406211853}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:54:54", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.84283971786499, "generate_output_len_bytes": 2171, "generate_time": 28.441521485646565}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 23:13:10", "git_sha": "383b6bbc", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 53.383726040522255, "generate_output_len_bytes": 2171, "generate_time": 24.422890504201252}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 23:18:04", "git_sha": "383b6bbc", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.791220347086586, "generate_output_len_bytes": 2171, "generate_time": 25.378511508305866}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 23:23:11", "git_sha": "383b6bbc", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 56.3846542040507, "generate_output_len_bytes": 2171, "generate_time": 26.636192480723064}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 23:52:44", "git_sha": "da69b822", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.36223220825195, "generate_output_len_bytes": 2383, "generate_time": 19.87660264968872}
{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/22/2023 00:15:05", "git_sha": "e843e8c3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 64.78201874097188, "generate_output_len_bytes": 2479, "generate_time": 29.02147897084554}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 0, "reps": 3, "date": "08/22/2023 19:01:15", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1351, "summarize_time": 1215.5185990333557, "generate_output_len_bytes": 849, "generate_time": 180.56836318969727}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 0, "reps": 3, "date": "08/22/2023 20:11:16", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1353, "summarize_time": 1216.9783231417339, "generate_output_len_bytes": 849, "generate_time": 180.42225472132364}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 0, "reps": 3, "date": "08/22/2023 21:21:20", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1354, "summarize_time": 1217.1687794526417, "generate_output_len_bytes": 843, "generate_time": 180.78463260332742}