{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_token_generation_latency_sync": 77.97878570556641, "base_token_generation_latency_async": 76.24018508940935, "base_token_generation_throughput_sync": 0.012824000668281968, "base_token_generation_throughput_async": 0.013116442448654439, "base_token_generation_CO2_emissions": null, "base_token_generation_energy_consumption": null, "base_inference_latency_sync": 73.83418731689453, "base_inference_latency_async": 73.6673355102539, "base_inference_throughput_sync": 0.013543861405395097, "base_inference_throughput_async": 0.013574537385851399, "base_inference_CO2_emissions": null, "base_inference_energy_consumption": null, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_token_generation_latency_sync": 169.28594207763672, "smashed_token_generation_latency_async": 169.22315284609795, "smashed_token_generation_throughput_sync": 0.00590716504706213, "smashed_token_generation_throughput_async": 0.005909356865070716, "smashed_token_generation_CO2_emissions": null, "smashed_token_generation_energy_consumption": null, "smashed_inference_latency_sync": 175.40904846191407, "smashed_inference_latency_async": 151.64473056793213, "smashed_inference_throughput_sync": 0.005700960177189071, "smashed_inference_throughput_async": 0.006594360359603996, "smashed_inference_CO2_emissions": null, "smashed_inference_energy_consumption": null }