```bash
# Download the prebuilt engine and the TensorRT-LLM example scripts, then run
# a short profiled generation against the engine.

# Make sure you have git-lfs installed (https://git-lfs.com)
git lfs install
git clone https://huggingface.co/muhtasham/llama3-ins-8b-int4-trt-llm
git clone https://github.com/NVIDIA/TensorRT-LLM.git

# NOTE(review): --engine_dir=./ points at the current directory, while the
# clone above creates ./llama3-ins-8b-int4-trt-llm -- confirm which directory
# this command is meant to be run from.
# NOTE(review): no command above creates llama3-hf; presumably it is a local
# copy of the Llama 3 Hugging Face tokenizer -- confirm.
python ./TensorRT-LLM/examples/run.py --engine_dir=./ \
  --max_output_len 5 \
  --tokenizer_dir llama3-hf \
  --input_text "How do I count to nine in French?" \
  --run_profiling
```

Example output:

```text
2024-04-25 19:35:59.062455: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
Input [Text 0]: "<|begin_of_text|>How do I count to nine in French?"
Output [Text 0 Beam 0]: " Counting in French is"
batch_size: 1, avg latency of 10 iterations: : 0.0999948501586914 sec
```