Here is an example of how to use this model: https://github.com/LJ-Hao/MLC-LLM-on-Jetson
Note: This model only supports Jetson containers with MLC: https://github.com/dusty-nv/jetson-containers
from mlc_chat import ChatModule
from mlc_chat.callback import StreamToStdout
def run():
    """Run an interactive chat loop against a local MLC-compiled Llama-2 model.

    Repeatedly reads a prompt from stdin and streams the generated reply to
    stdout. Exits cleanly on Ctrl-C or end-of-input (Ctrl-D / closed stdin).
    """
    # Paths must point at the compiled model weights ("params") and the
    # CUDA model library built for the target device — replace "PATH"
    # with the actual location of the MLC build output.
    cm = ChatModule(
        model="PATH/params",
        model_lib_path="PATH/Llama-2-7b-chat-hf-q4f16_ft-cuda.so",
    )
    while True:
        try:
            # ANSI escape \033[94m colors the "Prompt: " label blue,
            # \033[0m resets the terminal color afterwards.
            prompt = input('\033[94m' + "Prompt: " + '\033[0m')
            cm.generate(
                prompt=prompt,
                # Stream tokens to stdout as they are produced; the callback
                # flushes every 2 generation steps for a live-typing feel.
                progress_callback=StreamToStdout(callback_interval=2),
            )
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C interrupts; EOFError covers Ctrl-D or a closed/piped
            # stdin — previously EOF would crash the loop with a traceback.
            break
# Script entry point: only start the chat loop when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run()