from transformers import AutoModelForCausalLM, AutoTokenizer
import sys
import os

# Model path or Hugging Face model ID: defaults to the current working
# directory (so the script can be run from inside a checkpoint folder),
# overridden by the first command-line argument if one is given.
model_id = os.getcwd()
if len(sys.argv) > 1:
    model_id = sys.argv[1]

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the model, move it to the GPU, and cast its weights to bfloat16.
model = AutoModelForCausalLM.from_pretrained(model_id).cuda().bfloat16()

prompt = "Lily picked up a flower."
# Tokenize the prompt and move the input tensors to the GPU;
# token_type_ids are suppressed because generate() does not accept them.
inputs = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to("cuda")

# generate() returns a (batch, sequence) tensor; ravel() flattens the
# single-row batch so the token IDs can be decoded directly.
out = model.generate(**inputs, max_new_tokens=80).ravel()
out = tokenizer.decode(out)
print(out)
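
Usage, assuming the script is saved as generate.py (the filename is hypothetical): run it with no arguments from inside a downloaded checkpoint directory, or pass a model path or Hugging Face model ID explicitly, e.g. `python generate.py /path/to/model`. The script requires a CUDA-capable GPU, since the model and inputs are moved to the GPU unconditionally.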