# Usage:
#   torchrun --nproc_per_node=2 test_modeling.py --local_path {local_path}
#
# Smoke test: load a causal LM through its remote modeling code, generate a
# continuation for a fixed prompt on every rank, and print the decoded text
# from rank 0 only.
import argparse

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Fixed prompt fed to the model on every rank.
PROMPT = "apple is red and banana is"


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of this script.

    Returns:
        Namespace with ``model_path`` (defaults to
        ``"petil777/srv1_parallel"``), ``revision`` (defaults to ``None``)
        and the required ``local_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, default="petil777/srv1_parallel")
    parser.add_argument("--revision", type=str, default=None)
    parser.add_argument("--local_path", type=str, required=True)
    return parser.parse_args()


def main() -> None:
    """Load the model, run one generation, and print the result on rank 0."""
    args = parse_args()

    # Per the original author's note, the distributed launch happens inside
    # this call (i.e. within the remote modeling code) -- this script never
    # initializes the process group itself.
    model = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        local_path=args.local_path,
        revision=args.revision,
        trust_remote_code=True,
    )
    model.eval()
    # The tokenizer is read from the loaded model object rather than being
    # created separately with AutoTokenizer.
    tokenizer = model.tokenizer

    # Fall back to rank 0 when no process group has been initialized.
    rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0
    # NOTE(review): the global rank doubles as the CUDA device index. That
    # matches the single-node launch in the usage line above; confirm before
    # running on more than one node.
    device = f"cuda:{rank}"
    model = model.to(device)

    # The tokenizer output holds input_ids and attention_mask; only input_ids
    # is forwarded to generate().
    encoded = tokenizer(PROMPT, return_tensors="pt")
    input_ids = encoded.input_ids.to(device)

    generated = model.generate(
        input_ids,
        top_k=0,
        return_dict_in_generate=True,
        output_scores=True,
        output_hidden_states=True,
    )

    # Every rank runs generate(); only rank 0 prints the decoded sequence.
    if rank == 0:
        print(tokenizer.decode(generated.sequences[0]))


if __name__ == "__main__":
    main()