from transformers import AutoModelForCausalLM
import torch
import argparse
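
# Parse command-line options: the Hub model id, an optional revision, and the
# local directory holding the model's weight files.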
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, default="petil777/srv1_parallel")
parser.add_argument("--revision", type=str, default=None)
parser.add_argument("--local_path", type=str, required=True)
args = parser.parse_args()
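
# A multi-GPU launch might look like the following, assuming the repo's remote
# code initializes torch.distributed (the script name and weight path below are
# placeholders):
#   torchrun --nproc_per_node=2 generate.py --local_path /path/to/weights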
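
# trust_remote_code=True runs the custom modeling code from the Hub repo;
# local_path appears to be a custom kwarg consumed by that code rather than a
# standard from_pretrained argument.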
model = AutoModelForCausalLM.from_pretrained(
    args.model_path,
    local_path=args.local_path,
    revision=args.revision,
    trust_remote_code=True,
)
model.eval()
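
# The custom model class exposes its own tokenizer as an attribute, so
# AutoTokenizer is not needed.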
tokenizer = model.tokenizer
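
# Under torch.distributed, pin the model to this process's GPU (one process
# per GPU); single-process runs fall back to cuda:0.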
rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0
model = model.to(f"cuda:{rank}")
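
# Tokenize the prompt and move the input ids onto the model's device.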
input_str = "apple is red and banana is"
input_dict = tokenizer(input_str, return_tensors="pt")
input_ids = input_dict.input_ids.to(f"cuda:{rank}")
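
# Greedy decoding by default (do_sample is False; top_k=0 disables top-k
# filtering); return a dict-style output carrying per-step scores and hidden
# states alongside the generated sequences.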
out_tensor = model.generate(
    input_ids,
    top_k=0,
    return_dict_in_generate=True,
    output_scores=True,
    output_hidden_states=True,
)
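
# Only rank 0 prints so multi-process runs don't emit duplicate output.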
if rank == 0:
    print(tokenizer.decode(out_tensor.sequences[0]))