"""Smoke-test text generation from a locally trained nanoT5 checkpoint.

The script loads a T5-style tokenizer, registers four extra special tokens,
loads a seq2seq checkpoint from disk, generates a short continuation for a
fixed prompt and prints the decoded text.
"""

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

TOKENIZER_NAME = "BEE-spoke-data/hf_slimpajama-6B-28672-BPE-forT5"
CHECKPOINT_PATH = "/workspace/nanoT5/logs/2024-10-20/18-25-17/checkpoint-pt-27000"
MAX_NEW_TOKENS = 20
NO_REPEAT_NGRAM_SIZE = 5

# Use the GPU when one is present, but fall back to CPU instead of crashing
# on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

# NOTE(review): these look like UL2-style mode/sentinel tokens -- confirm.
# The model's embedding matrix is not resized here, so this presumably relies
# on the checkpoint having been trained with these tokens already in its
# vocabulary -- verify against the training config.
special_tokens_dict = {
    "additional_special_tokens": ["[R]", "[S]", "[X]", "[NTP]"],
}
tokenizer.add_special_tokens(special_tokens_dict)

model = AutoModelForSeq2SeqLM.from_pretrained(CHECKPOINT_PATH).to(device)

prompt = "The "
# Keep the encoded prompt on the same device as the model weights.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# No explicit decoder_input_ids are passed: for encoder-decoder models
# generate() starts decoding from the configured decoder start token itself.
generated_ids = model.generate(
    **inputs,
    max_new_tokens=MAX_NEW_TOKENS,
    no_repeat_ngram_size=NO_REPEAT_NGRAM_SIZE,
)

# Decode the first (and only) sequence in the batch, dropping special tokens.
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)