import transformers
import torch

# Model and tokenizer initialization
model_path_name = "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA"  # Replace with your model path

# Initialize the pipeline
pipeline = transformers.pipeline(
    "text-generation",
    model=model_path_name,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",  # Adjust to "cuda" if needed
)

# Prepare the message list
message_list = [
    [
        {"role": "system", "content": "You are an AI assistant."},
        {"role": "user", "content": "Who are you?"},
    ]
]

# Apply the chat template or manually format the prompts
try:
    prompts = [
        pipeline.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        for messages in message_list
    ]
except AttributeError:
    # Fallback: manually format the prompts if `apply_chat_template` is unsupported
    prompts = [
        f"<|im_start|>system\n{msg[0]['content']}<|im_end|>\n"
        f"<|im_start|>user\n{msg[1]['content']}<|im_end|>\n<|im_start|>assistant\n"
        for msg in message_list
    ]

# Debugging: print the formatted prompts
print("Formatted Prompts:", prompts)

# Validate the tokenizer's EOS and PAD token IDs
eos_token_id = pipeline.tokenizer.eos_token_id or 50256  # Default fallback for GPT-like models
pad_token_id = eos_token_id  # Ensure consistency
pipeline.tokenizer.pad_token_id = pad_token_id  # Needed for padding below; Llama tokenizers ship without a pad token
print("EOS Token ID:", eos_token_id)

# Tokenize the prompts (optional debugging step)
tokens = pipeline.tokenizer(prompts, padding=True, return_tensors="pt")
print("Tokenized Input:", tokens)

# Generate the output
try:
    outputs = pipeline(
        prompts,
        max_new_tokens=100,  # Reduce for debugging purposes
        do_sample=True,
        temperature=0.5,
        top_p=0.5,
        eos_token_id=eos_token_id,
        pad_token_id=pad_token_id,
    )
    print("Outputs:", outputs)
except Exception as e:
    print("Error during generation:", str(e))
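
# Post-processing sketch (an addition on top of the script above, not part of its
# original flow): the text-generation pipeline returns one list of dicts per prompt,
# each dict carrying a "generated_text" field. With the pipeline's default
# return_full_text=True, the prompt is echoed back at the start of the output,
# so it is stripped here to isolate the assistant's reply.
if "outputs" in locals():  # skip if generation above failed
    for prompt, result in zip(prompts, outputs):
        generated = result[0]["generated_text"]
        reply = generated[len(prompt):] if generated.startswith(prompt) else generated
        print("Assistant reply:", reply.strip())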