I also can't use `from transformers import LlamaForCausalLM` as a workaround, because of a size mismatch. Since loading through the Hugging Face auto classes is broken for me and there is a size mismatch in the safetensors, I tried to write a simple `model.py` from scratch; the same type of mismatch blocks the `LlamaForCausalLM` workaround as well.
Shape mismatch error: the `ValueError: Trying to set a tensor of shape torch.Size([192, 576]) in "weight" (which has shape torch.Size([576, 576]))` is the core problem. The pretrained weights have a different shape than what the `LlamaForCausalLM` architecture expects for that `weight` parameter (one of the attention projection layers). The numbers are consistent with grouped-query attention: SmolLM2-135M's config uses 3 key/value heads with a head dimension of 64, so `k_proj`/`v_proj` should be 192×576, while a `transformers` build without grouped-query support allocates a full 576×576 projection, hence the mismatch.
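One way to confirm the diagnosis is to compare the shapes stored in the checkpoint against what the config implies. A minimal diagnostic sketch, assuming the weights sit in a single `model.safetensors` next to `config.json` (adjust `MODEL_DIR` and the filename for your layout):

```python
from transformers import AutoConfig
from safetensors import safe_open

MODEL_DIR = "."  # directory containing config.json and model.safetensors

config = AutoConfig.from_pretrained(MODEL_DIR)
head_dim = config.hidden_size // config.num_attention_heads
kv_heads = getattr(config, "num_key_value_heads", config.num_attention_heads)
print("expected k_proj/v_proj shape:", (kv_heads * head_dim, config.hidden_size))

# List the shapes actually stored in the checkpoint (layer 0 is enough)
with safe_open(f"{MODEL_DIR}/model.safetensors", framework="pt") as f:
    for name in f.keys():
        if "layers.0." in name:
            print(name, f.get_slice(name).get_shape())
```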
```
pip install sentencepiece
pip install accelerate
pip install bitsandbytes
# Or, if you encounter issues with the direct installation:
pip install -i https://test.pypi.org/simple/ bitsandbytes
```
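Since this kind of loader error is often version-dependent, it can also help to record the versions in play before digging further; a quick check (assuming a pip-based install):

```python
import numpy, safetensors, torch, transformers

# Print the versions relevant to this loading path
for mod in (transformers, torch, safetensors, numpy):
    print(mod.__name__, mod.__version__)
```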
This is the full script (SmolLM2-135M_LlamaForCausalLM.py):

```python
import torch
import os
# from transformers import LlamaTokenizer, LlamaForCausalLM, pipeline
from transformers import GPT2Tokenizer, LlamaForCausalLM, pipeline  # GPT2Tokenizer instead of LlamaTokenizer
from datetime import datetime

# Model and tokenizer names - adjust if needed for your specific model version
MODEL_NAME = "HuggingFaceTB/SmolLM2-135M"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # Use GPU if available

# Get the absolute path of the current script's directory
MODEL_DIR = os.path.dirname(os.path.abspath(__file__))

# Load tokenizer and model from the current directory, using GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_DIR)
# tokenizer = LlamaTokenizer.from_pretrained(MODEL_DIR, device_map="auto")
# tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME, device_map="auto")

model = LlamaForCausalLM.from_pretrained(
    MODEL_DIR,  # was MODEL_NAME
    torch_dtype=torch.float16,
    device_map={"": DEVICE},  # use the DEVICE variable directly
    offload_folder="offload",
)  # deleted load_in_8bit=True

# Create a text generation pipeline - simpler interface for inference
# (the model is already placed via device_map, so no device= argument here)
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def generate_text(prompt, max_new_tokens=100, temperature=0.7):
    """Generates text based on the given prompt."""
    sequences = generator(prompt, max_new_tokens=max_new_tokens, temperature=temperature, do_sample=True)
    return sequences[0]["generated_text"]

def save_chat(chat_history):
    """Saves the chat history to a file."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"autosaved_chat_{timestamp}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(chat_history)
    print(f"Chat saved to {filename}")  # Indicate save location to the user

# Basic chat loop (expand this for full functionality)
chat_history = ""
while True:
    user_input = input("You: ")
    if user_input.lower() == "exit":
        break
    # TODO: implement chat memory logic here - how to incorporate past turns
    full_prompt = f"### Instruction: {user_input}"  # Example prompt format
    response = generate_text(full_prompt)
    chat_history += f"You: {user_input}\nBot: {response}\n"
    print("Bot:", response)
    save_chat(chat_history)  # Autosave after each turn

print("Exiting chat.")
```
This is the CMD console error itself:
```
C:\Users\User\OneDrive\Desktop\SmolLM2-135M\LlamaForCausalLM>python SmolLM2-135M_LlamaForCausalLM.py

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):
  File "C:\Users\User\OneDrive\Desktop\SmolLM2-135M\LlamaForCausalLM\SmolLM2-135M_LlamaForCausalLM.py", line 28, in <module>
    model = LlamaForCausalLM.from_pretrained(
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\modeling_utils.py", line 2604, in from_pretrained
    state_dict = load_state_dict(resolved_archive_file)
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\modeling_utils.py", line 461, in load_state_dict
    return safe_load_file(checkpoint_file)
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\safetensors\torch.py", line 315, in load_file
    result[k] = f.get_tensor(k)
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\storage.py", line 234, in __getitem__
    return super().__getitem__(*args, **kwargs)
C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\storage.py:234: UserWarning: Failed to initialize NumPy: _ARRAY_API not found (Triggered internally at ..\torch\csrc\utils\tensor_numpy.cpp:84.)
  return super().__getitem__(*args, **kwargs)
Traceback (most recent call last):
  File "C:\Users\User\OneDrive\Desktop\SmolLM2-135M\LlamaForCausalLM\SmolLM2-135M_LlamaForCausalLM.py", line 28, in <module>
    model = LlamaForCausalLM.from_pretrained(
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\modeling_utils.py", line 2881, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\modeling_utils.py", line 3228, in _load_pretrained_model
    new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
                                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\modeling_utils.py", line 720, in _load_state_dict_into_meta_model
    set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs)
  File "C:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\accelerate\utils\modeling.py", line 286, in set_module_tensor_to_device
    raise ValueError(
ValueError: Trying to set a tensor of shape torch.Size([192, 576]) in "weight" (which has shape torch.Size([576, 576])), this looks incorrect.
```
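Separately from the shape mismatch, the NumPy warning at the top of the log means the installed torch build was compiled against NumPy 1.x but is running under NumPy 2.1.3. Following the log's own suggestion, pinning NumPy below 2 should silence it (this addresses only the warning, not the ValueError):

```
pip install "numpy<2"
```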
Please refrain from spamming the discussions. I have responded here: https://huggingface.co/HuggingFaceTB/SmolLM2-135M/discussions/3#674debb7e0b9b21b8352520c
Please ensure your transformers version is up to date.
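With a current `transformers`, the auto classes pick up the grouped-query configuration correctly. A minimal loading sketch after upgrading (`pip install -U transformers`), assuming you pull from the Hub; swap in your local directory if you already have the files:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "HuggingFaceTB/SmolLM2-135M"  # or a local model directory
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

inputs = tokenizer("Gravity is", return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```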