Error when running inference
#2 opened by fortiag
I have been trying to do some tests using the following code:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# model_hf_id and cache_path are defined earlier in the script
tokenizer = AutoTokenizer.from_pretrained(model_hf_id, cache_dir=cache_path)
model = AutoModelForCausalLM.from_pretrained(model_hf_id, torch_dtype=torch.float16, cache_dir=cache_path)
# Build a pipeline from the already-loaded model and tokenizer
test = pipeline(model=model, tokenizer=tokenizer)
example = "Hello, how are you?"
print("QUESTION: " + example)
result = test(example)
Unfortunately, when I run it I get the following error, which doesn't seem to come from my own code:
Traceback (most recent call last):
  File "/home/eve/Documents/llm-agent-poc/scripts/download_model_hf.py", line 24, in <module>
    test = pipeline(model=model, tokenizer=tokenizer)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/pipelines/__init__.py", line 801, in pipeline
    raise RuntimeError(
RuntimeError: Inferring the task automatically requires to check the hub with a model_id defined as a `str`. LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=32000, bias=False)
) is not a valid model_id.
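If I read the message correctly, the pipeline can only infer the task when model is a string repo id it can look up on the Hub, not an already-instantiated model object. A minimal sketch of the string-id variant (reusing model_hf_id from above; note this loads the model from the Hub again instead of reusing the object already in memory):

# Passing the repo id as a string lets the pipeline query the Hub and infer the task
test = pipeline(model=model_hf_id)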
I googled the error and found a thread where someone reported a similar problem with another model. The suggested fix was to pass the task explicitly to the pipeline:
test = pipeline(task='text-generation', model=model, tokenizer=tokenizer)
In my case that didn't solve the problem; it just produced a different error:
Traceback (most recent call last):
  File "/home/eve/Documents/llm-agent-poc/scripts/download_model_hf.py", line 28, in <module>
    result = test(example)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 208, in __call__
    return super().__call__(text_inputs, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1140, in __call__
    return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1147, in run_single
    model_outputs = self.forward(model_inputs, **forward_params)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1046, in forward
    model_outputs = self._forward(model_inputs, **forward_params)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 271, in _forward
    generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/generation/utils.py", line 1718, in generate
    return self.greedy_search(
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/generation/utils.py", line 2579, in greedy_search
    outputs = self(
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1181, in forward
    outputs = self.model(
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1068, in forward
    layer_outputs = decoder_layer(
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 796, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 691, in forward
    query_states = self.q_proj(hidden_states)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/eve/Documents/llm-agent-poc/.venv/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: "addmm_impl_cpu_" not implemented for 'Half'
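For anyone hitting the same wall: as far as I can tell, this second error means PyTorch has no CPU kernel for float16 ("Half") matrix multiplication, so a model loaded with torch_dtype=torch.float16 cannot run inference on the CPU. A minimal sketch of two workarounds, assuming the same model_hf_id and cache_path as above and that a CUDA GPU may or may not be present:

import torch
from transformers import AutoModelForCausalLM

if torch.cuda.is_available():
    # Keep float16, but run on the GPU, where Half matmul kernels exist
    model = AutoModelForCausalLM.from_pretrained(
        model_hf_id, torch_dtype=torch.float16, cache_dir=cache_path
    ).to("cuda")
else:
    # On CPU, fall back to float32, which F.linear supports
    model = AutoModelForCausalLM.from_pretrained(
        model_hf_id, torch_dtype=torch.float32, cache_dir=cache_path
    )

With the GPU variant, the pipeline's input tensors need to end up on the same device as the model; if they don't in your transformers version, passing device=0 to pipeline(...) is the usual fix.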