runtime error

File "/home/user/.local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
  return func(*args, **kwargs)
File "/home/user/.local/lib/python3.10/site-packages/transformers/generation/utils.py", line 1648, in generate
  return self.sample(
File "/home/user/.local/lib/python3.10/site-packages/transformers/generation/utils.py", line 2730, in sample
  outputs = self(
File "/home/user/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
  return forward_call(*args, **kwargs)
File "/home/user/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 820, in forward
  outputs = self.model(
File "/home/user/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
  return forward_call(*args, **kwargs)
File "/home/user/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 708, in forward
  layer_outputs = decoder_layer(
File "/home/user/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
  return forward_call(*args, **kwargs)
File "/home/user/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 424, in forward
  hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/user/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
  return forward_call(*args, **kwargs)
File "/home/user/.local/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 321, in forward
  query_states = self.q_proj(hidden_states)
File "/home/user/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
  return forward_call(*args, **kwargs)
File "/home/user/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward
  return F.linear(input, self.weight, self.bias)
RuntimeError: "addmm_impl_cpu_" not implemented for 'Half'

Container logs:

Fetching error logs...