# Hugging Face Space status: Running on CPU Upgrade
from typing import List, Literal, Optional, Tuple, Union | |
import torch | |
import transformers | |
from lm_eval.models.huggingface import HFLM | |
from lm_eval.api.registry import register_model | |
class HFLMwithChatTemplate(HFLM):
    """HFLM variant that can wrap each prompt in the tokenizer's chat template.

    When ``use_chat_template`` is true, every string passed to
    ``tok_batch_encode`` is first rendered as a single ``user`` message via
    ``tokenizer.apply_chat_template`` before being tokenized.
    """

    def __init__(self, use_chat_template=True, **kwargs):
        """Initialise the underlying HFLM and record the chat-template switch.

        Args:
            use_chat_template: Whether ``tok_batch_encode`` should render
                prompts through the tokenizer's chat template.
            **kwargs: Forwarded unchanged to ``HFLM.__init__``.
        """
        super().__init__(**kwargs)
        self.use_chat_template = use_chat_template

    def tok_batch_encode(
        self,
        strings: List[str],
        padding_side: str = "left",
        left_truncate_len: Optional[int] = None,
        truncation: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Encode a batch of strings into padded tensors.

        Unlike ``tok_encode``, this converts to tensors and pads automatically.

        Args:
            strings: Prompts to encode.
            padding_side: Padding side to use for this call only; the
                tokenizer's previous setting is restored afterwards.
            left_truncate_len: If truthy, keep only the last
                ``left_truncate_len`` positions of each row.
            truncation: Passed through to the tokenizer call.

        Returns:
            A ``(input_ids, attention_mask)`` pair taken from the tokenizer
            output.

        Raises:
            ValueError: If ``self.AUTO_MODEL_CLASS`` is neither
                ``AutoModelForCausalLM`` nor ``AutoModelForSeq2SeqLM``.
        """
        if self.use_chat_template:
            try:
                # Build the full list first so that a failure on any item
                # leaves ``strings`` entirely untouched (all-or-nothing).
                strings = [
                    self.tokenizer.apply_chat_template(
                        [{"role": "user", "content": f"{input_string}"}],
                        tokenize=False,
                    )
                    for input_string in strings
                ]
            except Exception:
                # Best effort: fall back to the raw prompts. ``Exception``
                # (not a bare ``except``) lets KeyboardInterrupt/SystemExit
                # propagate.
                print(f"failed to update input string with chat template: {self._model}")

        if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM:
            add_special_tokens = False
        elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
            add_special_tokens = True
        else:
            # Previously this fell through and failed later with an opaque
            # UnboundLocalError on ``add_special_tokens``.
            raise ValueError(
                f"unsupported AUTO_MODEL_CLASS for tok_batch_encode: {self.AUTO_MODEL_CLASS}"
            )

        # Temporarily override the tokenizer's padding side; ``finally``
        # guarantees it is restored even if tokenization raises.
        old_padding_side = self.tokenizer.padding_side
        self.tokenizer.padding_side = padding_side
        try:
            encoding = self.tokenizer(
                strings,
                truncation=truncation,
                padding="longest",
                return_tensors="pt",
                add_special_tokens=add_special_tokens,
            )
        finally:
            self.tokenizer.padding_side = old_padding_side

        if left_truncate_len:
            # Keep the rightmost ``left_truncate_len`` columns of both tensors.
            encoding["input_ids"] = encoding["input_ids"][:, -left_truncate_len:]
            encoding["attention_mask"] = encoding["attention_mask"][
                :, -left_truncate_len:
            ]

        return encoding["input_ids"], encoding["attention_mask"]