```python
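# Qwen's remote code depends on tiktoken and transformers_stream_generator;
# the remaining packages match the original fine-tuning environment.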
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb sentencepiece transformers_stream_generator tiktoken

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
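
# Load the Guanaco-tuned Qwen-1.8B checkpoint in bfloat16. trust_remote_code is
# required because Qwen ships its own modeling and tokenizer code.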
tokenizer = AutoTokenizer.from_pretrained("TinyPixel/qwen-1.8B-guanaco", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "TinyPixel/qwen-1.8B-guanaco",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

device = "cuda:0"

from transformers import StoppingCriteria, StoppingCriteriaList

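# Token-id sequences that open a new turn; in the Qwen tokenizer these are
# assumed to encode "### Human:" and "### Assistant:".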
stop_token_ids = [[14374, 11097, 25], [14374, 21388, 25]]
stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
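
# Custom stopping criterion: stop as soon as the tail of the generated
# sequence equals one of the stop-token-id patterns above.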
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])
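
# Guanaco prompt format: a "### Human:" turn followed by an empty
# "### Assistant:" turn for the model to complete.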
text = '''### Human: what is the difference between a dog and a cat on a biological level?
### Assistant:'''

inputs = tokenizer(text, return_tensors="pt").to(device)
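
# Sample up to 512 new tokens with temperature/top-p/top-k sampling; the
# stopping criteria cut generation off when a new turn marker appears.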
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    stopping_criteria=stopping_criteria,
    do_sample=True,
    top_p=0.95,
    temperature=0.7,
    top_k=50,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=False))
```
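
The hard-coded stop ids above are opaque. One way to sanity-check them is to encode the marker strings with the same tokenizer; the sketch below assumes the markers tokenize the same way in isolation as they do mid-sequence, which is worth verifying for a BPE tokenizer like Qwen's.

```python
# Sketch: derive the stop-token ids from the marker strings instead of
# hard-coding them. Assumes isolated tokenization matches in-context
# tokenization, which should be spot-checked for BPE tokenizers.
stop_strings = ["### Human:", "### Assistant:"]
derived = [tokenizer.encode(s) for s in stop_strings]
for s, ids in zip(stop_strings, derived):
    print(s, "->", ids)  # compare against [14374, 11097, 25] / [14374, 21388, 25]
```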