---
inference: false
license: other
tags:
- llama-2
- llama2
- gptq
- auto-gptq
- 13b
- llama
- 4bit
- quantization
---

# Get Started

This model is quantized with [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), so you need the `auto-gptq` package to load it (install it with `pip install auto-gptq`).

- `no-act-order` model (see the config sketch after this list for what this means in AutoGPTQ terms)
- 4-bit quantization

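For reference, these tags map onto AutoGPTQ quantization settings roughly as sketched below. This is only an illustration; the exact configuration used to produce this checkpoint (e.g. group size) is not stated here, and you do not need it to run inference.

```py
from auto_gptq import BaseQuantizeConfig

# Illustrative settings only; the actual config of this checkpoint may differ.
quantize_config = BaseQuantizeConfig(
    bits=4,          # 4-bit weight quantization
    desc_act=False,  # "no-act-order": weights are not reordered by activation order
)
```
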
```py
from transformers import AutoTokenizer, pipeline
from auto_gptq import AutoGPTQForCausalLM

model_id = 'seonglae/llama-2-13b-chat-hf-gptq'

# Load the tokenizer and the 4-bit GPTQ-quantized model
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    # pass model_basename='...' here if the quantized weight file is not found automatically
    trust_remote_code=True,
    device='cuda:0',
    use_triton=False,
    use_safetensors=True,
)

# Wrap the quantized model in a text-generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.5,
    top_p=0.95,
    max_new_tokens=100,
    repetition_penalty=1.15,
)

prompt = "USER: Are you AI?\nASSISTANT:"
pipe(prompt)
```
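
The pipeline call returns a list of dictionaries; the `generated_text` field holds the prompt followed by the model's completion:

```py
# Print the completion (the returned text includes the prompt itself)
result = pipe(prompt)
print(result[0]["generated_text"])
```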