How does that translate?
#1
by
xuan0126
- opened
I want to convert a Baichuan2 base model. Can I get your help?
You can deploy a pod on https://www.runpod.io/
I use this for quantizing the model and uploading.
First, download the model files and install the dependencies:
# Download every file of the Baichuan2-13B-Chat checkpoint into its own directory.
mkdir baichuan2-13b-chat
cd baichuan2-13b-chat
for model_file in \
    config.json \
    configuration_baichuan.py \
    generation_config.json \
    generation_utils.py \
    modeling_baichuan.py \
    pytorch_model-00001-of-00003.bin \
    pytorch_model-00002-of-00003.bin \
    pytorch_model-00003-of-00003.bin \
    pytorch_model.bin.index.json \
    quantizer.py \
    special_tokens_map.json \
    tokenization_baichuan.py \
    tokenizer.model \
    tokenizer_config.json
do
    wget "https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat/resolve/main/${model_file}"
done
cd ..

# Create a virtual environment that can still see the system-wide packages.
apt update
apt install python3.10-venv -y
python -m venv venv --system-site-packages
source venv/bin/activate

# Install the quantization toolchain and the tokenizer dependencies.
pip install auto-gptq
pip install sentencepiece
pip install protobuf==3.20.0

# Fetch the dataset used as calibration data by the quantization script.
wget https://github.com/gururise/AlpacaDataCleaned/raw/main/alpaca_data_cleaned.json
And run this:
# Put the virtual environment's site-packages directory first on the module
# search path, so the imports below are resolved against it before any other
# location. The path insertion must stay ahead of those imports.
import sys
sys.path.insert(0, '/workspace/venv/lib/python3.10/site-packages/')
import transformers
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
import logging
# Directory holding the downloaded checkpoint, and the directory the
# quantized weights are written to.
pretrained_model_dir = "./baichuan2-13b-chat/"
quantized_model_dir = "./baichuan2-13b-chat-gptq-32g-act/"

# The checkpoint directory ships its own tokenizer and model code
# (tokenization_baichuan.py, modeling_baichuan.py). Opt in to running that
# code explicitly so loading does not depend on an interactive confirmation.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, trust_remote_code=True)

quantize_config = BaseQuantizeConfig(
    bits=4,  # quantize the model to 4-bit
    group_size=32,  # 128 is the commonly recommended value; 32 is used here
    desc_act=True,  # setting this to False can significantly speed up inference, at a slight cost in perplexity
)

model = AutoGPTQForCausalLM.from_pretrained(
    pretrained_model_dir,
    quantize_config,
    trust_remote_code=True,
)
import json
import random
import time
import torch
from datasets import Dataset
def load_data(data_path, tokenizer, n_samples):
    """Build tokenized calibration samples from an instruction-style JSON file.

    Args:
        data_path: Path to a JSON file containing a list of records, each
            with "instruction", "input" and "output" keys.
        tokenizer: Callable tokenizer whose result exposes "input_ids" and
            "attention_mask", and which has a ``model_max_length`` attribute.
        n_samples: Maximum number of records to draw at random from the file.

    Returns:
        A list of dicts, one per kept record. "input_ids" and
        "attention_mask" are ``torch.LongTensor`` objects; "prompt" and
        "output" are kept as text.
    """
    with open(data_path, "r", encoding="utf-8") as f:
        raw_data = json.load(f)

    # Never ask for more samples than the file contains.
    raw_data = random.sample(raw_data, k=min(n_samples, len(raw_data)))

    def dummy_gen():
        return raw_data

    def tokenize(examples):
        max_length = tokenizer.model_max_length

        prompts = []
        kept_outputs = []
        input_ids = []
        attention_mask = []
        for istr, inp, opt in zip(
            examples["instruction"], examples["input"], examples["output"]
        ):
            if inp:
                prompt = f"Instruction:\n{istr}\nInput:\n{inp}\nOutput:\n"
            else:
                prompt = f"Instruction:\n{istr}\nOutput:\n"

            # Skip records whose prompt alone already fills the context window.
            if len(tokenizer(prompt)["input_ids"]) >= max_length:
                continue

            tokenized_data = tokenizer(prompt + opt)

            input_ids.append(tokenized_data["input_ids"][:max_length])
            attention_mask.append(tokenized_data["attention_mask"][:max_length])
            prompts.append(prompt)
            kept_outputs.append(opt)

        # "output" is returned as well so that every column of the mapped
        # batch has the same length even when some records were skipped above.
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "prompt": prompts,
            "output": kept_outputs,
        }

    dataset = Dataset.from_generator(dummy_gen)

    # Process the whole dataset as a single batch.
    dataset = dataset.map(
        tokenize,
        batched=True,
        batch_size=len(dataset),
        num_proc=1,
        keep_in_memory=True,
        load_from_cache_file=False,
        remove_columns=["instruction", "input"],
    )

    dataset = dataset.to_list()

    # Convert the token lists to tensors in place.
    for sample in dataset:
        sample["input_ids"] = torch.LongTensor(sample["input_ids"])
        sample["attention_mask"] = torch.LongTensor(sample["attention_mask"])

    return dataset
# Draw up to 128 random calibration samples from the downloaded dataset.
examples = load_data("alpaca_data_cleaned.json", tokenizer, 128)

# Quantize using the calibration samples, one sample per batch.
model.quantize(examples, batch_size=1)

# Write the quantized model to the local output directory, then publish it.
model.save_quantized(quantized_model_dir)
model.push_to_hub(
    repo_id='baichuan2-13b-chat-gptq-32g-act',
    use_safetensors=True,
    private=False,
    token='',
)