# Copyright (c) OpenMMLab. All rights reserved.
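"""Convert a pth model trained with XTuner to a HuggingFace-format model.

A sketch of typical usage, assuming the standard XTuner CLI wrapper for this
tool:

    xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_DIR}
"""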
import argparse
import os.path as osp
import shutil
import warnings

from accelerate import init_empty_weights
from accelerate.utils import set_module_tensor_to_device
from mmengine import print_log
from mmengine.config import Config, DictAction
from mmengine.fileio import PetrelBackend, get_file_backend
from mmengine.utils import mkdir_or_exist
from tqdm import tqdm

from xtuner.configs import cfgs_name_path
from xtuner.model.utils import guess_load_checkpoint
from xtuner.registry import BUILDER


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert a pth model to a HuggingFace model')
    parser.add_argument('config', help='config file name or path')
    parser.add_argument('pth_model', help='pth model file')
    parser.add_argument(
        'save_dir', help='the directory in which to save the HuggingFace model')
    parser.add_argument(
        '--fp32',
        action='store_true',
        help='Save the LLM in fp32. If not set, fp16 is used by default.')
    parser.add_argument(
        '--max-shard-size',
        type=str,
        default='2GB',
        help='Only applicable to the LLM. The maximum size of each sharded '
        'checkpoint.')
    parser.add_argument(
        '--safe-serialization',
        action='store_true',
        help='Whether to use `safe_serialization` (safetensors)')
    parser.add_argument(
        '--save-format',
        default='xtuner',
        choices=('xtuner', 'official', 'huggingface'),
        help='Only applicable to LLaVAModel. Indicates the save format.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='Override some settings in the used config; key-value pairs '
        'in xxx=yyy format will be merged into the config file. If the value '
        'to be overwritten is a list, it should be like key="[a,b]" or '
        'key=a,b. It also allows nested list/tuple values, e.g. '
        'key="[(a,b),(c,d)]". Note that the quotation marks are necessary '
        'and that no whitespace is allowed.')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    # parse config
    if not osp.isfile(args.config):
        try:
            args.config = cfgs_name_path[args.config]
        except KeyError:
            raise FileNotFoundError(f'Cannot find {args.config}')

    # load config
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
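
    # Resolve the model class name, whether cfg.model.type is a string or a
    # class object.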
    model_name = cfg.model.type if isinstance(cfg.model.type,
                                              str) else cfg.model.type.__name__
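
    # Meta-tensor init is skipped whenever weights must be materialized up
    # front: exporting LLaVA in the 'official'/'huggingface' layout and
    # building reward models both require real tensors.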
    use_meta_init = True
    if 'LLaVAModel' in model_name:
        cfg.model.pretrained_pth = None
        if args.save_format != 'xtuner':
            use_meta_init = False
    if 'Reward' in model_name:
        use_meta_init = False
        cfg.model.llm.pop('quantization_config', None)

    if use_meta_init:
        try:
            # Initializing the model with meta tensors can reduce unwanted
            # memory usage.
            with init_empty_weights():
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        'ignore', message='.*non-meta.*', category=UserWarning)
                    model = BUILDER.build(cfg.model)
        except NotImplementedError as e:
            # The model cannot be initialized with meta tensors if it is
            # quantized.
            if 'Cannot copy out of meta tensor' in str(e):
                model = BUILDER.build(cfg.model)
            else:
                raise e
    else:
        model = BUILDER.build(cfg.model)
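
    # Checkpoints stored on a Petrel (ceph) backend need patched file I/O
    # before they can be loaded.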
    backend = get_file_backend(args.pth_model)
    if isinstance(backend, PetrelBackend):
        from xtuner.utils.fileio import patch_fileio
        with patch_fileio():
            state_dict = guess_load_checkpoint(args.pth_model)
    else:
        state_dict = guess_load_checkpoint(args.pth_model)
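
    # Copy every parameter onto CPU, materializing any meta tensors created
    # during init.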
    for name, param in tqdm(state_dict.items(), desc='Load State Dict'):
        set_module_tensor_to_device(model, name, 'cpu', param)
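    # use_cache is typically disabled during training; re-enable it for
    # inference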
    model.llm.config.use_cache = True
    print_log(f'Load PTH model from {args.pth_model}', 'current')

    mkdir_or_exist(args.save_dir)
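
    # these kwargs are forwarded to HuggingFace `save_pretrained` when the
    # LLM weights are written out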
    save_pretrained_kwargs = {
        'max_shard_size': args.max_shard_size,
        'safe_serialization': args.safe_serialization
    }
    model.to_hf(
        cfg=cfg,
        save_dir=args.save_dir,
        fp32=args.fp32,
        save_pretrained_kwargs=save_pretrained_kwargs,
        save_format=args.save_format)

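    # keep a copy of the training config alongside the converted weights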
    shutil.copyfile(args.config, osp.join(args.save_dir, 'xtuner_config.py'))
    print_log('All done!', 'current')


if __name__ == '__main__':
    main()