# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
import shutil
import warnings

from accelerate import init_empty_weights
from accelerate.utils import set_module_tensor_to_device
from mmengine import print_log
from mmengine.config import Config, DictAction
from mmengine.fileio import PetrelBackend, get_file_backend
from mmengine.utils import mkdir_or_exist
from tqdm import tqdm

from xtuner.configs import cfgs_name_path
from xtuner.model.utils import guess_load_checkpoint
from xtuner.registry import BUILDER


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert the pth model to HuggingFace model')
    parser.add_argument('config', help='config file name or path.')
    parser.add_argument('pth_model', help='pth model file')
    parser.add_argument(
        'save_dir', help='the directory to save HuggingFace model')
    parser.add_argument(
        '--fp32',
        action='store_true',
        help='Save LLM in fp32. If not set, fp16 will be used by default.')
    parser.add_argument(
        '--max-shard-size',
        type=str,
        default='2GB',
        help='Only applicable for LLM. The maximum size for '
        'each sharded checkpoint.')
    parser.add_argument(
        '--safe-serialization',
        action='store_true',
        help='Indicate if using `safe_serialization`')
    parser.add_argument(
        '--save-format',
        default='xtuner',
        choices=('xtuner', 'official', 'huggingface'),
        help='Only applicable for LLaVAModel. Indicate the save format.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    # parse config
    if not osp.isfile(args.config):
        try:
            args.config = cfgs_name_path[args.config]
        except KeyError:
            raise FileNotFoundError(f'Cannot find {args.config}')

    # load config
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    model_name = (cfg.model.type if isinstance(cfg.model.type, str) else
                  cfg.model.type.__name__)

    use_meta_init = True
    if 'LLaVAModel' in model_name:
        cfg.model.pretrained_pth = None
        if args.save_format != 'xtuner':
            use_meta_init = False
    if 'Reward' in model_name:
        use_meta_init = False
        cfg.model.llm.pop('quantization_config', None)

    if use_meta_init:
        try:
            # Initializing the model with meta tensors can reduce unwanted
            # memory usage.
            with init_empty_weights():
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        'ignore', message='.*non-meta.*', category=UserWarning)
                    model = BUILDER.build(cfg.model)
        except NotImplementedError as e:
            # Cannot initialize the model with meta tensors if the model is
            # quantized.
            if 'Cannot copy out of meta tensor' in str(e):
                model = BUILDER.build(cfg.model)
            else:
                raise
    else:
        model = BUILDER.build(cfg.model)

    backend = get_file_backend(args.pth_model)
    if isinstance(backend, PetrelBackend):
        from xtuner.utils.fileio import patch_fileio
        with patch_fileio():
            state_dict = guess_load_checkpoint(args.pth_model)
    else:
        state_dict = guess_load_checkpoint(args.pth_model)

    for name, param in tqdm(state_dict.items(), desc='Load State Dict'):
        set_module_tensor_to_device(model, name, 'cpu', param)

    model.llm.config.use_cache = True

    print_log(f'Load PTH model from {args.pth_model}', 'current')

    mkdir_or_exist(args.save_dir)
    save_pretrained_kwargs = {
        'max_shard_size': args.max_shard_size,
        'safe_serialization': args.safe_serialization
    }
    model.to_hf(
        cfg=cfg,
        save_dir=args.save_dir,
        fp32=args.fp32,
        save_pretrained_kwargs=save_pretrained_kwargs,
        save_format=args.save_format)

    shutil.copyfile(args.config, osp.join(args.save_dir, 'xtuner_config.py'))
    print_log('All done!', 'current')


if __name__ == '__main__':
    main()
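
# A minimal usage sketch (the paths and the script filename pth_to_hf.py below
# are hypothetical; the flags are those defined in parse_args above):
#
#   python pth_to_hf.py ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_DIR} \
#       --max-shard-size 2GB --safe-serialization
#
# `config` accepts either a path to a config file or a name registered in
# `xtuner.configs.cfgs_name_path`. The converted HuggingFace model, plus a
# copy of the config saved as `xtuner_config.py`, is written to ${SAVE_DIR}.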