#!/usr/bin/env python3

import argparse
import os
import time

import numpy as np
import nvidia_smi
import psutil
import torch

from model_manager import ModelManager
from schema import Config, HDStrategy, SDSampler

# Disable TorchScript JIT fusers. These torch._C hooks are private and vary
# across torch versions, hence the broad except.
try:
    torch._C._jit_override_can_fuse_on_cpu(False)
    torch._C._jit_override_can_fuse_on_gpu(False)
    torch._C._jit_set_texpr_fuser_enabled(False)
    torch._C._jit_set_nvfuser_enabled(False)
except Exception:
    pass

# Cap thread counts for the OMP/BLAS/NumExpr backends so CPU load stays
# comparable between runs.
NUM_THREADS = str(4)

os.environ["OMP_NUM_THREADS"] = NUM_THREADS
os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS
os.environ["MKL_NUM_THREADS"] = NUM_THREADS
os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS
os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS
if os.environ.get("CACHE_DIR"):
    os.environ["TORCH_HOME"] = os.environ["CACHE_DIR"]


def run_model(model, size):
    # Random RGB image and single-channel mask of the requested size.
    image = np.random.randint(0, 256, (size[0], size[1], 3)).astype(np.uint8)
    mask = np.random.randint(0, 255, size).astype(np.uint8)

    config = Config(
        ldm_steps=2,
        hd_strategy=HDStrategy.ORIGINAL,
        hd_strategy_crop_margin=128,
        hd_strategy_crop_trigger_size=128,
        hd_strategy_resize_limit=128,
        prompt="a fox is sitting on a bench",
        sd_steps=5,
        sd_sampler=SDSampler.ddim,
    )
    model(image, mask, config)


def benchmark(model, times: int, empty_cache: bool):
    sizes = [(512, 512)]

    nvidia_smi.nvmlInit()
    device_id = 0
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(device_id)

    def format(metrics):
        return f"{np.mean(metrics):.2f} ± {np.std(metrics):.2f}"

    process = psutil.Process(os.getpid())
    # For each size, report latency plus RAM and GPU memory usage.
    for size in sizes:
        torch.cuda.empty_cache()
        time_metrics = []
        cpu_metrics = []
        memory_metrics = []
        gpu_memory_metrics = []
        for _ in range(times):
            # With --empty-cache, drop cached CUDA blocks before each run so
            # every measurement starts from an empty allocator.
            if empty_cache:
                torch.cuda.empty_cache()
            start = time.time()
            run_model(model, size)
            torch.cuda.synchronize()

            # cpu_metrics.append(process.cpu_percent())
            time_metrics.append((time.time() - start) * 1000)
            memory_metrics.append(process.memory_info().rss / 1024 / 1024)
            gpu_memory_metrics.append(
                nvidia_smi.nvmlDeviceGetMemoryInfo(handle).used / 1024 / 1024
            )

        print(f"size: {size}".center(80, "-"))
        # print(f"cpu: {format(cpu_metrics)}")
        print(f"latency: {format(time_metrics)}ms")
        print(f"memory: {format(memory_metrics)} MB")
        print(f"gpu memory: {format(gpu_memory_metrics)} MB")

    nvidia_smi.nvmlShutdown()
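
# Each size prints a block shaped like the sketch below (numbers are
# placeholders, not real measurements):
#
#   -------------------------------size: (512, 512)--------------------------------
#   latency: 215.31 ± 4.20ms
#   memory: 2048.11 ± 1.50 MB
#   gpu memory: 4096.00 ± 0.00 MB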


def get_args_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--name")
    parser.add_argument("--device", default="cuda", type=str)
    parser.add_argument("--times", default=10, type=int)
    parser.add_argument("--empty-cache", action="store_true")
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args_parser()
    device = torch.device(args.device)
    model = ModelManager(
        name=args.name,
        device=device,
        sd_run_local=True,
        disable_nsfw=True,
        sd_cpu_textencoder=True,
        hf_access_token="123",
    )
    benchmark(model, args.times, args.empty_cache)
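
# Example invocation (a sketch; "lama" is an assumed model name, any name
# accepted by ModelManager works):
#
#   python3 benchmark.py --name lama --device cuda --times 10 --empty-cache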