dev(narugo): init this project
history blame
6.95 kB
import json
import logging
import os
import re
import shutil
from functools import lru_cache
from typing import Optional, List, Tuple, Mapping
import gradio as gr
import numpy as np
from PIL import Image
from hbutils.system import pip_install
from huggingface_hub import hf_hub_download
def _ensure_onnxruntime():
    """
    Make sure the ``onnxruntime`` package is importable, installing it if it is not.

    When the import fails, ``onnxruntime-gpu`` is installed if an ``nvidia-smi``
    executable is found on ``PATH``; otherwise the CPU build ``onnxruntime``
    is installed.
    """
    try:
        import onnxruntime  # noqa: F401 -- imported only to probe availability
    except (ImportError, ModuleNotFoundError):
        logging.warning('Onnx runtime not installed, preparing to install ...')
        if shutil.which('nvidia-smi'):
            # An NVIDIA driver tool is present, so pick the GPU build.
            logging.info('Installing onnxruntime-gpu ...')
            pip_install(['onnxruntime-gpu'], silent=True)
        else:
            logging.info('Installing onnxruntime (cpu) ...')
            pip_install(['onnxruntime'], silent=True)
# NOTE(review): ``_ensure_onnxruntime`` is defined above, but no call to it is
# visible in this part of the file -- confirm it runs before this import.
from onnxruntime import get_available_providers, get_all_providers, InferenceSession, SessionOptions, \
    GraphOptimizationLevel

# Short nicknames accepted by ``get_onnx_provider`` in place of full provider names.
alias = {
    'gpu': "CUDAExecutionProvider",
    "trt": "TensorrtExecutionProvider",
}
def get_onnx_provider(provider: Optional[str] = None):
    """
    Resolve a provider name to an ONNX Runtime execution provider name.

    :param provider: Requested provider. May be empty or ``None``, a key of
        ``alias``, a full provider name, or a provider name without the
        ``ExecutionProvider`` suffix. Matching is case-insensitive.
    :return: ``"CUDAExecutionProvider"`` or ``"CPUExecutionProvider"`` when no
        provider is given (CUDA only if it is reported as available);
        otherwise the matching alias target or the matching entry of
        ``get_all_providers()``.
    :raises ValueError: If ``provider`` matches neither an alias nor any known
        provider.
    """
    if not provider:
        # No preference given: use CUDA when available, CPU otherwise.
        if "CUDAExecutionProvider" in get_available_providers():
            return "CUDAExecutionProvider"
        return "CPUExecutionProvider"

    lowered = provider.lower()
    if lowered in alias:
        return alias[lowered]

    # Accept both the full name and the name without its "ExecutionProvider" suffix.
    suffixed = f'{provider}ExecutionProvider'.lower()
    for p in get_all_providers():
        if p.lower() in (lowered, suffixed):
            return p

    raise ValueError(f'One of the {get_all_providers()!r} expected, '
                     f'but unsupported provider {provider!r} found.')
def resize(pic: Image.Image, size: int, keep_ratio: bool = True) -> Image.Image:
    """
    Resize an image for recognition using bilinear resampling.

    :param pic: Image to resize.
    :param size: Target edge length. With ``keep_ratio`` it is the length of
        the shorter edge; without it, both edges are set to ``size``.
    :param keep_ratio: Keep the aspect ratio of ``pic`` when true.
    :return: The resized image. Each edge is rounded down to a multiple of 4.
    """
    if keep_ratio:
        # Scale so that the shorter edge becomes ``size``.
        min_edge = min(pic.size)
        width = int(pic.size[0] / min_edge * size)
        height = int(pic.size[1] / min_edge * size)
    else:
        width, height = size, size

    # Round each edge down to a multiple of 4.
    target_size = ((width // 4) * 4, (height // 4) * 4)
    return pic.resize(target_size, resample=Image.Resampling.BILINEAR)
def to_tensor(pic: Image.Image):
    """
    Convert an image to a ``float32`` array in CHW layout, with every value
    divided by 255.

    :param pic: Image to convert.
    :return: Array of shape ``(bands, height, width)``.
    """
    width, height = pic.size
    bands = len(pic.getbands())
    pixels: np.ndarray = np.array(pic, np.uint8, copy=True)
    # Give the data an explicit channel axis, i.e. HWC layout.
    hwc = pixels.reshape(height, width, bands)
    # put it from HWC to CHW format
    chw = hwc.transpose((2, 0, 1))
    return chw.astype(np.float32) / 255
def fill_background(pic: Image.Image, background: str = 'white') -> Image.Image:
    """
    Flatten an image onto a solid background and return it in ``RGB`` mode.

    :param pic: Image to flatten. ``RGB`` images are returned unchanged; any
        mode other than ``RGBA`` is converted to ``RGBA`` first.
    :param background: Background colour; an empty value falls back to
        ``'white'``.
    :return: An ``RGB`` image.
    """
    if pic.mode == 'RGB':
        return pic
    if pic.mode != 'RGBA':
        pic = pic.convert('RGBA')

    background = background or 'white'
    result = Image.new('RGBA', pic.size, background)
    # The image is passed as its own mask for the paste.
    result.paste(pic, (0, 0), pic)
    return result.convert('RGB')
def image_to_tensor(pic: Image.Image, size: int = 512, keep_ratio: bool = True, background: str = 'white'):
    """
    Turn an image into a tensor by chaining ``fill_background``, ``resize``
    and ``to_tensor``, in that order.

    :param pic: Source image.
    :param size: Target size passed to ``resize``.
    :param keep_ratio: Whether ``resize`` keeps the aspect ratio.
    :param background: Background colour passed to ``fill_background``.
    :return: The array produced by ``to_tensor``.
    """
    flattened = fill_background(pic, background)
    resized = resize(flattened, size, keep_ratio)
    return to_tensor(resized)
# Return whatever ``hf_hub_download`` yields for the ONNX model file; ``name``
# defaults to 'TResnet-D-FLq_ema_6-10000.onnx'.
# NOTE(review): the arguments of the ``hf_hub_download`` call and its closing
# parenthesis are missing from this view, so the repository it downloads from
# cannot be determined here. Presumably ``name`` is passed as the file name --
# restore the call from the original source.
def get_onnx_model_file(name='TResnet-D-FLq_ema_6-10000.onnx'):
return hf_hub_download(
@lru_cache()
def _open_onnx_model(ckpt: str, provider: str) -> InferenceSession:
    """
    Create an ONNX Runtime inference session for a model file.

    The result is cached per ``(ckpt, provider)`` pair, so repeated calls
    reuse the same session instead of building it again.

    :param ckpt: Path of the ONNX model file.
    :param provider: Execution provider name, e.g. ``"CPUExecutionProvider"``.
    :return: The inference session.
    """
    options = SessionOptions()
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    if provider == "CPUExecutionProvider":
        # ``os.cpu_count()`` may return ``None``; keep the default in that case.
        cpu_count = os.cpu_count()
        if cpu_count:
            options.intra_op_num_threads = cpu_count

    logging.info(f'Model {ckpt!r} loaded with provider {provider!r}')
    return InferenceSession(ckpt, options, [provider])
# Load the class names from a JSON file obtained through ``hf_hub_download``.
# NOTE(review): the arguments of the ``hf_hub_download`` call and its closing
# parenthesis are missing from this view, so the repository and file name
# cannot be determined here -- restore the call from the original source.
def load_classes() -> List[str]:
classes_file = hf_hub_download(
with open(classes_file, 'r', encoding='utf-8') as f:
# The parsed JSON document is returned as-is; the annotation declares it to be
# a list of strings.
return json.load(f)
def get_tags_from_image(pic: Image.Image, threshold: float = 0.7, size: int = 512, keep_ratio: bool = False):
    """
    Run the tagging model on an image and return the tags that pass the threshold.

    The model is always opened with the provider resolved from ``'cpu'``.

    :param pic: Image to tag.
    :param threshold: Minimum score (after the sigmoid) for a tag to be kept.
    :param size: Target size passed to ``image_to_tensor``.
    :param keep_ratio: Whether the aspect ratio is kept while resizing.
    :return: Dict mapping tag name to score, ordered by descending score and
        then by tag name.
    """
    real_input = image_to_tensor(pic, size, keep_ratio)
    # Add a leading axis of length 1 in front of the image tensor.
    real_input = real_input.reshape(1, *real_input.shape)

    model = _open_onnx_model(get_onnx_model_file(), get_onnx_provider('cpu'))
    native_output, = model.run(['output'], {'input': real_input})

    # Apply a sigmoid to the raw output and flatten it to one score per class.
    output = (1 / (1 + np.exp(-native_output))).reshape(-1)
    tags = load_classes()

    # Filter first so that only the kept tags need to be sorted.
    pairs = sorted(
        ((tags[i], ratio) for i, ratio in enumerate(output) if ratio >= threshold),
        key=lambda x: (-x[1], x[0]),
    )
    return {tag: float(ratio) for tag, ratio in pairs}
# Characters that receive a backslash prefix when escaping is requested:
# the backslash itself and both parentheses.
RE_SPECIAL = re.compile(r'([\\()])')


def image_to_mldanbooru_tags(pic: Image.Image, threshold: float, size: int, keep_ratio: bool,
                             use_spaces: bool, use_escape: bool, include_ranks: bool, score_descend: bool) \
        -> Tuple[str, Mapping[str, float]]:
    """
    Tag an image and render the resulting tags as a comma-separated text.

    :param pic: Image to tag.
    :param threshold: Minimum score for a tag to be kept.
    :param size: Size the image is resized to before recognition.
    :param keep_ratio: Whether the aspect ratio is kept while resizing.
    :param use_spaces: Replace ``_`` with a space in each tag.
    :param use_escape: Prefix backslashes and parentheses with a backslash.
    :param include_ranks: Render each tag as ``(tag:score)`` with three decimals.
    :param score_descend: Sort the tags by descending score, then by name.
    :return: Tuple of the exported text and the tag-to-score mapping returned
        by ``get_tags_from_image``.
    """
    filtered_tags = get_tags_from_image(pic, threshold, size, keep_ratio)

    tags_pairs = filtered_tags.items()
    if score_descend:
        tags_pairs = sorted(tags_pairs, key=lambda x: (-x[1], x[0]))

    text_items = []
    for tag, score in tags_pairs:
        tag_outformat = tag
        if use_spaces:
            tag_outformat = tag_outformat.replace('_', ' ')
        if use_escape:
            tag_outformat = RE_SPECIAL.sub(r'\\\1', tag_outformat)
        if include_ranks:
            tag_outformat = f"({tag_outformat}:{score:.3f})"
        # Collect the formatted tag; without this the exported text stays empty.
        text_items.append(tag_outformat)

    output_text = ', '.join(text_items)
    return output_text, filtered_tags
# Script entry point: declares the Gradio components of the tagging demo.
# NOTE(review): indentation is lost in this view, so the nesting of the
# ``with`` blocks below cannot be confirmed from here.
if __name__ == '__main__':
with gr.Blocks() as demo:
with gr.Row():
with gr.Column():
# Input widgets: the image plus the recognition and formatting options.
gr_input_image = gr.Image(type='pil', label='Original Image')
with gr.Row():
gr_threshold = gr.Slider(0.0, 1.0, 0.7, label='Tagging Confidence Threshold')
gr_image_size = gr.Slider(128, 960, 512, step=32, label='Image for Recognition')
gr_keep_ratio = gr.Checkbox(value=False, label='Keep the Ratio')
with gr.Row():
gr_space = gr.Checkbox(value=False, label='Use Space Instead Of _')
gr_escape = gr.Checkbox(value=True, label='Use Text Escape')
gr_confidence = gr.Checkbox(value=False, label='Keep Confidences')
gr_order = gr.Checkbox(value=True, label='Descend By Confidence')
gr_btn_submit = gr.Button(value='Tagging', variant='primary')
with gr.Column():
# Output widgets: one tab for the tag scores, one for the exported text.
with gr.Tabs():
with gr.Tab("Tags"):
gr_tags = gr.Label(label='Tags')
with gr.Tab("Exported Text"):
gr_output_text = gr.TextArea(label='Exported Text')
# NOTE(review): the lines below look like the input and output component lists
# of an event-handler registration whose opening line is not visible here.
# The eight inputs are listed in the same order as the parameters of
# ``image_to_mldanbooru_tags`` and the two outputs match its (text, tags)
# return value. The call is never closed and no launch call is visible --
# restore both from the original source.
gr_input_image, gr_threshold, gr_image_size, gr_keep_ratio,
gr_space, gr_escape, gr_confidence, gr_order
outputs=[gr_output_text, gr_tags],