ConsisID

Running on Zero

App Files Files Community

ConsisID / models /eva_clip /utils_qformer.py

BestWishYsh

Upload 57 files

c32f190 verified 16 days ago

raw

history blame contribute delete

5.64 kB

	import importlib
	import math
	import os
	import random

	import cv2
	import numpy as np
	import torch
	import torch.nn.functional as F
	from torchvision.utils import make_grid
	from transformers import PretrainedConfig


	def seed_everything(seed):
	    """Seed the Python, NumPy and PyTorch random number generators.

	    The seed is also exported through the ``PL_GLOBAL_SEED`` environment
	    variable, stored as a string.

	    Args:
	        seed: Value handed unchanged to every seeding call below.
	    """
	    os.environ["PL_GLOBAL_SEED"] = str(seed)
	    # One seed for all generators, applied in a fixed order: stdlib random,
	    # NumPy, torch.manual_seed, then torch.cuda.manual_seed_all.
	    seeders = (
	        random.seed,
	        np.random.seed,
	        torch.manual_seed,
	        torch.cuda.manual_seed_all,
	    )
	    for apply_seed in seeders:
	        apply_seed(seed)


	def is_torch2_available():
	    """Report whether ``F`` exposes ``scaled_dot_product_attention``.

	    ``F`` is the module-level alias for ``torch.nn.functional``.

	    Returns:
	        bool: ``True`` when the attribute exists, ``False`` otherwise.
	    """
	    has_sdpa = hasattr(F, "scaled_dot_product_attention")
	    return has_sdpa


	def instantiate_from_config(config):
	    """Build an object from a ``{"target": ..., "params": ...}`` style config.

	    Args:
	        config: Container supporting ``in``, ``[]`` and ``.get``. ``"target"``
	            is a dotted path resolved with ``get_obj_from_str``; the optional
	            ``"params"`` entry supplies keyword arguments for the call.
	            The sentinel strings ``'__is_first_stage__'`` and
	            ``"__is_unconditional__"`` are also accepted.

	    Returns:
	        The result of calling the resolved target with ``params``, or
	        ``None`` when ``config`` is one of the two sentinel strings.

	    Raises:
	        KeyError: If ``"target"`` is absent and ``config`` is not a sentinel.
	    """
	    if "target" in config:
	        factory = get_obj_from_str(config["target"])
	        kwargs = config.get("params", {})
	        return factory(**kwargs)

	    # No target: only the two placeholder configs are tolerated.
	    if config in ("__is_first_stage__", "__is_unconditional__"):
	        return None
	    raise KeyError("Expected key `target` to instantiate.")


	def get_obj_from_str(string, reload=False):
	    """Resolve a dotted path such as ``"package.module.Name"`` to an object.

	    Args:
	        string: Dotted path; everything before the last ``.`` is imported as
	            a module and the final component is looked up on it.
	        reload: When true, the module is reloaded before the lookup.

	    Returns:
	        The attribute named by the last path component.

	    Raises:
	        ValueError: If ``string`` contains no ``.`` (the split cannot be
	            unpacked into module and attribute).
	    """
	    module_path, attr_name = string.rsplit(".", 1)
	    if reload:
	        # Re-execute the module so the lookup below sees fresh definitions.
	        importlib.reload(importlib.import_module(module_path))
	    target_module = importlib.import_module(module_path, package=None)
	    return getattr(target_module, attr_name)


	def drop_seq_token(seq, drop_rate=0.5):
	    """Randomly keep a subset of the entries along dimension 1 of ``seq``.

	    A random permutation of the dim-1 indices is drawn and its first
	    ``n * (1 - drop_rate)`` entries (truncated to an integer) select what is
	    kept, so the surviving entries come back in shuffled order.

	    Args:
	        seq: Tensor indexed along dim 1; presumably laid out as
	            (batch, tokens, ...) -- confirm against callers.
	        drop_rate: Fraction of dim-1 entries to discard.

	    Returns:
	        Tensor: ``seq`` restricted along dim 1 to the kept indices.
	    """
	    num_tokens = seq.size(1)
	    shuffled = torch.randperm(num_tokens)
	    # Round away binary floating-point noise before truncating: for example
	    # 10 * (1 - 0.9) evaluates to 0.9999999999999998, which a bare int()
	    # would turn into 0 kept tokens instead of the intended 1.
	    num_keep_tokens = int(round(num_tokens * (1 - drop_rate), 9))
	    return seq[:, shuffled[:num_keep_tokens]]


	def import_model_class_from_model_name_or_path(
	    pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder"
	):
	    """Pick the CLIP text-encoder class named in a pretrained config.

	    The config is loaded with ``PretrainedConfig.from_pretrained`` and the
	    first entry of its ``architectures`` list decides which class is
	    returned.

	    Args:
	        pretrained_model_name_or_path: Forwarded to ``from_pretrained``.
	        revision: Forwarded to ``from_pretrained``.
	        subfolder: Forwarded to ``from_pretrained``.

	    Returns:
	        The ``CLIPTextModel`` or ``CLIPTextModelWithProjection`` class
	        (the class itself, not an instance).

	    Raises:
	        ValueError: If the first architecture is neither of those two.
	    """
	    encoder_config = PretrainedConfig.from_pretrained(
	        pretrained_model_name_or_path, subfolder=subfolder, revision=revision
	    )
	    model_class = encoder_config.architectures[0]

	    # Each class is imported only on the branch that actually returns it.
	    if model_class == "CLIPTextModel":
	        from transformers import CLIPTextModel

	        return CLIPTextModel

	    if model_class == "CLIPTextModelWithProjection":
	        from transformers import CLIPTextModelWithProjection

	        return CLIPTextModelWithProjection

	    raise ValueError(f"{model_class} is not supported.")


	def resize_numpy_image_long(image, resize_long_edge=768):
	    """Shrink an image so its longer side does not exceed ``resize_long_edge``.

	    Images whose longer side already fits are returned untouched (the same
	    object). Otherwise both sides are scaled by the same factor, truncated
	    to integers, and the image is resized with Lanczos interpolation.

	    Args:
	        image: Array whose first two dimensions are height and width.
	        resize_long_edge: Maximum allowed length of the longer side.

	    Returns:
	        The original ``image`` or its resized version.
	    """
	    height, width = image.shape[:2]
	    long_edge = max(height, width)
	    if long_edge > resize_long_edge:
	        scale = resize_long_edge / long_edge
	        # cv2.resize takes the target size as (width, height).
	        target_size = (int(width * scale), int(height * scale))
	        image = cv2.resize(image, target_size, interpolation=cv2.INTER_LANCZOS4)
	    return image


	# from basicsr
	def img2tensor(imgs, bgr2rgb=True, float32=True):
	    """Convert one image array, or a list of them, to channel-first tensors.

	    Args:
	        imgs (list[ndarray] | ndarray): Input image(s); each array is
	            indexed as (H, W, C).
	        bgr2rgb (bool): Swap BGR to RGB. Only applied to 3-channel images.
	        float32 (bool): Cast the resulting tensor to float32.

	    Returns:
	        list[tensor] | tensor: A list of (C, H, W) tensors when ``imgs`` is
	        a list, otherwise a single tensor.
	    """

	    def _convert(array):
	        # bgr2rgb / float32 are read from the enclosing call.
	        if array.shape[2] == 3 and bgr2rgb:
	            if array.dtype == 'float64':
	                # Down-cast before the colour conversion.
	                array = array.astype('float32')
	            array = cv2.cvtColor(array, cv2.COLOR_BGR2RGB)
	        chw = torch.from_numpy(array.transpose(2, 0, 1))
	        return chw.float() if float32 else chw

	    if not isinstance(imgs, list):
	        return _convert(imgs)
	    return [_convert(item) for item in imgs]


	def tensor2img(tensor, rgb2bgr=True, out_type=np.uint8, min_max=(0, 1)):
	    """Convert torch Tensors into image numpy arrays.

	    Values are clamped to [min, max] and then normalized to [0, 1]. The
	    input tensors are never modified.

	    Args:
	        tensor (Tensor or list[Tensor]): Accept shapes:
	            1) 4D mini-batch Tensor of shape (B x 3/1 x H x W);
	            2) 3D Tensor of shape (3/1 x H x W);
	            3) 2D Tensor of shape (H x W).
	            Tensor channel should be in RGB order.
	        rgb2bgr (bool): Whether to change rgb to bgr.
	        out_type (numpy type): output types. If ``np.uint8``, transform outputs
	            to uint8 type with range [0, 255]; otherwise, float type with
	            range [0, 1]. Default: ``np.uint8``.
	        min_max (tuple[int]): min and max values for clamp.

	    Returns:
	        (ndarray or list[ndarray]): 3D ndarray of shape (H x W x C) OR 2D
	        ndarray of shape (H x W). With ``rgb2bgr`` the channel order is BGR.
	        A single array is returned when exactly one tensor was converted
	        (including a one-element list); otherwise a list of arrays.

	    Raises:
	        TypeError: If ``tensor`` is neither a Tensor nor a list of Tensors,
	            or if a tensor is not 2D, 3D or 4D after squeezing dim 0.
	    """
	    if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
	        raise TypeError(f'tensor or list of tensors expected, got {type(tensor)}')

	    if torch.is_tensor(tensor):
	        tensor = [tensor]
	    result = []
	    for _tensor in tensor:
	        # Out-of-place clamp: squeeze/float/detach/cpu can all return views of
	        # (or the very same) storage for a float32 CPU input, so an in-place
	        # clamp_ would silently overwrite the caller's tensor.
	        _tensor = _tensor.squeeze(0).float().detach().cpu().clamp(*min_max)
	        _tensor = (_tensor - min_max[0]) / (min_max[1] - min_max[0])

	        n_dim = _tensor.dim()
	        if n_dim == 4:
	            # Tile the batch into one roughly square grid image.
	            img_np = make_grid(_tensor, nrow=int(math.sqrt(_tensor.size(0))), normalize=False).numpy()
	            img_np = img_np.transpose(1, 2, 0)
	            if rgb2bgr:
	                img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
	        elif n_dim == 3:
	            img_np = _tensor.numpy()
	            img_np = img_np.transpose(1, 2, 0)
	            if img_np.shape[2] == 1:  # gray image
	                img_np = np.squeeze(img_np, axis=2)
	            elif rgb2bgr:
	                img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
	        elif n_dim == 2:
	            img_np = _tensor.numpy()
	        else:
	            raise TypeError(f'Only support 4D, 3D or 2D tensor. But received with dimension: {n_dim}')
	        if out_type == np.uint8:
	            # Unlike MATLAB, numpy.uint8() WILL NOT round by default.
	            img_np = (img_np * 255.0).round()
	        img_np = img_np.astype(out_type)
	        result.append(img_np)
	    if len(result) == 1:
	        result = result[0]
	    return result