Spaces:

mikesolar
/

HivisionIDPhotos

Runtime error

App Files Files Community

HivisionIDPhotos / hivisionai /hycv /utils.py

mikesolar

Upload folder using huggingface_hub

d86aa1d over 1 year ago

raw

history blame contribute delete

19.6 kB

	"""
	本文件存放一些自制的简单的图像处理函数
	"""
	from PIL import Image
	import cv2
	import numpy as np
	import math
	import warnings
	import csv
	import glob


	def cover_mask(image_path, mask_path, alpha=0.85, rate=0.1, if_save=True):
	"""
	在图片右下角盖上水印
	:param image_path:
	:param mask_path: 水印路径，以PNG方式读取
	:param alpha：不透明度，默认为0.85
	:param rate：水印比例，越小水印也越小，默认为0.1
	:param if_save: 是否将裁剪后的图片保存，如果为True，则保存并返回新图路径，否则不保存，返回截取后的图片对象
	:return: 新的图片路径
	"""
	# 生成新的图片路径，我们默认图片后缀存在且必然包含“.”
	path_len = len(image_path)
	index = 0
	for index in range(path_len - 1, -1, -1):
	if image_path[index] == ".":
	break
	if 3 >= path_len - index >= 6:
	raise TypeError("输入的图片格式有误！")
	new_path = image_path[0:index] + "_with_mask" + image_path[index:path_len]
	# 以png方式读取水印图
	mask = Image.open(mask_path).convert('RGBA')
	mask_h, mask_w = mask.size
	# 以png的方式读取原图
	im = Image.open(image_path).convert('RGBA')
	# 我采取的策略是，先拷贝一张原图im为base作为基底，然后在im上利用paste函数添加水印
	# 此时的水印是完全不透明的，我需要利用blend函数内置参数alpha进行不透明度调整
	base = im.copy()
	# layer = Image.new('RGBA', im.size, (0, 0, 0, ))
	# tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
	h, w = im.size
	# 根据原图大小缩放水印图
	mask = mask.resize((int(ratemath.sqrt(whmask_h/mask_w)), int(ratemath.sqrt(whmask_w/mask_h))), Image.ANTIALIAS)
	mh, mw = mask.size
	r, g, b, a = mask.split()
	im.paste(mask, (h-mh, w-mw), mask=a)
	# im.show()
	out = Image.blend(base, im, alpha=alpha).convert('RGB')
	# out = Image.alpha_composite(im, layer).convert('RGB')
	if if_save:
	out.save(new_path)
	return new_path
	else:
	return out

	def check_image(image) ->np.ndarray:
	"""
	判断某一对象是否为图像/矩阵类型，最终返回图像/矩阵
	"""
	if not isinstance(image, np.ndarray):
	image = cv2.imread(image, cv2.IMREAD_UNCHANGED)
	return image

	def get_box(image) -> list:
	"""
	这是一个简单的扣图后图像定位函数，不考虑噪点影响
	我们使用遍历的方法，碰到非透明点以后立即返回位置坐标
	:param image:图像信息，可以是图片路径，也可以是已经读取后的图像
	如果传入的是图片路径，我会首先通过读取图片、二值化，然后再进行图像处理
	如果传入的是图像，直接处理，不会二值化
	:return: 回传一个列表，分别是图像的上下（y）左右（x）自个值
	"""
	image = check_image(image)
	height, width, _ = image.shape
	try:
	b, g, r, a = cv2.split(image)
	# 二值化处理
	a = (a > 127).astype(np.int_)
	except ValueError:
	# 说明传入的是无透明图层的图像，直接返回图像尺寸
	warnings.warn("你传入了一张非四通道格式的图片！")
	return [0, height, 0, width]
	flag1, flag2 = 0, 0
	box = [0, 0, 0, 0] # 上下左右
	# 采用两面夹击战术，使用flag1和2确定两面的裁剪程度
	# 先得到上下
	for i in range(height):
	for j in range(width):
	if flag1 == 0 and a[i][j] != 0:
	flag1 = 1
	box[0] = i
	if flag2 == 0 and a[height - i -1][j] != 0:
	flag2 = 1
	box[1] = height - i - 1
	if flag2 * flag1 == 1:
	break
	# 再得到左右
	flag1, flag2 = 0, 0
	for j in range(width):
	for i in range(height):
	if flag1 == 0 and a[i][j] != 0:
	flag1 = 1
	box[2] = j
	if flag2 == 0 and a[i][width - j - 1] != 0:
	flag2 = 1
	box[3] = width - j - 1
	if flag2 * flag1 == 1:
	break
	return box

	def filtering(img, f, x, y, x_max, y_max, x_min, y_min, area=0, noise_size=50) ->tuple:
	"""
	filtering将使用递归的方法得到一个连续图像（这个连续矩阵必须得是单通道的）的范围(坐标)
	:param img: 传入的矩阵
	:param f: 和img相同尺寸的全零矩阵，用于标记递归递归过的点
	:param x: 当前递归到的x轴坐标
	:param y: 当前递归到的y轴坐标
	:param x_max: 递归过程中x轴坐标的最大值
	:param y_max: 递归过程中y轴坐标的最大值
	:param x_min: 递归过程中x轴坐标的最小值
	:param y_min: 递归过程中y轴坐标的最小值
	:param area: 当前递归区域面积大小
	:param noise_size: 最大递归区域面积大小，当area大于noise_size时，函数返回(0, 1)
	:return: 分两种情况，当area大于noise_size时，函数返回(0, 1)，当area小于等于noise_size时，函数返回(box, 0)
	其中box是连续图像的坐标和像素点面积（上下左右，面积）
	理论上来讲，我们可以用这个函数递归出任一图像的形状和坐标，但是从计算机内存、计算速度上考虑，这并不是一个好的选择
	所以这个函数一般用于判断和过滤噪点
	"""
	dire_dir = [(1, 0), (-1, 0), (0, 1), (0, -1), (1, 1), (1, -1), (-1, -1), (-1, 1)]
	height, width = img.shape
	f[x][y] = 1
	for dire in dire_dir:
	delta_x, delta_y = dire
	tmp_x, tmp_y = (x + delta_x, y + delta_y)
	if height > tmp_x >= 0 and width > tmp_y >= 0:
	if img[tmp_x][tmp_y] != 0 and f[tmp_x][tmp_y] == 0:
	f[tmp_x][tmp_y] = 1
	# cv2.imshow("test", f)
	# cv2.waitKey(3)
	area += 1
	if area > noise_size:
	return 0, 1
	else:
	x_max = tmp_x if tmp_x > x_max else x_max
	x_min = tmp_x if tmp_x < x_min else x_min
	y_max = tmp_y if tmp_y > y_max else y_max
	y_min = tmp_y if tmp_y < y_min else y_min
	box, flag = filtering(img, f, tmp_x, tmp_y, x_max, y_max, x_min, y_min, area=area, noise_size=noise_size)
	if flag == 1:
	return 0, 1
	else:
	(x_max, x_min, y_max, y_min, area) = box
	return [x_min, x_max, y_min, y_max, area], 0


	def get_box_pro(image: np.ndarray, model: int = 1, correction_factor=None, thresh: int = 127):
	"""
	本函数能够实现输入一张四通道图像，返回图像中最大连续非透明面积的区域的矩形坐标
	本函数将采用opencv内置函数来解析整个图像的mask，并提供一些参数，用于读取图像的位置信息
	Args:
	image: 四通道矩阵图像
	model: 返回值模式
	correction_factor: 提供一些边缘扩张接口，输入格式为list或者int:[up, down, left, right]。
	举个例子，假设我们希望剪切出的矩形框左边能够偏左1个像素，则输入[0, 0, 1, 0]；
	如果希望右边偏右1个像素，则输入[0, 0, 0, 1]
	如果输入为int，则默认只会对左右两边做拓展，比如输入2，则和[0, 0, 2, 2]是等效的
	thresh: 二值化阈值，为了保持一些羽化效果，thresh必须要小
	Returns:
	model为1时，将会返回切割出的矩形框的四个坐标点信息
	model为2时，将会返回矩形框四边相距于原图四边的距离
	"""
	# ------------ 数据格式规范部分 -------------- #
	# 输入必须为四通道
	if correction_factor is None:
	correction_factor = [0, 0, 0, 0]
	if not isinstance(image, np.ndarray) or len(cv2.split(image)) != 4:
	raise TypeError("输入的图像必须为四通道np.ndarray类型矩阵!")
	# correction_factor规范化
	if isinstance(correction_factor, int):
	correction_factor = [0, 0, correction_factor, correction_factor]
	elif not isinstance(correction_factor, list):
	raise TypeError("correction_factor 必须为int或者list类型!")
	# ------------ 数据格式规范完毕 -------------- #
	# 分离mask
	_, _, _, mask = cv2.split(image)
	# mask二值化处理
	_, mask = cv2.threshold(mask, thresh=thresh, maxval=255, type=0)
	contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
	temp = np.ones(image.shape, np.uint8)*255
	cv2.drawContours(temp, contours, -1, (0, 0, 255), -1)
	contours_area = []
	for cnt in contours:
	contours_area.append(cv2.contourArea(cnt))
	idx = contours_area.index(max(contours_area))
	x, y, w, h = cv2.boundingRect(contours[idx]) # 框出图像
	# ------------ 开始输出数据 -------------- #
	height, width, _ = image.shape
	y_up = y - correction_factor[0] if y - correction_factor[0] >= 0 else 0
	y_down = y + h + correction_factor[1] if y + h + correction_factor[1] < height else height - 1
	x_left = x - correction_factor[2] if x - correction_factor[2] >= 0 else 0
	x_right = x + w + correction_factor[3] if x + w + correction_factor[3] < width else width - 1
	if model == 1:
	# model=1,将会返回切割出的矩形框的四个坐标点信息
	return [y_up, y_down, x_left, x_right]
	elif model == 2:
	# model=2, 将会返回矩形框四边相距于原图四边的距离
	return [y_up, height - y_down, x_left, width - x_right]
	else:
	raise EOFError("请选择正确的模式！")


	def cut(image_path:str, box:list, if_save=True):
	"""
	根据box，裁剪对应的图片区域后保存
	:param image_path: 原图路径
	:param box: 坐标列表，上下左右
	:param if_save：是否将裁剪后的图片保存，如果为True，则保存并返回新图路径，否则不保存，返回截取后的图片对象
	:return: 新图路径或者是新图对象
	"""
	index = 0
	path_len = len(image_path)
	up, down, left, right = box
	image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
	new_image = image[up: down, left: right]
	if if_save:
	for index in range(path_len - 1, -1, -1):
	if image_path[index] == ".":
	break
	if 3 >= path_len - index >= 6:
	raise TypeError("输入的图片格式有误！")
	new_path = image_path[0:index] + "_cut" + image_path[index:path_len]
	cv2.imwrite(new_path, new_image, [cv2.IMWRITE_PNG_COMPRESSION, 9])
	return new_path
	else:
	return new_image


	def zoom_image_without_change_size(image:np.ndarray, zoom_rate, interpolation=cv2.INTER_NEAREST) ->np.ndarray:
	"""
	在不改变原图大小的情况下，对图像进行放大，目前只支持从图像中心放大
	:param image：传入的图像对象
	:param zoom_rate：放大比例，单位为倍（初始为1倍）
	:param interpolation：插值方式，与opencv的resize内置参数相对应，默认为最近邻插值
	:return: 裁剪后的图像实例
	"""
	height, width, _ = image.shape
	if zoom_rate < 1:
	# zoom_rate不能小于1
	raise ValueError("zoom_rate不能小于1！")
	height_tmp = int(height * zoom_rate)
	width_tmp = int(width * zoom_rate)
	image_tmp = cv2.resize(image, (height_tmp, width_tmp), interpolation=interpolation)
	# 定位一下被裁剪的位置，实际上是裁剪框的左上角的点的坐标
	delta_x = (width_tmp - width) // 2 # 横向
	delta_y = (height_tmp - height) // 2 # 纵向
	return image_tmp[delta_y : delta_y + height, delta_x : delta_x + width]


	def filedir2csv(scan_filedir, csv_filedir):
	file_list = glob.glob(scan_filedir+"/*")

	with open(csv_filedir, "w") as csv_file:
	writter = csv.writer(csv_file)
	for file_dir in file_list:
	writter.writerow([file_dir])

	print("filedir2csv success!")


	def full_ties(image_pre:np.ndarray):
	height, width = image_pre.shape
	# 先膨胀
	kernel = np.ones((5, 5), dtype=np.uint8)
	dilate = cv2.dilate(image_pre, kernel, 1)
	# cv2.imshow("dilate", dilate)
	def FillHole(image):
	# 复制 image 图像
	im_floodFill = image.copy()
	# Mask 用于 floodFill，官方要求长宽+2
	mask = np.zeros((height + 2, width + 2), np.uint8)
	seedPoint = (0, 0)
	# floodFill函数中的seedPoint对应像素必须是背景
	is_break = False
	for i in range(im_floodFill.shape[0]):
	for j in range(im_floodFill.shape[1]):
	if (im_floodFill[i][j] == 0):
	seedPoint = (i, j)
	is_break = True
	break
	if (is_break):
	break
	# 得到im_floodFill 255填充非孔洞值
	cv2.floodFill(im_floodFill, mask, seedPoint, 255)
	# cv2.imshow("tmp1", im_floodFill)
	# 得到im_floodFill的逆im_floodFill_inv
	im_floodFill_inv = cv2.bitwise_not(im_floodFill)
	# cv2.imshow("tmp2", im_floodFill_inv)
	# 把image、im_floodFill_inv这两幅图像结合起来得到前景
	im_out = image \| im_floodFill_inv
	return im_out
	# 洪流算法填充
	image_floodFill = FillHole(dilate)
	# 填充图和原图合并
	image_final = image_floodFill \| image_pre
	# 再腐蚀
	kernel = np.ones((5, 5), np.uint8)
	erosion= cv2.erode(image_final, kernel, iterations=6)
	# cv2.imshow("erosion", erosion)
	# 添加高斯模糊
	blur = cv2.GaussianBlur(erosion, (5, 5), 2.5)
	# cv2.imshow("blur", blur)
	# image_final = merge_image(image_pre, erosion)
	# 再与原图合并
	image_final = image_pre \| blur
	# cv2.imshow("final", image_final)
	return image_final


	def cut_BiggestAreas(image):
	# 裁剪出整张图轮廓最大的部分
	def find_BiggestAreas(image_pre):
	# 定义一个三乘三的卷积核
	kernel = np.ones((3, 3), dtype=np.uint8)
	# 将输入图片膨胀
	# dilate = cv2.dilate(image_pre, kernel, 3)
	# cv2.imshow("dilate", dilate)
	# 将输入图片二值化
	_, thresh = cv2.threshold(image_pre, 127, 255, cv2.THRESH_BINARY)
	# cv2.imshow("thresh", thresh)
	# 将二值化后的图片膨胀
	dilate_afterThresh = cv2.dilate(thresh, kernel, 5)
	# cv2.imshow("thresh_afterThresh", dilate_afterThresh)
	# 找轮廓
	contours_, hierarchy = cv2.findContours(dilate_afterThresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
	# 识别出最大的轮廓
	# 需要注意的是,在低版本的findContours当中返回的结果是tuple,不支持pop,所以需要将其转为pop
	contours = [x for x in contours_]
	area = map(cv2.contourArea, contours)
	area_list = list(area)
	area_max = max(area_list)
	post = area_list.index(area_max)
	# 将最大的区域保留,其余全部填黑
	contours.pop(post)
	for i in range(len(contours)):
	cv2.drawContours(image_pre, contours, i, 0, cv2.FILLED)
	# cv2.imshow("cut", image_pre)
	return image_pre
	b, g, r, a = cv2.split(image)
	a_new = find_BiggestAreas(a)
	new_image = cv2.merge((b, g, r, a_new))
	return new_image


	def locate_neck(image:np.ndarray, proportion):
	"""
	根据输入的图片(四通道)和proportion(自上而下)的比例,定位到相应的y点,然后向内收缩,直到两边的像素点不透明
	"""
	if image.shape[-1] != 4:
	raise TypeError("请输入一张png格式的四通道图片!")
	if proportion > 1 or proportion <=0:
	raise ValueError("proportion 必须在0~1之间!")
	_, _, _, a = cv2.split(image)
	height, width = a.shape
	_, a = cv2.threshold(a, 127, 255, cv2.THRESH_BINARY)
	y = int(height * proportion)
	x = 0
	for x in range(width):
	if a[y][x] == 255:
	break
	left = (y, x)
	for x in range(width - 1, -1 , -1):
	if a[y][x] == 255:
	break
	right = (y, x)
	return left, right, right[1] - left[1]


	def get_cutbox_image(input_image):
	height, width = input_image.shape[0], input_image.shape[1]
	y_top, y_bottom, x_left, x_right = get_box_pro(input_image, model=2)
	result_image = input_image[y_top:height - y_bottom, x_left:width - x_right]
	return result_image


	def brightnessAdjustment(image: np.ndarray, bright_factor: int=0):
	"""
	图像亮度调节
	:param image: 输入的图像矩阵
	:param bright_factor:亮度调节因子，可正可负,没有范围限制
	当bright_factor ---> +无穷时，图像全白
	当bright_factor ---> -无穷时，图像全黑
	:return: 处理后的图片
	"""
	res = np.uint8(np.clip(np.int16(image) + bright_factor, 0, 255))
	return res


	def contrastAdjustment(image: np.ndarray, contrast_factor: int = 0):
	"""
	图像对比度调节,实际上调节对比度的同时对亮度也有一定的影响
	:param image: 输入的图像矩阵
	:param contrast_factor:亮度调节因子，可正可负,范围在[-100, +100]之间
	当contrast_factor=-100时，图像变为灰色
	:return: 处理后的图片
	"""
	contrast_factor = 1 + min(contrast_factor, 100) / 100 if contrast_factor > 0 else 1 + max(contrast_factor,
	-100) / 100
	image_b = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	bright_ = image_b.mean()
	res = np.uint8(np.clip(contrast_factor * (np.int16(image) - bright_) + bright_, 0, 255))
	return res


	class CV2Bytes(object):
	@staticmethod
	def byte_cv2(image_byte, flags=cv2.IMREAD_COLOR) ->np.ndarray:
	"""
	将传入的字节流解码为图像, 当flags为 -1 的时候为无损解码
	"""
	np_arr = np.frombuffer(image_byte,np.uint8)
	image = cv2.imdecode(np_arr, flags)
	return image

	@staticmethod
	def cv2_byte(image:np.ndarray, imageType:str=".jpg"):
	"""
	将传入的图像解码为字节流
	"""
	_, image_encode = cv2.imencode(imageType, image)
	image_byte = image_encode.tobytes()
	return image_byte


	def comb2images(src_white:np.ndarray, src_black:np.ndarray, mask:np.ndarray) -> np.ndarray:
	"""输入两张图片，将这两张图片根据输入的mask进行叠加处理
	这里并非简单的cv2.add(),因为也考虑了羽化部分，所以需要进行一些其他的处理操作
	核心的算法为: dst = (mask * src_white + (1 - mask) * src_black).astype(np.uint8)

	Args:
	src_white (np.ndarray): 第一张图像，代表的是mask中的白色区域，三通道
	src_black (np.ndarray): 第二张图像，代表的是mask中的黑色区域，三通道
	mask (np.ndarray): mask.输入为单通道，后续会归一化并转为三通道
	需要注意的是这三者的尺寸应该是一样的

	Returns:
	np.ndarray: 返回的三通道图像
	"""
	# 函数内部不检查相关参数是否一样，使用的时候需要注意一下
	mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR).astype(np.float32) / 255
	return (mask * src_white + (1 - mask) * src_black).astype(np.uint8)