Spaces:

woolbot
/

amino-captcha-ocr

Runtime error

App Files Files Community

amino-captcha-ocr / captcha_processor.py

woolbot

Upload 4 files

e212778 over 1 year ago

raw

history blame contribute delete

No virus

4.06 kB

	import cv2
	from numpy import asarray as np_as_array
	from numpy import all as np_all


	class CaptchaProcessor:
	    """Captcha image held in memory plus the preprocessing steps used before OCR.

	    The working image is stored in ``self.img`` as an OpenCV/NumPy array.
	    Every transformation method either replaces ``self.img`` or edits it in
	    place; nothing is returned unless stated otherwise.
	    """

	    WHITE_RGB = (255, 255, 255)

	    # Tuning values used by slice_letters().
	    _WIDE_CONTOUR_RATIO = 1.5   # width/height above this => two glued letters
	    _MAX_LETTER_WIDTH = 35      # wider crops are discarded as noise
	    _LETTER_SIZE = (20, 40)     # (width, height) of every returned crop

	    def __init__(self, data: bytes):
	        """Decode an encoded image (PNG, JPEG, ...) from ``data``.

	        ``cv2.imdecode`` reports undecodable input by returning ``None``
	        instead of raising, so ``self.img`` is ``None`` in that case.
	        """
	        self.img = cv2.imdecode(
	            np_as_array(bytearray(data), dtype="uint8"),
	            cv2.IMREAD_ANYCOLOR,
	        )

	    def threshold(self) -> None:
	        """Binarize the image, letting Otsu's method pick the threshold."""
	        self.img = cv2.threshold(self.img, 0, 255, cv2.THRESH_OTSU)[1]

	    def convert_color_space(self, target_space: int) -> None:
	        """Convert the image with the given ``cv2.COLOR_*`` conversion code."""
	        self.img = cv2.cvtColor(self.img, target_space)

	    def get_background_color(self) -> tuple:
	        """Return the colour of the top-left pixel, taken as the background."""
	        return tuple(self.img[0, 0])

	    def resize(self, x: int, y: int) -> None:
	        """Resize the image to ``x`` pixels wide by ``y`` pixels high."""
	        self.img = cv2.resize(self.img, (x, y))

	    def save(self, name: str) -> None:
	        """Write the image to the file ``name`` (format chosen by extension)."""
	        cv2.imwrite(name, self.img)

	    def get_letters_color(self) -> tuple:
	        """Return the most frequent pixel colour that is not ``WHITE_RGB``.

	        Raises:
	            TypeError: if the image has no channel axis (e.g. greyscale).
	            ValueError: if every pixel is white, so there is nothing to pick.
	        """
	        from collections import Counter

	        if self.img.ndim != 3:
	            raise TypeError("get_letters_color() needs a multi-channel image")

	        # Count every colour in a single pass instead of calling
	        # list.count() once per distinct colour.
	        flat_pixels = self.img.reshape(-1, self.img.shape[-1]).tolist()
	        counts = Counter(map(tuple, flat_pixels))
	        counts.pop(self.WHITE_RGB, None)
	        if not counts:
	            raise ValueError("image contains no non-white pixels")

	        winner = counts.most_common(1)[0][0]
	        # Hand back the same scalar type that indexing the image yields.
	        to_channel = self.img.dtype.type
	        return tuple(to_channel(value) for value in winner)

	    def replace_color(self, target: tuple, to: tuple) -> None:
	        """Recolour every pixel exactly equal to ``target`` with ``to``."""
	        self.img[np_all(self.img == target, axis=-1)] = to

	    def replace_colors(self, exception: tuple, to: tuple) -> None:
	        """Recolour every pixel that is not exactly ``exception`` with ``to``.

	        A pixel is left alone only when all of its channels match
	        ``exception``. Testing ``!=`` on every channel instead would skip
	        pixels that merely share one channel value with ``exception``.
	        """
	        self.img[~np_all(self.img == exception, axis=-1)] = to

	    def increase_contrast(self, alpha: float, beta: float) -> None:
	        """Scale pixel values by ``alpha``, add ``beta`` and saturate to 8 bit."""
	        self.img = cv2.convertScaleAbs(self.img, alpha=alpha, beta=beta)

	    def increase_letters_size(self, add_pixels: int) -> None:
	        """Thicken black strokes by ``add_pixels`` in the four axis directions.

	        Intended for a single-channel image in which ink is 0. Growth is
	        computed from a snapshot of the ink taken before any pixel is
	        changed, and it stops at the image border instead of wrapping round
	        to the opposite side or indexing out of range.
	        """
	        ink = self.img == 0
	        for step in range(1, add_pixels + 1):
	            # Each line paints the pixel `step` away from every ink pixel;
	            # the paired slices keep both sides inside the image.
	            self.img[step:, :][ink[:-step, :]] = 0    # below
	            self.img[:-step, :][ink[step:, :]] = 0    # above
	            self.img[:, step:][ink[:, :-step]] = 0    # right
	            self.img[:, :-step][ink[:, step:]] = 0    # left

	    # Moving the characters away from each other.
	    # Could raise accuracy many times over, but the author never arrived at
	    # a correct implementation - treat this method as experimental.
	    def distance_letters(self, cf: float) -> None:
	        """Push black pixels horizontally away from the image centre.

	        Pixels in the left half move left and pixels in the right half move
	        right, each by ``int((900 // column) * cf)`` columns. Targets are
	        clamped to the image, and ink in column 0 stays where it is (the
	        shift formula is undefined there).

	        Intended for a single-channel image in which ink is 0.
	        """
	        rows, cols = (self.img == 0).nonzero()
	        width = self.img.shape[1]
	        last_col = width - 1

	        targets = []
	        for col in cols.tolist():
	            shift = int((900 // col) * cf) if col else 0
	            in_left_half = col == 0 or width / col >= 2
	            moved = col - shift if in_left_half else col + shift
	            targets.append(min(max(moved, 0), last_col))

	        # Clear all sources first, then draw all targets, so a pixel that
	        # lands on a former ink position is not erased afterwards.
	        self.img[rows, cols] = 255
	        for row, col in zip(rows.tolist(), targets):
	            self.img[row, col] = 0

	    def slice_letters(self) -> list:
	        """Cut the image into per-letter crops ordered left to right.

	        Only contours whose direct parent is contour 0 are used (presumably
	        the outer frame of the thresholded image - confirm against real
	        captchas). A contour much wider than tall is split into two halves.
	        Crops wider than 35 px are dropped; the rest are resized to 20x40
	        and returned as 3-channel images.

	        Returns:
	            A list of image arrays, empty when no contour is found.
	        """
	        # OpenCV 3 returns (image, contours, hierarchy), OpenCV 4 returns
	        # (contours, hierarchy); the last two items are the same in both.
	        contours, hierarchy = cv2.findContours(
	            self.img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
	        )[-2:]
	        if hierarchy is None:
	            return []

	        regions = []
	        for contour, links in zip(contours, hierarchy[0]):
	            if links[3] != 0:  # links[3] is the parent contour index
	                continue
	            x, y, w, h = cv2.boundingRect(contour)
	            if w / h > self._WIDE_CONTOUR_RATIO:
	                half_width = int(w / 2)
	                regions.append((x, y, half_width, h))
	                regions.append((x + half_width, y, half_width, h))
	            else:
	                regions.append((x, y, w, h))
	        regions.sort(key=lambda region: region[0])

	        letters = []
	        for x, y, w, h in regions:
	            frame = self.img[y:y + h, x:x + w]
	            if frame.shape[1] > self._MAX_LETTER_WIDTH:
	                continue
	            frame = cv2.resize(frame, self._LETTER_SIZE)
	            # findContours works on single-channel images, and RGB2BGR
	            # rejects those, so grey crops are expanded with GRAY2BGR.
	            if frame.ndim == 2:
	                conversion = cv2.COLOR_GRAY2BGR
	            else:
	                conversion = cv2.COLOR_RGB2BGR
	            letters.append(cv2.cvtColor(frame, conversion))
	        return letters

	    def show(self) -> None:
	        """Display the image in a window and block until a key is pressed."""
	        cv2.imshow("Captcha Processor", self.img)
	        cv2.waitKey(0)

	    @classmethod
	    def from_file_name(cls, name: str):
	        """Build a processor from the image file at path ``name``."""
	        # The context manager closes the file even if decoding raises.
	        with open(name, "rb") as file:
	            return cls(file.read())