amino-captcha-ocr / captcha_processor.py
woolbot's picture
Upload 4 files
e212778
raw
history blame contribute delete
No virus
4.06 kB
from collections import Counter

import cv2
from numpy import all as np_all
from numpy import asarray as np_as_array
class CaptchaProcessor:
    """In-place OpenCV preprocessing steps for a single captcha image.

    The decoded image is kept in ``self.img`` (a numpy array) and every
    processing method overwrites or mutates it. Some methods require a
    multi-channel (colour) image and others a single-channel one; each
    docstring states which.
    """

    # Pure white pixel; ignored by get_letters_color() when counting colours.
    WHITE_RGB = (255, 255, 255)

    def __init__(self, data: bytes):
        """Decode an encoded image (PNG, JPEG, ...) held in ``data``.

        NOTE: ``cv2.imdecode`` does not raise on undecodable input; it returns
        ``None``, in which case ``self.img`` is ``None`` and later calls fail.
        """
        self.img = cv2.imdecode(
            np_as_array(bytearray(data), dtype="uint8"),
            cv2.IMREAD_ANYCOLOR
        )

    def threshold(self):
        """Binarise the image using Otsu's automatically selected threshold."""
        # The explicit threshold value (0) is ignored when THRESH_OTSU is set.
        self.img = cv2.threshold(self.img, 0, 255, cv2.THRESH_OTSU)[1]

    def convert_color_space(self, target_space: int):
        """Convert the image with ``cv2.cvtColor``.

        :param target_space: an OpenCV ``cv2.COLOR_*`` conversion code.
        """
        self.img = cv2.cvtColor(self.img, target_space)

    def get_background_color(self) -> tuple:
        """Return the top-left pixel as a tuple of channel values.

        Expects a multi-channel image (a single-channel pixel is a scalar and
        cannot be turned into a tuple).
        """
        return tuple(self.img[0, 0])

    def resize(self, x: int, y: int):
        """Resize the image to width ``x`` and height ``y``."""
        self.img = cv2.resize(self.img, (x, y))

    def save(self, name: str):
        """Write the current image to the file ``name``."""
        cv2.imwrite(name, self.img)

    def get_letters_color(self) -> tuple:
        """Return the most frequent non-white colour of the image.

        :return: tuple of plain ``int`` channel values.
        :raises TypeError: if the image is not multi-channel.
        :raises ValueError: if every pixel equals ``WHITE_RGB``.
        """
        if self.img.ndim != 3:
            raise TypeError("get_letters_color() expects a multi-channel image")
        # Flatten to a list of pixels; tolist() yields plain ints, which hash
        # and compare much faster than numpy scalars.
        flat_pixels = self.img.reshape(-1, self.img.shape[-1]).tolist()
        counts = Counter(
            pixel for pixel in map(tuple, flat_pixels)
            if pixel != self.WHITE_RGB
        )
        if not counts:
            raise ValueError("image contains no non-white pixels")
        return counts.most_common(1)[0][0]

    def replace_color(self, target: tuple, to: tuple):
        """Recolour every pixel exactly equal to ``target`` with ``to``."""
        self.img[np_all(self.img == target, axis=-1)] = to

    def replace_colors(self, exception: tuple, to: tuple):
        """Recolour every pixel that is not exactly ``exception`` with ``to``."""
        # A pixel must be replaced when ANY channel differs from `exception`;
        # requiring all channels to differ would skip colours that merely
        # share one channel value with it.
        self.img[~np_all(self.img == exception, axis=-1)] = to

    def increase_contrast(self, alpha: float, beta: float):
        """Apply ``cv2.convertScaleAbs`` with gain ``alpha`` and offset ``beta``."""
        self.img = cv2.convertScaleAbs(self.img, alpha=alpha, beta=beta)

    def increase_letters_size(self, add_pixels: int):
        """Thicken black (0) strokes by ``add_pixels`` in the four axis directions.

        Every pixel that is 0 before the call paints the pixels up to
        ``add_pixels`` steps above, below, left and right of it black.
        Growth is clipped at the image border. Expects a single-channel image.
        """
        # Snapshot the original black pixels so newly painted ones do not
        # spread any further.
        black = self.img == 0
        for step in range(1, add_pixels + 1):
            # Each line shifts the mask by `step` in one direction; slicing
            # drops whatever would fall outside the image.
            self.img[step:, :][black[:-step, :]] = 0   # downwards
            self.img[:-step, :][black[step:, :]] = 0   # upwards
            self.img[:, step:][black[:, :-step]] = 0   # to the right
            self.img[:, :-step][black[:, step:]] = 0   # to the left

    def distance_letters(self, cf: float):
        """Experimental: push black pixels horizontally away from the centre.

        Original author's note: moving the characters apart may greatly
        improve accuracy, but a correct implementation was never worked out.

        Each black pixel in column ``col`` is erased and redrawn
        ``int((900 // col) * cf)`` columns to the left when
        ``width / col >= 2``, otherwise the same distance to the right.
        Expects a single-channel image.

        Known limitations (kept as in the original experiment):
        * a black pixel in column 0 raises ``ZeroDivisionError``;
        * the target column is not bounds-checked, so it may raise
          ``IndexError`` or wrap around through negative indexing.
        """
        height, width = self.img.shape[0], self.img.shape[1]
        # Column-major scan; the processing order matters because moved
        # pixels can overwrite each other.
        black_pixels = [
            (row, col)
            for col in range(width)
            for row in range(height)
            if self.img[row, col] == 0
        ]
        for row, col in black_pixels:
            self.img[row, col] = 255
            shift = int((900 // col) * cf)
            if width / col >= 2:
                self.img[row, col - shift] = 0
            else:
                self.img[row, col + shift] = 0

    def slice_letters(self):
        """Cut the image into per-letter frames ordered left to right.

        :return: list of frames, each resized to 20x40 (width x height).
        """
        contours, hierarchy = cv2.findContours(
            self.img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
        )
        regions = []
        for idx, contour in enumerate(contours):
            # Keep only contours whose parent is contour 0
            # (presumably the outer frame of the image - TODO confirm).
            if hierarchy[0][idx][3] != 0:
                continue
            x, y, w, h = cv2.boundingRect(contour)
            if w / h > 1.5:
                # Too wide for one glyph: treat it as two letters stuck
                # together and split the box down the middle.
                half_width = int(w / 2)
                regions.append((x, y, half_width, h))
                regions.append((x + half_width, y, half_width, h))
            else:
                regions.append((x, y, w, h))
        # Stable sort by the left edge gives reading order.
        regions.sort(key=lambda region: region[0])

        letters = []
        for x, y, w, h in regions:
            frame = self.img[y:y + h, x:x + w]
            # Regions wider than 35 px are discarded.
            if frame.shape[1] > 35:
                continue
            frame = cv2.resize(frame, (20, 40))
            # NOTE(review): findContours needs a single-channel image while
            # COLOR_RGB2BGR expects 3 or 4 channels - confirm whether
            # COLOR_GRAY2BGR was intended here.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            letters.append(frame)
        return letters

    def show(self):
        """Display the image in a window and block until a key is pressed."""
        cv2.imshow("Captcha Processor", self.img)
        cv2.waitKey(0)

    @classmethod
    def from_file_name(cls, name: str):
        """Build a processor from the image file at path ``name``."""
        # `with` closes the file even if reading or decoding raises.
        with open(name, "rb") as file:
            return cls(file.read())