Spaces:
Running
Running
tensorflow is not my friend
Browse files
- Dockerfile +1 -1
- requirements.txt +2 -4
- routes/aminoOSRapi/captcha_processor.py +0 -111
- routes/aminoOSRapi/main.py +75 -6
- routes/aminoOSRapi/model.h5 +0 -3
- routes/aminoOSRapi/utils.py +0 -43
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
FROM
|
2 |
|
3 |
COPY . /app
|
4 |
|
|
|
1 |
+
FROM python:3.11.1-bullseye
|
2 |
|
3 |
COPY . /app
|
4 |
|
requirements.txt
CHANGED
@@ -1,14 +1,12 @@
|
|
1 |
wget
|
2 |
flask
|
3 |
-
|
4 |
-
numpy
|
5 |
psutil
|
6 |
yt_dlp
|
7 |
-
aiohttp
|
8 |
urllib3
|
9 |
requests
|
10 |
py-cpuinfo
|
11 |
-
|
12 |
ffmpeg-python
|
13 |
flask_limiter
|
14 |
SpeechRecognition
|
|
|
1 |
wget
|
2 |
flask
|
3 |
+
Pillow
|
|
|
4 |
psutil
|
5 |
yt_dlp
|
|
|
6 |
urllib3
|
7 |
requests
|
8 |
py-cpuinfo
|
9 |
+
pytesseract
|
10 |
ffmpeg-python
|
11 |
flask_limiter
|
12 |
SpeechRecognition
|
routes/aminoOSRapi/captcha_processor.py
DELETED
@@ -1,111 +0,0 @@
|
|
1 |
-
import cv2
|
2 |
-
from numpy import asarray as np_as_array
|
3 |
-
from numpy import all as np_all
|
4 |
-
|
5 |
-
|
6 |
-
class CaptchaProcessor:
    """In-memory captcha image with the clean-up steps used before recognition.

    ``self.img`` is the array returned by ``cv2.imdecode``; every method
    either reads it or replaces/mutates it in place.
    """

    # Colour used for "erased" pixels.
    WHITE_RGB = (255, 255, 255)

    def __init__(self, data: bytes):
        """Decode raw image bytes.

        Raises:
            ValueError: if OpenCV cannot decode ``data`` (``imdecode``
                returned ``None``), instead of failing later with an
                unrelated ``AttributeError``.
        """
        self.img = cv2.imdecode(
            np_as_array(bytearray(data), dtype="uint8"),
            cv2.IMREAD_ANYCOLOR
        )
        if self.img is None:
            raise ValueError("data is not a decodable image")

    def threshold(self):
        """Binarize the image with Otsu's threshold."""
        self.img = cv2.threshold(self.img, 0, 255, cv2.THRESH_OTSU)[1]

    def convert_color_space(self, target_space: int):
        """Convert the image using an OpenCV ``cv2.COLOR_*`` conversion code."""
        self.img = cv2.cvtColor(self.img, target_space)

    def get_background_color(self) -> tuple:
        """Return the colour of the top-left pixel, taken as the background."""
        return tuple(self.img[0, 0])

    def resize(self, x: int, y: int):
        """Resize the image to ``x`` by ``y`` (passed to ``cv2.resize`` as ``(x, y)``)."""
        self.img = cv2.resize(self.img, (x, y))

    def save(self, name: str):
        """Write the current image to the file ``name``."""
        cv2.imwrite(name, self.img)

    def get_letters_color(self) -> tuple:
        """Return the most frequent non-white colour in the image.

        Raises:
            ValueError: if every pixel is white.
        """
        from collections import Counter

        # One counting pass instead of calling list.count() per distinct colour.
        counts = Counter(
            color
            for row in self.img.tolist()
            for color in map(tuple, row)
            if color != self.WHITE_RGB
        )
        if not counts:
            raise ValueError("image contains no non-white pixels")
        return counts.most_common(1)[0][0]

    def replace_color(self, target: tuple, to: tuple):
        """Repaint every pixel whose colour equals ``target`` with ``to``."""
        self.img[np_all(self.img == target, axis=-1)] = to

    def replace_colors(self, exception: tuple, to: tuple):
        """Repaint every pixel whose colour is NOT ``exception`` with ``to``.

        A pixel is kept only when all of its channels match ``exception``;
        testing ``all(img != exception)`` would wrongly keep pixels that
        merely share one channel value with it.
        """
        self.img[~np_all(self.img == exception, axis=-1)] = to

    def increase_contrast(self, alpha: float, beta: float):
        """Scale pixel values by ``alpha`` and add ``beta`` (``cv2.convertScaleAbs``)."""
        self.img = cv2.convertScaleAbs(self.img, alpha=alpha, beta=beta)

    def increase_letters_size(self, add_pixels: int):
        """Thicken black (0) strokes by ``add_pixels`` in the four axis directions.

        Expects a single-channel image. Neighbours that fall outside the
        image are skipped, so strokes touching a border neither raise
        ``IndexError`` nor bleed onto the opposite edge via negative indices.
        """
        height, width = self.img.shape[:2]
        # Snapshot the black pixels first so freshly painted ones do not cascade.
        rows, cols = (self.img == 0).nonzero()
        for row, col in zip(rows.tolist(), cols.tolist()):
            for step in range(1, add_pixels + 1):
                for r, c in ((row + step, col), (row - step, col),
                             (row, col + step), (row, col - step)):
                    if 0 <= r < height and 0 <= c < width:
                        self.img[r, c] = 0

    # Moves the characters away from each other.
    # Could improve accuracy many times over, but the author never came up
    # with a correct implementation - treat this method as experimental.
    def distance_letters(self, cf: float):
        """Shift black pixels horizontally, in place, by ``int((900 // x) * cf)``.

        Pixels with ``width / x >= 2`` move left, the others move right.
        Column 0 is left untouched (the shift is undefined there and used to
        raise ``ZeroDivisionError``); destinations are clamped to the image.
        """
        width = self.img.shape[1]
        rows, cols = (self.img == 0).nonzero()
        # Column-major order, matching the order the pixels were originally
        # visited in; it matters because the moves happen in place.
        for x, y in sorted(zip(cols.tolist(), rows.tolist())):
            if x == 0:
                continue
            self.img[y, x] = 255
            shift = int((900 // x) * cf)
            target = x - shift if width / x >= 2 else x + shift
            self.img[y, min(max(target, 0), width - 1)] = 0

    def slice_letters(self):
        """Cut the image into per-character frames, ordered left to right.

        Only contours whose parent is contour 0 are used. A bounding box
        wider than 1.5x its height is split into two halves; boxes wider
        than 35 px are dropped. Each frame is resized to 20x40.

        Returns:
            list of frames; empty when no contour is found.
        """
        # [-2:] works with both the two-value and the older three-value
        # return shape of findContours.
        contours, hierarchy = cv2.findContours(
            self.img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
        )[-2:]
        if hierarchy is None:
            return []
        regions = []
        for idx, contour in enumerate(contours):
            if hierarchy[0][idx][3] != 0:
                continue
            (x, y, w, h) = cv2.boundingRect(contour)
            if w / h > 1.5:
                half_width = int(w / 2)
                regions.append((x, y, half_width, h))
                regions.append((x + half_width, y, half_width, h))
            else:
                regions.append((x, y, w, h))
        regions.sort(key=lambda region: region[0])
        letters = []
        for x, y, w, h in regions:
            frame = self.img[y:y + h, x:x + w]
            if frame.shape[1] > 35:
                continue
            frame = cv2.resize(frame, (20, 40))
            # NOTE(review): findContours needs a single-channel image while
            # COLOR_RGB2BGR expects 3/4 channels - confirm this conversion.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            letters.append(frame)
        return letters

    def show(self):
        """Display the image in a window and block until a key is pressed."""
        cv2.imshow("Captcha Processor", self.img)
        cv2.waitKey(0)

    @classmethod
    def from_file_name(cls, name: str):
        """Build a processor from the image file at path ``name``."""
        # The context manager closes the file even if decoding raises.
        with open(name, "rb") as file:
            return cls(file.read())
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
routes/aminoOSRapi/main.py
CHANGED
@@ -1,12 +1,81 @@
|
|
1 |
-
|
2 |
-
import
|
|
|
3 |
|
4 |
def apipredict(request):
|
5 |
try:
|
6 |
if request.method == 'POST': url = request.form['url']
|
7 |
else: url = request.args['url']
|
8 |
-
if url.strip() in ['', None]:
|
|
|
9 |
except: return {"status": "error", "details": { "error_code": 101, "error_details": "No link provided" }}
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import pytesseract
|
3 |
+
from PIL import Image, ImageEnhance
|
4 |
|
5 |
def apipredict(request):
    """Download a captcha image from the ``url`` parameter and OCR its digits.

    Pipeline: isolate the dominant glyph colour found on a fixed scan row,
    paint everything else white, boost contrast, thicken the strokes by two
    pixels, boost contrast again, upscale 5x and run Tesseract restricted to
    the digits 2-9.

    Args:
        request: request object exposing ``method``, ``form`` and ``args``;
            ``url`` is read from ``form`` on POST and from ``args`` otherwise.

    Returns:
        ``{"solution": <text>}`` with surrounding whitespace stripped, or the
        error-101 dict when no URL is supplied.

    Raises:
        requests.RequestException: the download failed or returned an HTTP error.
        ValueError: no glyph-coloured pixel was found on the scan row.
    """
    # Function-scope stdlib imports keep this block self-contained.
    from collections import Counter
    from io import BytesIO

    params = request.form if request.method == 'POST' else request.args
    url = params.get('url')
    if url is None or not url.strip():
        return {"status": "error", "details": { "error_code": 101, "error_details": "No link provided" }}
    url = url.strip()

    WALK_Y = 33        # row sampled to find the glyph colour
    WALK_WIDTH = 249   # number of columns sampled on that row
    WHITE_RGB = (255, 255, 255)
    # Stroke thickening: one and two pixels along each axis.
    GROW_OFFSETS = ((1, 0), (-1, 0), (0, 1), (0, -1),
                    (2, 0), (-2, 0), (0, 2), (0, -2))

    # NOTE(review): ``url`` is caller-controlled and fetched server-side
    # (SSRF risk) - consider restricting the allowed hosts.
    response = requests.get(url, timeout=15)
    response.raise_for_status()
    # Force RGB so pixels are always 3-tuples comparable with WHITE_RGB.
    image = Image.open(BytesIO(response.content)).convert("RGB")
    width, height = image.size
    pix = image.load()

    # The top-left pixel is taken as the background ("garbage") colour.
    background = pix[0, 0]
    scan_y = min(WALK_Y, height - 1)
    row_colors = Counter(
        pix[x, scan_y]
        for x in range(min(WALK_WIDTH, width))
        if pix[x, scan_y] not in (background, WHITE_RGB)
    )
    if not row_colors:
        raise ValueError("no captcha glyph pixels found on the scan row")
    letters_color = row_colors.most_common(1)[0][0]

    # Keep only glyph-coloured pixels and remember where they are.
    glyph = set()
    for y in range(height):
        for x in range(width):
            if pix[x, y] == letters_color:
                glyph.add((x, y))
            else:
                pix[x, y] = WHITE_RGB

    image = ImageEnhance.Contrast(image).enhance(500)

    # Thicken strokes using the pre-contrast glyph positions, so painted
    # pixels do not cascade; neighbours outside the image are skipped.
    enhanced = image.load()
    for x, y in glyph:
        for dx, dy in GROW_OFFSETS:
            nx, ny = x + dx, y + dy
            if 0 <= nx < width and 0 <= ny < height and (nx, ny) not in glyph:
                enhanced[nx, ny] = letters_color

    image = ImageEnhance.Contrast(image).enhance(1200)

    resized = image.resize((width * 5, height * 5))

    decoded = pytesseract.image_to_string(resized, config = "--psm 13 --oem 3 -c tessedit_char_whitelist=23456789", lang = "eng")

    # Tesseract output ends with newline/form-feed characters; return digits only.
    return {"solution": decoded.strip()}
|
routes/aminoOSRapi/model.h5
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:792c015158ffcfaadbb2a65fef9623af7fa1d243e3e1f915444f86c40049ea13
|
3 |
-
size 3730536
|
|
|
|
|
|
|
|
routes/aminoOSRapi/utils.py
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
from keras.models import load_model
|
2 |
-
from aiohttp import ClientSession
|
3 |
-
from numpy import expand_dims as np_expand_dims
|
4 |
-
from .captcha_processor import CaptchaProcessor
|
5 |
-
from asyncio import get_running_loop
|
6 |
-
|
7 |
-
# Load the Keras model once at import time; predict() calls model.predict
# on each sliced character frame.
model = load_model("/app/routes/aminoOSRapi/model.h5")
|
8 |
-
|
9 |
-
|
10 |
-
async def get_binary_from_link(link: str) -> bytes:
    """Fetch ``link`` with a throwaway aiohttp session and return the raw body."""
    async with ClientSession() as http:
        response = await http.get(link)
        payload = await response.read()
        return payload
|
13 |
-
|
14 |
-
|
15 |
-
async def predict(url: str, recursion: int = 0, max_recursion: int = 10) -> dict:
    """Download the captcha at ``url`` and classify its six characters.

    The image is cleaned with ``CaptchaProcessor``, sliced into characters,
    and each slice is passed to ``model.predict`` in the default executor.
    If thickening raises ``IndexError`` or slicing does not yield exactly six
    frames, the captcha is downloaded and processed again.

    Args:
        url: address of the captcha image.
        recursion: number of retries already made (echoed in the result).
        max_recursion: highest retry count allowed. The original retried by
            unbounded self-recursion; the cap of 10 is a chosen default.

    Returns:
        dict with ``prediction`` (concatenated class indices),
        ``letters_predictions`` (class index per character),
        ``full_prediction`` (score list per character) and ``recursion``.

    Raises:
        RecursionError: the retry budget is exhausted (same exception type
            the unbounded recursion eventually produced).
    """
    while True:
        binary = await get_binary_from_link(url)
        processor = CaptchaProcessor(binary)
        processor.replace_color(processor.get_background_color(), processor.WHITE_RGB)
        processor.replace_colors(processor.get_letters_color(), processor.WHITE_RGB)
        # 6 is presumably cv2.COLOR_BGR2GRAY - confirm against the cv2 constants.
        processor.convert_color_space(6)
        processor.threshold()
        try:
            processor.increase_letters_size(2)
        except IndexError:
            letters = None
        else:
            letters = processor.slice_letters()
        if letters is not None and len(letters) == 6:
            break
        if recursion >= max_recursion:
            raise RecursionError(
                f"captcha could not be split into 6 characters after {recursion} retries"
            )
        recursion += 1

    loop = get_running_loop()
    # Start every inference first, then await them, so they run concurrently.
    pending = [
        loop.run_in_executor(None, model.predict, np_expand_dims(letter, axis=0))
        for letter in letters
    ]
    results = [await task for task in pending]
    fulls = [[float(score) for score in result[0]] for result in results]
    # max(scores), not max(*scores): the latter breaks for a single-class output.
    shorts = [scores.index(max(scores)) for scores in fulls]
    final = "".join(str(short) for short in shorts)
    return {
        "prediction": final,
        "letters_predictions": shorts,
        "full_prediction": fulls,
        "recursion": recursion
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|