Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +112 -0
- requirements.txt +77 -0
app.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Function to order points in a consistent manner
|
6 |
+
def order_points(pts):
    """Pick four corner points from ``pts`` in a fixed order.

    The returned order is top-left, top-right, bottom-right, bottom-left,
    using image coordinates (x to the right, y downward):

    * top-left has the smallest ``x + y``, bottom-right the largest;
    * top-right has the smallest ``y - x``, bottom-left the largest.

    Args:
        pts: Sequence (or array) of ``(x, y)`` pairs. Normally four points,
            but any non-empty N x 2 input is accepted; the four extremes
            described above are selected.

    Returns:
        A list of four ``[x, y]`` lists of Python ints (coordinates are
        truncated toward zero).

    Raises:
        ValueError: If ``pts`` is empty or is not an N x 2 collection.
    """
    pts = np.asarray(pts)
    # Fail early with a readable message instead of an axis/index error
    # raised from inside the NumPy reductions below.
    if pts.ndim != 2 or pts.shape[0] == 0 or pts.shape[1] != 2:
        raise ValueError(
            "pts must be a non-empty sequence of (x, y) pairs, "
            f"got array of shape {pts.shape}"
        )

    rect = np.zeros((4, 2), dtype='float32')

    # x + y: minimum is the top-left corner, maximum the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x (np.diff along axis 1): minimum is the top-right corner,
    # maximum the bottom-left.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect.astype('int').tolist()
|
16 |
+
|
17 |
+
# Function to find the destination points for perspective transform
|
18 |
+
def find_dest(pts):
    """Compute the destination rectangle for a perspective transform.

    Args:
        pts: Four ``(x, y)`` corners in the order top-left, top-right,
            bottom-right, bottom-left.

    Returns:
        ``[[0, 0], [w, 0], [w, h], [0, h]]`` as lists of Python ints, where
        ``w`` is the longer of the top/bottom edges and ``h`` the longer of
        the left/right edges, each truncated to an integer.
    """
    (tl, tr, br, bl) = pts

    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Each edge length is truncated before comparing, so the result is the
    # larger of the two integer lengths.
    max_width = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))
    max_height = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

    # The rectangle is written directly in TL, TR, BR, BL order, so no
    # re-ordering pass is required.
    return [[0, 0], [max_width, 0], [max_width, max_height], [0, max_height]]
|
28 |
+
|
29 |
+
def scan(img):
    """Locate a four-cornered document in ``img`` and return a top-down view.

    Pipeline: optional downscale, morphological closing, GrabCut foreground
    extraction, grayscale + Gaussian blur, Canny edges, dilation, contour
    search, then a perspective warp of the (resized) original image.

    Args:
        img: 3-channel image array in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).

    Returns:
        The warped document image. If the image is too small to run
        GrabCut with a border margin, or none of the five largest contours
        approximates to a quadrilateral, the (possibly resized) input is
        returned unchanged.
    """
    # Resize image if its larger side exceeds the limit
    dim_limit = 1080
    max_dim = max(img.shape[:2])

    if max_dim > dim_limit:
        resize_scale = dim_limit / max_dim
        img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)

    orig_img = img.copy()
    height, width = img.shape[:2]

    # GrabCut needs a non-empty rectangle with a border strip on every
    # side; images too small for that are returned as they are.
    margin = 20
    if width <= 2 * margin or height <= 2 * margin:
        return orig_img

    # Perform morphological closing
    kernel = np.ones((5, 5), np.uint8)
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)

    # Initialize mask and models for GrabCut
    mask = np.zeros(img.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    # The rectangle is (x, y, width, height), so the margin has to be
    # subtracted twice to keep a border strip on the right and bottom too.
    rect = (margin, margin, width - 2 * margin, height - 2 * margin)
    cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
    # Mask values 0 and 2 are zeroed out; every other label is kept.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img = img * mask2[:, :, np.newaxis]

    # Convert image to grayscale and apply Gaussian blur
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (11, 11), 0)

    # Perform Canny edge detection and dilate the edges
    canny = cv2.Canny(gray, 0, 200)
    canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))

    # Find contours and keep the five with the largest area
    contours, _ = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    page = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    # Take the first candidate whose polygon approximation has four corners.
    for c in page:
        epsilon = 0.02 * cv2.arcLength(c, True)
        corners = cv2.approxPolyDP(c, epsilon, True)
        if len(corners) == 4:
            break
    else:
        # No contours at all, or none is a quadrilateral: do not warp with
        # an arbitrary polygon, hand back the unwarped image instead.
        return orig_img

    # Order the corners and find destination points for perspective transform
    corners = sorted(np.concatenate(corners).tolist())
    corners = order_points(corners)
    destination_corners = find_dest(corners)

    # Perform perspective transform to obtain top-down view; the output
    # size is the bottom-right destination corner (width, height).
    M = cv2.getPerspectiveTransform(np.float32(corners), np.float32(destination_corners))
    final = cv2.warpPerspective(
        orig_img,
        M,
        (destination_corners[2][0], destination_corners[2][1]),
        flags=cv2.INTER_LINEAR,
    )

    return final
|
85 |
+
|
86 |
+
def gradio_interface(input_image):
    """Gradio callback: run the scanner on an uploaded RGB image.

    Args:
        input_image: RGB image array, or ``None`` when nothing was uploaded.

    Returns:
        The result of ``scan`` converted back to RGB, or ``None`` when
        ``input_image`` is ``None``.
    """
    if input_image is not None:
        # scan() works on BGR data, so convert on the way in and out.
        bgr_input = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
        bgr_result = scan(bgr_input)
        return cv2.cvtColor(bgr_result, cv2.COLOR_BGR2RGB)
    return None
|
92 |
+
|
93 |
+
# Static Markdown texts displayed above the scanner widget.
_TITLE_MD = "# Document Scanner using OpenCV"
_REFERENCE_MD = "## [Reference: 4-point OpenCV getPerspective Transform Example](https://pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/)"
_FLOW_MD = """
### Image Processing Flow:
- Resize the image if its dimensions exceed the limit.
- Apply morphological transformations to enhance document boundaries.
- Perform GrabCut for foreground extraction.
- Convert the image to grayscale.
- Apply Gaussian blur.
- Perform Canny edge detection.
- Dilate the edges to close gaps.
- Find contours and identify the largest ones likely to be the document edges.
- If a contour with four corners is found, transform the perspective to obtain a top-down view of the document.
"""

# Build the page: headings first, then the upload/result image pair wired
# to gradio_interface.
with gr.Blocks() as demo:
    gr.Markdown(_TITLE_MD)
    gr.Markdown(_REFERENCE_MD)
    gr.Markdown(_FLOW_MD)
    image_input = gr.Image(label="Upload Image", type="numpy")
    image_output = gr.Image(label="Processed Image", type="numpy")
    gr.Interface(
        fn=gradio_interface,
        inputs=image_input,
        outputs=image_output,
        allow_flagging="never",
    )

# Start the Gradio server with debug mode enabled.
demo.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
altair==5.3.0
|
3 |
+
annotated-types==0.7.0
|
4 |
+
anyio==4.4.0
|
5 |
+
attrs==23.2.0
|
6 |
+
certifi==2024.7.4
|
7 |
+
charset-normalizer==3.3.2
|
8 |
+
click==8.1.7
|
9 |
+
contourpy==1.2.1
|
10 |
+
cycler==0.12.1
|
11 |
+
dnspython==2.6.1
|
12 |
+
email_validator==2.2.0
|
13 |
+
exceptiongroup==1.2.2
|
14 |
+
fastapi==0.111.0
|
15 |
+
fastapi-cli==0.0.4
|
16 |
+
ffmpy==0.3.2
|
17 |
+
filelock==3.15.4
|
18 |
+
fonttools==4.53.1
|
19 |
+
fsspec==2024.6.1
|
20 |
+
gradio==4.38.1
|
21 |
+
gradio_client==1.1.0
|
22 |
+
h11==0.14.0
|
23 |
+
httpcore==1.0.5
|
24 |
+
httptools==0.6.1
|
25 |
+
httpx==0.27.0
|
26 |
+
huggingface-hub==0.23.4
|
27 |
+
idna==3.7
|
28 |
+
importlib_resources==6.4.0
|
29 |
+
imutils==0.5.4
|
30 |
+
Jinja2==3.1.4
|
31 |
+
jsonschema==4.23.0
|
32 |
+
jsonschema-specifications==2023.12.1
|
33 |
+
kiwisolver==1.4.5
|
34 |
+
markdown-it-py==3.0.0
|
35 |
+
MarkupSafe==2.1.5
|
36 |
+
matplotlib==3.9.1
|
37 |
+
mdurl==0.1.2
|
38 |
+
numpy==2.0.0
|
39 |
+
opencv-python==4.10.0.84
|
40 |
+
orjson==3.10.6
|
41 |
+
packaging==24.1
|
42 |
+
pandas==2.2.2
|
43 |
+
pillow==10.4.0
|
44 |
+
pydantic==2.8.2
|
45 |
+
pydantic_core==2.20.1
|
46 |
+
pydub==0.25.1
|
47 |
+
Pygments==2.18.0
|
48 |
+
pyparsing==3.1.2
|
49 |
+
pytesseract==0.3.10
|
50 |
+
python-dateutil==2.9.0.post0
|
51 |
+
python-dotenv==1.0.1
|
52 |
+
python-multipart==0.0.9
|
53 |
+
pytz==2024.1
|
54 |
+
PyYAML==6.0.1
|
55 |
+
referencing==0.35.1
|
56 |
+
requests==2.32.3
|
57 |
+
rich==13.7.1
|
58 |
+
rpds-py==0.19.0
|
59 |
+
ruff==0.5.1
|
60 |
+
scipy==1.14.0
|
61 |
+
semantic-version==2.10.0
|
62 |
+
shellingham==1.5.4
|
63 |
+
six==1.16.0
|
64 |
+
sniffio==1.3.1
|
65 |
+
starlette==0.37.2
|
66 |
+
tomlkit==0.12.0
|
67 |
+
toolz==0.12.1
|
68 |
+
tqdm==4.66.4
|
69 |
+
typer==0.12.3
|
70 |
+
typing_extensions==4.12.2
|
71 |
+
tzdata==2024.1
|
72 |
+
ujson==5.10.0
|
73 |
+
urllib3==2.2.2
|
74 |
+
uvicorn==0.30.1
|
75 |
+
uvloop==0.19.0
|
76 |
+
watchfiles==0.22.0
|
77 |
+
websockets==11.0.3
|