ad4r5hgs committed on
Commit
f5a8ceb
·
verified ·
1 Parent(s): e6709ba

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +112 -0
  2. requirements.txt +77 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+
5
+ # Function to order points in a consistent manner
6
def order_points(pts):
    """Return four corner points ordered as top-left, top-right, bottom-right, bottom-left.

    The corners are identified from the coordinate sums and differences:
    the top-left point has the smallest ``x + y``, the bottom-right the
    largest ``x + y``, the top-right the smallest ``y - x`` and the
    bottom-left the largest ``y - x``.

    A point is never assigned to two corners while unused points remain.
    Picking each corner independently (plain ``argmin``/``argmax``) would
    return duplicates whenever sums or differences tie, e.g. for a
    diamond-shaped quadrilateral, and a duplicated corner produces a
    degenerate perspective transform.

    Args:
        pts: Sequence of ``(x, y)`` points, or an array of shape ``(N, 2)``
            or ``(N, 1, 2)``.

    Returns:
        A list of four ``[x, y]`` lists of Python ints (coordinates are
        truncated toward zero).

    Raises:
        ValueError: If ``pts`` is empty or cannot be viewed as ``(N, 2)``.
    """
    points = np.asarray(pts, dtype='float64').reshape(-1, 2)
    if len(points) == 0:
        raise ValueError("order_points() requires at least one point")

    sums = points.sum(axis=1)
    diffs = points[:, 1] - points[:, 0]  # y - x, same quantity as np.diff(pts, axis=1)

    picked = []

    def pick(scores):
        # A stable sort keeps the lowest index first among equal scores,
        # which matches the tie-breaking of np.argmin / np.argmax.
        ranking = np.argsort(scores, kind='stable')
        for idx in ranking:
            if int(idx) not in picked:
                picked.append(int(idx))
                return
        # Fewer points than corners: reuse the best-scoring point.
        picked.append(int(ranking[0]))

    pick(sums)    # top-left: smallest x + y
    pick(-sums)   # bottom-right: largest x + y
    pick(diffs)   # top-right: smallest y - x
    pick(-diffs)  # bottom-left: largest y - x

    # picked is in selection order (tl, br, tr, bl); emit tl, tr, br, bl.
    tl, br, tr, bl = picked
    rect = points[[tl, tr, br, bl]].astype('float32')
    return rect.astype('int').tolist()
16
+
17
+ # Function to find the destination points for perspective transform
18
def find_dest(pts):
    """Compute the destination rectangle for a perspective transform.

    The output width is the longer of the quadrilateral's top and bottom
    edges and the output height the longer of its left and right edges,
    each truncated to an integer.

    Args:
        pts: Four ``(x, y)`` points ordered top-left, top-right,
            bottom-right, bottom-left.

    Returns:
        ``[[0, 0], [w, 0], [w, h], [0, h]]`` — the destination corners in
        the same top-left, top-right, bottom-right, bottom-left order.
    """
    (tl, tr, br, bl) = pts

    def edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    max_width = max(int(edge_length(br, bl)), int(edge_length(tr, tl)))
    max_height = max(int(edge_length(tr, br)), int(edge_length(tl, bl)))

    # The rectangle is built directly in tl, tr, br, bl order, so no
    # further re-ordering of the corners is needed.
    return [[0, 0], [max_width, 0], [max_width, max_height], [0, max_height]]
28
+
29
def scan(img):
    """Locate a document in a BGR image and return a top-down view of it.

    Pipeline: optional downscale, morphological closing, GrabCut
    foreground extraction, grayscale + Gaussian blur, Canny edges,
    dilation, contour search, and a four-point perspective warp.

    Args:
        img: Image array passed to OpenCV as a 3-channel BGR image.

    Returns:
        The perspective-corrected document. If none of the five largest
        contours approximates to a quadrilateral, or the detected
        quadrilateral collapses to zero width or height, the (possibly
        downscaled) input image is returned unchanged instead.
    """
    # Resize image if dimensions exceed limit
    dim_limit = 1080
    max_dim = max(img.shape[:2])

    if max_dim > dim_limit:
        resize_scale = dim_limit / max_dim
        img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)

    orig_img = img.copy()
    kernel = np.ones((5, 5), np.uint8)

    # Perform morphological closing
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)

    # Initialize mask for GrabCut
    mask = np.zeros(img.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    rect = (20, 20, img.shape[1] - 20, img.shape[0] - 20)
    cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
    # Mask values 0 and 2 are zeroed out; every other pixel is kept.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img = img * mask2[:, :, np.newaxis]

    # Convert image to grayscale and apply Gaussian blur
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (11, 11), 0)

    # Perform Canny edge detection
    canny = cv2.Canny(gray, 0, 200)
    canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))

    # Find contours and keep the five largest by area
    contours, _ = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    page = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    for c in page:
        epsilon = 0.02 * cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, epsilon, True)
        if len(approx) == 4:
            break
    else:
        # No contours at all, or none that approximates to four corners:
        # there is nothing sensible to warp, so hand back the input.
        return orig_img

    # Order the corners and find destination points for perspective transform
    corners = sorted(approx.reshape(-1, 2).tolist())
    corners = order_points(corners)
    destination_corners = find_dest(corners)

    out_width, out_height = destination_corners[2]
    if out_width < 1 or out_height < 1:
        # A collapsed quadrilateral would request a zero-sized output image.
        return orig_img

    # Perform perspective transform to obtain top-down view
    M = cv2.getPerspectiveTransform(np.float32(corners), np.float32(destination_corners))
    final = cv2.warpPerspective(orig_img, M, (out_width, out_height), flags=cv2.INTER_LINEAR)

    return final
85
+
86
def gradio_interface(input_image):
    """Gradio callback: run the scanner on an uploaded RGB image.

    The image is converted from RGB to BGR before being passed to
    ``scan`` and the result is converted back to RGB. Returns ``None``
    when no image was supplied.
    """
    if input_image is not None:
        bgr_input = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
        return cv2.cvtColor(scan(bgr_input), cv2.COLOR_BGR2RGB)
    return None
92
+
93
# Build the Gradio UI: a title, a link to the reference article, a summary
# of the processing steps, and an image-in / image-out interface wired to
# gradio_interface.
with gr.Blocks() as demo:
    gr.Markdown("# Document Scanner using OpenCV")
    gr.Markdown("## [Reference: 4-point OpenCV getPerspective Transform Example](https://pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/)")
    gr.Markdown("""
### Image Processing Flow:
- Resize the image if its dimensions exceed the limit.
- Apply morphological transformations to enhance document boundaries.
- Perform GrabCut for foreground extraction.
- Convert the image to grayscale.
- Apply Gaussian blur.
- Perform Canny edge detection.
- Dilate the edges to close gaps.
- Find contours and identify the largest ones likely to be the document edges.
- If a contour with four corners is found, transform the perspective to obtain a top-down view of the document.
""")
    # Both image components exchange data as numpy arrays.
    image_input = gr.Image(type="numpy", label="Upload Image")
    image_output = gr.Image(type="numpy", label="Processed Image")
    # NOTE(review): these components are created inside the Blocks context
    # and then handed to gr.Interface — confirm against the pinned Gradio
    # version that they are not rendered twice.
    gr.Interface(fn=gradio_interface, inputs=image_input, outputs=image_output, allow_flagging="never")

# Start the app as soon as this file is executed, with debug mode enabled.
demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.3.0
3
+ annotated-types==0.7.0
4
+ anyio==4.4.0
5
+ attrs==23.2.0
6
+ certifi==2024.7.4
7
+ charset-normalizer==3.3.2
8
+ click==8.1.7
9
+ contourpy==1.2.1
10
+ cycler==0.12.1
11
+ dnspython==2.6.1
12
+ email_validator==2.2.0
13
+ exceptiongroup==1.2.2
14
+ fastapi==0.111.0
15
+ fastapi-cli==0.0.4
16
+ ffmpy==0.3.2
17
+ filelock==3.15.4
18
+ fonttools==4.53.1
19
+ fsspec==2024.6.1
20
+ gradio==4.38.1
21
+ gradio_client==1.1.0
22
+ h11==0.14.0
23
+ httpcore==1.0.5
24
+ httptools==0.6.1
25
+ httpx==0.27.0
26
+ huggingface-hub==0.23.4
27
+ idna==3.7
28
+ importlib_resources==6.4.0
29
+ imutils==0.5.4
30
+ Jinja2==3.1.4
31
+ jsonschema==4.23.0
32
+ jsonschema-specifications==2023.12.1
33
+ kiwisolver==1.4.5
34
+ markdown-it-py==3.0.0
35
+ MarkupSafe==2.1.5
36
+ matplotlib==3.9.1
37
+ mdurl==0.1.2
38
+ numpy==2.0.0
39
+ opencv-python==4.10.0.84
40
+ orjson==3.10.6
41
+ packaging==24.1
42
+ pandas==2.2.2
43
+ pillow==10.4.0
44
+ pydantic==2.8.2
45
+ pydantic_core==2.20.1
46
+ pydub==0.25.1
47
+ Pygments==2.18.0
48
+ pyparsing==3.1.2
49
+ pytesseract==0.3.10
50
+ python-dateutil==2.9.0.post0
51
+ python-dotenv==1.0.1
52
+ python-multipart==0.0.9
53
+ pytz==2024.1
54
+ PyYAML==6.0.1
55
+ referencing==0.35.1
56
+ requests==2.32.3
57
+ rich==13.7.1
58
+ rpds-py==0.19.0
59
+ ruff==0.5.1
60
+ scipy==1.14.0
61
+ semantic-version==2.10.0
62
+ shellingham==1.5.4
63
+ six==1.16.0
64
+ sniffio==1.3.1
65
+ starlette==0.37.2
66
+ tomlkit==0.12.0
67
+ toolz==0.12.1
68
+ tqdm==4.66.4
69
+ typer==0.12.3
70
+ typing_extensions==4.12.2
71
+ tzdata==2024.1
72
+ ujson==5.10.0
73
+ urllib3==2.2.2
74
+ uvicorn==0.30.1
75
+ uvloop==0.19.0
76
+ watchfiles==0.22.0
77
+ websockets==11.0.3