Spaces:

ad4r5hgs
/

opencv-doc-scanner

Sleeping

App Files Files Community

ad4r5hgs committed on Jul 14, 2024

Commit

f5a8ceb

verified ·

1 Parent(s): e6709ba

Upload 2 files

Browse files

Files changed (2) hide show

app.py +112 -0
requirements.txt +77 -0

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import gradio as gr
+import cv2
+import numpy as np
+# Function to order points in a consistent manner
+def order_points(pts):
+    rect = np.zeros((4, 2), dtype='float32')
+    pts = np.array(pts)
+    s = pts.sum(axis=1)
+    rect[0] = pts[np.argmin(s)]
+    rect[2] = pts[np.argmax(s)]
+    diff = np.diff(pts, axis=1)
+    rect[1] = pts[np.argmin(diff)]
+    rect[3] = pts[np.argmax(diff)]
+    return rect.astype('int').tolist()
+# Function to find the destination points for perspective transform
+def find_dest(pts):
+    (tl, tr, br, bl) = pts
+    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+    maxWidth = max(int(widthA), int(widthB))
+    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+    maxHeight = max(int(heightA), int(heightB))
+    destination_corners = [[0, 0], [maxWidth, 0], [maxWidth, maxHeight], [0, maxHeight]]
+    return order_points(destination_corners)
+def scan(img):
+    # Resize image if dimensions exceed limit
+    dim_limit = 1080
+    max_dim = max(img.shape)
+    if max_dim > dim_limit:
+        resize_scale = dim_limit / max_dim
+        img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)
+    orig_img = img.copy()
+    kernel = np.ones((5, 5), np.uint8)
+    # Perform morphological closing
+    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)
+    # Initialize mask for GrabCut
+    mask = np.zeros(img.shape[:2], np.uint8)
+    bgdModel = np.zeros((1, 65), np.float64)
+    fgdModel = np.zeros((1, 65), np.float64)
+    rect = (20, 20, img.shape[1] - 20, img.shape[0] - 20)
+    cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
+    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
+    img = img * mask2[:, :, np.newaxis]
+    # Convert image to grayscale and apply Gaussian blur
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    gray = cv2.GaussianBlur(gray, (11, 11), 0)
+    # Perform Canny edge detection
+    canny = cv2.Canny(gray, 0, 200)
+    canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
+    # Find contours and sort them
+    contours, hierarchy = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
+    page = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
+    if len(page) == 0:
+        return orig_img
+    for c in page:
+        epsilon = 0.02 * cv2.arcLength(c, True)
+        corners = cv2.approxPolyDP(c, epsilon, True)
+        if len(corners) == 4:
+            break
+    # Order the corners and find destination points for perspective transform
+    corners = sorted(np.concatenate(corners).tolist())
+    corners = order_points(corners)
+    destination_corners = find_dest(corners)
+    h, w = orig_img.shape[:2]
+    # Perform perspective transform to obtain top-down view
+    M = cv2.getPerspectiveTransform(np.float32(corners), np.float32(destination_corners))
+    final = cv2.warpPerspective(orig_img, M, (destination_corners[2][0], destination_corners[2][1]), flags=cv2.INTER_LINEAR)
+    return final
+def gradio_interface(input_image):
+    if input_image is None:
+        return None
+    img = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
+    processed_img = scan(img)
+    return cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)
+# Assemble the Gradio UI: a heading, a reference link, a description of the
+# processing flow, and an image-in / image-out interface around
+# gradio_interface.
+with gr.Blocks() as demo:
+    # Page heading and link to the reference article.
+    gr.Markdown("# Document Scanner using OpenCV")
+    gr.Markdown("## [Reference: 4-point OpenCV getPerspective Transform Example](https://pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/)")
+    # User-facing summary of the image processing steps.
+    gr.Markdown("""
+    ### Image Processing Flow:
+    - Resize the image if its dimensions exceed the limit.
+    - Apply morphological transformations to enhance document boundaries.
+    - Perform GrabCut for foreground extraction.
+    - Convert the image to grayscale.
+    - Apply Gaussian blur.
+    - Perform Canny edge detection.
+    - Dilate the edges to close gaps.
+    - Find contours and identify the largest ones likely to be the document edges.
+    - If a contour with four corners is found, transform the perspective to obtain a top-down view of the document.
+    """)
+    # Image components exchanging numpy arrays with the callback.
+    image_input = gr.Image(type="numpy", label="Upload Image")
+    image_output = gr.Image(type="numpy", label="Processed Image")
+    # Connect the components through gradio_interface; flagging is turned off.
+    gr.Interface(fn=gradio_interface, inputs=image_input, outputs=image_output, allow_flagging="never")
+# NOTE(review): launch runs at module level with debug=True and no
+# __main__ guard — confirm this is what the hosting environment expects.
+demo.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,77 @@

+aiofiles==23.2.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==23.2.0
+certifi==2024.7.4
+charset-normalizer==3.3.2
+click==8.1.7
+contourpy==1.2.1
+cycler==0.12.1
+dnspython==2.6.1
+email_validator==2.2.0
+exceptiongroup==1.2.2
+fastapi==0.111.0
+fastapi-cli==0.0.4
+ffmpy==0.3.2
+filelock==3.15.4
+fonttools==4.53.1
+fsspec==2024.6.1
+gradio==4.38.1
+gradio_client==1.1.0
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.4
+idna==3.7
+importlib_resources==6.4.0
+imutils==0.5.4
+Jinja2==3.1.4
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.1
+mdurl==0.1.2
+numpy==2.0.0
+opencv-python==4.10.0.84
+orjson==3.10.6
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.2
+pytesseract==0.3.10
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.1
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.19.0
+ruff==0.5.1
+scipy==1.14.0
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+starlette==0.37.2
+tomlkit==0.12.0
+toolz==0.12.1
+tqdm==4.66.4
+typer==0.12.3
+typing_extensions==4.12.2
+tzdata==2024.1
+ujson==5.10.0
+urllib3==2.2.2
+uvicorn==0.30.1
+uvloop==0.19.0
+watchfiles==0.22.0
+websockets==11.0.3