File size: 2,830 Bytes
98becde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import cv2
import pytesseract
import numpy as np

# 设置 Tesseract OCR 路径(如果需要)
# pytesseract.pytesseract.tesseract_cmd = r"/path/to/tesseract"

def _order_corners(pts):
    """Order four (x, y) points as top-left, top-right, bottom-right, bottom-left."""
    ordered = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    ordered[0] = pts[np.argmin(coord_sum)]
    ordered[2] = pts[np.argmax(coord_sum)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_diff = np.diff(pts, axis=1)
    ordered[1] = pts[np.argmin(coord_diff)]
    ordered[3] = pts[np.argmax(coord_diff)]
    return ordered


def _to_gray(image):
    """Convert a 2-D, single-channel, RGB or RGBA array to a single-channel image."""
    if image.ndim == 2:
        return image

    channels = image.shape[2]
    if channels == 1:
        # Drop the trailing axis; copy so OpenCV receives a contiguous buffer.
        return np.ascontiguousarray(image[:, :, 0])
    if channels == 3:
        # The caller in this file passes Gradio "numpy" images, which are RGB
        # (not OpenCV's BGR), so the RGB conversion code is the correct one.
        return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    if channels == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    raise ValueError(f"unsupported number of image channels: {channels}")


def preprocess_image(image):
    """Convert an image to grayscale and, if possible, deskew the page in it.

    The function looks for a four-cornered contour among the five largest
    contours of the edge map and, when one is found, applies a perspective
    transform so that the quadrilateral fills the output image.

    Args:
        image: Image array that is 2-D (grayscale) or 3-D with 1, 3 (RGB) or
            4 (RGBA) channels.

    Returns:
        The perspective-corrected grayscale image, or the plain grayscale
        image when no usable quadrilateral was found.

    Raises:
        ValueError: If ``image`` is None, empty, or has an unsupported shape.
    """
    if image is None:
        raise ValueError("image must not be None")
    image = np.asarray(image)
    if image.ndim not in (2, 3) or image.size == 0:
        raise ValueError(
            f"expected a non-empty 2-D or 3-D image array, got shape {image.shape}"
        )

    gray = _to_gray(image)

    # Adaptive thresholding copes with uneven lighting before edge detection.
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    edges = cv2.Canny(thresh, 50, 150, apertureSize=3)

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; [-2] selects contours on both.
    contours = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    # Take the first of the largest contours that simplifies to four corners.
    paper_contour = None
    for contour in candidates:
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) == 4:
            paper_contour = approx
            break

    if paper_contour is None:
        # No quadrilateral found: fall back to the grayscale image.
        return gray

    rect = _order_corners(paper_contour.reshape(4, 2))
    tl, tr, br, bl = rect

    # Output size is the longer of each pair of opposite edges.
    max_width = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
    max_height = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))

    if max_width < 2 or max_height < 2:
        # A degenerate quadrilateral would give an empty or singular warp
        # target, so keep the unwarped grayscale image instead.
        return gray

    dst = np.array([
        [0, 0],
        [max_width - 1, 0],
        [max_width - 1, max_height - 1],
        [0, max_height - 1]], dtype="float32")

    transform = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(gray, transform, (max_width, max_height))

def ocr_process(image):
    """Preprocess an image and return the text Tesseract recognises in it.

    Args:
        image: Image array from the UI's image input, or None when nothing
            was supplied (presumably what Gradio passes for an empty
            upload; confirm against the Gradio version in use).

    Returns:
        The string produced by ``pytesseract.image_to_string`` using the
        English language data, or an empty string when ``image`` is None.
    """
    if image is None:
        # Nothing to recognise; avoid crashing inside the preprocessing step.
        return ""

    # Preprocess the image before recognition.
    processed_image = preprocess_image(image)

    # Run OCR on the preprocessed image.
    return pytesseract.image_to_string(processed_image, lang='eng')

# Build the Gradio UI: one image input (handed to the callback as a NumPy
# array) and one text output showing the OCR result.
iface = gr.Interface(
    title="轻量级 OCR 应用",
    description="上传带角度的纸张图片,自动校正并提取文字",
    fn=ocr_process,
    inputs=gr.Image(type="numpy"),
    outputs="text",
)

# Start the web app.
iface.launch()