ake178178 committed on
Commit
98becde
·
verified ·
1 Parent(s): 92f6166

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import pytesseract
4
+ import numpy as np
5
+
6
+ # 设置 Tesseract OCR 路径(如果需要)
7
+ # pytesseract.pytesseract.tesseract_cmd = r"/path/to/tesseract"
8
+
9
def _to_gray(image):
    """Return a single-channel view/copy of an RGB, RGBA or grayscale array."""
    if image.ndim == 2:
        return image
    if image.shape[2] == 1:
        return image.reshape(image.shape[:2])
    if image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    # The caller in this file passes arrays from gr.Image(type="numpy"),
    # which are RGB-ordered, so RGB2GRAY (not BGR2GRAY) gives the correct
    # luminance weights.
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)


def _find_paper_quad(binary):
    """Return the 4 corner points (4x2) of the largest quadrilateral contour, or None."""
    edges = cv2.Canny(binary, 50, 150, apertureSize=3)
    # [-2] picks the contour list on both OpenCV 3 (3 return values) and
    # OpenCV 4 (2 return values).
    contours = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Only the five largest contours are considered as paper candidates.
    for contour in sorted(contours, key=cv2.contourArea, reverse=True)[:5]:
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) == 4:
            return approx.reshape(4, 2)
    return None


def _order_corners(pts):
    """Order 4 points as top-left, top-right, bottom-right, bottom-left (float32)."""
    rect = np.zeros((4, 2), dtype="float32")
    sums = pts.sum(axis=1)        # x + y: smallest at top-left, largest at bottom-right
    diffs = np.diff(pts, axis=1)  # y - x: smallest at top-right, largest at bottom-left
    rect[0] = pts[np.argmin(sums)]
    rect[2] = pts[np.argmax(sums)]
    rect[1] = pts[np.argmin(diffs)]
    rect[3] = pts[np.argmax(diffs)]
    return rect


def _warp_to_rect(gray, rect):
    """Perspective-warp *gray* so that *rect* fills the output; None if degenerate."""
    tl, tr, br, bl = rect
    max_width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    max_height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    # A quad whose ordered corners collapse onto each other (e.g. a diamond,
    # where sum/diff ordering picks the same point twice) or whose target size
    # is under 2 px yields a singular transform or an invalid output size.
    if max_width < 2 or max_height < 2 or len(np.unique(rect, axis=0)) < 4:
        return None

    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(gray, matrix, (max_width, max_height))


def preprocess_image(image):
    """Convert an image to grayscale and, if possible, deskew the paper in it.

    The image is adaptively thresholded and edge-detected; among the five
    largest contours, the first one that approximates to a quadrilateral is
    treated as the sheet of paper and perspective-warped to an upright
    rectangle.

    Args:
        image: numpy array, either 2-D grayscale or H x W x C with C in
            {1, 3, 4}. Three- and four-channel input is interpreted as
            RGB / RGBA.

    Returns:
        The warped grayscale image when a usable quadrilateral was found,
        otherwise the unwarped grayscale image.

    Raises:
        ValueError: if *image* is None or has no pixels.
    """
    if image is None or getattr(image, "size", 0) == 0:
        raise ValueError("preprocess_image requires a non-empty image array")

    gray = _to_gray(image)

    # Adaptive thresholding copes with uneven lighting before edge detection.
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    corners = _find_paper_quad(thresh)
    if corners is None:
        # No quadrilateral found: fall back to the plain grayscale image.
        return gray

    warped = _warp_to_rect(gray, _order_corners(corners))
    return gray if warped is None else warped
71
+
72
def ocr_process(image, lang="eng"):
    """Run OCR on an uploaded image and return the recognized text.

    Args:
        image: numpy image array as delivered by the Gradio image input,
            or None when nothing was uploaded.
        lang: Tesseract language code(s) passed to pytesseract; defaults to
            "eng", the value that was previously hard-coded.

    Returns:
        The text recognized by Tesseract, or an empty string when *image*
        is None.
    """
    # Gradio calls the handler with None when the user submits without
    # providing an image; return an empty result instead of crashing.
    if image is None:
        return ""

    # Grayscale + perspective correction before recognition.
    processed_image = preprocess_image(image)

    return pytesseract.image_to_string(processed_image, lang=lang)
80
+
81
# Build the Gradio UI: one image input (delivered as a numpy array) mapped to
# a plain-text output, then start the web server.
iface = gr.Interface(
    ocr_process,
    gr.Image(type="numpy"),
    "text",
    title="轻量级 OCR 应用",
    description="上传带角度的纸张图片,自动校正并提取文字",
)

iface.launch()