Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import cv2
|
3 |
+
import pytesseract
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
# 设置 Tesseract OCR 路径(如果需要)
|
7 |
+
# pytesseract.pytesseract.tesseract_cmd = r"/path/to/tesseract"
|
8 |
+
|
9 |
+
def preprocess_image(image):
    """Deskew a photographed sheet of paper and return it as grayscale.

    The image is converted to grayscale, binarized with an adaptive
    threshold and edge-detected.  The five largest contours are searched for
    one that simplifies to a quadrilateral; if a usable one is found, the
    grayscale image is perspective-warped so that the quadrilateral fills the
    output.  Otherwise the unwarped grayscale image is returned.

    Args:
        image: Array-like image, either 2-D (already grayscale) or 3-D with
            1, 3 or 4 channels.  Three/four-channel data is interpreted as
            RGB/RGBA, which is what the ``gr.Image(type="numpy")`` input used
            in this file supplies.

    Returns:
        A 2-D grayscale ``numpy`` array (warped when a paper outline was
        found, unwarped otherwise).

    Raises:
        ValueError: If ``image`` is ``None`` or has an unsupported shape.
    """
    if image is None:
        raise ValueError("image must be a numpy array, got None")

    gray = _to_gray(np.asarray(image))

    # Adaptive thresholding copes with uneven lighting across the page.
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

    quad = _find_paper_quad(thresh)
    if quad is None:
        # No quadrilateral found: fall back to the plain grayscale image.
        return gray

    warped = _warp_to_quad(gray, _order_corners(quad))
    return gray if warped is None else warped


def _to_gray(image):
    """Return a single-channel view/copy of ``image`` (RGB channel order)."""
    if image.ndim == 2:
        return image
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 1:
            return np.ascontiguousarray(image[:, :, 0])
        if channels == 3:
            return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        if channels == 4:
            return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    raise ValueError(f"unsupported image shape: {image.shape}")


def _find_paper_quad(binary):
    """Return the first 4-point outline among the 5 largest contours, or None."""
    edges = cv2.Canny(binary, 50, 150, apertureSize=3)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list in both.
    contours = cv2.findContours(
        edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    for contour in sorted(contours, key=cv2.contourArea, reverse=True)[:5]:
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) == 4:
            return approx.reshape(4, 2)
    return None


def _order_corners(pts):
    """Order 4 (x, y) points as top-left, top-right, bottom-right, bottom-left."""
    rect = np.zeros((4, 2), dtype="float32")

    # Top-left has the smallest x + y, bottom-right the largest.
    sums = pts.sum(axis=1)
    rect[0] = pts[np.argmin(sums)]
    rect[2] = pts[np.argmax(sums)]

    # Top-right has the smallest y - x, bottom-left the largest.
    diffs = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diffs)]
    rect[3] = pts[np.argmax(diffs)]
    return rect


def _warp_to_quad(gray, rect):
    """Warp ``gray`` so ``rect`` fills the output; None if ``rect`` is degenerate."""
    # The sum/diff heuristic can pick the same point twice (e.g. for a
    # diamond-shaped outline); such a quad has no valid perspective transform.
    if len(np.unique(rect, axis=0)) < 4:
        return None

    tl, tr, br, bl = rect
    max_width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    max_height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))
    if max_width < 2 or max_height < 2:
        return None

    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(gray, matrix, (max_width, max_height))
|
71 |
+
|
72 |
+
def ocr_process(image):
    """Extract English text from an uploaded image.

    The image is first passed through ``preprocess_image`` and the result is
    handed to Tesseract via ``pytesseract.image_to_string`` with ``lang='eng'``.

    Args:
        image: The image supplied by the UI, or ``None`` when the user
            submitted without uploading anything.

    Returns:
        The recognized text, or an empty string when ``image`` is ``None``.
    """
    # Gradio calls the handler with None when no image was provided; there is
    # nothing to recognize, so return empty text instead of crashing in OpenCV.
    if image is None:
        return ""

    processed_image = preprocess_image(image)
    return pytesseract.image_to_string(processed_image, lang='eng')
|
80 |
+
|
81 |
+
# Build the Gradio interface: one image input (delivered to the handler as a
# numpy array) and one text output showing the OCR result.
iface = gr.Interface(
    title="轻量级 OCR 应用",
    description="上传带角度的纸张图片,自动校正并提取文字",
    fn=ocr_process,
    inputs=gr.Image(type="numpy"),
    outputs="text",
)

# Start the web server for the interface.
iface.launch()
|