Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import re
|
4 |
+
import PyPDF2
|
5 |
+
|
6 |
+
def check_spacing_csv(csv_file):
    """Report every ``$`` in a CSV file that sits directly between two Chinese characters.

    Each row of the parsed CSV is flattened into one space-joined string and
    scanned for a CJK character, a ``$``, and another CJK character with
    nothing in between.

    Args:
        csv_file: An uploaded-file object exposing its path as ``.name``; a
            plain path string is accepted as well.

    Returns:
        One line per finding (row label + 1, offset of the finding within the
        joined row text, and the offending three characters), joined with
        newlines, or the fixed "no errors found" message when nothing matches.
    """
    # Accept a bare path string in addition to objects that carry ``.name``.
    path = csv_file if isinstance(csv_file, str) else csv_file.name
    df = pd.read_csv(path)

    # The triple is captured inside a lookahead so overlapping hits are all
    # reported: in "中$文$字" both "中$文" and "文$字" share the middle character,
    # and a consuming pattern would silently skip the second one.
    pattern = re.compile(r'(?=([\u4e00-\u9fa5]\$[\u4e00-\u9fa5]))')

    errors = []
    for index, row in df.iterrows():
        # Stringify every cell so numeric and missing values can be joined.
        row_text = ' '.join(row.astype(str).values.flatten())
        for match in pattern.finditer(row_text):
            errors.append(
                f"第 {index + 1} 行,錯誤位置:{match.start()},內容:{match.group(1)}"
            )

    if errors:
        return "\n".join(errors)
    return "未發現錯誤"
|
27 |
+
|
28 |
+
def check_spacing_pdf(pdf_file):
    """Report every ``$`` in a PDF that sits directly between two Chinese characters.

    The text extracted from each page is scanned for a CJK character, a ``$``,
    and another CJK character with nothing in between. Pages for which text
    extraction returns nothing are skipped.

    Args:
        pdf_file: An uploaded-file object exposing its path as ``.name``; a
            plain path string is accepted as well.

    Returns:
        One line per finding (1-based page number, offset of the finding within
        that page's extracted text, and the offending three characters), joined
        with newlines, or the fixed "no errors found" message when nothing
        matches.
    """
    # Accept a bare path string in addition to objects that carry ``.name``.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    reader = PyPDF2.PdfReader(path)

    # The triple is captured inside a lookahead so overlapping hits are all
    # reported: in "中$文$字" both "中$文" and "文$字" share the middle character,
    # and a consuming pattern would silently skip the second one.
    pattern = re.compile(r'(?=([\u4e00-\u9fa5]\$[\u4e00-\u9fa5]))')

    errors = []
    for page_number, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        if not text:
            # Nothing extractable on this page.
            continue
        for match in pattern.finditer(text):
            errors.append(
                f"第 {page_number} 頁,錯誤位置:{match.start()},內容:{match.group(1)}"
            )

    if errors:
        return "\n".join(errors)
    return "未發現錯誤"
|
47 |
+
|
48 |
+
def _dispatch_check(file):
    """Route an uploaded file to the checker matching its extension.

    Returns a user-facing message instead of raising when no file was
    uploaded or when the extension is neither ``.csv`` nor ``.pdf``.
    """
    if file is None:
        # The handler can be triggered before any file has been uploaded.
        return "請先上傳檔案"

    # Compare case-insensitively so names such as "REPORT.CSV" are recognised.
    lowered = file.name.lower()
    if lowered.endswith('.csv'):
        return check_spacing_csv(file)
    if lowered.endswith('.pdf'):
        return check_spacing_pdf(file)
    return "不支援的檔案格式,請上傳 CSV 或 PDF 檔案"


# Build the Gradio interface. Only formats that have a checker are accepted;
# '.xlsx' used to be listed here but was always routed to the PDF parser.
interface = gr.Interface(
    fn=_dispatch_check,
    inputs=gr.File(file_types=['.csv', '.pdf']),
    outputs="text",
    title="CSV 和 PDF 中文校對系統",
    description="上傳一個 CSV 或 PDF 檔案,系統會檢查 $ 符號前後的中文字是否有空格"
)

# Launch the Gradio interface.
interface.launch()
|