|
|
|
|
|
import base64 |
|
import json |
|
import os |
|
from io import BytesIO |
|
import pandas as pd |
|
from PIL import Image |
|
|
|
import requests |
|
|
|
|
|
def ocr(image, timeout=60):
    """Send an image to the ModelScope OFA OCR demo endpoint and return its reply.

    The image is opened with PIL, re-encoded into an in-memory buffer using
    its own detected format, base64-encoded, wrapped in a ``data:`` URI and
    POSTed as JSON to the Gradio ``predict`` endpoint.

    Args:
        image: Anything ``PIL.Image.open`` accepts (the script in this file
            passes a path string).
        timeout: Seconds to wait for the HTTP request before giving up.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The JSON-decoded response body. Its schema is defined by the remote
        service; the caller in this file only checks for an ``'error'`` key.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Use a context manager so the underlying file handle is released, and
    # keep the parameter name distinct from the opened PIL image.
    with Image.open(image) as img:
        fmt = img.format or "PNG"
        img_buffer = BytesIO()
        img.save(img_buffer, format=fmt)

    base64_str = base64.b64encode(img_buffer.getvalue()).decode()

    # Label the payload with the MIME type of the format actually written,
    # rather than always claiming JPEG (PNG files are processed too).
    mime_type = Image.MIME.get(fmt.upper(), "image/" + fmt.lower())

    url = "https://www.modelscope.cn/api/v1/studio/damo/ofa_ocr_pipeline/gradio/api/predict/"
    payload = json.dumps({
        "data": [f"data:{mime_type};base64,{base64_str}"],
        "dataType": ["image"],
    })
    headers = {
        'Content-Type': 'application/json',
    }

    # A timeout keeps one stalled request from hanging a whole batch run.
    response = requests.request(
        "POST", url, headers=headers, data=payload, timeout=timeout
    )

    # Deliberately no raise_for_status(): the caller inspects the decoded
    # body for an 'error' key, so error payloads must still be returned.
    return json.loads(response.text)
|
|
|
if __name__ == '__main__':
    # Script entry point: run every .jpg/.png found under ./manga (relative
    # to the current working directory) through ocr() and delete the files
    # for which the service reply contains an 'error' key.
    img_path = 'manga'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Collect matching files from the whole tree, as paths relative to the
    # current working directory.
    image_files = [
        os.path.relpath(os.path.join(folder, filename))
        for folder, _subdirs, filenames in os.walk(subdir_path)
        for filename in filenames
        if filename.endswith((".jpg", ".png"))
    ]

    for image_path in image_files:
        result = ocr(image_path)

        if 'error' not in result:
            # Message: "the image has no problem".
            print(image_path, "图片没有问题")
            continue

        # Message: "problem image found, must be deleted to pass review".
        print("发现问题图片,需要删除以过审:",image_path)
        os.remove(image_path)
|
|
|
|
|
|