|
|
|
import requests |
|
import json |
|
from PIL import Image |
|
import os |
|
|
|
|
|
def getImgCoordinatesByMagi(image_path: str, timeout: float = 300.0) -> "list | None":
    """Upload an image to the Magi FastAPI endpoint and return its text boxes.

    Args:
        image_path: Path of the image file, including its extension.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds).
            Without it a stalled connection would block the caller forever.
            The default is deliberately generous because callers in this file
            treat a ``None`` result as a reason to delete the image.

    Returns:
        The value stored under the ``"texts"`` key of the JSON response
        (per the original annotation: a list with the coordinates of every
        dialogue box), or ``None`` when the request or the parsing fails.

    Raises:
        OSError: If ``image_path`` cannot be opened or read; the file is read
            before the best-effort ``try`` block below.
    """
    # NOTE(review): these headers look like they were copied from a browser
    # session. 'authority', 'method' and 'scheme' are sent as ordinary header
    # fields, and the Cookie is a hard-coded JWT that is presumably
    # short-lived -- consider moving it to configuration.
    #
    # 'Accept-Encoding' is intentionally NOT set here: the copied browser
    # value advertised "br, zstd", which the HTTP stack can only decode when
    # optional packages are installed. Leaving it out lets requests advertise
    # only the encodings it is able to decode.
    headers = {
        'authority': 'rogerxavier-fastapi-t5-magi.hf.space',
        'method': 'GET',
        'scheme': 'https',
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': 'spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NDJhNTNiNTE2ZDRkODI5M2M5YjdiNzgiLCJ1c2VyIjoicm9nZXJ4YXZpZXIifSwiaWF0IjoxNzE2NjExMTE3LCJzdWIiOiIvc3BhY2VzL3JvZ2VyeGF2aWVyL2Zhc3RhcGlfdDVfbWFnaSIsImV4cCI6MTcxNjY5NzUxNywiaXNzIjoiaHR0cHM6Ly9odWdnaW5nZmFjZS5jbyJ9.W00jo8kiRgwCpq5aaGhaPE2RP2jLOyvfimjyIfHVhP1gs7NHkBkRzVTFqYv3TRtZoHNPTiFiI5Ehu12KP06sDQ',
        'Dnt': '1',
        'Priority': 'u=1, i',
        'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    }

    with open(image_path, 'rb') as file:
        image_bytes = file.read()

    # Sent as a multipart form field named "image".
    files = {
        "image": image_bytes,
    }
    magi_fastapi_base = 'https://rogerxavier-fastapi-t5-magi.hf.space/getCoordinates'

    try:
        resp = requests.post(
            magi_fastapi_base,
            files=files,
            headers=headers,
            timeout=timeout,
        )
        # Decode the body once and reuse it for both logging and the result.
        payload = resp.json()
        print(payload)
        return payload["texts"]
    except Exception as e:
        # Deliberate best-effort boundary: any network, decoding or schema
        # problem is reported and signalled to the caller as None.
        print(e)
        return None
|
|
|
|
|
def path_to_format(old_path: str) -> str:
    """Return ``old_path`` with the number in its file name zero-padded to 3 digits.

    Example: ``/path/to/your/image/1.jpg`` -> ``/path/to/your/image/001.jpg``.

    Only the file name is touched; the directory part and the extension are
    kept. Numbers that already have three or more significant digits are left
    at their natural width (``1234.jpg`` stays ``1234.jpg``).

    The name is returned unchanged when it contains no decimal digits, or
    when its digits are split into several separate runs (``p1_2.jpg``),
    because the joined digit string then does not occur in the name.

    Args:
        old_path: File path whose base name should be normalised.

    Returns:
        The path with the reformatted file name.
    """
    directory, base_name = os.path.split(old_path)
    stem, extension = os.path.splitext(base_name)

    # str.isdecimal (rather than str.isdigit) keeps out characters such as
    # superscripts, which count as "digits" but cannot be parsed by int().
    number_str = ''.join(filter(str.isdecimal, stem))
    if not number_str:
        # Nothing to pad; previously this case crashed on int('').
        return os.path.join(directory, base_name)

    formatted_number = "{:03d}".format(int(number_str))
    new_file_name = f"{stem.replace(number_str, formatted_number)}{extension}"
    return os.path.join(directory, new_file_name)
|
|
|
|
|
def save_img(new_save_path: str, old_img_path: str) -> None:
    """Crop every detected text box out of an image and save each crop as JPEG.

    The boxes come from ``getImgCoordinatesByMagi(old_img_path)``. Crop number
    ``i`` (counted from 0) is written next to ``new_save_path`` as
    ``<new_save_path without extension>_<i:03d>_cropped.jpg``.

    When detection returns ``None`` the source image is deleted.
    NOTE(review): ``None`` is also what the detector returns for network or
    parsing errors, so a transient outage removes the source file as well --
    confirm this is intended.

    Args:
        new_save_path: Target path (including extension) used as the naming
            base for the crops.
        old_img_path: Path (including extension) of the source image; it is
            passed in so that a problematic source file can be deleted.
    """
    print(new_save_path)

    text_boxes = getImgCoordinatesByMagi(old_img_path)
    if text_boxes is None:
        print("图片识别有问题,准备删除")
        os.remove(old_img_path)
        return

    # Modes Pillow's JPEG writer accepts; anything else (RGBA, P, LA, ...)
    # would make save() raise OSError, which happens for many PNG inputs.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    output_stem = os.path.splitext(new_save_path)[0]

    with Image.open(old_img_path) as img:
        for index, box in enumerate(text_boxes):
            cropped_img = img.crop(tuple(box))
            if cropped_img.mode not in jpeg_modes:
                cropped_img = cropped_img.convert("RGB")
            cropped_img.save(f"{output_stem}_{index:03d}_cropped.jpg")
|
|
|
|
|
if __name__ == '__main__':
    # Source directory, resolved against the current working directory.
    img_path = 'manga1'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Collect every .jpg/.png below the source directory. The stored paths
    # are relative to the current working directory, so they still start
    # with the source directory name.
    image_files = []
    for root, _dirs, files in os.walk(subdir_path):
        for file in files:
            if file.endswith((".jpg", ".png")):
                image_files.append(os.path.relpath(os.path.join(root, file)))

    # Output root: a sibling directory named "<img_path>2".
    processed_subdir_path = os.path.join(os.path.dirname(subdir_path), f"{img_path}2")
    os.makedirs(processed_subdir_path, exist_ok=True)

    for img_file in image_files:
        # Mirror the (cwd-relative) source directory below the output root.
        img_dir = os.path.dirname(img_file)
        new_img_dir = os.path.join(processed_subdir_path, img_dir)
        os.makedirs(new_img_dir, exist_ok=True)

        new_img_path = os.path.join(new_img_dir, os.path.basename(img_file))
        new_img_path = path_to_format(new_img_path)

        # save_img numbers its crops from 000, so the first crop is the
        # marker for "already processed". Checking index 001 (as before)
        # re-uploaded every image that produced exactly one crop.
        first_crop_path = f"{os.path.splitext(new_img_path)[0]}_{0:03d}_cropped.jpg"
        if not os.path.exists(first_crop_path):
            save_img(new_save_path=new_img_path, old_img_path=img_file)
        else:
            print(f"Skipping {new_img_path} as it already exists.")