File size: 5,779 Bytes
20d0048
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

import requests
import json
from PIL import Image
import os


def getImgCoordinatesByMagi(image_path: str, timeout: float = 300.0) -> "list | None":
    """Upload an image to the Magi FastAPI service and return its text boxes.

    The file is read in full and sent as the multipart field ``image`` in a
    POST request to the ``/getCoordinates`` endpoint. The decoded JSON
    response is printed, and the value stored under its ``"texts"`` key is
    returned.

    Args:
        image_path: Path of the image file, including its extension.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` waits indefinitely, which would stall a
            whole batch run on one unresponsive call. The default is
            deliberately generous.

    Returns:
        The value of ``"texts"`` from the JSON response, or ``None`` when the
        request fails, times out, returns an HTTP error status, or the
        response is not JSON of the expected shape.

    Raises:
        OSError: If ``image_path`` cannot be opened or read. This happens
            before the request is made and is not suppressed.
    """
    # NOTE(review): these headers look like they were copied from a browser
    # session. The 'Cookie' entry is a hard-coded session token that should
    # not live in source control, and 'method': 'GET' is sent as an ordinary
    # header even though the request below is a POST. They are kept unchanged
    # here so the request stays identical -- confirm whether the service
    # needs any of them.
    headers = {
        'authority': 'rogerxavier-fastapi-t5-magi.hf.space',
        'method': 'GET',
        'scheme': 'https',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': 'spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NDJhNTNiNTE2ZDRkODI5M2M5YjdiNzgiLCJ1c2VyIjoicm9nZXJ4YXZpZXIifSwiaWF0IjoxNzE2NjExMTE3LCJzdWIiOiIvc3BhY2VzL3JvZ2VyeGF2aWVyL2Zhc3RhcGlfdDVfbWFnaSIsImV4cCI6MTcxNjY5NzUxNywiaXNzIjoiaHR0cHM6Ly9odWdnaW5nZmFjZS5jbyJ9.W00jo8kiRgwCpq5aaGhaPE2RP2jLOyvfimjyIfHVhP1gs7NHkBkRzVTFqYv3TRtZoHNPTiFiI5Ehu12KP06sDQ',
        'Dnt': '1',
        'Priority': 'u=1, i',
        'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    }
    with open(image_path, 'rb') as file:
        image_bytes = file.read()
    files = {
        "image": image_bytes,
    }
    magi_fastapi_base = 'https://rogerxavier-fastapi-t5-magi.hf.space/getCoordinates'
    try:
        # Original author's note: the API seemed to become usable once the
        # repository was made public.
        resp = requests.post(
            magi_fastapi_base, files=files, headers=headers, timeout=timeout
        )
        # Surface HTTP errors explicitly instead of failing later on a
        # missing key in an error payload.
        resp.raise_for_status()
        # Decode the body once and reuse it for both the log line and result.
        payload = resp.json()
        print(payload)
        return payload["texts"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network/HTTP failure, non-JSON body, or JSON without "texts".
        print(e)
        return None


def path_to_format(old_path: str) -> str:
    """Zero-pad the number in a file name to at least three digits.

    For example ``/path/to/your/image/1.jpg`` becomes
    ``/path/to/your/image/001.jpg``. Only the final path component is
    examined; the directory part and the extension are kept as they are.

    All digit characters of the file name (without extension) are joined
    into one number. The padded number replaces that digit string in the
    name, so when the digits are not contiguous (``page1_2``) the joined
    string does not occur in the name and the name is left unchanged.

    Args:
        old_path: File path whose name contains the number to pad.

    Returns:
        The path with the padded file name, or ``old_path`` unchanged when
        the file name contains no usable number.
    """
    # Split into directory, bare name and extension.
    directory, base_name = os.path.split(old_path)
    stem, extension = os.path.splitext(base_name)
    # Gather the digit characters of the name (the number is assumed to sit
    # at the end of the file name).
    number_str = ''.join(filter(str.isdigit, stem))
    try:
        number = int(number_str)
    except ValueError:
        # No digits at all (e.g. "cover.jpg"), or digit-like characters that
        # int() rejects: there is nothing to pad, so keep the path as given
        # instead of aborting the caller's whole batch.
        return old_path
    # Render the number with the 03d format.
    formatted_number = "{:03d}".format(number)
    # Build the new file name and re-attach the directory.
    new_file_name = f"{stem.replace(number_str, formatted_number)}{extension}"
    return os.path.join(directory, new_file_name)


def save_img(new_save_path: str, old_img_path: str) -> None:
    """Crop each detected text region out of an image and save it as a JPEG.

    The regions come from ``getImgCoordinatesByMagi(old_img_path)``. Every
    region is passed to ``Image.crop`` as a tuple and written next to
    ``new_save_path`` as ``<new_save_path without extension>_<index>_cropped.jpg``,
    where ``index`` starts at 0 and is zero-padded to three digits.

    If region detection returns ``None``, the source image is treated as
    faulty and deleted from disk.

    Args:
        new_save_path: Destination path (including extension) that the crop
            file names are derived from.
        old_img_path: Path (including extension) of the source image. It is
            needed both for reading and for deleting the file when detection
            fails.
    """
    print(new_save_path)

    text_bboxes_for_all_images = getImgCoordinatesByMagi(old_img_path)
    if text_bboxes_for_all_images is None:
        print("图片识别有问题,准备删除")
        os.remove(old_img_path)
        return

    # Image modes the Pillow JPEG writer can encode directly. Other modes
    # (e.g. RGBA or palette PNGs) raise OSError on save.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    output_stem = os.path.splitext(new_save_path)[0]

    # The with-block closes the image file afterwards, so an open handle
    # never prevents the file from being deleted later.
    with Image.open(old_img_path) as img:
        for index, box in enumerate(text_bboxes_for_all_images):
            cropped_img = img.crop(tuple(box))
            if cropped_img.mode not in jpeg_modes:
                # Crops are always written as .jpg, so drop alpha/palette
                # data the JPEG format cannot store.
                cropped_img = cropped_img.convert("RGB")
            # Number the crops in order with a three-digit index,
            # e.g. 000.jpg -> 000_000_cropped.jpg.
            cropped_img.save(f"{output_stem}_{index:03d}_cropped.jpg")


if __name__ == '__main__':
    # Directory, relative to the current working directory, that holds the
    # watermark-free manga images to be cropped.
    img_path = 'manga1'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Collect every .jpg/.png image below that directory, sub-directories
    # included, as paths relative to the current working directory.
    image_files = []
    for root, _dirs, files in os.walk(subdir_path):
        for file in files:
            if file.endswith((".jpg", ".png")):
                image_files.append(os.path.relpath(os.path.join(root, file)))

    # Output directory created beside the input directory; the "2" suffix
    # marks the result of the second processing stage.
    processed_subdir_path = os.path.join(os.path.dirname(subdir_path), f"{img_path}2")
    os.makedirs(processed_subdir_path, exist_ok=True)

    # Process each image and store the results under the output directory,
    # mirroring the relative directory layout of the inputs.
    for img_file in image_files:
        img_dir = os.path.dirname(img_file)
        new_img_dir = os.path.join(processed_subdir_path, img_dir)
        os.makedirs(new_img_dir, exist_ok=True)

        new_img_path = os.path.join(new_img_dir, os.path.basename(img_file))
        new_img_path = path_to_format(new_img_path)

        # save_img numbers its crops from 000, so the first crop is the
        # marker for "already processed". Checking index 1 would never
        # recognise images that produced exactly one crop. Images that
        # produced no crop leave no marker and are submitted again.
        first_crop_path = f"{os.path.splitext(new_img_path)[0]}_{0:03d}_cropped.jpg"
        if not os.path.exists(first_crop_path):
            # Not processed yet: crop the image and save the pieces.
            save_img(new_save_path=new_img_path, old_img_path=img_file)
        else:
            print(f"Skipping {new_img_path} as it already exists.")