"""Crop manga pages into panel images using coordinates from a remote service.

Every ``.jpg``/``.png`` file found below ``./manga1`` is uploaded to the
coordinates endpoint; each box it returns is cropped out of the page and
written as a JPEG file below ``./manga12``.
"""

import json
import os
import re
from typing import Optional

import requests
from faker import Faker
from PIL import Image

fake = Faker()
fake_user_agent = fake.user_agent()
# Browser-like request headers; the hard-coded User-Agent is replaced below by
# a randomly generated one.
# NOTE(review): 'Accept-Encoding' advertises br/zstd - confirm the installed
# requests/urllib3 stack can decode them, otherwise resp.json() may fail.
fake_headers = {
    'scheme': 'https',
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Dnt': '1',
    'Priority': 'u=1, i',
    'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
}
fake_headers['User-Agent'] = fake_user_agent

# Endpoint that receives an uploaded page image and answers with JSON.
MAGI_COORDINATES_URL = 'https://rogerxavier-fastapi-t5-magi.hf.space/getCoordinates'

# Matches the last run of decimal digits in a string.
_LAST_DIGIT_RUN = re.compile(r"\d+(?!.*\d)", re.DOTALL)

# Image modes Pillow's JPEG writer accepts; anything else is converted to RGB.
_JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def getImgCoordinatesByMagi(image_path: str,
                            fake_headers: Optional[dict] = None,
                            timeout: Optional[float] = None) -> Optional[list]:
    """Upload an image and return the value of the ``"panels"`` response key.

    Args:
        image_path: Path of the image file, including its extension.
        fake_headers: Optional headers that replace the session's defaults.
        timeout: Optional timeout in seconds for the POST request. ``None``
            (the default) waits indefinitely, as the original code did.

    Returns:
        The ``"panels"`` entry of the JSON response (presumably a list of
        crop boxes - confirm against the service), or ``None`` when the
        request or the decoding of its response fails.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
    """
    with open(image_path, 'rb') as file:
        image_bytes = file.read()
    files = {
        "image": image_bytes,
    }
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        if fake_headers is not None:
            session.headers = fake_headers
        try:
            resp = session.post(MAGI_COORDINATES_URL, files=files, timeout=timeout)
            payload = resp.json()  # parse the body once
            print(payload)
            return payload["panels"]
        except Exception as e:
            # Best-effort boundary: any failure is reported and mapped to None.
            print(e)
            return None


def path_to_format(old_path: str) -> str:
    """Zero-pad the number in a file name to at least three digits.

    For example ``/path/to/your/image/1.jpg`` becomes
    ``/path/to/your/image/001.jpg``. Only the last run of digits in the file
    name (extension excluded) is rewritten; directory components are left
    alone. A name without any digit is returned unchanged.

    Args:
        old_path: File path, including the extension.

    Returns:
        The path with the file name's last digit run formatted as ``03d``.
    """
    directory, base_name = os.path.split(old_path)
    stem, extension = os.path.splitext(base_name)
    match = _LAST_DIGIT_RUN.search(stem)
    if match is not None:
        padded = "{:03d}".format(int(match.group()))
        stem = stem[:match.start()] + padded + stem[match.end():]
    return os.path.join(directory, stem + extension)


def save_img(new_save_path: str, old_img_path: str) -> None:
    """Crop every detected box out of an image and save each crop as a JPEG.

    Crops are written next to ``new_save_path`` as
    ``<new_save_path without extension>_<index:03d>_cropped.jpg`` with the
    index starting at 0.

    Args:
        new_save_path: Target path (including extension) the crop file names
            are derived from.
        old_img_path: Path (including extension) of the source image. The
            source file is deleted when no coordinates could be obtained.
    """
    print(new_save_path)
    # The image is only opened after the request, so a failed lookup can
    # delete the file without an open handle getting in the way.
    panel_boxes = getImgCoordinatesByMagi(image_path=old_img_path, fake_headers=fake_headers)
    if panel_boxes is None:
        # NOTE(review): this also fires on transient network errors - confirm
        # that deleting the source image is really wanted in that case.
        print("图片识别有问题,准备删除")
        os.remove(old_img_path)
        return

    output_stem = os.path.splitext(new_save_path)[0]
    with Image.open(old_img_path) as img:
        for index, box in enumerate(panel_boxes):
            cropped_img = img.crop(tuple(box))
            # PNG sources may be RGBA/P/LA, which the JPEG writer rejects.
            if cropped_img.mode not in _JPEG_WRITABLE_MODES:
                cropped_img = cropped_img.convert("RGB")
            cropped_img.save(f"{output_stem}_{index:03d}_cropped.jpg")


def main() -> None:
    """Crop all images below ``./manga1`` into the ``./manga12`` tree."""
    # Location of the watermark-free manga pages to crop.
    img_path = 'manga1'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Collect every image, including those in nested directories, as paths
    # relative to the current working directory.
    image_files = []
    for root, _dirs, file_names in os.walk(subdir_path):
        for file_name in file_names:
            if file_name.endswith((".jpg", ".png")):
                image_files.append(os.path.relpath(os.path.join(root, file_name)))

    # Output root is a sibling of the input directory; the "2" suffix marks
    # the second processing stage.
    processed_subdir_path = os.path.join(os.path.dirname(subdir_path), f"{img_path}2")
    os.makedirs(processed_subdir_path, exist_ok=True)

    for img_file in image_files:
        new_img_dir = os.path.join(processed_subdir_path, os.path.dirname(img_file))
        os.makedirs(new_img_dir, exist_ok=True)
        new_img_path = path_to_format(os.path.join(new_img_dir, os.path.basename(img_file)))

        # Crops are numbered from 000, so the first crop marks a finished page.
        first_crop_path = f"{os.path.splitext(new_img_path)[0]}_{0:03d}_cropped.jpg"
        if os.path.exists(first_crop_path):
            print(f"Skipping {new_img_path} as it already exists.")
            continue
        save_img(new_save_path=new_img_path, old_img_path=img_file)


if __name__ == '__main__':
    main()