#!/usr/bin/env python
"""Strip character-count tags (``1boy``, ``2girls``, ...) from ``.txt`` files.

Usage: ``script.py [DIRECTORY]`` -- DIRECTORY defaults to the current one.
"""
import os
import re
import sys
from pathlib import Path

# One digit 1-9 followed by boy/girl with an optional plural "s".
# The optional "s" (rather than an alternation that tries "boy" before
# "boys") makes sure the plural is consumed in full, and the word boundaries
# keep the pattern from biting into longer words such as "1boyfriend".
_COUNT_TAG_RE = re.compile(r'\b[1-9](?:boy|girl)s?\b')


def _clean_line(line):
    """Return *line* without count tags, as a tidy ``", "``-separated list."""
    pieces = _COUNT_TAG_RE.sub('', line).split(',')
    # Removing a tag leaves an empty piece behind; dropping the empty pieces
    # takes care of leading, trailing and doubled commas in one go.
    kept = [piece.strip() for piece in pieces]
    return ', '.join(piece for piece in kept if piece)


def _clean_text(text):
    """Apply :func:`_clean_line` to every line of *text*, keeping line breaks."""
    # Split on "\n" only so the number of lines (and a trailing newline,
    # if present) survives the round trip unchanged.
    return '\n'.join(_clean_line(line) for line in text.split('\n'))


def remove_boys(target_dir='.'):
    """Remove ``[1-9]boy(s)`` / ``[1-9]girl(s)`` tags from text files in place.

    Every ``*.txt`` file found under *target_dir* (searched recursively) is
    read as UTF-8 and cleaned line by line: the count tags are deleted and
    the remaining comma-separated entries are re-joined with ``", "``.
    Whitespace around entries and empty entries are dropped as part of that
    normalisation.  A file is rewritten only when its content changed.

    Args:
        target_dir: Directory to search; defaults to the current directory.
    """
    for file_path in Path(target_dir).rglob('*.txt'):
        original = file_path.read_text(encoding='utf-8')
        cleaned = _clean_text(original)
        # Skip the write when nothing changed so untouched files are not
        # rewritten needlessly.
        if cleaned != original:
            file_path.write_text(cleaned, encoding='utf-8')


if __name__ == "__main__":
    # Directory comes from the first command-line argument, if any.
    target_dir = sys.argv[1] if len(sys.argv) > 1 else '.'
    remove_boys(target_dir)