# check4sig - Dataset Caption File Watermark Detection Utility
#
# Purpose:
#   Scans the .caption files directly inside a dataset directory and opens
#   every file that mentions a signature or watermark in nvim, so the
#   captions can be reviewed and edited by hand.
#
# Usage:
#   check4sig /path/to/dataset
#
# Function workflow:
#   1. Validates the provided directory path.
#   2. Searches each regular *.caption file in that directory (the scan is
#      not recursive) for the substrings "signature" or "watermark".
#      Matching is case-sensitive.
#   3. Opens all matching files in a single nvim invocation.
#
# Parameters:
#   $1 - Target directory containing .caption files
#
# Outputs:
#   stdout - the list of files being opened, or a "nothing found" message
#   stderr - error messages for a missing or invalid directory argument
#
# Returns:
#   1 if the directory argument is empty or is not an existing directory;
#   0 if no caption file matches;
#   otherwise the exit status of nvim.
check4sig() {
  # Keep every working variable local: this function is meant to be sourced
  # into a shell, and must not clobber the caller's variables.
  local target_dir="$1"
  local file
  local -a found_files
  found_files=()

  if [[ -z "$target_dir" ]]; then
    echo "Please provide a target directory." >&2
    return 1
  fi

  if [[ ! -d "$target_dir" ]]; then
    echo "The provided target directory does not exist." >&2
    return 1
  fi

  for file in "$target_dir"/*.caption; do
    # The -f test also skips the literal pattern left behind when the glob
    # matches nothing (nullglob is not assumed to be enabled).
    [[ -f "$file" ]] || continue

    # "--" ends option parsing so a relative path beginning with "-" is
    # still treated as a file name.
    if grep -q -e "signature" -e "watermark" -- "$file"; then
      found_files+=("$file")
    fi
  done

  if [[ ${#found_files[@]} -eq 0 ]]; then
    echo "No 'signature' or 'watermark' found in any .caption files."
    return 0
  fi

  # Print the file list space-separated on one line, independent of IFS.
  printf 'Opening files in nvim:'
  printf ' %s' "${found_files[@]}"
  printf '\n'

  nvim "${found_files[@]}"
}