#!/bin/bash
#
# Benchmark: phonemize a batch of text files in parallel.
#
# Finds up to $num_files "*.txt" files under $input_dir, runs `phonemize` on
# each of them through GNU Parallel, writes the results to
# "$output_dir/phn_<basename>", and prints how long the whole run took.
#
# Requires: bash >= 4 (mapfile), GNU parallel, phonemize (phonemizer CLI).
# Exit status: 0 on success, 1 on a failed precondition, otherwise the exit
# status reported by GNU Parallel.

# Errors are handled explicitly below (every failure path returns a status),
# so `-e` is intentionally not enabled.
set -uo pipefail

readonly input_dir="txt"
readonly output_dir="sangraha_hi_phonemized"
readonly lang="hi"
readonly num_files=50000
# Passed straight to `parallel -j`. Per the GNU Parallel manual a negative
# value -N means "number of CPU threads minus N".
readonly num_jobs=-1

#######################################
# Phonemize a single text file.
# Arguments:
#   $1 - path of the input text file
#   $2 - path of the output file (parent directories are created)
#   $3 - optional language code handed to `phonemize -l` (default: hi)
# Outputs:
#   "Processed: <in> -> <out>" on stdout; a diagnostic on stderr on failure
# Returns:
#   0 on success, 1 if the output directory cannot be created or
#   `phonemize` exits non-zero
#######################################
process_file() {
  local input_file="$1"
  local output_file="$2"
  # The default is kept inside the function because GNU Parallel runs this
  # function in a fresh shell, where the script's unexported variables are
  # not available.
  local phonemize_lang="${3:-hi}"

  # Create the output directory and its parent directories if they don't exist
  if ! mkdir -p -- "$(dirname -- "$output_file")"; then
    printf 'Failed to create output directory for: %s\n' "$output_file" >&2
    return 1
  fi

  if ! phonemize --quiet -l "$phonemize_lang" "$input_file" -o "$output_file" \
    --strip --language-switch remove-flags --preserve-punctuation; then
    printf 'Failed: %s\n' "$input_file" >&2
    return 1
  fi

  echo "Processed: $input_file -> $output_file"
}
# GNU Parallel can only call shell functions that are exported.
export -f process_file

#######################################
# Render a duration as "<M> minutes <S> seconds".
# Arguments:
#   $1 - duration in whole seconds
# Outputs:
#   the formatted duration on stdout
#######################################
format_elapsed() {
  local total_seconds="$1"
  printf '%d minutes %d seconds\n' \
    "$((total_seconds / 60))" "$((total_seconds % 60))"
}

#######################################
# Run the benchmark: collect the files, process them in parallel, report.
# Globals:
#   input_dir, output_dir, lang, num_files, num_jobs (all read)
# Returns:
#   0 on success, 1 on a failed precondition, otherwise GNU Parallel's
#   exit status
#######################################
main() {
  if [[ ! -d "$input_dir" ]]; then
    printf 'Error: input directory not found: %s\n' "$input_dir" >&2
    return 1
  fi

  local tool
  for tool in parallel phonemize; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'Error: required command not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  # Start the timer
  local start_time end_time
  start_time=$(date +%s)

  # Collect the work list first so the report can state how many files were
  # actually found rather than the configured upper bound.
  # File names are newline-delimited here, so names containing a newline are
  # not supported.
  local -a files=()
  mapfile -t files < <(
    find "$input_dir" -type f -name '*.txt' | head -n "$num_files"
  )

  local status=0
  if ((${#files[@]} > 0)); then
    # {} is the input path and {/} its basename; both are substituted (and
    # quoted) by GNU Parallel. A `$(basename {})` command substitution must
    # not be used here: the calling shell would expand it to a literal "{}"
    # before parallel ever sees the command line.
    printf '%s\n' "${files[@]}" \
      | parallel -j "$num_jobs" process_file {} "${output_dir}/phn_{/}" "$lang" \
      || status=$?
  else
    printf 'Warning: no *.txt files found under %s\n' "$input_dir" >&2
  fi

  # End the timer and calculate the elapsed time
  end_time=$(date +%s)
  local elapsed_time=$((end_time - start_time))

  # Print the benchmark results
  echo "Benchmark Results:"
  echo "Number of files processed: ${#files[@]}"
  echo "Number of parallel jobs: $num_jobs"
  echo "Elapsed time: $(format_elapsed "$elapsed_time")"

  return "$status"
}

main "$@"