File size: 2,401 Bytes
c87716a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
#!/bin/bash
#
# Rewrites the `checkpoint` file inside every iter_ckpt_rank_*/ directory so
# that its model_checkpoint_path entry points at step TARGET_STEP.
#
# Usage:   <this-script> [TARGET_STEP]
#   TARGET_STEP  step number to point the checkpoints at (default: 91000)
#
# To be run in the main checkpoints directory (the one that contains the
# iter_ckpt_rank_* directories).

# Step number for the new model checkpoint path. An optional first
# command-line argument overrides the historical default of 91000.
TARGET_STEP="${1:-91000}"
# Function to update a single checkpoint file
#######################################
# Rewrite a single checkpoint index file in place.
#
# The file is treated as line-oriented text (the field names look like a
# TensorFlow CheckpointState text proto -- TODO confirm). Three kinds of
# line are recognised by prefix:
#   all_model_checkpoint_paths*       the trailing entries are dropped
#   all_model_checkpoint_timestamps*  the trailing entries are dropped
#   model_checkpoint_path*            replaced by
#       model_checkpoint_path: "iter_ckpt_rank_<rank>-<TARGET_STEP>"
# Every other line is copied through unchanged.
#
# <rank> is the 4th '_'-separated field of the NAME of the directory that
# contains the file (e.g. "03" for iter_ckpt_rank_03/checkpoint). Parent
# directories are ignored, so absolute paths and parents containing '_'
# work too.
#
# NOTE(review): the original comments said "remove the last two" entries,
# but the original condition only ever removed the last one. The default
# below keeps what the code actually did; pass 2 as the second argument if
# two entries really should be dropped.
#
# Globals:
#   TARGET_STEP (read) - step number written into model_checkpoint_path
# Arguments:
#   $1 - path to the checkpoint file to rewrite
#   $2 - optional: how many trailing path/timestamp entries to drop
#        (non-negative integer, default 1)
# Returns:
#   0 on success; 1 if the file is missing or cannot be rewritten;
#   2 if $2 is not a non-negative integer
#######################################
update_checkpoint_file() {
  local checkpoint_file="$1"
  local drop_count="${2:-1}"
  local tmp_file line rank_dir rank
  local path_count=0 timestamp_count=0
  local path_seen=0 timestamp_seen=0

  if [[ ! -f "$checkpoint_file" ]]; then
    printf 'update_checkpoint_file: not a file: %s\n' "$checkpoint_file" >&2
    return 1
  fi
  if [[ ! "$drop_count" =~ ^[0-9]+$ ]]; then
    printf 'update_checkpoint_file: invalid drop count: %s\n' "$drop_count" >&2
    return 2
  fi
  # Force base 10 so a value such as "08" is not parsed as (invalid) octal.
  drop_count=$((10#$drop_count))

  # Work out the rank once, from the containing directory's own name only.
  rank_dir=$(dirname -- "$checkpoint_file") || return 1
  rank=$(basename -- "$rank_dir" | cut -d'_' -f4)

  # Pass 1: count the entries so pass 2 knows which ones are the trailing
  # ones. `|| [[ -n $line ]]` keeps a final line lacking a newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    case "$line" in
      all_model_checkpoint_paths*)
        path_count=$((path_count + 1))
        ;;
      all_model_checkpoint_timestamps*)
        timestamp_count=$((timestamp_count + 1))
        ;;
    esac
  done < "$checkpoint_file"

  tmp_file=$(mktemp) || return 1

  # Pass 2: emit the filtered content into the temporary file.
  while IFS= read -r line || [[ -n "$line" ]]; do
    case "$line" in
      all_model_checkpoint_paths*)
        path_seen=$((path_seen + 1))
        if (( path_seen > path_count - drop_count )); then
          continue
        fi
        ;;
      all_model_checkpoint_timestamps*)
        timestamp_seen=$((timestamp_seen + 1))
        if (( timestamp_seen > timestamp_count - drop_count )); then
          continue
        fi
        ;;
      model_checkpoint_path*)
        line="model_checkpoint_path: \"iter_ckpt_rank_${rank}-${TARGET_STEP}\""
        ;;
    esac
    # printf, not echo: a line such as "-n" must be written literally.
    printf '%s\n' "$line"
  done < "$checkpoint_file" > "$tmp_file" || {
    rm -f -- "$tmp_file"
    return 1
  }

  # Copy the content back instead of mv-ing the temp file over the
  # original: mktemp files are owner-only, and mv would carry that mode
  # onto the checkpoint file.
  if ! cat -- "$tmp_file" > "$checkpoint_file"; then
    rm -f -- "$tmp_file"
    return 1
  fi
  rm -f -- "$tmp_file"
}
# Back up and rewrite the checkpoint file of every rank directory found in
# the current working directory.
updated_count=0
for checkpoint_file in iter_ckpt_rank_*/checkpoint; do
  # When nothing matches, bash leaves the glob pattern as a literal word;
  # skip it (and anything else that is not a regular file).
  if [[ ! -f "$checkpoint_file" ]]; then
    continue
  fi
  # Backup the original checkpoint file; never modify a file whose backup
  # could not be written.
  if ! cp -- "$checkpoint_file" "${checkpoint_file}.bak"; then
    printf 'Skipping %s: could not create backup\n' "$checkpoint_file" >&2
    continue
  fi
  # Update the checkpoint file
  if update_checkpoint_file "$checkpoint_file"; then
    updated_count=$((updated_count + 1))
  else
    printf 'Failed to update %s (backup kept at %s.bak)\n' \
      "$checkpoint_file" "$checkpoint_file" >&2
  fi
done
# Only claim success when at least one file was actually rewritten.
if (( updated_count > 0 )); then
  echo "Checkpoint files have been updated."
else
  echo "No checkpoint files were updated." >&2
fi
|