meg-huggingface
Testing whether timeout for taking too long works.
bee1453
raw
history blame
1.61 kB
#!/bin/bash
# Start-up section: publishes the SPACE identifier to the environment and
# prints the initial status messages before any benchmarking starts.

# Exported so that every child process launched by this script inherits it.
# NOTE(review): presumably read by the Python helper scripts -- confirm.
SPACE="AIEnergyScore/launch-computation-example"
export SPACE

# The H100 availability check is currently disabled (its invocation is kept
# below as a comment), so the script always proceeds to the benchmark phase.
printf '%s\n' "Not checking h100 -- already know it's not there."
#python /check_h100.py
printf '%s\n' "Attempting to run."
# Disabled guard that used to gate the run on the check's exit status:
#if [[ $? = 0 ]]; then
# For each line in the requests dataset....
# Every line printed by /parse_requests.py is split on the first comma into
# "<backend_model>,<experiment_name>"; one benchmark is launched per line.
python /parse_requests.py | while read -r line; do
  # Read the name of the model and the experiment.
  # -r keeps backslashes in the fields literal.
  IFS="," read -r backend_model experiment_name <<< "${line}"
  echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"

  # Initialize the directory for output (one timestamped directory per run).
  now=$(date +%Y-%m-%d-%H-%M-%S)
  run_dir="./runs/${experiment_name}/${backend_model}/${now}"
  mkdir -p "$run_dir"

  # Let the benchmarking begin!
  # The exit status is captured immediately: any later command (including a
  # failure handler) would overwrite $?, which previously made the timeout
  # check below unreachable.
  # NOTE(review): a 5 second limit looks like a test value -- confirm.
  benchmark_status=0
  timeout 5 optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" ||
    benchmark_status=$?

  if [ "${benchmark_status}" -eq 124 ]; then
    # Timeout occurred: `timeout` exits with 124 when it had to stop the
    # command. Report it with a dedicated reason, then remove the run
    # directory for that specific model.
    echo "timeout occurred"
    python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" --reason "TAKES_TOO_LONG" && rm -rf -- "${run_dir}"
  elif [ "${benchmark_status}" -ne 0 ]; then
    # If the benchmark fails for any other reason, update accordingly.
    # Remove the run directory for that specific model. The directory is
    # only removed when the failure was recorded successfully.
    python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf -- "${run_dir}"
  fi
done
# Wrap-up phase: build the results, upload the run folder, then pause the
# Space. Each step prints a progress line before its command is started, and
# a failing step does not stop the following ones.

# Print the message given as first argument, then run the remaining
# arguments as a command.
announce_then_run() {
  printf '%s\n' "$1"
  shift
  "$@"
}

announce_then_run "Finished; uploading dataset results" \
  python /create_results.py ./runs

# NOTE(review): results are read from the relative "./runs" above while the
# upload uses the absolute "/runs"; these only match when the working
# directory is "/" -- confirm.
announce_then_run "Uploading all output from the /runs folder." \
  python /upload_run_folder.py --run_dir "/runs"

# Pausing space
announce_then_run "Pausing space." \
  python /pause_space.py

printf '%s\n' "Done."