# Experiments for ICLR 2018 paper: [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526)

Runs policy gradient (REINFORCE), priority queue training, genetic algorithm,
and uniform random search.

Run all examples below out of your top-level repo directory, i.e. where your
git clone resides.

## Just tell me how to run something and see results

```bash
# These tasks are the fastest to learn. 'echo' and 'count-down' are very easy.
# run_eval_tasks.py will do most of the work to run all the jobs.
# Should take between 10 and 30 minutes.

# How many repetitions each experiment will run. In the paper, we use 25. Fewer
# reps means faster experiments, but noisier results.
REPS=25

# Extra description in the job names for these experiments. Use this
# description to distinguish between multiple runs of the same experiment.
DESC="demo"

# The tasks to run.
TASKS="reverse echo-second-seq"

# The model types and max NPE.
EXPS=( pg-20M topk-20M ga-20M rand-20M )

# Where training data is saved. This is chosen by launch_training.sh. Custom
# implementations of launch_training.sh may use different locations.
MODELS_DIR="/tmp/models"

# Run run_eval_tasks.py for each experiment name in EXPS.
for exp in "${EXPS[@]}"
do
  ./single_task/run_eval_tasks.py \
      --exp "$exp" --tasks $TASKS --desc "$DESC" --reps $REPS
done

# During training or after completion, run this to aggregate results into a
# table. This is also useful for seeing how much progress has been made.
# Make sure the arguments here match the settings used above.
# Note: This can take a few minutes because it reads from every experiment
# directory.
bazel run single_task:aggregate_experiment_results -- \
    --models_dir="$MODELS_DIR" \
    --max_npe="20M" \
    --task_list="$TASKS" \
    --model_types="[('pg', '$DESC'), ('topk', '$DESC'), ('ga', '$DESC'), ('rand', '$DESC')]" \
    --csv_file="/tmp/results_table.csv"
```

## Reproduce tuning results in paper

```bash
bazel build -c opt single_task:tune.par

# PG and TopK Tuning.
MAX_NPE=5000000
CONFIG="
env=c(task_cycle=['reverse-tune','remove-tune']),
agent=c(
  algorithm='pg',
  grad_clip_threshold=50.0,param_init_factor=0.5,entropy_beta=0.05,lr=1e-5,
  optimizer='rmsprop',ema_baseline_decay=0.99,topk_loss_hparam=0.0,topk=0,
  replay_temperature=1.0,alpha=0.0,eos_token=False),
timestep_limit=50,batch_size=64"
./single_task/launch_tuning.sh \
    --job_name="iclr_pg_gridsearch.reverse-remove" \
    --config="$CONFIG" \
    --max_npe="$MAX_NPE" \
    --num_workers_per_tuner=1 \
    --num_ps_per_tuner=0 \
    --num_tuners=1 \
    --num_repetitions=50 \
    --hparam_space_type="pg" \
    --stop_on_success=true
./single_task/launch_tuning.sh \
    --job_name="iclr_pg_topk_gridsearch.reverse-remove" \
    --config="$CONFIG" \
    --max_npe="$MAX_NPE" \
    --num_workers_per_tuner=1 \
    --num_ps_per_tuner=0 \
    --num_tuners=1 \
    --num_repetitions=50 \
    --hparam_space_type="pg-topk" \
    --fixed_hparams="topk=10" \
    --stop_on_success=true
./single_task/launch_tuning.sh \
    --job_name="iclr_topk_gridsearch.reverse-remove" \
    --config="$CONFIG" \
    --max_npe="$MAX_NPE" \
    --num_workers_per_tuner=1 \
    --num_ps_per_tuner=0 \
    --num_tuners=1 \
    --num_repetitions=50 \
    --hparam_space_type="topk" \
    --fixed_hparams="topk=10" \
    --stop_on_success=true
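
# Optional progress check (a sketch, not part of the original workflow): the
# tuning jobs write under "$MODELS_DIR/<job_name>", which is where the
# aggregate_tuning_results commands below read from. Listing those directories
# is a quick way to confirm the grid searches launched. MODELS_DIR=/tmp/models
# is an assumption matching the default used elsewhere in this README.
MODELS_DIR="${MODELS_DIR:-/tmp/models}"
ls -d "$MODELS_DIR"/iclr_*_gridsearch.reverse-remove 2>/dev/null \
    || echo "No tuning directories found yet under $MODELS_DIR"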

# GA Tuning.
CONFIG="
env=c(task_cycle=['reverse-tune','remove-char-tune']),
agent=c(algorithm='ga'),
timestep_limit=50"
./single_task/launch_tuning.sh \
    --job_name="iclr_ga_gridsearch.reverse-remove" \
    --config="$CONFIG" \
    --max_npe="$MAX_NPE" \
    --num_workers_per_tuner=25 \
    --num_ps_per_tuner=0 \
    --num_tuners=1 \
    --num_repetitions=50 \
    --hparam_space_type="ga" \
    --stop_on_success=true

# Aggregate tuning results. Run after tuning jobs complete.
bazel run -c opt single_task:aggregate_tuning_results -- \
    --tuning_dir="$MODELS_DIR/iclr_pg_gridsearch.reverse-remove"
bazel run -c opt single_task:aggregate_tuning_results -- \
    --tuning_dir="$MODELS_DIR/iclr_pg_topk_gridsearch.reverse-remove"
bazel run -c opt single_task:aggregate_tuning_results -- \
    --tuning_dir="$MODELS_DIR/iclr_topk_gridsearch.reverse-remove"
bazel run -c opt single_task:aggregate_tuning_results -- \
    --tuning_dir="$MODELS_DIR/iclr_ga_gridsearch.reverse-remove"
```

## Reproduce eval results in paper

```bash
DESC="v0"  # Description for each experiment. "Version 0" is a good default.
EXPS=( pg-5M topk-5M ga-5M rand-5M pg-20M topk-20M ga-20M rand-20M )

for exp in "${EXPS[@]}"
do
  ./single_task/run_eval_tasks.py \
      --exp "$exp" --iclr_tasks --desc "$DESC"
done
```

## Run single experiment

```bash
EXP="topk-20M"  # Learning algorithm + max NPE.
TASK="reverse"  # Coding task.
DESC="v0"       # Description for each experiment. "Version 0" is a good default.

./single_task/run_eval_tasks.py \
    --exp "$EXP" --task "$TASK" --desc "$DESC"
```

## Fetch eval results into a table

```bash
# These arguments should match the settings you used to run the experiments.
MODELS_DIR="/tmp/models"
MAX_NPE="20M"
DESC="v0"  # Same description used in the experiments.

# MODEL_TYPES specifies each model type and the description used in their
# experiments.
MODEL_TYPES="[('pg', '$DESC'), ('topk', '$DESC'), ('ga', '$DESC'), ('rand', '$DESC')]"

TASKS=""  # Empty string will default to all ICLR tasks.
# To specify a custom task list, give task names separated by spaces. Example:
# TASKS="reverse remove-char"

bazel run single_task:aggregate_experiment_results -- \
    --models_dir="$MODELS_DIR" \
    --max_npe="$MAX_NPE" \
    --task_list="$TASKS" \
    --model_types="$MODEL_TYPES" \
    --csv_file="/tmp/results_table.csv"
```

## Reproduce shortest code examples in paper

```bash
# Maximum NPE is higher here. We only do 1 repetition, and the algorithm needs
# time to simplify its solution.
MODELS_DIR="/tmp/models"
NPE="500M"
DESC="short-code"

./single_task/run_eval_tasks.py \
    --exp "simpl-$NPE" --desc "$DESC" --iclr_tasks --reps 1

# Aggregate best code strings. Run after training completes.
TASKS=""  # Empty string. Will default to all ICLR tasks.
bazel run single_task:aggregate_experiment_results -- \
    --models_dir="$MODELS_DIR" \
    --max_npe="$NPE" \
    --task_list="$TASKS" \
    --model_types="[('topk', '$DESC')]" \
    --data=code
```
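
The `aggregate_experiment_results` invocations above write their table to the
path given by `--csv_file` (`/tmp/results_table.csv` in these examples). Below
is a minimal sketch for eyeballing that CSV from the shell, assuming only
standard Unix tools and the default path used above.

```bash
# Pretty-print the aggregated results table. The path is an assumption that
# matches the --csv_file argument used in the examples above; change it if you
# wrote the CSV somewhere else.
column -s, -t < /tmp/results_table.csv | less -S
```

`column -s, -t` aligns the comma-separated fields into columns, and `less -S`
keeps long rows on one line so you can scroll horizontally.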