File size: 3,277 Bytes
dced7d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# Algorithm configuration.
# NOTE(review): this copy of the file has lost its nesting indentation. Keys
# such as `params`, `policy`, `hidden_sizes`, `steps_per_epoch` and `type`
# recur below at what is now the same level, so the hierarchy must be restored
# before the file can be loaded as intended.
# The dotted paths listed under `from_keys` later in this file
# (`algo.ppo.tanhnormal`, `algo.rnd.intrinsic_reward_weight`) indicate that
# `ppo` and `rnd` are children of `algo`, with those keys directly beneath them.
algo:
# presumably DDPG-specific settings — confirm against the consumer
ddpg:
params:
target_update_tau: 0.01
policy:
exploration:
sigma: 0.3
theta: 0.15
# NOTE(review): parent mapping not recoverable from this copy; key order
# suggests a sibling of `ddpg`/`dqn` — confirm
deterministic_params:
buffer_batch_size: 32
min_buffer_size: 10000
n_train_steps: 500
qf_lr: 0.0001
steps_per_epoch: 1
# presumably DQN-specific settings — confirm against the consumer
dqn:
params:
clip_gradient: 10
deterministic_eval: true
double_q: false
target_update_freq: 2
policy:
exploration:
decay_ratio: 0.5
max_epsilon: 1.0
min_epsilon: 0.05
general_params:
discount: 0.99
package: garage
# Third `policy` key in this section; the two hidden-layer sizes below are
# written as a flush block sequence.
policy:
hidden_sizes:
- 128
- 128
pretrained_policy: null
ppo:
params:
center_adv: false
# `algo.ppo.tanhnormal` is one of the dotted paths listed under `from_keys`,
# so this key sits directly under `ppo`, not under `ppo.params`.
tanhnormal: false
pretrain:
additional_config: null
algo_to_pretrain: null
params:
episodes_per_batch: 10
loss: log_prob
policy_lr: 0.01
# NOTE(review): could belong to `pretrain` or to `pretrain.params`; the
# original indentation is needed to tell — confirm
pretrain_algo: rbc
replay_buffer:
buffer_size: 200000
rnd:
batch_size: 64
bound_reward_weight: cosine
bound_reward_weight_initial_ratio: 0.999999
bound_reward_weight_transient_epochs: 10
hidden_sizes:
- 64
- 64
# Referenced as `algo.rnd.intrinsic_reward_weight` in the `from_keys` list.
intrinsic_reward_weight: 0.0001
n_train_steps: 32
output_dim: 128
predictor_lr: 0.001
standardize_extrinsic_reward: true
standardize_intrinsic_reward: true
sampler:
n_workers: 16
type: ray
train:
batch_size: 50000
n_epochs: 100
steps_per_epoch: 32
# NOTE(review): second `type` key in this section (the first is `type: ray`
# after `sampler`). Whether this one belongs to `train` or to `algo` itself
# cannot be determined from this copy — confirm
type: ppo
# Run-context settings: logging switches, log directory, seed, snapshot gap,
# verbosity and wandb options.
# NOTE(review): nesting indentation is missing in this copy; restore it before
# loading.
context:
disable_logging: false
experiment_name: null
log_dir:
# Dotted paths that match keys defined elsewhere in this file (e.g.
# `context.seed`, `env.domain_randomization.noise_std`).
# presumably their values are used to build the log directory name — confirm
from_keys:
- microgrid.config.scenario
- microgrid.methods.set_forecaster.forecaster
- microgrid.methods.set_module_attrs.battery_transition_model
- context.seed
- env.domain_randomization.noise_std
- algo.ppo.tanhnormal
- algo.rnd.intrinsic_reward_weight
# NOTE(review): absolute path inside one user's home directory; this will not
# exist on other machines — consider overriding per environment
parent: /home/ahalev/data/GridRL/paper_experiments
use_existing_dir: false
# Referenced as `context.seed` in the `from_keys` list above.
seed: 1
snapshot_gap: 10
verbose: 0
wandb:
# Relative path to a key file rather than an inline secret.
# NOTE(review): the base directory it is resolved against is not visible
# here — confirm
api_key_file: ../../local/wandb_api_key.txt
group: null
log_density: 1
plot_baseline:
- mpc
- rbc
username: ahalev
# Environment settings: environment class name, domain randomization, net-load
# options and the list of observation keys.
# NOTE(review): nesting indentation is missing in this copy; restore it before
# loading.
env:
cls: DiscreteMicrogridEnv
domain_randomization:
# Referenced as `env.domain_randomization.noise_std` in the `from_keys` list.
noise_std: 0.01
relative_noise: true
forced_genset: null
net_load:
slack_module: grid
use: true
# Observation keys: `soc`, `net_load`, the current import price, and import
# price forecast entries indexed 0 through 10.
# NOTE(review): `forecast_horizon` under `set_forecaster` is 23, while only
# forecast indices 0-10 are listed here — confirm this subset is intentional
observation_keys:
- soc
- net_load
- import_price_current
- import_price_forecast_0
- import_price_forecast_1
- import_price_forecast_2
- import_price_forecast_3
- import_price_forecast_4
- import_price_forecast_5
- import_price_forecast_6
- import_price_forecast_7
- import_price_forecast_8
- import_price_forecast_9
- import_price_forecast_10
# Microgrid settings: attributes, scenario config, setup methods and the
# train/evaluate trajectory windows.
# NOTE(review): nesting indentation is missing in this copy; restore it before
# loading.
microgrid:
attributes:
# `!BaselineShaper` is an application-specific YAML tag: the loader must have
# a constructor registered for it, and a plain safe loader will not recognize
# it. The keys that follow are presumably its arguments — confirm
reward_shaping_func: !BaselineShaper
baseline_module: false
# Two-element sequence mixing a string and an integer.
# presumably a (module name, index) pair — confirm
module:
- grid
- 0
config:
# Referenced as `microgrid.config.scenario` in the `from_keys` list.
scenario: 6
methods:
set_forecaster:
forecast_horizon: 23
# A float here, not a name.
# presumably a forecast-noise level given the `*_noise`/`*_uncertainty`
# flags below — confirm
forecaster: 0.0
forecaster_increase_uncertainty: true
forecaster_relative_noise: true
set_module_attrs:
battery_transition_model: null
normalized_action_bounds:
- 0.0
- 1.0
trajectory:
# The evaluation window starts at step 5840, the same value as the training
# window's `final_step` below.
# presumably `final_step: -1` means "through the end of the data" — confirm
evaluate:
final_step: -1
initial_step: 5840
trajectory_func: null
train:
final_step: 5840
initial_step: 0
# `!FixedLengthStochasticTrajectory` is another application-specific tag that
# needs a registered constructor; `trajectory_length` is presumably its
# argument — confirm
trajectory_func: !FixedLengthStochasticTrajectory
trajectory_length: 720
# NOTE(review): a `verbose` key also appears earlier in this file with value 0
# (in the `context` section). With indentation lost, it is unclear whether this
# one is a top-level key or belongs to a preceding mapping — confirm
verbose: 1
|