# Experiment configuration (dced7d6)
algo:  # algorithm selection and training hyperparameters
  ddpg:
    params:
      target_update_tau: 0.01
    policy:
      exploration:  # exploration noise parameters (sigma/theta suggest Ornstein-Uhlenbeck)
        sigma: 0.3
        theta: 0.15
  deterministic_params:
    buffer_batch_size: 32
    min_buffer_size: 10000
    n_train_steps: 500
    qf_lr: 0.0001
    steps_per_epoch: 1
  dqn:
    params:
      clip_gradient: 10
      deterministic_eval: true
      double_q: false
      target_update_freq: 2
    policy:
      exploration:
        decay_ratio: 0.5
        max_epsilon: 1.0
        min_epsilon: 0.05
  general_params:
    discount: 0.99
  package: garage
  policy:
    hidden_sizes:
    - 128
    - 128
    pretrained_policy: null
  ppo:
    params:
      center_adv: false
    tanhnormal: false
  pretrain:
    additional_config: null
    algo_to_pretrain: null
    params:
      episodes_per_batch: 10
      loss: log_prob
      policy_lr: 0.01
    pretrain_algo: rbc
  replay_buffer:
    buffer_size: 200000
  rnd:  # Random Network Distillation intrinsic-reward settings
    batch_size: 64
    bound_reward_weight: cosine
    bound_reward_weight_initial_ratio: 0.999999
    bound_reward_weight_transient_epochs: 10
    hidden_sizes:
    - 64
    - 64
    intrinsic_reward_weight: 0.0001
    n_train_steps: 32
    output_dim: 128
    predictor_lr: 0.001
    standardize_extrinsic_reward: true
    standardize_intrinsic_reward: true
  sampler:
    n_workers: 16
    type: ray
  train:
    batch_size: 50000
    n_epochs: 100
    steps_per_epoch: 32
  type: ppo
context:  # experiment bookkeeping: logging, seeding, wandb
  disable_logging: false
  experiment_name: null
  log_dir:
    from_keys:
    - microgrid.config.scenario
    - microgrid.methods.set_forecaster.forecaster
    - microgrid.methods.set_module_attrs.battery_transition_model
    - context.seed
    - env.domain_randomization.noise_std
    - algo.ppo.tanhnormal
    - algo.rnd.intrinsic_reward_weight
    parent: /home/ahalev/data/GridRL/paper_experiments
    use_existing_dir: false
  seed: 1
  snapshot_gap: 10
  verbose: 0
  wandb:
    api_key_file: ../../local/wandb_api_key.txt
    group: null
    log_density: 1
    plot_baseline:
    - mpc
    - rbc
    username: ahalev
env:  # environment construction and observation keys
  cls: DiscreteMicrogridEnv
  domain_randomization:
    noise_std: 0.01
    relative_noise: true
  forced_genset: null
  net_load:
    slack_module: grid
    use: true
  observation_keys:
  - soc
  - net_load
  - import_price_current
  - import_price_forecast_0
  - import_price_forecast_1
  - import_price_forecast_2
  - import_price_forecast_3
  - import_price_forecast_4
  - import_price_forecast_5
  - import_price_forecast_6
  - import_price_forecast_7
  - import_price_forecast_8
  - import_price_forecast_9
  - import_price_forecast_10
microgrid:  # microgrid scenario, forecasting, and train/eval trajectories
  attributes:
    reward_shaping_func: !BaselineShaper
      baseline_module: false
      module:
      - grid
      - 0
  config:
    scenario: 6
  methods:
    set_forecaster:
      forecast_horizon: 23
      forecaster: 0.0
      forecaster_increase_uncertainty: true
      forecaster_relative_noise: true
    set_module_attrs:
      battery_transition_model: null
      normalized_action_bounds:
      - 0.0
      - 1.0
  trajectory:
    evaluate:
      final_step: -1
      initial_step: 5840
      trajectory_func: null
    train:
      final_step: 5840
      initial_step: 0
      trajectory_func: !FixedLengthStochasticTrajectory
        trajectory_length: 720
verbose: 1
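
This file relies on two application-specific YAML tags, `!BaselineShaper` and `!FixedLengthStochasticTrajectory`, so a plain `yaml.safe_load` will fail until constructors for those tags are registered. The sketch below shows one minimal way to load it with PyYAML; the stand-in classes, their constructor signatures, and the `config.yaml` filename are assumptions for illustration only, not the project's actual API.

```python
import yaml


# Placeholder stand-ins for the classes behind the custom YAML tags.
# The real implementations (and their constructor signatures) live in the
# project that consumes this config; these exist only so the file parses.
class BaselineShaper:
    def __init__(self, baseline_module=None, module=None):
        self.baseline_module = baseline_module
        self.module = module


class FixedLengthStochasticTrajectory:
    def __init__(self, trajectory_length=None):
        self.trajectory_length = trajectory_length


def _mapping_constructor(cls):
    # Turn the mapping under a custom tag into a constructor call on `cls`.
    def construct(loader, node):
        return cls(**loader.construct_mapping(node, deep=True))
    return construct


yaml.SafeLoader.add_constructor('!BaselineShaper',
                                _mapping_constructor(BaselineShaper))
yaml.SafeLoader.add_constructor('!FixedLengthStochasticTrajectory',
                                _mapping_constructor(FixedLengthStochasticTrajectory))

with open('config.yaml') as f:  # hypothetical filename
    config = yaml.safe_load(f)

assert config['algo']['type'] == 'ppo'
assert config['algo']['train']['n_epochs'] == 100
traj = config['microgrid']['trajectory']['train']['trajectory_func']
assert traj.trajectory_length == 720
```

Registering the constructors on `yaml.SafeLoader` keeps `safe_load` usable; the project itself may use its own loader or class registry instead.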