File size: 8,923 Bytes
27725b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
β β
βββ¬ββ‘ βββ¬ββ
βββ¬ββββββ β¬ββββββ¬β
ββ¬ββββββ¬β ββ¬βββββββ βββ
β¬β¬β¬β¬ββββ¦β ββ¬ββββ£β£β£β¬ ββ£β£β¬ ββ£β£β£ βββ ββ£β£
β¬β¬β¬β¬β¬β¬β¬β¬βββ¬ββββ¬βͺβββ£β£β£β£β£β£β£β¬ ββ£β£β¬ ββ£β£β£ ββ£β£βββ£β£β£β β£β£β£ β£β£β£β£β£β£ ββ£β£β β£β£β£
β¬β¬β¬β¬β ββ¬β¬β¬β¬βββ£β£β£ββ β«β£β£β£β¬ ββ£β£β¬ ββ£β£β£ ββ£β£β£β ββ£β£β£ β£β£β£ βββ£β£ββ β«β£β£ ββ£β£
β¬β¬β¬β¬β ββ¬β¬β£β£ β«β£β£β£β¬ ββ£β£β¬ ββ£β£β£ ββ£β£β¬ β£β£β£ β£β£β£ ββ£β£ β£β£β£ββ£β£β
β¬β¬β¬β β¬β¬β£β£ βββ£β£β¬ ββ£β£β£βββββ£β£β£β ββ£β£β¬ β£β£β£ β£β£β£ ββ£β£β¦β β£β£β£β£β£
β ββ¦β β¬β¬β£β£ ββββ βββ£β£β£β£ββ ββββ βββ βββ ββ£β£β£ ββ£β£β£
β©β¬β¬β¬β¬β¬β¬β¦β¦β¬β¬β£β£ββ£β£β£β£β£β£β£β β«β£β£β£β£
ββ¬β¬β¬β¬β¬β¬β¬β£β£β£β£β£β£ββ
ββ¬β¬β¬β£β£β£β
β
Version information:
ml-agents: 1.1.0,
ml-agents-envs: 1.1.0,
Communicator API: 1.5.0,
PyTorch: 2.5.1+cu124
[INFO] Connected to Unity environment with package version 2.2.1-exp.1 and communication version 1.5.0
[INFO] Connected new brain: Pyramids?team=0
[INFO] Hyperparameters for behavior name Pyramids:
trainer_type: ppo
hyperparameters:
batch_size: 128
buffer_size: 2048
learning_rate: 0.0003
beta: 0.01
epsilon: 0.2
lambd: 0.95
num_epoch: 3
shared_critic: False
learning_rate_schedule: linear
beta_schedule: linear
epsilon_schedule: linear
checkpoint_interval: 500000
network_settings:
normalize: False
hidden_units: 512
num_layers: 2
vis_encode_type: simple
memory: None
goal_conditioning_type: hyper
deterministic: False
reward_signals:
extrinsic:
gamma: 0.99
strength: 1.0
network_settings:
normalize: False
hidden_units: 128
num_layers: 2
vis_encode_type: simple
memory: None
goal_conditioning_type: hyper
deterministic: False
rnd:
gamma: 0.99
strength: 0.01
network_settings:
normalize: False
hidden_units: 64
num_layers: 3
vis_encode_type: simple
memory: None
goal_conditioning_type: hyper
deterministic: False
learning_rate: 0.0001
encoding_size: None
init_path: None
keep_checkpoints: 5
even_checkpoints: False
max_steps: 1500000
time_horizon: 128
summary_freq: 30000
threaded: False
self_play: None
behavioral_cloning: None
[INFO] Pyramids. Step: 30000. Time Elapsed: 148.003 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.
[INFO] Pyramids. Step: 60000. Time Elapsed: 265.467 s. Mean Reward: -0.933. Std of Reward: 0.372. Training.
[INFO] Pyramids. Step: 90000. Time Elapsed: 367.221 s. Mean Reward: -0.710. Std of Reward: 0.798. Training.
[INFO] Pyramids. Step: 120000. Time Elapsed: 464.638 s. Mean Reward: -0.776. Std of Reward: 0.720. Training.
[INFO] Pyramids. Step: 150000. Time Elapsed: 558.278 s. Mean Reward: -0.767. Std of Reward: 0.704. Training.
[INFO] Pyramids. Step: 180000. Time Elapsed: 647.350 s. Mean Reward: -0.735. Std of Reward: 0.702. Training.
[INFO] Pyramids. Step: 210000. Time Elapsed: 729.384 s. Mean Reward: -0.734. Std of Reward: 0.718. Training.
[INFO] Pyramids. Step: 240000. Time Elapsed: 811.080 s. Mean Reward: -0.770. Std of Reward: 0.717. Training.
[INFO] Pyramids. Step: 270000. Time Elapsed: 892.234 s. Mean Reward: -0.677. Std of Reward: 0.828. Training.
[INFO] Pyramids. Step: 300000. Time Elapsed: 972.033 s. Mean Reward: -0.623. Std of Reward: 0.880. Training.
[INFO] Pyramids. Step: 330000. Time Elapsed: 1052.993 s. Mean Reward: -0.547. Std of Reward: 0.933. Training.
[INFO] Pyramids. Step: 360000. Time Elapsed: 1133.943 s. Mean Reward: -0.255. Std of Reward: 1.081. Training.
[INFO] Pyramids. Step: 390000. Time Elapsed: 1214.047 s. Mean Reward: 0.361. Std of Reward: 1.218. Training.
[INFO] Pyramids. Step: 420000. Time Elapsed: 1295.921 s. Mean Reward: 0.438. Std of Reward: 1.226. Training.
[INFO] Pyramids. Step: 450000. Time Elapsed: 1379.688 s. Mean Reward: 0.412. Std of Reward: 1.220. Training.
[INFO] Pyramids. Step: 480000. Time Elapsed: 1462.029 s. Mean Reward: 0.791. Std of Reward: 1.133. Training.
[INFO] Exported ./results/Pyramids Training/Pyramids/Pyramids-499993.onnx
[INFO] Pyramids. Step: 510000. Time Elapsed: 1548.600 s. Mean Reward: 1.152. Std of Reward: 0.981. Training.
[INFO] Pyramids. Step: 540000. Time Elapsed: 1633.251 s. Mean Reward: 1.311. Std of Reward: 0.824. Training.
[INFO] Pyramids. Step: 570000. Time Elapsed: 1721.782 s. Mean Reward: 1.343. Std of Reward: 0.833. Training.
[INFO] Pyramids. Step: 600000. Time Elapsed: 1805.904 s. Mean Reward: 1.378. Std of Reward: 0.772. Training.
[INFO] Pyramids. Step: 630000. Time Elapsed: 1893.478 s. Mean Reward: 1.566. Std of Reward: 0.591. Training.
[INFO] Pyramids. Step: 660000. Time Elapsed: 1977.764 s. Mean Reward: 1.453. Std of Reward: 0.686. Training.
[INFO] Pyramids. Step: 690000. Time Elapsed: 2066.482 s. Mean Reward: 1.632. Std of Reward: 0.319. Training.
[INFO] Pyramids. Step: 720000. Time Elapsed: 2151.482 s. Mean Reward: 1.549. Std of Reward: 0.660. Training.
[INFO] Pyramids. Step: 750000. Time Elapsed: 2240.531 s. Mean Reward: 1.592. Std of Reward: 0.536. Training.
[INFO] Pyramids. Step: 780000. Time Elapsed: 2328.085 s. Mean Reward: 1.706. Std of Reward: 0.193. Training.
[INFO] Pyramids. Step: 810000. Time Elapsed: 2411.292 s. Mean Reward: 1.578. Std of Reward: 0.580. Training.
[INFO] Pyramids. Step: 840000. Time Elapsed: 2501.359 s. Mean Reward: 1.682. Std of Reward: 0.335. Training.
[INFO] Pyramids. Step: 870000. Time Elapsed: 2586.919 s. Mean Reward: 1.659. Std of Reward: 0.333. Training.
[INFO] Pyramids. Step: 900000. Time Elapsed: 2680.870 s. Mean Reward: 1.629. Std of Reward: 0.434. Training.
[INFO] Pyramids. Step: 930000. Time Elapsed: 2767.200 s. Mean Reward: 1.597. Std of Reward: 0.560. Training.
[INFO] Pyramids. Step: 960000. Time Elapsed: 2857.335 s. Mean Reward: 1.724. Std of Reward: 0.166. Training.
[INFO] Pyramids. Step: 990000. Time Elapsed: 2941.618 s. Mean Reward: 1.619. Std of Reward: 0.553. Training.
[INFO] Exported ./results/Pyramids Training/Pyramids/Pyramids-999946.onnx
[INFO] Pyramids. Step: 1020000. Time Elapsed: 3028.674 s. Mean Reward: 1.678. Std of Reward: 0.411. Training.
[INFO] Pyramids. Step: 1050000. Time Elapsed: 3118.122 s. Mean Reward: 1.692. Std of Reward: 0.402. Training.
[INFO] Pyramids. Step: 1080000. Time Elapsed: 3216.680 s. Mean Reward: 1.727. Std of Reward: 0.183. Training.
[INFO] Pyramids. Step: 1110000. Time Elapsed: 3311.014 s. Mean Reward: 1.661. Std of Reward: 0.489. Training.
[INFO] Pyramids. Step: 1140000. Time Elapsed: 3403.759 s. Mean Reward: 1.660. Std of Reward: 0.528. Training.
[INFO] Pyramids. Step: 1170000. Time Elapsed: 3502.028 s. Mean Reward: 1.663. Std of Reward: 0.472. Training.
[INFO] Pyramids. Step: 1200000. Time Elapsed: 3593.599 s. Mean Reward: 1.724. Std of Reward: 0.368. Training.
[INFO] Pyramids. Step: 1230000. Time Elapsed: 3696.908 s. Mean Reward: 1.726. Std of Reward: 0.294. Training.
[INFO] Pyramids. Step: 1260000. Time Elapsed: 3794.103 s. Mean Reward: 1.698. Std of Reward: 0.393. Training.
[INFO] Pyramids. Step: 1290000. Time Elapsed: 3880.079 s. Mean Reward: 1.725. Std of Reward: 0.296. Training.
[INFO] Pyramids. Step: 1320000. Time Elapsed: 3962.680 s. Mean Reward: 1.721. Std of Reward: 0.297. Training.
[INFO] Pyramids. Step: 1350000. Time Elapsed: 4045.175 s. Mean Reward: 1.770. Std of Reward: 0.140. Training.
[INFO] Pyramids. Step: 1380000. Time Elapsed: 4137.379 s. Mean Reward: 1.735. Std of Reward: 0.287. Training.
[INFO] Pyramids. Step: 1410000. Time Elapsed: 4225.323 s. Mean Reward: 1.680. Std of Reward: 0.395. Training.
[INFO] Pyramids. Step: 1440000. Time Elapsed: 4312.635 s. Mean Reward: 1.731. Std of Reward: 0.297. Training.
[INFO] Pyramids. Step: 1470000. Time Elapsed: 4398.072 s. Mean Reward: 1.720. Std of Reward: 0.380. Training.
[INFO] Pyramids. Step: 1500000. Time Elapsed: 4495.374 s. Mean Reward: 1.770. Std of Reward: 0.128. Training.
[INFO] Exported ./results/Pyramids Training/Pyramids/Pyramids-1499929.onnx
[INFO] Exported ./results/Pyramids Training/Pyramids/Pyramids-1500057.onnx
[INFO] Copied ./results/Pyramids Training/Pyramids/Pyramids-1500057.onnx to ./results/Pyramids Training/Pyramids.onnx.
|