{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.49734601378440857,
"min": 0.4790073335170746,
"max": 1.3264129161834717,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 14960.16796875,
"min": 14331.8994140625,
"max": 40238.0625,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989875.0,
"min": 29937.0,
"max": 989875.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989875.0,
"min": 29937.0,
"max": 989875.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.5065950751304626,
"min": -0.11977048218250275,
"max": 0.5623494386672974,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 140.32684326171875,
"min": -28.744915008544922,
"max": 156.33314514160156,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.029130082577466965,
"min": -0.039908889681100845,
"max": 0.4272623062133789,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 8.069032669067383,
"min": -10.935035705566406,
"max": 101.26116943359375,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.07173537374311469,
"min": 0.06485745379713856,
"max": 0.07504258903661845,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.0042952324036056,
"min": 0.5810198251213718,
"max": 1.1256388355492768,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01669291511323699,
"min": 0.00047593139434515356,
"max": 0.017830996445740648,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.23370081158531783,
"min": 0.00666303952083215,
"max": 0.24963395024036908,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.401033247307141e-06,
"min": 7.401033247307141e-06,
"max": 0.0002948416892194375,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010361446546229997,
"min": 0.00010361446546229997,
"max": 0.003634496588501199,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10246697857142857,
"min": 0.10246697857142857,
"max": 0.19828056250000003,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4345377,
"min": 1.4345377,
"max": 2.6114988000000006,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00025645115928571427,
"min": 0.00025645115928571427,
"max": 0.00982822819375,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0035903162299999996,
"min": 0.0035903162299999996,
"max": 0.12116873012000001,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.010068242438137531,
"min": 0.010068242438137531,
"max": 0.49813413619995117,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.14095538854599,
"min": 0.14095538854599,
"max": 3.9850730895996094,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 352.04819277108436,
"min": 338.35632183908046,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29220.0,
"min": 16480.0,
"max": 33558.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.5997517915016197,
"min": -0.999962551984936,
"max": 1.6277441670901553,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 132.77939869463444,
"min": -31.998801663517952,
"max": 140.56179805845022,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.5997517915016197,
"min": -0.999962551984936,
"max": 1.6277441670901553,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 132.77939869463444,
"min": -31.998801663517952,
"max": 140.56179805845022,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.03668121975027474,
"min": 0.03668121975027474,
"max": 9.720678557367886,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.044541239272803,
"min": 3.044541239272803,
"max": 165.25153547525406,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1727349601",
"python_version": "3.10.12 (main, Sep 11 2024, 15:47:36) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.4.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1727353088"
},
"total": 3487.5913884389997,
"count": 1,
"self": 0.6509908139992149,
"children": {
"run_training.setup": {
"total": 0.08190917499996431,
"count": 1,
"self": 0.08190917499996431
},
"TrainerController.start_learning": {
"total": 3486.8584884500006,
"count": 1,
"self": 2.4270497010434156,
"children": {
"TrainerController._reset_env": {
"total": 2.5115042249999533,
"count": 1,
"self": 2.5115042249999533
},
"TrainerController.advance": {
"total": 3481.8374700359577,
"count": 63716,
"self": 2.5243765679938406,
"children": {
"env_step": {
"total": 2327.432728138924,
"count": 63716,
"self": 2155.217982371914,
"children": {
"SubprocessEnvManager._take_step": {
"total": 170.77722702199299,
"count": 63716,
"self": 7.4168800718452985,
"children": {
"TorchPolicy.evaluate": {
"total": 163.3603469501477,
"count": 62566,
"self": 163.3603469501477
}
}
},
"workers": {
"total": 1.4375187450173144,
"count": 63716,
"self": 0.0,
"children": {
"worker_root": {
"total": 3478.8467117769046,
"count": 63716,
"is_parallel": true,
"self": 1516.0120233359617,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.003494481999950949,
"count": 1,
"is_parallel": true,
"self": 0.0013336530005290115,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0021608289994219376,
"count": 8,
"is_parallel": true,
"self": 0.0021608289994219376
}
}
},
"UnityEnvironment.step": {
"total": 0.06367670800000269,
"count": 1,
"is_parallel": true,
"self": 0.0008998499997687759,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005558069997277926,
"count": 1,
"is_parallel": true,
"self": 0.0005558069997277926
},
"communicator.exchange": {
"total": 0.06013900600009947,
"count": 1,
"is_parallel": true,
"self": 0.06013900600009947
},
"steps_from_proto": {
"total": 0.0020820450004066515,
"count": 1,
"is_parallel": true,
"self": 0.00045236900041345507,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0016296759999931965,
"count": 8,
"is_parallel": true,
"self": 0.0016296759999931965
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1962.834688440943,
"count": 63715,
"is_parallel": true,
"self": 51.3737137449134,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 31.976054763074444,
"count": 63715,
"is_parallel": true,
"self": 31.976054763074444
},
"communicator.exchange": {
"total": 1746.5580799419095,
"count": 63715,
"is_parallel": true,
"self": 1746.5580799419095
},
"steps_from_proto": {
"total": 132.92683999104565,
"count": 63715,
"is_parallel": true,
"self": 29.054715782005587,
"children": {
"_process_rank_one_or_two_observation": {
"total": 103.87212420904007,
"count": 509720,
"is_parallel": true,
"self": 103.87212420904007
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1151.8803653290397,
"count": 63716,
"self": 4.622221589957007,
"children": {
"process_trajectory": {
"total": 175.45043482209485,
"count": 63716,
"self": 175.27301135709467,
"children": {
"RLTrainer._checkpoint": {
"total": 0.17742346500017447,
"count": 2,
"self": 0.17742346500017447
}
}
},
"_update_policy": {
"total": 971.8077089169878,
"count": 455,
"self": 387.2976176189777,
"children": {
"TorchPPOOptimizer.update": {
"total": 584.5100912980101,
"count": 22830,
"self": 584.5100912980101
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.1069996617152356e-06,
"count": 1,
"self": 1.1069996617152356e-06
},
"TrainerController._save_models": {
"total": 0.08246338099979766,
"count": 1,
"self": 0.0021492160003617755,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08031416499943589,
"count": 1,
"self": 0.08031416499943589
}
}
}
}
}
}
}