z4x's picture
worse
7c99853
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.132392168045044,
"min": 2.7980947494506836,
"max": 3.295198917388916,
"count": 3000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 133414.84375,
"min": 44148.27734375,
"max": 197553.390625,
"count": 3000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 981.5555555555555,
"min": 384.2,
"max": 999.0,
"count": 3000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 35336.0,
"min": 27500.0,
"max": 35792.0,
"count": 3000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1174.4800294470729,
"min": 1152.334753477878,
"max": 1217.6819534744825,
"count": 2841
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 4697.9201177882915,
"min": 2308.7556965116414,
"max": 36002.476069656,
"count": 2841
},
"SoccerTwos.Step.mean": {
"value": 47999800.0,
"min": 15620.0,
"max": 47999800.0,
"count": 3000
},
"SoccerTwos.Step.sum": {
"value": 47999800.0,
"min": 15620.0,
"max": 47999800.0,
"count": 3000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.003107416443526745,
"min": -1.0953278541564941,
"max": 0.0036058947443962097,
"count": 3000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.05282608047127724,
"min": -24.097213745117188,
"max": 0.06888406723737717,
"count": 3000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.0031074194703251123,
"min": -1.0989567041397095,
"max": 0.0036059014964848757,
"count": 3000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.052826132625341415,
"min": -24.177047729492188,
"max": 0.06888425350189209,
"count": 3000
},
"SoccerTwos.Policy.RndBaselineEstimate.mean": {
"value": 0.000444981997134164,
"min": -0.000454334745882079,
"max": 6.09859561920166,
"count": 3000
},
"SoccerTwos.Policy.RndBaselineEstimate.sum": {
"value": 0.007564694155007601,
"min": -0.008632360026240349,
"max": 153.53292846679688,
"count": 3000
},
"SoccerTwos.Policy.RndValueEstimate.mean": {
"value": 0.0004449794359970838,
"min": -0.00045433553168550134,
"max": 6.09859561920166,
"count": 3000
},
"SoccerTwos.Policy.RndValueEstimate.sum": {
"value": 0.007564650382846594,
"min": -0.008632374927401543,
"max": 153.53292846679688,
"count": 3000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 3000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 3000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.29411764705882354,
"min": -0.5893615392538217,
"max": 0.3275750055909157,
"count": 3000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -5.0,
"min": -17.169999957084656,
"max": 7.861800134181976,
"count": 3000
},
"SoccerTwos.Policy.RndReward.mean": {
"value": 0.02999024695771582,
"min": 0.0011296469429393967,
"max": 7076.9688083814535,
"count": 3000
},
"SoccerTwos.Policy.RndReward.sum": {
"value": 0.5098341982811689,
"min": 0.023770103754941374,
"max": 162770.28259277344,
"count": 3000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.29411764705882354,
"min": -0.5893615392538217,
"max": 0.3275750055909157,
"count": 3000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -5.0,
"min": -17.169999957084656,
"max": 7.861800134181976,
"count": 3000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 3000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 3000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.034011427875763424,
"min": 0.009482975578672873,
"max": 0.35181839764118195,
"count": 2021
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.034011427875763424,
"min": 0.009482975578672873,
"max": 0.35181839764118195,
"count": 2021
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.000704554028440422,
"min": 8.442825873607637e-09,
"max": 454.2638854980469,
"count": 2021
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.000704554028440422,
"min": 8.442825873607637e-09,
"max": 454.2638854980469,
"count": 2021
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.000704554028440422,
"min": 8.4428835867012e-09,
"max": 506.7797037760417,
"count": 2021
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.000704554028440422,
"min": 8.4428835867012e-09,
"max": 506.7797037760417,
"count": 2021
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0005000000000000001,
"min": 0.0005000000000000001,
"max": 0.0005000000000000001,
"count": 2021
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0005000000000000001,
"min": 0.0005000000000000001,
"max": 0.0005000000000000001,
"count": 2021
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.10001708333333335,
"min": 0.10001708333333335,
"max": 0.19994972083333332,
"count": 2021
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.10001708333333335,
"min": 0.10001708333333335,
"max": 0.19994972083333332,
"count": 2021
},
"SoccerTwos.Policy.Beta.mean": {
"value": 1.170662500000009e-05,
"min": 1.170662500000009e-05,
"max": 0.00999497711125,
"count": 2021
},
"SoccerTwos.Policy.Beta.sum": {
"value": 1.170662500000009e-05,
"min": 1.170662500000009e-05,
"max": 0.00999497711125,
"count": 2021
},
"SoccerTwos.Losses.RNDLoss.mean": {
"value": 3.44461077474989e-05,
"min": 1.9040650158785866e-06,
"max": 7.383045196533203,
"count": 2021
},
"SoccerTwos.Losses.RNDLoss.sum": {
"value": 3.44461077474989e-05,
"min": 1.9040650158785866e-06,
"max": 7.383045196533203,
"count": 2021
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676207748",
"python_version": "3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]",
"command_line_arguments": "/home/deep-rl/.local/bin/mlagents-learn config/poca/SoccerTwos.yaml --env training-envs-executables/SoccerTwos.x86_64 --run-id SoccerTwos --no-graphics --force",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.13.1+cu117",
"numpy_version": "1.21.2",
"end_time_seconds": "1676251229"
},
"total": 43481.009814282006,
"count": 1,
"self": 0.21904235598049127,
"children": {
"run_training.setup": {
"total": 0.010040364009910263,
"count": 1,
"self": 0.010040364009910263
},
"TrainerController.start_learning": {
"total": 43480.780731562016,
"count": 1,
"self": 33.42600047615997,
"children": {
"TrainerController._reset_env": {
"total": 7.4349382660439005,
"count": 100,
"self": 7.4349382660439005
},
"TrainerController.advance": {
"total": 43439.807582472844,
"count": 3062924,
"self": 34.19506235700101,
"children": {
"env_step": {
"total": 32537.51252939491,
"count": 3062924,
"self": 23571.234687711403,
"children": {
"SubprocessEnvManager._take_step": {
"total": 8946.558222052365,
"count": 3062924,
"self": 304.98570815504354,
"children": {
"TorchPolicy.evaluate": {
"total": 8641.572513897321,
"count": 6078558,
"self": 8641.572513897321
}
}
},
"workers": {
"total": 19.71961963114154,
"count": 3062924,
"self": 0.0,
"children": {
"worker_root": {
"total": 43436.64464662739,
"count": 3062924,
"is_parallel": true,
"self": 24517.900909940072,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0012339909881120548,
"count": 2,
"is_parallel": true,
"self": 0.00030800099193584174,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.000925989996176213,
"count": 8,
"is_parallel": true,
"self": 0.000925989996176213
}
}
},
"UnityEnvironment.step": {
"total": 0.021953343006316572,
"count": 1,
"is_parallel": true,
"self": 0.0003455630212556571,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0006698079960187897,
"count": 1,
"is_parallel": true,
"self": 0.0006698079960187897
},
"communicator.exchange": {
"total": 0.019872214004863054,
"count": 1,
"is_parallel": true,
"self": 0.019872214004863054
},
"steps_from_proto": {
"total": 0.001065757984179072,
"count": 2,
"is_parallel": true,
"self": 0.00024018595286179334,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0008255720313172787,
"count": 8,
"is_parallel": true,
"self": 0.0008255720313172787
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 18918.64843232333,
"count": 3062923,
"is_parallel": true,
"self": 986.5479166106816,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 712.3963567089668,
"count": 3062923,
"is_parallel": true,
"self": 712.3963567089668
},
"communicator.exchange": {
"total": 14332.539664651806,
"count": 3062923,
"is_parallel": true,
"self": 14332.539664651806
},
"steps_from_proto": {
"total": 2887.164494351877,
"count": 6125846,
"is_parallel": true,
"self": 557.0796193042625,
"children": {
"_process_rank_one_or_two_observation": {
"total": 2330.0848750476143,
"count": 24503384,
"is_parallel": true,
"self": 2330.0848750476143
}
}
}
}
},
"steps_from_proto": {
"total": 0.09530436398927122,
"count": 198,
"is_parallel": true,
"self": 0.01923620468005538,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.07606815930921584,
"count": 792,
"is_parallel": true,
"self": 0.07606815930921584
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 10868.099990720933,
"count": 3062924,
"self": 352.59623967079096,
"children": {
"process_trajectory": {
"total": 5000.804463582928,
"count": 3062924,
"self": 4928.107200435916,
"children": {
"RLTrainer._checkpoint": {
"total": 72.69726314701256,
"count": 600,
"self": 72.69726314701256
}
}
},
"_update_policy": {
"total": 5514.699287467214,
"count": 2021,
"self": 1327.569331967592,
"children": {
"TorchPOCAOptimizer.update": {
"total": 4187.129955499622,
"count": 24252,
"self": 4187.129955499622
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.629844963550568e-07,
"count": 1,
"self": 5.629844963550568e-07
},
"TrainerController._save_models": {
"total": 0.11220978398341686,
"count": 1,
"self": 0.0014431489689741284,
"children": {
"RLTrainer._checkpoint": {
"total": 0.11076663501444273,
"count": 1,
"self": 0.11076663501444273
}
}
}
}
}
}
}