{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 3.039598226547241, "min": 2.1421823501586914, "max": 3.2956717014312744, "count": 1727 }, "SoccerTwos.Policy.Entropy.sum": { "value": 84914.21875, "min": 13086.33203125, "max": 158486.140625, "count": 1727 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 468.6666666666667, "min": 306.55555555555554, "max": 999.0, "count": 1727 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 16872.0, "min": 13520.0, "max": 24940.0, "count": 1727 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1175.7964605511422, "min": 1170.8791218745284, "max": 1217.5326263817626, "count": 1134 }, "SoccerTwos.Self-play.ELO.sum": { "value": 18812.743368818275, "min": 2378.4901073151914, "max": 32900.605519854325, "count": 1134 }, "SoccerTwos.Step.mean": { "value": 17269951.0, "min": 9456.0, "max": 17269951.0, "count": 1727 }, "SoccerTwos.Step.sum": { "value": 17269951.0, "min": 9456.0, "max": 17269951.0, "count": 1727 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.016308804973959923, "min": -0.0693107396364212, "max": 0.011302482336759567, "count": 1727 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.3098672926425934, "min": -1.0396610498428345, "max": 0.18083971738815308, "count": 1727 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.015768375247716904, "min": -0.06925902515649796, "max": 0.01146668940782547, "count": 1727 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -0.29959914088249207, "min": -1.0388853549957275, "max": 0.18346703052520752, "count": 1727 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1727 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1727 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.26808420921626847, "min": -0.7894736842105263, "max": 0.3663249984383583, "count": 1727 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -5.0935999751091, "min": -15.358199998736382, "max": 9.481199979782104, "count": 1727 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.26808420921626847, "min": -0.7894736842105263, "max": 0.3663249984383583, "count": 1727 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -5.0935999751091, "min": -15.358199998736382, "max": 9.481199979782104, "count": 1727 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1727 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1727 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.016648018033447443, "min": 0.00894792212639004, "max": 0.023971050139516593, "count": 805 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.016648018033447443, "min": 0.00894792212639004, "max": 0.023971050139516593, "count": 805 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.012747439245382944, "min": 1.760741797733149e-10, "max": 0.012747439245382944, "count": 805 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.012747439245382944, "min": 1.760741797733149e-10, "max": 0.012747439245382944, "count": 805 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.012811548355966807, "min": 1.4989562369116527e-10, "max": 0.013357076048851012, "count": 805 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.012811548355966807, "min": 1.4989562369116527e-10, "max": 0.013357076048851012, "count": 805 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 
0.0003, "count": 805 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 805 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 805 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 805 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 805 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 805 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1682794123", "python_version": "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:28:38) [MSC v.1929 64 bit (AMD64)]", "command_line_arguments": "D:\\users\\amit\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.exe --run-id=SoccerTwos-v2 --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.0.0+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1682887296" }, "total": 93172.8639949, "count": 1, "self": 1.5220524000033038, "children": { "run_training.setup": { "total": 0.24963069999999998, "count": 1, "self": 0.24963069999999998 }, "TrainerController.start_learning": { "total": 93171.0923118, "count": 1, "self": 35.87358480328112, "children": { "TrainerController._reset_env": { "total": 12.864624900015745, "count": 86, "self": 12.864624900015745 }, "TrainerController.advance": { "total": 93122.0949511967, "count": 1123793, "self": 37.620753592273104, "children": { "env_step": { "total": 29269.139859700597, "count": 1123793, "self": 23094.79583350264, "children": { "SubprocessEnvManager._take_step": { "total": 6150.807075899163, "count": 1123793, "self": 227.63414020850996, "children": { "TorchPolicy.evaluate": { "total": 5923.172935690653, "count": 2230662, "self": 5923.172935690653 } } }, "workers": { "total": 23.536950298792494, "count": 1123792, "self": 0.0, "children": { "worker_root": { "total": 93115.74153009402, "count": 1123792, "is_parallel": true, "self": 74653.13398339221, "children": { "steps_from_proto": { "total": 0.21867090001935807, "count": 172, "is_parallel": true, "self": 0.044821599772271625, "children": { "_process_rank_one_or_two_observation": { "total": 0.17384930024708645, "count": 688, "is_parallel": true, "self": 0.17384930024708645 } } }, "UnityEnvironment.step": { "total": 18462.388875801782, "count": 1123792, "is_parallel": true, "self": 997.7885171923735, "children": { "UnityEnvironment._generate_step_input": { "total": 878.5170087003307, "count": 1123792, "is_parallel": true, "self": 878.5170087003307 }, "communicator.exchange": { "total": 13391.759960600484, "count": 1123792, "is_parallel": true, "self": 13391.759960600484 }, "steps_from_proto": { "total": 3194.3233893085944, "count": 2247584, "is_parallel": true, "self": 638.18279228965, "children": { "_process_rank_one_or_two_observation": { "total": 2556.1405970189444, "count": 8990336, "is_parallel": true, "self": 2556.1405970189444 } } } } } } } } } } }, "trainer_advance": { "total": 63815.33433790384, "count": 1123792, "self": 242.69121740006085, "children": { "process_trajectory": { "total": 5335.843226403866, "count": 1123792, "self": 5328.643951603899, "children": { 
"RLTrainer._checkpoint": { "total": 7.199274799966133, "count": 34, "self": 7.199274799966133 } } }, "_update_policy": { "total": 58236.79989409992, "count": 805, "self": 3674.5273399002544, "children": { "TorchPOCAOptimizer.update": { "total": 54562.27255419966, "count": 24150, "self": 54562.27255419966 } } } } } } }, "trainer_threads": { "total": 2.6000052457675338e-06, "count": 1, "self": 2.6000052457675338e-06 }, "TrainerController._save_models": { "total": 0.2591482999996515, "count": 1, "self": 0.012133100011851639, "children": { "RLTrainer._checkpoint": { "total": 0.24701519998779986, "count": 1, "self": 0.24701519998779986 } } } } } } }