{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 3.2118070125579834, "min": 3.178771495819092, "max": 3.295722723007202, "count": 50 }, "SoccerTwos.Policy.Entropy.sum": { "value": 49538.91015625, "min": 28443.20703125, "max": 122565.3125, "count": 50 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 905.8333333333334, "min": 482.90909090909093, "max": 999.0, "count": 50 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 21740.0, "min": 15152.0, "max": 27908.0, "count": 50 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1206.7770860767412, "min": 1200.0797527907453, "max": 1210.8660544938796, "count": 42 }, "SoccerTwos.Self-play.ELO.sum": { "value": 2413.5541721534823, "min": 2409.644535456532, "max": 16947.48502822369, "count": 42 }, "SoccerTwos.Step.mean": { "value": 499728.0, "min": 9972.0, "max": 499728.0, "count": 50 }, "SoccerTwos.Step.sum": { "value": 499728.0, "min": 9972.0, "max": 499728.0, "count": 50 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.012738331221044064, "min": -0.030371010303497314, "max": -0.0020919039379805326, "count": 50 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.14012163877487183, "min": -0.4625488519668579, "max": -0.026379859074950218, "count": 50 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.013159367255866528, "min": -0.025404397398233414, "max": 0.0009231594740413129, "count": 50 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -0.14475303888320923, "min": -0.5334923267364502, "max": 0.010154753923416138, "count": 50 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 50 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 50 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.10159999674016779, "min": -0.5, "max": 0.23845714330673218, "count": 50 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 1.1175999641418457, "min": -7.361599981784821, "max": 3.3384000062942505, "count": 50 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.10159999674016779, "min": -0.5, "max": 0.23845714330673218, "count": 50 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 1.1175999641418457, "min": -7.361599981784821, "max": 3.3384000062942505, "count": 50 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 50 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 50 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.017880129250503765, "min": 0.011993340455228462, "max": 0.02118847225792706, "count": 23 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.017880129250503765, "min": 0.011993340455228462, "max": 0.02118847225792706, "count": 23 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.0013691989743771652, "min": 3.575956373727725e-05, "max": 0.004896668290408949, "count": 23 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.0013691989743771652, "min": 3.575956373727725e-05, "max": 0.004896668290408949, "count": 23 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.001403482809352378, "min": 3.767290254472755e-05, "max": 0.004944877473947903, "count": 23 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.001403482809352378, "min": 3.767290254472755e-05, "max": 0.004944877473947903, "count": 23 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 23 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 
0.0003, "min": 0.0003, "max": 0.0003, "count": 23 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 23 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 23 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 23 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 23 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1726662536", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\michalkrych\\miniconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.4.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1726664029" }, "total": 1493.2660939000198, "count": 1, "self": 1.6377045000263024, "children": { "run_training.setup": { "total": 0.3988559999852441, "count": 1, "self": 0.3988559999852441 }, "TrainerController.start_learning": { "total": 1491.2295334000082, "count": 1, "self": 1.0019511019345373, "children": { "TrainerController._reset_env": { "total": 27.972174199996516, "count": 3, "self": 27.972174199996516 }, "TrainerController.advance": { "total": 1462.0873340980907, "count": 32923, "self": 0.9656415031349752, "children": { "env_step": { "total": 820.270217696816, "count": 32923, "self": 653.1760731915128, "children": { "SubprocessEnvManager._take_step": { "total": 166.45147170193377, "count": 32923, "self": 5.752347401576117, "children": { "TorchPolicy.evaluate": { "total": 160.69912430035765, "count": 65390, "self": 160.69912430035765 } } }, "workers": { "total": 0.6426728033693507, "count": 32923, "self": 0.0, "children": { "worker_root": { "total": 1475.2166860021243, "count": 32923, "is_parallel": true, "self": 950.869518104475, "children": { "steps_from_proto": { "total": 0.15297649998683482, "count": 6, "is_parallel": true, "self": 0.003927900019334629, "children": { "_process_rank_one_or_two_observation": { "total": 0.1490485999675002, "count": 24, "is_parallel": true, "self": 0.1490485999675002 } } }, "UnityEnvironment.step": { "total": 524.1941913976625, "count": 32923, "is_parallel": true, "self": 25.472543704439886, "children": { "UnityEnvironment._generate_step_input": { "total": 22.500736896996386, "count": 32923, "is_parallel": true, "self": 22.500736896996386 }, "communicator.exchange": { "total": 393.2004810983781, "count": 32923, "is_parallel": true, "self": 393.2004810983781 }, "steps_from_proto": { "total": 83.02042969784816, "count": 65846, "is_parallel": true, "self": 16.540691197296837, "children": { "_process_rank_one_or_two_observation": { "total": 66.47973850055132, "count": 263384, "is_parallel": true, "self": 66.47973850055132 } } } } } } } } } } }, "trainer_advance": { "total": 640.8514748981397, "count": 32923, "self": 6.540527199103963, "children": { "process_trajectory": { "total": 102.42323879897594, "count": 32923, "self": 101.38768729899311, "children": { "RLTrainer._checkpoint": { "total": 1.0355514999828301, "count": 1, "self": 1.0355514999828301 } } }, 
"_update_policy": { "total": 531.8877089000598, "count": 23, "self": 90.46580589990481, "children": { "TorchPOCAOptimizer.update": { "total": 441.421903000155, "count": 690, "self": 441.421903000155 } } } } } } }, "trainer_threads": { "total": 1.0999792721122503e-06, "count": 1, "self": 1.0999792721122503e-06 }, "TrainerController._save_models": { "total": 0.16807290000724606, "count": 1, "self": 0.012475600000470877, "children": { "RLTrainer._checkpoint": { "total": 0.15559730000677519, "count": 1, "self": 0.15559730000677519 } } } } } } }