{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8219479322433472, "min": 1.7849860191345215, "max": 3.2956888675689697, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 36089.14453125, "min": 22827.01171875, "max": 129424.75, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 55.08988764044944, "min": 40.925, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19612.0, "min": 13900.0, "max": 25964.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1417.6557670527968, "min": 1198.7578908373744, "max": 1428.170803847532, "count": 493 }, "SoccerTwos.Self-play.ELO.sum": { "value": 252342.72653539781, "min": 2397.8900633931253, "max": 334680.00093222864, "count": 493 }, "SoccerTwos.Step.mean": { "value": 4999954.0, "min": 9998.0, "max": 4999954.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999954.0, "min": 9998.0, "max": 4999954.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.03733022138476372, "min": -0.11393977701663971, "max": 0.1152181476354599, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 6.644779682159424, "min": -19.027942657470703, "max": 17.89875602722168, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.04073020815849304, "min": -0.11943928897380829, "max": 0.111643485724926, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 7.249977111816406, "min": -20.209291458129883, "max": 17.426334381103516, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.21287191048097076, "min": -0.5, "max": 0.45166363330049947, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 37.89120006561279, "min": -63.244799852371216, "max": 51.47139984369278, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.21287191048097076, "min": -0.5, "max": 0.45166363330049947, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 37.89120006561279, "min": -63.244799852371216, "max": 51.47139984369278, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.014809434497146867, "min": 0.011088531155837699, "max": 0.024176820274442436, "count": 240 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.014809434497146867, "min": 0.011088531155837699, "max": 0.024176820274442436, "count": 240 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.11545907805363337, "min": 0.0009283814530742044, "max": 0.12987798303365708, "count": 240 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.11545907805363337, "min": 0.0009283814530742044, "max": 0.12987798303365708, "count": 240 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.1178437220553557, "min": 0.0009338069493727137, "max": 0.13284166728456814, "count": 240 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.1178437220553557, "min": 0.0009338069493727137, "max": 0.13284166728456814, "count": 240 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 240 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 240 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 240 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 240 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 240 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 240 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1733094322", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Andrew\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.5.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1733108554" }, "total": 14231.54859999998, "count": 1, "self": 1.4055596999824047, "children": { "run_training.setup": { "total": 0.10438529995735735, "count": 1, "self": 0.10438529995735735 }, "TrainerController.start_learning": { "total": 14230.03865500004, "count": 1, "self": 9.153703050222248, "children": { "TrainerController._reset_env": { "total": 7.307986300671473, "count": 50, "self": 7.307986300671473 }, "TrainerController.advance": { "total": 14213.405922649079, "count": 342157, "self": 8.4421887047356, "children": { "env_step": { "total": 6729.8096587415785, "count": 342157, "self": 5370.092790736118, "children": { "SubprocessEnvManager._take_step": { "total": 1353.914151698118, "count": 342157, "self": 50.10770724574104, "children": { "TorchPolicy.evaluate": { "total": 1303.806444452377, "count": 637202, "self": 1303.806444452377 } } }, "workers": { "total": 5.8027163073420525, "count": 342157, "self": 0.0, "children": { "worker_root": { "total": 14213.033264263766, "count": 342157, "is_parallel": true, "self": 9924.51510984241, "children": { "steps_from_proto": { "total": 0.10075310058891773, "count": 100, "is_parallel": true, "self": 0.018961402005515993, "children": { "_process_rank_one_or_two_observation": { "total": 0.08179169858340174, "count": 400, "is_parallel": true, "self": 0.08179169858340174 } } }, "UnityEnvironment.step": { "total": 4288.417401320767, "count": 342157, "is_parallel": true, "self": 218.4063806089107, "children": { "UnityEnvironment._generate_step_input": { "total": 180.0778886973858, "count": 342157, "is_parallel": true, "self": 180.0778886973858 }, "communicator.exchange": { "total": 3190.3483089100337, "count": 342157, "is_parallel": true, "self": 3190.3483089100337 }, "steps_from_proto": { "total": 699.5848231044365, "count": 684314, "is_parallel": true, "self": 134.75455876684282, "children": { "_process_rank_one_or_two_observation": { "total": 564.8302643375937, "count": 2737256, "is_parallel": true, "self": 564.8302643375937 } } } } } } } } } } }, "trainer_advance": { "total": 7475.154075202765, "count": 342157, "self": 62.14618801290635, "children": { "process_trajectory": { "total": 1340.135070689139, "count": 342157, "self": 1338.7221531892428, "children": { "RLTrainer._checkpoint": { "total": 1.412917499896139, "count": 10, "self": 1.412917499896139 } } }, "_update_policy": { "total": 6072.87281650072, "count": 240, "self": 953.7915709009394, "children": { "TorchPOCAOptimizer.update": { "total": 5119.08124559978, "count": 7209, "self": 5119.08124559978 } } } } } } }, "trainer_threads": { "total": 1.100008375942707e-06, "count": 1, "self": 1.100008375942707e-06 }, "TrainerController._save_models": { "total": 0.17104190005920827, "count": 1, "self": 0.04269300005398691, "children": { "RLTrainer._checkpoint": { "total": 0.12834890000522137, "count": 1, "self": 0.12834890000522137 } } } } } } }