{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.4369688630104065,
"min": 0.3969142735004425,
"max": 1.4038227796554565,
"count": 43
},
"Pyramids.Policy.Entropy.sum": {
"value": 13158.0068359375,
"min": 11843.921875,
"max": 42586.3671875,
"count": 43
},
"Pyramids.Step.mean": {
"value": 1289907.0,
"min": 29996.0,
"max": 1289907.0,
"count": 43
},
"Pyramids.Step.sum": {
"value": 1289907.0,
"min": 29996.0,
"max": 1289907.0,
"count": 43
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.6243676543235779,
"min": -0.10698558390140533,
"max": 0.7540826797485352,
"count": 43
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 172.32546997070312,
"min": -25.67654037475586,
"max": 220.192138671875,
"count": 43
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.005993920378386974,
"min": -0.018929392099380493,
"max": 0.3606431782245636,
"count": 43
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -1.6543220281600952,
"min": -5.432735443115234,
"max": 85.83307647705078,
"count": 43
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06948946413626608,
"min": 0.0647837671151917,
"max": 0.07257233516519225,
"count": 43
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9728524979077252,
"min": 0.5620019177689042,
"max": 1.0711047359509394,
"count": 43
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01366013651109641,
"min": 6.248228130239457e-05,
"max": 0.01525208695998622,
"count": 43
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.19124191115534975,
"min": 0.0008747519382335241,
"max": 0.2287813043997933,
"count": 43
},
"Pyramids.Policy.LearningRate.mean": {
"value": 0.00017249479964460477,
"min": 0.00017249479964460477,
"max": 0.00029840715053095,
"count": 43
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.002414927195024467,
"min": 0.0023872572042476,
"max": 0.003969120876959734,
"count": 43
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1574982523809524,
"min": 0.1574982523809524,
"max": 0.19946905000000004,
"count": 43
},
"Pyramids.Policy.Epsilon.sum": {
"value": 2.2049755333333336,
"min": 1.5957524000000003,
"max": 2.7230402666666675,
"count": 43
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0057540754128571426,
"min": 0.0057540754128571426,
"max": 0.009946958095,
"count": 43
},
"Pyramids.Policy.Beta.sum": {
"value": 0.08055705578,
"min": 0.07957566476,
"max": 0.13231172264000002,
"count": 43
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.009142063558101654,
"min": 0.009142063558101654,
"max": 0.46377047896385193,
"count": 43
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.12798888981342316,
"min": 0.12798888981342316,
"max": 3.7101638317108154,
"count": 43
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 305.1443298969072,
"min": 262.1810344827586,
"max": 999.0,
"count": 43
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29599.0,
"min": 15900.0,
"max": 32910.0,
"count": 43
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.653602044530136,
"min": -0.999858116430621,
"max": 1.7372173753769502,
"count": 43
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 160.3993983194232,
"min": -31.99480165541172,
"max": 199.77999816834927,
"count": 43
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.653602044530136,
"min": -0.999858116430621,
"max": 1.7372173753769502,
"count": 43
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 160.3993983194232,
"min": -31.99480165541172,
"max": 199.77999816834927,
"count": 43
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.029092228220122838,
"min": 0.025694857305656148,
"max": 9.613374326378107,
"count": 43
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.8219461373519152,
"min": 2.719071836618241,
"max": 153.8139892220497,
"count": 43
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 43
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 43
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1673434782",
"python_version": "3.8.16 (default, Dec 7 2022, 01:12:13) \n[GCC 7.5.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1673437365"
},
"total": 2583.475102176,
"count": 1,
"self": 0.3312493150001501,
"children": {
"run_training.setup": {
"total": 0.10152353000012226,
"count": 1,
"self": 0.10152353000012226
},
"TrainerController.start_learning": {
"total": 2583.0423293309996,
"count": 1,
"self": 1.5909520009126936,
"children": {
"TrainerController._reset_env": {
"total": 6.395754145999945,
"count": 1,
"self": 6.395754145999945
},
"TrainerController.advance": {
"total": 2574.911440058087,
"count": 83761,
"self": 1.5886919969211704,
"children": {
"env_step": {
"total": 1717.3977780141663,
"count": 83761,
"self": 1586.1468911611323,
"children": {
"SubprocessEnvManager._take_step": {
"total": 130.29506974302285,
"count": 83761,
"self": 5.3965148900465465,
"children": {
"TorchPolicy.evaluate": {
"total": 124.8985548529763,
"count": 81875,
"self": 42.38915672793428,
"children": {
"TorchPolicy.sample_actions": {
"total": 82.50939812504203,
"count": 81875,
"self": 82.50939812504203
}
}
}
}
},
"workers": {
"total": 0.9558171100111394,
"count": 83760,
"self": 0.0,
"children": {
"worker_root": {
"total": 2578.6361490249346,
"count": 83760,
"is_parallel": true,
"self": 1113.988334471947,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0016319970000040485,
"count": 1,
"is_parallel": true,
"self": 0.000564218999898003,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0010677780001060455,
"count": 8,
"is_parallel": true,
"self": 0.0010677780001060455
}
}
},
"UnityEnvironment.step": {
"total": 0.04561822499999835,
"count": 1,
"is_parallel": true,
"self": 0.0004956489997312019,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004165830000602,
"count": 1,
"is_parallel": true,
"self": 0.0004165830000602
},
"communicator.exchange": {
"total": 0.04316109100000176,
"count": 1,
"is_parallel": true,
"self": 0.04316109100000176
},
"steps_from_proto": {
"total": 0.0015449020002051839,
"count": 1,
"is_parallel": true,
"self": 0.00038118900101835607,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0011637129991868278,
"count": 8,
"is_parallel": true,
"self": 0.0011637129991868278
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1464.6478145529877,
"count": 83759,
"is_parallel": true,
"self": 35.543489559984664,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 28.506982841987792,
"count": 83759,
"is_parallel": true,
"self": 28.506982841987792
},
"communicator.exchange": {
"total": 1282.7868344840617,
"count": 83759,
"is_parallel": true,
"self": 1282.7868344840617
},
"steps_from_proto": {
"total": 117.81050766695353,
"count": 83759,
"is_parallel": true,
"self": 27.178101910174973,
"children": {
"_process_rank_one_or_two_observation": {
"total": 90.63240575677855,
"count": 670072,
"is_parallel": true,
"self": 90.63240575677855
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 855.9249700469995,
"count": 83760,
"self": 2.986468146086281,
"children": {
"process_trajectory": {
"total": 186.58085946691472,
"count": 83760,
"self": 186.3861806519153,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1946788149994063,
"count": 2,
"self": 0.1946788149994063
}
}
},
"_update_policy": {
"total": 666.3576424339985,
"count": 602,
"self": 255.26164357897005,
"children": {
"TorchPPOOptimizer.update": {
"total": 411.0959988550285,
"count": 29820,
"self": 411.0959988550285
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.4380002539837733e-06,
"count": 1,
"self": 1.4380002539837733e-06
},
"TrainerController._save_models": {
"total": 0.14418168799966224,
"count": 1,
"self": 0.0018450799998390721,
"children": {
"RLTrainer._checkpoint": {
"total": 0.14233660799982317,
"count": 1,
"self": 0.14233660799982317
}
}
}
}
}
}
}
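
For reference, a minimal sketch of how statistics in this shape could be read back in Python. The keys used below ("gauges", "value", "min", "max", "count", "total") come from the structure above; the file path is an assumption (run_logs/timers.json is where ML-Agents typically writes this dump), so adjust it to wherever the file actually lives.

# Minimal sketch: load a gauge/timer dump like the JSON above and summarize it.
# Assumption: the file is saved at "run_logs/timers.json".
import json

with open("run_logs/timers.json") as f:
    stats = json.load(f)

# Each gauge stores the last reported value plus its min/max/count over the run.
for name, gauge in stats["gauges"].items():
    print(f"{name}: value={gauge['value']:.4f} "
          f"(min={gauge['min']:.4f}, max={gauge['max']:.4f}, n={gauge['count']})")

# The wall-clock breakdown of the run sits in the nested timer tree.
print("total seconds:", stats["total"])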