Predict_CartPole-v1_SoftQ

Browse files

Files changed (10) hide show

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/config.yaml +49 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/logs/log.txt +90 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/1000 +3 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/1500 +3 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/2000 +3 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/2500 +3 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/500 +3 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/best +0 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/tb_logs/interact/events.out.tfevents.1688778274.LAPTOP-L4C8EI5B.44368.0 +3 -0
ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/tb_logs/policy/events.out.tfevents.1688778274.LAPTOP-L4C8EI5B.44368.1 +3 -0

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/config.yaml ADDED Viewed

	@@ -0,0 +1,49 @@

+general_cfg:
+  algo_name: SoftQ
+  collect_traj: false
+  device: cpu
+  env_name: gym
+  interact_summary_fre: 1
+  load_checkpoint: true
+  load_model_step: best
+  load_path: Train_CartPole-v1_SoftQ_20230705-215318
+  max_episode: 30
+  max_step: 200
+  mode: predict
+  model_save_fre: 500
+  model_summary_fre: 1
+  mp_backend: null
+  n_learners: 1
+  n_workers: 2
+  online_eval: true
+  online_eval_episode: 10
+  seed: 1
+  share_buffer: true
+algo_cfg:
+  alpha: 4
+  batch_size: 128
+  buffer_size: 100000
+  buffer_type: REPLAY_QUE
+  epsilon_decay: 500
+  epsilon_end: 0.01
+  epsilon_start: 0.95
+  gamma: 0.95
+  lr: 0.0001
+  n_steps_per_learn: 1
+  target_update: 4
+  value_layers:
+  - activation: relu
+    layer_size:
+    - 256
+    layer_type: linear
+  - activation: relu
+    layer_size:
+    - 256
+    layer_type: linear
+env_cfg:
+  id: CartPole-v1
+  ignore_params:
+  - wrapper
+  - ignore_params
+  render_mode: null
+  wrapper: null

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/logs/log.txt ADDED Viewed

	@@ -0,0 +1,90 @@

+2023-07-08 09:04:34 - SimpleLog - INFO: - General Configs:
+2023-07-08 09:04:34 - SimpleLog - INFO: - ================================================================================
+2023-07-08 09:04:34 - SimpleLog - INFO: -         Name        	       Value        	        Type
+2023-07-08 09:04:34 - SimpleLog - INFO: -       env_name      	        gym         	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -      algo_name      	       SoftQ        	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -         mode        	      predict       	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -        device       	        cpu         	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -         seed        	         1          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     max_episode     	         30         	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -       max_step      	        200         	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     collect_traj    	         0          	   <class 'bool'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -      mp_backend     	        None        	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -      n_workers      	         2          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -      n_learners     	         1          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     share_buffer    	         1          	   <class 'bool'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     online_eval     	         1          	   <class 'bool'>
+2023-07-08 09:04:34 - SimpleLog - INFO: - online_eval_episode 	         10         	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -    model_save_fre   	        500         	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -   load_checkpoint   	         1          	   <class 'bool'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -      load_path      	Train_CartPole-v1_SoftQ_20230705-215318	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -   load_model_step   	        best        	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: - interact_summary_fre	         1          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -  model_summary_fre  	         1          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: - ================================================================================
+2023-07-08 09:04:34 - SimpleLog - INFO: - Algo Configs:
+2023-07-08 09:04:34 - SimpleLog - INFO: - ================================================================================
+2023-07-08 09:04:34 - SimpleLog - INFO: -         Name        	       Value        	        Type
+2023-07-08 09:04:34 - SimpleLog - INFO: -    epsilon_start    	        0.95        	  <class 'float'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     epsilon_end     	        0.01        	  <class 'float'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -    epsilon_decay    	        500         	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -        gamma        	        0.95        	  <class 'float'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -          lr         	       0.0001       	  <class 'float'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     buffer_type     	     REPLAY_QUE     	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     buffer_size     	       100000       	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -      batch_size     	        128         	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -        alpha        	         4          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -    target_update    	         4          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     value_layers    	[{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}]	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -  n_steps_per_learn  	         1          	   <class 'int'>
+2023-07-08 09:04:34 - SimpleLog - INFO: - ================================================================================
+2023-07-08 09:04:34 - SimpleLog - INFO: - Env Configs:
+2023-07-08 09:04:34 - SimpleLog - INFO: - ================================================================================
+2023-07-08 09:04:34 - SimpleLog - INFO: -         Name        	       Value        	        Type
+2023-07-08 09:04:34 - SimpleLog - INFO: -          id         	    CartPole-v1     	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -     render_mode     	        None        	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -       wrapper       	        None        	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: -    ignore_params    	['wrapper', 'ignore_params']	   <class 'str'>
+2023-07-08 09:04:34 - SimpleLog - INFO: - ================================================================================
+2023-07-08 09:04:34 - SimpleLog - INFO: - Start predicting!
+2023-07-08 09:04:35 - SimpleLog - INFO: - Interactor 0 finished episode 1 with reward 200.000 in 200 steps
+2023-07-08 09:04:35 - SimpleLog - INFO: - Interactor 1 finished episode 2 with reward 200.000 in 200 steps
+2023-07-08 09:04:36 - SimpleLog - INFO: - Interactor 0 finished episode 3 with reward 200.000 in 200 steps
+2023-07-08 09:04:36 - SimpleLog - INFO: - Interactor 1 finished episode 4 with reward 200.000 in 200 steps
+2023-07-08 09:04:37 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 200.000
+2023-07-08 09:04:37 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
+2023-07-08 09:04:37 - SimpleLog - INFO: - Interactor 0 finished episode 5 with reward 200.000 in 200 steps
+2023-07-08 09:04:37 - SimpleLog - INFO: - Interactor 1 finished episode 6 with reward 200.000 in 200 steps
+2023-07-08 09:04:37 - SimpleLog - INFO: - Interactor 0 finished episode 7 with reward 200.000 in 200 steps
+2023-07-08 09:04:37 - SimpleLog - INFO: - Interactor 1 finished episode 8 with reward 200.000 in 200 steps
+2023-07-08 09:04:38 - SimpleLog - INFO: - Interactor 0 finished episode 9 with reward 200.000 in 200 steps
+2023-07-08 09:04:38 - SimpleLog - INFO: - Interactor 1 finished episode 10 with reward 200.000 in 200 steps
+2023-07-08 09:04:39 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 200.000
+2023-07-08 09:04:39 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
+2023-07-08 09:04:39 - SimpleLog - INFO: - Interactor 0 finished episode 11 with reward 200.000 in 200 steps
+2023-07-08 09:04:39 - SimpleLog - INFO: - Interactor 1 finished episode 12 with reward 200.000 in 200 steps
+2023-07-08 09:04:40 - SimpleLog - INFO: - Interactor 0 finished episode 13 with reward 200.000 in 200 steps
+2023-07-08 09:04:40 - SimpleLog - INFO: - Interactor 1 finished episode 14 with reward 200.000 in 200 steps
+2023-07-08 09:04:40 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 200.000
+2023-07-08 09:04:40 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
+2023-07-08 09:04:41 - SimpleLog - INFO: - Interactor 0 finished episode 15 with reward 200.000 in 200 steps
+2023-07-08 09:04:41 - SimpleLog - INFO: - Interactor 1 finished episode 16 with reward 200.000 in 200 steps
+2023-07-08 09:04:41 - SimpleLog - INFO: - Interactor 0 finished episode 17 with reward 200.000 in 200 steps
+2023-07-08 09:04:41 - SimpleLog - INFO: - Interactor 1 finished episode 18 with reward 200.000 in 200 steps
+2023-07-08 09:04:42 - SimpleLog - INFO: - Interactor 0 finished episode 19 with reward 200.000 in 200 steps
+2023-07-08 09:04:42 - SimpleLog - INFO: - Interactor 1 finished episode 20 with reward 200.000 in 200 steps
+2023-07-08 09:04:42 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 200.000
+2023-07-08 09:04:42 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
+2023-07-08 09:04:43 - SimpleLog - INFO: - Interactor 0 finished episode 21 with reward 200.000 in 200 steps
+2023-07-08 09:04:43 - SimpleLog - INFO: - Interactor 1 finished episode 22 with reward 200.000 in 200 steps
+2023-07-08 09:04:44 - SimpleLog - INFO: - Interactor 0 finished episode 23 with reward 200.000 in 200 steps
+2023-07-08 09:04:44 - SimpleLog - INFO: - Interactor 1 finished episode 24 with reward 200.000 in 200 steps
+2023-07-08 09:04:44 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 200.000
+2023-07-08 09:04:44 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
+2023-07-08 09:04:45 - SimpleLog - INFO: - Interactor 0 finished episode 25 with reward 200.000 in 200 steps
+2023-07-08 09:04:45 - SimpleLog - INFO: - Interactor 1 finished episode 26 with reward 200.000 in 200 steps
+2023-07-08 09:04:45 - SimpleLog - INFO: - Interactor 0 finished episode 27 with reward 200.000 in 200 steps
+2023-07-08 09:04:45 - SimpleLog - INFO: - Interactor 1 finished episode 28 with reward 200.000 in 200 steps
+2023-07-08 09:04:46 - SimpleLog - INFO: - Interactor 0 finished episode 29 with reward 200.000 in 200 steps
+2023-07-08 09:04:46 - SimpleLog - INFO: - Interactor 1 finished episode 30 with reward 200.000 in 200 steps
+2023-07-08 09:04:46 - SimpleLog - INFO: - Finish predicting! Time cost: 11.327 s

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/1000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3aff86a89801bf007bd42a26cb8379736ad5f79d5aec13fef527579c67766b14
+size 544727

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/1500 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88cf9bd409af41977624c975c2732f8a6cd83c707e04326c5def695931ca5262
+size 544727

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/2000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:847e40b2f6666c4a0bf2a3c47c84bd2d97ad14fba7f80abdcb800d17cd9e4bf3
+size 544727

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/2500 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05143274a91f1d1aeb0b8ef069ad172f7861474fb74cf7b1e118b7c2e7af8ca4
+size 544727

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/500 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:268334ec7b2a74fca824642b2266262f95fb97f907074b8d7d7f26ec17cc3637
+size 544713

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/models/best ADDED Viewed

Binary file (545 kB). View file

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/tb_logs/interact/events.out.tfevents.1688778274.LAPTOP-L4C8EI5B.44368.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e84a45da666de552b82238f79db89fc70455e38efdbb6f137ceacb585c8ade4a
+size 3148

ClassControl/CartPole-v1/Predict_CartPole-v1_SoftQ_20230708-090434/tb_logs/policy/events.out.tfevents.1688778274.LAPTOP-L4C8EI5B.44368.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de985e2d617c6cf48abe7657d1671e1b47fe5db32586d13d14a8708a9337227f
+size 150073