thien1892 committed on
Commit
9af6a14
1 Parent(s): d6e7bb2

Upload train_and_push.py

Browse files
Files changed (1) hide show
  1. train_and_push.py +91 -0
train_and_push.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ # Virtual display
4
+ from pyvirtualdisplay import Display
5
+ virtual_display = Display(visible=0, size=(1400, 900))
6
+ virtual_display.start()
7
+
8
+ # Import package
9
+ import gym
10
+ from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
11
+ from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.
12
+ from stable_baselines3 import PPO
13
+ from stable_baselines3.common.evaluation import evaluate_policy
14
+ from stable_baselines3.common.env_util import make_vec_env
15
+ from stable_baselines3.common.vec_env import DummyVecEnv
16
+
17
def str2bool(v):
    """Convert a command-line style value to a boolean.

    Intended for use as an ``argparse`` ``type=`` callable.

    Args:
        v: Either a ``bool`` (returned unchanged) or a string. The strings
            ``yes``/``true``/``t``/``y``/``1`` map to ``True`` and
            ``no``/``false``/``f``/``n``/``0`` map to ``False``. Matching is
            case-insensitive and ignores surrounding whitespace.

    Returns:
        The boolean that ``v`` represents.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not a bool and is not one of
            the recognised spellings.
    """
    if isinstance(v, bool):
        return v
    if not isinstance(v, str):
        # Reject non-strings explicitly so the caller gets the documented
        # ArgumentTypeError rather than an AttributeError from ``.lower()``.
        raise argparse.ArgumentTypeError('Boolean value expected.')

    normalized = v.strip().lower()
    if normalized in ('yes', 'true', 't', 'y', '1'):
        return True
    if normalized in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
26
+
27
# Command-line interface for the training script.
# NOTE: the arguments are parsed at module level, so `args` is populated as
# soon as this file is executed or imported.
parser = argparse.ArgumentParser()

# Name used when saving the trained model and when packaging it for the Hub.
parser.add_argument('--model_name', dest='model_name',
                    default="ppo-LunarLander-v2", type=str, help='model name')
# Number of environment steps passed to `model.learn`.
parser.add_argument('--total_timesteps', dest='total_timesteps',
                    default=1000000, type=int, help='total timesteps')
# Number of environments created by `make_vec_env` for training.
parser.add_argument('--n_envs', dest='n_envs',
                    default=16, type=int, help='n_envs')
# Destination repository on the Hugging Face Hub.
parser.add_argument('--repo_id', dest='repo_id',
                    default="thien1892/LunarLander-v2-ppo", type=str, help='repo_id')
parser.add_argument('--commit_message', dest='commit_message',
                    default="Upload PPO LunarLander-v2 trained agent", type=str,
                    help='commit_message')
# When true, training resumes from the checkpoint identified by
# --id_retrain / --filename_retrain; when false a fresh PPO model is built.
parser.add_argument('--re_train', dest='re_train',
                    default=True, type=str2bool,
                    help='re_train: continue training from the checkpoint given by '
                         '--id_retrain/--filename_retrain instead of starting from scratch')
# Hub repository and file name of the checkpoint to resume from.
parser.add_argument('--id_retrain', dest='id_retrain',
                    default="thien1892/LunarLander-v2-ppo-5m", type=str, help='id_retrain')
parser.add_argument('--filename_retrain', dest='filename_retrain',
                    default="ppo-LunarLander-v2-5m.zip", type=str, help='filename_retrain')
# Learning rate used for both the fresh and the resumed model.
parser.add_argument('--learning_rate', dest='learning_rate',
                    default=1e-4, type=float, help='learning_rate')
args = parser.parse_args()
47
+
48
if __name__ == '__main__':
    # Train (or resume training of) a PPO agent on LunarLander-v2, evaluate
    # it, and upload the result to the Hugging Face Hub.
    env_id = "LunarLander-v2"

    # Create the vectorized training environment
    env = make_vec_env(env_id, n_envs=args.n_envs)

    # Model: resume from a Hub checkpoint, or build a fresh PPO model
    if args.re_train:
        checkpoint = load_from_hub(args.id_retrain, args.filename_retrain)
        # `learning_rate` is forwarded as an extra keyword so the loaded model
        # uses the value from the command line.
        model = PPO.load(
            checkpoint,
            env=env,
            print_system_info=True,
            learning_rate=args.learning_rate,
        )
    else:
        model = PPO(
            policy='MlpPolicy',
            env=env,
            n_steps=1024,
            batch_size=64,
            n_epochs=4,
            gamma=0.999,
            gae_lambda=0.98,
            ent_coef=0.01,
            learning_rate=args.learning_rate,
            verbose=1,
        )

    # Train. `reset_num_timesteps` is an argument of `learn`, not of `load`,
    # so it is passed here explicitly.
    model.learn(total_timesteps=args.total_timesteps, reset_num_timesteps=True)

    # Save the model
    model.save(args.model_name)

    # Evaluate the model on a fresh, single environment
    eval_env = gym.make(env_id)
    mean_reward, std_reward = evaluate_policy(
        model, eval_env, n_eval_episodes=10, deterministic=True
    )
    # Release this environment before the name is rebound below.
    eval_env.close()
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

    # Push to HF hub
    eval_env = DummyVecEnv([lambda: gym.make(env_id)])
    model_architecture = "PPO"

    package_to_hub(
        model=model,                            # Our trained model
        model_name=args.model_name,             # The name of our trained model
        model_architecture=model_architecture,  # The model architecture we used: in our case PPO
        env_id=env_id,                          # Name of the environment
        eval_env=eval_env,                      # Evaluation environment
        repo_id=args.repo_id,                   # {organization}/{repo_name} on the Hugging Face Hub
        commit_message=args.commit_message,
    )

    # Release the training environments once everything has been uploaded.
    env.close()