import os
import random

import pandas as pd

# Helper that loads a saved SB3 model and evaluates it on the given env with a
# fixed seed; per its usage below, it returns a result value plus the mean and
# std of the reward across the evaluation episodes.
from hf_helpers.sb3_eval import eval_model_with_seed

env_id = "LunarLander-v2"
models_to_evaluate = [
    "ppo-LunarLander-v2_001_000_000_hf_defaults.zip",
    "ppo-LunarLander-v2_010_000_000_hf_defaults.zip",
    "ppo-LunarLander-v2_010_000_000_sb3_defaults.zip",
    "ppo-LunarLander-v2_123_456_789_hf_defaults.zip",
]
evaluation_results_fp = "evaluation_results.csv"


def store_results(results):
    """Append a batch of result dicts to the CSV, writing the header only once."""
    results_df = pd.DataFrame(results)
    header = not os.path.exists(evaluation_results_fp)
    results_df.to_csv(evaluation_results_fp, mode="a", index=False, header=header)


def evaluate_and_store_all_results():
    results = []
    n_evaluations = 1000
    for i in range(n_evaluations):
        # Flush the accumulated results to disk every 10 iterations.
        if i > 0 and i % 10 == 0:
            print(f"Progress: {i}/{n_evaluations}")
            store_results(results)
            results = []
        # seed = random.randint(0, 1000000000000)  # Why this interval?
        seed = random.randint(0, 10000)  # Also try some smaller numbers for seed
        n_envs = random.randint(1, 16)
        # Evaluate every model with the same seed and n_envs so their results
        # for this iteration are directly comparable.
        for model_fp in models_to_evaluate:
            result, mean_reward, std_reward = eval_model_with_seed(
                model_fp, env_id, seed, n_eval_episodes=10, n_envs=n_envs
            )
            result_data = {
                "model_fp": model_fp,
                "seed": seed,
                "n_envs": n_envs,
                "result": result,
                "mean_reward": mean_reward,
                "std_reward": std_reward,
            }
            results.append(result_data)
    # Flush the final batch, which the periodic check inside the loop never reaches.
    if results:
        store_results(results)


def analyze_results():
    results_df = pd.read_csv(evaluation_results_fp)
    results_df["model_fp"] = results_df["model_fp"].str.replace(".zip", "", regex=False)
    # Aggregate the per-evaluation results into one summary row per model.
    aggregated_results = (
        results_df.groupby("model_fp")["result"]
        .agg(["count", "min", "max", "mean"])
        .reset_index()
    )
    aggregated_results.columns = [
        "Model name",
        "Number of results",
        "Min",
        "Max",
        "Average",
    ]
    aggregated_results = aggregated_results.sort_values(by="Model name")
    print(aggregated_results.to_markdown(index=False, tablefmt="pipe"))


# evaluate_and_store_all_results()
analyze_results()