Spaces:

huggingface-projects
/

Deep-RL-Course-Certification

Running

App Files Files Community

Deep-RL-Course-Certification / app.py

ThomasSimonini HF staff

Create app.py

91cb2a2 almost 2 years ago

raw

history blame

7.52 kB

	import gradio as gr
	from huggingface_hub import HfApi, hf_hub_download
	from huggingface_hub.repocard import metadata_load

	import pandas as pd

	from utils import *

	# Module-level Hugging Face Hub client.
	# NOTE(review): get_user_models() creates its own HfApi instance, so nothing
	# in the visible code reads this one - confirm before relying on or removing it.
	api = HfApi()

	def get_user_models(hf_username, env_tag, lib_tag):
	    """
	    Collect the ids of a user's Reinforcement Learning models
	    that carry both the given environment tag and library tag
	    :param hf_username: User HF username
	    :param env_tag: Environment tag
	    :param lib_tag: Library tag
	    :return: list of model ids
	    """
	    hub_client = HfApi()
	    # Every model must be tagged as RL *and* with the unit's env and library.
	    required_tags = ["reinforcement-learning", env_tag, lib_tag]
	    matching_models = hub_client.list_models(author=hf_username, filter=required_tags)

	    model_ids = []
	    for model_info in matching_models:
	        model_ids.append(model_info.modelId)
	    return model_ids


	def get_metadata(model_id):
	    """
	    Download a model's README.md and load its metadata
	    (the metadata contains the evaluation data)
	    :param model_id: id of the model on the Hub
	    :return: the loaded metadata, or None when the download fails with an HTTP error
	    """
	    # NOTE(review): `requests` is not imported in the visible part of this file;
	    # presumably it comes in through `from utils import *` - confirm.
	    metadata = None
	    try:
	        metadata = metadata_load(hf_hub_download(model_id, filename="README.md"))
	    except requests.exceptions.HTTPError:
	        # 404 README.md not found
	        pass
	    return metadata


	def parse_metrics_accuracy(meta):
	    """
	    Get model results and parse it
	    Reads the value of the first metric of the first result of the first
	    "model-index" entry.
	    :param meta: model metadata
	    :return: the metric value, or None when the metadata is missing or does
	    not have that structure
	    """
	    try:
	        return meta["model-index"][0]["results"][0]["metrics"][0]["value"]
	    except (KeyError, IndexError, TypeError):
	        # No "model-index", an empty list along the way, or an unexpected
	        # shape: report "no result" instead of crashing on one bad model card.
	        return None


	def parse_rewards(accuracy):
	    """
	    Parse mean_reward and std_reward
	    Accepts "mean +/- std" (spaces around "+/-" are optional), a lone mean
	    (string or number) or None.
	    :param accuracy: model results
	    :return: (mean_reward, std_reward) as floats; std_reward is 0.0 when only
	    a mean is given; (-1000.0, -1000.0) when the result is missing or is not
	    numeric
	    """
	    default_reward = -1000
	    default_std = -1000
	    fallback = (float(default_reward), float(default_std))

	    if accuracy is None:
	        return fallback

	    # str.split always yields at least one part, so only "mean" and
	    # "mean, std" need handling; float() ignores the surrounding spaces.
	    parsed = str(accuracy).split("+/-")
	    try:
	        mean_reward = float(parsed[0])
	        std_reward = float(parsed[1]) if len(parsed) > 1 else float(0)
	    except ValueError:
	        # Non-numeric text in the model card: treat it like a missing result.
	        return fallback

	    return mean_reward, std_reward

	def calculate_best_result(user_model_ids):
	    """
	    Find the best result of a unit among a user's models
	    A model's result is mean_reward - std_reward. Models without metadata
	    are skipped, and only a result strictly above -100 can become the best.
	    :param user_model_ids: RL models of a user
	    :return: (best_result, best_model_id); (-100, "") when no model qualifies
	    """
	    top_score, top_model_id = -100, ""
	    for candidate_id in user_model_ids:
	        card_meta = get_metadata(candidate_id)
	        if card_meta is not None:
	            mean_reward, std_reward = parse_rewards(parse_metrics_accuracy(card_meta))
	            score = mean_reward - std_reward
	            if score > top_score:
	                top_score, top_model_id = score, candidate_id

	    return top_score, top_model_id

	def check_if_passed(model):
	    """
	    Mark a unit as passed when its best result reaches the baseline
	    (best_result >= min_result). The dict is updated in place and is left
	    untouched when the baseline is not reached.
	    :param model: user model
	    """
	    reached_baseline = model["best_result"] >= model["min_result"]
	    if not reached_baseline:
	        return
	    model["passed"] = True

	def test_(hf_username):
	results_certification = [
	{
	"unit": "Unit 1",
	"env": "LunarLander-v2",
	"library": "stable-baselines3",
	"min_result": 200,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 2",
	"env": "Taxi-v3",
	"library": "q-learning",
	"min_result": 4,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 3",
	"env": "SpaceInvadersNoFrameskip-v4",
	"library": "stable-baselines3",
	"min_result": 200,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 4",
	"env": "CartPole-v1",
	"library": "reinforce",
	"min_result": 350,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 4",
	"env": "Pixelcopter-PLE-v0",
	"library": "reinforce",
	"min_result": 5,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 5",
	"env": "ML-Agents-SnowballTarget",
	"library": "ml-agents",
	"min_result": -100,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 5",
	"env": "ML-Agents-Pyramids",
	"library": "ml-agents",
	"min_result": -100,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 6",
	"env": "AntBulletEnv-v0",
	"library": "stable-baselines3",
	"min_result": 650,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 6",
	"env": "PandaReachDense-v2",
	"library": "stable-baselines3",
	"min_result": -3.5,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 7",
	"env": "ML-Agents-SoccerTwos",
	"library": "ml-agents",
	"min_result": -100,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 8 Part 1",
	"env": "GodotRL-JumperHard",
	"library": "cleanrl",
	"min_result": -100,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	{
	"unit": "Unit 8 Part 2",
	"env": "Vizdoom-Battle",
	"library": "cleanrl",
	"min_result": -100,
	"best_result": 0,
	"best_model_id": "",
	"passed": False
	},
	]
	for unit in results_certification:
	# Get user model
	user_models = get_user_models(hf_username, unit['env'], unit['library'])
	print(user_models)
	# Calculate the best result and get the best_model_id
	best_result, best_model_id = calculate_best_result(user_models)

	# Save best_result and best_model_id
	unit["best_result"] = best_result
	unit["best_model_id"] = make_clickable_model(best_model_id)

	# Based on best_result do we pass the unit?
	check_if_passed(unit)
	#pass_emoji(unit["passed"])

	print(results_certification)

	df = pd.DataFrame (results_certification)

	return df


	# Gradio UI: a username box, a button and the progress table.
	with gr.Blocks() as demo:
	    gr.Markdown("""
	# 🏆 Check your progress in the Deep Reinforcement Learning Course 🏆
	You can check your progress here.

	- To get a certificate of completion, you must pass 80% of the assignments before the end of April 2023.
	- To get an honors certificate, you must pass 100% of the assignments before the end of April 2023.

	To pass an assignment your model result (mean_reward - std_reward) must be >= min_result

	When min_result = -100 it means that you just need to push a model to pass this hands-on. No need to reach a certain result.

	Just type your Hugging Face Username 🤗 (in my case ThomasSimonini)
	""")

	    hf_username = gr.Textbox(placeholder="ThomasSimonini", label="Your Hugging Face Username")
	    #email = gr.Textbox(placeholder="thomas.simonini@huggingface.co", label="Your Email (to receive your certificate)")
	    check_progress_button = gr.Button(value="Check my progress")
	    # The table starts empty. hf_username is the Textbox component, not the
	    # text typed into it, so test_ must only run from the click handler below,
	    # where Gradio passes the textbox's current value.
	    output = gr.components.Dataframe(
	        headers=["Unit", "Environment", "Library", "Baseline", "Your best result", "Your best model id", "Pass?"],
	        datatype=["markdown", "markdown", "markdown", "number", "number", "markdown", "bool"],
	    )
	    check_progress_button.click(fn=test_, inputs=hf_username, outputs=output)

	demo.launch()