Spaces:

valory
/

olas-prediction-live-dashboard

Running

App Files Files Community

olas-prediction-live-dashboard / scripts /update_tools_accuracy.py

rosacastillo

cleaning and refactoring code

6e7e273 3 months ago

raw

history blame

4.26 kB

	import os
	import pandas as pd
	import ipfshttpclient
	from pathlib import Path
	from utils import INC_TOOLS
	from typing import List

	# Name of the CSV file (stored under DATA_DIR) that holds the per-tool accuracy table.
	ACCURACY_FILENAME = "tools_accuracy.csv"
	# Multiaddr handed to ipfshttpclient.connect() when publishing the accuracy file.
	IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
	# Directory containing this script.
	SCRIPTS_DIR = Path(__file__).parent
	# Parent directory of the scripts folder.
	ROOT_DIR = SCRIPTS_DIR.parent
	# Folder from which tools.parquet is read and where the accuracy CSV is read/written.
	DATA_DIR = ROOT_DIR / "data"


	def update_tools_accuracy(
	    tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
	) -> pd.DataFrame:
	    """Compute/update the latest accuracy information for the different mech tools.

	    Rows of ``tools_df`` are kept only when the tool is listed in ``inc_tools``,
	    the ``error`` flag is not 1 and both ``currentAnswer`` and ``vote`` are
	    "Yes"/"No" (lower-case "yes"/"no" answers are normalised first). A row
	    counts as a win when ``currentAnswer`` equals ``vote``.

	    Args:
	        tools_acc: previously stored accuracy table (columns ``tool``,
	            ``tool_accuracy``, ``total_requests`` and optionally ``min`` /
	            ``max``), or ``None`` when no accuracy file exists yet.
	        tools_df: tool requests with at least the columns ``tool``, ``error``,
	            ``currentAnswer`` and ``vote``; ``request_time`` is optional.
	        inc_tools: names of the tools to include in the computation.

	    Returns:
	        The freshly computed table when ``tools_acc`` is ``None``; otherwise
	        ``tools_acc`` with the rows of the evaluated tools refreshed (in place)
	        and rows appended for evaluated tools that were not stored before.
	    """

	    # computation of the accuracy information
	    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
	    # filtering errors; work on an explicit copy so the column assignments
	    # below never target a slice of the caller's frame
	    tools_non_error = tools_inc[tools_inc["error"] != 1].copy()
	    tools_non_error["currentAnswer"] = tools_non_error["currentAnswer"].replace(
	        {"no": "No", "yes": "Yes"}
	    )
	    valid_answers = ["Yes", "No"]
	    tools_non_error = tools_non_error[
	        tools_non_error["currentAnswer"].isin(valid_answers)
	        & tools_non_error["vote"].isin(valid_answers)
	    ].copy()
	    tools_non_error["win"] = (
	        tools_non_error["currentAnswer"] == tools_non_error["vote"]
	    ).astype(int)
	    tools_non_error.columns = tools_non_error.columns.astype(str)
	    print("Tools dataset after filtering")
	    print(tools_non_error.head())

	    # unstack() only creates a column for win values that actually occur, so
	    # force both the loss (0) and win (1) columns to exist before using them
	    wins = (
	        tools_non_error.groupby(["tool", "win"])
	        .size()
	        .unstack()
	        .reindex(columns=[0, 1])
	        .fillna(0)
	    )
	    wins["total_requests"] = wins[0] + wins[1]
	    wins["tool_accuracy"] = (wins[1] / wins["total_requests"]) * 100
	    wins.reset_index(inplace=True)
	    wins.columns = wins.columns.astype(str)
	    wins = wins[["tool", "tool_accuracy", "total_requests"]]

	    print("Wins dataset")
	    print(wins.head())
	    no_timeline_info = False
	    try:
	        timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
	        print("timeline dataset")
	        print(timeline.head())
	        acc_info = wins.merge(timeline, how="left", on="tool")
	    except (KeyError, TypeError, ValueError):
	        # best effort: a missing or unusable request_time column only disables
	        # the timeline columns, it must not abort the accuracy computation
	        print("NO REQUEST TIME INFORMATION AVAILABLE")
	        no_timeline_info = True
	        acc_info = wins

	    if tools_acc is None:
	        print("Creating accuracy file for the first time")
	        return acc_info

	    # update the old information
	    print("Updating accuracy information")
	    tools_to_update = list(acc_info["tool"].values)
	    print("tools to update")
	    print(tools_to_update)
	    existing_tools = set(tools_acc["tool"].values)
	    for tool in tools_to_update:
	        if tool not in existing_tools:
	            continue
	        # select the fresh row and the stored row(s) once per tool
	        fresh = acc_info[acc_info["tool"] == tool]
	        stored = tools_acc["tool"] == tool
	        if no_timeline_info:
	            new_min_timeline = None
	            new_max_timeline = None
	        else:
	            new_min_timeline = fresh["min"].values[0]
	            new_max_timeline = fresh["max"].values[0]
	        tools_acc.loc[stored, "tool_accuracy"] = fresh["tool_accuracy"].values[0]
	        tools_acc.loc[stored, "total_requests"] = fresh["total_requests"].values[0]
	        tools_acc.loc[stored, "min"] = new_min_timeline
	        tools_acc.loc[stored, "max"] = new_max_timeline

	    # evaluated tools that were never stored before must be added, otherwise
	    # they would never make it into the accuracy file
	    new_tools = acc_info[~acc_info["tool"].isin(existing_tools)]
	    if not new_tools.empty:
	        tools_acc = pd.concat([tools_acc, new_tools], ignore_index=True)
	    print(tools_acc)
	    return tools_acc


	def compute_tools_accuracy() -> None:
	    """Recompute the tools accuracy CSV and publish it to IPFS.

	    Reads ``tools.parquet`` from ``DATA_DIR``, loads the existing accuracy CSV
	    when there is one, delegates the computation to ``update_tools_accuracy``,
	    writes the result back to ``DATA_DIR / ACCURACY_FILENAME`` and finally adds
	    that file to the IPFS server at ``IPFS_SERVER``, printing the returned hash.
	    """
	    print("Computing accuracy of tools")
	    print("Reading tools parquet file")
	    tools = pd.read_parquet(DATA_DIR / "tools.parquet")
	    print(tools.head())
	    # Computing tools accuracy information
	    print("Computing tool accuracy information")
	    accuracy_path = DATA_DIR / ACCURACY_FILENAME
	    # None tells update_tools_accuracy that the file is created for the first time
	    acc_data = pd.read_csv(accuracy_path) if accuracy_path.exists() else None
	    acc_data = update_tools_accuracy(acc_data, tools, INC_TOOLS)

	    # save acc_data into a CSV file
	    print("Saving into a csv file")
	    acc_data.to_csv(accuracy_path, index=False)
	    print(acc_data.head())

	    # save the data into IPFS; the context manager closes the client session
	    # even when the upload fails
	    with ipfshttpclient.connect(IPFS_SERVER) as client:
	        result = client.add(accuracy_path)
	    print(f"HASH of the tools accuracy file: {result['Hash']}")


	# Script entry point: recompute and publish the tools accuracy file when this
	# module is executed directly (not when it is imported).
	if __name__ == "__main__":
	compute_tools_accuracy()