|
import os |
|
import pandas as pd |
|
import ipfshttpclient |
|
from pathlib import Path |
|
from utils import INC_TOOLS |
|
from typing import List |
|
|
|
# Name of the CSV file (inside DATA_DIR) that stores the per-tool accuracy table.
ACCURACY_FILENAME = "tools_accuracy.csv"

# Multiaddr of the IPFS node the accuracy file is uploaded to.
IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"

# Directory layout, derived from the location of this script:
# <root>/<scripts dir>/<this file> and <root>/data.
SCRIPTS_DIR = Path(__file__).parent
ROOT_DIR = SCRIPTS_DIR.parent
DATA_DIR = ROOT_DIR.joinpath("data")
|
|
|
|
|
def update_tools_accuracy(
    tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute or update the latest accuracy information for the mech tools.

    Only requests of the tools listed in ``inc_tools`` that did not end in
    error (``error != 1``) and whose ``currentAnswer`` and ``vote`` are both
    "Yes" or "No" are taken into account. A request counts as a win when
    ``currentAnswer`` equals ``vote``; ``tool_accuracy`` is the percentage of
    wins and ``total_requests`` the number of requests that were counted.

    Args:
        tools_acc: Previously stored accuracy table with a ``tool`` column
            (plus ``tool_accuracy``, ``total_requests``, ``min``, ``max``),
            or ``None`` when no accuracy information exists yet.
        tools_df: Tool requests. Must contain the columns ``tool``,
            ``error``, ``currentAnswer`` and ``vote``. ``request_time`` is
            optional and is used to compute the ``min``/``max`` timeline.
        inc_tools: Names of the tools to include in the computation.

    Returns:
        The freshly computed accuracy table when ``tools_acc`` is ``None``.
        Otherwise ``tools_acc`` with the rows of the recomputed tools updated
        in place; when the new data contains tools that were not stored yet,
        they are appended and a new DataFrame is returned.
    """
    # Keep only the tools of interest whose requests did not end in error.
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    # Explicit copy so the column assignments below never write through to
    # (or warn about) a view of the caller's DataFrame.
    tools_non_error = tools_inc[tools_inc["error"] != 1].copy()
    tools_non_error["currentAnswer"] = tools_non_error["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )

    # Discard requests whose market answer or tool vote is not a clear Yes/No.
    valid_answers = ["Yes", "No"]
    tools_non_error = tools_non_error[
        tools_non_error["currentAnswer"].isin(valid_answers)
        & tools_non_error["vote"].isin(valid_answers)
    ].copy()
    tools_non_error["win"] = (
        tools_non_error["currentAnswer"] == tools_non_error["vote"]
    ).astype(int)
    tools_non_error.columns = tools_non_error.columns.astype(str)
    print("Tools dataset after filtering")
    print(tools_non_error.head())

    # Count losses (column 0) and wins (column 1) per tool. The reindex
    # guarantees both columns exist even when every request was won or
    # every request was lost, which would otherwise raise a KeyError below.
    wins = (
        tools_non_error.groupby(["tool", "win"])
        .size()
        .unstack()
        .fillna(0)
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["total_requests"] = wins[0] + wins[1]
    wins["tool_accuracy"] = (wins[1] / wins["total_requests"]) * 100
    wins.reset_index(inplace=True)
    wins.columns = wins.columns.astype(str)
    wins = wins[["tool", "tool_accuracy", "total_requests"]]

    print("Wins dataset")
    print(wins.head())

    # The timeline is best effort: without a usable "request_time" column
    # the accuracy information is produced without min/max.
    no_timeline_info = False
    try:
        timeline = tools_non_error.groupby(["tool"])["request_time"].agg(
            ["min", "max"]
        )
        print("timeline dataset")
        print(timeline.head())
        acc_info = wins.merge(timeline, how="left", on="tool")
    except (KeyError, TypeError):
        # KeyError: the column is missing; TypeError: values not comparable.
        print("NO REQUEST TIME INFORMATION AVAILABLE")
        no_timeline_info = True
        acc_info = wins

    if tools_acc is None:
        print("Creating accuracy file for the first time")
        return acc_info

    print("Updating accuracy information")
    tools_to_update = list(acc_info["tool"].values)
    print("tools to update")
    print(tools_to_update)
    existing_tools = set(tools_acc["tool"].values)
    new_tools = []
    for tool in tools_to_update:
        if tool not in existing_tools:
            # Not stored yet: collect it so it can be appended afterwards.
            new_tools.append(tool)
            continue

        fresh = acc_info[acc_info["tool"] == tool]
        target = tools_acc["tool"] == tool
        tools_acc.loc[target, "tool_accuracy"] = fresh["tool_accuracy"].values[0]
        tools_acc.loc[target, "total_requests"] = fresh["total_requests"].values[0]
        # Without timeline information the stored min/max are reset to None.
        tools_acc.loc[target, "min"] = (
            None if no_timeline_info else fresh["min"].values[0]
        )
        tools_acc.loc[target, "max"] = (
            None if no_timeline_info else fresh["max"].values[0]
        )

    if new_tools:
        # Previously these tools were silently dropped from the result.
        new_rows = acc_info[acc_info["tool"].isin(new_tools)]
        tools_acc = pd.concat([tools_acc, new_rows], ignore_index=True)

    print(tools_acc)
    return tools_acc
|
|
|
|
|
def compute_tools_accuracy() -> None:
    """Recompute the tools accuracy file and upload it to IPFS.

    Reads ``tools.parquet`` from ``DATA_DIR``, computes the per-tool accuracy
    (updating the existing ``ACCURACY_FILENAME`` table when that file is
    already present), writes the result back to ``DATA_DIR`` as CSV and adds
    the file to the IPFS node at ``IPFS_SERVER``, printing the returned hash.
    """
    print("Computing accuracy of tools")
    print("Reading tools parquet file")
    tools = pd.read_parquet(DATA_DIR / "tools.parquet")
    print(tools.head())

    print("Computing tool accuracy information")
    accuracy_path = DATA_DIR / ACCURACY_FILENAME
    # No stored table yet means the accuracy is created from scratch.
    acc_data = pd.read_csv(accuracy_path) if accuracy_path.exists() else None
    acc_data = update_tools_accuracy(acc_data, tools, INC_TOOLS)

    print("Saving into a csv file")
    acc_data.to_csv(accuracy_path, index=False)
    print(acc_data.head())

    # The context manager closes the client's HTTP session once the upload
    # is done, instead of leaking the connection.
    with ipfshttpclient.connect(IPFS_SERVER) as client:
        result = client.add(accuracy_path)
    print(f"HASH of the tools accuracy file: {result['Hash']}")
|
|
|
|
|
# Script entry point: recompute and publish the tools accuracy only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    compute_tools_accuracy()
|
|