File size: 4,261 Bytes
5c0ffc8
 
 
24a8076
5c0ffc8
6e7e273
5c0ffc8
 
 
24a8076
 
 
5c0ffc8
 
6e7e273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c0ffc8
24a8076
5c0ffc8
 
 
 
 
 
 
 
 
 
cf2f2ff
5c0ffc8
 
 
cf2f2ff
5c0ffc8
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import pandas as pd
import ipfshttpclient
from pathlib import Path
from utils import INC_TOOLS
from typing import List

# Name of the CSV file that stores the per-tool accuracy table.
ACCURACY_FILENAME = "tools_accuracy.csv"
# Address passed to ipfshttpclient.connect() when uploading the accuracy file.
IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
# Directory that contains this script.
SCRIPTS_DIR = Path(__file__).parent
# Parent of the scripts directory.
ROOT_DIR = SCRIPTS_DIR.parent
# Directory from which tools.parquet is read and where the accuracy CSV is written.
DATA_DIR = ROOT_DIR / "data"


def update_tools_accuracy(
    tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute the latest accuracy information for the mech tools.

    The requests in ``tools_df`` are restricted to the tools listed in
    ``inc_tools``, to rows whose ``error`` column is not 1 and to rows where
    both ``currentAnswer`` (after normalising "yes"/"no" capitalisation) and
    ``vote`` are "Yes" or "No". A request counts as a win when both values
    are equal.

    Args:
        tools_acc: Previously stored accuracy table (one row per tool), or
            ``None`` when no table exists yet. Rows of tools that appear in
            the new data are updated in place.
        tools_df: Requests table with at least the columns ``tool``,
            ``error``, ``currentAnswer`` and ``vote``. When it also has a
            ``request_time`` column, the per-tool min/max of that column is
            reported as ``min`` / ``max``.
        inc_tools: Names of the tools to include in the computation.

    Returns:
        A table with the columns ``tool``, ``tool_accuracy`` (percentage of
        wins) and ``total_requests`` (plus ``min`` / ``max`` when request
        times are available). If ``tools_acc`` was given, the returned table
        is ``tools_acc`` with the matching rows refreshed and with rows added
        for tools that were not in it yet.
    """
    answers = ["Yes", "No"]

    # computation of the accuracy information
    # .copy() so the column assignments below never write into a view of tools_df
    selected = tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    valid = tools_df[selected].copy()
    valid["currentAnswer"] = valid["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    valid = valid[
        valid["currentAnswer"].isin(answers) & valid["vote"].isin(answers)
    ].copy()
    valid["win"] = (valid["currentAnswer"] == valid["vote"]).astype(int)
    valid.columns = valid.columns.astype(str)
    print("Tools dataset after filtering")
    print(valid.head())

    # One row per tool, one column per outcome (0 = loss, 1 = win). The
    # reindex guarantees both columns exist even if every request was a win
    # (or every request was a loss), which would otherwise raise a KeyError.
    wins = (
        valid.groupby(["tool", "win"])
        .size()
        .unstack()
        .reindex(columns=[0, 1])
        .fillna(0)
    )
    wins["total_requests"] = wins[0] + wins[1]
    wins["tool_accuracy"] = (wins[1] / wins["total_requests"]) * 100
    wins.reset_index(inplace=True)
    wins.columns = wins.columns.astype(str)
    wins = wins[["tool", "tool_accuracy", "total_requests"]]

    print("Wins dataset")
    print(wins.head())
    try:
        # Raises KeyError when tools_df has no "request_time" column.
        timeline = valid.groupby(["tool"])["request_time"].agg(["min", "max"])
    except KeyError:
        print("NO REQUEST TIME INFORMATION AVAILABLE")
        no_timeline_info = True
        acc_info = wins
    else:
        no_timeline_info = False
        print("timeline dataset")
        print(timeline.head())
        acc_info = wins.merge(timeline, how="left", on="tool")

    if tools_acc is None:
        print("Creating accuracy file for the first time")
        return acc_info

    # update the old information
    print("Updating accuracy information")
    tools_to_update = list(acc_info["tool"].values)
    print("tools to update")
    print(tools_to_update)
    existing_tools = set(tools_acc["tool"].values)
    # Index by tool once instead of re-filtering acc_info for every value.
    fresh = acc_info.set_index("tool")
    for tool in tools_to_update:
        if tool not in existing_tools:
            continue
        row_mask = tools_acc["tool"] == tool
        tools_acc.loc[row_mask, "tool_accuracy"] = fresh.at[tool, "tool_accuracy"]
        tools_acc.loc[row_mask, "total_requests"] = fresh.at[tool, "total_requests"]
        # Without request times the stored timeline is cleared for this tool.
        tools_acc.loc[row_mask, "min"] = (
            None if no_timeline_info else fresh.at[tool, "min"]
        )
        tools_acc.loc[row_mask, "max"] = (
            None if no_timeline_info else fresh.at[tool, "max"]
        )

    # Tools seen for the first time get their own row instead of being dropped.
    new_tools = acc_info[~acc_info["tool"].isin(existing_tools)]
    if not new_tools.empty:
        tools_acc = pd.concat([tools_acc, new_tools], ignore_index=True)

    print(tools_acc)
    return tools_acc


def compute_tools_accuracy():
    """Recompute the tools accuracy table, save it as CSV and publish it to IPFS.

    Reads ``tools.parquet`` from ``DATA_DIR``, combines it with the existing
    accuracy CSV (if that file is present) through ``update_tools_accuracy``,
    writes the result back to ``DATA_DIR / ACCURACY_FILENAME`` and finally
    adds that file to the IPFS node at ``IPFS_SERVER``, printing its hash.
    """
    print("Computing accuracy of tools")
    print("Reading tools parquet file")
    tools = pd.read_parquet(DATA_DIR / "tools.parquet")
    print(tools.head())
    # Computing tools accuracy information
    print("Computing tool accuracy information")
    accuracy_path = DATA_DIR / ACCURACY_FILENAME
    # Only load previous results when the file exists; None means "first run".
    acc_data = pd.read_csv(accuracy_path) if accuracy_path.exists() else None
    acc_data = update_tools_accuracy(acc_data, tools, INC_TOOLS)

    # save acc_data into a CSV file
    print("Saving into a csv file")
    acc_data.to_csv(accuracy_path, index=False)
    print(acc_data.head())

    # save the data into IPFS; the context manager closes the client
    # connection even if the upload fails
    with ipfshttpclient.connect(IPFS_SERVER) as client:
        result = client.add(accuracy_path)
    print(f"HASH of the tools accuracy file: {result['Hash']}")


# Run the accuracy computation only when this file is executed as a script.
if __name__ == "__main__":
    compute_tools_accuracy()