cyberosa committed on
Commit
5c0ffc8
·
1 Parent(s): 5267499

new tools accuracy information

Browse files
app.py CHANGED
@@ -26,6 +26,8 @@ from tabs.error import (
26
  )
27
  from tabs.about import about_olas_predict, about_this_dashboard
28
 
 
 
29
 
30
  def get_logger():
31
  logger = logging.getLogger(__name__)
@@ -119,22 +121,6 @@ tools_df, trades_df = prepare_data()
119
 
120
  demo = gr.Blocks()
121
 
122
-
123
- INC_TOOLS = [
124
- "prediction-online",
125
- "prediction-offline",
126
- "claude-prediction-online",
127
- "claude-prediction-offline",
128
- "prediction-offline-sme",
129
- "prediction-online-sme",
130
- "prediction-request-rag",
131
- "prediction-request-reasoning",
132
- "prediction-url-cot-claude",
133
- "prediction-request-rag-claude",
134
- "prediction-request-reasoning-claude",
135
- ]
136
-
137
-
138
  error_df = get_error_data(tools_df=tools_df, inc_tools=INC_TOOLS)
139
  error_overall_df = get_error_data_overall(error_df=error_df)
140
  winning_rate_df = get_tool_winning_rate(tools_df=tools_df, inc_tools=INC_TOOLS)
 
26
  )
27
  from tabs.about import about_olas_predict, about_this_dashboard
28
 
29
+ from scripts.utils import INC_TOOLS
30
+
31
 
32
  def get_logger():
33
  logger = logging.getLogger(__name__)
 
121
 
122
  demo = gr.Blocks()
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  error_df = get_error_data(tools_df=tools_df, inc_tools=INC_TOOLS)
125
  error_overall_df = get_error_data_overall(error_df=error_df)
126
  winning_rate_df = get_tool_winning_rate(tools_df=tools_df, inc_tools=INC_TOOLS)
data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:906e77fed8d2af6d7a4f4acf73640ce6aa3e3d4357e01c7b00af1c180f3b6eaf
3
- size 2460157
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78ca1c4b1ad00c95f94bdaecef627d6449547984d18e452f2db65fa194417a52
3
+ size 2456352
data/delivers.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d975d5fc2c3c7c50bec7136dffa135aef50fe3802bd1650cd46f236675318af
3
- size 510466826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfad460f3b035878c083a8a98988b0c623643ddf688b364d6269f45c194beae8
3
+ size 561642110
data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:781a9625add04d8337bb3909befb6c0d4c39f3e0c9a5c47c1fb9398a96ba36ae
3
- size 6482896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:295b4f6839111251a764a5fcde484552daaa6b42c63e5753680a33311c53b4e5
3
+ size 6484981
data/fpmms.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2c07ed22be9463c8465d34717222f1553c5890254b350fb07aec2604e925795
3
- size 336922
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c488af7d0ad3070526751daafce3de6c944f9be289503f885ec6a4d4336183b2
3
+ size 338750
data/requests.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff6dbad037f5f9c3cee3631c126ba0f6e582b32b7e3fb75d79fc60a43a9dadb
3
- size 11547978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c5aa48864ff7eb8662d3ec1faff6e533d4067eadab72a16be233adf5df0216
3
+ size 16857932
data/summary_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5db58ef61b7a256437ee87238249cd1da9d229a4ca5f9af79b02efe61fa73cc
3
- size 40127
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b82f7c4a487ae6b7d8daadda5454f4f8327b83ca85bd9d252d32a8d39105a526
3
+ size 42145
data/t_map.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29c60bf39a93cad87fd54b7a87407cb8f50bb2cd2122e993762cec36d76e76fb
3
- size 8982599
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc45f8d93c69a51da90edd37aacff70ef5b9cb2680201d0e76711e245d2b240
3
+ size 9252439
data/tools.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf3ed51b3e32b32e3cd9bba607afbc1e6d67583e6ca9126df68585c339964a8a
3
- size 378117330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc017439308d15c93dda7c34cfb46b5ff4de1cbe3614dc2ff18fab7b50622e14
3
+ size 562391192
data/tools_accuracy.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18c334d2f2db204a97501adde354c61569b269514579781f894787e52829407d
3
+ size 1014
notebooks/analysis.ipynb CHANGED
@@ -475,6 +475,20 @@
475
  "total"
476
  ]
477
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  {
479
  "cell_type": "code",
480
  "execution_count": 3,
 
475
  "total"
476
  ]
477
  },
478
+ {
479
+ "cell_type": "code",
480
+ "execution_count": 15,
481
+ "metadata": {},
482
+ "outputs": [],
483
+ "source": [
484
+ "total.to_csv(\"accuracy_info.csv\")"
485
+ ]
486
+ },
487
+ {
488
+ "cell_type": "markdown",
489
+ "metadata": {},
490
+ "source": []
491
+ },
492
  {
493
  "cell_type": "code",
494
  "execution_count": 3,
requirements.txt CHANGED
@@ -5,4 +5,5 @@ pyarrow
5
  requests
6
  gradio==4.13.0
7
  pytz
8
- duckdb
 
 
5
  requests
6
  gradio==4.13.0
7
  pytz
8
+ duckdb
9
+ ipfshttpclient
scripts/pull_data.py CHANGED
@@ -19,9 +19,9 @@ from tools import (
19
  DEFAULT_FILENAME as TOOLS_FILENAME,
20
  update_tools_accuracy,
21
  )
22
- from app import INC_TOOLS
23
  from profitability import run_profitability_analysis
24
-
25
  import gc
26
 
27
  logging.basicConfig(level=logging.INFO)
@@ -153,15 +153,6 @@ def weekly_analysis():
153
  with open(DATA_DIR / "t_map.pkl", "wb") as f:
154
  pickle.dump(t_map, f)
155
 
156
- # Computing tools accuracy information
157
- print("Computing tool accuracy information")
158
- # Check if the file exists
159
- acc_data = None
160
- if os.path.exists(DATA_DIR / ACCURACY_FILENAME):
161
- acc_data = pd.read_csv(DATA_DIR / ACCURACY_FILENAME)
162
- update_tools_accuracy(acc_data, tools, INC_TOOLS)
163
- # TODO save acc_data into a CSV file
164
-
165
  # clean and release all memory
166
  del tools
167
  del fpmms
 
19
  DEFAULT_FILENAME as TOOLS_FILENAME,
20
  update_tools_accuracy,
21
  )
22
+ from utils import INC_TOOLS
23
  from profitability import run_profitability_analysis
24
+ import ipfshttpclient
25
  import gc
26
 
27
  logging.basicConfig(level=logging.INFO)
 
153
  with open(DATA_DIR / "t_map.pkl", "wb") as f:
154
  pickle.dump(t_map, f)
155
 
 
 
 
 
 
 
 
 
 
156
  # clean and release all memory
157
  del tools
158
  del fpmms
scripts/tools.py CHANGED
@@ -501,20 +501,43 @@ def update_tools_accuracy(
501
  tools_non_error["currentAnswer"] == tools_non_error["vote"]
502
  ).astype(int)
503
  tools_non_error.columns = tools_non_error.columns.astype(str)
 
 
 
504
  wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
505
  wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
506
  wins.reset_index(inplace=True)
507
  wins["total_requests"] = wins[0] + wins[1]
508
  wins.columns = wins.columns.astype(str)
509
  wins = wins[["tool", "tool_accuracy", "total_requests"]]
 
 
 
510
  timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
 
 
 
511
  acc_info = wins.merge(timeline, how="left", on="tool")
512
 
513
  if tools_acc is None:
514
  print("Creating accuracy file for the first time")
515
  return acc_info
516
 
517
- # TODO update the old information
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
 
519
 
520
  if __name__ == "__main__":
 
501
  tools_non_error["currentAnswer"] == tools_non_error["vote"]
502
  ).astype(int)
503
  tools_non_error.columns = tools_non_error.columns.astype(str)
504
+ print("Tools dataset after filtering")
505
+ print(tools_non_error.head())
506
+
507
  wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
508
  wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
509
  wins.reset_index(inplace=True)
510
  wins["total_requests"] = wins[0] + wins[1]
511
  wins.columns = wins.columns.astype(str)
512
  wins = wins[["tool", "tool_accuracy", "total_requests"]]
513
+
514
+ print("Wins dataset")
515
+ print(wins.head())
516
  timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
517
+
518
+ print("timeline dataset")
519
+ print(timeline.head())
520
  acc_info = wins.merge(timeline, how="left", on="tool")
521
 
522
  if tools_acc is None:
523
  print("Creating accuracy file for the first time")
524
  return acc_info
525
 
526
+ # update the old information
527
+ print("Updating accuracy information")
528
+ tools_to_update = list(acc_info["tool"].values)
529
+ existing_tools = list(tools_acc["tool"].values)
530
+ for tool in tools_to_update:
531
+ if tool in existing_tools:
532
+ new_accuracy = acc_info[acc_info["tool"] == tool, "tool_accuracy"]
533
+ new_volume = acc_info[acc_info["tool"] == tool, "total_requests"]
534
+ new_min_timeline = acc_info[acc_info["tool"] == tool, "min"]
535
+ new_max_timeline = acc_info[acc_info["tool"] == tool, "max"]
536
+ tools_acc[tools_acc["tool"] == tool, "tool_accuracy"] = new_accuracy
537
+ tools_acc[tools_acc["tool"] == tool, "total_requests"] = new_volume
538
+ tools_acc[tools_acc["tool"] == tool, "min"] = new_min_timeline
539
+ tools_acc[tools_acc["tool"] == tool, "max"] = new_max_timeline
540
+ return tools_acc
541
 
542
 
543
  if __name__ == "__main__":
scripts/update_tools_accuracy.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import ipfshttpclient
4
+ from pull_data import DATA_DIR
5
+ from utils import INC_TOOLS
6
+ from tools import update_tools_accuracy
7
+
8
+ ACCURACY_FILENAME = "tools_accuracy.csv"
9
+ IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
10
+
11
+
12
def compute_tools_accuracy():
    """Recompute the tools accuracy table, save it as CSV and publish it to IPFS.

    Reads ``tools.parquet`` from ``DATA_DIR``, loads the previously saved
    accuracy CSV when one exists, and hands both to ``update_tools_accuracy``
    together with ``INC_TOOLS``. The returned table is written back to
    ``DATA_DIR / ACCURACY_FILENAME`` (without the index column), and that file
    is then added to the IPFS node at ``IPFS_SERVER``. The resulting hash is
    printed.

    Returns:
        None. The results are the CSV file on disk and the printed IPFS hash.
    """
    # Build the CSV location once; it is used for reading, writing and upload.
    accuracy_path = DATA_DIR / ACCURACY_FILENAME

    print("Reading tools parquet file")
    tools = pd.read_parquet(DATA_DIR / "tools.parquet")
    print(tools.head())

    print("Computing tool accuracy information")
    # Earlier results are optional: None is passed on when no CSV exists yet.
    acc_data = None
    if os.path.exists(accuracy_path):
        acc_data = pd.read_csv(accuracy_path)
    acc_data = update_tools_accuracy(acc_data, tools, INC_TOOLS)

    print("Saving into a csv file")
    acc_data.to_csv(accuracy_path, index=False)

    # Use the client as a context manager so its HTTP session is closed even
    # when the upload raises; the original never closed the connection.
    with ipfshttpclient.connect(IPFS_SERVER) as client:
        result = client.add(accuracy_path)
    print(f"HASH of the tools accuracy file: {result['Hash']}")
31
+
32
+
33
+ if __name__ == "__main__":
34
+ compute_tools_accuracy()
scripts/utils.py CHANGED
@@ -27,6 +27,20 @@ HTTPS = HTTP[:4] + "s" + HTTP[4:]
27
  IPFS_ADDRESS = f"{HTTPS}gateway.autonolas.tech/ipfs/"
28
  FORMAT_UPDATE_BLOCK_NUMBER = 30411638
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  class MechEventName(Enum):
32
  """The mech's event names."""
 
27
  IPFS_ADDRESS = f"{HTTPS}gateway.autonolas.tech/ipfs/"
28
  FORMAT_UPDATE_BLOCK_NUMBER = 30411638
29
 
30
+ INC_TOOLS = [
31
+ "prediction-online",
32
+ "prediction-offline",
33
+ "claude-prediction-online",
34
+ "claude-prediction-offline",
35
+ "prediction-offline-sme",
36
+ "prediction-online-sme",
37
+ "prediction-request-rag",
38
+ "prediction-request-reasoning",
39
+ "prediction-url-cot-claude",
40
+ "prediction-request-rag-claude",
41
+ "prediction-request-reasoning-claude",
42
+ ]
43
+
44
 
45
  class MechEventName(Enum):
46
  """The mech's event names."""