rosacastillo committed
Commit ea0955a · 1 Parent(s): 5d61ee1

Removed the dependency on tools.parquet; the new mech calls computation is now timestamp-based

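In practice the commit swaps one large raw-data load (tools.parquet, ~616 MB per the deleted LFS pointer below) for small precomputed aggregates. A minimal sketch of the new loading pattern, assuming only the file names and calls visible in the app.py diff that follows (logging and the remaining queries omitted):

    import duckdb

    # Sketch: read the precomputed, timestamp-based aggregates directly
    # instead of deriving them from the raw tools.parquet at start-up.
    con = duckdb.connect(":memory:")
    error_by_markets = con.execute(
        "SELECT * FROM read_parquet('./data/error_by_markets.parquet')"
    ).fetchdf()
    winning_df = con.execute(
        "SELECT * FROM read_parquet('./data/winning_df.parquet')"
    ).fetchdf()
    con.close()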
app.py CHANGED
@@ -1,4 +1,3 @@
-from datetime import datetime, timedelta
 import gradio as gr
 import pandas as pd
 import duckdb
@@ -23,8 +22,6 @@ from tabs.metrics import (
 )
 
 from tabs.tool_win import (
-    prepare_tools,
-    get_tool_winning_rate_by_market,
     integrated_plot_tool_winnings_overall_per_market_by_week,
     integrated_tool_winnings_by_tool_per_market,
 )
@@ -44,7 +41,6 @@ from tabs.invalid_markets import (
 from tabs.error import (
     plot_week_error_data_by_market,
     plot_error_data_by_market,
-    get_error_data_by_market,
     get_error_data_overall_by_market,
     plot_tool_error_data_by_market,
 )
@@ -77,6 +73,12 @@ def get_all_data():
     logger.info("Getting all data")
 
     con = duckdb.connect(":memory:")
+    query6 = f"""
+    SELECT *
+    FROM read_parquet('./data/winning_df.parquet')
+    """
+    df6 = con.execute(query6).fetchdf()
+
     query5 = f"""
     SELECT *
     FROM read_parquet('./data/unknown_traders.parquet')
@@ -107,26 +109,30 @@ def get_all_data():
 
     query1 = f"""
     SELECT *
-    FROM read_parquet('./data/tools.parquet')
+    FROM read_parquet('./data/error_by_markets.parquet')
     """
     df1 = con.execute(query1).fetchdf()
-    logger.info("Got all data from tools.parquet")
+    logger.info("Got all data from error_by_markets.parquet")
 
     con.close()
 
-    return df1, df2, df3, df4, df5
+    return df1, df2, df3, df4, df5, df6
 
 
 def prepare_data():
     """
     Prepare the data for the dashboard
     """
-    tools_df, trades_df, tools_accuracy_info, invalid_trades, unknown_trades = (
-        get_all_data()
-    )
+    (
+        error_by_markets,
+        trades_df,
+        tools_accuracy_info,
+        invalid_trades,
+        unknown_trades,
+        winning_df,
+    ) = get_all_data()
     print(trades_df.info())
 
-    tools_df = prepare_tools(tools_df)
     trades_df = prepare_trades(trades_df)
     unknown_trades = prepare_trades(unknown_trades)
 
@@ -145,22 +151,33 @@ def prepare_data():
     outliers.to_parquet("./data/outliers.parquet")
     trades_df = trades_df.loc[trades_df["roi"] < 1000]
 
-    return tools_df, trades_df, tools_accuracy_info, invalid_trades, unknown_trades
+    return (
+        error_by_markets,
+        trades_df,
+        tools_accuracy_info,
+        invalid_trades,
+        unknown_trades,
+        winning_df,
+    )
 
 
-tools_df, trades_df, tools_accuracy_info, invalid_trades, unknown_trades = (
-    prepare_data()
-)
+(
+    error_by_markets,
+    trades_df,
+    tools_accuracy_info,
+    invalid_trades,
+    unknown_trades,
+    winning_df,
+) = prepare_data()
 trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
 unknown_trades = unknown_trades.sort_values(by="creation_timestamp", ascending=True)
 
 demo = gr.Blocks()
 
 # preparing data for the errors
-error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
+
 error_overall_by_markets = get_error_data_overall_by_market(error_df=error_by_markets)
 
-winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
 # preparing data for the trades graph
 trades_count_df = get_overall_trades(trades_df=trades_df)
 trades_by_market = get_overall_by_market_trades(trades_df=trades_df)
data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec1f80e9de64d8981ac58b91de9e17f371c0620544c9012168519a6a789b512c
-size 3537818
+oid sha256:2dc010db5a3f4163f3d09274101a14cd63a860e64c92649c694c816f28799342
+size 6789999
data/{summary_profitability.parquet → error_by_markets.parquet} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20a70aff0b89a48381a0cf73ffb65ae9a41002b81bec1dd1ded9e454b86e9245
-size 112166
+oid sha256:cbe47e7cb744db4522161c6c121ac9393937d53ca372a2210952f7a469f59489
+size 12067
data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88697b4baf7652f32c3413f1fc168f534f2472281761fa4e5208751f1a0bae56
-size 123705
+oid sha256:9b7c4c831e583b8632a6a45079df9e400fea4e40287bbed594624ad9f9437907
+size 196588
data/service_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6109116eb0c946088a55420b04cc85576985cb0bef7ec47c3b2be97ee85688e8
+oid sha256:93ac540e1bcd347a48b9978b87443ae64af0f8b0a4daff305c4ad99cd0959a73
 size 90766
data/tools.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0d5753d5858231903cf1bc20f47a54dae742f35da95ed15ddcb5f44a5be8338f
-size 616260724
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33bf015940a44f02ababb579398272ffc258a48d10e16be075179f18f4a2d578
-size 1101
+oid sha256:818026934d2218b01f130770ffcb7563c80de0900be6721a55cd2499f9731889
+size 1100
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ab41a7a35d8bf5c588b95849ec650e048578ddcbb18bc62df0e7a3c96902ea5
-size 368142
+oid sha256:0164ef5ecaf966a5dcc677d96bba860c344f43cf53e237b6687b797502bd5e36
+size 184719
data/{t_map.pkl → winning_df.parquet} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c6ad41a6442cbe773ea3062c116a132fb67fcbfc2c9dbaaf990088da58b80b7
-size 15990448
+oid sha256:fe676fcd7dde4b833f770dafa8e474a96bbe17fb16b9ceb160c03c2519ba72b4
+size 12980
notebooks/mech_calls_analysis.ipynb CHANGED
@@ -59,7 +59,6 @@
 " <th>trade_fee_amount</th>\n",
 " <th>outcomes_tokens_traded</th>\n",
 " <th>...</th>\n",
-" <th>is_invalid</th>\n",
 " <th>winning_trade</th>\n",
 " <th>earnings</th>\n",
 " <th>redeemed</th>\n",
@@ -69,6 +68,7 @@
 " <th>net_earnings</th>\n",
 " <th>roi</th>\n",
 " <th>staking</th>\n",
+" <th>nr_mech_calls</th>\n",
 " </tr>\n",
 " </thead>\n",
 " <tbody>\n",
@@ -76,125 +76,125 @@
 " <th>0</th>\n",
 " <td>0x01274796ce41aa8e8312e05a427ffb4b0d2148f6</td>\n",
 " <td>quickstart</td>\n",
-" <td>0x007068173910cf8719b6f2e66a18b6825c9dde820x01...</td>\n",
-" <td>2024-10-10 21:43:25+00:00</td>\n",
-" <td>Will the emergency public warning tests planne...</td>\n",
+" <td>0x0dfb9821725003c4d3007999968d34d7070959ef0x01...</td>\n",
+" <td>2024-10-27 21:51:25+00:00</td>\n",
+" <td>Will any mainstream U.S. news outlet publish a...</td>\n",
 " <td>CLOSED</td>\n",
-" <td>0.930597</td>\n",
-" <td>0</td>\n",
-" <td>0.009306</td>\n",
-" <td>1.574258</td>\n",
+" <td>0.461993</td>\n",
+" <td>1</td>\n",
+" <td>0.004620</td>\n",
+" <td>0.734537</td>\n",
 " <td>...</td>\n",
-" <td>False</td>\n",
 " <td>True</td>\n",
-" <td>1.574258</td>\n",
+" <td>0.734537</td>\n",
 " <td>True</td>\n",
-" <td>1.574258</td>\n",
-" <td>1</td>\n",
-" <td>0.01</td>\n",
-" <td>0.624356</td>\n",
-" <td>0.657284</td>\n",
+" <td>0.734537</td>\n",
+" <td>2.0</td>\n",
+" <td>0.02</td>\n",
+" <td>0.247924</td>\n",
+" <td>0.509488</td>\n",
 " <td>non_staking</td>\n",
+" <td>NaN</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <th>1</th>\n",
 " <td>0x01274796ce41aa8e8312e05a427ffb4b0d2148f6</td>\n",
 " <td>quickstart</td>\n",
-" <td>0x00d659d7749fda4f1c9402182ca5d7ce26cf5cd10x01...</td>\n",
-" <td>2024-10-18 00:36:50+00:00</td>\n",
-" <td>Will the Northern Lights be visible over UK sk...</td>\n",
+" <td>0x1082be4e429e512182089162f41b3a86a52eee370x01...</td>\n",
+" <td>2024-10-31 22:50:15+00:00</td>\n",
+" <td>Will Prime Minister Shigeru Ishiba announce a ...</td>\n",
 " <td>CLOSED</td>\n",
-" <td>1.375603</td>\n",
-" <td>1</td>\n",
-" <td>0.013756</td>\n",
-" <td>1.942215</td>\n",
+" <td>0.859939</td>\n",
+" <td>0</td>\n",
+" <td>0.008599</td>\n",
+" <td>2.714890</td>\n",
 " <td>...</td>\n",
 " <td>False</td>\n",
-" <td>False</td>\n",
 " <td>0.000000</td>\n",
 " <td>False</td>\n",
 " <td>0.000000</td>\n",
-" <td>1</td>\n",
-" <td>0.01</td>\n",
-" <td>-1.399359</td>\n",
+" <td>8.0</td>\n",
+" <td>0.08</td>\n",
+" <td>-0.948538</td>\n",
 " <td>-1.000000</td>\n",
 " <td>non_staking</td>\n",
+" <td>NaN</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <th>2</th>\n",
 " <td>0x01274796ce41aa8e8312e05a427ffb4b0d2148f6</td>\n",
 " <td>quickstart</td>\n",
-" <td>0x02ccdf04646d9a55332e67a73e4ffdab2368d05f0x01...</td>\n",
-" <td>2024-10-23 22:37:35+00:00</td>\n",
-" <td>Will any Republican lawmakers introduce legisl...</td>\n",
+" <td>0x150f4d4e5affa7fe332684d7c828c0a471c4d5de0x01...</td>\n",
+" <td>2024-10-29 02:21:25+00:00</td>\n",
+" <td>Will the Constitutional Democratic Party of Ja...</td>\n",
 " <td>CLOSED</td>\n",
-" <td>0.471695</td>\n",
+" <td>0.203751</td>\n",
 " <td>1</td>\n",
-" <td>0.004717</td>\n",
-" <td>0.784784</td>\n",
+" <td>0.002038</td>\n",
+" <td>0.305174</td>\n",
 " <td>...</td>\n",
-" <td>False</td>\n",
 " <td>True</td>\n",
-" <td>0.784784</td>\n",
+" <td>0.305174</td>\n",
 " <td>True</td>\n",
-" <td>0.784784</td>\n",
-" <td>1</td>\n",
-" <td>0.01</td>\n",
-" <td>0.298372</td>\n",
-" <td>0.613414</td>\n",
+" <td>0.305174</td>\n",
+" <td>2.0</td>\n",
+" <td>0.02</td>\n",
+" <td>0.079385</td>\n",
+" <td>0.351592</td>\n",
 " <td>non_staking</td>\n",
+" <td>NaN</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <th>3</th>\n",
 " <td>0x01274796ce41aa8e8312e05a427ffb4b0d2148f6</td>\n",
 " <td>quickstart</td>\n",
-" <td>0x09f47ce8995abf1d5b91f2cbfa940ede2fb954c30x01...</td>\n",
-" <td>2024-10-20 23:58:35+00:00</td>\n",
-" <td>Will any new major AI-driven fraud detection t...</td>\n",
+" <td>0x15edf592dc3eb67e1c163ceb6d23039710cd67fb0x01...</td>\n",
+" <td>2024-10-28 21:59:25+00:00</td>\n",
+" <td>Will there be a public statement from the Bide...</td>\n",
 " <td>CLOSED</td>\n",
-" <td>0.289046</td>\n",
+" <td>0.412054</td>\n",
 " <td>1</td>\n",
-" <td>0.002890</td>\n",
-" <td>0.445590</td>\n",
+" <td>0.004121</td>\n",
+" <td>0.666936</td>\n",
 " <td>...</td>\n",
 " <td>False</td>\n",
-" <td>False</td>\n",
 " <td>0.000000</td>\n",
 " <td>False</td>\n",
 " <td>0.000000</td>\n",
-" <td>5</td>\n",
-" <td>0.05</td>\n",
-" <td>-0.341936</td>\n",
+" <td>2.0</td>\n",
+" <td>0.02</td>\n",
+" <td>-0.436175</td>\n",
 " <td>-1.000000</td>\n",
 " <td>non_staking</td>\n",
+" <td>NaN</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <th>4</th>\n",
 " <td>0x01274796ce41aa8e8312e05a427ffb4b0d2148f6</td>\n",
 " <td>quickstart</td>\n",
-" <td>0x0c86942c52740316bbdb70303c5aaee40876d8ce0x01...</td>\n",
-" <td>2024-10-24 22:42:00+00:00</td>\n",
-" <td>Will any new norovirus vaccine trial sites be ...</td>\n",
+" <td>0x187c822a330c393912398884faf8150d21b4a7840x01...</td>\n",
+" <td>2024-10-30 00:30:45+00:00</td>\n",
+" <td>Will the Bank of Japan issue a public statemen...</td>\n",
 " <td>CLOSED</td>\n",
-" <td>0.286552</td>\n",
-" <td>1</td>\n",
-" <td>0.002866</td>\n",
-" <td>0.470457</td>\n",
+" <td>0.333192</td>\n",
+" <td>0</td>\n",
+" <td>0.003332</td>\n",
+" <td>0.447445</td>\n",
 " <td>...</td>\n",
-" <td>False</td>\n",
 " <td>True</td>\n",
-" <td>0.470457</td>\n",
+" <td>0.447445</td>\n",
 " <td>True</td>\n",
-" <td>0.470457</td>\n",
-" <td>1</td>\n",
-" <td>0.01</td>\n",
-" <td>0.171040</td>\n",
-" <td>0.571242</td>\n",
+" <td>0.447445</td>\n",
+" <td>8.0</td>\n",
+" <td>0.08</td>\n",
+" <td>0.030922</td>\n",
+" <td>0.074237</td>\n",
 " <td>non_staking</td>\n",
+" <td>NaN</td>\n",
 " </tr>\n",
 " </tbody>\n",
 "</table>\n",
-"<p>5 rows × 21 columns</p>\n",
+"<p>5 rows × 22 columns</p>\n",
 "</div>"
 ],
 "text/plain": [
@@ -206,48 +206,48 @@
 "4 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6 quickstart \n",
 "\n",
 " trade_id \\\n",
-"0 0x007068173910cf8719b6f2e66a18b6825c9dde820x01... \n",
-"1 0x00d659d7749fda4f1c9402182ca5d7ce26cf5cd10x01... \n",
-"2 0x02ccdf04646d9a55332e67a73e4ffdab2368d05f0x01... \n",
-"3 0x09f47ce8995abf1d5b91f2cbfa940ede2fb954c30x01... \n",
-"4 0x0c86942c52740316bbdb70303c5aaee40876d8ce0x01... \n",
+"0 0x0dfb9821725003c4d3007999968d34d7070959ef0x01... \n",
+"1 0x1082be4e429e512182089162f41b3a86a52eee370x01... \n",
+"2 0x150f4d4e5affa7fe332684d7c828c0a471c4d5de0x01... \n",
+"3 0x15edf592dc3eb67e1c163ceb6d23039710cd67fb0x01... \n",
+"4 0x187c822a330c393912398884faf8150d21b4a7840x01... \n",
 "\n",
 " creation_timestamp \\\n",
-"0 2024-10-10 21:43:25+00:00 \n",
-"1 2024-10-18 00:36:50+00:00 \n",
-"2 2024-10-23 22:37:35+00:00 \n",
-"3 2024-10-20 23:58:35+00:00 \n",
-"4 2024-10-24 22:42:00+00:00 \n",
+"0 2024-10-27 21:51:25+00:00 \n",
+"1 2024-10-31 22:50:15+00:00 \n",
+"2 2024-10-29 02:21:25+00:00 \n",
+"3 2024-10-28 21:59:25+00:00 \n",
+"4 2024-10-30 00:30:45+00:00 \n",
 "\n",
 " title market_status \\\n",
-"0 Will the emergency public warning tests planne... CLOSED \n",
-"1 Will the Northern Lights be visible over UK sk... CLOSED \n",
-"2 Will any Republican lawmakers introduce legisl... CLOSED \n",
-"3 Will any new major AI-driven fraud detection t... CLOSED \n",
-"4 Will any new norovirus vaccine trial sites be ... CLOSED \n",
+"0 Will any mainstream U.S. news outlet publish a... CLOSED \n",
+"1 Will Prime Minister Shigeru Ishiba announce a ... CLOSED \n",
+"2 Will the Constitutional Democratic Party of Ja... CLOSED \n",
+"3 Will there be a public statement from the Bide... CLOSED \n",
+"4 Will the Bank of Japan issue a public statemen... CLOSED \n",
 "\n",
 " collateral_amount outcome_index trade_fee_amount outcomes_tokens_traded \\\n",
-"0 0.930597 0 0.009306 1.574258 \n",
-"1 1.375603 1 0.013756 1.942215 \n",
-"2 0.471695 1 0.004717 0.784784 \n",
-"3 0.289046 1 0.002890 0.445590 \n",
-"4 0.286552 1 0.002866 0.470457 \n",
+"0 0.461993 1 0.004620 0.734537 \n",
+"1 0.859939 0 0.008599 2.714890 \n",
+"2 0.203751 1 0.002038 0.305174 \n",
+"3 0.412054 1 0.004121 0.666936 \n",
+"4 0.333192 0 0.003332 0.447445 \n",
 "\n",
-" ... is_invalid winning_trade earnings redeemed redeemed_amount \\\n",
-"0 ... False True 1.574258 True 1.574258 \n",
-"1 ... False False 0.000000 False 0.000000 \n",
-"2 ... False True 0.784784 True 0.784784 \n",
-"3 ... False False 0.000000 False 0.000000 \n",
-"4 ... False True 0.470457 True 0.470457 \n",
+" ... winning_trade earnings redeemed redeemed_amount num_mech_calls \\\n",
+"0 ... True 0.734537 True 0.734537 2.0 \n",
+"1 ... False 0.000000 False 0.000000 8.0 \n",
+"2 ... True 0.305174 True 0.305174 2.0 \n",
+"3 ... False 0.000000 False 0.000000 2.0 \n",
+"4 ... True 0.447445 True 0.447445 8.0 \n",
 "\n",
-" num_mech_calls mech_fee_amount net_earnings roi staking \n",
-"0 1 0.01 0.624356 0.657284 non_staking \n",
-"1 1 0.01 -1.399359 -1.000000 non_staking \n",
-"2 1 0.01 0.298372 0.613414 non_staking \n",
-"3 5 0.05 -0.341936 -1.000000 non_staking \n",
-"4 1 0.01 0.171040 0.571242 non_staking \n",
+" mech_fee_amount net_earnings roi staking nr_mech_calls \n",
+"0 0.02 0.247924 0.509488 non_staking NaN \n",
+"1 0.08 -0.948538 -1.000000 non_staking NaN \n",
+"2 0.02 0.079385 0.351592 non_staking NaN \n",
+"3 0.02 -0.436175 -1.000000 non_staking NaN \n",
+"4 0.08 0.030922 0.074237 non_staking NaN \n",
 "\n",
-"[5 rows x 21 columns]"
+"[5 rows x 22 columns]"
 ]
 },
 "execution_count": 3,
@@ -259,6 +259,34 @@
 "all_trades.head()"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 4,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"count 43987.000000\n",
+"mean 6.663537\n",
+"std 13.608287\n",
+"min 0.000000\n",
+"25% 2.000000\n",
+"50% 5.000000\n",
+"75% 8.000000\n",
+"max 650.000000\n",
+"Name: num_mech_calls, dtype: float64"
+]
+},
+"execution_count": 4,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"all_trades.num_mech_calls.describe()"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 4,
notebooks/tool_errors_analysis.ipynb CHANGED
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 47,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -15,16 +15,234 @@
 },
 {
 "cell_type": "code",
-"execution_count": 2,
+"execution_count": 48,
 "metadata": {},
 "outputs": [],
 "source": [
-"tools = pd.read_parquet('../data/tools.parquet')"
+"error_by_markets = pd.read_parquet('../data/error_by_markets.parquet')"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 3,
+"execution_count": 49,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"Index(['tool', 'request_month_year_week', 'market_creator', '0', '1',\n",
+" 'error_perc', 'total_requests'],\n",
+" dtype='object')"
+]
+},
+"execution_count": 49,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"error_by_markets.columns"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 51,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/html": [
+"<div>\n",
+"<style scoped>\n",
+" .dataframe tbody tr th:only-of-type {\n",
+" vertical-align: middle;\n",
+" }\n",
+"\n",
+" .dataframe tbody tr th {\n",
+" vertical-align: top;\n",
+" }\n",
+"\n",
+" .dataframe thead th {\n",
+" text-align: right;\n",
+" }\n",
+"</style>\n",
+"<table border=\"1\" class=\"dataframe\">\n",
+" <thead>\n",
+" <tr style=\"text-align: right;\">\n",
+" <th></th>\n",
+" <th>tool</th>\n",
+" <th>request_month_year_week</th>\n",
+" <th>market_creator</th>\n",
+" <th>0</th>\n",
+" <th>1</th>\n",
+" <th>error_perc</th>\n",
+" <th>total_requests</th>\n",
+" </tr>\n",
+" </thead>\n",
+" <tbody>\n",
+" <tr>\n",
+" <th>355</th>\n",
+" <td>superforcaster</td>\n",
+" <td>Dec-22</td>\n",
+" <td>all</td>\n",
+" <td>1087.0</td>\n",
+" <td>61.0</td>\n",
+" <td>5.313589</td>\n",
+" <td>1148.0</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>356</th>\n",
+" <td>superforcaster</td>\n",
+" <td>Dec-22</td>\n",
+" <td>pearl</td>\n",
+" <td>75.0</td>\n",
+" <td>10.0</td>\n",
+" <td>11.764706</td>\n",
+" <td>85.0</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>357</th>\n",
+" <td>superforcaster</td>\n",
+" <td>Dec-29</td>\n",
+" <td>quickstart</td>\n",
+" <td>678.0</td>\n",
+" <td>59.0</td>\n",
+" <td>8.005427</td>\n",
+" <td>737.0</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>358</th>\n",
+" <td>superforcaster</td>\n",
+" <td>Dec-29</td>\n",
+" <td>all</td>\n",
+" <td>705.0</td>\n",
+" <td>60.0</td>\n",
+" <td>7.843137</td>\n",
+" <td>765.0</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>359</th>\n",
+" <td>superforcaster</td>\n",
+" <td>Dec-29</td>\n",
+" <td>pearl</td>\n",
+" <td>27.0</td>\n",
+" <td>1.0</td>\n",
+" <td>3.571429</td>\n",
+" <td>28.0</td>\n",
+" </tr>\n",
+" </tbody>\n",
+"</table>\n",
+"</div>"
+],
+"text/plain": [
+" tool request_month_year_week market_creator 0 1 \\\n",
+"355 superforcaster Dec-22 all 1087.0 61.0 \n",
+"356 superforcaster Dec-22 pearl 75.0 10.0 \n",
+"357 superforcaster Dec-29 quickstart 678.0 59.0 \n",
+"358 superforcaster Dec-29 all 705.0 60.0 \n",
+"359 superforcaster Dec-29 pearl 27.0 1.0 \n",
+"\n",
+" error_perc total_requests \n",
+"355 5.313589 1148.0 \n",
+"356 11.764706 85.0 \n",
+"357 8.005427 737.0 \n",
+"358 7.843137 765.0 \n",
+"359 3.571429 28.0 "
+]
+},
+"execution_count": 51,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"error_by_markets.tail()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 46,
+"metadata": {},
+"outputs": [],
+"source": [
+"error_total = (\n",
+" error_by_markets.groupby([\"request_month_year_week\", \"market_creator\"], sort=False)\n",
+" .agg({\"total_requests\": \"sum\", '1': \"sum\", '0': \"sum\"})\n",
+" .reset_index()\n",
+")\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
+},
+{
+"cell_type": "code",
+"execution_count": 32,
+"metadata": {},
+"outputs": [],
+"source": [
+"new_tools = pd.read_parquet('../data/new_tools.parquet')\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 33,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"155789"
+]
+},
+"execution_count": 33,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"len(new_tools)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 34,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"Index(['request_id', 'request_block', 'request_time', 'tx_hash',\n",
+" 'prompt_request', 'tool', 'nonce', 'trader_address', 'deliver_block',\n",
+" 'error', 'error_message', 'prompt_response', 'mech_address', 'p_yes',\n",
+" 'p_no', 'confidence', 'info_utility', 'vote', 'win_probability',\n",
+" 'market_creator'],\n",
+" dtype='object')"
+]
+},
+"execution_count": 34,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"new_tools.columns"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 35,
 "metadata": {},
 "outputs": [
 {
@@ -32,42 +250,713 @@
 "output_type": "stream",
 "text": [
 "<class 'pandas.core.frame.DataFrame'>\n",
-"RangeIndex: 286042 entries, 0 to 286041\n",
-"Data columns (total 23 columns):\n",
-" # Column Non-Null Count Dtype \n",
-"--- ------ -------------- ----- \n",
-" 0 request_id 286042 non-null object \n",
-" 1 request_block 286042 non-null object \n",
-" 2 prompt_request 286042 non-null object \n",
-" 3 tool 286042 non-null object \n",
-" 4 nonce 286042 non-null object \n",
-" 5 trader_address 286042 non-null object \n",
-" 6 deliver_block 286042 non-null object \n",
-" 7 error 286042 non-null int64 \n",
-" 8 error_message 3147 non-null object \n",
-" 9 prompt_response 285564 non-null object \n",
-" 10 mech_address 285656 non-null object \n",
-" 11 p_yes 282895 non-null float64\n",
-" 12 p_no 282895 non-null float64\n",
-" 13 confidence 282895 non-null float64\n",
-" 14 info_utility 282895 non-null float64\n",
-" 15 vote 205982 non-null object \n",
-" 16 win_probability 282895 non-null float64\n",
-" 17 market_creator 286042 non-null object \n",
-" 18 title 286042 non-null object \n",
-" 19 currentAnswer 235482 non-null object \n",
-" 20 request_time 286042 non-null object \n",
-" 21 request_month_year 286042 non-null object \n",
-" 22 request_month_year_week 286042 non-null object \n",
-"dtypes: float64(5), int64(1), object(17)\n",
-"memory usage: 50.2+ MB\n"
+"RangeIndex: 155789 entries, 0 to 155788\n",
+"Data columns (total 20 columns):\n",
+" # Column Non-Null Count Dtype \n",
+"--- ------ -------------- ----- \n",
+" 0 request_id 155789 non-null object \n",
+" 1 request_block 155789 non-null object \n",
+" 2 request_time 155789 non-null datetime64[ns, UTC]\n",
+" 3 tx_hash 155789 non-null object \n",
+" 4 prompt_request 155789 non-null object \n",
+" 5 tool 155789 non-null object \n",
+" 6 nonce 155789 non-null object \n",
+" 7 trader_address 155789 non-null object \n",
+" 8 deliver_block 155789 non-null object \n",
+" 9 error 155789 non-null int64 \n",
+" 10 error_message 61690 non-null object \n",
+" 11 prompt_response 131002 non-null object \n",
+" 12 mech_address 131002 non-null object \n",
+" 13 p_yes 94099 non-null float64 \n",
+" 14 p_no 94099 non-null float64 \n",
+" 15 confidence 94099 non-null float64 \n",
+" 16 info_utility 94099 non-null float64 \n",
+" 17 vote 66870 non-null object \n",
+" 18 win_probability 94099 non-null float64 \n",
+" 19 market_creator 155789 non-null object \n",
+"dtypes: datetime64[ns, UTC](1), float64(5), int64(1), object(13)\n",
+"memory usage: 23.8+ MB\n"
 ]
 }
 ],
 "source": [
-"tools.info()"
+"new_tools.info()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 36,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"request_id 5585662306487809905791879720381993204173062765...\n",
+"request_block 37672194\n",
+"request_time 2024-12-23 12:37:05+00:00\n",
+"tx_hash 0x069f675e2cdfd328e9056901e5e79dcfa8cd981c95e3...\n",
+"prompt_request Please take over the role of a Data Scientist ...\n",
+"tool prediction-offline\n",
+"nonce a711182a-8641-428b-8908-ae773516f846\n",
+"trader_address 0x5d621d8bfcb57a70f4fde6e5484a54fa8127a858\n",
+"deliver_block 37672203\n",
+"error 0\n",
+"error_message None\n",
+"prompt_response \\nYou are an LLM inside a multi-agent system t...\n",
+"mech_address 0x5e1d1eb61e1164d5a50b28c575da73a29595dff7\n",
+"p_yes 0.3\n",
+"p_no 0.7\n",
+"confidence 0.4\n",
+"info_utility 0.0\n",
+"vote No\n",
+"win_probability 0.7\n",
+"market_creator quickstart\n",
+"Name: 0, dtype: object"
+]
+},
+"execution_count": 36,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"new_tools.iloc[0]"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 37,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"Timestamp('2024-12-23 12:37:05+0000', tz='UTC')"
+]
+},
+"execution_count": 37,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"new_tools.iloc[0].request_time"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 22,
+"metadata": {},
+"outputs": [],
+"source": [
+"tools = pd.read_parquet('../tmp/tools.parquet')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 30,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"Timestamp('2024-10-26 13:03:55+0000', tz='UTC')"
+]
+},
+"execution_count": 30,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"tools.iloc[0].request_time"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 23,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"626382"
+]
+},
+"execution_count": 23,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"len(tools)"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 24,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/html": [
+"<div>\n",
+"<style scoped>\n",
+" .dataframe tbody tr th:only-of-type {\n",
+" vertical-align: middle;\n",
+" }\n",
+"\n",
+" .dataframe tbody tr th {\n",
+" vertical-align: top;\n",
+" }\n",
+"\n",
+" .dataframe thead th {\n",
+" text-align: right;\n",
+" }\n",
+"</style>\n",
+"<table border=\"1\" class=\"dataframe\">\n",
+" <thead>\n",
+" <tr style=\"text-align: right;\">\n",
+" <th></th>\n",
+" <th>request_id</th>\n",
+" <th>request_block</th>\n",
+" <th>prompt_request</th>\n",
+" <th>tool</th>\n",
+" <th>nonce</th>\n",
+" <th>trader_address</th>\n",
+" <th>deliver_block</th>\n",
+" <th>error</th>\n",
+" <th>error_message</th>\n",
+" <th>prompt_response</th>\n",
+" <th>...</th>\n",
+" <th>confidence</th>\n",
+" <th>info_utility</th>\n",
+" <th>vote</th>\n",
+" <th>win_probability</th>\n",
+" <th>market_creator</th>\n",
+" <th>title</th>\n",
+" <th>currentAnswer</th>\n",
+" <th>request_time</th>\n",
+" <th>request_month_year</th>\n",
+" <th>request_month_year_week</th>\n",
+" </tr>\n",
+" </thead>\n",
+" <tbody>\n",
+" <tr>\n",
+" <th>0</th>\n",
+" <td>1065794400559258224527965821794720648636282516...</td>\n",
+" <td>36701352</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>fd5cfa7f-fb38-435b-b571-69f253397a69</td>\n",
+" <td>0x42cc3f5a30420e8964be3c18d0e560b10e8957fa</td>\n",
+" <td>36701366</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.6</td>\n",
+" <td>0.2</td>\n",
+" <td>No</td>\n",
+" <td>0.7</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will the Caspian Sea countries announce a join...</td>\n",
+" <td>No</td>\n",
+" <td>2024-10-26 13:03:55+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-21/2024-10-27</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>1</th>\n",
+" <td>9015207976398091689774872599965107755141252481...</td>\n",
+" <td>36749933</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>a61c894b-fa0d-40d9-ab02-e512ef9dd7e6</td>\n",
+" <td>0xc84d1f9fc28ce5628f69cc138ba3092a036b8f69</td>\n",
+" <td>36749986</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.6</td>\n",
+" <td>0.2</td>\n",
+" <td>No</td>\n",
+" <td>0.9</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will Iran launch another ballistic missile att...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-29 10:50:55+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-28/2024-11-03</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>2</th>\n",
+" <td>8204183583678326945661870665906374091080896102...</td>\n",
+" <td>36756777</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>e64fa0a7-a98b-4c95-ac1a-755d212a5b78</td>\n",
+" <td>0x992448484862672eb95ca1c877cc43f935c389ce</td>\n",
+" <td>36756791</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.6</td>\n",
+" <td>0.2</td>\n",
+" <td>No</td>\n",
+" <td>0.7</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will the United Nations Security Council hold ...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-29 20:41:55+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-28/2024-11-03</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>3</th>\n",
+" <td>1225398943131371197254748096595691861478679945...</td>\n",
+" <td>36749393</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>f84d34ae-3dd0-4d0c-a4d3-d8f28d25677c</td>\n",
+" <td>0x3de0f1a8d9c227af2e324dd92905bbf8bb852ff8</td>\n",
+" <td>36749408</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.3</td>\n",
+" <td>0.1</td>\n",
+" <td>None</td>\n",
+" <td>0.5</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will the Bank of Japan issue a public statemen...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-29 10:02:45+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-28/2024-11-03</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>4</th>\n",
+" <td>7954746357421406217625419969909404056225427053...</td>\n",
+" <td>36701081</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>superforcaster</td>\n",
+" <td>1b609b7e-e0d2-4bb8-ad6b-7d0e6e6610b5</td>\n",
+" <td>0x8dd0f0f64e575a356545d9ed096122a1887e64bf</td>\n",
+" <td>36701099</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an advanced AI system which has been...</td>\n",
+" <td>...</td>\n",
+" <td>0.6</td>\n",
+" <td>0.4</td>\n",
+" <td>No</td>\n",
+" <td>0.7</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will any new human rights organizations public...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-26 12:40:25+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-21/2024-10-27</td>\n",
+" </tr>\n",
+" </tbody>\n",
+"</table>\n",
+"<p>5 rows × 23 columns</p>\n",
+"</div>"
+],
+"text/plain": [
+" request_id request_block \\\n",
+"0 1065794400559258224527965821794720648636282516... 36701352 \n",
+"1 9015207976398091689774872599965107755141252481... 36749933 \n",
+"2 8204183583678326945661870665906374091080896102... 36756777 \n",
+"3 1225398943131371197254748096595691861478679945... 36749393 \n",
+"4 7954746357421406217625419969909404056225427053... 36701081 \n",
+"\n",
+" prompt_request \\\n",
+"0 Please take over the role of a Data Scientist ... \n",
+"1 Please take over the role of a Data Scientist ... \n",
+"2 Please take over the role of a Data Scientist ... \n",
+"3 Please take over the role of a Data Scientist ... \n",
+"4 Please take over the role of a Data Scientist ... \n",
+"\n",
+" tool nonce \\\n",
+"0 claude-prediction-offline fd5cfa7f-fb38-435b-b571-69f253397a69 \n",
+"1 claude-prediction-offline a61c894b-fa0d-40d9-ab02-e512ef9dd7e6 \n",
+"2 claude-prediction-offline e64fa0a7-a98b-4c95-ac1a-755d212a5b78 \n",
+"3 claude-prediction-offline f84d34ae-3dd0-4d0c-a4d3-d8f28d25677c \n",
+"4 superforcaster 1b609b7e-e0d2-4bb8-ad6b-7d0e6e6610b5 \n",
+"\n",
+" trader_address deliver_block error \\\n",
+"0 0x42cc3f5a30420e8964be3c18d0e560b10e8957fa 36701366 0 \n",
+"1 0xc84d1f9fc28ce5628f69cc138ba3092a036b8f69 36749986 0 \n",
+"2 0x992448484862672eb95ca1c877cc43f935c389ce 36756791 0 \n",
+"3 0x3de0f1a8d9c227af2e324dd92905bbf8bb852ff8 36749408 0 \n",
+"4 0x8dd0f0f64e575a356545d9ed096122a1887e64bf 36701099 0 \n",
+"\n",
+" error_message prompt_response ... \\\n",
+"0 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"1 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"2 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"3 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"4 None \\nYou are an advanced AI system which has been... ... \n",
+"\n",
+" confidence info_utility vote win_probability market_creator \\\n",
+"0 0.6 0.2 No 0.7 quickstart \n",
+"1 0.6 0.2 No 0.9 quickstart \n",
+"2 0.6 0.2 No 0.7 quickstart \n",
+"3 0.3 0.1 None 0.5 quickstart \n",
+"4 0.6 0.4 No 0.7 quickstart \n",
+"\n",
+" title currentAnswer \\\n",
+"0 Will the Caspian Sea countries announce a join... No \n",
+"1 Will Iran launch another ballistic missile att... Yes \n",
+"2 Will the United Nations Security Council hold ... Yes \n",
+"3 Will the Bank of Japan issue a public statemen... Yes \n",
+"4 Will any new human rights organizations public... Yes \n",
+"\n",
+" request_time request_month_year request_month_year_week \n",
+"0 2024-10-26 13:03:55+00:00 2024-10 2024-10-21/2024-10-27 \n",
+"1 2024-10-29 10:50:55+00:00 2024-10 2024-10-28/2024-11-03 \n",
+"2 2024-10-29 20:41:55+00:00 2024-10 2024-10-28/2024-11-03 \n",
+"3 2024-10-29 10:02:45+00:00 2024-10 2024-10-28/2024-11-03 \n",
+"4 2024-10-26 12:40:25+00:00 2024-10 2024-10-21/2024-10-27 \n",
+"\n",
+"[5 rows x 23 columns]"
+]
+},
+"execution_count": 24,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"tools.head()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 5,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"request_id 4650171020578125420345177840991368739117143085...\n",
+"request_block 36626348\n",
+"prompt_request Please take over the role of a Data Scientist ...\n",
+"tool claude-prediction-online\n",
+"nonce 5a0e84af-fcc3-4015-b2c1-a390430d70ca\n",
+"trader_address 0x1fe2b09de07475b1027b0c73a5bf52693b31a52e\n",
+"deliver_block 36626364\n",
+"error 0\n",
+"error_message None\n",
+"prompt_response \\nYou are an LLM inside a multi-agent system t...\n",
+"mech_address 0x5e1d1eb61e1164d5a50b28c575da73a29595dff7\n",
+"p_yes 0.3\n",
+"p_no 0.7\n",
+"confidence 0.6\n",
+"info_utility 0.2\n",
+"vote No\n",
+"win_probability 0.7\n",
+"market_creator pearl\n",
+"title Will the US government make a public statement...\n",
+"currentAnswer No\n",
+"request_time 2024-10-22 00:56:35+00:00\n",
+"request_month_year 2024-10\n",
+"request_month_year_week 2024-10-21/2024-10-27\n",
+"Name: 0, dtype: object"
+]
+},
+"execution_count": 5,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"tools.iloc[0]"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 38,
+"metadata": {},
+"outputs": [],
+"source": [
+"merge_df = pd.concat([tools, new_tools], ignore_index=True)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 39,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/html": [
+"<div>\n",
+"<style scoped>\n",
+" .dataframe tbody tr th:only-of-type {\n",
+" vertical-align: middle;\n",
+" }\n",
+"\n",
+" .dataframe tbody tr th {\n",
+" vertical-align: top;\n",
+" }\n",
+"\n",
+" .dataframe thead th {\n",
+" text-align: right;\n",
+" }\n",
+"</style>\n",
+"<table border=\"1\" class=\"dataframe\">\n",
+" <thead>\n",
+" <tr style=\"text-align: right;\">\n",
+" <th></th>\n",
+" <th>request_id</th>\n",
+" <th>request_block</th>\n",
+" <th>prompt_request</th>\n",
+" <th>tool</th>\n",
+" <th>nonce</th>\n",
+" <th>trader_address</th>\n",
+" <th>deliver_block</th>\n",
+" <th>error</th>\n",
+" <th>error_message</th>\n",
+" <th>prompt_response</th>\n",
+" <th>...</th>\n",
+" <th>info_utility</th>\n",
+" <th>vote</th>\n",
+" <th>win_probability</th>\n",
+" <th>market_creator</th>\n",
+" <th>title</th>\n",
+" <th>currentAnswer</th>\n",
+" <th>request_time</th>\n",
+" <th>request_month_year</th>\n",
+" <th>request_month_year_week</th>\n",
+" <th>tx_hash</th>\n",
+" </tr>\n",
+" </thead>\n",
+" <tbody>\n",
+" <tr>\n",
+" <th>0</th>\n",
+" <td>1065794400559258224527965821794720648636282516...</td>\n",
+" <td>36701352</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>fd5cfa7f-fb38-435b-b571-69f253397a69</td>\n",
+" <td>0x42cc3f5a30420e8964be3c18d0e560b10e8957fa</td>\n",
+" <td>36701366</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.2</td>\n",
+" <td>No</td>\n",
+" <td>0.7</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will the Caspian Sea countries announce a join...</td>\n",
+" <td>No</td>\n",
+" <td>2024-10-26 13:03:55+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-21/2024-10-27</td>\n",
+" <td>NaN</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>1</th>\n",
+" <td>9015207976398091689774872599965107755141252481...</td>\n",
+" <td>36749933</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>a61c894b-fa0d-40d9-ab02-e512ef9dd7e6</td>\n",
+" <td>0xc84d1f9fc28ce5628f69cc138ba3092a036b8f69</td>\n",
+" <td>36749986</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.2</td>\n",
+" <td>No</td>\n",
+" <td>0.9</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will Iran launch another ballistic missile att...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-29 10:50:55+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-28/2024-11-03</td>\n",
+" <td>NaN</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>2</th>\n",
+" <td>8204183583678326945661870665906374091080896102...</td>\n",
+" <td>36756777</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>e64fa0a7-a98b-4c95-ac1a-755d212a5b78</td>\n",
+" <td>0x992448484862672eb95ca1c877cc43f935c389ce</td>\n",
+" <td>36756791</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.2</td>\n",
+" <td>No</td>\n",
+" <td>0.7</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will the United Nations Security Council hold ...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-29 20:41:55+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-28/2024-11-03</td>\n",
+" <td>NaN</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>3</th>\n",
+" <td>1225398943131371197254748096595691861478679945...</td>\n",
+" <td>36749393</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>claude-prediction-offline</td>\n",
+" <td>f84d34ae-3dd0-4d0c-a4d3-d8f28d25677c</td>\n",
+" <td>0x3de0f1a8d9c227af2e324dd92905bbf8bb852ff8</td>\n",
+" <td>36749408</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an LLM inside a multi-agent system t...</td>\n",
+" <td>...</td>\n",
+" <td>0.1</td>\n",
+" <td>None</td>\n",
+" <td>0.5</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will the Bank of Japan issue a public statemen...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-29 10:02:45+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-28/2024-11-03</td>\n",
+" <td>NaN</td>\n",
+" </tr>\n",
+" <tr>\n",
+" <th>4</th>\n",
+" <td>7954746357421406217625419969909404056225427053...</td>\n",
+" <td>36701081</td>\n",
+" <td>Please take over the role of a Data Scientist ...</td>\n",
+" <td>superforcaster</td>\n",
+" <td>1b609b7e-e0d2-4bb8-ad6b-7d0e6e6610b5</td>\n",
+" <td>0x8dd0f0f64e575a356545d9ed096122a1887e64bf</td>\n",
+" <td>36701099</td>\n",
+" <td>0</td>\n",
+" <td>None</td>\n",
+" <td>\\nYou are an advanced AI system which has been...</td>\n",
+" <td>...</td>\n",
+" <td>0.4</td>\n",
+" <td>No</td>\n",
+" <td>0.7</td>\n",
+" <td>quickstart</td>\n",
+" <td>Will any new human rights organizations public...</td>\n",
+" <td>Yes</td>\n",
+" <td>2024-10-26 12:40:25+00:00</td>\n",
+" <td>2024-10</td>\n",
+" <td>2024-10-21/2024-10-27</td>\n",
+" <td>NaN</td>\n",
+" </tr>\n",
+" </tbody>\n",
+"</table>\n",
+"<p>5 rows × 24 columns</p>\n",
+"</div>"
+],
+"text/plain": [
+" request_id request_block \\\n",
+"0 1065794400559258224527965821794720648636282516... 36701352 \n",
+"1 9015207976398091689774872599965107755141252481... 36749933 \n",
+"2 8204183583678326945661870665906374091080896102... 36756777 \n",
+"3 1225398943131371197254748096595691861478679945... 36749393 \n",
+"4 7954746357421406217625419969909404056225427053... 36701081 \n",
+"\n",
+" prompt_request \\\n",
+"0 Please take over the role of a Data Scientist ... \n",
+"1 Please take over the role of a Data Scientist ... \n",
+"2 Please take over the role of a Data Scientist ... \n",
+"3 Please take over the role of a Data Scientist ... \n",
+"4 Please take over the role of a Data Scientist ... \n",
+"\n",
+" tool nonce \\\n",
+"0 claude-prediction-offline fd5cfa7f-fb38-435b-b571-69f253397a69 \n",
+"1 claude-prediction-offline a61c894b-fa0d-40d9-ab02-e512ef9dd7e6 \n",
+"2 claude-prediction-offline e64fa0a7-a98b-4c95-ac1a-755d212a5b78 \n",
+"3 claude-prediction-offline f84d34ae-3dd0-4d0c-a4d3-d8f28d25677c \n",
+"4 superforcaster 1b609b7e-e0d2-4bb8-ad6b-7d0e6e6610b5 \n",
+"\n",
+" trader_address deliver_block error \\\n",
+"0 0x42cc3f5a30420e8964be3c18d0e560b10e8957fa 36701366 0 \n",
+"1 0xc84d1f9fc28ce5628f69cc138ba3092a036b8f69 36749986 0 \n",
+"2 0x992448484862672eb95ca1c877cc43f935c389ce 36756791 0 \n",
+"3 0x3de0f1a8d9c227af2e324dd92905bbf8bb852ff8 36749408 0 \n",
+"4 0x8dd0f0f64e575a356545d9ed096122a1887e64bf 36701099 0 \n",
+"\n",
+" error_message prompt_response ... \\\n",
+"0 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"1 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"2 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"3 None \\nYou are an LLM inside a multi-agent system t... ... \n",
+"4 None \\nYou are an advanced AI system which has been... ... \n",
+"\n",
+" info_utility vote win_probability market_creator \\\n",
+"0 0.2 No 0.7 quickstart \n",
+"1 0.2 No 0.9 quickstart \n",
+"2 0.2 No 0.7 quickstart \n",
+"3 0.1 None 0.5 quickstart \n",
+"4 0.4 No 0.7 quickstart \n",
+"\n",
+" title currentAnswer \\\n",
+"0 Will the Caspian Sea countries announce a join... No \n",
+"1 Will Iran launch another ballistic missile att... Yes \n",
+"2 Will the United Nations Security Council hold ... Yes \n",
+"3 Will the Bank of Japan issue a public statemen... Yes \n",
+"4 Will any new human rights organizations public... Yes \n",
+"\n",
+" request_time request_month_year request_month_year_week tx_hash \n",
+"0 2024-10-26 13:03:55+00:00 2024-10 2024-10-21/2024-10-27 NaN \n",
+"1 2024-10-29 10:50:55+00:00 2024-10 2024-10-28/2024-11-03 NaN \n",
+"2 2024-10-29 20:41:55+00:00 2024-10 2024-10-28/2024-11-03 NaN \n",
+"3 2024-10-29 10:02:45+00:00 2024-10 2024-10-28/2024-11-03 NaN \n",
+"4 2024-10-26 12:40:25+00:00 2024-10 2024-10-21/2024-10-27 NaN \n",
+"\n",
+"[5 rows x 24 columns]"
+]
+},
+"execution_count": 39,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"merge_df.head()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 28,
+"metadata": {},
+"outputs": [],
+"source": [
+"merge_df.drop(columns=\"tx_hash\", inplace=True)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 40,
+"metadata": {},
+"outputs": [],
+"source": [
+"merge_df.to_parquet(\"../tmp/tools.parquet\", index=False)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
+},
 {
 "cell_type": "code",
 "execution_count": 4,
scripts/cleaning_old_info.py CHANGED
@@ -1,7 +1,5 @@
1
  import pandas as pd
2
- from profitability import summary_analyse
3
  from utils import DATA_DIR
4
- from staking import label_trades_by_staking
5
 
6
 
7
  def clean_old_data_from_parquet_files(cutoff_date: str):
@@ -47,21 +45,24 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
47
  except Exception as e:
48
  print(f"Error cleaning all trades profitability file {e}")
49
 
50
- # generate again summary_profitability.parquet
51
  try:
52
- # add staking labels
53
- label_trades_by_staking(trades_df=all_trades, update=False)
54
 
55
- # save to parquet
56
- all_trades.to_parquet(
57
- DATA_DIR / "all_trades_profitability.parquet", index=False
58
  )
59
- print("Summarising trades...")
60
- summary_df = summary_analyse(all_trades)
61
- summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
62
  except Exception as e:
63
- print(f"Error generating summary and saving all trades profitability file {e}")
64
 
65
 
66
  if __name__ == "__main__":
67
- clean_old_data_from_parquet_files("2024-09-22")
 
1
  import pandas as pd
 
2
  from utils import DATA_DIR
 
3
 
4
 
5
  def clean_old_data_from_parquet_files(cutoff_date: str):
 
45
  except Exception as e:
46
  print(f"Error cleaning all trades profitability file {e}")
47
 
48
+ # clean unknown_traders.parquet
49
  try:
50
+ unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
 
51
 
52
+ unknown_traders["creation_timestamp"] = pd.to_datetime(
53
+ unknown_traders["creation_timestamp"], utc=True
 
54
  )
55
+
56
+ print(f"length before filtering {len(unknown_traders)}")
57
+ unknown_traders = unknown_traders.loc[
58
+ unknown_traders["creation_timestamp"] > min_date_utc
59
+ ]
60
+ print(f"length after filtering {len(unknown_traders)}")
61
+ unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
62
+
63
  except Exception as e:
64
+ print(f"Error cleaning unknown_traders file {e}")
65
 
66
 
67
  if __name__ == "__main__":
68
+ clean_old_data_from_parquet_files("2024-10-25")
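The cutoff filter above is the pattern now applied to every dataset file. As a standalone sketch (filter_after_cutoff is a hypothetical helper; assumes the file has a creation_timestamp column):

import pandas as pd

def filter_after_cutoff(path: str, cutoff_date: str) -> pd.DataFrame:
    # parse the cutoff once as a tz-aware UTC timestamp
    min_date_utc = pd.Timestamp(cutoff_date, tz="UTC")
    df = pd.read_parquet(path)
    df["creation_timestamp"] = pd.to_datetime(df["creation_timestamp"], utc=True)
    # keep only rows newer than the cutoff
    return df.loc[df["creation_timestamp"] > min_date_utc]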
scripts/daily_data.py CHANGED
@@ -32,7 +32,7 @@ def prepare_live_metrics(
32
  )
33
 
34
  # staking label
35
- label_trades_by_staking(all_trades_df)
36
 
37
  # create the unknown traders dataset
38
  unknown_traders_df, all_trades_df = create_unknown_traders_df(
 
32
  )
33
 
34
  # staking label
35
+ all_trades_df = label_trades_by_staking(all_trades_df)
36
 
37
  # create the unknown traders dataset
38
  unknown_traders_df, all_trades_df = create_unknown_traders_df(
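The rebind here is deliberate: if the labeling helper ever copies or filters the frame internally, a mutate-in-place contract silently loses the labels. A toy illustration (label stands in for label_trades_by_staking):

import pandas as pd

def label(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()  # any internal copy breaks a mutate-in-place contract
    df["staking"] = "non_agent"
    return df  # returning the frame makes the contract explicit

trades = pd.DataFrame({"trader_address": ["0xabc"]})
trades = label(trades)  # rebind, as the hunk above now does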
scripts/get_mech_info.py CHANGED
@@ -11,6 +11,7 @@ from utils import (
11
  import requests
12
  import pandas as pd
13
  import numpy as np
 
14
  from mech_request_utils import (
15
  collect_all_mech_delivers,
16
  collect_all_mech_requests,
@@ -146,7 +147,7 @@ def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
146
 
147
  # Remove duplicates
148
  # fpmm.outcomes is a numpy array
149
- merge_df.drop_duplicates("id", inplace=True)
150
  print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")
151
 
152
  # save the parquet file
@@ -174,16 +175,15 @@ def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
174
  return merge_df
175
 
176
 
177
- def update_tools_parquet(rpc: str, new_tools_filename: pd.DataFrame):
178
  try:
179
- old_tools_df = pd.read_parquet(DATA_DIR / "tools.parquet")
180
  except Exception as e:
181
  print(f"Error reading old tools parquet file {e}")
182
  return None
183
  try:
184
  new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
185
- # the new file has no request_time yet
186
- updating_timestamps(rpc, new_tools_filename)
187
  except Exception as e:
188
  print(f"Error reading new trades parquet file {e}")
189
  return None
@@ -201,7 +201,7 @@ def update_tools_parquet(rpc: str, new_tools_filename: pd.DataFrame):
201
  print(f"Final length after removing duplicates in tools= {len(merge_df)}")
202
 
203
  # save the parquet file
204
- merge_df.to_parquet(DATA_DIR / "tools.parquet", index=False)
205
 
206
 
207
  def get_mech_info_2024() -> dict[str, Any]:
@@ -298,6 +298,10 @@ def get_mech_events_since_last_run():
298
  try:
299
  all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
300
  latest_timestamp = max(all_trades.creation_timestamp)
301
  print(f"Updating data since {latest_timestamp}")
302
  except Exception:
303
  print("Error while reading the profitability parquet file")
@@ -351,41 +355,7 @@ def get_mech_events_since_last_run():
351
  return latest_timestamp
352
 
353
 
354
- @measure_execution_time
355
- def get_mech_events_last_60_days():
356
- earliest_block_number = get_last_60_days_block_number()
357
- last_block_number = get_last_block_number()
358
- # mech requests
359
- requests_dict, duplicatedReqId, nr_errors = collect_all_mech_requests(
360
- from_block=earliest_block_number,
361
- to_block=last_block_number,
362
- filename="mech_requests.json",
363
- )
364
-
365
- # mech delivers
366
- delivers_dict, duplicatedIds, nr_errors = collect_all_mech_delivers(
367
- from_block=earliest_block_number,
368
- to_block=last_block_number,
369
- filename="mech_delivers.json",
370
- )
371
-
372
- # clean delivers
373
- clean_mech_delivers("mech_requests.json", "mech_delivers.json")
374
-
375
- # solve duplicated requestIds
376
- block_map = fix_duplicate_requestIds("mech_requests.json", "mech_delivers.json")
377
-
378
- # merge the two files into one source
379
- not_found = merge_requests_delivers(
380
- "mech_requests.json", "mech_delivers.json", "merged_requests.json"
381
- )
382
-
383
- # Add ipfs contents
384
- get_ipfs_data("merged_requests.json", "tools_info.json")
385
-
386
-
387
  if __name__ == "__main__":
388
- get_mech_events_last_60_days()
389
-
390
  # result = get_mech_info_last_60_days()
391
  # print(result)
 
11
  import requests
12
  import pandas as pd
13
  import numpy as np
14
+ from gnosis_timestamps import compute_request_time
15
  from mech_request_utils import (
16
  collect_all_mech_delivers,
17
  collect_all_mech_requests,
 
147
 
148
  # Remove duplicates
149
  # fpmm.outcomes is a numpy array
150
+ merge_df.drop_duplicates("id", keep="last", inplace=True)
151
  print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")
152
 
153
  # save the parquet file
 
175
  return merge_df
176
 
177
 
178
+ def update_tools_parquet(new_tools_filename: pd.DataFrame):
179
  try:
180
+ old_tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
181
  except Exception as e:
182
  print(f"Error reading old tools parquet file {e}")
183
  return None
184
  try:
185
  new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
186
+
 
187
  except Exception as e:
188
  print(f"Error reading new trades parquet file {e}")
189
  return None
 
201
  print(f"Final length after removing duplicates in tools= {len(merge_df)}")
202
 
203
  # save the parquet file
204
+ merge_df.to_parquet(TMP_DIR / "tools.parquet", index=False)
205
 
206
 
207
  def get_mech_info_2024() -> dict[str, Any]:
 
298
  try:
299
  all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
300
  latest_timestamp = max(all_trades.creation_timestamp)
301
+ # cutoff_date = "2024-12-01"
302
+ # latest_timestamp = pd.Timestamp(
303
+ # datetime.strptime(cutoff_date, "%Y-%m-%d")
304
+ # ).tz_localize("UTC")
305
  print(f"Updating data since {latest_timestamp}")
306
  except Exception:
307
  print("Error while reading the profitability parquet file")
 
355
  return latest_timestamp
356
 
357

358
  if __name__ == "__main__":
359
+ get_mech_events_since_last_run()
 
360
  # result = get_mech_info_last_60_days()
361
  # print(result)
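On the drop_duplicates change above: keep="last" retains the most recently merged row per id, which matters when refreshed trades are appended after their stale versions. A minimal illustration:

import pandas as pd

df = pd.DataFrame({"id": ["a", "a"], "currentAnswer": [None, "Yes"]})
# the row appended last (the refreshed one) survives deduplication
df = df.drop_duplicates("id", keep="last")
print(df)  # id "a" now carries currentAnswer == "Yes"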
scripts/gnosis_timestamps.py ADDED
@@ -0,0 +1,184 @@
1
+ from web3 import Web3
2
+ import os
3
+ import requests
4
+ import time
5
+ import pickle
6
+ from datetime import datetime, timezone
7
+ from functools import partial
8
+ import pandas as pd
9
+ import pytz
10
+ from tqdm import tqdm
11
+ from utils import DATA_DIR, TMP_DIR, measure_execution_time
12
+ from concurrent.futures import ThreadPoolExecutor
13
+
14
+ GNOSIS_API_INTERVAL = 0.2 # 5 calls in 1 second
15
+ GNOSIS_URL = "https://api.gnosisscan.io/api"
16
+ GNOSIS_API_KEY = os.environ.get("GNOSIS_API_KEY", None)
17
+ # https://api.gnosisscan.io/api?module=account&action=txlist&address=0x1fe2b09de07475b1027b0c73a5bf52693b31a52e&startblock=36626348&endblock=36626348&page=1&offset=10&sort=asc&apikey=${gnosis_api_key}
18
+
19
+ # Connect to Gnosis Chain RPC
20
+ w3 = Web3(Web3.HTTPProvider("https://rpc.gnosischain.com"))
21
+
22
+
23
+ def parallelize_timestamp_computation(df: pd.DataFrame, function: callable) -> list:
24
+ """Parallelize the timestamp conversion."""
25
+ tx_hashes = df["tx_hash"].tolist()
26
+ with ThreadPoolExecutor(max_workers=10) as executor:
27
+ results = list(tqdm(executor.map(function, tx_hashes), total=len(tx_hashes)))
28
+ return results
29
+
30
+
31
+ def transform_timestamp_to_datetime(timestamp):
32
+ dt = datetime.fromtimestamp(timestamp, timezone.utc)
33
+ return dt
34
+
35
+
36
+ def get_tx_hash(trader_address, request_block):
37
+ """Function to get the transaction hash from the address and block number"""
38
+ params = {
39
+ "module": "account",
40
+ "action": "txlist",
41
+ "address": trader_address,
42
+ "page": 1,
43
+ "offset": 100,
44
+ "startblock": request_block,
45
+ "endblock": request_block,
46
+ "sort": "asc",
47
+ "apikey": GNOSIS_API_KEY,
48
+ }
49
+
50
+ try:
51
+ response = requests.get(GNOSIS_URL, params=params)
52
+ tx_list = response.json()["result"]
53
+ time.sleep(GNOSIS_API_INTERVAL)
54
+ if len(tx_list) > 1:
55
+ raise ValueError("More than one transaction found")
56
+ return tx_list[0]["hash"]
57
+ except Exception as e:
58
+ return None
59
+
60
+
61
+ def add_tx_hash_info(filename: str = "tools.parquet"):
62
+ """Function to add the hash info to the saved tools parquet file"""
63
+ tools = pd.read_parquet(DATA_DIR / filename)
64
+ tools["tx_hash"] = None
65
+ total_errors = 0
66
+ for i, mech_request in tqdm(
67
+ tools.iterrows(), total=len(tools), desc="Adding tx hash"
68
+ ):
69
+ try:
70
+ trader_address = mech_request["trader_address"]
71
+ block_number = mech_request["request_block"]
72
+ tools.at[i, "tx_hash"] = get_tx_hash(
73
+ trader_address=trader_address, request_block=block_number
74
+ )
75
+ except Exception as e:
76
+ print(f"Error with mech request {mech_request}")
77
+ total_errors += 1
78
+ continue
79
+
80
+ print(f"Total number of errors = {total_errors}")
81
+ tools.to_parquet(DATA_DIR / filename)
82
+
83
+
84
+ def get_transaction_timestamp(tx_hash: str, web3: Web3):
85
+
86
+ try:
87
+ # Get transaction data
88
+ tx = web3.eth.get_transaction(tx_hash)
89
+ # Get block data
90
+ block = web3.eth.get_block(tx["blockNumber"])
91
+ # Get timestamp
92
+ timestamp = block["timestamp"]
93
+
94
+ # Convert to datetime
95
+ dt = datetime.fromtimestamp(timestamp, tz=pytz.UTC)
96
+
97
+ # return {
98
+ # "timestamp": timestamp,
99
+ # "datetime": dt,
100
+ # "from_address": tx["from"],
101
+ # "to_address": tx["to"],
102
+ # "success": True,
103
+ # }
104
+ return dt.strftime("%Y-%m-%d %H:%M:%S")
105
+ except Exception as e:
106
+ print(f"Error getting the timestamp from {tx_hash}")
107
+ return None
108
+
109
+
110
+ @measure_execution_time
111
+ def compute_request_time(tools_df: pd.DataFrame) -> pd.DataFrame:
112
+ """Function to compute the request timestamp from the tx hash"""
113
+ # read the local info
114
+ try:
115
+ gnosis_info = pickle.load(open(TMP_DIR / "gnosis_info.pkl", "rb"))
116
+ except Exception:
117
+ print("File not found or not created. Creating a new one")
118
+ gnosis_info = {}
119
+
120
+ # any previous information?
121
+ tools_df["request_time"] = tools_df["tx_hash"].map(gnosis_info)
122
+
123
+ # Identify tools with missing request_time and fill them
124
+ missing_time_indices = tools_df[tools_df["request_time"].isna()].index
125
+ print(f"length of missing_time_indices = {len(missing_time_indices)}")
126
+ # traverse all tx hashes and get the timestamp of each tx
127
+ partial_mech_request_timestamp = partial(get_transaction_timestamp, web3=w3)
128
+ missing_timestamps = parallelize_timestamp_computation(
129
+ tools_df.loc[missing_time_indices], partial_mech_request_timestamp
130
+ )
131
+
132
+ # Update the original DataFrame with the missing timestamps
133
+ for i, timestamp in zip(missing_time_indices, missing_timestamps):
134
+ tools_df.at[i, "request_time"] = timestamp
135
+ # creating other time fields
136
+ tools_df["request_month_year"] = pd.to_datetime(
137
+ tools_df["request_time"]
138
+ ).dt.strftime("%Y-%m")
139
+ tools_df["request_month_year_week"] = (
140
+ pd.to_datetime(tools_df["request_time"]).dt.to_period("W").astype(str)
141
+ )
142
+ # Update t_map with new timestamps
143
+ new_timestamps = (
144
+ tools_df[["tx_hash", "request_time"]]
145
+ .dropna()
146
+ .set_index("tx_hash")
147
+ .to_dict()["request_time"]
148
+ )
149
+ gnosis_info.update(new_timestamps)
150
+ # saving gnosis info
151
+ with open(TMP_DIR / "gnosis_info.pkl", "wb") as f:
152
+ pickle.dump(gnosis_info, f)
153
+ return tools_df
154
+
155
+
156
+ def get_account_details(address):
157
+ # gnosis_url = GNOSIS_URL.substitute(gnosis_api_key=GNOSIS_API_KEY, tx_hash=tx_hash)
158
+
159
+ params = {
160
+ "module": "account",
161
+ "action": "txlistinternal",
162
+ "address": address,
163
+ #'page': 1,
164
+ #'offset': 100,
165
+ #'startblock': 0,
166
+ #'endblock': 9999999999,
167
+ #'sort': 'asc',
168
+ "apikey": GNOSIS_API_KEY,
169
+ }
170
+
171
+ try:
172
+ response = requests.get(GNOSIS_URL, params=params)
173
+ return response.json()
174
+ except Exception as e:
175
+ return {"error": str(e)}
176
+
177
+
178
+ if __name__ == "__main__":
179
+ # tx_data = "0x783BFA045BDE2D0BCD65280D97A29E7BD9E4FDC10985848690C9797E767140F4"
180
+ new_tools = pd.read_parquet(DATA_DIR / "new_tools.parquet")
181
+ new_tools = compute_request_time(new_tools)
182
+ new_tools.to_parquet(DATA_DIR / "new_tools.parquet")
183
+ # result = get_tx_hash("0x1fe2b09de07475b1027b0c73a5bf52693b31a52e", 36626348)
184
+ # print(result)
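compute_request_time above is, at its core, a pickle-backed memo of tx_hash to timestamp, so only unseen hashes hit the RPC. The caching skeleton reduced to its essentials (fetch stands in for get_transaction_timestamp):

import pickle

def cached_lookup(keys, cache_path, fetch):
    try:
        with open(cache_path, "rb") as f:
            cache = pickle.load(f)
    except Exception:
        cache = {}  # first run: start from an empty cache
    for k in keys:
        if k not in cache:
            cache[k] = fetch(k)  # only cache misses touch the network
    with open(cache_path, "wb") as f:
        pickle.dump(cache, f)
    return [cache[k] for k in keys]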
scripts/mech_request_utils.py CHANGED
@@ -23,7 +23,6 @@ import time
23
  import pickle
24
  from random import uniform
25
  from typing import Any, Dict, Tuple
26
- from pathlib import Path
27
  import requests
28
  from gql import Client, gql
29
  from gql.transport.requests import RequestsHTTPTransport
@@ -379,21 +378,22 @@ def clean_mech_delivers(requests_filename: str, delivers_filename: str) -> None:
379
  mech_requests = json.load(file)
380
 
381
  list_reqIds = [mech_requests[k].get("requestId") for k in mech_requests.keys()]
382
- # remove duplicated elements
383
- list_reqIds = list(set(list_reqIds))
384
 
385
  # remove requestIds from delivers that are not in this list
386
  with open(JSON_DATA_DIR / delivers_filename, "r") as file:
387
  mech_delivers = json.load(file)
388
 
389
  print(f"original size of the file {len(mech_delivers)}")
390
- to_delete = []
391
- for r in mech_delivers.keys():
392
- if r not in list_reqIds:
393
- to_delete.append(r)
 
 
 
 
 
394
 
395
- for r in to_delete:
396
- mech_delivers.pop(r, None)
397
  print(f"final size of the file {len(mech_delivers)}")
398
  save_json_file(mech_delivers, delivers_filename)
399
 
 
23
  import pickle
24
  from random import uniform
25
  from typing import Any, Dict, Tuple
 
26
  import requests
27
  from gql import Client, gql
28
  from gql.transport.requests import RequestsHTTPTransport
 
378
  mech_requests = json.load(file)
379
 
380
  list_reqIds = [mech_requests[k].get("requestId") for k in mech_requests.keys()]
 
 
381
 
382
  # remove requestIds from delivers that are not in this list
383
  with open(JSON_DATA_DIR / delivers_filename, "r") as file:
384
  mech_delivers = json.load(file)
385
 
386
  print(f"original size of the file {len(mech_delivers)}")
387
+ mech_delivers = {
388
+ k: v
389
+ for k, v in tqdm(
390
+ mech_delivers.items(),
391
+ total=len(mech_delivers),
392
+ desc="Filtering delivers dictionary",
393
+ )
394
+ if k in set(list_reqIds)
395
+ }
396
 
 
 
397
  print(f"final size of the file {len(mech_delivers)}")
398
  save_json_file(mech_delivers, delivers_filename)
399
 
scripts/nr_mech_calls.py CHANGED
@@ -1,5 +1,5 @@
1
  import pandas as pd
2
- from utils import DATA_DIR, DEFAULT_MECH_FEE
3
  from tqdm import tqdm
4
  from datetime import datetime, timezone
5
  from typing import Dict, Any
@@ -220,9 +220,9 @@ def compute_timestamp_mech_calls(
220
  ]
221
  # traverse market requests
222
  total_mech_calls = 0
223
- for mech_request in market_requests:
224
  # check timestamp (before the trade)
225
- request_ts = mech_request.request_time
226
  if request_ts < trade_ts:
227
  # check the timestamp has not been used in a previous trade
228
  used_timestamps = request_timestamps_used[market]
@@ -246,7 +246,7 @@ def compute_mech_calls_based_on_timestamps(
246
  nr_traders = len(fpmmTrades["trader_address"].unique())
247
  fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
248
  fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
249
- trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
250
  tools["request_time"] = pd.to_datetime(tools["request_time"])
251
  tools["request_date"] = tools["request_time"].dt.date
252
  tools = tools.sort_values(by="request_time", ascending=True)
@@ -254,9 +254,9 @@ def compute_mech_calls_based_on_timestamps(
254
  for trader in tqdm(
255
  fpmmTrades["trader_address"].unique(),
256
  total=nr_traders,
257
- desc="creating mech calls estimation based on timestamps",
258
  ):
259
- # compute the mech calls estimations for each trader
260
  all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
261
  all_tools = tools[tools["trader_address"] == trader]
262
  trader_mech_calls = compute_timestamp_mech_calls(all_trades, all_tools)
@@ -266,12 +266,10 @@ def compute_mech_calls_based_on_timestamps(
266
 
267
  if __name__ == "__main__":
268
  # update_trade_nr_mech_calls(non_agents=True)
269
- trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
270
- print("before filtering")
271
- print(trades_df.staking.value_counts())
272
- unknown_df, trades_df = create_unknown_traders_df(trades_df=trades_df)
273
- print("after filtering")
274
- print(trades_df.staking.value_counts())
275
- print("saving unknown traders")
276
- unknown_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
277
- trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
 
1
  import pandas as pd
2
+ from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR
3
  from tqdm import tqdm
4
  from datetime import datetime, timezone
5
  from typing import Dict, Any
 
220
  ]
221
  # traverse market requests
222
  total_mech_calls = 0
223
+ for i, mech_request in market_requests.iterrows():
224
  # check timestamp (before the trade)
225
+ request_ts = mech_request["request_time"]
226
  if request_ts < trade_ts:
227
  # check the timestamp has not been used in a previous trade
228
  used_timestamps = request_timestamps_used[market]
 
246
  nr_traders = len(fpmmTrades["trader_address"].unique())
247
  fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
248
  fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
249
+ fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
250
  tools["request_time"] = pd.to_datetime(tools["request_time"])
251
  tools["request_date"] = tools["request_time"].dt.date
252
  tools = tools.sort_values(by="request_time", ascending=True)
 
254
  for trader in tqdm(
255
  fpmmTrades["trader_address"].unique(),
256
  total=nr_traders,
257
+ desc="creating mech calls count based on timestamps",
258
  ):
259
+ # compute the mech calls for each trader
260
  all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
261
  all_tools = tools[tools["trader_address"] == trader]
262
  trader_mech_calls = compute_timestamp_mech_calls(all_trades, all_tools)
 
266
 
267
  if __name__ == "__main__":
268
  # update_trade_nr_mech_calls(non_agents=True)
269
+ tools = pd.read_parquet(TMP_DIR / "tools.parquet")
270
+ fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
271
+ fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
272
+ lambda x: transform_to_datetime(x)
273
+ )
274
+ result = compute_mech_calls_based_on_timestamps(fpmmTrades=fpmmTrades, tools=tools)
275
+ result.to_parquet(TMP_DIR / "result_df.parquet", index=False)
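The counting rule these hunks implement: a mech request is attributed to a trade when its request_time precedes the trade and no earlier trade on the same market has already consumed it. Reduced to a sketch, with plain numbers standing in for timestamps:

def count_mech_calls(request_times, trade_ts, used):
    """Count requests before trade_ts that no earlier trade consumed."""
    calls = 0
    for ts in request_times:
        if ts < trade_ts and ts not in used:
            used.add(ts)  # mark the timestamp as consumed for later trades
            calls += 1
    return calls

used = set()
print(count_mech_calls([1, 2, 5], trade_ts=4, used=used))  # 2; timestamps 1 and 2 are consumed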
 
 
scripts/profitability.py CHANGED
@@ -18,7 +18,6 @@
18
  # ------------------------------------------------------------------------------
19
 
20
  import time
21
- import datetime
22
  import pandas as pd
23
  from typing import Any
24
  from enum import Enum
@@ -38,6 +37,7 @@ from utils import (
38
  JSON_DATA_DIR,
39
  DATA_DIR,
40
  DEFAULT_MECH_FEE,
 
41
  )
42
  from staking import label_trades_by_staking
43
  from nr_mech_calls import (
@@ -122,25 +122,6 @@ ALL_TRADES_STATS_DF_COLS = [
122
  "roi",
123
  ]
124
 
125
- SUMMARY_STATS_DF_COLS = [
126
- "trader_address",
127
- "num_trades",
128
- "num_winning_trades",
129
- "num_redeemed",
130
- "total_investment",
131
- "total_trade_fees",
132
- "num_mech_calls",
133
- "total_mech_fees",
134
- "total_earnings",
135
- "total_redeemed_amount",
136
- "total_net_earnings",
137
- "total_net_earnings_wo_mech_fees",
138
- "total_roi",
139
- "total_roi_wo_mech_fees",
140
- "mean_mech_calls_per_trade",
141
- "mean_mech_fee_amount_per_trade",
142
- ]
143
-
144
 
145
  def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
146
  """Returns whether the user has redeemed the position."""
@@ -159,7 +140,6 @@ def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
159
 
160
 
161
  def prepare_profitalibity_data(
162
- rpc: str,
163
  tools_filename: str,
164
  trades_filename: str,
165
  ) -> pd.DataFrame:
@@ -167,6 +147,7 @@ def prepare_profitalibity_data(
167
 
168
  # Check if tools.parquet is in the same directory
169
  try:
 
170
  tools = pd.read_parquet(DATA_DIR / tools_filename)
171
 
172
  # make sure creator_address is in the columns
@@ -181,7 +162,7 @@ def prepare_profitalibity_data(
181
  tools.to_parquet(DATA_DIR / tools_filename)
182
  print(f"{tools_filename} loaded")
183
  except FileNotFoundError:
184
- print("tools.parquet not found. Please run tools.py first.")
185
  return
186
 
187
  # Check if fpmmTrades.parquet is in the same directory
@@ -218,7 +199,6 @@ def determine_market_status(trade, current_answer):
218
  def analyse_trader(
219
  trader_address: str,
220
  fpmmTrades: pd.DataFrame,
221
- tools: pd.DataFrame,
222
  trader_estimated_mech_calls: pd.DataFrame,
223
  daily_info: bool = False,
224
  ) -> pd.DataFrame:
@@ -294,7 +274,7 @@ def analyse_trader(
294
  total_mech_calls = trader_estimated_mech_calls.loc[
295
  (trader_estimated_mech_calls["market"] == trade["title"])
296
  & (trader_estimated_mech_calls["trade_id"] == trade_id),
297
- "mech_calls_per_trade",
298
  ].iloc[0]
299
 
300
  net_earnings = (
@@ -341,7 +321,6 @@ def analyse_trader(
341
 
342
  def analyse_all_traders(
343
  trades: pd.DataFrame,
344
- tools: pd.DataFrame,
345
  estimated_mech_calls: pd.DataFrame,
346
  daily_info: bool = False,
347
  ) -> pd.DataFrame:
@@ -357,9 +336,7 @@ def analyse_all_traders(
357
  estimated_mech_calls["trader_address"] == trader
358
  ]
359
  all_traders.append(
360
- analyse_trader(
361
- trader, trades, tools, trader_estimated_mech_calls, daily_info
362
- )
363
  )
364
 
365
  # concat all creators
@@ -368,54 +345,7 @@ def analyse_all_traders(
368
  return all_creators_df
369
 
370
 
371
- def summary_analyse(df):
372
- """Summarise profitability analysis."""
373
- # Ensure DataFrame is not empty
374
- if df.empty:
375
- return pd.DataFrame(columns=SUMMARY_STATS_DF_COLS)
376
-
377
- # Group by trader_address
378
- grouped = df.groupby("trader_address")
379
-
380
- # Create summary DataFrame
381
- summary_df = grouped.agg(
382
- num_trades=("trader_address", "size"),
383
- num_winning_trades=("winning_trade", lambda x: float((x).sum())),
384
- num_redeemed=("redeemed", lambda x: float(x.sum())),
385
- total_investment=("collateral_amount", "sum"),
386
- total_trade_fees=("trade_fee_amount", "sum"),
387
- num_mech_calls=("num_mech_calls", "sum"),
388
- total_mech_fees=("mech_fee_amount", "sum"),
389
- total_earnings=("earnings", "sum"),
390
- total_redeemed_amount=("redeemed_amount", "sum"),
391
- total_net_earnings=("net_earnings", "sum"),
392
- )
393
-
394
- # Calculating additional columns
395
- summary_df["total_roi"] = (
396
- summary_df["total_net_earnings"] / summary_df["total_investment"]
397
- )
398
- summary_df["mean_mech_calls_per_trade"] = (
399
- summary_df["num_mech_calls"] / summary_df["num_trades"]
400
- )
401
- summary_df["mean_mech_fee_amount_per_trade"] = (
402
- summary_df["total_mech_fees"] / summary_df["num_trades"]
403
- )
404
- summary_df["total_net_earnings_wo_mech_fees"] = (
405
- summary_df["total_net_earnings"] + summary_df["total_mech_fees"]
406
- )
407
- summary_df["total_roi_wo_mech_fees"] = (
408
- summary_df["total_net_earnings_wo_mech_fees"] / summary_df["total_investment"]
409
- )
410
-
411
- # Resetting index to include trader_address
412
- summary_df.reset_index(inplace=True)
413
-
414
- return summary_df
415
-
416
-
417
  def run_profitability_analysis(
418
- rpc: str,
419
  tools_filename: str,
420
  trades_filename: str,
421
  merge: bool = False,
@@ -424,10 +354,12 @@ def run_profitability_analysis(
424
 
425
  # load dfs from data folder for analysis
426
  print(f"Preparing data with {tools_filename} and {trades_filename}")
427
- fpmmTrades = prepare_profitalibity_data(rpc, tools_filename, trades_filename)
 
428
  if merge:
429
- update_tools_parquet(rpc, tools_filename)
430
- tools = pd.read_parquet(DATA_DIR / "tools.parquet")
 
431
 
432
  fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
433
  lambda x: transform_to_datetime(x)
@@ -436,9 +368,10 @@ def run_profitability_analysis(
436
  trade_mech_calls = compute_mech_calls_based_on_timestamps(
437
  fpmmTrades=fpmmTrades, tools=tools
438
  )
 
439
  print(trade_mech_calls.total_mech_calls.describe())
440
  print("Analysing trades...")
441
- all_trades_df = analyse_all_traders(fpmmTrades, tools, trade_mech_calls)
442
 
443
  # # merge previous files if requested
444
  if merge:
@@ -470,9 +403,10 @@ def run_profitability_analysis(
470
  all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
471
 
472
  # add staking labels
473
- label_trades_by_staking(trades_df=all_trades_df)
474
 
475
  # create the unknown traders dataset
 
476
  unknown_traders_df, all_trades_df = create_unknown_traders_df(
477
  trades_df=all_trades_df
478
  )
@@ -481,18 +415,18 @@ def run_profitability_analysis(
481
  # save to parquet
482
  all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
483
 
484
- # summarize profitability df
485
- print("Summarising trades...")
486
- summary_df = summary_analyse(all_trades_df)
487
- summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
488
-
489
  print("Done!")
490
 
491
- return all_trades_df, summary_df
492
 
493
 
494
  if __name__ == "__main__":
495
- rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
496
- if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
497
- os.remove(DATA_DIR / "fpmmTrades.parquet")
498
- run_profitability_analysis(rpc)
18
  # ------------------------------------------------------------------------------
19
 
20
  import time
 
21
  import pandas as pd
22
  from typing import Any
23
  from enum import Enum
 
37
  JSON_DATA_DIR,
38
  DATA_DIR,
39
  DEFAULT_MECH_FEE,
40
+ TMP_DIR,
41
  )
42
  from staking import label_trades_by_staking
43
  from nr_mech_calls import (
 
122
  "roi",
123
  ]
124

125
 
126
  def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
127
  """Returns whether the user has redeemed the position."""
 
140
 
141
 
142
  def prepare_profitalibity_data(
 
143
  tools_filename: str,
144
  trades_filename: str,
145
  ) -> pd.DataFrame:
 
147
 
148
  # Check if tools.parquet is in the same directory
149
  try:
150
+ # new tools parquet
151
  tools = pd.read_parquet(DATA_DIR / tools_filename)
152
 
153
  # make sure creator_address is in the columns
 
162
  tools.to_parquet(DATA_DIR / tools_filename)
163
  print(f"{tools_filename} loaded")
164
  except FileNotFoundError:
165
+ print(f"{tools_filename} not found.")
166
  return
167
 
168
  # Check if fpmmTrades.parquet is in the same directory
 
199
  def analyse_trader(
200
  trader_address: str,
201
  fpmmTrades: pd.DataFrame,
 
202
  trader_estimated_mech_calls: pd.DataFrame,
203
  daily_info: bool = False,
204
  ) -> pd.DataFrame:
 
274
  total_mech_calls = trader_estimated_mech_calls.loc[
275
  (trader_estimated_mech_calls["market"] == trade["title"])
276
  & (trader_estimated_mech_calls["trade_id"] == trade_id),
277
+ "total_mech_calls",
278
  ].iloc[0]
279
 
280
  net_earnings = (
 
321
 
322
  def analyse_all_traders(
323
  trades: pd.DataFrame,
 
324
  estimated_mech_calls: pd.DataFrame,
325
  daily_info: bool = False,
326
  ) -> pd.DataFrame:
 
336
  estimated_mech_calls["trader_address"] == trader
337
  ]
338
  all_traders.append(
339
+ analyse_trader(trader, trades, trader_estimated_mech_calls, daily_info)
 
 
340
  )
341
 
342
  # concat all creators
 
345
  return all_creators_df
346
 
347

348
  def run_profitability_analysis(
 
349
  tools_filename: str,
350
  trades_filename: str,
351
  merge: bool = False,
 
354
 
355
  # load dfs from data folder for analysis
356
  print(f"Preparing data with {tools_filename} and {trades_filename}")
357
+ fpmmTrades = prepare_profitalibity_data(tools_filename, trades_filename)
358
+
359
  if merge:
360
+ update_tools_parquet(tools_filename)
361
+
362
+ tools = pd.read_parquet(TMP_DIR / "tools.parquet")
363
 
364
  fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
365
  lambda x: transform_to_datetime(x)
 
368
  trade_mech_calls = compute_mech_calls_based_on_timestamps(
369
  fpmmTrades=fpmmTrades, tools=tools
370
  )
371
+ trade_mech_calls.to_parquet(TMP_DIR / "trade_mech_calls.parquet")
372
  print(trade_mech_calls.total_mech_calls.describe())
373
  print("Analysing trades...")
374
+ all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
375
 
376
  # # merge previous files if requested
377
  if merge:
 
403
  all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
404
 
405
  # add staking labels
406
+ all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
407
 
408
  # create the unknown traders dataset
409
+ print("Creating unknown traders dataset")
410
  unknown_traders_df, all_trades_df = create_unknown_traders_df(
411
  trades_df=all_trades_df
412
  )
 
415
  # save to parquet
416
  all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
417

418
  print("Done!")
419
 
420
+ return all_trades_df
421
 
422
 
423
  if __name__ == "__main__":
424
+ # updating the whole fpmmTrades parquet file instead of just the new ones
425
+ # trade_mech_calls = pd.read_parquet(TMP_DIR / "result_df.parquet")
426
+ # fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
427
+ # fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
428
+ # lambda x: transform_to_datetime(x)
429
+ # )
430
+ # all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
431
+ # all_trades_df.to_parquet(TMP_DIR / "all_trades_df.parquet", index=False)
432
+ run_profitability_analysis("file1", "file2")
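A side note on the is_invalid filter kept above: boolean negation says the same thing more idiomatically than comparing against False:

import pandas as pd

df = pd.DataFrame({"is_invalid": [True, False]})
valid = df.loc[~df["is_invalid"]]  # equivalent to df["is_invalid"] == False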
scripts/pull_data.py CHANGED
@@ -11,6 +11,7 @@ from utils import (
11
  measure_execution_time,
12
  DATA_DIR,
13
  HIST_DIR,
 
14
  )
15
  from get_mech_info import (
16
  get_mech_events_since_last_run,
@@ -21,6 +22,7 @@ from cleaning_old_info import clean_old_data_from_parquet_files
21
  from web3_utils import updating_timestamps
22
  from manage_space_files import move_files
23
  from cloud_storage import load_historical_file
 
24
 
25
 
26
  logging.basicConfig(level=logging.INFO)
@@ -52,7 +54,7 @@ def save_historical_data():
52
  timestamp = current_datetime.strftime("%Y%m%d_%H%M%S")
53
 
54
  try:
55
- tools = pd.read_parquet(DATA_DIR / "tools.parquet")
56
  filename = f"tools_{timestamp}.parquet"
57
  tools.to_parquet(HIST_DIR / filename, index=False)
58
  # save into cloud storage
@@ -79,7 +81,7 @@ def only_new_weekly_analysis():
79
  rpc = RPC
80
  # Run markets ETL
81
  logging.info("Running markets ETL")
82
- mkt_etl(MARKETS_FILENAME)
83
  logging.info("Markets ETL completed")
84
 
85
  # Mech events ETL
@@ -108,7 +110,6 @@ def only_new_weekly_analysis():
108
  # # Run profitability analysis
109
  logging.info("Running profitability analysis")
110
  run_profitability_analysis(
111
- rpc=rpc,
112
  tools_filename="new_tools.parquet",
113
  trades_filename="new_fpmmTrades.parquet",
114
  merge=True,
@@ -119,19 +120,13 @@ def only_new_weekly_analysis():
119
  # merge new json files with old json files
120
  update_json_files()
121
 
122
- try:
123
- updating_timestamps(rpc, TOOLS_FILENAME)
124
- except Exception as e:
125
- logging.error("Error while updating timestamps of tools")
126
- print(e)
127
-
128
  save_historical_data()
129
 
130
- clean_old_data_from_parquet_files("2024-10-17")
131
 
132
  compute_tools_accuracy()
133
-
134
- # move to tmp folder the new generated files
135
  move_files()
136
  logging.info("Weekly analysis files generated and saved")
137
 
 
11
  measure_execution_time,
12
  DATA_DIR,
13
  HIST_DIR,
14
+ TMP_DIR,
15
  )
16
  from get_mech_info import (
17
  get_mech_events_since_last_run,
 
22
  from web3_utils import updating_timestamps
23
  from manage_space_files import move_files
24
  from cloud_storage import load_historical_file
25
+ from tools_metrics import compute_tools_based_datasets
26
 
27
 
28
  logging.basicConfig(level=logging.INFO)
 
54
  timestamp = current_datetime.strftime("%Y%m%d_%H%M%S")
55
 
56
  try:
57
+ tools = pd.read_parquet(TMP_DIR / "tools.parquet")
58
  filename = f"tools_{timestamp}.parquet"
59
  tools.to_parquet(HIST_DIR / filename, index=False)
60
  # save into cloud storage
 
81
  rpc = RPC
82
  # Run markets ETL
83
  logging.info("Running markets ETL")
84
+ # mkt_etl(MARKETS_FILENAME)
85
  logging.info("Markets ETL completed")
86
 
87
  # Mech events ETL
 
110
  # # Run profitability analysis
111
  logging.info("Running profitability analysis")
112
  run_profitability_analysis(
 
113
  tools_filename="new_tools.parquet",
114
  trades_filename="new_fpmmTrades.parquet",
115
  merge=True,
 
120
  # merge new json files with old json files
121
  update_json_files()
122
 
 
 
 
 
 
 
123
  save_historical_data()
124
 
125
+ clean_old_data_from_parquet_files("2024-10-25")
126
 
127
  compute_tools_accuracy()
128
+ compute_tools_based_datasets()
129
+ # # move to tmp folder the new generated files
130
  move_files()
131
  logging.info("Weekly analysis files generated and saved")
132
 
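The cutoff passed to clean_old_data_from_parquet_files is still a hard-coded literal that needs a manual bump on each run. If the intent is a trailing window (an assumption, not confirmed by the commit), a computed cutoff would keep it current:

from datetime import datetime, timedelta, timezone
from cleaning_old_info import clean_old_data_from_parquet_files

# hypothetical: keep roughly the last 60 days instead of a fixed date
cutoff = (datetime.now(timezone.utc) - timedelta(days=60)).strftime("%Y-%m-%d")
clean_old_data_from_parquet_files(cutoff)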
scripts/roi_analysis.py DELETED
@@ -1,129 +0,0 @@
1
- import logging
2
- import os
3
- import pickle
4
- from web3 import Web3
5
- import pandas as pd
6
- from functools import partial
7
- from datetime import datetime
8
- from markets import (
9
- etl as mkt_etl,
10
- DEFAULT_FILENAME as MARKETS_FILENAME,
11
- )
12
-
13
- TOOLS_FILENAME = "tools_2024.parquet"
14
- from tools import (
15
- etl as tools_etl,
16
- )
17
- from pull_data import (
18
- DATA_DIR,
19
- parallelize_timestamp_conversion,
20
- block_number_to_timestamp,
21
- )
22
- from profitability import run_profitability_analysis
23
- from get_mech_info import get_mech_info_2024
24
- from utils import get_question, current_answer
25
- import gc
26
-
27
- logging.basicConfig(level=logging.INFO)
28
-
29
-
30
- def roi_analysis():
31
- """Run ROI analysis for the trades done in 2024."""
32
- rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
33
- web3 = Web3(Web3.HTTPProvider(rpc))
34
-
35
- # Run markets ETL
36
- logging.info("Running markets ETL")
37
- mkt_etl(MARKETS_FILENAME)
38
- logging.info("Markets ETL completed")
39
-
40
- # Run tools ETL
41
- logging.info("Running tools ETL")
42
-
43
- # This etl is saving already the tools parquet file
44
- tools_etl(
45
- rpcs=[rpc],
46
- mech_info=get_mech_info_2024(),
47
- filename=TOOLS_FILENAME,
48
- )
49
- logging.info("Tools ETL completed")
50
-
51
- # Run profitability analysis
52
- if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
53
- os.remove(DATA_DIR / "fpmmTrades.parquet")
54
- logging.info("Running profitability analysis")
55
- date = "2024-01-01"
56
- datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
57
- timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
58
- run_profitability_analysis(
59
- rpc=rpc,
60
- tools_filename=TOOLS_FILENAME,
61
- trades_filename="fpmmTrades.parquet",
62
- from_timestamp=timestamp_jan_2024,
63
- )
64
- logging.info("Profitability analysis completed")
65
-
66
- # Get currentAnswer from FPMMS
67
- fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
68
- tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)
69
-
70
- # Get the question from the tools
71
- logging.info("Getting the question and current answer for the tools")
72
- tools["title"] = tools["prompt_request"].apply(lambda x: get_question(x))
73
- tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))
74
-
75
- tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
76
- tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")
77
-
78
- # Convert block number to timestamp
79
- logging.info("Converting block number to timestamp")
80
- t_map = pickle.load(open(DATA_DIR / "t_map.pkl", "rb"))
81
- tools["request_time"] = tools["request_block"].map(t_map)
82
-
83
- # Identify tools with missing request_time and fill them
84
- missing_time_indices = tools[tools["request_time"].isna()].index
85
- if not missing_time_indices.empty:
86
- partial_block_number_to_timestamp = partial(
87
- block_number_to_timestamp, web3=web3
88
- )
89
- missing_timestamps = parallelize_timestamp_conversion(
90
- tools.loc[missing_time_indices], partial_block_number_to_timestamp
91
- )
92
-
93
- # Update the original DataFrame with the missing timestamps
94
- for i, timestamp in zip(missing_time_indices, missing_timestamps):
95
- tools.at[i, "request_time"] = timestamp
96
-
97
- tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
98
- "%Y-%m"
99
- )
100
- tools["request_month_year_week"] = (
101
- pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
102
- )
103
-
104
- # Save the tools data after the updates on the content
105
- tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)
106
-
107
- # Update t_map with new timestamps
108
- new_timestamps = (
109
- tools[["request_block", "request_time"]]
110
- .dropna()
111
- .set_index("request_block")
112
- .to_dict()["request_time"]
113
- )
114
- t_map.update(new_timestamps)
115
-
116
- with open(DATA_DIR / "t_map_2024.pkl", "wb") as f:
117
- pickle.dump(t_map, f)
118
-
119
- # clean and release all memory
120
- del tools
121
- del fpmms
122
- del t_map
123
- gc.collect()
124
-
125
- logging.info("ROI analysis files generated and saved")
126
-
127
-
128
- if __name__ == "__main__":
129
- roi_analysis()
scripts/staking.py CHANGED
@@ -1,7 +1,7 @@
1
  import json
2
  import sys
3
  from typing import Any, List
4
- from utils import RPC, DATA_DIR
5
  import requests
6
  from tqdm import tqdm
7
  from web3 import Web3
@@ -194,15 +194,14 @@ def label_trades_by_staking(trades_df: pd.DataFrame, start: int = None) -> None:
194
  staking_label
195
  )
196
  # tqdm.write(f"statking label {staking_label}")
197
- return
198
 
199
 
200
  if __name__ == "__main__":
201
  # create_service_map()
202
- trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
203
- print("before labeling")
204
- print(trades_df.staking.value_counts())
205
- label_trades_by_staking(trades_df=trades_df, start=8)
206
- print("after labeling")
207
  print(trades_df.staking.value_counts())
208
- trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
 
1
  import json
2
  import sys
3
  from typing import Any, List
4
+ from utils import RPC, DATA_DIR, TMP_DIR
5
  import requests
6
  from tqdm import tqdm
7
  from web3 import Web3
 
194
  staking_label
195
  )
196
  # tqdm.write(f"statking label {staking_label}")
197
+ return trades_df
198
 
199
 
200
  if __name__ == "__main__":
201
  # create_service_map()
202
+ trades_df = pd.read_parquet(TMP_DIR / "all_trades_df.parquet")
203
+ trades_df = trades_df.loc[trades_df["is_invalid"] == False]
204
+
205
+ trades_df = label_trades_by_staking(trades_df=trades_df, start=8)
 
206
  print(trades_df.staking.value_counts())
207
+ trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
scripts/tools.py CHANGED
@@ -17,10 +17,7 @@
17
  #
18
  # ------------------------------------------------------------------------------
19
 
20
- import os.path
21
  import json
22
- import time
23
- import random
24
  from typing import (
25
  Optional,
26
  List,
@@ -30,43 +27,21 @@ from typing import (
30
  )
31
  import pandas as pd
32
  import requests
33
- from eth_typing import ChecksumAddress
34
- from eth_utils import to_checksum_address
35
  from requests.adapters import HTTPAdapter
36
- from requests.exceptions import (
37
- ReadTimeout as RequestsReadTimeoutError,
38
- HTTPError as RequestsHTTPError,
39
- )
40
  from tqdm import tqdm
41
  from urllib3 import Retry
42
- from urllib3.exceptions import (
43
- ReadTimeoutError as Urllib3ReadTimeoutError,
44
- HTTPError as Urllib3HTTPError,
45
- )
46
- from web3 import Web3, HTTPProvider
47
  from markets import add_market_creator
48
  from concurrent.futures import ThreadPoolExecutor, as_completed
49
  from web3_utils import (
50
- read_abi,
51
- SLEEP,
52
- reduce_window,
53
- LATEST_BLOCK,
54
- LATEST_BLOCK_NAME,
55
- BLOCK_DATA_NUMBER,
56
- BLOCKS_CHUNK_SIZE,
57
- N_RPC_RETRIES,
58
  N_IPFS_RETRIES,
59
- RPC_POLL_INTERVAL,
60
- IPFS_POLL_INTERVAL,
61
  )
62
  from utils import (
63
  clean,
64
  BLOCK_FIELD,
65
- gen_event_filename,
66
  limit_text,
67
  DATA_DIR,
68
  JSON_DATA_DIR,
69
- REQUEST_ID_FIELD,
70
  MechEvent,
71
  MechEventName,
72
  MechRequest,
@@ -75,7 +50,6 @@ from utils import (
75
  REQUEST_ID,
76
  HTTP,
77
  HTTPS,
78
- REQUEST_SENDER,
79
  get_result_values,
80
  get_vote,
81
  get_win_probability,
@@ -97,7 +71,6 @@ IPFS_LINKS_SERIES_NAME = "ipfs_links"
97
  BACKOFF_FACTOR = 1
98
  STATUS_FORCELIST = [404, 500, 502, 503, 504]
99
  DEFAULT_FILENAME = "tools.parquet"
100
- RE_RPC_FILTER_ERROR = r"Filter with id: '\d+' does not exist."
101
  ABI_ERROR = "The event signature did not match the provided ABI"
102
  HTTP_TIMEOUT = 10
103
 
@@ -121,127 +94,6 @@ NUM_WORKERS = 10
121
  GET_CONTENTS_BATCH_SIZE = 1000
122
 
123
 
124
- def get_events(
125
- w3: Web3,
126
- event: str,
127
- mech_address: ChecksumAddress,
128
- mech_abi_path: str,
129
- earliest_block: int,
130
- latest_block: int,
131
- ) -> List:
132
- """Get the delivered events."""
133
- abi = read_abi(mech_abi_path)
134
- contract_instance = w3.eth.contract(address=mech_address, abi=abi)
135
-
136
- events = []
137
- from_block = earliest_block
138
- batch_size = BLOCKS_CHUNK_SIZE
139
- with tqdm(
140
- total=latest_block - from_block,
141
- desc=f"Searching {event} events for mech {mech_address}",
142
- unit="blocks",
143
- ) as pbar:
144
- while from_block < latest_block:
145
- events_filter = contract_instance.events[event].build_filter()
146
- events_filter.fromBlock = from_block
147
- events_filter.toBlock = min(from_block + batch_size, latest_block)
148
-
149
- entries = None
150
- retries = 0
151
- while entries is None:
152
- try:
153
- entries = events_filter.deploy(w3).get_all_entries()
154
- retries = 0
155
- except (RequestsHTTPError, Urllib3HTTPError) as exc:
156
- if "Request Entity Too Large" in exc.args[0]:
157
- events_filter, batch_size = reduce_window(
158
- contract_instance,
159
- event,
160
- from_block,
161
- batch_size,
162
- latest_block,
163
- )
164
- except (Urllib3ReadTimeoutError, RequestsReadTimeoutError):
165
- events_filter, batch_size = reduce_window(
166
- contract_instance, event, from_block, batch_size, latest_block
167
- )
168
- except Exception as exc:
169
- retries += 1
170
- if retries == N_RPC_RETRIES:
171
- tqdm.write(
172
- f"Skipping events for blocks {events_filter.fromBlock} - {events_filter.toBlock} "
173
- f"as the retries have been exceeded."
174
- )
175
- break
176
- sleep = SLEEP * retries
177
- # error_message = ""
178
- # if isinstance(exc.args[0], str):
179
- # error_message = exc.args[0]
180
- # elif isinstance(exc, ValueError):
181
- # error_message = exc.args[0].get("message", "")
182
- # if (
183
- # (
184
- # isinstance(exc, ValueError)
185
- # and re.match(RE_RPC_FILTER_ERROR, error_message) is None
186
- # )
187
- # and not isinstance(exc, ValueError)
188
- # and not isinstance(exc, MismatchedABI)
189
- # ):
190
-
191
- tqdm.write(
192
- f"An error was raised from the RPC: {exc}\n Retrying in {sleep} seconds."
193
- )
194
- if hasattr(exc, "message"):
195
- tqdm.write(f"Error message: {exc.message}\n")
196
- time.sleep(sleep)
197
-
198
- from_block += batch_size
199
- pbar.update(batch_size)
200
-
201
- if entries is None:
202
- continue
203
-
204
- chunk = list(entries)
205
- events.extend(chunk)
206
- time.sleep(RPC_POLL_INTERVAL)
207
-
208
- return events
209
-
210
-
211
- def parse_events(raw_events: List) -> List[MechEvent]:
212
- # TODO use dictionary instead of List
213
- """Parse all the specified MechEvents."""
214
- parsed_events = []
215
- for event in raw_events:
216
- for_block = event.get("blockNumber", 0)
217
- args = event.get(EVENT_ARGUMENTS, {})
218
- request_id = args.get(REQUEST_ID, 0)
219
- data = args.get(DATA, b"")
220
- sender = args.get(REQUEST_SENDER, "")
221
- parsed_event = MechEvent(for_block, request_id, data, sender)
222
- parsed_events.append(parsed_event)
223
-
224
- return parsed_events
225
-
226
-
227
- def parse_dict_events(events_dict: dict) -> List[MechEvent]:
228
- # TODO use dictionary instead of List
229
- """Parse all the specified MechEvents."""
230
- parsed_events = []
231
- list_ids = list(events_dict.keys())
232
- for mech_id in list_ids:
233
- event = events_dict[mech_id]
234
- for_block = event.get("blockNumber", 0)
235
- args = event.get(EVENT_ARGUMENTS, {})
236
- request_id = args.get(REQUEST_ID, 0)
237
- data = args.get(DATA, b"")
238
- sender = args.get(REQUEST_SENDER, "")
239
- parsed_event = MechEvent(for_block, request_id, data, sender)
240
- parsed_events.append(parsed_event)
241
-
242
- return parsed_events
243
-
244
-
245
  def create_session() -> requests.Session:
246
  """Create a session with a retry strategy."""
247
  session = requests.Session()
@@ -322,31 +174,6 @@ def parse_ipfs_tools_content(
322
  return mech_response
323
 
324
 
325
- def get_contents(
326
- session: requests.Session, events: List[MechEvent], event_name: MechEventName
327
- ) -> pd.DataFrame:
328
- """Fetch the tools' responses."""
329
- contents = []
330
- for event in tqdm(events, desc=f"Tools' results", unit="results"):
331
- url = event.ipfs_link(event_name)
332
- response = request(session, url)
333
- if response is None:
334
- tqdm.write(f"Skipping {event=}.")
335
- continue
336
-
337
- raw_content = parse_ipfs_response(session, url, event, event_name, response)
338
- if raw_content is None:
339
- continue
340
-
341
- mech_response = parse_ipfs_tools_content(raw_content, event, event_name)
342
- if mech_response is None:
343
- continue
344
- contents.append(mech_response)
345
- time.sleep(IPFS_POLL_INTERVAL)
346
-
347
- return pd.DataFrame(contents)
348
-
349
-
350
  def parse_json_events(json_events: dict, keys_to_traverse: List[int]) -> pd.DataFrame:
351
  """Function to parse the mech info in a json format"""
352
  all_records = []
@@ -356,6 +183,10 @@ def parse_json_events(json_events: dict, keys_to_traverse: List[int]) -> pd.Data
356
  output = {}
357
  output["request_id"] = json_input["requestId"]
358
  output["request_block"] = json_input["blockNumber"]
 
 
 
 
359
  output["prompt_request"] = json_input["ipfsContents"]["prompt"]
360
  output["tool"] = json_input["ipfsContents"]["tool"]
361
  output["nonce"] = json_input["ipfsContents"]["nonce"]
@@ -424,144 +255,6 @@ def transform_deliver(contents: pd.DataFrame) -> pd.DataFrame:
424
  return clean(contents)
425
 
426
 
427
- def store_progress(
428
- filename: str,
429
- event_to_contents: Dict[str, pd.DataFrame],
430
- tools: pd.DataFrame,
431
- ) -> None:
432
- """Store the given progress."""
433
- print("storing given progress")
434
- if filename:
435
- DATA_DIR.mkdir(parents=True, exist_ok=True) # Ensure the directory exists
436
- for event_name, content in event_to_contents.items():
437
- event_filename = gen_event_filename(
438
- event_name
439
- ) # Ensure this function returns a valid filename string
440
- try:
441
- if "result" in content.columns:
442
- content = content.drop(
443
- columns=["result"]
444
- ) # Avoid in-place modification
445
- content.to_parquet(DATA_DIR / event_filename, index=False)
446
- except Exception as e:
447
- print(f"Failed to write {event_name} data: {e}")
448
- # Drop result columns for tools DataFrame
449
- try:
450
- if "result" in tools.columns:
451
- tools = tools.drop(columns=["result"])
452
- tools.to_parquet(DATA_DIR / filename, index=False)
453
- except Exception as e:
454
- print(f"Failed to write tools data: {e}")
455
-
456
-
457
- def etl(
458
- rpcs: List[str],
459
- mech_info: dict[str, Any],
460
- filename: Optional[str] = None,
461
- ) -> pd.DataFrame:
462
- """Fetch from on-chain events, process, store and return the tools' results on
463
- all the questions as a Dataframe."""
464
- w3s = [Web3(HTTPProvider(r)) for r in rpcs]
465
- session = create_session()
466
- event_to_transformer = {
467
- MechEventName.REQUEST: transform_request,
468
- MechEventName.DELIVER: transform_deliver,
469
- }
470
-
471
- mech_to_info = {
472
- to_checksum_address(address): (
473
- os.path.join(CONTRACTS_PATH, filename),
474
- earliest_block,
475
- )
476
- for address, (filename, earliest_block) in mech_info.items()
477
- }
478
-
479
- event_to_contents = {}
480
-
481
- latest_block = LATEST_BLOCK
482
- if latest_block is None:
483
- latest_block = w3s[0].eth.get_block(LATEST_BLOCK_NAME)[BLOCK_DATA_NUMBER]
484
-
485
- next_start_block = None
486
-
487
- # Loop through events in event_to_transformer
488
- for event_name, transformer in event_to_transformer.items():
489
- # if next_start_block is None:
490
- # next_start_block_base = get_earliest_block(event_name)
491
-
492
- # Loop through mech addresses in mech_to_info
493
- events = []
494
- for address, (abi, earliest_block) in mech_to_info.items():
495
- next_start_block = earliest_block
496
- print(
497
- f"Searching for {event_name.value} events for mech {address} from block {next_start_block} to {latest_block}."
498
- )
499
-
500
- # parallelize the fetching of events
501
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
502
- futures = []
503
- for i in range(
504
- next_start_block, latest_block, BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE
505
- ):
506
- futures.append(
507
- executor.submit(
508
- get_events,
509
- random.choice(w3s),
510
- event_name.value,
511
- address,
512
- abi,
513
- i,
514
- min(i + BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE, latest_block),
515
- )
516
- )
517
-
518
- for future in tqdm(
519
- as_completed(futures),
520
- total=len(futures),
521
- desc=f"Fetching {event_name.value} Events",
522
- ):
523
- current_mech_events = future.result()
524
- events.extend(current_mech_events)
525
-
526
- print("Parsing events")
527
- parsed = parse_events(events)
528
-
529
- contents = []
530
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
531
- futures = []
532
- for i in range(0, len(parsed), GET_CONTENTS_BATCH_SIZE):
533
- futures.append(
534
- executor.submit(
535
- get_contents,
536
- session,
537
- parsed[i : i + GET_CONTENTS_BATCH_SIZE],
538
- event_name,
539
- )
540
- )
541
-
542
- for future in tqdm(
543
- as_completed(futures),
544
- total=len(futures),
545
- desc=f"Fetching {event_name.value} Contents",
546
- ):
547
- current_mech_contents = future.result()
548
- contents.append(current_mech_contents)
549
-
550
- contents = pd.concat(contents, ignore_index=True)
551
-
552
- transformed = transformer(contents)
553
-
554
- event_to_contents[event_name] = transformed.copy()
555
-
556
- # Store progress
557
- tools = pd.merge(*event_to_contents.values(), on=REQUEST_ID_FIELD)
558
- print(tools.info())
559
-
560
- store_progress(filename, event_to_contents, tools)
561
-
562
- return tools
563
-
564
-
565
  def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename: str):
566
  total_nr_events = len(json_events)
567
  ids_to_traverse = list(json_events.keys())
@@ -614,9 +307,5 @@ def generate_tools_file(input_filename: str, output_filename: str):
614
 
615
 
616
  if __name__ == "__main__":
617
- RPCs = [
618
- "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",
619
- ]
620
- filename = DEFAULT_FILENAME
621
 
622
- tools = etl(rpcs=RPCs, filename=filename)
 
17
  #
18
  # ------------------------------------------------------------------------------
19
 
 
20
  import json
 
 
21
  from typing import (
22
  Optional,
23
  List,
 
27
  )
28
  import pandas as pd
29
  import requests
30
+ from gnosis_timestamps import transform_timestamp_to_datetime
 
31
  from requests.adapters import HTTPAdapter
32
  from tqdm import tqdm
33
  from urllib3 import Retry
34
  from markets import add_market_creator
35
  from concurrent.futures import ThreadPoolExecutor, as_completed
36
  from web3_utils import (
37
  N_IPFS_RETRIES,
 
 
38
  )
39
  from utils import (
40
  clean,
41
  BLOCK_FIELD,
 
42
  limit_text,
43
  DATA_DIR,
44
  JSON_DATA_DIR,
 
45
  MechEvent,
46
  MechEventName,
47
  MechRequest,
 
50
  REQUEST_ID,
51
  HTTP,
52
  HTTPS,
 
53
  get_result_values,
54
  get_vote,
55
  get_win_probability,
 
71
  BACKOFF_FACTOR = 1
72
  STATUS_FORCELIST = [404, 500, 502, 503, 504]
73
  DEFAULT_FILENAME = "tools.parquet"
 
74
  ABI_ERROR = "The event signature did not match the provided ABI"
75
  HTTP_TIMEOUT = 10
76
 
 
94
  GET_CONTENTS_BATCH_SIZE = 1000
95
 
96

97
  def create_session() -> requests.Session:
98
  """Create a session with a retry strategy."""
99
  session = requests.Session()
 
174
  return mech_response
175
 
176

177
  def parse_json_events(json_events: dict, keys_to_traverse: List[int]) -> pd.DataFrame:
178
  """Function to parse the mech info in a json format"""
179
  all_records = []
 
183
  output = {}
184
  output["request_id"] = json_input["requestId"]
185
  output["request_block"] = json_input["blockNumber"]
186
+ output["request_time"] = transform_timestamp_to_datetime(
187
+ int(json_input["blockTimestamp"])
188
+ )
189
+ output["tx_hash"] = json_input["transactionHash"]
190
  output["prompt_request"] = json_input["ipfsContents"]["prompt"]
191
  output["tool"] = json_input["ipfsContents"]["tool"]
192
  output["nonce"] = json_input["ipfsContents"]["nonce"]
 
255
  return clean(contents)
256
 
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename: str):
259
  total_nr_events = len(json_events)
260
  ids_to_traverse = list(json_events.keys())
 
307
 
308
 
309
  if __name__ == "__main__":
 
 
 
 
310
 
311
+ generate_tools_file()
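
Note: the reworked pipeline no longer needs an RPC connection to resolve request times, since each mech event already carries its `blockTimestamp`. The `gnosis_timestamps` module is not part of this diff, so the following is only a minimal sketch of what `transform_timestamp_to_datetime` presumably does, assuming the timestamp is in unix seconds:

```python
# Hypothetical stand-in for gnosis_timestamps.transform_timestamp_to_datetime;
# the real module is not included in this commit.
from datetime import datetime, timezone


def transform_timestamp_to_datetime(timestamp: int) -> datetime:
    """Convert a unix block timestamp (seconds) to a timezone-aware UTC datetime."""
    return datetime.fromtimestamp(timestamp, tz=timezone.utc)


# The event payload already carries the timestamp, so no extra
# eth_getBlockByNumber round trip is needed:
print(transform_timestamp_to_datetime(1712345678))  # 2024-04-05 19:34:38+00:00
```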
scripts/tools_metrics.py ADDED
@@ -0,0 +1,93 @@
+ import pandas as pd
+ from typing import List
+ from utils import TMP_DIR, INC_TOOLS, DATA_DIR
+
+
+ def get_error_data_by_market(
+     tools_df: pd.DataFrame, inc_tools: List[str]
+ ) -> pd.DataFrame:
+     """Gets the error data for the given tools and calculates the error percentage."""
+     tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
+     error = (
+         tools_inc.groupby(
+             ["tool", "request_month_year_week", "market_creator", "error"], sort=False
+         )
+         .size()
+         .unstack()
+         .fillna(0)
+         .reset_index()
+     )
+     error["error_perc"] = (error[1] / (error[0] + error[1])) * 100
+     error["total_requests"] = error[0] + error[1]
+     return error
+
+
+ def get_tool_winning_rate_by_market(
+     tools_df: pd.DataFrame, inc_tools: List[str]
+ ) -> pd.DataFrame:
+     """Gets the tool winning rate data for the given tools by market and calculates the winning percentage."""
+     tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
+     tools_non_error = tools_inc[tools_inc["error"] != 1]
+     tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
+         {"no": "No", "yes": "Yes"}
+     )
+     tools_non_error = tools_non_error[
+         tools_non_error["currentAnswer"].isin(["Yes", "No"])
+     ]
+     tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
+     tools_non_error["win"] = (
+         tools_non_error["currentAnswer"] == tools_non_error["vote"]
+     ).astype(int)
+     tools_non_error.columns = tools_non_error.columns.astype(str)
+     wins = (
+         tools_non_error.groupby(
+             ["tool", "request_month_year_week", "market_creator", "win"], sort=False
+         )
+         .size()
+         .unstack()
+         .fillna(0)
+     )
+     wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
+     wins.reset_index(inplace=True)
+     wins["total_request"] = wins[0] + wins[1]
+     wins.columns = wins.columns.astype(str)
+     # Convert request_month_year_week to string and explicitly set type for Altair
+     # wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
+     return wins
+
+
+ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
+     tools["request_time"] = pd.to_datetime(tools["request_time"])
+     tools = tools.sort_values(by="request_time", ascending=True)
+
+     tools["request_month_year_week"] = (
+         pd.to_datetime(tools["request_time"]).dt.to_period("W").dt.strftime("%b-%d")
+     )
+     # preparing the tools graph
+     # adding the total
+     tools_all = tools.copy(deep=True)
+     tools_all["market_creator"] = "all"
+     # merging both dataframes
+     tools = pd.concat([tools, tools_all], ignore_index=True)
+     tools = tools.sort_values(by="request_time", ascending=True)
+     return tools
+
+
+ def compute_tools_based_datasets():
+     try:
+         tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
+         tools_df = prepare_tools(tools_df)
+     except Exception as e:
+         print(f"Error reading old tools parquet file {e}")
+         return None
+     # error by markets
+     error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
+     error_by_markets.to_parquet(DATA_DIR / "error_by_markets.parquet", index=False)
+     try:
+         tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
+         tools_df = prepare_tools(tools_df)
+     except Exception as e:
+         print(f"Error reading old tools parquet file {e}")
+         return None
+     winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
+     winning_df.to_parquet(DATA_DIR / "winning_df.parquet", index=False)
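
A quick sanity check of the new error aggregation on a toy frame (hypothetical usage; it assumes `scripts/` is on the import path and mirrors the columns that `prepare_tools` produces):

```python
import pandas as pd

from tools_metrics import get_error_data_by_market  # assumes scripts/ on sys.path

tools_df = pd.DataFrame(
    {
        "tool": ["claude-prediction-offline"] * 4,
        "request_month_year_week": ["Jan-01"] * 4,
        "market_creator": ["quickstart"] * 4,
        "error": [0, 0, 1, 0],  # one failed mech call out of four
    }
)

error = get_error_data_by_market(tools_df, ["claude-prediction-offline"])
print(error.loc[0, "error_perc"])      # 25.0
print(error.loc[0, "total_requests"])  # 4.0
```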
scripts/update_tools_accuracy.py CHANGED
@@ -1,15 +1,12 @@
  import os
  import pandas as pd
  import ipfshttpclient
- from pathlib import Path
  from utils import INC_TOOLS
  from typing import List
+ from utils import TMP_DIR, DATA_DIR
 
  ACCURACY_FILENAME = "tools_accuracy.csv"
  IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
- SCRIPTS_DIR = Path(__file__).parent
- ROOT_DIR = SCRIPTS_DIR.parent
- DATA_DIR = ROOT_DIR / "data"
 
 
  def update_tools_accuracy(
@@ -65,6 +62,7 @@ def update_tools_accuracy(
      print("tools to update")
      print(tools_to_update)
      existing_tools = list(tools_acc["tool"].values)
+     # dt.strftime("%Y-%m-%d %H:%M:%S")
      acc_info["min"] = acc_info["min"].dt.strftime("%Y-%m-%d %H:%M:%S")
      acc_info["max"] = acc_info["max"].dt.strftime("%Y-%m-%d %H:%M:%S")
      for tool in tools_to_update:
@@ -101,7 +99,7 @@ def update_tools_accuracy(
  def compute_tools_accuracy():
      print("Computing accuracy of tools")
      print("Reading tools parquet file")
-     tools = pd.read_parquet(DATA_DIR / "tools.parquet")
+     tools = pd.read_parquet(TMP_DIR / "tools.parquet")
      print(tools.head())
      # Computing tools accuracy information
      print("Computing tool accuracy information")
scripts/web3_utils.py CHANGED
@@ -12,7 +12,15 @@ from tqdm import tqdm
  from web3 import Web3
  from typing import Any, Optional
  from web3.types import BlockParams
- from utils import JSON_DATA_DIR, DATA_DIR, SUBGRAPH_API_KEY, to_content, SUBGRAPH_URL
+ from utils import (
+     JSON_DATA_DIR,
+     DATA_DIR,
+     SUBGRAPH_API_KEY,
+     to_content,
+     SUBGRAPH_URL,
+     HIST_DIR,
+     TMP_DIR,
+ )
  from queries import conditional_tokens_gc_user_query, omen_xdai_trades_query
  import pandas as pd
 
@@ -96,11 +104,11 @@ def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> li
  def updating_timestamps(rpc: str, tools_filename: str):
      web3 = Web3(Web3.HTTPProvider(rpc))
 
-     tools = pd.read_parquet(DATA_DIR / tools_filename)
+     tools = pd.read_parquet(TMP_DIR / tools_filename)
 
      # Convert block number to timestamp
      print("Converting block number to timestamp")
-     t_map = pickle.load(open(DATA_DIR / "t_map.pkl", "rb"))
+     t_map = pickle.load(open(TMP_DIR / "t_map.pkl", "rb"))
      tools["request_time"] = tools["request_block"].map(t_map)
 
      no_data = tools["request_time"].isna().sum()
@@ -129,7 +137,7 @@ def updating_timestamps(rpc: str, tools_filename: str):
 
      # Save the tools data after the updates on the content
      print(f"Updating file {tools_filename} with timestamps")
-     tools.to_parquet(DATA_DIR / tools_filename, index=False)
+     tools.to_parquet(TMP_DIR / tools_filename, index=False)
 
      # Update t_map with new timestamps
      new_timestamps = (
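
`updating_timestamps` keeps a pickled block-to-timestamp cache so that only unmapped blocks hit the RPC. A self-contained sketch of that cache pattern, reusing the `t_map` naming from the code above; the RPC endpoint is a placeholder:

```python
import pickle
from pathlib import Path

from web3 import Web3

TMP_DIR = Path("tmp")  # matches the TMP_DIR convention used above


def block_to_timestamp(web3: Web3, block_number: int, t_map: dict) -> int:
    """Resolve a block's timestamp, hitting the RPC only on a cache miss."""
    if block_number not in t_map:
        t_map[block_number] = web3.eth.get_block(block_number)["timestamp"]
    return t_map[block_number]


web3 = Web3(Web3.HTTPProvider("https://rpc.gnosischain.com"))  # placeholder RPC
t_map = pickle.load(open(TMP_DIR / "t_map.pkl", "rb"))
ts = block_to_timestamp(web3, 30_000_000, t_map)
pickle.dump(t_map, open(TMP_DIR / "t_map.pkl", "wb"))  # persist new entries
```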
tabs/error.py CHANGED
@@ -9,33 +9,14 @@ HEIGHT = 600
  WIDTH = 1000
 
 
- def get_error_data_by_market(
-     tools_df: pd.DataFrame, inc_tools: List[str]
- ) -> pd.DataFrame:
-     """Gets the error data for the given tools and calculates the error percentage."""
-     tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
-     error = (
-         tools_inc.groupby(
-             ["tool", "request_month_year_week", "market_creator", "error"], sort=False
-         )
-         .size()
-         .unstack()
-         .fillna(0)
-         .reset_index()
-     )
-     error["error_perc"] = (error[1] / (error[0] + error[1])) * 100
-     error["total_requests"] = error[0] + error[1]
-     return error
-
-
  def get_error_data_overall_by_market(error_df: pd.DataFrame) -> pd.DataFrame:
      """Gets the error data for the given tools and calculates the error percentage."""
      error_total = (
          error_df.groupby(["request_month_year_week", "market_creator"], sort=False)
-         .agg({"total_requests": "sum", 1: "sum", 0: "sum"})
+         .agg({"total_requests": "sum", "1": "sum", "0": "sum"})
          .reset_index()
      )
-     error_total["error_perc"] = (error_total[1] / error_total["total_requests"]) * 100
+     error_total["error_perc"] = (error_total["1"] / error_total["total_requests"]) * 100
      error_total.columns = error_total.columns.astype(str)
      error_total["error_perc"] = error_total["error_perc"].apply(lambda x: round(x, 4))
      return error_total
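
Note: switching the aggregation keys from the integers `1`/`0` to the strings `"1"`/`"0"` is not cosmetic. The weekly counts are now produced in `scripts/tools_metrics.py` and round-tripped through `error_by_markets.parquet`, and parquet column names are strings, so the integer labels that `unstack()` created in memory no longer exist when the frame is read back. A minimal repro of the mismatch:

```python
import pandas as pd

counts = pd.DataFrame({"total_requests": [4], 0: [3], 1: [1]})
counts.columns = counts.columns.astype(str)  # what a parquet round trip hands back

print(counts.agg({"1": "sum"}))  # works: the label is now the string "1"
try:
    counts.agg({1: "sum"})       # the old integer key no longer matches
except KeyError as err:
    print("KeyError:", err)
```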
tabs/metrics.py CHANGED
@@ -28,40 +28,6 @@ HEIGHT = 600
  WIDTH = 1000
 
 
- def get_metrics(
-     metric_name: str, column_name: str, market_creator: str, trades_df: pd.DataFrame
- ) -> pd.DataFrame:
-     # this is to filter out the data before 2023-09-01
-     trades_filtered = trades_df[trades_df["creation_timestamp"] > "2023-09-01"]
-     if market_creator != "all":
-         trades_filtered = trades_filtered.loc[
-             trades_filtered["market_creator"] == market_creator
-         ]
-
-     trades_filtered = (
-         trades_filtered.groupby("month_year_week", sort=False)[column_name]
-         .quantile([0.25, 0.5, 0.75])
-         .unstack()
-     )
-     # reformat the data as percentile, date, value
-     trades_filtered = trades_filtered.melt(
-         id_vars=["month_year_week"], var_name="percentile", value_name=metric_name
-     )
-     trades_filtered.columns = trades_filtered.columns.astype(str)
-     trades_filtered.reset_index(inplace=True)
-     trades_filtered.columns = [
-         "month_year_week",
-         "25th_percentile",
-         "50th_percentile",
-         "75th_percentile",
-     ]
-     # reformat the data as percentile, date, value
-     trades_filtered = trades_filtered.melt(
-         id_vars=["month_year_week"], var_name="percentile", value_name=metric_name
-     )
-     return trades_filtered
-
-
  def get_boxplot_metrics(column_name: str, trades_df: pd.DataFrame) -> pd.DataFrame:
      trades_filtered = trades_df[
          ["creation_timestamp", "month_year_week", "market_creator", column_name]
@@ -81,45 +47,6 @@ def get_boxplot_metrics(column_name: str, trades_df: pd.DataFrame) -> pd.DataFrame:
      return all_filtered_trades
 
 
- def plot2_trade_details(
-     metric_name: str, market_creator: str, trades_df: pd.DataFrame
- ) -> gr.Plot:
-     """Plots the trade details for the given trade detail."""
-
-     if metric_name == "mech calls":
-         metric_name = "mech_calls"
-         column_name = "num_mech_calls"
-         yaxis_title = "Nr of mech calls per trade"
-     elif metric_name == "ROI":
-         column_name = "roi"
-         yaxis_title = "ROI (net profit/cost)"
-     elif metric_name == "collateral amount":
-         metric_name = "collateral_amount"
-         column_name = metric_name
-         yaxis_title = "Collateral amount per trade (xDAI)"
-     elif metric_name == "net earnings":
-         metric_name = "net_earnings"
-         column_name = metric_name
-         yaxis_title = "Net profit per trade (xDAI)"
-     else:  # earnings
-         column_name = metric_name
-         yaxis_title = "Gross profit per trade (xDAI)"
-
-     trades_filtered = get_metrics(metric_name, column_name, market_creator, trades_df)
-     fig = px.line(
-         trades_filtered, x="month_year_week", y=metric_name, color="percentile"
-     )
-     fig.update_layout(
-         xaxis_title="Week",
-         yaxis_title=yaxis_title,
-         legend=dict(yanchor="top", y=0.5),
-     )
-     fig.update_xaxes(tickformat="%b %d\n%Y")
-     return gr.Plot(
-         value=fig,
-     )
-
-
  def plot_trade_metrics(
      metric_name: str, trades_df: pd.DataFrame, trader_filter: str = None
  ) -> gr.Plot:
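
For reference, the weekly-percentile reshaping that the removed `get_metrics`/`plot2_trade_details` pair performed can be expressed more directly; the original melted twice and relied on positional column renames. A cleaned-up sketch that is equivalent in intent, not the committed implementation:

```python
import pandas as pd


def weekly_percentiles(trades_df: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Long-format 25th/50th/75th weekly percentiles of one trade metric."""
    wide = (
        trades_df.groupby("month_year_week", sort=False)[column_name]
        .quantile([0.25, 0.5, 0.75])
        .unstack()  # columns: 0.25, 0.5, 0.75
        .rename(columns={0.25: "25th_percentile", 0.5: "50th_percentile", 0.75: "75th_percentile"})
        .reset_index()
    )
    # one row per (week, percentile), ready for px.line(..., color="percentile")
    return wide.melt(
        id_vars=["month_year_week"], var_name="percentile", value_name=column_name
    )


df = pd.DataFrame({"month_year_week": ["Jan-01"] * 4, "roi": [0.1, 0.2, 0.3, 0.4]})
print(weekly_percentiles(df, "roi"))
```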
tabs/tool_win.py CHANGED
@@ -26,40 +26,6 @@ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
      return tools
 
 
- def get_tool_winning_rate_by_market(
-     tools_df: pd.DataFrame, inc_tools: List[str]
- ) -> pd.DataFrame:
-     """Gets the tool winning rate data for the given tools by market and calculates the winning percentage."""
-     tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
-     tools_non_error = tools_inc[tools_inc["error"] != 1]
-     tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
-         {"no": "No", "yes": "Yes"}
-     )
-     tools_non_error = tools_non_error[
-         tools_non_error["currentAnswer"].isin(["Yes", "No"])
-     ]
-     tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
-     tools_non_error["win"] = (
-         tools_non_error["currentAnswer"] == tools_non_error["vote"]
-     ).astype(int)
-     tools_non_error.columns = tools_non_error.columns.astype(str)
-     wins = (
-         tools_non_error.groupby(
-             ["tool", "request_month_year_week", "market_creator", "win"], sort=False
-         )
-         .size()
-         .unstack()
-         .fillna(0)
-     )
-     wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
-     wins.reset_index(inplace=True)
-     wins["total_request"] = wins[0] + wins[1]
-     wins.columns = wins.columns.astype(str)
-     # Convert request_month_year_week to string and explicitly set type for Altair
-     # wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
-     return wins
-
-
  def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
      """Gets the overall winning rate data for the given tools and calculates the winning percentage."""
      overall_wins = (
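
The `win` flag that feeds `get_overall_winning_rate_by_market` is computed upstream (now in `scripts/tools_metrics.py`) by normalizing answer casing and comparing it to the tool's vote. A toy check of that matching logic:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "currentAnswer": ["yes", "No", "Yes", "no"],
        "vote": ["Yes", "No", "No", "Yes"],
    }
)
df["currentAnswer"] = df["currentAnswer"].replace({"no": "No", "yes": "Yes"})
df["win"] = (df["currentAnswer"] == df["vote"]).astype(int)

print(df["win"].tolist())      # [1, 1, 0, 0]
print(df["win"].mean() * 100)  # 50.0, the win_perc for this bucket
```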