rosacastillo committed
Commit · ea0955a · 1 Parent(s): 5d61ee1
Removed dependency on tools.parquet and made the mech calls computation timestamp-based
Browse files
- app.py +34 -17
- data/all_trades_profitability.parquet +2 -2
- data/{summary_profitability.parquet → error_by_markets.parquet} +2 -2
- data/invalid_trades.parquet +2 -2
- data/service_map.pkl +1 -1
- data/tools.parquet +0 -3
- data/tools_accuracy.csv +2 -2
- data/unknown_traders.parquet +2 -2
- data/{t_map.pkl → winning_df.parquet} +2 -2
- notebooks/mech_calls_analysis.ipynb +125 -97
- notebooks/tool_errors_analysis.ipynb +923 -34
- scripts/cleaning_old_info.py +14 -13
- scripts/daily_data.py +1 -1
- scripts/get_mech_info.py +11 -41
- scripts/gnosis_timestamps.py +184 -0
- scripts/mech_request_utils.py +9 -9
- scripts/nr_mech_calls.py +13 -15
- scripts/profitability.py +24 -90
- scripts/pull_data.py +7 -12
- scripts/roi_analysis.py +0 -129
- scripts/staking.py +7 -8
- scripts/tools.py +6 -317
- scripts/tools_metrics.py +93 -0
- scripts/update_tools_accuracy.py +3 -5
- scripts/web3_utils.py +12 -4
- tabs/error.py +2 -21
- tabs/metrics.py +0 -73
- tabs/tool_win.py +0 -34
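
The app.py diff below drops the heavy tools.parquet dependency and has the dashboard read the smaller precomputed parquet files through duckdb instead. A minimal sketch of that loading pattern, assuming only what the diff shows (the file names and the in-memory duckdb connection); the helper name load_tables is illustrative, not part of the repo:

import duckdb

# Each dashboard table is read straight from a precomputed parquet file;
# duckdb's read_parquet scans the file without a prior load step.
PARQUET_FILES = [
    "./data/error_by_markets.parquet",
    "./data/unknown_traders.parquet",
    "./data/winning_df.parquet",
]

def load_tables(paths=PARQUET_FILES):
    con = duckdb.connect(":memory:")
    try:
        return [
            con.execute(f"SELECT * FROM read_parquet('{p}')").fetchdf()
            for p in paths
        ]
    finally:
        con.close()

Loading only these small files keeps dashboard start-up independent of the 616 MB tools.parquet that this commit deletes.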
app.py CHANGED

@@ -1,4 +1,3 @@
-from datetime import datetime, timedelta
 import gradio as gr
 import pandas as pd
 import duckdb

@@ -23,8 +22,6 @@ from tabs.metrics import (
 )
 
 from tabs.tool_win import (
-    prepare_tools,
-    get_tool_winning_rate_by_market,
     integrated_plot_tool_winnings_overall_per_market_by_week,
     integrated_tool_winnings_by_tool_per_market,
 )

@@ -44,7 +41,6 @@ from tabs.invalid_markets import (
 from tabs.error import (
     plot_week_error_data_by_market,
     plot_error_data_by_market,
-    get_error_data_by_market,
     get_error_data_overall_by_market,
     plot_tool_error_data_by_market,
 )

@@ -77,6 +73,12 @@ def get_all_data():
     logger.info("Getting all data")
 
     con = duckdb.connect(":memory:")
+    query6 = f"""
+    SELECT *
+    FROM read_parquet('./data/winning_df.parquet')
+    """
+    df6 = con.execute(query6).fetchdf()
+
     query5 = f"""
     SELECT *
     FROM read_parquet('./data/unknown_traders.parquet')

@@ -107,26 +109,30 @@ def get_all_data():
 
     query1 = f"""
     SELECT *
-    FROM read_parquet('./data/…
+    FROM read_parquet('./data/error_by_markets.parquet')
     """
     df1 = con.execute(query1).fetchdf()
-    logger.info("Got all data from…
+    logger.info("Got all data from error_by_markets.parquet")
 
     con.close()
 
-    return df1, df2, df3, df4, df5
+    return df1, df2, df3, df4, df5, df6
 
 
 def prepare_data():
     """
     Prepare the data for the dashboard
     """
-    … (three removed lines, truncated in the page)
+    (
+        error_by_markets,
+        trades_df,
+        tools_accuracy_info,
+        invalid_trades,
+        unknown_trades,
+        winning_df,
+    ) = get_all_data()
     print(trades_df.info())
 
-    tools_df = prepare_tools(tools_df)
     trades_df = prepare_trades(trades_df)
     unknown_trades = prepare_trades(unknown_trades)

@@ -145,22 +151,33 @@ def prepare_data():
     outliers.to_parquet("./data/outliers.parquet")
     trades_df = trades_df.loc[trades_df["roi"] < 1000]
 
-    return …
+    return (
+        error_by_markets,
+        trades_df,
+        tools_accuracy_info,
+        invalid_trades,
+        unknown_trades,
+        winning_df,
+    )
 
 
-… (three removed lines, truncated in the page)
+(
+    error_by_markets,
+    trades_df,
+    tools_accuracy_info,
+    invalid_trades,
+    unknown_trades,
+    winning_df,
+) = prepare_data()
 trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
 unknown_trades = unknown_trades.sort_values(by="creation_timestamp", ascending=True)
 
 demo = gr.Blocks()
 
 # preparing data for the errors
-…
+
 error_overall_by_markets = get_error_data_overall_by_market(error_df=error_by_markets)
 
-winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
 # preparing data for the trades graph
 trades_count_df = get_overall_trades(trades_df=trades_df)
 trades_by_market = get_overall_by_market_trades(trades_df=trades_df)
data/all_trades_profitability.parquet CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:2dc010db5a3f4163f3d09274101a14cd63a860e64c92649c694c816f28799342
+size 6789999
data/{summary_profitability.parquet → error_by_markets.parquet} RENAMED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:cbe47e7cb744db4522161c6c121ac9393937d53ca372a2210952f7a469f59489
+size 12067
data/invalid_trades.parquet CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:9b7c4c831e583b8632a6a45079df9e400fea4e40287bbed594624ad9f9437907
+size 196588
data/service_map.pkl CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:93ac540e1bcd347a48b9978b87443ae64af0f8b0a4daff305c4ad99cd0959a73
 size 90766
data/tools.parquet DELETED

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0d5753d5858231903cf1bc20f47a54dae742f35da95ed15ddcb5f44a5be8338f
-size 616260724
data/tools_accuracy.csv CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:818026934d2218b01f130770ffcb7563c80de0900be6721a55cd2499f9731889
+size 1100
data/unknown_traders.parquet CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:0164ef5ecaf966a5dcc677d96bba860c344f43cf53e237b6687b797502bd5e36
+size 184719
data/{t_map.pkl → winning_df.parquet} RENAMED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:fe676fcd7dde4b833f770dafa8e474a96bbe17fb16b9ceb160c03c2519ba72b4
+size 12980
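
All of the data/ entries above are Git LFS pointer files rather than the data itself: three lines giving the spec version, the sha256 oid of the stored blob, and its size in bytes (so the deleted tools.parquet weighed in at 616260724 bytes). A small sketch of parsing such a pointer; the helper is illustrative, not part of this repo:

def read_lfs_pointer(path):
    """Parse a git-lfs pointer file into {'version', 'oid', 'size'}."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])  # size is the blob's byte count
    return fields

# e.g. read_lfs_pointer("data/tools_accuracy.csv")
# -> {'version': 'https://git-lfs.github.com/spec/v1',
#     'oid': 'sha256:818026934d2218b01f130770ffcb7563c80de0900be6721a55cd2499f9731889',
#     'size': 1100}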
notebooks/mech_calls_analysis.ipynb CHANGED

@@ -59,7 +59,6 @@ / @@ -69,6 +68,7 @@ — in the rendered all_trades.head() table header, the is_invalid column is removed and an nr_mech_calls column is added:

-      " <th>is_invalid</th>\n",
 ...
+      " <th>nr_mech_calls</th>\n",

@@ -76,125 +76,125 @@ / @@ -206,48 +206,48 @@ — the HTML and text/plain renderings of all_trades.head() are regenerated for the new data (5 rows × 22 columns; the previously truncated cell values now appear in full). The text/plain output reads:

                                             trade_id \
 0  0x0dfb9821725003c4d3007999968d34d7070959ef0x01...
 1  0x1082be4e429e512182089162f41b3a86a52eee370x01...
 2  0x150f4d4e5affa7fe332684d7c828c0a471c4d5de0x01...
 3  0x15edf592dc3eb67e1c163ceb6d23039710cd67fb0x01...
 4  0x187c822a330c393912398884faf8150d21b4a7840x01...

           creation_timestamp \
 0  2024-10-27 21:51:25+00:00
 1  2024-10-31 22:50:15+00:00
 2  2024-10-29 02:21:25+00:00
 3  2024-10-28 21:59:25+00:00
 4  2024-10-30 00:30:45+00:00

                                                title market_status \
 0  Will any mainstream U.S. news outlet publish a...        CLOSED
 1  Will Prime Minister Shigeru Ishiba announce a ...        CLOSED
 2  Will the Constitutional Democratic Party of Ja...        CLOSED
 3  Will there be a public statement from the Bide...        CLOSED
 4  Will the Bank of Japan issue a public statemen...        CLOSED

    collateral_amount outcome_index trade_fee_amount outcomes_tokens_traded \
 0           0.461993             1         0.004620               0.734537
 1           0.859939             0         0.008599               2.714890
 2           0.203751             1         0.002038               0.305174
 3           0.412054             1         0.004121               0.666936
 4           0.333192             0         0.003332               0.447445

    ... winning_trade  earnings redeemed redeemed_amount num_mech_calls \
 0  ...          True  0.734537     True        0.734537            2.0
 1  ...         False  0.000000    False        0.000000            8.0
 2  ...          True  0.305174     True        0.305174            2.0
 3  ...         False  0.000000    False        0.000000            2.0
 4  ...          True  0.447445     True        0.447445            8.0

    mech_fee_amount net_earnings       roi      staking nr_mech_calls
 0             0.02     0.247924  0.509488  non_staking           NaN
 1             0.08    -0.948538 -1.000000  non_staking           NaN
 2             0.02     0.079385  0.351592  non_staking           NaN
 3             0.02    -0.436175 -1.000000  non_staking           NaN
 4             0.08     0.030922  0.074237  non_staking           NaN

 [5 rows x 22 columns]

@@ -259,6 +259,34 @@ — a new cell summarizing the per-trade mech call counts is inserted after all_trades.head():

+ all_trades.num_mech_calls.describe()

  count    43987.000000
  mean         6.663537
  std         13.608287
  min          0.000000
  25%          2.000000
  50%          5.000000
  75%          8.000000
  max        650.000000
  Name: num_mech_calls, dtype: float64

The following cell (execution count 4) is unchanged context.
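
The commit message says the new mech calls computation is timestamp-based, and the notebook above now carries a num_mech_calls value per trade (mean ≈ 6.66, max 650). A plausible sketch of such a timestamp-based count, assuming a trades frame with trader_address and creation_timestamp and a requests frame with trader_address and request_time as seen in these notebooks; the pairing rule and window are assumptions, not the actual logic of scripts/nr_mech_calls.py:

import pandas as pd

def count_mech_calls(trades: pd.DataFrame, requests: pd.DataFrame,
                     window: pd.Timedelta = pd.Timedelta(days=2)) -> pd.Series:
    # For each trade, count mech requests made by the same trader in the
    # `window` leading up to the trade's creation_timestamp.
    by_trader = requests.groupby("trader_address")["request_time"]
    counts = []
    for _, trade in trades.iterrows():
        try:
            times = by_trader.get_group(trade["trader_address"])
        except KeyError:  # this trader made no mech requests at all
            counts.append(0)
            continue
        end = trade["creation_timestamp"]
        counts.append(int(((times > end - window) & (times <= end)).sum()))
    return pd.Series(counts, index=trades.index, name="num_mech_calls")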
notebooks/tool_errors_analysis.ipynb CHANGED

@@ -2,7 +2,7 @@ / @@ -15,16 +15,234 @@ — execution counts are bumped (47, 48, 49, 51, …) and the notebook now starts from the precomputed per-market error data instead of the raw tools data:

+ error_by_markets = pd.read_parquet('../data/error_by_markets.parquet')

New cells follow:

+ error_by_markets.columns
  Index(['tool', 'request_month_year_week', 'market_creator', '0', '1',
         'error_perc', 'total_requests'],
        dtype='object')

+ error_by_markets.tail()
                 tool request_month_year_week market_creator       0     1  \
  355  superforcaster                  Dec-22            all  1087.0  61.0
  356  superforcaster                  Dec-22          pearl    75.0  10.0
  357  superforcaster                  Dec-29     quickstart   678.0  59.0
  358  superforcaster                  Dec-29            all   705.0  60.0
  359  superforcaster                  Dec-29          pearl    27.0   1.0

       error_perc  total_requests
  355    5.313589          1148.0
  356   11.764706            85.0
  357    8.005427           737.0
  358    7.843137           765.0
  359    3.571429            28.0

+ error_total = (
+     error_by_markets.groupby(["request_month_year_week", "market_creator"], sort=False)
+     .agg({"total_requests": "sum", '1': "sum", '0': "sum"})
+     .reset_index()
+ )

+ new_tools = pd.read_parquet('../data/new_tools.parquet')
+ len(new_tools)
  155789

+ new_tools.columns
  Index(['request_id', 'request_block', 'request_time', 'tx_hash',
         'prompt_request', 'tool', 'nonce', 'trader_address', 'deliver_block',
         'error', 'error_message', 'prompt_response', 'mech_address', 'p_yes',
         'p_no', 'confidence', 'info_utility', 'vote', 'win_probability',
         'market_creator'],
        dtype='object')

@@ -32,42 +250,713 @@ — the old info() output (a 286042-row, 23-column frame ending in request_month_year_week; dtypes: float64(5), int64(1), object(17); memory usage: 50.2+ MB) is replaced by new_tools.info():

  <class 'pandas.core.frame.DataFrame'>
  RangeIndex: 155789 entries, 0 to 155788
  Data columns (total 20 columns):
   #   Column           Non-Null Count   Dtype
  ---  ------           --------------   -----
   0   request_id       155789 non-null  object
   1   request_block    155789 non-null  object
   2   request_time     155789 non-null  datetime64[ns, UTC]
   3   tx_hash          155789 non-null  object
   4   prompt_request   155789 non-null  object
   5   tool             155789 non-null  object
   6   nonce            155789 non-null  object
   7   trader_address   155789 non-null  object
   8   deliver_block    155789 non-null  object
   9   error            155789 non-null  int64
   10  error_message    61690 non-null   object
   11  prompt_response  131002 non-null  object
   12  mech_address     131002 non-null  object
   13  p_yes            94099 non-null   float64
   14  p_no             94099 non-null   float64
   15  confidence       94099 non-null   float64
   16  info_utility     94099 non-null   float64
   17  vote             66870 non-null   object
   18  win_probability  94099 non-null   float64
   19  market_creator   155789 non-null  object
  dtypes: datetime64[ns, UTC](1), float64(5), int64(1), object(13)
  memory usage: 23.8+ MB

Further added cells compare the new frame with the legacy tools data:

+ new_tools.iloc[0]
  (a prediction-offline request from 2024-12-23 12:37:05+00:00, served by
  mech 0x5e1d1eb61e1164d5a50b28c575da73a29595dff7, market_creator quickstart)

+ new_tools.iloc[0].request_time
  Timestamp('2024-12-23 12:37:05+0000', tz='UTC')

+ tools = pd.read_parquet('../tmp/tools.parquet')
+ tools.iloc[0].request_time
  Timestamp('2024-10-26 13:03:55+0000', tz='UTC')
+ len(tools)
  626382

+ tools.head()
  (5 rows × 23 columns; claude-prediction-offline and superforcaster requests
  from late October 2024, quickstart markets, no tx_hash column)
+ tools.iloc[0]
  (a claude-prediction-online request for a pearl market,
  request_time 2024-10-22 00:56:35+00:00, from an earlier execution)

+ merge_df = pd.concat([tools, new_tools], ignore_index=True)

The next output shows the head of the merged frame (5 rows × 24 columns); rows that came from the legacy tools frame have tx_hash = NaN.
|
928 |
+
"output_type": "execute_result"
|
929 |
+
}
|
930 |
+
],
|
931 |
+
"source": [
|
932 |
+
"merge_df.head()"
|
933 |
+
]
|
934 |
+
},
|
935 |
+
{
|
936 |
+
"cell_type": "code",
|
937 |
+
"execution_count": 28,
|
938 |
+
"metadata": {},
|
939 |
+
"outputs": [],
|
940 |
+
"source": [
|
941 |
+
"merge_df.drop(columns=\"tx_hash\", inplace=True)"
|
942 |
+
]
|
943 |
+
},
|
944 |
+
{
|
945 |
+
"cell_type": "code",
|
946 |
+
"execution_count": 40,
|
947 |
+
"metadata": {},
|
948 |
+
"outputs": [],
|
949 |
+
"source": [
|
950 |
+
"merge_df.to_parquet(\"../tmp/tools.parquet\", index=False)"
|
951 |
+
]
|
952 |
+
},
|
953 |
+
{
|
954 |
+
"cell_type": "code",
|
955 |
+
"execution_count": null,
|
956 |
+
"metadata": {},
|
957 |
+
"outputs": [],
|
958 |
+
"source": []
|
959 |
+
},
|
960 |
{
|
961 |
"cell_type": "code",
|
962 |
"execution_count": 4,
|
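Note: the notebook's last step exports the merged frame to ../tmp/tools.parquet. A minimal read-back sketch to sanity-check the export (path as used in the notebook cell; whether tx_hash survived depends on cell execution order, since the drop ran as In [28] but the head() shown above is In [39]):

    import pandas as pd

    # Read the exported file back; the path is the notebook-relative one
    # used in the cell above.
    df = pd.read_parquet("../tmp/tools.parquet")
    print(df.shape)
    print(df.columns.tolist())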
scripts/cleaning_old_info.py
CHANGED
@@ -1,7 +1,5 @@
 import pandas as pd
-from profitability import summary_analyse
 from utils import DATA_DIR
-from staking import label_trades_by_staking
 
 
 def clean_old_data_from_parquet_files(cutoff_date: str):
@@ -47,21 +45,24 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
     except Exception as e:
         print(f"Error cleaning all trades profitability file {e}")
 
-    # ...
+    # clean unknown_traders.parquet
     try:
-        label_trades_by_staking(trades_df=all_trades, update=False)
-        ...
-            DATA_DIR / "all_trades_profitability.parquet", index=False
-        )
-        ...
+        unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
+        unknown_traders["creation_timestamp"] = pd.to_datetime(
+            unknown_traders["creation_timestamp"], utc=True
+        )
+
+        print(f"length before filtering {len(unknown_traders)}")
+        unknown_traders = unknown_traders.loc[
+            unknown_traders["creation_timestamp"] > min_date_utc
+        ]
+        print(f"length after filtering {len(unknown_traders)}")
+        unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
+
     except Exception as e:
-        print(f"Error ...
+        print(f"Error cleaning unknown_traders file {e}")
 
 
 if __name__ == "__main__":
-    clean_old_data_from_parquet_files("2024-...
+    clean_old_data_from_parquet_files("2024-10-25")
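Note: the new block filters on min_date_utc, which is defined in an unshown, earlier part of the function; presumably it is cutoff_date parsed as a UTC timestamp. A minimal sketch of that assumption:

    import pandas as pd

    # Hypothetical derivation of min_date_utc from cutoff_date; the real
    # definition lives in a part of the function not shown in this hunk.
    cutoff_date = "2024-10-25"
    min_date_utc = pd.Timestamp(cutoff_date, tz="UTC")
    print(min_date_utc)  # 2024-10-25 00:00:00+00:00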
scripts/daily_data.py
CHANGED
@@ -32,7 +32,7 @@ def prepare_live_metrics(
     )
 
     # staking label
-    label_trades_by_staking(all_trades_df)
+    all_trades_df = label_trades_by_staking(all_trades_df)
 
     # create the unknown traders dataset
     unknown_traders_df, all_trades_df = create_unknown_traders_df(
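Note: this change and the matching `return trades_df` in scripts/staking.py further down switch from mutate-in-place to reassigning the return value, which stays correct even if the labeling function ever rebinds or filters the frame internally. A toy sketch, with a hypothetical label() standing in for label_trades_by_staking, of why reassignment is the safer contract:

    import pandas as pd

    def label(df: pd.DataFrame) -> pd.DataFrame:
        # Rebinding (e.g. a .loc filter or .copy) creates a new object, so
        # callers relying on in-place mutation would silently keep the
        # unlabeled original.
        df = df.copy()
        df["staking"] = "non_staking"
        return df

    trades = pd.DataFrame({"trader_address": ["0xabc"]})
    trades = label(trades)  # reassign, as daily_data.py now does
    print(trades)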
scripts/get_mech_info.py
CHANGED
@@ -11,6 +11,7 @@ from utils import (
 import requests
 import pandas as pd
 import numpy as np
+from gnosis_timestamps import compute_request_time
 from mech_request_utils import (
     collect_all_mech_delivers,
     collect_all_mech_requests,
@@ -146,7 +147,7 @@ def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
 
     # Remove duplicates
     # fpmm.outcomes is a numpy array
-    merge_df.drop_duplicates("id", inplace=True)
+    merge_df.drop_duplicates("id", keep="last", inplace=True)
     print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")
 
     # save the parquet file
@@ -174,16 +175,15 @@ def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
     return merge_df
 
 
-def update_tools_parquet(rpc: str, new_tools_filename: pd.DataFrame):
+def update_tools_parquet(new_tools_filename: pd.DataFrame):
     try:
-        old_tools_df = pd.read_parquet(...
+        old_tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
     except Exception as e:
         print(f"Error reading old tools parquet file {e}")
         return None
     try:
         new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
-
-        updating_timestamps(rpc, new_tools_filename)
+
     except Exception as e:
         print(f"Error reading new trades parquet file {e}")
         return None
@@ -201,7 +201,7 @@ def update_tools_parquet(rpc: str, new_tools_filename: pd.DataFrame):
     print(f"Final length after removing duplicates in tools= {len(merge_df)}")
 
     # save the parquet file
-    merge_df.to_parquet(...
+    merge_df.to_parquet(TMP_DIR / "tools.parquet", index=False)
 
 
 def get_mech_info_2024() -> dict[str, Any]:
@@ -298,6 +298,10 @@ def get_mech_events_since_last_run():
     try:
         all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
         latest_timestamp = max(all_trades.creation_timestamp)
+        # cutoff_date = "2024-12-01"
+        # latest_timestamp = pd.Timestamp(
+        #    datetime.strptime(cutoff_date, "%Y-%m-%d")
+        # ).tz_localize("UTC")
         print(f"Updating data since {latest_timestamp}")
     except Exception:
         print("Error while reading the profitability parquet file")
@@ -351,41 +355,7 @@ def get_mech_events_since_last_run():
     return latest_timestamp
 
 
-@measure_execution_time
-def get_mech_events_last_60_days():
-    earliest_block_number = get_last_60_days_block_number()
-    last_block_number = get_last_block_number()
-    # mech requests
-    requests_dict, duplicatedReqId, nr_errors = collect_all_mech_requests(
-        from_block=earliest_block_number,
-        to_block=last_block_number,
-        filename="mech_requests.json",
-    )
-
-    # mech delivers
-    delivers_dict, duplicatedIds, nr_errors = collect_all_mech_delivers(
-        from_block=earliest_block_number,
-        to_block=last_block_number,
-        filename="mech_delivers.json",
-    )
-
-    # clean delivers
-    clean_mech_delivers("mech_requests.json", "mech_delivers.json")
-
-    # solve duplicated requestIds
-    block_map = fix_duplicate_requestIds("mech_requests.json", "mech_delivers.json")
-
-    # merge the two files into one source
-    not_found = merge_requests_delivers(
-        "mech_requests.json", "mech_delivers.json", "merged_requests.json"
-    )
-
-    # Add ipfs contents
-    get_ipfs_data("merged_requests.json", "tools_info.json")
-
-
 if __name__ == "__main__":
-    ...
-    ...
+    get_mech_events_since_last_run()
     # result = get_mech_info_last_60_days()
     # print(result)
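Note: the keep="last" added to drop_duplicates matters when an old and a freshly fetched trade share an id. Assuming the new batch is concatenated after the existing rows, the newest version of each trade is the one retained. A small sketch of that behaviour:

    import pandas as pd

    # keep="last" retains the final occurrence of each id, so rows from the
    # newly fetched batch win over rows carried over from the old parquet.
    old_and_new = pd.DataFrame(
        {"id": ["a", "a", "b"], "source": ["old", "new", "old"]}
    )
    print(old_and_new.drop_duplicates("id", keep="last"))
    #   id source
    # 1  a    new
    # 2  b    old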
scripts/gnosis_timestamps.py
ADDED
@@ -0,0 +1,184 @@
from web3 import Web3
import os
import requests
import time
import pickle
from datetime import datetime, timezone
from functools import partial
import pandas as pd
import pytz
from tqdm import tqdm
from utils import DATA_DIR, TMP_DIR, measure_execution_time
from concurrent.futures import ThreadPoolExecutor

GNOSIS_API_INTERVAL = 0.2  # 5 calls in 1 second
GNOSIS_URL = "https://api.gnosisscan.io/api"
GNOSIS_API_KEY = os.environ.get("GNOSIS_API_KEY", None)
# https://api.gnosisscan.io/api?module=account&action=txlist&address=0x1fe2b09de07475b1027b0c73a5bf52693b31a52e&startblock=36626348&endblock=36626348&page=1&offset=10&sort=asc&apikey=${gnosis_api_key}""

# Connect to Gnosis Chain RPC
w3 = Web3(Web3.HTTPProvider("https://rpc.gnosischain.com"))


def parallelize_timestamp_computation(df: pd.DataFrame, function: callable) -> list:
    """Parallelize the timestamp conversion."""
    tx_hashes = df["tx_hash"].tolist()
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = list(tqdm(executor.map(function, tx_hashes), total=len(tx_hashes)))
    return results


def transform_timestamp_to_datetime(timestamp):
    dt = datetime.fromtimestamp(timestamp, timezone.utc)
    return dt


def get_tx_hash(trader_address, request_block):
    """Function to get the transaction hash from the address and block number"""
    params = {
        "module": "account",
        "action": "txlist",
        "address": trader_address,
        "page": 1,
        "offset": 100,
        "startblock": request_block,
        "endblock": request_block,
        "sort": "asc",
        "apikey": GNOSIS_API_KEY,
    }

    try:
        response = requests.get(GNOSIS_URL, params=params)
        tx_list = response.json()["result"]
        time.sleep(GNOSIS_API_INTERVAL)
        if len(tx_list) > 1:
            raise ValueError("More than one transaction found")
        return tx_list[0]["hash"]
    except Exception as e:
        return None


def add_tx_hash_info(filename: str = "tools.parquet"):
    """Function to add the hash info to the saved tools parquet file"""
    tools = pd.read_parquet(DATA_DIR / filename)
    tools["tx_hash"] = None
    total_errors = 0
    for i, mech_request in tqdm(
        tools.iterrows(), total=len(tools), desc="Adding tx hash"
    ):
        try:
            trader_address = mech_request["trader_address"]
            block_number = mech_request["request_block"]
            tools.at[i, "tx_hash"] = get_tx_hash(
                trader_address=trader_address, request_block=block_number
            )
        except Exception as e:
            print(f"Error with mech request {mech_request}")
            total_errors += 1
            continue

    print(f"Total number of errors = {total_errors}")
    tools.to_parquet(DATA_DIR / filename)


def get_transaction_timestamp(tx_hash: str, web3: Web3):

    try:
        # Get transaction data
        tx = web3.eth.get_transaction(tx_hash)
        # Get block data
        block = web3.eth.get_block(tx["blockNumber"])
        # Get timestamp
        timestamp = block["timestamp"]

        # Convert to datetime
        dt = datetime.fromtimestamp(timestamp, tz=pytz.UTC)

        # return {
        #     "timestamp": timestamp,
        #     "datetime": dt,
        #     "from_address": tx["from"],
        #     "to_address": tx["to"],
        #     "success": True,
        # }
        return dt.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        print(f"Error getting the timestamp from {tx_hash}")
        return None


@measure_execution_time
def compute_request_time(tools_df: pd.DataFrame) -> pd.DataFrame:
    """Function to compute the request timestamp from the tx hash"""
    # read the local info
    try:
        gnosis_info = pickle.load(open(TMP_DIR / "gnosis_info.pkl", "rb"))
    except Exception:
        print("File not found or not created. Creating a new one")
        gnosis_info = {}

    # any previous information?
    tools_df["request_time"] = tools_df["tx_hash"].map(gnosis_info)

    # Identify tools with missing request_time and fill them
    missing_time_indices = tools_df[tools_df["request_time"].isna()].index
    print(f"length of missing_time_indices = {len(missing_time_indices)}")
    # traverse all tx hashes and get the timestamp of each tx
    partial_mech_request_timestamp = partial(get_transaction_timestamp, web3=w3)
    missing_timestamps = parallelize_timestamp_computation(
        tools_df.loc[missing_time_indices], partial_mech_request_timestamp
    )

    # Update the original DataFrame with the missing timestamps
    for i, timestamp in zip(missing_time_indices, missing_timestamps):
        tools_df.at[i, "request_time"] = timestamp
    # creating other time fields
    tools_df["request_month_year"] = pd.to_datetime(
        tools_df["request_time"]
    ).dt.strftime("%Y-%m")
    tools_df["request_month_year_week"] = (
        pd.to_datetime(tools_df["request_time"]).dt.to_period("W").astype(str)
    )
    # Update t_map with new timestamps
    new_timestamps = (
        tools_df[["tx_hash", "request_time"]]
        .dropna()
        .set_index("tx_hash")
        .to_dict()["request_time"]
    )
    gnosis_info.update(new_timestamps)
    # saving gnosis info
    with open(TMP_DIR / "gnosis_info.pkl", "wb") as f:
        pickle.dump(gnosis_info, f)
    return tools_df


def get_account_details(address):
    # gnosis_url = GNOSIS_URL.substitute(gnosis_api_key=GNOSIS_API_KEY, tx_hash=tx_hash)

    params = {
        "module": "account",
        "action": "txlistinternal",
        "address": address,
        #'page': 1,
        #'offset': 100,
        #'startblock': 0,
        #'endblock': 9999999999,
        #'sort': 'asc',
        "apikey": GNOSIS_API_KEY,
    }

    try:
        response = requests.get(GNOSIS_URL, params=params)
        return response.json()
    except Exception as e:
        return {"error": str(e)}


if __name__ == "__main__":
    # tx_data = "0x783BFA045BDE2D0BCD65280D97A29E7BD9E4FDC10985848690C9797E767140F4"
    new_tools = pd.read_parquet(DATA_DIR / "new_tools.parquet")
    new_tools = compute_request_time(new_tools)
    new_tools.to_parquet(DATA_DIR / "new_tools.parquet")
    # result = get_tx_hash("0x1fe2b09de07475b1027b0c73a5bf52693b31a52e", 36626348)
    # print(result)
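Note: compute_request_time keeps a tx_hash to request_time cache in tmp/gnosis_info.pkl, so repeated runs only pay the web3 round-trip for hashes not seen before. A toy sketch of the lookup step (hypothetical hashes):

    import pandas as pd

    # Only rows whose tx_hash is absent from the cache need an RPC lookup.
    gnosis_info = {"0xabc": "2024-10-29 10:50:55"}  # as loaded from gnosis_info.pkl
    df = pd.DataFrame({"tx_hash": ["0xabc", "0xdef"]})
    df["request_time"] = df["tx_hash"].map(gnosis_info)
    missing = df[df["request_time"].isna()].index
    print(len(missing))  # 1 -> only "0xdef" goes to get_transaction_timestamp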
scripts/mech_request_utils.py
CHANGED
@@ -23,7 +23,6 @@ import time
 import pickle
 from random import uniform
 from typing import Any, Dict, Tuple
-from pathlib import Path
 import requests
 from gql import Client, gql
 from gql.transport.requests import RequestsHTTPTransport
@@ -379,21 +378,22 @@ def clean_mech_delivers(requests_filename: str, delivers_filename: str) -> None:
     mech_requests = json.load(file)
 
     list_reqIds = [mech_requests[k].get("requestId") for k in mech_requests.keys()]
-    # remove duplicated elements
-    list_reqIds = list(set(list_reqIds))
 
     # remove requestIds from delivers that are not in this list
     with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         mech_delivers = json.load(file)
 
     print(f"original size of the file {len(mech_delivers)}")
-    ...
-    for r in to_delete:
-        mech_delivers.pop(r, None)
+    mech_delivers = {
+        k: v
+        for k, v in tqdm(
+            mech_delivers.items(),
+            total=len(mech_delivers),
+            desc="Filtering delivers dictionary",
+        )
+        if k in set(list_reqIds)
+    }
     print(f"final size of the file {len(mech_delivers)}")
     save_json_file(mech_delivers, delivers_filename)
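Note: one caveat with the new comprehension is that set(list_reqIds) sits inside the if clause, so the set is rebuilt on every iteration. Hoisting it out once gives the same result with O(1) membership tests; a sketch of that refactor (not the committed code):

    # Same filter with the id set built once; identical output, but
    # membership tests are O(1) instead of rebuilding the set per item.
    list_reqIds = ["1", "2"]           # stand-in for the ids read above
    mech_delivers = {"1": {}, "3": {}}
    valid_ids = set(list_reqIds)
    mech_delivers = {k: v for k, v in mech_delivers.items() if k in valid_ids}
    print(mech_delivers)  # {'1': {}}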
scripts/nr_mech_calls.py
CHANGED
@@ -1,5 +1,5 @@
 import pandas as pd
-from utils import DATA_DIR, DEFAULT_MECH_FEE
+from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR
 from tqdm import tqdm
 from datetime import datetime, timezone
 from typing import Dict, Any
@@ -220,9 +220,9 @@ def compute_timestamp_mech_calls(
     ]
     # traverse market requests
     total_mech_calls = 0
-    for mech_request in market_requests:
+    for i, mech_request in market_requests.iterrows():
         # check timestamp (before the trade)
-        request_ts = mech_request
+        request_ts = mech_request["request_time"]
         if request_ts < trade_ts:
             # check the timestamp has not been used in a previous trade
             used_timestamps = request_timestamps_used[market]
@@ -246,7 +246,7 @@ def compute_mech_calls_based_on_timestamps(
     nr_traders = len(fpmmTrades["trader_address"].unique())
     fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
     fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
-
+    fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
     tools["request_time"] = pd.to_datetime(tools["request_time"])
     tools["request_date"] = tools["request_time"].dt.date
     tools = tools.sort_values(by="request_time", ascending=True)
@@ -254,9 +254,9 @@ def compute_mech_calls_based_on_timestamps(
     for trader in tqdm(
         fpmmTrades["trader_address"].unique(),
         total=nr_traders,
-        desc="creating mech calls ...
+        desc="creating mech calls count based on timestamps",
     ):
-        # compute the mech calls
+        # compute the mech calls for each trader
        all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
         all_tools = tools[tools["trader_address"] == trader]
         trader_mech_calls = compute_timestamp_mech_calls(all_trades, all_tools)
@@ -266,12 +266,10 @@ def compute_mech_calls_based_on_timestamps(
 
 if __name__ == "__main__":
     # update_trade_nr_mech_calls(non_agents=True)
-    ...
-    unknown_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
-    trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
+    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
+    fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
+    fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
+        lambda x: transform_to_datetime(x)
+    )
+    result = compute_mech_calls_based_on_timestamps(fpmmTrades=fpmmTrades, tools=tools)
+    result.to_parquet(TMP_DIR / "result_df.parquet", index=False)
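Note: the rule implemented by compute_timestamp_mech_calls can be seen in miniature: a mech request counts toward a trade only if its timestamp precedes the trade's and it has not already been consumed by an earlier trade on the same market (hence the sorting of both frames). A toy illustration with hypothetical times:

    import pandas as pd

    # Requests at 10:00, 10:05, 10:20; trades at 10:10 and 10:30.
    requests = pd.to_datetime(pd.Series(["10:00", "10:05", "10:20"]))
    trade_a, trade_b = pd.to_datetime("10:10"), pd.to_datetime("10:30")
    used = set()
    for trade in (trade_a, trade_b):
        # count only earlier, not-yet-attributed requests
        calls = [t for t in requests if t < trade and t not in used]
        used.update(calls)
        print(trade.time(), len(calls))  # trade_a -> 2, trade_b -> 1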
scripts/profitability.py
CHANGED
@@ -18,7 +18,6 @@
 # ------------------------------------------------------------------------------
 
 import time
-import datetime
 import pandas as pd
 from typing import Any
 from enum import Enum
@@ -38,6 +37,7 @@ from utils import (
     JSON_DATA_DIR,
     DATA_DIR,
     DEFAULT_MECH_FEE,
+    TMP_DIR,
 )
 from staking import label_trades_by_staking
 from nr_mech_calls import (
@@ -122,25 +122,6 @@ ALL_TRADES_STATS_DF_COLS = [
     "roi",
 ]
 
-SUMMARY_STATS_DF_COLS = [
-    "trader_address",
-    "num_trades",
-    "num_winning_trades",
-    "num_redeemed",
-    "total_investment",
-    "total_trade_fees",
-    "num_mech_calls",
-    "total_mech_fees",
-    "total_earnings",
-    "total_redeemed_amount",
-    "total_net_earnings",
-    "total_net_earnings_wo_mech_fees",
-    "total_roi",
-    "total_roi_wo_mech_fees",
-    "mean_mech_calls_per_trade",
-    "mean_mech_fee_amount_per_trade",
-]
-
 
 def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
     """Returns whether the user has redeemed the position."""
@@ -159,7 +140,6 @@ def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
 
 
 def prepare_profitalibity_data(
-    rpc: str,
     tools_filename: str,
     trades_filename: str,
 ) -> pd.DataFrame:
@@ -167,6 +147,7 @@ def prepare_profitalibity_data(
 
     # Check if tools.parquet is in the same directory
     try:
+        # new tools parquet
         tools = pd.read_parquet(DATA_DIR / tools_filename)
 
         # make sure creator_address is in the columns
@@ -181,7 +162,7 @@ def prepare_profitalibity_data(
         tools.to_parquet(DATA_DIR / tools_filename)
         print(f"{tools_filename} loaded")
     except FileNotFoundError:
-        print("...
+        print(f"{tools_filename} not found.")
         return
 
     # Check if fpmmTrades.parquet is in the same directory
@@ -218,7 +199,6 @@ def determine_market_status(trade, current_answer):
 def analyse_trader(
     trader_address: str,
     fpmmTrades: pd.DataFrame,
-    tools: pd.DataFrame,
     trader_estimated_mech_calls: pd.DataFrame,
     daily_info: bool = False,
 ) -> pd.DataFrame:
@@ -294,7 +274,7 @@
         total_mech_calls = trader_estimated_mech_calls.loc[
             (trader_estimated_mech_calls["market"] == trade["title"])
             & (trader_estimated_mech_calls["trade_id"] == trade_id),
-            "...
+            "total_mech_calls",
         ].iloc[0]
 
         net_earnings = (
@@ -341,7 +321,6 @@
 
 def analyse_all_traders(
     trades: pd.DataFrame,
-    tools: pd.DataFrame,
     estimated_mech_calls: pd.DataFrame,
     daily_info: bool = False,
 ) -> pd.DataFrame:
@@ -357,9 +336,7 @@
             estimated_mech_calls["trader_address"] == trader
         ]
         all_traders.append(
-            analyse_trader(
-                trader, trades, tools, trader_estimated_mech_calls, daily_info
-            )
+            analyse_trader(trader, trades, trader_estimated_mech_calls, daily_info)
         )
 
     # concat all creators
@@ -368,54 +345,7 @@
     return all_creators_df
 
 
-def summary_analyse(df):
-    """Summarise profitability analysis."""
-    # Ensure DataFrame is not empty
-    if df.empty:
-        return pd.DataFrame(columns=SUMMARY_STATS_DF_COLS)
-
-    # Group by trader_address
-    grouped = df.groupby("trader_address")
-
-    # Create summary DataFrame
-    summary_df = grouped.agg(
-        num_trades=("trader_address", "size"),
-        num_winning_trades=("winning_trade", lambda x: float((x).sum())),
-        num_redeemed=("redeemed", lambda x: float(x.sum())),
-        total_investment=("collateral_amount", "sum"),
-        total_trade_fees=("trade_fee_amount", "sum"),
-        num_mech_calls=("num_mech_calls", "sum"),
-        total_mech_fees=("mech_fee_amount", "sum"),
-        total_earnings=("earnings", "sum"),
-        total_redeemed_amount=("redeemed_amount", "sum"),
-        total_net_earnings=("net_earnings", "sum"),
-    )
-
-    # Calculating additional columns
-    summary_df["total_roi"] = (
-        summary_df["total_net_earnings"] / summary_df["total_investment"]
-    )
-    summary_df["mean_mech_calls_per_trade"] = (
-        summary_df["num_mech_calls"] / summary_df["num_trades"]
-    )
-    summary_df["mean_mech_fee_amount_per_trade"] = (
-        summary_df["total_mech_fees"] / summary_df["num_trades"]
-    )
-    summary_df["total_net_earnings_wo_mech_fees"] = (
-        summary_df["total_net_earnings"] + summary_df["total_mech_fees"]
-    )
-    summary_df["total_roi_wo_mech_fees"] = (
-        summary_df["total_net_earnings_wo_mech_fees"] / summary_df["total_investment"]
-    )
-
-    # Resetting index to include trader_address
-    summary_df.reset_index(inplace=True)
-
-    return summary_df
-
-
 def run_profitability_analysis(
-    rpc: str,
     tools_filename: str,
     trades_filename: str,
     merge: bool = False,
@@ -424,10 +354,12 @@
 
     # load dfs from data folder for analysis
     print(f"Preparing data with {tools_filename} and {trades_filename}")
-    fpmmTrades = prepare_profitalibity_data(...
+    fpmmTrades = prepare_profitalibity_data(tools_filename, trades_filename)
+
     if merge:
-        update_tools_parquet(...
-        ...
+        update_tools_parquet(tools_filename)
+
+    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
 
     fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
         lambda x: transform_to_datetime(x)
@@ -436,9 +368,10 @@
     trade_mech_calls = compute_mech_calls_based_on_timestamps(
         fpmmTrades=fpmmTrades, tools=tools
     )
+    trade_mech_calls.to_parquet(TMP_DIR / "trade_mech_calls.parquet")
     print(trade_mech_calls.total_mech_calls.describe())
     print("Analysing trades...")
-    all_trades_df = analyse_all_traders(fpmmTrades, ...
+    all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
 
     # # merge previous files if requested
     if merge:
@@ -470,9 +403,10 @@
     all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
 
     # add staking labels
-    label_trades_by_staking(trades_df=all_trades_df)
+    all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
 
     # create the unknown traders dataset
+    print("Creating unknown traders dataset")
     unknown_traders_df, all_trades_df = create_unknown_traders_df(
         trades_df=all_trades_df
     )
@@ -481,18 +415,18 @@
     # save to parquet
     all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
 
-    # summarize profitability df
-    print("Summarising trades...")
-    summary_df = summary_analyse(all_trades_df)
-    summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
-
     print("Done!")
 
     return all_trades_df
 
 
 if __name__ == "__main__":
-    ...
+    # updating the whole fpmmTrades parquet file instead of just the new ones
+    # trade_mech_calls = pd.read_parquet(TMP_DIR / "result_df.parquet")
+    # fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
+    # fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
+    #     lambda x: transform_to_datetime(x)
+    # )
+    # all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
+    # all_trades_df.to_parquet(TMP_DIR / "all_trades_df.parquet", index=False)
+    run_profitability_analysis("file1", "file2")
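Note: with the rpc parameter gone, the entry point takes only the two filenames plus the merge flag. The weekly invocation as wired in scripts/pull_data.py (import path as used by the now-deleted scripts/roi_analysis.py):

    from profitability import run_profitability_analysis

    # New-data parquet files are merged into the global ones and analysed
    # in a single pass.
    all_trades_df = run_profitability_analysis(
        tools_filename="new_tools.parquet",
        trades_filename="new_fpmmTrades.parquet",
        merge=True,
    )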
scripts/pull_data.py
CHANGED
@@ -11,6 +11,7 @@ from utils import (
     measure_execution_time,
     DATA_DIR,
     HIST_DIR,
+    TMP_DIR,
 )
 from get_mech_info import (
     get_mech_events_since_last_run,
@@ -21,6 +22,7 @@ from cleaning_old_info import clean_old_data_from_parquet_files
 from web3_utils import updating_timestamps
 from manage_space_files import move_files
 from cloud_storage import load_historical_file
+from tools_metrics import compute_tools_based_datasets
 
 
 logging.basicConfig(level=logging.INFO)
@@ -52,7 +54,7 @@ def save_historical_data():
     timestamp = current_datetime.strftime("%Y%m%d_%H%M%S")
 
     try:
-        tools = pd.read_parquet(...
+        tools = pd.read_parquet(TMP_DIR / "tools.parquet")
         filename = f"tools_{timestamp}.parquet"
         tools.to_parquet(HIST_DIR / filename, index=False)
         # save into cloud storage
@@ -79,7 +81,7 @@ def only_new_weekly_analysis():
     rpc = RPC
     # Run markets ETL
     logging.info("Running markets ETL")
-    mkt_etl(MARKETS_FILENAME)
+    # mkt_etl(MARKETS_FILENAME)
     logging.info("Markets ETL completed")
 
     # Mech events ETL
@@ -108,7 +110,6 @@
     # # Run profitability analysis
     logging.info("Running profitability analysis")
     run_profitability_analysis(
-        rpc=rpc,
         tools_filename="new_tools.parquet",
        trades_filename="new_fpmmTrades.parquet",
         merge=True,
@@ -119,19 +120,13 @@
     # merge new json files with old json files
     update_json_files()
 
-    try:
-        updating_timestamps(rpc, TOOLS_FILENAME)
-    except Exception as e:
-        logging.error("Error while updating timestamps of tools")
-        print(e)
-
     save_historical_data()
 
-    clean_old_data_from_parquet_files("2024-10-...
+    clean_old_data_from_parquet_files("2024-10-25")
 
     compute_tools_accuracy()
-
-    # move to tmp folder the new generated files
+    compute_tools_based_datasets()
+    # # move to tmp folder the new generated files
     move_files()
     logging.info("Weekly analysis files generated and saved")
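Note: save_historical_data() now snapshots tmp/tools.parquet into HIST_DIR under a timestamped name. A minimal sketch of the naming scheme it uses (fixed datetime for illustration):

    from datetime import datetime

    # How each weekly snapshot of tmp/tools.parquet is named.
    timestamp = datetime(2024, 10, 25, 12, 30, 0).strftime("%Y%m%d_%H%M%S")
    print(f"tools_{timestamp}.parquet")  # tools_20241025_123000.parquet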
scripts/roi_analysis.py
DELETED
@@ -1,129 +0,0 @@
import logging
import os
import pickle
from web3 import Web3
import pandas as pd
from functools import partial
from datetime import datetime
from markets import (
    etl as mkt_etl,
    DEFAULT_FILENAME as MARKETS_FILENAME,
)

TOOLS_FILENAME = "tools_2024.parquet"
from tools import (
    etl as tools_etl,
)
from pull_data import (
    DATA_DIR,
    parallelize_timestamp_conversion,
    block_number_to_timestamp,
)
from profitability import run_profitability_analysis
from get_mech_info import get_mech_info_2024
from utils import get_question, current_answer
import gc

logging.basicConfig(level=logging.INFO)


def roi_analysis():
    """Run ROI analysis for the trades done in 2024."""
    rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
    web3 = Web3(Web3.HTTPProvider(rpc))

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # Run tools ETL
    logging.info("Running tools ETL")

    # This etl is saving already the tools parquet file
    tools_etl(
        rpcs=[rpc],
        mech_info=get_mech_info_2024(),
        filename=TOOLS_FILENAME,
    )
    logging.info("Tools ETL completed")

    # Run profitability analysis
    if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
        os.remove(DATA_DIR / "fpmmTrades.parquet")
    logging.info("Running profitability analysis")
    date = "2024-01-01"
    datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
    timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
    run_profitability_analysis(
        rpc=rpc,
        tools_filename=TOOLS_FILENAME,
        trades_filename="fpmmTrades.parquet",
        from_timestamp=timestamp_jan_2024,
    )
    logging.info("Profitability analysis completed")

    # Get currentAnswer from FPMMS
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)

    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(lambda x: get_question(x))
    tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))

    tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")

    # Convert block number to timestamp
    logging.info("Converting block number to timestamp")
    t_map = pickle.load(open(DATA_DIR / "t_map.pkl", "rb"))
    tools["request_time"] = tools["request_block"].map(t_map)

    # Identify tools with missing request_time and fill them
    missing_time_indices = tools[tools["request_time"].isna()].index
    if not missing_time_indices.empty:
        partial_block_number_to_timestamp = partial(
            block_number_to_timestamp, web3=web3
        )
        missing_timestamps = parallelize_timestamp_conversion(
            tools.loc[missing_time_indices], partial_block_number_to_timestamp
        )

        # Update the original DataFrame with the missing timestamps
        for i, timestamp in zip(missing_time_indices, missing_timestamps):
            tools.at[i, "request_time"] = timestamp

    tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
        "%Y-%m"
    )
    tools["request_month_year_week"] = (
        pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
    )

    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)

    # Update t_map with new timestamps
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")
        .to_dict()["request_time"]
    )
    t_map.update(new_timestamps)

    with open(DATA_DIR / "t_map_2024.pkl", "wb") as f:
        pickle.dump(t_map, f)

    # clean and release all memory
    del tools
    del fpmms
    del t_map
    gc.collect()

    logging.info("ROI analysis files generated and saved")


if __name__ == "__main__":
    roi_analysis()
scripts/staking.py
CHANGED
@@ -1,7 +1,7 @@
 import json
 import sys
 from typing import Any, List
-from utils import RPC, DATA_DIR
+from utils import RPC, DATA_DIR, TMP_DIR
 import requests
 from tqdm import tqdm
 from web3 import Web3
@@ -194,15 +194,14 @@ def label_trades_by_staking(trades_df: pd.DataFrame, start: int = None) -> None:
             staking_label
         )
         # tqdm.write(f"statking label {staking_label}")
-    return
+    return trades_df
 
 
 if __name__ == "__main__":
     # create_service_map()
-    trades_df = pd.read_parquet(...
-    ...
-    label_trades_by_staking(trades_df=trades_df, start=8)
-    print("after labeling")
+    trades_df = pd.read_parquet(TMP_DIR / "all_trades_df.parquet")
+    trades_df = trades_df.loc[trades_df["is_invalid"] == False]
+
+    trades_df = label_trades_by_staking(trades_df=trades_df, start=8)
     print(trades_df.staking.value_counts())
-    trades_df.to_parquet(...
+    trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
scripts/tools.py
CHANGED
@@ -17,10 +17,7 @@
|
|
17 |
#
|
18 |
# ------------------------------------------------------------------------------
|
19 |
|
20 |
-
import os.path
|
21 |
import json
|
22 |
-
import time
|
23 |
-
import random
|
24 |
from typing import (
|
25 |
Optional,
|
26 |
List,
|
@@ -30,43 +27,21 @@ from typing import (
|
|
30 |
)
|
31 |
import pandas as pd
|
32 |
import requests
|
33 |
-
from
|
34 |
-
from eth_utils import to_checksum_address
|
35 |
from requests.adapters import HTTPAdapter
|
36 |
-
from requests.exceptions import (
|
37 |
-
ReadTimeout as RequestsReadTimeoutError,
|
38 |
-
HTTPError as RequestsHTTPError,
|
39 |
-
)
|
40 |
from tqdm import tqdm
|
41 |
from urllib3 import Retry
|
42 |
-
from urllib3.exceptions import (
|
43 |
-
ReadTimeoutError as Urllib3ReadTimeoutError,
|
44 |
-
HTTPError as Urllib3HTTPError,
|
45 |
-
)
|
46 |
-
from web3 import Web3, HTTPProvider
|
47 |
from markets import add_market_creator
|
48 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
49 |
from web3_utils import (
|
50 |
-
read_abi,
|
51 |
-
SLEEP,
|
52 |
-
reduce_window,
|
53 |
-
LATEST_BLOCK,
|
54 |
-
LATEST_BLOCK_NAME,
|
55 |
-
BLOCK_DATA_NUMBER,
|
56 |
-
BLOCKS_CHUNK_SIZE,
|
57 |
-
N_RPC_RETRIES,
|
58 |
N_IPFS_RETRIES,
|
59 |
-
RPC_POLL_INTERVAL,
|
60 |
-
IPFS_POLL_INTERVAL,
|
61 |
)
|
62 |
from utils import (
|
63 |
clean,
|
64 |
BLOCK_FIELD,
|
65 |
-
gen_event_filename,
|
66 |
limit_text,
|
67 |
DATA_DIR,
|
68 |
JSON_DATA_DIR,
|
69 |
-
REQUEST_ID_FIELD,
|
70 |
MechEvent,
|
71 |
MechEventName,
|
72 |
MechRequest,
|
@@ -75,7 +50,6 @@ from utils import (
|
|
75 |
REQUEST_ID,
|
76 |
HTTP,
|
77 |
HTTPS,
|
78 |
-
REQUEST_SENDER,
|
79 |
get_result_values,
|
80 |
get_vote,
|
81 |
get_win_probability,
|
@@ -97,7 +71,6 @@ IPFS_LINKS_SERIES_NAME = "ipfs_links"
|
|
97 |
BACKOFF_FACTOR = 1
|
98 |
STATUS_FORCELIST = [404, 500, 502, 503, 504]
|
99 |
DEFAULT_FILENAME = "tools.parquet"
|
100 |
-
RE_RPC_FILTER_ERROR = r"Filter with id: '\d+' does not exist."
|
101 |
ABI_ERROR = "The event signature did not match the provided ABI"
|
102 |
HTTP_TIMEOUT = 10
|
103 |
|
@@ -121,127 +94,6 @@ NUM_WORKERS = 10
|
|
121 |
GET_CONTENTS_BATCH_SIZE = 1000
|
122 |
|
123 |
|
124 |
-
def get_events(
|
125 |
-
w3: Web3,
|
126 |
-
event: str,
|
127 |
-
mech_address: ChecksumAddress,
|
128 |
-
mech_abi_path: str,
|
129 |
-
earliest_block: int,
|
130 |
-
latest_block: int,
|
131 |
-
) -> List:
|
132 |
-
"""Get the delivered events."""
|
133 |
-
abi = read_abi(mech_abi_path)
|
134 |
-
contract_instance = w3.eth.contract(address=mech_address, abi=abi)
|
135 |
-
|
136 |
-
events = []
|
137 |
-
from_block = earliest_block
|
138 |
-
batch_size = BLOCKS_CHUNK_SIZE
|
139 |
-
with tqdm(
|
140 |
-
total=latest_block - from_block,
|
141 |
-
desc=f"Searching {event} events for mech {mech_address}",
|
142 |
-
unit="blocks",
|
143 |
-
) as pbar:
|
144 |
-
while from_block < latest_block:
|
145 |
-
events_filter = contract_instance.events[event].build_filter()
|
146 |
-
events_filter.fromBlock = from_block
|
147 |
-
events_filter.toBlock = min(from_block + batch_size, latest_block)
|
148 |
-
|
149 |
-
entries = None
|
150 |
-
retries = 0
|
151 |
-
while entries is None:
|
152 |
-
try:
|
153 |
-
entries = events_filter.deploy(w3).get_all_entries()
|
154 |
-
retries = 0
|
155 |
-
except (RequestsHTTPError, Urllib3HTTPError) as exc:
|
156 |
-
if "Request Entity Too Large" in exc.args[0]:
|
157 |
-
events_filter, batch_size = reduce_window(
|
158 |
-
contract_instance,
|
159 |
-
event,
|
160 |
-
from_block,
|
161 |
-
batch_size,
|
162 |
-
latest_block,
|
163 |
-
)
|
164 |
-
except (Urllib3ReadTimeoutError, RequestsReadTimeoutError):
|
165 |
-
events_filter, batch_size = reduce_window(
|
166 |
-
contract_instance, event, from_block, batch_size, latest_block
|
167 |
-
)
|
168 |
-
except Exception as exc:
|
169 |
-
retries += 1
|
170 |
-
if retries == N_RPC_RETRIES:
|
171 |
-
tqdm.write(
|
172 |
-
f"Skipping events for blocks {events_filter.fromBlock} - {events_filter.toBlock} "
|
173 |
-
f"as the retries have been exceeded."
|
174 |
-
)
|
175 |
-
break
|
176 |
-
sleep = SLEEP * retries
|
177 |
-
# error_message = ""
|
178 |
-
# if isinstance(exc.args[0], str):
|
179 |
-
# error_message = exc.args[0]
|
180 |
-
# elif isinstance(exc, ValueError):
|
181 |
-
# error_message = exc.args[0].get("message", "")
|
182 |
-
# if (
|
183 |
-
# (
|
184 |
-
# isinstance(exc, ValueError)
|
185 |
-
# and re.match(RE_RPC_FILTER_ERROR, error_message) is None
|
186 |
-
# )
|
187 |
-
# and not isinstance(exc, ValueError)
|
188 |
-
# and not isinstance(exc, MismatchedABI)
|
189 |
-
# ):
|
190 |
-
|
191 |
-
tqdm.write(
|
192 |
-
f"An error was raised from the RPC: {exc}\n Retrying in {sleep} seconds."
|
193 |
-
)
|
194 |
-
if hasattr(exc, "message"):
|
195 |
-
tqdm.write(f"Error message: {exc.message}\n")
|
196 |
-
time.sleep(sleep)
|
197 |
-
|
198 |
-
from_block += batch_size
|
199 |
-
pbar.update(batch_size)
|
200 |
-
|
201 |
-
if entries is None:
|
202 |
-
continue
|
203 |
-
|
204 |
-
chunk = list(entries)
|
205 |
-
events.extend(chunk)
|
206 |
-
time.sleep(RPC_POLL_INTERVAL)
|
207 |
-
|
208 |
-
return events
|
209 |
-
|
210 |
-
|
211 |
-
def parse_events(raw_events: List) -> List[MechEvent]:
|
212 |
-
# TODO use dictionary instead of List
|
213 |
-
"""Parse all the specified MechEvents."""
|
214 |
-
parsed_events = []
|
215 |
-
for event in raw_events:
|
216 |
-
for_block = event.get("blockNumber", 0)
|
217 |
-
args = event.get(EVENT_ARGUMENTS, {})
|
218 |
-
request_id = args.get(REQUEST_ID, 0)
|
219 |
-
data = args.get(DATA, b"")
|
220 |
-
sender = args.get(REQUEST_SENDER, "")
|
221 |
-
parsed_event = MechEvent(for_block, request_id, data, sender)
|
222 |
-
parsed_events.append(parsed_event)
|
223 |
-
|
224 |
-
return parsed_events
|
225 |
-
|
226 |
-
|
227 |
-
def parse_dict_events(events_dict: dict) -> List[MechEvent]:
|
228 |
-
# TODO use dictionary instead of List
|
229 |
-
"""Parse all the specified MechEvents."""
|
230 |
-
parsed_events = []
|
231 |
-
list_ids = list(events_dict.keys())
|
232 |
-
for mech_id in list_ids:
|
233 |
-
event = events_dict[mech_id]
|
234 |
-
for_block = event.get("blockNumber", 0)
|
235 |
-
args = event.get(EVENT_ARGUMENTS, {})
|
236 |
-
request_id = args.get(REQUEST_ID, 0)
|
237 |
-
data = args.get(DATA, b"")
|
238 |
-
sender = args.get(REQUEST_SENDER, "")
|
239 |
-
parsed_event = MechEvent(for_block, request_id, data, sender)
|
240 |
-
parsed_events.append(parsed_event)
|
241 |
-
|
242 |
-
return parsed_events
|
243 |
-
|
244 |
-
|
245 |
def create_session() -> requests.Session:
|
246 |
"""Create a session with a retry strategy."""
|
247 |
session = requests.Session()
|
@@ -322,31 +174,6 @@ def parse_ipfs_tools_content(
     return mech_response
 
 
-def get_contents(
-    session: requests.Session, events: List[MechEvent], event_name: MechEventName
-) -> pd.DataFrame:
-    """Fetch the tools' responses."""
-    contents = []
-    for event in tqdm(events, desc=f"Tools' results", unit="results"):
-        url = event.ipfs_link(event_name)
-        response = request(session, url)
-        if response is None:
-            tqdm.write(f"Skipping {event=}.")
-            continue
-
-        raw_content = parse_ipfs_response(session, url, event, event_name, response)
-        if raw_content is None:
-            continue
-
-        mech_response = parse_ipfs_tools_content(raw_content, event, event_name)
-        if mech_response is None:
-            continue
-        contents.append(mech_response)
-        time.sleep(IPFS_POLL_INTERVAL)
-
-    return pd.DataFrame(contents)
-
-
 def parse_json_events(json_events: dict, keys_to_traverse: List[int]) -> pd.DataFrame:
     """Function to parse the mech info in a json format"""
     all_records = []
@@ -356,6 +183,10 @@ def parse_json_events(json_events: dict, keys_to_traverse: List[int]) -> pd.Data
         output = {}
         output["request_id"] = json_input["requestId"]
         output["request_block"] = json_input["blockNumber"]
+        output["request_time"] = transform_timestamp_to_datetime(
+            int(json_input["blockTimestamp"])
+        )
+        output["tx_hash"] = json_input["transactionHash"]
         output["prompt_request"] = json_input["ipfsContents"]["prompt"]
         output["tool"] = json_input["ipfsContents"]["tool"]
         output["nonce"] = json_input["ipfsContents"]["nonce"]
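These added lines are the heart of the "timestamps based" computation in this commit: the block timestamp now travels with every parsed request as `request_time`, so downstream scripts no longer need tools.parquet to resolve times. `transform_timestamp_to_datetime` lives in the newly added `gnosis_timestamps` script, whose body is not shown in this section; a plausible minimal form (an assumption) is:

```python
from datetime import datetime, timezone


def transform_timestamp_to_datetime(timestamp: int) -> datetime:
    # Assumed behavior: a Gnosis block timestamp is unix seconds; convert to UTC.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc)
```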
@@ -424,144 +255,6 @@ def transform_deliver(contents: pd.DataFrame) -> pd.DataFrame:
     return clean(contents)
 
 
-def store_progress(
-    filename: str,
-    event_to_contents: Dict[str, pd.DataFrame],
-    tools: pd.DataFrame,
-) -> None:
-    """Store the given progress."""
-    print("storing given progress")
-    if filename:
-        DATA_DIR.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists
-        for event_name, content in event_to_contents.items():
-            event_filename = gen_event_filename(
-                event_name
-            )  # Ensure this function returns a valid filename string
-            try:
-                if "result" in content.columns:
-                    content = content.drop(
-                        columns=["result"]
-                    )  # Avoid in-place modification
-                content.to_parquet(DATA_DIR / event_filename, index=False)
-            except Exception as e:
-                print(f"Failed to write {event_name} data: {e}")
-        # Drop result columns for tools DataFrame
-        try:
-            if "result" in tools.columns:
-                tools = tools.drop(columns=["result"])
-            tools.to_parquet(DATA_DIR / filename, index=False)
-        except Exception as e:
-            print(f"Failed to write tools data: {e}")
-
-
-def etl(
-    rpcs: List[str],
-    mech_info: dict[str, Any],
-    filename: Optional[str] = None,
-) -> pd.DataFrame:
-    """Fetch from on-chain events, process, store and return the tools' results on
-    all the questions as a Dataframe."""
-    w3s = [Web3(HTTPProvider(r)) for r in rpcs]
-    session = create_session()
-    event_to_transformer = {
-        MechEventName.REQUEST: transform_request,
-        MechEventName.DELIVER: transform_deliver,
-    }
-
-    mech_to_info = {
-        to_checksum_address(address): (
-            os.path.join(CONTRACTS_PATH, filename),
-            earliest_block,
-        )
-        for address, (filename, earliest_block) in mech_info.items()
-    }
-
-    event_to_contents = {}
-
-    latest_block = LATEST_BLOCK
-    if latest_block is None:
-        latest_block = w3s[0].eth.get_block(LATEST_BLOCK_NAME)[BLOCK_DATA_NUMBER]
-
-    next_start_block = None
-
-    # Loop through events in event_to_transformer
-    for event_name, transformer in event_to_transformer.items():
-        # if next_start_block is None:
-        #     next_start_block_base = get_earliest_block(event_name)
-
-        # Loop through mech addresses in mech_to_info
-        events = []
-        for address, (abi, earliest_block) in mech_to_info.items():
-            next_start_block = earliest_block
-            print(
-                f"Searching for {event_name.value} events for mech {address} from block {next_start_block} to {latest_block}."
-            )
-
-            # parallelize the fetching of events
-            with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
-                futures = []
-                for i in range(
-                    next_start_block, latest_block, BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE
-                ):
-                    futures.append(
-                        executor.submit(
-                            get_events,
-                            random.choice(w3s),
-                            event_name.value,
-                            address,
-                            abi,
-                            i,
-                            min(i + BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE, latest_block),
-                        )
-                    )
-
-                for future in tqdm(
-                    as_completed(futures),
-                    total=len(futures),
-                    desc=f"Fetching {event_name.value} Events",
-                ):
-                    current_mech_events = future.result()
-                    events.extend(current_mech_events)
-
-        print("Parsing events")
-        parsed = parse_events(events)
-
-        contents = []
-        with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
-            futures = []
-            for i in range(0, len(parsed), GET_CONTENTS_BATCH_SIZE):
-                futures.append(
-                    executor.submit(
-                        get_contents,
-                        session,
-                        parsed[i : i + GET_CONTENTS_BATCH_SIZE],
-                        event_name,
-                    )
-                )
-
-            for future in tqdm(
-                as_completed(futures),
-                total=len(futures),
-                desc=f"Fetching {event_name.value} Contents",
-            ):
-                current_mech_contents = future.result()
-                contents.append(current_mech_contents)
-
-        contents = pd.concat(contents, ignore_index=True)
-
-        transformed = transformer(contents)
-
-        event_to_contents[event_name] = transformed.copy()
-
-    # Store progress
-    tools = pd.merge(*event_to_contents.values(), on=REQUEST_ID_FIELD)
-    print(tools.info())
-
-    store_progress(filename, event_to_contents, tools)
-
-    return tools
-
-
 def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename: str):
     total_nr_events = len(json_events)
     ids_to_traverse = list(json_events.keys())
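The deleted `etl` pipeline above pulled Request/Deliver events straight from RPC nodes; `parse_store_json_events_parallel` keeps only the second half of that job, walking an already-fetched JSON dump of mech events. Both sides share the same executor-batching idiom, shown here as a self-contained sketch (names are illustrative, not from the repo):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, List


def run_in_batches(
    items: List, worker: Callable, batch_size: int = 1000, max_workers: int = 4
) -> List:
    """Submit slices of `items` to a thread pool and gather results as they finish."""
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(worker, items[i : i + batch_size])
            for i in range(0, len(items), batch_size)
        ]
        for future in as_completed(futures):
            results.extend(future.result())
    return results
```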
@@ -614,9 +307,5 @@ def generate_tools_file(input_filename: str, output_filename: str):
 
 
 if __name__ == "__main__":
-    RPCs = [
-        "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",
-    ]
-    filename = DEFAULT_FILENAME
 
-
+    generate_tools_file()
 17 | #
 18 | # ------------------------------------------------------------------------------
 19 |
 20 | import json
 21 | from typing import (
 22 |     Optional,
 23 |     List,
 27 | )
 28 | import pandas as pd
 29 | import requests
 30 | +from gnosis_timestamps import transform_timestamp_to_datetime
 31 | from requests.adapters import HTTPAdapter
 32 | from tqdm import tqdm
 33 | from urllib3 import Retry
 34 | from markets import add_market_creator
 35 | from concurrent.futures import ThreadPoolExecutor, as_completed
 36 | from web3_utils import (
 37 |     N_IPFS_RETRIES,
 38 | )
 39 | from utils import (
 40 |     clean,
 41 |     BLOCK_FIELD,
 42 |     limit_text,
 43 |     DATA_DIR,
 44 |     JSON_DATA_DIR,
 45 |     MechEvent,
 46 |     MechEventName,
 47 |     MechRequest,
 50 |     REQUEST_ID,
 51 |     HTTP,
 52 |     HTTPS,
 53 |     get_result_values,
 54 |     get_vote,
 55 |     get_win_probability,
 71 | BACKOFF_FACTOR = 1
 72 | STATUS_FORCELIST = [404, 500, 502, 503, 504]
 73 | DEFAULT_FILENAME = "tools.parquet"
 74 | ABI_ERROR = "The event signature did not match the provided ABI"
 75 | HTTP_TIMEOUT = 10
 76 |
 94 | GET_CONTENTS_BATCH_SIZE = 1000
 95 |
 96 |
scripts/tools_metrics.py
ADDED
@@ -0,0 +1,93 @@
+import pandas as pd
+from typing import List
+from utils import TMP_DIR, INC_TOOLS, DATA_DIR
+
+
+def get_error_data_by_market(
+    tools_df: pd.DataFrame, inc_tools: List[str]
+) -> pd.DataFrame:
+    """Gets the error data for the given tools and calculates the error percentage."""
+    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
+    error = (
+        tools_inc.groupby(
+            ["tool", "request_month_year_week", "market_creator", "error"], sort=False
+        )
+        .size()
+        .unstack()
+        .fillna(0)
+        .reset_index()
+    )
+    error["error_perc"] = (error[1] / (error[0] + error[1])) * 100
+    error["total_requests"] = error[0] + error[1]
+    return error
+
+
+def get_tool_winning_rate_by_market(
+    tools_df: pd.DataFrame, inc_tools: List[str]
+) -> pd.DataFrame:
+    """Gets the tool winning rate data for the given tools by market and calculates the winning percentage."""
+    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
+    tools_non_error = tools_inc[tools_inc["error"] != 1]
+    tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
+        {"no": "No", "yes": "Yes"}
+    )
+    tools_non_error = tools_non_error[
+        tools_non_error["currentAnswer"].isin(["Yes", "No"])
+    ]
+    tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
+    tools_non_error["win"] = (
+        tools_non_error["currentAnswer"] == tools_non_error["vote"]
+    ).astype(int)
+    tools_non_error.columns = tools_non_error.columns.astype(str)
+    wins = (
+        tools_non_error.groupby(
+            ["tool", "request_month_year_week", "market_creator", "win"], sort=False
+        )
+        .size()
+        .unstack()
+        .fillna(0)
+    )
+    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
+    wins.reset_index(inplace=True)
+    wins["total_request"] = wins[0] + wins[1]
+    wins.columns = wins.columns.astype(str)
+    # Convert request_month_year_week to string and explicitly set type for Altair
+    # wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
+    return wins
+
+
+def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
+    tools["request_time"] = pd.to_datetime(tools["request_time"])
+    tools = tools.sort_values(by="request_time", ascending=True)
+
+    tools["request_month_year_week"] = (
+        pd.to_datetime(tools["request_time"]).dt.to_period("W").dt.strftime("%b-%d")
+    )
+    # preparing the tools graph
+    # adding the total
+    tools_all = tools.copy(deep=True)
+    tools_all["market_creator"] = "all"
+    # merging both dataframes
+    tools = pd.concat([tools, tools_all], ignore_index=True)
+    tools = tools.sort_values(by="request_time", ascending=True)
+    return tools
+
+
+def compute_tools_based_datasets():
+    try:
+        tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
+        tools_df = prepare_tools(tools_df)
+    except Exception as e:
+        print(f"Error reading old tools parquet file {e}")
+        return None
+    # error by markets
+    error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
+    error_by_markets.to_parquet(DATA_DIR / "error_by_markets.parquet", index=False)
+    try:
+        tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
+        tools_df = prepare_tools(tools_df)
+    except Exception as e:
+        print(f"Error reading old tools parquet file {e}")
+        return None
+    winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
+    winning_df.to_parquet(DATA_DIR / "winning_df.parquet", index=False)
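Because `error` is a 0/1 flag, the `groupby(...).size().unstack()` step pivots counts of successful and failing requests into columns labelled 0 and 1, from which the percentage follows directly. A toy run on made-up data (tool name and values are hypothetical):

```python
import pandas as pd
from tools_metrics import get_error_data_by_market

# Hypothetical sample: four requests to one tool in one week, one of them errored.
tools_df = pd.DataFrame(
    {
        "tool": ["claude-prediction-online"] * 4,
        "request_month_year_week": ["Nov-03"] * 4,
        "market_creator": ["quickstart"] * 4,
        "error": [0, 0, 1, 0],
    }
)
error = get_error_data_by_market(tools_df, ["claude-prediction-online"])
print(error[["tool", "error_perc", "total_requests"]])
# -> error_perc 25.0, total_requests 4.0
```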
scripts/update_tools_accuracy.py
CHANGED
@@ -1,15 +1,12 @@
 import os
 import pandas as pd
 import ipfshttpclient
-from pathlib import Path
 from utils import INC_TOOLS
 from typing import List
+from utils import TMP_DIR, DATA_DIR
 
 ACCURACY_FILENAME = "tools_accuracy.csv"
 IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
-SCRIPTS_DIR = Path(__file__).parent
-ROOT_DIR = SCRIPTS_DIR.parent
-DATA_DIR = ROOT_DIR / "data"
 
 
 def update_tools_accuracy(
@@ -65,6 +62,7 @@ def update_tools_accuracy(
     print("tools to update")
     print(tools_to_update)
     existing_tools = list(tools_acc["tool"].values)
+    # dt.strftime("%Y-%m-%d %H:%M:%S")
     acc_info["min"] = acc_info["min"].dt.strftime("%Y-%m-%d %H:%M:%S")
     acc_info["max"] = acc_info["max"].dt.strftime("%Y-%m-%d %H:%M:%S")
     for tool in tools_to_update:
@@ -101,7 +99,7 @@ def update_tools_accuracy(
 def compute_tools_accuracy():
     print("Computing accuracy of tools")
     print("Reading tools parquet file")
-    tools = pd.read_parquet(
+    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
     print(tools.head())
     # Computing tools accuracy information
     print("Computing tool accuracy information")
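The local `Path` plumbing is gone in favour of shared constants from `utils`, so every script agrees on where transient and published parquet files live. The exact definitions sit in utils.py, outside this diff; an illustrative layout consistent with how the constants are used here:

```python
from pathlib import Path

# Illustrative only: utils.py defines these, not this script.
ROOT_DIR = Path(__file__).parent.parent
DATA_DIR = ROOT_DIR / "data"  # published datasets read by the dashboard
TMP_DIR = ROOT_DIR / "tmp"    # intermediate files such as tools.parquet and t_map.pkl
```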
scripts/web3_utils.py
CHANGED
@@ -12,7 +12,15 @@ from tqdm import tqdm
 from web3 import Web3
 from typing import Any, Optional
 from web3.types import BlockParams
-from utils import
+from utils import (
+    JSON_DATA_DIR,
+    DATA_DIR,
+    SUBGRAPH_API_KEY,
+    to_content,
+    SUBGRAPH_URL,
+    HIST_DIR,
+    TMP_DIR,
+)
 from queries import conditional_tokens_gc_user_query, omen_xdai_trades_query
 import pandas as pd
 
@@ -96,11 +104,11 @@ def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> li
 def updating_timestamps(rpc: str, tools_filename: str):
     web3 = Web3(Web3.HTTPProvider(rpc))
 
-    tools = pd.read_parquet(
+    tools = pd.read_parquet(TMP_DIR / tools_filename)
 
     # Convert block number to timestamp
     print("Converting block number to timestamp")
-    t_map = pickle.load(open(
+    t_map = pickle.load(open(TMP_DIR / "t_map.pkl", "rb"))
     tools["request_time"] = tools["request_block"].map(t_map)
 
     no_data = tools["request_time"].isna().sum()
@@ -129,7 +137,7 @@ def updating_timestamps(rpc: str, tools_filename: str):
 
     # Save the tools data after the updates on the content
     print(f"Updating file {tools_filename} with timestamps")
-    tools.to_parquet(
+    tools.to_parquet(TMP_DIR / tools_filename, index=False)
 
     # Update t_map with new timestamps
     new_timestamps = (
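`t_map.pkl` is a cached block-number-to-timestamp mapping, so only blocks missing from the cache need an RPC round trip. Each resolution costs one `get_block` call, which is why the cache matters at this scale; the lookup itself reduces to (sketch, helper name ours):

```python
from web3 import Web3


def block_to_timestamp(w3: Web3, block_number: int) -> int:
    """Return the unix timestamp of a block via the connected RPC."""
    return w3.eth.get_block(block_number)["timestamp"]

# Usage sketch: fill only the gaps that t_map.pkl does not cover yet.
# w3 = Web3(Web3.HTTPProvider(rpc))
# t_map[block] = block_to_timestamp(w3, block)
```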
tabs/error.py
CHANGED
@@ -9,33 +9,14 @@ HEIGHT = 600
 WIDTH = 1000
 
 
-def get_error_data_by_market(
-    tools_df: pd.DataFrame, inc_tools: List[str]
-) -> pd.DataFrame:
-    """Gets the error data for the given tools and calculates the error percentage."""
-    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
-    error = (
-        tools_inc.groupby(
-            ["tool", "request_month_year_week", "market_creator", "error"], sort=False
-        )
-        .size()
-        .unstack()
-        .fillna(0)
-        .reset_index()
-    )
-    error["error_perc"] = (error[1] / (error[0] + error[1])) * 100
-    error["total_requests"] = error[0] + error[1]
-    return error
-
-
 def get_error_data_overall_by_market(error_df: pd.DataFrame) -> pd.DataFrame:
     """Gets the error data for the given tools and calculates the error percentage."""
     error_total = (
         error_df.groupby(["request_month_year_week", "market_creator"], sort=False)
-        .agg({"total_requests": "sum", 1: "sum", 0: "sum"})
+        .agg({"total_requests": "sum", "1": "sum", "0": "sum"})
         .reset_index()
     )
-    error_total["error_perc"] = (error_total[1] / error_total["total_requests"]) * 100
+    error_total["error_perc"] = (error_total["1"] / error_total["total_requests"]) * 100
     error_total.columns = error_total.columns.astype(str)
     error_total["error_perc"] = error_total["error_perc"].apply(lambda x: round(x, 4))
     return error_total
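The switch from integer keys (`1`, `0`) to string keys (`"1"`, `"0"`) follows from the new data flow: the counts frame now round-trips through error_by_markets.parquet, and Parquet requires string column names, so the unstacked 0/1 count columns come back as `"0"`/`"1"`. A small demonstration of the resulting access pattern, on toy data:

```python
import pandas as pd

# Toy frame mimicking error_by_markets after the parquet round trip.
df = pd.DataFrame({"total_requests": [10.0], 0: [7.0], 1: [3.0]})
df.columns = df.columns.astype(str)  # what writing/reading parquet enforces

weekly = df.agg({"total_requests": "sum", "1": "sum", "0": "sum"})
print((weekly["1"] / weekly["total_requests"]) * 100)  # 30.0 -> 30% error rate
```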
tabs/metrics.py
CHANGED
@@ -28,40 +28,6 @@ HEIGHT = 600
 WIDTH = 1000
 
 
-def get_metrics(
-    metric_name: str, column_name: str, market_creator: str, trades_df: pd.DataFrame
-) -> pd.DataFrame:
-    # this is to filter out the data before 2023-09-01
-    trades_filtered = trades_df[trades_df["creation_timestamp"] > "2023-09-01"]
-    if market_creator != "all":
-        trades_filtered = trades_filtered.loc[
-            trades_filtered["market_creator"] == market_creator
-        ]
-
-    trades_filtered = (
-        trades_filtered.groupby("month_year_week", sort=False)[column_name]
-        .quantile([0.25, 0.5, 0.75])
-        .unstack()
-    )
-    # reformat the data as percentile, date, value
-    trades_filtered = trades_filtered.melt(
-        id_vars=["month_year_week"], var_name="percentile", value_name=metric_name
-    )
-    trades_filtered.columns = trades_filtered.columns.astype(str)
-    trades_filtered.reset_index(inplace=True)
-    trades_filtered.columns = [
-        "month_year_week",
-        "25th_percentile",
-        "50th_percentile",
-        "75th_percentile",
-    ]
-    # reformat the data as percentile, date, value
-    trades_filtered = trades_filtered.melt(
-        id_vars=["month_year_week"], var_name="percentile", value_name=metric_name
-    )
-    return trades_filtered
-
-
 def get_boxplot_metrics(column_name: str, trades_df: pd.DataFrame) -> pd.DataFrame:
     trades_filtered = trades_df[
         ["creation_timestamp", "month_year_week", "market_creator", column_name]
@@ -81,45 +47,6 @@ def get_boxplot_metrics(column_name: str, trades_df: pd.DataFrame) -> pd.DataFra
     return all_filtered_trades
 
 
-def plot2_trade_details(
-    metric_name: str, market_creator: str, trades_df: pd.DataFrame
-) -> gr.Plot:
-    """Plots the trade details for the given trade detail."""
-
-    if metric_name == "mech calls":
-        metric_name = "mech_calls"
-        column_name = "num_mech_calls"
-        yaxis_title = "Nr of mech calls per trade"
-    elif metric_name == "ROI":
-        column_name = "roi"
-        yaxis_title = "ROI (net profit/cost)"
-    elif metric_name == "collateral amount":
-        metric_name = "collateral_amount"
-        column_name = metric_name
-        yaxis_title = "Collateral amount per trade (xDAI)"
-    elif metric_name == "net earnings":
-        metric_name = "net_earnings"
-        column_name = metric_name
-        yaxis_title = "Net profit per trade (xDAI)"
-    else:  # earnings
-        column_name = metric_name
-        yaxis_title = "Gross profit per trade (xDAI)"
-
-    trades_filtered = get_metrics(metric_name, column_name, market_creator, trades_df)
-    fig = px.line(
-        trades_filtered, x="month_year_week", y=metric_name, color="percentile"
-    )
-    fig.update_layout(
-        xaxis_title="Week",
-        yaxis_title=yaxis_title,
-        legend=dict(yanchor="top", y=0.5),
-    )
-    fig.update_xaxes(tickformat="%b %d\n%Y")
-    return gr.Plot(
-        value=fig,
-    )
-
-
 def plot_trade_metrics(
     metric_name: str, trades_df: pd.DataFrame, trader_filter: str = None
 ) -> gr.Plot:
tabs/tool_win.py
CHANGED
@@ -26,40 +26,6 @@ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
     return tools
 
 
-def get_tool_winning_rate_by_market(
-    tools_df: pd.DataFrame, inc_tools: List[str]
-) -> pd.DataFrame:
-    """Gets the tool winning rate data for the given tools by market and calculates the winning percentage."""
-    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
-    tools_non_error = tools_inc[tools_inc["error"] != 1]
-    tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
-        {"no": "No", "yes": "Yes"}
-    )
-    tools_non_error = tools_non_error[
-        tools_non_error["currentAnswer"].isin(["Yes", "No"])
-    ]
-    tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
-    tools_non_error["win"] = (
-        tools_non_error["currentAnswer"] == tools_non_error["vote"]
-    ).astype(int)
-    tools_non_error.columns = tools_non_error.columns.astype(str)
-    wins = (
-        tools_non_error.groupby(
-            ["tool", "request_month_year_week", "market_creator", "win"], sort=False
-        )
-        .size()
-        .unstack()
-        .fillna(0)
-    )
-    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
-    wins.reset_index(inplace=True)
-    wins["total_request"] = wins[0] + wins[1]
-    wins.columns = wins.columns.astype(str)
-    # Convert request_month_year_week to string and explicitly set type for Altair
-    # wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
-    return wins
-
-
 def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
     """Gets the overall winning rate data for the given tools and calculates the winning percentage."""
     overall_wins = (