rosacastillo
commited on
Commit
·
285f2a6
1
Parent(s):
b3b7123
updating week format starting on Monday, new staking contracts and new weekly data
Browse files- data/all_trades_profitability.parquet +2 -2
- data/daily_info.parquet +2 -2
- data/error_by_markets.parquet +2 -2
- data/invalid_trades.parquet +2 -2
- data/service_map.pkl +2 -2
- data/service_map_bak.pkl +3 -0
- data/tools_accuracy.csv +2 -2
- data/unknown_traders.parquet +2 -2
- data/winning_df.parquet +2 -2
- notebooks/markets_analysis.ipynb +711 -15
- notebooks/weekly_analysis.ipynb +2 -2
- scripts/cleaning_old_info.py +14 -6
- scripts/cloud_storage.py +14 -5
- scripts/daily_data.py +4 -0
- scripts/get_mech_info.py +1 -1
- scripts/gnosis_timestamps.py +3 -1
- scripts/markets.py +14 -5
- scripts/profitability.py +79 -5
- scripts/pull_data.py +28 -6
- scripts/staking.py +97 -7
- scripts/tools_metrics.py +3 -1
- scripts/web3_utils.py +5 -2
- tabs/tool_win.py +3 -1
- tabs/trades.py +3 -1
data/all_trades_profitability.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6256840b7a7704aa5618fd5a4fed41b9444bbf80ea1dcaae068715026c8d52b0
|
3 |
+
size 8218375
|
data/daily_info.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3d8ec77951dad3d522c90ea0009c15e5ab717c3f34624b4f0d205ad58cfa16e
|
3 |
+
size 1054780
|
data/error_by_markets.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dff09a27b7b5ac4a527d679c446627c6ca4fb2653c6bc50e818d79e29e3c1be
|
3 |
+
size 12928
|
data/invalid_trades.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:099e999dc46d4a2d7086838f3645475aecf27fa88331a8b2d5fd4c9937f1ad81
|
3 |
+
size 782151
|
data/service_map.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32d288a076f719a659159ffdb2bca3f132c3efe3f62ee0412c11e8094c36ffc8
|
3 |
+
size 164076
|
data/service_map_bak.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93ac540e1bcd347a48b9978b87443ae64af0f8b0a4daff305c4ad99cd0959a73
|
3 |
+
size 90766
|
data/tools_accuracy.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb5a70b32e6a7dbd75c7924a2fa887612bf7523a62f6710f2e2397cdc3664fa2
|
3 |
+
size 1100
|
data/unknown_traders.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1633afc5d408263251ae5290e1f45972abaf0d3f0358ab880604de8a0baae559
|
3 |
+
size 283140
|
data/winning_df.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f394838074669231dc3f8dc46167bb05019ae12eb798933e99b2c2de9b9a2c1f
|
3 |
+
size 12636
|
notebooks/markets_analysis.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -15,32 +15,67 @@
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"metadata": {},
|
20 |
"outputs": [
|
21 |
{
|
22 |
-
"
|
23 |
-
"
|
24 |
-
"
|
25 |
-
|
26 |
-
"\
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"\
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
]
|
31 |
}
|
32 |
],
|
33 |
"source": [
|
34 |
-
"
|
35 |
]
|
36 |
},
|
37 |
{
|
38 |
"cell_type": "code",
|
39 |
-
"execution_count":
|
40 |
"metadata": {},
|
41 |
"outputs": [],
|
42 |
"source": [
|
43 |
-
"
|
44 |
]
|
45 |
},
|
46 |
{
|
@@ -54,7 +89,7 @@
|
|
54 |
},
|
55 |
{
|
56 |
"cell_type": "code",
|
57 |
-
"execution_count":
|
58 |
"metadata": {},
|
59 |
"outputs": [],
|
60 |
"source": [
|
@@ -72,13 +107,674 @@
|
|
72 |
},
|
73 |
{
|
74 |
"cell_type": "code",
|
75 |
-
"execution_count":
|
76 |
"metadata": {},
|
77 |
"outputs": [],
|
78 |
"source": [
|
79 |
"new_trades = pd.read_parquet(\"../tmp/new_fpmmTrades.parquet\")"
|
80 |
]
|
81 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
{
|
83 |
"cell_type": "code",
|
84 |
"execution_count": 9,
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
+
"execution_count": 12,
|
19 |
+
"metadata": {},
|
20 |
+
"outputs": [],
|
21 |
+
"source": [
|
22 |
+
"missing_df = pd.read_parquet(\"../data/missing_fpmmTrades.parquet\")"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"cell_type": "code",
|
27 |
+
"execution_count": 13,
|
28 |
"metadata": {},
|
29 |
"outputs": [
|
30 |
{
|
31 |
+
"name": "stdout",
|
32 |
+
"output_type": "stream",
|
33 |
+
"text": [
|
34 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
35 |
+
"RangeIndex: 24121 entries, 0 to 24120\n",
|
36 |
+
"Data columns (total 24 columns):\n",
|
37 |
+
" # Column Non-Null Count Dtype \n",
|
38 |
+
"--- ------ -------------- ----- \n",
|
39 |
+
" 0 collateralAmount 24121 non-null object\n",
|
40 |
+
" 1 collateralAmountUSD 24121 non-null object\n",
|
41 |
+
" 2 collateralToken 24121 non-null object\n",
|
42 |
+
" 3 creationTimestamp 24121 non-null object\n",
|
43 |
+
" 4 trader_address 24121 non-null object\n",
|
44 |
+
" 5 feeAmount 24121 non-null object\n",
|
45 |
+
" 6 id 24121 non-null object\n",
|
46 |
+
" 7 oldOutcomeTokenMarginalPrice 24121 non-null object\n",
|
47 |
+
" 8 outcomeIndex 24121 non-null object\n",
|
48 |
+
" 9 outcomeTokenMarginalPrice 24121 non-null object\n",
|
49 |
+
" 10 outcomeTokensTraded 24121 non-null object\n",
|
50 |
+
" 11 title 24121 non-null object\n",
|
51 |
+
" 12 transactionHash 24121 non-null object\n",
|
52 |
+
" 13 type 24121 non-null object\n",
|
53 |
+
" 14 market_creator 24121 non-null object\n",
|
54 |
+
" 15 fpmm.answerFinalizedTimestamp 22553 non-null object\n",
|
55 |
+
" 16 fpmm.arbitrationOccurred 24121 non-null bool \n",
|
56 |
+
" 17 fpmm.currentAnswer 22553 non-null object\n",
|
57 |
+
" 18 fpmm.id 24121 non-null object\n",
|
58 |
+
" 19 fpmm.isPendingArbitration 24121 non-null bool \n",
|
59 |
+
" 20 fpmm.openingTimestamp 24121 non-null object\n",
|
60 |
+
" 21 fpmm.outcomes 24121 non-null object\n",
|
61 |
+
" 22 fpmm.title 24121 non-null object\n",
|
62 |
+
" 23 fpmm.condition.id 24121 non-null object\n",
|
63 |
+
"dtypes: bool(2), object(22)\n",
|
64 |
+
"memory usage: 4.1+ MB\n"
|
65 |
]
|
66 |
}
|
67 |
],
|
68 |
"source": [
|
69 |
+
"missing_df.info()"
|
70 |
]
|
71 |
},
|
72 |
{
|
73 |
"cell_type": "code",
|
74 |
+
"execution_count": 26,
|
75 |
"metadata": {},
|
76 |
"outputs": [],
|
77 |
"source": [
|
78 |
+
"old_markets_df = pd.read_parquet(\"../data/old_fpmmTrades.parquet\")"
|
79 |
]
|
80 |
},
|
81 |
{
|
|
|
89 |
},
|
90 |
{
|
91 |
"cell_type": "code",
|
92 |
+
"execution_count": 35,
|
93 |
"metadata": {},
|
94 |
"outputs": [],
|
95 |
"source": [
|
|
|
107 |
},
|
108 |
{
|
109 |
"cell_type": "code",
|
110 |
+
"execution_count": 23,
|
111 |
"metadata": {},
|
112 |
"outputs": [],
|
113 |
"source": [
|
114 |
"new_trades = pd.read_parquet(\"../tmp/new_fpmmTrades.parquet\")"
|
115 |
]
|
116 |
},
|
117 |
+
{
|
118 |
+
"cell_type": "code",
|
119 |
+
"execution_count": 15,
|
120 |
+
"metadata": {},
|
121 |
+
"outputs": [
|
122 |
+
{
|
123 |
+
"data": {
|
124 |
+
"text/plain": [
|
125 |
+
"Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
|
126 |
+
" 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
|
127 |
+
" 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
|
128 |
+
" 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
|
129 |
+
" 'transactionHash', 'type', 'market_creator',\n",
|
130 |
+
" 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
|
131 |
+
" 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
|
132 |
+
" 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
|
133 |
+
" 'fpmm.condition.id'],\n",
|
134 |
+
" dtype='object')"
|
135 |
+
]
|
136 |
+
},
|
137 |
+
"execution_count": 15,
|
138 |
+
"metadata": {},
|
139 |
+
"output_type": "execute_result"
|
140 |
+
}
|
141 |
+
],
|
142 |
+
"source": [
|
143 |
+
"new_trades.columns"
|
144 |
+
]
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"cell_type": "code",
|
148 |
+
"execution_count": 36,
|
149 |
+
"metadata": {},
|
150 |
+
"outputs": [],
|
151 |
+
"source": [
|
152 |
+
"from datetime import datetime, timezone\n",
|
153 |
+
"def transform_to_datetime(x):\n",
|
154 |
+
" return datetime.fromtimestamp(int(x), tz=timezone.utc)\n"
|
155 |
+
]
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"cell_type": "code",
|
159 |
+
"execution_count": 17,
|
160 |
+
"metadata": {},
|
161 |
+
"outputs": [],
|
162 |
+
"source": [
|
163 |
+
"new_trades[\"creationTimestamp\"] = new_trades[\"creationTimestamp\"].apply(\n",
|
164 |
+
" lambda x: transform_to_datetime(x)\n",
|
165 |
+
")"
|
166 |
+
]
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"cell_type": "code",
|
170 |
+
"execution_count": 37,
|
171 |
+
"metadata": {},
|
172 |
+
"outputs": [],
|
173 |
+
"source": [
|
174 |
+
"def add_creation_date(df):\n",
|
175 |
+
" try:\n",
|
176 |
+
" df[\"creationTimestamp\"] = df[\"creationTimestamp\"].apply(\n",
|
177 |
+
" lambda x: transform_to_datetime(x)\n",
|
178 |
+
" )\n",
|
179 |
+
" except Exception:\n",
|
180 |
+
" print(\"Ignore\")\n",
|
181 |
+
" df[\"creation_timestamp\"] = pd.to_datetime(df[\"creationTimestamp\"])\n",
|
182 |
+
" df[\"creation_date\"] = df[\"creation_timestamp\"].dt.date\n",
|
183 |
+
" df[\"creation_date\"] = pd.to_datetime(df[\"creation_date\"])\n",
|
184 |
+
" return df"
|
185 |
+
]
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"cell_type": "code",
|
189 |
+
"execution_count": 27,
|
190 |
+
"metadata": {},
|
191 |
+
"outputs": [
|
192 |
+
{
|
193 |
+
"name": "stdout",
|
194 |
+
"output_type": "stream",
|
195 |
+
"text": [
|
196 |
+
"Ignore\n"
|
197 |
+
]
|
198 |
+
},
|
199 |
+
{
|
200 |
+
"data": {
|
201 |
+
"text/plain": [
|
202 |
+
"Timestamp('2025-01-14 00:00:00')"
|
203 |
+
]
|
204 |
+
},
|
205 |
+
"execution_count": 27,
|
206 |
+
"metadata": {},
|
207 |
+
"output_type": "execute_result"
|
208 |
+
}
|
209 |
+
],
|
210 |
+
"source": [
|
211 |
+
"old_markets_df = add_creation_date(old_markets_df)\n",
|
212 |
+
"max(old_markets_df.creation_date)"
|
213 |
+
]
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"cell_type": "code",
|
217 |
+
"execution_count": 38,
|
218 |
+
"metadata": {},
|
219 |
+
"outputs": [
|
220 |
+
{
|
221 |
+
"name": "stdout",
|
222 |
+
"output_type": "stream",
|
223 |
+
"text": [
|
224 |
+
"Ignore\n"
|
225 |
+
]
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"data": {
|
229 |
+
"text/plain": [
|
230 |
+
"Timestamp('2025-01-16 00:00:00')"
|
231 |
+
]
|
232 |
+
},
|
233 |
+
"execution_count": 38,
|
234 |
+
"metadata": {},
|
235 |
+
"output_type": "execute_result"
|
236 |
+
}
|
237 |
+
],
|
238 |
+
"source": [
|
239 |
+
"trades_data = add_creation_date(trades_data)\n",
|
240 |
+
"max(trades_data.creation_date)"
|
241 |
+
]
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"cell_type": "code",
|
245 |
+
"execution_count": 39,
|
246 |
+
"metadata": {},
|
247 |
+
"outputs": [
|
248 |
+
{
|
249 |
+
"data": {
|
250 |
+
"text/plain": [
|
251 |
+
"Timestamp('2024-11-14 00:00:00')"
|
252 |
+
]
|
253 |
+
},
|
254 |
+
"execution_count": 39,
|
255 |
+
"metadata": {},
|
256 |
+
"output_type": "execute_result"
|
257 |
+
}
|
258 |
+
],
|
259 |
+
"source": [
|
260 |
+
"min(trades_data.creation_date)"
|
261 |
+
]
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"cell_type": "code",
|
265 |
+
"execution_count": 24,
|
266 |
+
"metadata": {},
|
267 |
+
"outputs": [
|
268 |
+
{
|
269 |
+
"data": {
|
270 |
+
"text/plain": [
|
271 |
+
"Timestamp('2025-01-14 00:00:00')"
|
272 |
+
]
|
273 |
+
},
|
274 |
+
"execution_count": 24,
|
275 |
+
"metadata": {},
|
276 |
+
"output_type": "execute_result"
|
277 |
+
}
|
278 |
+
],
|
279 |
+
"source": [
|
280 |
+
"new_trades = add_creation_date(new_trades)\n",
|
281 |
+
"max(new_trades.creation_date)"
|
282 |
+
]
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"cell_type": "code",
|
286 |
+
"execution_count": 25,
|
287 |
+
"metadata": {},
|
288 |
+
"outputs": [
|
289 |
+
{
|
290 |
+
"data": {
|
291 |
+
"text/plain": [
|
292 |
+
"Timestamp('2025-01-11 00:00:00')"
|
293 |
+
]
|
294 |
+
},
|
295 |
+
"execution_count": 25,
|
296 |
+
"metadata": {},
|
297 |
+
"output_type": "execute_result"
|
298 |
+
}
|
299 |
+
],
|
300 |
+
"source": [
|
301 |
+
"min(new_trades.creation_date)"
|
302 |
+
]
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"cell_type": "code",
|
306 |
+
"execution_count": 28,
|
307 |
+
"metadata": {},
|
308 |
+
"outputs": [
|
309 |
+
{
|
310 |
+
"name": "stdout",
|
311 |
+
"output_type": "stream",
|
312 |
+
"text": [
|
313 |
+
"Transformation not needed\n",
|
314 |
+
"Transformation not needed\n",
|
315 |
+
"Initial length before removing duplicates in fpmmTrades= 165530\n"
|
316 |
+
]
|
317 |
+
}
|
318 |
+
],
|
319 |
+
"source": [
|
320 |
+
"# lowercase and strip creator_address\n",
|
321 |
+
"new_trades[\"trader_address\"] = (\n",
|
322 |
+
" new_trades[\"trader_address\"].str.lower().str.strip()\n",
|
323 |
+
")\n",
|
324 |
+
"# ensure creationTimestamp compatibility\n",
|
325 |
+
"try:\n",
|
326 |
+
" new_trades[\"creationTimestamp\"] = new_trades[\"creationTimestamp\"].apply(\n",
|
327 |
+
" lambda x: transform_to_datetime(x)\n",
|
328 |
+
" )\n",
|
329 |
+
"\n",
|
330 |
+
"except Exception as e:\n",
|
331 |
+
" print(f\"Transformation not needed\")\n",
|
332 |
+
"try:\n",
|
333 |
+
" old_markets_df[\"creationTimestamp\"] = old_markets_df[\"creationTimestamp\"].apply(\n",
|
334 |
+
" lambda x: transform_to_datetime(x)\n",
|
335 |
+
" )\n",
|
336 |
+
"except Exception as e:\n",
|
337 |
+
" print(f\"Transformation not needed\")\n",
|
338 |
+
"\n",
|
339 |
+
"# merge two dataframes\n",
|
340 |
+
"merge_df = pd.concat([old_markets_df, new_trades], ignore_index=True)\n",
|
341 |
+
"# avoid numpy objects\n",
|
342 |
+
"merge_df[\"fpmm.arbitrationOccurred\"] = merge_df[\"fpmm.arbitrationOccurred\"].astype(\n",
|
343 |
+
" bool\n",
|
344 |
+
")\n",
|
345 |
+
"merge_df[\"fpmm.isPendingArbitration\"] = merge_df[\n",
|
346 |
+
" \"fpmm.isPendingArbitration\"\n",
|
347 |
+
"].astype(bool)\n",
|
348 |
+
"\n",
|
349 |
+
"# Check for duplicates\n",
|
350 |
+
"print(f\"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}\")\n",
|
351 |
+
"\n",
|
352 |
+
"# Remove duplicates\n",
|
353 |
+
"# fpmm.outcomes is a numpy array\n",
|
354 |
+
"merge_df.drop_duplicates(\"id\", keep=\"last\", inplace=True)"
|
355 |
+
]
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"cell_type": "code",
|
359 |
+
"execution_count": 29,
|
360 |
+
"metadata": {},
|
361 |
+
"outputs": [
|
362 |
+
{
|
363 |
+
"data": {
|
364 |
+
"text/plain": [
|
365 |
+
"Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
|
366 |
+
" 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
|
367 |
+
" 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
|
368 |
+
" 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
|
369 |
+
" 'transactionHash', 'type', 'market_creator',\n",
|
370 |
+
" 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
|
371 |
+
" 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
|
372 |
+
" 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
|
373 |
+
" 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
|
374 |
+
" dtype='object')"
|
375 |
+
]
|
376 |
+
},
|
377 |
+
"execution_count": 29,
|
378 |
+
"metadata": {},
|
379 |
+
"output_type": "execute_result"
|
380 |
+
}
|
381 |
+
],
|
382 |
+
"source": [
|
383 |
+
"merge_df.columns"
|
384 |
+
]
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"cell_type": "code",
|
388 |
+
"execution_count": 30,
|
389 |
+
"metadata": {},
|
390 |
+
"outputs": [
|
391 |
+
{
|
392 |
+
"data": {
|
393 |
+
"text/plain": [
|
394 |
+
"Timestamp('2025-01-14 00:00:00')"
|
395 |
+
]
|
396 |
+
},
|
397 |
+
"execution_count": 30,
|
398 |
+
"metadata": {},
|
399 |
+
"output_type": "execute_result"
|
400 |
+
}
|
401 |
+
],
|
402 |
+
"source": [
|
403 |
+
"max(merge_df.creation_date)"
|
404 |
+
]
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"cell_type": "code",
|
408 |
+
"execution_count": 31,
|
409 |
+
"metadata": {},
|
410 |
+
"outputs": [],
|
411 |
+
"source": [
|
412 |
+
"cutoff_date=\"2024-11-13\"\n",
|
413 |
+
"min_date_utc = pd.to_datetime(cutoff_date, format=\"%Y-%m-%d\", utc=True)"
|
414 |
+
]
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"cell_type": "code",
|
418 |
+
"execution_count": 32,
|
419 |
+
"metadata": {},
|
420 |
+
"outputs": [
|
421 |
+
{
|
422 |
+
"name": "stdout",
|
423 |
+
"output_type": "stream",
|
424 |
+
"text": [
|
425 |
+
"length before filtering 161781\n",
|
426 |
+
"length after filtering 160426\n"
|
427 |
+
]
|
428 |
+
}
|
429 |
+
],
|
430 |
+
"source": [
|
431 |
+
"merge_df[\"creation_timestamp\"] = pd.to_datetime(\n",
|
432 |
+
" merge_df[\"creation_timestamp\"], utc=True\n",
|
433 |
+
")\n",
|
434 |
+
"\n",
|
435 |
+
"print(f\"length before filtering {len(merge_df)}\")\n",
|
436 |
+
"merge_df = merge_df.loc[merge_df[\"creation_timestamp\"] > min_date_utc]\n",
|
437 |
+
"print(f\"length after filtering {len(merge_df)}\")\n"
|
438 |
+
]
|
439 |
+
},
|
440 |
+
{
|
441 |
+
"cell_type": "code",
|
442 |
+
"execution_count": 33,
|
443 |
+
"metadata": {},
|
444 |
+
"outputs": [
|
445 |
+
{
|
446 |
+
"data": {
|
447 |
+
"text/plain": [
|
448 |
+
"Timestamp('2025-01-14 00:00:00')"
|
449 |
+
]
|
450 |
+
},
|
451 |
+
"execution_count": 33,
|
452 |
+
"metadata": {},
|
453 |
+
"output_type": "execute_result"
|
454 |
+
}
|
455 |
+
],
|
456 |
+
"source": [
|
457 |
+
"max(merge_df.creation_date)"
|
458 |
+
]
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"cell_type": "code",
|
462 |
+
"execution_count": 34,
|
463 |
+
"metadata": {},
|
464 |
+
"outputs": [],
|
465 |
+
"source": [
|
466 |
+
"merge_df.to_parquet(\"../tmp/fpmmTrades.parquet\", index=False)"
|
467 |
+
]
|
468 |
+
},
|
469 |
+
{
|
470 |
+
"cell_type": "code",
|
471 |
+
"execution_count": 15,
|
472 |
+
"metadata": {},
|
473 |
+
"outputs": [
|
474 |
+
{
|
475 |
+
"data": {
|
476 |
+
"text/plain": [
|
477 |
+
"Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
|
478 |
+
" 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
|
479 |
+
" 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
|
480 |
+
" 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
|
481 |
+
" 'transactionHash', 'type', 'market_creator',\n",
|
482 |
+
" 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
|
483 |
+
" 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
|
484 |
+
" 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
|
485 |
+
" 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
|
486 |
+
" dtype='object')"
|
487 |
+
]
|
488 |
+
},
|
489 |
+
"execution_count": 15,
|
490 |
+
"metadata": {},
|
491 |
+
"output_type": "execute_result"
|
492 |
+
}
|
493 |
+
],
|
494 |
+
"source": [
|
495 |
+
"missing_df = add_creation_date(missing_df)\n",
|
496 |
+
"missing_df.columns"
|
497 |
+
]
|
498 |
+
},
|
499 |
+
{
|
500 |
+
"cell_type": "code",
|
501 |
+
"execution_count": 16,
|
502 |
+
"metadata": {},
|
503 |
+
"outputs": [
|
504 |
+
{
|
505 |
+
"data": {
|
506 |
+
"text/plain": [
|
507 |
+
"<Axes: xlabel='Count', ylabel='creation_date'>"
|
508 |
+
]
|
509 |
+
},
|
510 |
+
"execution_count": 16,
|
511 |
+
"metadata": {},
|
512 |
+
"output_type": "execute_result"
|
513 |
+
},
|
514 |
+
{
|
515 |
+
"data": {
|
516 |
+
"image/png": "",
|
517 |
+
"text/plain": [
|
518 |
+
"<Figure size 640x480 with 1 Axes>"
|
519 |
+
]
|
520 |
+
},
|
521 |
+
"metadata": {},
|
522 |
+
"output_type": "display_data"
|
523 |
+
}
|
524 |
+
],
|
525 |
+
"source": [
|
526 |
+
"import seaborn as sns\n",
|
527 |
+
"\n",
|
528 |
+
"sns.histplot(missing_df, y=\"creation_date\")"
|
529 |
+
]
|
530 |
+
},
|
531 |
+
{
|
532 |
+
"cell_type": "code",
|
533 |
+
"execution_count": 40,
|
534 |
+
"metadata": {},
|
535 |
+
"outputs": [
|
536 |
+
{
|
537 |
+
"data": {
|
538 |
+
"text/plain": [
|
539 |
+
"<Axes: xlabel='Count', ylabel='creation_date'>"
|
540 |
+
]
|
541 |
+
},
|
542 |
+
"execution_count": 40,
|
543 |
+
"metadata": {},
|
544 |
+
"output_type": "execute_result"
|
545 |
+
},
|
546 |
+
{
|
547 |
+
"data": {
|
548 |
+
"image/png": "",
|
549 |
+
"text/plain": [
|
550 |
+
"<Figure size 640x480 with 1 Axes>"
|
551 |
+
]
|
552 |
+
},
|
553 |
+
"metadata": {},
|
554 |
+
"output_type": "display_data"
|
555 |
+
}
|
556 |
+
],
|
557 |
+
"source": [
|
558 |
+
"sns.histplot(trades_data, y=\"creation_date\")"
|
559 |
+
]
|
560 |
+
},
|
561 |
+
{
|
562 |
+
"cell_type": "code",
|
563 |
+
"execution_count": 19,
|
564 |
+
"metadata": {},
|
565 |
+
"outputs": [],
|
566 |
+
"source": [
|
567 |
+
"def add_extra_columns(new_trades):\n",
|
568 |
+
" new_trades[\"creation_timestamp\"] = pd.to_datetime(new_trades[\"creationTimestamp\"])\n",
|
569 |
+
" new_trades[\"creation_date\"] = new_trades[\"creation_timestamp\"].dt.date\n",
|
570 |
+
" new_trades[\"creation_date\"] = pd.to_datetime(new_trades[\"creation_date\"])"
|
571 |
+
]
|
572 |
+
},
|
573 |
+
{
|
574 |
+
"cell_type": "code",
|
575 |
+
"execution_count": null,
|
576 |
+
"metadata": {},
|
577 |
+
"outputs": [],
|
578 |
+
"source": [
|
579 |
+
"add_extra_columns(new_trades=new_trades)"
|
580 |
+
]
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"cell_type": "code",
|
584 |
+
"execution_count": 9,
|
585 |
+
"metadata": {},
|
586 |
+
"outputs": [
|
587 |
+
{
|
588 |
+
"data": {
|
589 |
+
"text/plain": [
|
590 |
+
"Timestamp('2025-01-13 00:00:00')"
|
591 |
+
]
|
592 |
+
},
|
593 |
+
"execution_count": 9,
|
594 |
+
"metadata": {},
|
595 |
+
"output_type": "execute_result"
|
596 |
+
}
|
597 |
+
],
|
598 |
+
"source": [
|
599 |
+
"max(new_trades.creation_date)"
|
600 |
+
]
|
601 |
+
},
|
602 |
+
{
|
603 |
+
"cell_type": "code",
|
604 |
+
"execution_count": 12,
|
605 |
+
"metadata": {},
|
606 |
+
"outputs": [
|
607 |
+
{
|
608 |
+
"data": {
|
609 |
+
"text/plain": [
|
610 |
+
"Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
|
611 |
+
" 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
|
612 |
+
" 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
|
613 |
+
" 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
|
614 |
+
" 'transactionHash', 'type', 'market_creator',\n",
|
615 |
+
" 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
|
616 |
+
" 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
|
617 |
+
" 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
|
618 |
+
" 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
|
619 |
+
" dtype='object')"
|
620 |
+
]
|
621 |
+
},
|
622 |
+
"execution_count": 12,
|
623 |
+
"metadata": {},
|
624 |
+
"output_type": "execute_result"
|
625 |
+
}
|
626 |
+
],
|
627 |
+
"source": [
|
628 |
+
"new_trades.columns"
|
629 |
+
]
|
630 |
+
},
|
631 |
+
{
|
632 |
+
"cell_type": "code",
|
633 |
+
"execution_count": 23,
|
634 |
+
"metadata": {},
|
635 |
+
"outputs": [
|
636 |
+
{
|
637 |
+
"name": "stdout",
|
638 |
+
"output_type": "stream",
|
639 |
+
"text": [
|
640 |
+
"Transformation not needed\n",
|
641 |
+
"Initial length before removing duplicates in fpmmTrades= 137851\n",
|
642 |
+
"Final length after removing duplicates in fpmmTrades= 137851\n"
|
643 |
+
]
|
644 |
+
}
|
645 |
+
],
|
646 |
+
"source": [
|
647 |
+
"old_trades_df = pd.read_parquet(\"../tmp/fpmmTrades.parquet\")\n",
|
648 |
+
"\n",
|
649 |
+
"\n",
|
650 |
+
"# lowercase and strip creator_address\n",
|
651 |
+
"new_trades[\"trader_address\"] = (\n",
|
652 |
+
" new_trades[\"trader_address\"].str.lower().str.strip()\n",
|
653 |
+
")\n",
|
654 |
+
"\n",
|
655 |
+
"\n",
|
656 |
+
"try:\n",
|
657 |
+
" old_trades_df[\"creationTimestamp\"] = old_trades_df[\"creationTimestamp\"].apply(\n",
|
658 |
+
" lambda x: transform_to_datetime(x)\n",
|
659 |
+
" )\n",
|
660 |
+
"except Exception as e:\n",
|
661 |
+
" print(f\"Transformation not needed\")\n",
|
662 |
+
"\n",
|
663 |
+
"# merge two dataframes\n",
|
664 |
+
"merge_df = pd.concat([old_trades_df, new_trades], ignore_index=True)\n",
|
665 |
+
"# avoid numpy objects\n",
|
666 |
+
"merge_df[\"fpmm.arbitrationOccurred\"] = merge_df[\"fpmm.arbitrationOccurred\"].astype(\n",
|
667 |
+
" bool\n",
|
668 |
+
")\n",
|
669 |
+
"merge_df[\"fpmm.isPendingArbitration\"] = merge_df[\n",
|
670 |
+
" \"fpmm.isPendingArbitration\"\n",
|
671 |
+
"].astype(bool)\n",
|
672 |
+
"\n",
|
673 |
+
"# Check for duplicates\n",
|
674 |
+
"print(f\"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}\")\n",
|
675 |
+
"\n",
|
676 |
+
"# Remove duplicates\n",
|
677 |
+
"# fpmm.outcomes is a numpy array\n",
|
678 |
+
"merge_df.drop_duplicates(\"id\", keep=\"last\", inplace=True)\n",
|
679 |
+
"print(f\"Final length after removing duplicates in fpmmTrades= {len(merge_df)}\")"
|
680 |
+
]
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"cell_type": "code",
|
684 |
+
"execution_count": 24,
|
685 |
+
"metadata": {},
|
686 |
+
"outputs": [],
|
687 |
+
"source": [
|
688 |
+
"merge_df.to_parquet(\"../tmp/fpmmTrades.parquet\", index=False)"
|
689 |
+
]
|
690 |
+
},
|
691 |
+
{
|
692 |
+
"cell_type": "code",
|
693 |
+
"execution_count": 13,
|
694 |
+
"metadata": {},
|
695 |
+
"outputs": [
|
696 |
+
{
|
697 |
+
"data": {
|
698 |
+
"text/plain": [
|
699 |
+
"Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
|
700 |
+
" 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
|
701 |
+
" 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
|
702 |
+
" 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
|
703 |
+
" 'transactionHash', 'type', 'market_creator',\n",
|
704 |
+
" 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
|
705 |
+
" 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
|
706 |
+
" 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
|
707 |
+
" 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
|
708 |
+
" dtype='object')"
|
709 |
+
]
|
710 |
+
},
|
711 |
+
"execution_count": 13,
|
712 |
+
"metadata": {},
|
713 |
+
"output_type": "execute_result"
|
714 |
+
}
|
715 |
+
],
|
716 |
+
"source": [
|
717 |
+
"old_trades_df.columns"
|
718 |
+
]
|
719 |
+
},
|
720 |
+
{
|
721 |
+
"cell_type": "code",
|
722 |
+
"execution_count": 20,
|
723 |
+
"metadata": {},
|
724 |
+
"outputs": [],
|
725 |
+
"source": [
|
726 |
+
"add_extra_columns(new_trades=merge_df)"
|
727 |
+
]
|
728 |
+
},
|
729 |
+
{
|
730 |
+
"cell_type": "code",
|
731 |
+
"execution_count": 21,
|
732 |
+
"metadata": {},
|
733 |
+
"outputs": [
|
734 |
+
{
|
735 |
+
"data": {
|
736 |
+
"text/plain": [
|
737 |
+
"Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
|
738 |
+
" 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
|
739 |
+
" 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
|
740 |
+
" 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
|
741 |
+
" 'transactionHash', 'type', 'market_creator',\n",
|
742 |
+
" 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
|
743 |
+
" 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
|
744 |
+
" 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
|
745 |
+
" 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
|
746 |
+
" dtype='object')"
|
747 |
+
]
|
748 |
+
},
|
749 |
+
"execution_count": 21,
|
750 |
+
"metadata": {},
|
751 |
+
"output_type": "execute_result"
|
752 |
+
}
|
753 |
+
],
|
754 |
+
"source": [
|
755 |
+
"merge_df.columns"
|
756 |
+
]
|
757 |
+
},
|
758 |
+
{
|
759 |
+
"cell_type": "code",
|
760 |
+
"execution_count": 22,
|
761 |
+
"metadata": {},
|
762 |
+
"outputs": [
|
763 |
+
{
|
764 |
+
"data": {
|
765 |
+
"text/plain": [
|
766 |
+
"Timestamp('2025-01-13 00:00:00')"
|
767 |
+
]
|
768 |
+
},
|
769 |
+
"execution_count": 22,
|
770 |
+
"metadata": {},
|
771 |
+
"output_type": "execute_result"
|
772 |
+
}
|
773 |
+
],
|
774 |
+
"source": [
|
775 |
+
"max(merge_df.creation_date)"
|
776 |
+
]
|
777 |
+
},
|
778 |
{
|
779 |
"cell_type": "code",
|
780 |
"execution_count": 9,
|
notebooks/weekly_analysis.ipynb
CHANGED
@@ -4676,7 +4676,7 @@
|
|
4676 |
],
|
4677 |
"metadata": {
|
4678 |
"kernelspec": {
|
4679 |
-
"display_name": "
|
4680 |
"language": "python",
|
4681 |
"name": "python3"
|
4682 |
},
|
@@ -4690,7 +4690,7 @@
|
|
4690 |
"name": "python",
|
4691 |
"nbconvert_exporter": "python",
|
4692 |
"pygments_lexer": "ipython3",
|
4693 |
-
"version": "3.12.
|
4694 |
},
|
4695 |
"orig_nbformat": 4
|
4696 |
},
|
|
|
4676 |
],
|
4677 |
"metadata": {
|
4678 |
"kernelspec": {
|
4679 |
+
"display_name": "Python 3",
|
4680 |
"language": "python",
|
4681 |
"name": "python3"
|
4682 |
},
|
|
|
4690 |
"name": "python",
|
4691 |
"nbconvert_exporter": "python",
|
4692 |
"pygments_lexer": "ipython3",
|
4693 |
+
"version": "3.12.3"
|
4694 |
},
|
4695 |
"orig_nbformat": 4
|
4696 |
},
|
scripts/cleaning_old_info.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import pandas as pd
|
2 |
-
from utils import DATA_DIR, TMP_DIR
|
3 |
|
4 |
|
5 |
def clean_old_data_from_parquet_files(cutoff_date: str):
|
@@ -9,7 +9,7 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
|
|
9 |
|
10 |
# clean tools.parquet
|
11 |
try:
|
12 |
-
tools = pd.read_parquet(
|
13 |
|
14 |
# make sure creator_address is in the columns
|
15 |
assert "trader_address" in tools.columns, "trader_address column not found"
|
@@ -22,7 +22,7 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
|
|
22 |
print(f"length before filtering {len(tools)}")
|
23 |
tools = tools.loc[tools["request_time"] > min_date_utc]
|
24 |
print(f"length after filtering {len(tools)}")
|
25 |
-
tools.to_parquet(
|
26 |
|
27 |
except Exception as e:
|
28 |
print(f"Error cleaning tools file {e}")
|
@@ -53,11 +53,11 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
|
|
53 |
unknown_traders["creation_timestamp"], utc=True
|
54 |
)
|
55 |
|
56 |
-
print(f"length before filtering {len(unknown_traders)}")
|
57 |
unknown_traders = unknown_traders.loc[
|
58 |
unknown_traders["creation_timestamp"] > min_date_utc
|
59 |
]
|
60 |
-
print(f"length after filtering {len(unknown_traders)}")
|
61 |
unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
|
62 |
|
63 |
except Exception as e:
|
@@ -66,7 +66,15 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
|
|
66 |
# clean fpmmTrades.parquet
|
67 |
try:
|
68 |
fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
fpmmTrades["creation_timestamp"] = pd.to_datetime(
|
71 |
fpmmTrades["creation_timestamp"], utc=True
|
72 |
)
|
|
|
1 |
import pandas as pd
|
2 |
+
from utils import DATA_DIR, TMP_DIR, transform_to_datetime
|
3 |
|
4 |
|
5 |
def clean_old_data_from_parquet_files(cutoff_date: str):
|
|
|
9 |
|
10 |
# clean tools.parquet
|
11 |
try:
|
12 |
+
tools = pd.read_parquet(TMP_DIR / "tools.parquet")
|
13 |
|
14 |
# make sure creator_address is in the columns
|
15 |
assert "trader_address" in tools.columns, "trader_address column not found"
|
|
|
22 |
print(f"length before filtering {len(tools)}")
|
23 |
tools = tools.loc[tools["request_time"] > min_date_utc]
|
24 |
print(f"length after filtering {len(tools)}")
|
25 |
+
tools.to_parquet(TMP_DIR / "tools.parquet", index=False)
|
26 |
|
27 |
except Exception as e:
|
28 |
print(f"Error cleaning tools file {e}")
|
|
|
53 |
unknown_traders["creation_timestamp"], utc=True
|
54 |
)
|
55 |
|
56 |
+
print(f"length unknown traders before filtering {len(unknown_traders)}")
|
57 |
unknown_traders = unknown_traders.loc[
|
58 |
unknown_traders["creation_timestamp"] > min_date_utc
|
59 |
]
|
60 |
+
print(f"length unknown traders after filtering {len(unknown_traders)}")
|
61 |
unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
|
62 |
|
63 |
except Exception as e:
|
|
|
66 |
# clean fpmmTrades.parquet
|
67 |
try:
|
68 |
fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
|
69 |
+
try:
|
70 |
+
fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
|
71 |
+
lambda x: transform_to_datetime(x)
|
72 |
+
)
|
73 |
+
except Exception as e:
|
74 |
+
print(f"Transformation not needed")
|
75 |
+
fpmmTrades["creation_timestamp"] = pd.to_datetime(
|
76 |
+
fpmmTrades["creationTimestamp"]
|
77 |
+
)
|
78 |
fpmmTrades["creation_timestamp"] = pd.to_datetime(
|
79 |
fpmmTrades["creation_timestamp"], utc=True
|
80 |
)
|
scripts/cloud_storage.py
CHANGED
@@ -23,7 +23,7 @@ def initialize_client():
|
|
23 |
return client
|
24 |
|
25 |
|
26 |
-
def upload_file(client, filename: str, file_path: str):
|
27 |
"""Upload a file to the bucket"""
|
28 |
try:
|
29 |
OBJECT_NAME = FOLDER_NAME + "/" + filename
|
@@ -34,8 +34,10 @@ def upload_file(client, filename: str, file_path: str):
|
|
34 |
BUCKET_NAME, OBJECT_NAME, file_path, part_size=10 * 1024 * 1024
|
35 |
) # 10MB parts
|
36 |
print(f"File '{file_path}' uploaded as '{OBJECT_NAME}'.")
|
|
|
37 |
except S3Error as err:
|
38 |
print(f"Error uploading file: {err}")
|
|
|
39 |
|
40 |
|
41 |
def download_file(client, filename: str, file_path: str):
|
@@ -48,11 +50,16 @@ def download_file(client, filename: str, file_path: str):
|
|
48 |
print(f"Error downloading file: {err}")
|
49 |
|
50 |
|
51 |
-
def load_historical_file(client, filename: str):
|
52 |
"""Function to load one file into the cloud storage"""
|
53 |
file_path = filename
|
54 |
file_path = HIST_DIR / filename
|
55 |
-
upload_file(client, filename, file_path)
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
|
58 |
def process_historical_files(client):
|
@@ -63,8 +70,10 @@ def process_historical_files(client):
|
|
63 |
# Check if file is a parquet file
|
64 |
if filename.endswith(".parquet"):
|
65 |
try:
|
66 |
-
load_historical_file(client, filename)
|
67 |
-
|
|
|
|
|
68 |
except Exception as e:
|
69 |
print(f"Error processing {filename}: {str(e)}")
|
70 |
|
|
|
23 |
return client
|
24 |
|
25 |
|
26 |
+
def upload_file(client, filename: str, file_path: str) -> bool:
|
27 |
"""Upload a file to the bucket"""
|
28 |
try:
|
29 |
OBJECT_NAME = FOLDER_NAME + "/" + filename
|
|
|
34 |
BUCKET_NAME, OBJECT_NAME, file_path, part_size=10 * 1024 * 1024
|
35 |
) # 10MB parts
|
36 |
print(f"File '{file_path}' uploaded as '{OBJECT_NAME}'.")
|
37 |
+
return True
|
38 |
except S3Error as err:
|
39 |
print(f"Error uploading file: {err}")
|
40 |
+
return False
|
41 |
|
42 |
|
43 |
def download_file(client, filename: str, file_path: str):
|
|
|
50 |
print(f"Error downloading file: {err}")
|
51 |
|
52 |
|
53 |
+
def load_historical_file(client, filename: str) -> bool:
    """Upload one file from the historical folder to the cloud storage.

    Args:
        client: storage client, passed straight through to ``upload_file``.
        filename: name of the file inside ``HIST_DIR``; it is also passed to
            ``upload_file`` as the name to store the file under.

    Returns:
        The result reported by ``upload_file`` for this upload.
    """
    return upload_file(client, filename, HIST_DIR / filename)
|
58 |
+
|
59 |
+
|
60 |
+
def upload_historical_file(filename: str) -> bool:
    """Upload one historical file using a freshly initialised storage client.

    Args:
        filename: name of the file inside the historical folder.

    Returns:
        The result of ``load_historical_file``, so callers can tell whether
        the upload went through instead of having the outcome discarded.
    """
    client = initialize_client()
    return load_historical_file(client=client, filename=filename)
|
63 |
|
64 |
|
65 |
def process_historical_files(client):
|
|
|
70 |
# Check if file is a parquet file
|
71 |
if filename.endswith(".parquet"):
|
72 |
try:
|
73 |
+
if load_historical_file(client, filename):
|
74 |
+
print(f"Successfully processed {filename}")
|
75 |
+
else:
|
76 |
+
print("Error loading the files")
|
77 |
except Exception as e:
|
78 |
print(f"Error processing {filename}: {str(e)}")
|
79 |
|
scripts/daily_data.py
CHANGED
@@ -11,6 +11,7 @@ from nr_mech_calls import (
|
|
11 |
transform_to_datetime,
|
12 |
)
|
13 |
from markets import check_current_week_data
|
|
|
14 |
|
15 |
logging.basicConfig(level=logging.INFO)
|
16 |
|
@@ -51,6 +52,9 @@ def prepare_live_metrics(
|
|
51 |
# save into a separate file
|
52 |
all_trades_df.to_parquet(DATA_DIR / "daily_info.parquet", index=False)
|
53 |
|
|
|
|
|
|
|
54 |
|
55 |
if __name__ == "__main__":
|
56 |
prepare_live_metrics()
|
|
|
11 |
transform_to_datetime,
|
12 |
)
|
13 |
from markets import check_current_week_data
|
14 |
+
from staking import generate_retention_activity_file
|
15 |
|
16 |
logging.basicConfig(level=logging.INFO)
|
17 |
|
|
|
52 |
# save into a separate file
|
53 |
all_trades_df.to_parquet(DATA_DIR / "daily_info.parquet", index=False)
|
54 |
|
55 |
+
# prepare the retention info file
|
56 |
+
generate_retention_activity_file()
|
57 |
+
|
58 |
|
59 |
if __name__ == "__main__":
|
60 |
prepare_live_metrics()
|
scripts/get_mech_info.py
CHANGED
@@ -316,7 +316,7 @@ def get_mech_events_since_last_run(logger):
|
|
316 |
try:
|
317 |
all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
|
318 |
latest_timestamp = max(all_trades.creation_timestamp)
|
319 |
-
# cutoff_date = "2024-12-
|
320 |
# latest_timestamp = pd.Timestamp(
|
321 |
# datetime.strptime(cutoff_date, "%Y-%m-%d")
|
322 |
# ).tz_localize("UTC")
|
|
|
316 |
try:
|
317 |
all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
|
318 |
latest_timestamp = max(all_trades.creation_timestamp)
|
319 |
+
# cutoff_date = "2024-12-22"
|
320 |
# latest_timestamp = pd.Timestamp(
|
321 |
# datetime.strptime(cutoff_date, "%Y-%m-%d")
|
322 |
# ).tz_localize("UTC")
|
scripts/gnosis_timestamps.py
CHANGED
@@ -137,7 +137,9 @@ def compute_request_time(tools_df: pd.DataFrame) -> pd.DataFrame:
|
|
137 |
tools_df["request_time"]
|
138 |
).dt.strftime("%Y-%m")
|
139 |
tools_df["request_month_year_week"] = (
|
140 |
-
pd.to_datetime(tools_df["request_time"])
|
|
|
|
|
141 |
)
|
142 |
# Update t_map with new timestamps
|
143 |
new_timestamps = (
|
|
|
137 |
tools_df["request_time"]
|
138 |
).dt.strftime("%Y-%m")
|
139 |
tools_df["request_month_year_week"] = (
|
140 |
+
pd.to_datetime(tools_df["request_time"])
|
141 |
+
.dt.to_period("W")
|
142 |
+
.dt.start_time.dt.strftime("%b-%d-%Y")
|
143 |
)
|
144 |
# Update t_map with new timestamps
|
145 |
new_timestamps = (
|
scripts/markets.py
CHANGED
@@ -156,15 +156,19 @@ def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
|
|
156 |
return df
|
157 |
|
158 |
|
159 |
-
def create_fpmmTrades(
|
|
|
|
|
|
|
160 |
"""Create fpmmTrades for all trades."""
|
|
|
161 |
# Quickstart trades
|
162 |
qs_trades_json = query_omen_xdai_subgraph(
|
163 |
trader_category="quickstart",
|
164 |
from_timestamp=from_timestamp,
|
165 |
-
to_timestamp=
|
166 |
fpmm_from_timestamp=from_timestamp,
|
167 |
-
fpmm_to_timestamp=
|
168 |
)
|
169 |
|
170 |
print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
|
@@ -175,6 +179,7 @@ def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
|
|
175 |
qs_df = transform_fpmmTrades(qs_df)
|
176 |
|
177 |
# Pearl trades
|
|
|
178 |
pearl_trades_json = query_omen_xdai_subgraph(
|
179 |
trader_category="pearl",
|
180 |
from_timestamp=from_timestamp,
|
@@ -335,10 +340,14 @@ def add_market_creator(tools: pd.DataFrame) -> None:
|
|
335 |
return tools
|
336 |
|
337 |
|
338 |
-
def fpmmTrades_etl(
|
|
|
|
|
339 |
print("Generating the trades file")
|
340 |
try:
|
341 |
-
fpmmTrades = create_fpmmTrades(
|
|
|
|
|
342 |
except FileNotFoundError:
|
343 |
print(f"Error creating {trades_filename} file .")
|
344 |
|
|
|
156 |
return df
|
157 |
|
158 |
|
159 |
+
def create_fpmmTrades(
|
160 |
+
from_timestamp: int = DEFAULT_FROM_TIMESTAMP,
|
161 |
+
to_timestamp: int = DEFAULT_TO_TIMESTAMP,
|
162 |
+
):
|
163 |
"""Create fpmmTrades for all trades."""
|
164 |
+
print("Getting trades from Quickstart markets")
|
165 |
# Quickstart trades
|
166 |
qs_trades_json = query_omen_xdai_subgraph(
|
167 |
trader_category="quickstart",
|
168 |
from_timestamp=from_timestamp,
|
169 |
+
to_timestamp=to_timestamp,
|
170 |
fpmm_from_timestamp=from_timestamp,
|
171 |
+
fpmm_to_timestamp=to_timestamp,
|
172 |
)
|
173 |
|
174 |
print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
|
|
|
179 |
qs_df = transform_fpmmTrades(qs_df)
|
180 |
|
181 |
# Pearl trades
|
182 |
+
print("Getting trades from Pearl markets")
|
183 |
pearl_trades_json = query_omen_xdai_subgraph(
|
184 |
trader_category="pearl",
|
185 |
from_timestamp=from_timestamp,
|
|
|
340 |
return tools
|
341 |
|
342 |
|
343 |
+
def fpmmTrades_etl(
|
344 |
+
trades_filename: str, from_timestamp: int, to_timestamp: int = DEFAULT_TO_TIMESTAMP
|
345 |
+
) -> None:
|
346 |
print("Generating the trades file")
|
347 |
try:
|
348 |
+
fpmmTrades = create_fpmmTrades(
|
349 |
+
from_timestamp=from_timestamp, to_timestamp=to_timestamp
|
350 |
+
)
|
351 |
except FileNotFoundError:
|
352 |
print(f"Error creating {trades_filename} file .")
|
353 |
|
scripts/profitability.py
CHANGED
@@ -146,7 +146,7 @@ def prepare_profitalibity_data(
|
|
146 |
|
147 |
# Check if tools.parquet is in the same directory
|
148 |
try:
|
149 |
-
#
|
150 |
tools = pd.read_parquet(DATA_DIR / tools_filename)
|
151 |
|
152 |
# make sure creator_address is in the columns
|
@@ -165,7 +165,7 @@ def prepare_profitalibity_data(
|
|
165 |
return
|
166 |
|
167 |
# Check if fpmmTrades.parquet is in the same directory
|
168 |
-
print("Reading the trades file")
|
169 |
try:
|
170 |
fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
|
171 |
except FileNotFoundError:
|
@@ -413,10 +413,8 @@ def run_profitability_analysis(
|
|
413 |
|
414 |
all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
|
415 |
|
416 |
-
# add staking labels
|
417 |
all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
|
418 |
|
419 |
-
# create the unknown traders dataset
|
420 |
print("Creating unknown traders dataset")
|
421 |
unknown_traders_df, all_trades_df = create_unknown_traders_df(
|
422 |
trades_df=all_trades_df
|
@@ -424,9 +422,10 @@ def run_profitability_analysis(
|
|
424 |
# merge with previous unknown traders dataset
|
425 |
previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
|
426 |
|
427 |
-
unknown_traders_df = pd.concat(
|
428 |
[unknown_traders_df, previous_unknown_traders], ignore_index=True
|
429 |
)
|
|
|
430 |
unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
|
431 |
|
432 |
# save to parquet
|
@@ -437,6 +436,81 @@ def run_profitability_analysis(
|
|
437 |
return all_trades_df
|
438 |
|
439 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
440 |
if __name__ == "__main__":
|
441 |
# updating the whole fpmmTrades parquet file instead of just the new ones
|
442 |
# trade_mech_calls = pd.read_parquet(TMP_DIR / "result_df.parquet")
|
|
|
146 |
|
147 |
# Check if tools.parquet is in the same directory
|
148 |
try:
|
149 |
+
# tools parquet file
|
150 |
tools = pd.read_parquet(DATA_DIR / tools_filename)
|
151 |
|
152 |
# make sure creator_address is in the columns
|
|
|
165 |
return
|
166 |
|
167 |
# Check if fpmmTrades.parquet is in the same directory
|
168 |
+
print("Reading the new trades file")
|
169 |
try:
|
170 |
fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
|
171 |
except FileNotFoundError:
|
|
|
413 |
|
414 |
all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
|
415 |
|
|
|
416 |
all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
|
417 |
|
|
|
418 |
print("Creating unknown traders dataset")
|
419 |
unknown_traders_df, all_trades_df = create_unknown_traders_df(
|
420 |
trades_df=all_trades_df
|
|
|
422 |
# merge with previous unknown traders dataset
|
423 |
previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
|
424 |
|
425 |
+
unknown_traders_df: pd.DataFrame = pd.concat(
|
426 |
[unknown_traders_df, previous_unknown_traders], ignore_index=True
|
427 |
)
|
428 |
+
unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
|
429 |
unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
|
430 |
|
431 |
# save to parquet
|
|
|
436 |
return all_trades_df
|
437 |
|
438 |
|
439 |
+
def add_trades_profitability(trades_filename: str):
    """Compute profitability for a trades file and merge it into the stored datasets.

    Reads ``DATA_DIR / trades_filename`` and ``TMP_DIR / "tools.parquet"``,
    estimates the mech calls per trade, analyses the trades and then updates
    ``invalid_trades.parquet``, ``unknown_traders.parquet`` and
    ``all_trades_profitability.parquet`` inside ``DATA_DIR``. Every merge is
    de-duplicated on ``trade_id``.

    Args:
        trades_filename: name of the trades parquet file inside ``DATA_DIR``.

    Returns:
        None. When the trades file does not exist nothing is processed.

    Raises:
        AssertionError: if the trades file has no ``trader_address`` column.
    """
    print("Reading the trades file")
    try:
        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
    except FileNotFoundError:
        print(f"Error reading {trades_filename} file .")
        # Without the trades there is nothing to analyse; carrying on would
        # only fail later with a NameError on the unbound variable.
        return

    # make sure trader_address is in the columns
    assert "trader_address" in fpmmTrades.columns, "trader_address column not found"

    # lowercase and strip trader_address
    fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()

    print("Reading tools parquet file")
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")

    # Best effort: the column may already hold datetimes, in which case the
    # conversion fails and the column is left untouched.
    try:
        fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
            transform_to_datetime
        )
    except Exception:
        print("Transformation not needed")

    print("Computing the estimated mech calls dataset")
    trade_mech_calls = compute_mech_calls_based_on_timestamps(
        fpmmTrades=fpmmTrades, tools=tools
    )
    print(trade_mech_calls.total_mech_calls.describe())
    print("Analysing trades...")
    all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)

    # debugging purposes
    all_trades_df.to_parquet(JSON_DATA_DIR / "missing_trades_df.parquet", index=False)

    # filter invalid markets. Condition: "is_invalid" is True
    print("Checking invalid trades")
    invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
    if len(invalid_trades) > 0:
        try:
            print("Merging invalid trades parquet file")
            old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
            merge_df = pd.concat(
                [old_invalid_trades, invalid_trades], ignore_index=True
            )
            # default keep="first": an already stored trade_id wins
            invalid_trades = merge_df.drop_duplicates("trade_id")
        except Exception as e:
            print(f"Error updating the invalid trades parquet {e}")
        # Written only when there are invalid trades, so an empty frame never
        # replaces the stored history.
        invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
    all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]

    print("Adding staking labels")
    all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
    print("Creating unknown traders dataset")
    unknown_traders_df, all_trades_df = create_unknown_traders_df(
        trades_df=all_trades_df
    )
    if len(unknown_traders_df) > 0:
        print("Merging unknown traders info")
        # merge with previous unknown traders dataset
        previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")

        unknown_traders_df = pd.concat(
            [unknown_traders_df, previous_unknown_traders], ignore_index=True
        )
        # The stored rows come last, so keep="last" prefers them on a clash.
        unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
        unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)

    print("merge with previous all_trades_profitability")
    old_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    all_trades_df = pd.concat([all_trades_df, old_trades], ignore_index=True)
    # The stored rows come last, so keep="last" prefers them on a clash.
    all_trades_df.drop_duplicates("trade_id", keep="last", inplace=True)
    all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
|
512 |
+
|
513 |
+
|
514 |
if __name__ == "__main__":
|
515 |
# updating the whole fpmmTrades parquet file instead of just the new ones
|
516 |
# trade_mech_calls = pd.read_parquet(TMP_DIR / "result_df.parquet")
|
scripts/pull_data.py
CHANGED
@@ -3,7 +3,7 @@ from datetime import datetime
|
|
3 |
import pandas as pd
|
4 |
from markets import etl as mkt_etl, DEFAULT_FILENAME as MARKETS_FILENAME, fpmmTrades_etl
|
5 |
from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
|
6 |
-
from profitability import run_profitability_analysis
|
7 |
from utils import (
|
8 |
get_question,
|
9 |
current_answer,
|
@@ -22,7 +22,7 @@ from update_tools_accuracy import compute_tools_accuracy
|
|
22 |
from cleaning_old_info import clean_old_data_from_parquet_files
|
23 |
from web3_utils import updating_timestamps
|
24 |
from manage_space_files import move_files
|
25 |
-
from cloud_storage import
|
26 |
from tools_metrics import compute_tools_based_datasets
|
27 |
|
28 |
|
@@ -64,7 +64,7 @@ def save_historical_data():
|
|
64 |
filename = f"tools_{timestamp}.parquet"
|
65 |
tools.to_parquet(HIST_DIR / filename, index=False)
|
66 |
# save into cloud storage
|
67 |
-
|
68 |
except Exception as e:
|
69 |
print(f"Error saving tools file in the historical folder {e}")
|
70 |
|
@@ -73,7 +73,7 @@ def save_historical_data():
|
|
73 |
filename = f"all_trades_profitability_{timestamp}.parquet"
|
74 |
all_trades.to_parquet(HIST_DIR / filename, index=False)
|
75 |
# save into cloud storage
|
76 |
-
|
77 |
|
78 |
except Exception as e:
|
79 |
print(
|
@@ -101,7 +101,6 @@ def only_new_weekly_analysis():
|
|
101 |
|
102 |
# FpmmTrades ETL
|
103 |
fpmmTrades_etl(
|
104 |
-
rpc=rpc,
|
105 |
trades_filename="new_fpmmTrades.parquet",
|
106 |
from_timestamp=int(latest_timestamp.timestamp()),
|
107 |
)
|
@@ -132,7 +131,7 @@ def only_new_weekly_analysis():
|
|
132 |
|
133 |
save_historical_data()
|
134 |
try:
|
135 |
-
clean_old_data_from_parquet_files("2024-11-
|
136 |
except Exception as e:
|
137 |
print("Error cleaning the oldest information from parquet files")
|
138 |
print(f"reason = {e}")
|
@@ -143,5 +142,28 @@ def only_new_weekly_analysis():
|
|
143 |
logging.info("Weekly analysis files generated and saved")
|
144 |
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
if __name__ == "__main__":
|
147 |
only_new_weekly_analysis()
|
|
|
|
3 |
import pandas as pd
|
4 |
from markets import etl as mkt_etl, DEFAULT_FILENAME as MARKETS_FILENAME, fpmmTrades_etl
|
5 |
from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
|
6 |
+
from profitability import run_profitability_analysis, add_trades_profitability
|
7 |
from utils import (
|
8 |
get_question,
|
9 |
current_answer,
|
|
|
22 |
from cleaning_old_info import clean_old_data_from_parquet_files
|
23 |
from web3_utils import updating_timestamps
|
24 |
from manage_space_files import move_files
|
25 |
+
from cloud_storage import upload_historical_file
|
26 |
from tools_metrics import compute_tools_based_datasets
|
27 |
|
28 |
|
|
|
64 |
filename = f"tools_{timestamp}.parquet"
|
65 |
tools.to_parquet(HIST_DIR / filename, index=False)
|
66 |
# save into cloud storage
|
67 |
+
upload_historical_file(filename)
|
68 |
except Exception as e:
|
69 |
print(f"Error saving tools file in the historical folder {e}")
|
70 |
|
|
|
73 |
filename = f"all_trades_profitability_{timestamp}.parquet"
|
74 |
all_trades.to_parquet(HIST_DIR / filename, index=False)
|
75 |
# save into cloud storage
|
76 |
+
upload_historical_file(filename)
|
77 |
|
78 |
except Exception as e:
|
79 |
print(
|
|
|
101 |
|
102 |
# FpmmTrades ETL
|
103 |
fpmmTrades_etl(
|
|
|
104 |
trades_filename="new_fpmmTrades.parquet",
|
105 |
from_timestamp=int(latest_timestamp.timestamp()),
|
106 |
)
|
|
|
131 |
|
132 |
save_historical_data()
|
133 |
try:
|
134 |
+
clean_old_data_from_parquet_files("2024-11-19")
|
135 |
except Exception as e:
|
136 |
print("Error cleaning the oldest information from parquet files")
|
137 |
print(f"reason = {e}")
|
|
|
142 |
logging.info("Weekly analysis files generated and saved")
|
143 |
|
144 |
|
145 |
+
def restoring_trades_data(from_date: str, to_date: str):
    """Re-fetch the trades of a date window and merge them into the stored data.

    Runs the markets ETL, downloads the trades between the two dates into
    ``missing_fpmmTrades.parquet``, merges that file with the previous
    fpmmTrades file and finally adds the profitability information for it.

    Args:
        from_date: start of the window, formatted as ``YYYY-MM-DD``.
        to_date: end of the window, formatted as ``YYYY-MM-DD``.
    """
    missing_trades_file = "missing_fpmmTrades.parquet"

    # Both bounds are parsed as UTC timestamps
    window_start = pd.to_datetime(from_date, format="%Y-%m-%d", utc=True)
    window_end = pd.to_datetime(to_date, format="%Y-%m-%d", utc=True)

    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # The trades ETL takes the window as integer epoch seconds
    start_ts = int(window_start.timestamp())
    end_ts = int(window_end.timestamp())
    fpmmTrades_etl(
        trades_filename=missing_trades_file,
        from_timestamp=start_ts,
        to_timestamp=end_ts,
    )

    # merge with the old file
    print("Merging with previous fpmmTrades file")
    update_fpmmTrades_parquet(trades_filename=missing_trades_file)

    # adding tools information
    add_trades_profitability(trades_filename=missing_trades_file)
|
165 |
+
|
166 |
+
|
167 |
if __name__ == "__main__":
|
168 |
only_new_weekly_analysis()
|
169 |
+
# restoring_trades_data("2024-12-28", "2025-01-07")
|
scripts/staking.py
CHANGED
@@ -22,13 +22,27 @@ STAKING_PROGRAMS_QS = {
|
|
22 |
"quickstart_beta_expert": "0x5344B7DD311e5d3DdDd46A4f71481bD7b05AAA3e",
|
23 |
"quickstart_beta_expert_2": "0xb964e44c126410df341ae04B13aB10A985fE3513",
|
24 |
"quickstart_beta_expert_3": "0x80faD33Cadb5F53f9D29F02Db97D682E8b101618",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
}
|
26 |
|
27 |
STAKING_PROGRAMS_PEARL = {
|
28 |
"pearl_alpha": "0xEE9F19b5DF06c7E8Bfc7B28745dcf944C504198A",
|
29 |
"pearl_beta": "0xeF44Fb0842DDeF59D37f85D61A1eF492bbA6135d",
|
30 |
"pearl_beta_2": "0x1c2F82413666d2a3fD8bC337b0268e62dDF67434",
|
|
|
|
|
|
|
31 |
}
|
|
|
|
|
32 |
SERVICE_REGISTRY_ADDRESS = "0x9338b5153AE39BB89f50468E608eD9d764B755fD"
|
33 |
|
34 |
|
@@ -94,6 +108,8 @@ def get_service_data(service_registry: Any, service_id: int) -> dict:
|
|
94 |
state = data[-1]
|
95 |
# print(f"address = {address}")
|
96 |
# print(f"state={state}")
|
|
|
|
|
97 |
if address != "0x0000000000000000000000000000000000000000":
|
98 |
tmp_map[service_id] = {
|
99 |
"safe_address": address,
|
@@ -103,7 +119,7 @@ def get_service_data(service_registry: Any, service_id: int) -> dict:
|
|
103 |
return tmp_map
|
104 |
|
105 |
|
106 |
-
def update_service_map(start: int = 1, end: int =
|
107 |
if os.path.exists(DATA_DIR / "service_map.pkl"):
|
108 |
with open(DATA_DIR / "service_map.pkl", "rb") as f:
|
109 |
service_map = pickle.load(f)
|
@@ -197,11 +213,85 @@ def label_trades_by_staking(trades_df: pd.DataFrame, start: int = None) -> None:
|
|
197 |
return trades_df
|
198 |
|
199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
if __name__ == "__main__":
|
201 |
# create_service_map()
|
202 |
-
trades_df = pd.read_parquet(TMP_DIR / "all_trades_df.parquet")
|
203 |
-
trades_df = trades_df.loc[trades_df["is_invalid"] == False]
|
204 |
-
|
205 |
-
trades_df = label_trades_by_staking(trades_df=trades_df, start=8)
|
206 |
-
print(trades_df.staking.value_counts())
|
207 |
-
trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
"quickstart_beta_expert": "0x5344B7DD311e5d3DdDd46A4f71481bD7b05AAA3e",
|
23 |
"quickstart_beta_expert_2": "0xb964e44c126410df341ae04B13aB10A985fE3513",
|
24 |
"quickstart_beta_expert_3": "0x80faD33Cadb5F53f9D29F02Db97D682E8b101618",
|
25 |
+
"quickstart_beta_expert_4": "0xaD9d891134443B443D7F30013c7e14Fe27F2E029",
|
26 |
+
"quickstart_beta_expert_5": "0xE56dF1E563De1B10715cB313D514af350D207212",
|
27 |
+
"quickstart_beta_expert_6": "0x2546214aEE7eEa4bEE7689C81231017CA231Dc93",
|
28 |
+
"quickstart_beta_expert_7": "0xD7A3C8b975f71030135f1a66e9e23164d54fF455",
|
29 |
+
"quickstart_beta_expert_8": "0x356C108D49C5eebd21c84c04E9162de41933030c",
|
30 |
+
"quickstart_beta_expert_9": "0x17dBAe44BC5618Cc254055b386A29576b4F87015",
|
31 |
+
"quickstart_beta_expert_10": "0xB0ef657b8302bd2c74B6E6D9B2b4b39145b19c6f",
|
32 |
+
"quickstart_beta_expert_11": "0x3112c1613eAC3dBAE3D4E38CeF023eb9E2C91CF7",
|
33 |
+
"quickstart_beta_expert_12": "0xF4a75F476801B3fBB2e7093aCDcc3576593Cc1fc",
|
34 |
}
|
35 |
|
36 |
STAKING_PROGRAMS_PEARL = {
|
37 |
"pearl_alpha": "0xEE9F19b5DF06c7E8Bfc7B28745dcf944C504198A",
|
38 |
"pearl_beta": "0xeF44Fb0842DDeF59D37f85D61A1eF492bbA6135d",
|
39 |
"pearl_beta_2": "0x1c2F82413666d2a3fD8bC337b0268e62dDF67434",
|
40 |
+
"pearl_beta_3": "0xBd59Ff0522aA773cB6074ce83cD1e4a05A457bc1",
|
41 |
+
"pearl_beta_4": "0x3052451e1eAee78e62E169AfdF6288F8791F2918",
|
42 |
+
"pearl_beta_5": "0x4Abe376Fda28c2F43b84884E5f822eA775DeA9F4",
|
43 |
}
|
44 |
+
|
45 |
+
|
46 |
SERVICE_REGISTRY_ADDRESS = "0x9338b5153AE39BB89f50468E608eD9d764B755fD"
|
47 |
|
48 |
|
|
|
108 |
state = data[-1]
|
109 |
# print(f"address = {address}")
|
110 |
# print(f"state={state}")
|
111 |
+
# PEARL trade
|
112 |
+
|
113 |
if address != "0x0000000000000000000000000000000000000000":
|
114 |
tmp_map[service_id] = {
|
115 |
"safe_address": address,
|
|
|
119 |
return tmp_map
|
120 |
|
121 |
|
122 |
+
def update_service_map(start: int = 1, end: int = 2000):
|
123 |
if os.path.exists(DATA_DIR / "service_map.pkl"):
|
124 |
with open(DATA_DIR / "service_map.pkl", "rb") as f:
|
125 |
service_map = pickle.load(f)
|
|
|
213 |
return trades_df
|
214 |
|
215 |
|
216 |
+
def generate_retention_activity_file():
    """Build the retention activity dataset from the tools file.

    Reads ``TMP_DIR / "tools.parquet"``, keeps the trader, request time,
    market creator and request date columns, labels the rows with
    ``label_trades_by_staking``, adds a ``month_year_week`` label and writes
    the result to ``TMP_DIR / "retention_activity.parquet"``.

    Returns:
        True once the file has been written.
    """
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date
    tools = tools.sort_values(by="request_time", ascending=True)
    # Explicit copy: columns are added to this subset further down, and it
    # must not behave like a slice of ``tools``.
    reduced_tools_df = tools[
        ["trader_address", "request_time", "market_creator", "request_date"]
    ].copy()
    print(f"length of reduced tools before labeling = {len(reduced_tools_df)}")
    reduced_tools_df = label_trades_by_staking(trades_df=reduced_tools_df)
    print(f"length of reduced tools after labeling = {len(reduced_tools_df)}")
    reduced_tools_df = reduced_tools_df.sort_values(by="request_time", ascending=True)
    # Derive the week label from the frame being written, not from ``tools``,
    # so it stays correct even if the labelling step changed rows or index.
    # The "W" period ends on Sunday, so its start_time is the week's Monday.
    reduced_tools_df["month_year_week"] = (
        pd.to_datetime(reduced_tools_df["request_time"])
        .dt.to_period("W")
        .dt.start_time.dt.strftime("%b-%d-%Y")
    )
    reduced_tools_df.to_parquet(TMP_DIR / "retention_activity.parquet")
    return True
|
235 |
+
|
236 |
+
|
237 |
+
def check_list_addresses(address_list: list):
    """Print whether each address is the safe address of a known service.

    Loads ``DATA_DIR / "service_map.pkl"``, asks ``update_service_map`` to
    continue from the highest stored service id, and then prints a dict
    mapping every input address to ``"Olas"`` (it matches the
    ``safe_address`` of some service, case-insensitively) or ``"non_Olas"``.

    Args:
        address_list: addresses to classify.
    """
    service_map_path = DATA_DIR / "service_map.pkl"
    with open(service_map_path, "rb") as f:
        service_map = pickle.load(f)
    print(f"length of service map={len(service_map)}")
    last_key = max(service_map.keys())

    print(f"last service key = {last_key}")
    update_service_map(start=last_key)
    # NOTE(review): update_service_map presumably persists newly found
    # services to service_map.pkl; reload so the lookup below sees them.
    # If it does not, this reload is a harmless no-op. Confirm.
    with open(service_map_path, "rb") as f:
        service_map = pickle.load(f)

    # Build the lookup once instead of scanning the whole map per address.
    known_safe_addresses = {
        service["safe_address"].lower() for service in service_map.values()
    }
    # Each address is classified independently: a match for one address must
    # not stop a later address from being labelled "non_Olas".
    mapping = {
        trader_address: (
            "Olas" if trader_address.lower() in known_safe_addresses else "non_Olas"
        )
        for trader_address in address_list
    }
    print("mapping")
    print(mapping)
|
260 |
+
|
261 |
+
|
262 |
+
def check_service_map():
    """Report the service ids missing from the stored service map.

    Loads ``DATA_DIR / "service_map.pkl"`` and prints its size, its highest
    key, every integer from 1 up to (but not including) that key that is not
    present in the map, and the total number of such gaps.
    """
    with open(DATA_DIR / "service_map.pkl", "rb") as f:
        service_map = pickle.load(f)
    print(f"length of service map={len(service_map)}")
    last_key = max(service_map.keys())
    print(f"last key ={last_key}")
    missing_keys = 0
    for service_id in range(1, last_key):
        if service_id not in service_map:
            missing_keys += 1
            print(f"missing key = {service_id}")
    print(f"total missing keys = {missing_keys}")
|
277 |
+
|
278 |
+
|
279 |
if __name__ == "__main__":
|
280 |
# create_service_map()
|
281 |
+
# trades_df = pd.read_parquet(TMP_DIR / "all_trades_df.parquet")
|
282 |
+
# trades_df = trades_df.loc[trades_df["is_invalid"] == False]
|
283 |
+
|
284 |
+
# trades_df = label_trades_by_staking(trades_df=trades_df, start=8)
|
285 |
+
# print(trades_df.staking.value_counts())
|
286 |
+
# trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
|
287 |
+
# generate_retention_activity_file()
|
288 |
+
a_list = [
|
289 |
+
"0x027592700fafc4db3221bb662d7bdc7f546a2bb5",
|
290 |
+
"0x0845f4ad01a2f41da618848c7a9e56b64377965e",
|
291 |
+
]
|
292 |
+
# check_list_addresses(address_list=a_list)
|
293 |
+
# update_service_map()
|
294 |
+
# check_service_map()
|
295 |
+
unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
|
296 |
+
unknown_traders = label_trades_by_staking(trades_df=unknown_traders)
|
297 |
+
unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
|
scripts/tools_metrics.py
CHANGED
@@ -61,7 +61,9 @@ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
|
|
61 |
tools = tools.sort_values(by="request_time", ascending=True)
|
62 |
|
63 |
tools["request_month_year_week"] = (
|
64 |
-
pd.to_datetime(tools["request_time"])
|
|
|
|
|
65 |
)
|
66 |
# preparing the tools graph
|
67 |
# adding the total
|
|
|
61 |
tools = tools.sort_values(by="request_time", ascending=True)
|
62 |
|
63 |
tools["request_month_year_week"] = (
|
64 |
+
pd.to_datetime(tools["request_time"])
|
65 |
+
.dt.to_period("W")
|
66 |
+
.dt.start_time.dt.strftime("%b-%d-%Y")
|
67 |
)
|
68 |
# preparing the tools graph
|
69 |
# adding the total
|
scripts/web3_utils.py
CHANGED
@@ -132,7 +132,9 @@ def updating_timestamps(rpc: str, tools_filename: str):
|
|
132 |
"%Y-%m"
|
133 |
)
|
134 |
tools["request_month_year_week"] = (
|
135 |
-
pd.to_datetime(tools["request_time"])
|
|
|
|
|
136 |
)
|
137 |
|
138 |
# Save the tools data after the updates on the content
|
@@ -178,7 +180,7 @@ def query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
|
|
178 |
userPositions_id_gt=userPositions_id_gt,
|
179 |
)
|
180 |
content_json = {"query": query}
|
181 |
-
print("sending query to subgraph")
|
182 |
res = requests.post(subgraph, headers=headers, json=content_json)
|
183 |
result_json = res.json()
|
184 |
# print(f"result = {result_json}")
|
@@ -229,6 +231,7 @@ def query_omen_xdai_subgraph(
|
|
229 |
first=QUERY_BATCH_SIZE,
|
230 |
id_gt=id_gt,
|
231 |
)
|
|
|
232 |
content_json = to_content(query)
|
233 |
|
234 |
res = requests.post(omen_subgraph, headers=headers, json=content_json)
|
|
|
132 |
"%Y-%m"
|
133 |
)
|
134 |
tools["request_month_year_week"] = (
|
135 |
+
pd.to_datetime(tools["request_time"])
|
136 |
+
.dt.to_period("W")
|
137 |
+
.dt.start_time.dt.strftime("%b-%d-%Y")
|
138 |
)
|
139 |
|
140 |
# Save the tools data after the updates on the content
|
|
|
180 |
userPositions_id_gt=userPositions_id_gt,
|
181 |
)
|
182 |
content_json = {"query": query}
|
183 |
+
# print("sending query to subgraph")
|
184 |
res = requests.post(subgraph, headers=headers, json=content_json)
|
185 |
result_json = res.json()
|
186 |
# print(f"result = {result_json}")
|
|
|
231 |
first=QUERY_BATCH_SIZE,
|
232 |
id_gt=id_gt,
|
233 |
)
|
234 |
+
print(f"omen query={query}")
|
235 |
content_json = to_content(query)
|
236 |
|
237 |
res = requests.post(omen_subgraph, headers=headers, json=content_json)
|
tabs/tool_win.py
CHANGED
@@ -14,7 +14,9 @@ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
|
|
14 |
tools = tools.sort_values(by="request_time", ascending=True)
|
15 |
|
16 |
tools["request_month_year_week"] = (
|
17 |
-
pd.to_datetime(tools["request_time"])
|
|
|
|
|
18 |
)
|
19 |
# preparing the tools graph
|
20 |
# adding the total
|
|
|
14 |
tools = tools.sort_values(by="request_time", ascending=True)
|
15 |
|
16 |
tools["request_month_year_week"] = (
|
17 |
+
pd.to_datetime(tools["request_time"])
|
18 |
+
.dt.to_period("W")
|
19 |
+
.dt.start_time.dt.strftime("%b-%d-%Y")
|
20 |
)
|
21 |
# preparing the tools graph
|
22 |
# adding the total
|
tabs/trades.py
CHANGED
@@ -21,7 +21,9 @@ def prepare_trades(trades_df: pd.DataFrame) -> pd.DataFrame:
|
|
21 |
trades_df["creation_timestamp"].dt.to_period("M").astype(str)
|
22 |
)
|
23 |
trades_df["month_year_week"] = (
|
24 |
-
trades_df["creation_timestamp"]
|
|
|
|
|
25 |
)
|
26 |
trades_df["winning_trade"] = trades_df["winning_trade"].astype(int)
|
27 |
return trades_df
|
|
|
21 |
trades_df["creation_timestamp"].dt.to_period("M").astype(str)
|
22 |
)
|
23 |
trades_df["month_year_week"] = (
|
24 |
+
trades_df["creation_timestamp"]
|
25 |
+
.dt.to_period("W")
|
26 |
+
.dt.start_time.dt.strftime("%b-%d-%Y")
|
27 |
)
|
28 |
trades_df["winning_trade"] = trades_df["winning_trade"].astype(int)
|
29 |
return trades_df
|