rosacastillo committed on
Commit
285f2a6
·
1 Parent(s): b3b7123

updating week format starting on Monday, new staking contracts and new weekly data

Browse files
data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04134fbaceed13c1c02eeb952134df58ec787afddcba0e09b69b7ea5ae8693cd
3
- size 6590780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6256840b7a7704aa5618fd5a4fed41b9444bbf80ea1dcaae068715026c8d52b0
3
+ size 8218375
data/daily_info.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:880e896031e8e87ddc4e9fc952941d7bbe5196b86e56ebdf003e572182db6b76
3
- size 1050038
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3d8ec77951dad3d522c90ea0009c15e5ab717c3f34624b4f0d205ad58cfa16e
3
+ size 1054780
data/error_by_markets.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b0f69c6df66a53208bd47167d39db8ef7ec965a47a1a90fe429815cc44cbad9
3
- size 14168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dff09a27b7b5ac4a527d679c446627c6ca4fb2653c6bc50e818d79e29e3c1be
3
+ size 12928
data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f29e5a59d7a2c60ef8408cddf58785f601a472105a99bd7339a45de8389fa36f
3
- size 159829
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099e999dc46d4a2d7086838f3645475aecf27fa88331a8b2d5fd4c9937f1ad81
3
+ size 782151
data/service_map.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93ac540e1bcd347a48b9978b87443ae64af0f8b0a4daff305c4ad99cd0959a73
3
- size 90766
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d288a076f719a659159ffdb2bca3f132c3efe3f62ee0412c11e8094c36ffc8
3
+ size 164076
data/service_map_bak.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ac540e1bcd347a48b9978b87443ae64af0f8b0a4daff305c4ad99cd0959a73
3
+ size 90766
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a521cd7735fbd9aecd1fce18836b08dc03cc58dca1f52add0b60ec2bbb00f722
3
- size 1099
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5a70b32e6a7dbd75c7924a2fa887612bf7523a62f6710f2e2397cdc3664fa2
3
+ size 1100
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c237838416f8339b145719e5b370df1d9763099f9e5fa3e75d4d69053e5d311
3
- size 200722
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1633afc5d408263251ae5290e1f45972abaf0d3f0358ab880604de8a0baae559
3
+ size 283140
data/winning_df.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c31991027458844d3c8e72da29bb9acd7bcef8ad31ae7c95a59ee168a34ba58
3
- size 14407
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f394838074669231dc3f8dc46167bb05019ae12eb798933e99b2c2de9b9a2c1f
3
+ size 12636
notebooks/markets_analysis.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -15,32 +15,67 @@
15
  },
16
  {
17
  "cell_type": "code",
18
- "execution_count": 7,
 
 
 
 
 
 
 
 
 
19
  "metadata": {},
20
  "outputs": [
21
  {
22
- "ename": "ModuleNotFoundError",
23
- "evalue": "No module named 'scripts'",
24
- "output_type": "error",
25
- "traceback": [
26
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
27
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
28
- "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscripts\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mget_mech_info\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m update_fpmmTrades_parquet\n",
29
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'scripts'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  ]
31
  }
32
  ],
33
  "source": [
34
- "from scripts.get_mech_info import update_fpmmTrades_parquet"
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 2,
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
43
- "fpmms = pd.read_parquet('../data/all_fpmms.parquet')"
44
  ]
45
  },
46
  {
@@ -54,7 +89,7 @@
54
  },
55
  {
56
  "cell_type": "code",
57
- "execution_count": 4,
58
  "metadata": {},
59
  "outputs": [],
60
  "source": [
@@ -72,13 +107,674 @@
72
  },
73
  {
74
  "cell_type": "code",
75
- "execution_count": 8,
76
  "metadata": {},
77
  "outputs": [],
78
  "source": [
79
  "new_trades = pd.read_parquet(\"../tmp/new_fpmmTrades.parquet\")"
80
  ]
81
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  {
83
  "cell_type": "code",
84
  "execution_count": 9,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 12,
19
+ "metadata": {},
20
+ "outputs": [],
21
+ "source": [
22
+ "missing_df = pd.read_parquet(\"../data/missing_fpmmTrades.parquet\")"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 13,
28
  "metadata": {},
29
  "outputs": [
30
  {
31
+ "name": "stdout",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "<class 'pandas.core.frame.DataFrame'>\n",
35
+ "RangeIndex: 24121 entries, 0 to 24120\n",
36
+ "Data columns (total 24 columns):\n",
37
+ " # Column Non-Null Count Dtype \n",
38
+ "--- ------ -------------- ----- \n",
39
+ " 0 collateralAmount 24121 non-null object\n",
40
+ " 1 collateralAmountUSD 24121 non-null object\n",
41
+ " 2 collateralToken 24121 non-null object\n",
42
+ " 3 creationTimestamp 24121 non-null object\n",
43
+ " 4 trader_address 24121 non-null object\n",
44
+ " 5 feeAmount 24121 non-null object\n",
45
+ " 6 id 24121 non-null object\n",
46
+ " 7 oldOutcomeTokenMarginalPrice 24121 non-null object\n",
47
+ " 8 outcomeIndex 24121 non-null object\n",
48
+ " 9 outcomeTokenMarginalPrice 24121 non-null object\n",
49
+ " 10 outcomeTokensTraded 24121 non-null object\n",
50
+ " 11 title 24121 non-null object\n",
51
+ " 12 transactionHash 24121 non-null object\n",
52
+ " 13 type 24121 non-null object\n",
53
+ " 14 market_creator 24121 non-null object\n",
54
+ " 15 fpmm.answerFinalizedTimestamp 22553 non-null object\n",
55
+ " 16 fpmm.arbitrationOccurred 24121 non-null bool \n",
56
+ " 17 fpmm.currentAnswer 22553 non-null object\n",
57
+ " 18 fpmm.id 24121 non-null object\n",
58
+ " 19 fpmm.isPendingArbitration 24121 non-null bool \n",
59
+ " 20 fpmm.openingTimestamp 24121 non-null object\n",
60
+ " 21 fpmm.outcomes 24121 non-null object\n",
61
+ " 22 fpmm.title 24121 non-null object\n",
62
+ " 23 fpmm.condition.id 24121 non-null object\n",
63
+ "dtypes: bool(2), object(22)\n",
64
+ "memory usage: 4.1+ MB\n"
65
  ]
66
  }
67
  ],
68
  "source": [
69
+ "missing_df.info()"
70
  ]
71
  },
72
  {
73
  "cell_type": "code",
74
+ "execution_count": 26,
75
  "metadata": {},
76
  "outputs": [],
77
  "source": [
78
+ "old_markets_df = pd.read_parquet(\"../data/old_fpmmTrades.parquet\")"
79
  ]
80
  },
81
  {
 
89
  },
90
  {
91
  "cell_type": "code",
92
+ "execution_count": 35,
93
  "metadata": {},
94
  "outputs": [],
95
  "source": [
 
107
  },
108
  {
109
  "cell_type": "code",
110
+ "execution_count": 23,
111
  "metadata": {},
112
  "outputs": [],
113
  "source": [
114
  "new_trades = pd.read_parquet(\"../tmp/new_fpmmTrades.parquet\")"
115
  ]
116
  },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 15,
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "data": {
124
+ "text/plain": [
125
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
126
+ " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
127
+ " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
128
+ " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
129
+ " 'transactionHash', 'type', 'market_creator',\n",
130
+ " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
131
+ " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
132
+ " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
133
+ " 'fpmm.condition.id'],\n",
134
+ " dtype='object')"
135
+ ]
136
+ },
137
+ "execution_count": 15,
138
+ "metadata": {},
139
+ "output_type": "execute_result"
140
+ }
141
+ ],
142
+ "source": [
143
+ "new_trades.columns"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": 36,
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "from datetime import datetime, timezone\n",
153
+ "def transform_to_datetime(x):\n",
154
+ " return datetime.fromtimestamp(int(x), tz=timezone.utc)\n"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 17,
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "new_trades[\"creationTimestamp\"] = new_trades[\"creationTimestamp\"].apply(\n",
164
+ " lambda x: transform_to_datetime(x)\n",
165
+ ")"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": 37,
171
+ "metadata": {},
172
+ "outputs": [],
173
+ "source": [
174
+ "def add_creation_date(df):\n",
175
+ " try:\n",
176
+ " df[\"creationTimestamp\"] = df[\"creationTimestamp\"].apply(\n",
177
+ " lambda x: transform_to_datetime(x)\n",
178
+ " )\n",
179
+ " except Exception:\n",
180
+ " print(\"Ignore\")\n",
181
+ " df[\"creation_timestamp\"] = pd.to_datetime(df[\"creationTimestamp\"])\n",
182
+ " df[\"creation_date\"] = df[\"creation_timestamp\"].dt.date\n",
183
+ " df[\"creation_date\"] = pd.to_datetime(df[\"creation_date\"])\n",
184
+ " return df"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": 27,
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "name": "stdout",
194
+ "output_type": "stream",
195
+ "text": [
196
+ "Ignore\n"
197
+ ]
198
+ },
199
+ {
200
+ "data": {
201
+ "text/plain": [
202
+ "Timestamp('2025-01-14 00:00:00')"
203
+ ]
204
+ },
205
+ "execution_count": 27,
206
+ "metadata": {},
207
+ "output_type": "execute_result"
208
+ }
209
+ ],
210
+ "source": [
211
+ "old_markets_df = add_creation_date(old_markets_df)\n",
212
+ "max(old_markets_df.creation_date)"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 38,
218
+ "metadata": {},
219
+ "outputs": [
220
+ {
221
+ "name": "stdout",
222
+ "output_type": "stream",
223
+ "text": [
224
+ "Ignore\n"
225
+ ]
226
+ },
227
+ {
228
+ "data": {
229
+ "text/plain": [
230
+ "Timestamp('2025-01-16 00:00:00')"
231
+ ]
232
+ },
233
+ "execution_count": 38,
234
+ "metadata": {},
235
+ "output_type": "execute_result"
236
+ }
237
+ ],
238
+ "source": [
239
+ "trades_data = add_creation_date(trades_data)\n",
240
+ "max(trades_data.creation_date)"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": 39,
246
+ "metadata": {},
247
+ "outputs": [
248
+ {
249
+ "data": {
250
+ "text/plain": [
251
+ "Timestamp('2024-11-14 00:00:00')"
252
+ ]
253
+ },
254
+ "execution_count": 39,
255
+ "metadata": {},
256
+ "output_type": "execute_result"
257
+ }
258
+ ],
259
+ "source": [
260
+ "min(trades_data.creation_date)"
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": 24,
266
+ "metadata": {},
267
+ "outputs": [
268
+ {
269
+ "data": {
270
+ "text/plain": [
271
+ "Timestamp('2025-01-14 00:00:00')"
272
+ ]
273
+ },
274
+ "execution_count": 24,
275
+ "metadata": {},
276
+ "output_type": "execute_result"
277
+ }
278
+ ],
279
+ "source": [
280
+ "new_trades = add_creation_date(new_trades)\n",
281
+ "max(new_trades.creation_date)"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 25,
287
+ "metadata": {},
288
+ "outputs": [
289
+ {
290
+ "data": {
291
+ "text/plain": [
292
+ "Timestamp('2025-01-11 00:00:00')"
293
+ ]
294
+ },
295
+ "execution_count": 25,
296
+ "metadata": {},
297
+ "output_type": "execute_result"
298
+ }
299
+ ],
300
+ "source": [
301
+ "min(new_trades.creation_date)"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 28,
307
+ "metadata": {},
308
+ "outputs": [
309
+ {
310
+ "name": "stdout",
311
+ "output_type": "stream",
312
+ "text": [
313
+ "Transformation not needed\n",
314
+ "Transformation not needed\n",
315
+ "Initial length before removing duplicates in fpmmTrades= 165530\n"
316
+ ]
317
+ }
318
+ ],
319
+ "source": [
320
+ "# lowercase and strip creator_address\n",
321
+ "new_trades[\"trader_address\"] = (\n",
322
+ " new_trades[\"trader_address\"].str.lower().str.strip()\n",
323
+ ")\n",
324
+ "# ensure creationTimestamp compatibility\n",
325
+ "try:\n",
326
+ " new_trades[\"creationTimestamp\"] = new_trades[\"creationTimestamp\"].apply(\n",
327
+ " lambda x: transform_to_datetime(x)\n",
328
+ " )\n",
329
+ "\n",
330
+ "except Exception as e:\n",
331
+ " print(f\"Transformation not needed\")\n",
332
+ "try:\n",
333
+ " old_markets_df[\"creationTimestamp\"] = old_markets_df[\"creationTimestamp\"].apply(\n",
334
+ " lambda x: transform_to_datetime(x)\n",
335
+ " )\n",
336
+ "except Exception as e:\n",
337
+ " print(f\"Transformation not needed\")\n",
338
+ "\n",
339
+ "# merge two dataframes\n",
340
+ "merge_df = pd.concat([old_markets_df, new_trades], ignore_index=True)\n",
341
+ "# avoid numpy objects\n",
342
+ "merge_df[\"fpmm.arbitrationOccurred\"] = merge_df[\"fpmm.arbitrationOccurred\"].astype(\n",
343
+ " bool\n",
344
+ ")\n",
345
+ "merge_df[\"fpmm.isPendingArbitration\"] = merge_df[\n",
346
+ " \"fpmm.isPendingArbitration\"\n",
347
+ "].astype(bool)\n",
348
+ "\n",
349
+ "# Check for duplicates\n",
350
+ "print(f\"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}\")\n",
351
+ "\n",
352
+ "# Remove duplicates\n",
353
+ "# fpmm.outcomes is a numpy array\n",
354
+ "merge_df.drop_duplicates(\"id\", keep=\"last\", inplace=True)"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": 29,
360
+ "metadata": {},
361
+ "outputs": [
362
+ {
363
+ "data": {
364
+ "text/plain": [
365
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
366
+ " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
367
+ " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
368
+ " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
369
+ " 'transactionHash', 'type', 'market_creator',\n",
370
+ " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
371
+ " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
372
+ " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
373
+ " 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
374
+ " dtype='object')"
375
+ ]
376
+ },
377
+ "execution_count": 29,
378
+ "metadata": {},
379
+ "output_type": "execute_result"
380
+ }
381
+ ],
382
+ "source": [
383
+ "merge_df.columns"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": 30,
389
+ "metadata": {},
390
+ "outputs": [
391
+ {
392
+ "data": {
393
+ "text/plain": [
394
+ "Timestamp('2025-01-14 00:00:00')"
395
+ ]
396
+ },
397
+ "execution_count": 30,
398
+ "metadata": {},
399
+ "output_type": "execute_result"
400
+ }
401
+ ],
402
+ "source": [
403
+ "max(merge_df.creation_date)"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": 31,
409
+ "metadata": {},
410
+ "outputs": [],
411
+ "source": [
412
+ "cutoff_date=\"2024-11-13\"\n",
413
+ "min_date_utc = pd.to_datetime(cutoff_date, format=\"%Y-%m-%d\", utc=True)"
414
+ ]
415
+ },
416
+ {
417
+ "cell_type": "code",
418
+ "execution_count": 32,
419
+ "metadata": {},
420
+ "outputs": [
421
+ {
422
+ "name": "stdout",
423
+ "output_type": "stream",
424
+ "text": [
425
+ "length before filtering 161781\n",
426
+ "length after filtering 160426\n"
427
+ ]
428
+ }
429
+ ],
430
+ "source": [
431
+ "merge_df[\"creation_timestamp\"] = pd.to_datetime(\n",
432
+ " merge_df[\"creation_timestamp\"], utc=True\n",
433
+ ")\n",
434
+ "\n",
435
+ "print(f\"length before filtering {len(merge_df)}\")\n",
436
+ "merge_df = merge_df.loc[merge_df[\"creation_timestamp\"] > min_date_utc]\n",
437
+ "print(f\"length after filtering {len(merge_df)}\")\n"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": 33,
443
+ "metadata": {},
444
+ "outputs": [
445
+ {
446
+ "data": {
447
+ "text/plain": [
448
+ "Timestamp('2025-01-14 00:00:00')"
449
+ ]
450
+ },
451
+ "execution_count": 33,
452
+ "metadata": {},
453
+ "output_type": "execute_result"
454
+ }
455
+ ],
456
+ "source": [
457
+ "max(merge_df.creation_date)"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "code",
462
+ "execution_count": 34,
463
+ "metadata": {},
464
+ "outputs": [],
465
+ "source": [
466
+ "merge_df.to_parquet(\"../tmp/fpmmTrades.parquet\", index=False)"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": 15,
472
+ "metadata": {},
473
+ "outputs": [
474
+ {
475
+ "data": {
476
+ "text/plain": [
477
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
478
+ " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
479
+ " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
480
+ " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
481
+ " 'transactionHash', 'type', 'market_creator',\n",
482
+ " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
483
+ " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
484
+ " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
485
+ " 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
486
+ " dtype='object')"
487
+ ]
488
+ },
489
+ "execution_count": 15,
490
+ "metadata": {},
491
+ "output_type": "execute_result"
492
+ }
493
+ ],
494
+ "source": [
495
+ "missing_df = add_creation_date(missing_df)\n",
496
+ "missing_df.columns"
497
+ ]
498
+ },
499
+ {
500
+ "cell_type": "code",
501
+ "execution_count": 16,
502
+ "metadata": {},
503
+ "outputs": [
504
+ {
505
+ "data": {
506
+ "text/plain": [
507
+ "<Axes: xlabel='Count', ylabel='creation_date'>"
508
+ ]
509
+ },
510
+ "execution_count": 16,
511
+ "metadata": {},
512
+ "output_type": "execute_result"
513
+ },
514
+ {
515
+ "data": {
516
+ "image/png": "",
517
+ "text/plain": [
518
+ "<Figure size 640x480 with 1 Axes>"
519
+ ]
520
+ },
521
+ "metadata": {},
522
+ "output_type": "display_data"
523
+ }
524
+ ],
525
+ "source": [
526
+ "import seaborn as sns\n",
527
+ "\n",
528
+ "sns.histplot(missing_df, y=\"creation_date\")"
529
+ ]
530
+ },
531
+ {
532
+ "cell_type": "code",
533
+ "execution_count": 40,
534
+ "metadata": {},
535
+ "outputs": [
536
+ {
537
+ "data": {
538
+ "text/plain": [
539
+ "<Axes: xlabel='Count', ylabel='creation_date'>"
540
+ ]
541
+ },
542
+ "execution_count": 40,
543
+ "metadata": {},
544
+ "output_type": "execute_result"
545
+ },
546
+ {
547
+ "data": {
548
+ "image/png": "",
549
+ "text/plain": [
550
+ "<Figure size 640x480 with 1 Axes>"
551
+ ]
552
+ },
553
+ "metadata": {},
554
+ "output_type": "display_data"
555
+ }
556
+ ],
557
+ "source": [
558
+ "sns.histplot(trades_data, y=\"creation_date\")"
559
+ ]
560
+ },
561
+ {
562
+ "cell_type": "code",
563
+ "execution_count": 19,
564
+ "metadata": {},
565
+ "outputs": [],
566
+ "source": [
567
+ "def add_extra_columns(new_trades):\n",
568
+ " new_trades[\"creation_timestamp\"] = pd.to_datetime(new_trades[\"creationTimestamp\"])\n",
569
+ " new_trades[\"creation_date\"] = new_trades[\"creation_timestamp\"].dt.date\n",
570
+ " new_trades[\"creation_date\"] = pd.to_datetime(new_trades[\"creation_date\"])"
571
+ ]
572
+ },
573
+ {
574
+ "cell_type": "code",
575
+ "execution_count": null,
576
+ "metadata": {},
577
+ "outputs": [],
578
+ "source": [
579
+ "add_extra_columns(new_trades=new_trades)"
580
+ ]
581
+ },
582
+ {
583
+ "cell_type": "code",
584
+ "execution_count": 9,
585
+ "metadata": {},
586
+ "outputs": [
587
+ {
588
+ "data": {
589
+ "text/plain": [
590
+ "Timestamp('2025-01-13 00:00:00')"
591
+ ]
592
+ },
593
+ "execution_count": 9,
594
+ "metadata": {},
595
+ "output_type": "execute_result"
596
+ }
597
+ ],
598
+ "source": [
599
+ "max(new_trades.creation_date)"
600
+ ]
601
+ },
602
+ {
603
+ "cell_type": "code",
604
+ "execution_count": 12,
605
+ "metadata": {},
606
+ "outputs": [
607
+ {
608
+ "data": {
609
+ "text/plain": [
610
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
611
+ " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
612
+ " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
613
+ " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
614
+ " 'transactionHash', 'type', 'market_creator',\n",
615
+ " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
616
+ " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
617
+ " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
618
+ " 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
619
+ " dtype='object')"
620
+ ]
621
+ },
622
+ "execution_count": 12,
623
+ "metadata": {},
624
+ "output_type": "execute_result"
625
+ }
626
+ ],
627
+ "source": [
628
+ "new_trades.columns"
629
+ ]
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "execution_count": 23,
634
+ "metadata": {},
635
+ "outputs": [
636
+ {
637
+ "name": "stdout",
638
+ "output_type": "stream",
639
+ "text": [
640
+ "Transformation not needed\n",
641
+ "Initial length before removing duplicates in fpmmTrades= 137851\n",
642
+ "Final length after removing duplicates in fpmmTrades= 137851\n"
643
+ ]
644
+ }
645
+ ],
646
+ "source": [
647
+ "old_trades_df = pd.read_parquet(\"../tmp/fpmmTrades.parquet\")\n",
648
+ "\n",
649
+ "\n",
650
+ "# lowercase and strip creator_address\n",
651
+ "new_trades[\"trader_address\"] = (\n",
652
+ " new_trades[\"trader_address\"].str.lower().str.strip()\n",
653
+ ")\n",
654
+ "\n",
655
+ "\n",
656
+ "try:\n",
657
+ " old_trades_df[\"creationTimestamp\"] = old_trades_df[\"creationTimestamp\"].apply(\n",
658
+ " lambda x: transform_to_datetime(x)\n",
659
+ " )\n",
660
+ "except Exception as e:\n",
661
+ " print(f\"Transformation not needed\")\n",
662
+ "\n",
663
+ "# merge two dataframes\n",
664
+ "merge_df = pd.concat([old_trades_df, new_trades], ignore_index=True)\n",
665
+ "# avoid numpy objects\n",
666
+ "merge_df[\"fpmm.arbitrationOccurred\"] = merge_df[\"fpmm.arbitrationOccurred\"].astype(\n",
667
+ " bool\n",
668
+ ")\n",
669
+ "merge_df[\"fpmm.isPendingArbitration\"] = merge_df[\n",
670
+ " \"fpmm.isPendingArbitration\"\n",
671
+ "].astype(bool)\n",
672
+ "\n",
673
+ "# Check for duplicates\n",
674
+ "print(f\"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}\")\n",
675
+ "\n",
676
+ "# Remove duplicates\n",
677
+ "# fpmm.outcomes is a numpy array\n",
678
+ "merge_df.drop_duplicates(\"id\", keep=\"last\", inplace=True)\n",
679
+ "print(f\"Final length after removing duplicates in fpmmTrades= {len(merge_df)}\")"
680
+ ]
681
+ },
682
+ {
683
+ "cell_type": "code",
684
+ "execution_count": 24,
685
+ "metadata": {},
686
+ "outputs": [],
687
+ "source": [
688
+ "merge_df.to_parquet(\"../tmp/fpmmTrades.parquet\", index=False)"
689
+ ]
690
+ },
691
+ {
692
+ "cell_type": "code",
693
+ "execution_count": 13,
694
+ "metadata": {},
695
+ "outputs": [
696
+ {
697
+ "data": {
698
+ "text/plain": [
699
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
700
+ " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
701
+ " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
702
+ " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
703
+ " 'transactionHash', 'type', 'market_creator',\n",
704
+ " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
705
+ " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
706
+ " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
707
+ " 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
708
+ " dtype='object')"
709
+ ]
710
+ },
711
+ "execution_count": 13,
712
+ "metadata": {},
713
+ "output_type": "execute_result"
714
+ }
715
+ ],
716
+ "source": [
717
+ "old_trades_df.columns"
718
+ ]
719
+ },
720
+ {
721
+ "cell_type": "code",
722
+ "execution_count": 20,
723
+ "metadata": {},
724
+ "outputs": [],
725
+ "source": [
726
+ "add_extra_columns(new_trades=merge_df)"
727
+ ]
728
+ },
729
+ {
730
+ "cell_type": "code",
731
+ "execution_count": 21,
732
+ "metadata": {},
733
+ "outputs": [
734
+ {
735
+ "data": {
736
+ "text/plain": [
737
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
738
+ " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
739
+ " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
740
+ " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
741
+ " 'transactionHash', 'type', 'market_creator',\n",
742
+ " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
743
+ " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
744
+ " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
745
+ " 'fpmm.condition.id', 'creation_timestamp', 'creation_date'],\n",
746
+ " dtype='object')"
747
+ ]
748
+ },
749
+ "execution_count": 21,
750
+ "metadata": {},
751
+ "output_type": "execute_result"
752
+ }
753
+ ],
754
+ "source": [
755
+ "merge_df.columns"
756
+ ]
757
+ },
758
+ {
759
+ "cell_type": "code",
760
+ "execution_count": 22,
761
+ "metadata": {},
762
+ "outputs": [
763
+ {
764
+ "data": {
765
+ "text/plain": [
766
+ "Timestamp('2025-01-13 00:00:00')"
767
+ ]
768
+ },
769
+ "execution_count": 22,
770
+ "metadata": {},
771
+ "output_type": "execute_result"
772
+ }
773
+ ],
774
+ "source": [
775
+ "max(merge_df.creation_date)"
776
+ ]
777
+ },
778
  {
779
  "cell_type": "code",
780
  "execution_count": 9,
notebooks/weekly_analysis.ipynb CHANGED
@@ -4676,7 +4676,7 @@
4676
  ],
4677
  "metadata": {
4678
  "kernelspec": {
4679
- "display_name": ".venv",
4680
  "language": "python",
4681
  "name": "python3"
4682
  },
@@ -4690,7 +4690,7 @@
4690
  "name": "python",
4691
  "nbconvert_exporter": "python",
4692
  "pygments_lexer": "ipython3",
4693
- "version": "3.12.2"
4694
  },
4695
  "orig_nbformat": 4
4696
  },
 
4676
  ],
4677
  "metadata": {
4678
  "kernelspec": {
4679
+ "display_name": "Python 3",
4680
  "language": "python",
4681
  "name": "python3"
4682
  },
 
4690
  "name": "python",
4691
  "nbconvert_exporter": "python",
4692
  "pygments_lexer": "ipython3",
4693
+ "version": "3.12.3"
4694
  },
4695
  "orig_nbformat": 4
4696
  },
scripts/cleaning_old_info.py CHANGED
@@ -1,5 +1,5 @@
1
  import pandas as pd
2
- from utils import DATA_DIR, TMP_DIR
3
 
4
 
5
  def clean_old_data_from_parquet_files(cutoff_date: str):
@@ -9,7 +9,7 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
9
 
10
  # clean tools.parquet
11
  try:
12
- tools = pd.read_parquet(DATA_DIR / "tools.parquet")
13
 
14
  # make sure creator_address is in the columns
15
  assert "trader_address" in tools.columns, "trader_address column not found"
@@ -22,7 +22,7 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
22
  print(f"length before filtering {len(tools)}")
23
  tools = tools.loc[tools["request_time"] > min_date_utc]
24
  print(f"length after filtering {len(tools)}")
25
- tools.to_parquet(DATA_DIR / "tools.parquet", index=False)
26
 
27
  except Exception as e:
28
  print(f"Error cleaning tools file {e}")
@@ -53,11 +53,11 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
53
  unknown_traders["creation_timestamp"], utc=True
54
  )
55
 
56
- print(f"length before filtering {len(unknown_traders)}")
57
  unknown_traders = unknown_traders.loc[
58
  unknown_traders["creation_timestamp"] > min_date_utc
59
  ]
60
- print(f"length after filtering {len(unknown_traders)}")
61
  unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
62
 
63
  except Exception as e:
@@ -66,7 +66,15 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
66
  # clean fpmmTrades.parquet
67
  try:
68
  fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
69
-
 
 
 
 
 
 
 
 
70
  fpmmTrades["creation_timestamp"] = pd.to_datetime(
71
  fpmmTrades["creation_timestamp"], utc=True
72
  )
 
1
  import pandas as pd
2
+ from utils import DATA_DIR, TMP_DIR, transform_to_datetime
3
 
4
 
5
  def clean_old_data_from_parquet_files(cutoff_date: str):
 
9
 
10
  # clean tools.parquet
11
  try:
12
+ tools = pd.read_parquet(TMP_DIR / "tools.parquet")
13
 
14
  # make sure creator_address is in the columns
15
  assert "trader_address" in tools.columns, "trader_address column not found"
 
22
  print(f"length before filtering {len(tools)}")
23
  tools = tools.loc[tools["request_time"] > min_date_utc]
24
  print(f"length after filtering {len(tools)}")
25
+ tools.to_parquet(TMP_DIR / "tools.parquet", index=False)
26
 
27
  except Exception as e:
28
  print(f"Error cleaning tools file {e}")
 
53
  unknown_traders["creation_timestamp"], utc=True
54
  )
55
 
56
+ print(f"length unknown traders before filtering {len(unknown_traders)}")
57
  unknown_traders = unknown_traders.loc[
58
  unknown_traders["creation_timestamp"] > min_date_utc
59
  ]
60
+ print(f"length unknown traders after filtering {len(unknown_traders)}")
61
  unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
62
 
63
  except Exception as e:
 
66
  # clean fpmmTrades.parquet
67
  try:
68
  fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
69
+ try:
70
+ fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
71
+ lambda x: transform_to_datetime(x)
72
+ )
73
+ except Exception as e:
74
+ print(f"Transformation not needed")
75
+ fpmmTrades["creation_timestamp"] = pd.to_datetime(
76
+ fpmmTrades["creationTimestamp"]
77
+ )
78
  fpmmTrades["creation_timestamp"] = pd.to_datetime(
79
  fpmmTrades["creation_timestamp"], utc=True
80
  )
scripts/cloud_storage.py CHANGED
@@ -23,7 +23,7 @@ def initialize_client():
23
  return client
24
 
25
 
26
- def upload_file(client, filename: str, file_path: str):
27
  """Upload a file to the bucket"""
28
  try:
29
  OBJECT_NAME = FOLDER_NAME + "/" + filename
@@ -34,8 +34,10 @@ def upload_file(client, filename: str, file_path: str):
34
  BUCKET_NAME, OBJECT_NAME, file_path, part_size=10 * 1024 * 1024
35
  ) # 10MB parts
36
  print(f"File '{file_path}' uploaded as '{OBJECT_NAME}'.")
 
37
  except S3Error as err:
38
  print(f"Error uploading file: {err}")
 
39
 
40
 
41
  def download_file(client, filename: str, file_path: str):
@@ -48,11 +50,16 @@ def download_file(client, filename: str, file_path: str):
48
  print(f"Error downloading file: {err}")
49
 
50
 
51
- def load_historical_file(client, filename: str):
52
  """Function to load one file into the cloud storage"""
53
  file_path = filename
54
  file_path = HIST_DIR / filename
55
- upload_file(client, filename, file_path)
 
 
 
 
 
56
 
57
 
58
  def process_historical_files(client):
@@ -63,8 +70,10 @@ def process_historical_files(client):
63
  # Check if file is a parquet file
64
  if filename.endswith(".parquet"):
65
  try:
66
- load_historical_file(client, filename)
67
- print(f"Successfully processed {filename}")
 
 
68
  except Exception as e:
69
  print(f"Error processing {filename}: {str(e)}")
70
 
 
23
  return client
24
 
25
 
26
+ def upload_file(client, filename: str, file_path: str) -> bool:
27
  """Upload a file to the bucket"""
28
  try:
29
  OBJECT_NAME = FOLDER_NAME + "/" + filename
 
34
  BUCKET_NAME, OBJECT_NAME, file_path, part_size=10 * 1024 * 1024
35
  ) # 10MB parts
36
  print(f"File '{file_path}' uploaded as '{OBJECT_NAME}'.")
37
+ return True
38
  except S3Error as err:
39
  print(f"Error uploading file: {err}")
40
+ return False
41
 
42
 
43
  def download_file(client, filename: str, file_path: str):
 
50
  print(f"Error downloading file: {err}")
51
 
52
 
53
def load_historical_file(client, filename: str) -> bool:
    """Upload one file from the historical folder to the cloud storage.

    Args:
        client: storage client that is forwarded to ``upload_file``.
        filename: name of the file inside ``HIST_DIR``; it is also used as
            the object name passed to ``upload_file``.

    Returns:
        Whatever ``upload_file`` returns for this upload.
    """
    # The local path is always resolved relative to the historical folder.
    historical_path = HIST_DIR / filename
    return upload_file(client, filename, historical_path)
58
+
59
+
60
def upload_historical_file(filename: str) -> bool:
    """Create a storage client and upload one historical file with it.

    Args:
        filename: name of the file inside the historical folder.

    Returns:
        The result reported by ``load_historical_file`` so that callers can
        tell whether the upload succeeded. Callers that ignore the return
        value keep working as before.
    """
    client = initialize_client()
    return load_historical_file(client=client, filename=filename)
63
 
64
 
65
  def process_historical_files(client):
 
70
  # Check if file is a parquet file
71
  if filename.endswith(".parquet"):
72
  try:
73
+ if load_historical_file(client, filename):
74
+ print(f"Successfully processed {filename}")
75
+ else:
76
+ print("Error loading the files")
77
  except Exception as e:
78
  print(f"Error processing {filename}: {str(e)}")
79
 
scripts/daily_data.py CHANGED
@@ -11,6 +11,7 @@ from nr_mech_calls import (
11
  transform_to_datetime,
12
  )
13
  from markets import check_current_week_data
 
14
 
15
  logging.basicConfig(level=logging.INFO)
16
 
@@ -51,6 +52,9 @@ def prepare_live_metrics(
51
  # save into a separate file
52
  all_trades_df.to_parquet(DATA_DIR / "daily_info.parquet", index=False)
53
 
 
 
 
54
 
55
  if __name__ == "__main__":
56
  prepare_live_metrics()
 
11
  transform_to_datetime,
12
  )
13
  from markets import check_current_week_data
14
+ from staking import generate_retention_activity_file
15
 
16
  logging.basicConfig(level=logging.INFO)
17
 
 
52
  # save into a separate file
53
  all_trades_df.to_parquet(DATA_DIR / "daily_info.parquet", index=False)
54
 
55
+ # prepare the retention info file
56
+ generate_retention_activity_file()
57
+
58
 
59
  if __name__ == "__main__":
60
  prepare_live_metrics()
scripts/get_mech_info.py CHANGED
@@ -316,7 +316,7 @@ def get_mech_events_since_last_run(logger):
316
  try:
317
  all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
318
  latest_timestamp = max(all_trades.creation_timestamp)
319
- # cutoff_date = "2024-12-01"
320
  # latest_timestamp = pd.Timestamp(
321
  # datetime.strptime(cutoff_date, "%Y-%m-%d")
322
  # ).tz_localize("UTC")
 
316
  try:
317
  all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
318
  latest_timestamp = max(all_trades.creation_timestamp)
319
+ # cutoff_date = "2024-12-22"
320
  # latest_timestamp = pd.Timestamp(
321
  # datetime.strptime(cutoff_date, "%Y-%m-%d")
322
  # ).tz_localize("UTC")
scripts/gnosis_timestamps.py CHANGED
@@ -137,7 +137,9 @@ def compute_request_time(tools_df: pd.DataFrame) -> pd.DataFrame:
137
  tools_df["request_time"]
138
  ).dt.strftime("%Y-%m")
139
  tools_df["request_month_year_week"] = (
140
- pd.to_datetime(tools_df["request_time"]).dt.to_period("W").astype(str)
 
 
141
  )
142
  # Update t_map with new timestamps
143
  new_timestamps = (
 
137
  tools_df["request_time"]
138
  ).dt.strftime("%Y-%m")
139
  tools_df["request_month_year_week"] = (
140
+ pd.to_datetime(tools_df["request_time"])
141
+ .dt.to_period("W")
142
+ .dt.start_time.dt.strftime("%b-%d-%Y")
143
  )
144
  # Update t_map with new timestamps
145
  new_timestamps = (
scripts/markets.py CHANGED
@@ -156,15 +156,19 @@ def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
156
  return df
157
 
158
 
159
- def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
 
 
 
160
  """Create fpmmTrades for all trades."""
 
161
  # Quickstart trades
162
  qs_trades_json = query_omen_xdai_subgraph(
163
  trader_category="quickstart",
164
  from_timestamp=from_timestamp,
165
- to_timestamp=DEFAULT_TO_TIMESTAMP,
166
  fpmm_from_timestamp=from_timestamp,
167
- fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
168
  )
169
 
170
  print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
@@ -175,6 +179,7 @@ def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
175
  qs_df = transform_fpmmTrades(qs_df)
176
 
177
  # Pearl trades
 
178
  pearl_trades_json = query_omen_xdai_subgraph(
179
  trader_category="pearl",
180
  from_timestamp=from_timestamp,
@@ -335,10 +340,14 @@ def add_market_creator(tools: pd.DataFrame) -> None:
335
  return tools
336
 
337
 
338
- def fpmmTrades_etl(rpc: str, trades_filename: str, from_timestamp: str) -> None:
 
 
339
  print("Generating the trades file")
340
  try:
341
- fpmmTrades = create_fpmmTrades(rpc, from_timestamp=from_timestamp)
 
 
342
  except FileNotFoundError:
343
  print(f"Error creating {trades_filename} file .")
344
 
 
156
  return df
157
 
158
 
159
+ def create_fpmmTrades(
160
+ from_timestamp: int = DEFAULT_FROM_TIMESTAMP,
161
+ to_timestamp: int = DEFAULT_TO_TIMESTAMP,
162
+ ):
163
  """Create fpmmTrades for all trades."""
164
+ print("Getting trades from Quickstart markets")
165
  # Quickstart trades
166
  qs_trades_json = query_omen_xdai_subgraph(
167
  trader_category="quickstart",
168
  from_timestamp=from_timestamp,
169
+ to_timestamp=to_timestamp,
170
  fpmm_from_timestamp=from_timestamp,
171
+ fpmm_to_timestamp=to_timestamp,
172
  )
173
 
174
  print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
 
179
  qs_df = transform_fpmmTrades(qs_df)
180
 
181
  # Pearl trades
182
+ print("Getting trades from Pearl markets")
183
  pearl_trades_json = query_omen_xdai_subgraph(
184
  trader_category="pearl",
185
  from_timestamp=from_timestamp,
 
340
  return tools
341
 
342
 
343
+ def fpmmTrades_etl(
344
+ trades_filename: str, from_timestamp: int, to_timestamp: int = DEFAULT_TO_TIMESTAMP
345
+ ) -> None:
346
  print("Generating the trades file")
347
  try:
348
+ fpmmTrades = create_fpmmTrades(
349
+ from_timestamp=from_timestamp, to_timestamp=to_timestamp
350
+ )
351
  except FileNotFoundError:
352
  print(f"Error creating {trades_filename} file .")
353
 
scripts/profitability.py CHANGED
@@ -146,7 +146,7 @@ def prepare_profitalibity_data(
146
 
147
  # Check if tools.parquet is in the same directory
148
  try:
149
- # new tools parquet
150
  tools = pd.read_parquet(DATA_DIR / tools_filename)
151
 
152
  # make sure creator_address is in the columns
@@ -165,7 +165,7 @@ def prepare_profitalibity_data(
165
  return
166
 
167
  # Check if fpmmTrades.parquet is in the same directory
168
- print("Reading the trades file")
169
  try:
170
  fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
171
  except FileNotFoundError:
@@ -413,10 +413,8 @@ def run_profitability_analysis(
413
 
414
  all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
415
 
416
- # add staking labels
417
  all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
418
 
419
- # create the unknown traders dataset
420
  print("Creating unknown traders dataset")
421
  unknown_traders_df, all_trades_df = create_unknown_traders_df(
422
  trades_df=all_trades_df
@@ -424,9 +422,10 @@ def run_profitability_analysis(
424
  # merge with previous unknown traders dataset
425
  previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
426
 
427
- unknown_traders_df = pd.concat(
428
  [unknown_traders_df, previous_unknown_traders], ignore_index=True
429
  )
 
430
  unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
431
 
432
  # save to parquet
@@ -437,6 +436,81 @@ def run_profitability_analysis(
437
  return all_trades_df
438
 
439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  if __name__ == "__main__":
441
  # updating the whole fpmmTrades parquet file instead of just the new ones
442
  # trade_mech_calls = pd.read_parquet(TMP_DIR / "result_df.parquet")
 
146
 
147
  # Check if tools.parquet is in the same directory
148
  try:
149
+ # tools parquet file
150
  tools = pd.read_parquet(DATA_DIR / tools_filename)
151
 
152
  # make sure creator_address is in the columns
 
165
  return
166
 
167
  # Check if fpmmTrades.parquet is in the same directory
168
+ print("Reading the new trades file")
169
  try:
170
  fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
171
  except FileNotFoundError:
 
413
 
414
  all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
415
 
 
416
  all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
417
 
 
418
  print("Creating unknown traders dataset")
419
  unknown_traders_df, all_trades_df = create_unknown_traders_df(
420
  trades_df=all_trades_df
 
422
  # merge with previous unknown traders dataset
423
  previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
424
 
425
+ unknown_traders_df: pd.DataFrame = pd.concat(
426
  [unknown_traders_df, previous_unknown_traders], ignore_index=True
427
  )
428
+ unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
429
  unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
430
 
431
  # save to parquet
 
436
  return all_trades_df
437
 
438
 
439
def add_trades_profitability(trades_filename: str):
    """Analyse a trades file and merge the results into the stored datasets.

    Reads ``trades_filename`` from ``DATA_DIR`` and ``tools.parquet`` from
    ``TMP_DIR``, computes the estimated mech calls per trade, analyses the
    trades and then updates ``invalid_trades.parquet``,
    ``unknown_traders.parquet`` and ``all_trades_profitability.parquet`` in
    ``DATA_DIR``, de-duplicating each of them by ``trade_id``.

    Args:
        trades_filename: name of the trades parquet file inside ``DATA_DIR``.

    Returns:
        None. When the trades file does not exist an error is printed and the
        function returns without touching any dataset.

    Raises:
        AssertionError: if the trades file has no ``trader_address`` column.
    """
    print("Reading the trades file")
    try:
        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
    except FileNotFoundError:
        print(f"Error reading {trades_filename} file .")
        # Without the trades there is nothing to analyse; carrying on would
        # only fail later with a NameError on the undefined dataframe.
        return

    # make sure trader_address is in the columns
    assert "trader_address" in fpmmTrades.columns, "trader_address column not found"

    # lowercase and strip trader_address
    fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()

    print("Reading tools parquet file")
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")

    # Best effort: the conversion is attempted and any failure is taken to
    # mean that the column does not need to be transformed.
    try:
        fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
            lambda x: transform_to_datetime(x)
        )
    except Exception:
        print("Transformation not needed")

    print("Computing the estimated mech calls dataset")
    trade_mech_calls = compute_mech_calls_based_on_timestamps(
        fpmmTrades=fpmmTrades, tools=tools
    )
    print(trade_mech_calls.total_mech_calls.describe())
    print("Analysing trades...")
    all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)

    # debugging purposes
    all_trades_df.to_parquet(JSON_DATA_DIR / "missing_trades_df.parquet", index=False)

    # filter invalid markets. Condition: "is_invalid" is True
    print("Checking invalid trades")
    invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
    if len(invalid_trades) > 0:
        try:
            print("Merging invalid trades parquet file")
            old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
            merge_df = pd.concat(
                [old_invalid_trades, invalid_trades], ignore_index=True
            )
            invalid_trades = merge_df.drop_duplicates("trade_id")
        except Exception as e:
            # If the merge fails only the newly found invalid trades are saved.
            print(f"Error updating the invalid trades parquet {e}")
        invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm this filter belongs at function level.
    all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]

    print("Adding staking labels")
    all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
    print("Creating unknown traders dataset")
    unknown_traders_df, all_trades_df = create_unknown_traders_df(
        trades_df=all_trades_df
    )
    if len(unknown_traders_df) > 0:
        print("Merging unknown traders info")
        # merge with previous unknown traders dataset
        previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")

        unknown_traders_df = pd.concat(
            [unknown_traders_df, previous_unknown_traders], ignore_index=True
        )
        unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
        unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)

    print("merge with previous all_trades_profitability")
    old_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    all_trades_df = pd.concat([all_trades_df, old_trades], ignore_index=True)
    all_trades_df.drop_duplicates("trade_id", keep="last", inplace=True)
    all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
512
+
513
+
514
  if __name__ == "__main__":
515
  # updating the whole fpmmTrades parquet file instead of just the new ones
516
  # trade_mech_calls = pd.read_parquet(TMP_DIR / "result_df.parquet")
scripts/pull_data.py CHANGED
@@ -3,7 +3,7 @@ from datetime import datetime
3
  import pandas as pd
4
  from markets import etl as mkt_etl, DEFAULT_FILENAME as MARKETS_FILENAME, fpmmTrades_etl
5
  from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
6
- from profitability import run_profitability_analysis
7
  from utils import (
8
  get_question,
9
  current_answer,
@@ -22,7 +22,7 @@ from update_tools_accuracy import compute_tools_accuracy
22
  from cleaning_old_info import clean_old_data_from_parquet_files
23
  from web3_utils import updating_timestamps
24
  from manage_space_files import move_files
25
- from cloud_storage import load_historical_file
26
  from tools_metrics import compute_tools_based_datasets
27
 
28
 
@@ -64,7 +64,7 @@ def save_historical_data():
64
  filename = f"tools_{timestamp}.parquet"
65
  tools.to_parquet(HIST_DIR / filename, index=False)
66
  # save into cloud storage
67
- load_historical_file(filename)
68
  except Exception as e:
69
  print(f"Error saving tools file in the historical folder {e}")
70
 
@@ -73,7 +73,7 @@ def save_historical_data():
73
  filename = f"all_trades_profitability_{timestamp}.parquet"
74
  all_trades.to_parquet(HIST_DIR / filename, index=False)
75
  # save into cloud storage
76
- load_historical_file(filename)
77
 
78
  except Exception as e:
79
  print(
@@ -101,7 +101,6 @@ def only_new_weekly_analysis():
101
 
102
  # FpmmTrades ETL
103
  fpmmTrades_etl(
104
- rpc=rpc,
105
  trades_filename="new_fpmmTrades.parquet",
106
  from_timestamp=int(latest_timestamp.timestamp()),
107
  )
@@ -132,7 +131,7 @@ def only_new_weekly_analysis():
132
 
133
  save_historical_data()
134
  try:
135
- clean_old_data_from_parquet_files("2024-11-12")
136
  except Exception as e:
137
  print("Error cleaning the oldest information from parquet files")
138
  print(f"reason = {e}")
@@ -143,5 +142,28 @@ def only_new_weekly_analysis():
143
  logging.info("Weekly analysis files generated and saved")
144
 
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  if __name__ == "__main__":
147
  only_new_weekly_analysis()
 
 
3
  import pandas as pd
4
  from markets import etl as mkt_etl, DEFAULT_FILENAME as MARKETS_FILENAME, fpmmTrades_etl
5
  from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
6
+ from profitability import run_profitability_analysis, add_trades_profitability
7
  from utils import (
8
  get_question,
9
  current_answer,
 
22
  from cleaning_old_info import clean_old_data_from_parquet_files
23
  from web3_utils import updating_timestamps
24
  from manage_space_files import move_files
25
+ from cloud_storage import upload_historical_file
26
  from tools_metrics import compute_tools_based_datasets
27
 
28
 
 
64
  filename = f"tools_{timestamp}.parquet"
65
  tools.to_parquet(HIST_DIR / filename, index=False)
66
  # save into cloud storage
67
+ upload_historical_file(filename)
68
  except Exception as e:
69
  print(f"Error saving tools file in the historical folder {e}")
70
 
 
73
  filename = f"all_trades_profitability_{timestamp}.parquet"
74
  all_trades.to_parquet(HIST_DIR / filename, index=False)
75
  # save into cloud storage
76
+ upload_historical_file(filename)
77
 
78
  except Exception as e:
79
  print(
 
101
 
102
  # FpmmTrades ETL
103
  fpmmTrades_etl(
 
104
  trades_filename="new_fpmmTrades.parquet",
105
  from_timestamp=int(latest_timestamp.timestamp()),
106
  )
 
131
 
132
  save_historical_data()
133
  try:
134
+ clean_old_data_from_parquet_files("2024-11-19")
135
  except Exception as e:
136
  print("Error cleaning the oldest information from parquet files")
137
  print(f"reason = {e}")
 
142
  logging.info("Weekly analysis files generated and saved")
143
 
144
 
145
def restoring_trades_data(from_date: str, to_date: str):
    """Re-fetch the trades of a date window and merge them into the datasets.

    Runs the markets ETL, downloads the trades between ``from_date`` and
    ``to_date`` into ``missing_fpmmTrades.parquet``, merges that file with the
    previous fpmmTrades file and finally adds the profitability information.

    Args:
        from_date: first day of the window, formatted as ``YYYY-MM-DD``.
        to_date: last day of the window, formatted as ``YYYY-MM-DD``.
    """
    date_format = "%Y-%m-%d"
    # Both limits are parsed as timezone-aware (UTC) timestamps up front
    window_start = pd.to_datetime(from_date, format=date_format, utc=True)
    window_end = pd.to_datetime(to_date, format=date_format, utc=True)

    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # The trades ETL takes its limits as integer epoch seconds
    start_epoch = int(window_start.timestamp())
    end_epoch = int(window_end.timestamp())
    fpmmTrades_etl(
        trades_filename="missing_fpmmTrades.parquet",
        from_timestamp=start_epoch,
        to_timestamp=end_epoch,
    )

    # merge with the old file
    print("Merging with previous fpmmTrades file")
    update_fpmmTrades_parquet(trades_filename="missing_fpmmTrades.parquet")

    # adding tools information
    add_trades_profitability(trades_filename="missing_fpmmTrades.parquet")
165
+
166
+
167
  if __name__ == "__main__":
168
  only_new_weekly_analysis()
169
+ # restoring_trades_data("2024-12-28", "2025-01-07")
scripts/staking.py CHANGED
@@ -22,13 +22,27 @@ STAKING_PROGRAMS_QS = {
22
  "quickstart_beta_expert": "0x5344B7DD311e5d3DdDd46A4f71481bD7b05AAA3e",
23
  "quickstart_beta_expert_2": "0xb964e44c126410df341ae04B13aB10A985fE3513",
24
  "quickstart_beta_expert_3": "0x80faD33Cadb5F53f9D29F02Db97D682E8b101618",
 
 
 
 
 
 
 
 
 
25
  }
26
 
27
  STAKING_PROGRAMS_PEARL = {
28
  "pearl_alpha": "0xEE9F19b5DF06c7E8Bfc7B28745dcf944C504198A",
29
  "pearl_beta": "0xeF44Fb0842DDeF59D37f85D61A1eF492bbA6135d",
30
  "pearl_beta_2": "0x1c2F82413666d2a3fD8bC337b0268e62dDF67434",
 
 
 
31
  }
 
 
32
  SERVICE_REGISTRY_ADDRESS = "0x9338b5153AE39BB89f50468E608eD9d764B755fD"
33
 
34
 
@@ -94,6 +108,8 @@ def get_service_data(service_registry: Any, service_id: int) -> dict:
94
  state = data[-1]
95
  # print(f"address = {address}")
96
  # print(f"state={state}")
 
 
97
  if address != "0x0000000000000000000000000000000000000000":
98
  tmp_map[service_id] = {
99
  "safe_address": address,
@@ -103,7 +119,7 @@ def get_service_data(service_registry: Any, service_id: int) -> dict:
103
  return tmp_map
104
 
105
 
106
- def update_service_map(start: int = 1, end: int = 1000):
107
  if os.path.exists(DATA_DIR / "service_map.pkl"):
108
  with open(DATA_DIR / "service_map.pkl", "rb") as f:
109
  service_map = pickle.load(f)
@@ -197,11 +213,85 @@ def label_trades_by_staking(trades_df: pd.DataFrame, start: int = None) -> None:
197
  return trades_df
198
 
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  if __name__ == "__main__":
201
  # create_service_map()
202
- trades_df = pd.read_parquet(TMP_DIR / "all_trades_df.parquet")
203
- trades_df = trades_df.loc[trades_df["is_invalid"] == False]
204
-
205
- trades_df = label_trades_by_staking(trades_df=trades_df, start=8)
206
- print(trades_df.staking.value_counts())
207
- trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
 
 
 
 
 
 
 
 
 
 
 
 
22
  "quickstart_beta_expert": "0x5344B7DD311e5d3DdDd46A4f71481bD7b05AAA3e",
23
  "quickstart_beta_expert_2": "0xb964e44c126410df341ae04B13aB10A985fE3513",
24
  "quickstart_beta_expert_3": "0x80faD33Cadb5F53f9D29F02Db97D682E8b101618",
25
+ "quickstart_beta_expert_4": "0xaD9d891134443B443D7F30013c7e14Fe27F2E029",
26
+ "quickstart_beta_expert_5": "0xE56dF1E563De1B10715cB313D514af350D207212",
27
+ "quickstart_beta_expert_6": "0x2546214aEE7eEa4bEE7689C81231017CA231Dc93",
28
+ "quickstart_beta_expert_7": "0xD7A3C8b975f71030135f1a66e9e23164d54fF455",
29
+ "quickstart_beta_expert_8": "0x356C108D49C5eebd21c84c04E9162de41933030c",
30
+ "quickstart_beta_expert_9": "0x17dBAe44BC5618Cc254055b386A29576b4F87015",
31
+ "quickstart_beta_expert_10": "0xB0ef657b8302bd2c74B6E6D9B2b4b39145b19c6f",
32
+ "quickstart_beta_expert_11": "0x3112c1613eAC3dBAE3D4E38CeF023eb9E2C91CF7",
33
+ "quickstart_beta_expert_12": "0xF4a75F476801B3fBB2e7093aCDcc3576593Cc1fc",
34
  }
35
 
36
  STAKING_PROGRAMS_PEARL = {
37
  "pearl_alpha": "0xEE9F19b5DF06c7E8Bfc7B28745dcf944C504198A",
38
  "pearl_beta": "0xeF44Fb0842DDeF59D37f85D61A1eF492bbA6135d",
39
  "pearl_beta_2": "0x1c2F82413666d2a3fD8bC337b0268e62dDF67434",
40
+ "pearl_beta_3": "0xBd59Ff0522aA773cB6074ce83cD1e4a05A457bc1",
41
+ "pearl_beta_4": "0x3052451e1eAee78e62E169AfdF6288F8791F2918",
42
+ "pearl_beta_5": "0x4Abe376Fda28c2F43b84884E5f822eA775DeA9F4",
43
  }
44
+
45
+
46
  SERVICE_REGISTRY_ADDRESS = "0x9338b5153AE39BB89f50468E608eD9d764B755fD"
47
 
48
 
 
108
  state = data[-1]
109
  # print(f"address = {address}")
110
  # print(f"state={state}")
111
+ # PEARL trade
112
+
113
  if address != "0x0000000000000000000000000000000000000000":
114
  tmp_map[service_id] = {
115
  "safe_address": address,
 
119
  return tmp_map
120
 
121
 
122
+ def update_service_map(start: int = 1, end: int = 2000):
123
  if os.path.exists(DATA_DIR / "service_map.pkl"):
124
  with open(DATA_DIR / "service_map.pkl", "rb") as f:
125
  service_map = pickle.load(f)
 
213
  return trades_df
214
 
215
 
216
def generate_retention_activity_file():
    """Build the retention activity dataset from the tools file.

    Reads ``tools.parquet`` from ``TMP_DIR``, keeps the trader address, request
    time, market creator and request date columns, passes the result through
    ``label_trades_by_staking``, adds a ``month_year_week`` label holding the
    start of the weekly period of each request (``%b-%d-%Y``) and writes the
    result to ``retention_activity.parquet`` in ``TMP_DIR``.

    Returns:
        True once the file has been written.
    """
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date
    tools = tools.sort_values(by="request_time", ascending=True)
    # Explicit copy: columns are added to this frame later on, and doing that
    # on a slice of ``tools`` is chained assignment.
    reduced_tools_df = tools[
        ["trader_address", "request_time", "market_creator", "request_date"]
    ].copy()
    print(f"length of reduced tools before labeling = {len(reduced_tools_df)}")
    reduced_tools_df = label_trades_by_staking(trades_df=reduced_tools_df)
    print(f"length of reduced tools after labeling = {len(reduced_tools_df)}")
    reduced_tools_df = reduced_tools_df.sort_values(by="request_time", ascending=True)
    # Derive the week label from the frame it is stored in, so the result does
    # not depend on the labelling step having preserved the index of ``tools``.
    reduced_tools_df["month_year_week"] = (
        pd.to_datetime(reduced_tools_df["request_time"])
        .dt.to_period("W")
        .dt.start_time.dt.strftime("%b-%d-%Y")
    )
    reduced_tools_df.to_parquet(TMP_DIR / "retention_activity.parquet")
    return True
235
+
236
+
237
def check_list_addresses(address_list: list):
    """Print whether each address is the safe address of a known service.

    Loads the service map, asks ``update_service_map`` to extend it starting
    at the highest known service id and then labels every address in
    ``address_list`` as ``"Olas"`` (it matches a service safe address,
    case-insensitively) or ``"non_Olas"``.

    Args:
        address_list: addresses to check.
    """
    service_map_path = DATA_DIR / "service_map.pkl"
    with open(service_map_path, "rb") as f:
        service_map = pickle.load(f)
    print(f"length of service map={len(service_map)}")
    last_key = max(service_map.keys())

    print(f"last service key = {last_key}")
    update_service_map(start=last_key)
    # Reload so that services added by the update are taken into account.
    # NOTE(review): assumes update_service_map persists its result to
    # service_map.pkl - confirm; if it does not, this reload is a no-op.
    with open(service_map_path, "rb") as f:
        service_map = pickle.load(f)

    # check if it is part of any service id on the map
    safe_addresses = {
        value["safe_address"].lower() for value in service_map.values()
    }
    # The label is decided independently for every address; a match on one
    # address must not hide the "non_Olas" label of the following ones.
    mapping = {
        trader_address: (
            "Olas" if trader_address.lower() in safe_addresses else "non_Olas"
        )
        for trader_address in address_list
    }
    print("mapping")
    print(mapping)
260
+
261
+
262
def check_service_map():
    """Report which service ids are absent from the stored service map.

    Loads ``service_map.pkl`` from ``DATA_DIR`` and prints its size, its
    highest key, every integer between 1 and that key that is not a key of
    the map, and finally how many such ids there are.
    """
    with open(DATA_DIR / "service_map.pkl", "rb") as map_file:
        service_map = pickle.load(map_file)
    print(f"length of service map={len(service_map)}")
    known_ids = service_map.keys()
    highest_id = max(known_ids)
    print(f"last key ={highest_id}")
    # The highest id is a key by construction, so the scan stops just before it.
    absent_ids = [
        service_id
        for service_id in range(1, highest_id)
        if service_id not in known_ids
    ]
    for service_id in absent_ids:
        print(f"missing key = {service_id}")
    print(f"total missing keys = {len(absent_ids)}")
277
+
278
+
279
  if __name__ == "__main__":
280
  # create_service_map()
281
+ # trades_df = pd.read_parquet(TMP_DIR / "all_trades_df.parquet")
282
+ # trades_df = trades_df.loc[trades_df["is_invalid"] == False]
283
+
284
+ # trades_df = label_trades_by_staking(trades_df=trades_df, start=8)
285
+ # print(trades_df.staking.value_counts())
286
+ # trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
287
+ # generate_retention_activity_file()
288
+ a_list = [
289
+ "0x027592700fafc4db3221bb662d7bdc7f546a2bb5",
290
+ "0x0845f4ad01a2f41da618848c7a9e56b64377965e",
291
+ ]
292
+ # check_list_addresses(address_list=a_list)
293
+ # update_service_map()
294
+ # check_service_map()
295
+ unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
296
+ unknown_traders = label_trades_by_staking(trades_df=unknown_traders)
297
+ unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
scripts/tools_metrics.py CHANGED
@@ -61,7 +61,9 @@ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
61
  tools = tools.sort_values(by="request_time", ascending=True)
62
 
63
  tools["request_month_year_week"] = (
64
- pd.to_datetime(tools["request_time"]).dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
65
  )
66
  # preparing the tools graph
67
  # adding the total
 
61
  tools = tools.sort_values(by="request_time", ascending=True)
62
 
63
  tools["request_month_year_week"] = (
64
+ pd.to_datetime(tools["request_time"])
65
+ .dt.to_period("W")
66
+ .dt.start_time.dt.strftime("%b-%d-%Y")
67
  )
68
  # preparing the tools graph
69
  # adding the total
scripts/web3_utils.py CHANGED
@@ -132,7 +132,9 @@ def updating_timestamps(rpc: str, tools_filename: str):
132
  "%Y-%m"
133
  )
134
  tools["request_month_year_week"] = (
135
- pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
 
 
136
  )
137
 
138
  # Save the tools data after the updates on the content
@@ -178,7 +180,7 @@ def query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
178
  userPositions_id_gt=userPositions_id_gt,
179
  )
180
  content_json = {"query": query}
181
- print("sending query to subgraph")
182
  res = requests.post(subgraph, headers=headers, json=content_json)
183
  result_json = res.json()
184
  # print(f"result = {result_json}")
@@ -229,6 +231,7 @@ def query_omen_xdai_subgraph(
229
  first=QUERY_BATCH_SIZE,
230
  id_gt=id_gt,
231
  )
 
232
  content_json = to_content(query)
233
 
234
  res = requests.post(omen_subgraph, headers=headers, json=content_json)
 
132
  "%Y-%m"
133
  )
134
  tools["request_month_year_week"] = (
135
+ pd.to_datetime(tools["request_time"])
136
+ .dt.to_period("W")
137
+ .dt.start_time.dt.strftime("%b-%d-%Y")
138
  )
139
 
140
  # Save the tools data after the updates on the content
 
180
  userPositions_id_gt=userPositions_id_gt,
181
  )
182
  content_json = {"query": query}
183
+ # print("sending query to subgraph")
184
  res = requests.post(subgraph, headers=headers, json=content_json)
185
  result_json = res.json()
186
  # print(f"result = {result_json}")
 
231
  first=QUERY_BATCH_SIZE,
232
  id_gt=id_gt,
233
  )
234
+ print(f"omen query={query}")
235
  content_json = to_content(query)
236
 
237
  res = requests.post(omen_subgraph, headers=headers, json=content_json)
tabs/tool_win.py CHANGED
@@ -14,7 +14,9 @@ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
14
  tools = tools.sort_values(by="request_time", ascending=True)
15
 
16
  tools["request_month_year_week"] = (
17
- pd.to_datetime(tools["request_time"]).dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
18
  )
19
  # preparing the tools graph
20
  # adding the total
 
14
  tools = tools.sort_values(by="request_time", ascending=True)
15
 
16
  tools["request_month_year_week"] = (
17
+ pd.to_datetime(tools["request_time"])
18
+ .dt.to_period("W")
19
+ .dt.start_time.dt.strftime("%b-%d-%Y")
20
  )
21
  # preparing the tools graph
22
  # adding the total
tabs/trades.py CHANGED
@@ -21,7 +21,9 @@ def prepare_trades(trades_df: pd.DataFrame) -> pd.DataFrame:
21
  trades_df["creation_timestamp"].dt.to_period("M").astype(str)
22
  )
23
  trades_df["month_year_week"] = (
24
- trades_df["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
25
  )
26
  trades_df["winning_trade"] = trades_df["winning_trade"].astype(int)
27
  return trades_df
 
21
  trades_df["creation_timestamp"].dt.to_period("M").astype(str)
22
  )
23
  trades_df["month_year_week"] = (
24
+ trades_df["creation_timestamp"]
25
+ .dt.to_period("W")
26
+ .dt.start_time.dt.strftime("%b-%d-%Y")
27
  )
28
  trades_df["winning_trade"] = trades_df["winning_trade"].astype(int)
29
  return trades_df