cyberosa committed on
Commit
570e118
·
1 Parent(s): c5bbc45

cleaning testing data

Browse files
Files changed (3) hide show
  1. notebooks/test.ipynb +0 -363
  2. test.ipynb +0 -410
  3. winning_trades_percentage.csv +0 -3
notebooks/test.ipynb DELETED
@@ -1,363 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 3,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pickle\n",
10
- "import pandas as pd\n",
11
- "from pathlib import Path\n",
12
- "from web3 import Web3\n",
13
- "from concurrent.futures import ThreadPoolExecutor\n",
14
- "from tqdm import tqdm\n",
15
- "from functools import partial\n",
16
- "from datetime import datetime\n"
17
- ]
18
- },
19
- {
20
- "cell_type": "markdown",
21
- "metadata": {},
22
- "source": [
23
- "### Make t_map"
24
- ]
25
- },
26
- {
27
- "cell_type": "code",
28
- "execution_count": null,
29
- "metadata": {},
30
- "outputs": [],
31
- "source": [
32
- "tools = pd.read_csv(\"../data/tools.csv\")"
33
- ]
34
- },
35
- {
36
- "cell_type": "code",
37
- "execution_count": null,
38
- "metadata": {},
39
- "outputs": [],
40
- "source": [
41
- "tools.columns"
42
- ]
43
- },
44
- {
45
- "cell_type": "code",
46
- "execution_count": null,
47
- "metadata": {},
48
- "outputs": [],
49
- "source": [
50
- "import pickle\n",
51
- "t_map = tools[['request_block', 'request_time']].set_index('request_block').to_dict()['request_time']\n",
52
- "\n",
53
- "with open('../data/t_map.pkl', 'wb') as f:\n",
54
- " pickle.dump(t_map, f)\n",
55
- "\n"
56
- ]
57
- },
58
- {
59
- "cell_type": "code",
60
- "execution_count": null,
61
- "metadata": {},
62
- "outputs": [],
63
- "source": [
64
- "with open('../data/t_map.pkl', 'rb') as f:\n",
65
- " t_map = pickle.load(f)"
66
- ]
67
- },
68
- {
69
- "cell_type": "markdown",
70
- "metadata": {},
71
- "source": [
72
- "### Markets"
73
- ]
74
- },
75
- {
76
- "cell_type": "code",
77
- "execution_count": 4,
78
- "metadata": {},
79
- "outputs": [
80
- {
81
- "data": {
82
- "text/plain": [
83
- "Index(['id', 'currentAnswer', 'title'], dtype='object')"
84
- ]
85
- },
86
- "execution_count": 4,
87
- "metadata": {},
88
- "output_type": "execute_result"
89
- }
90
- ],
91
- "source": [
92
- "fpmms = pd.read_csv(\"../data/fpmms.csv\")\n",
93
- "fpmms.columns"
94
- ]
95
- },
96
- {
97
- "cell_type": "code",
98
- "execution_count": 6,
99
- "metadata": {},
100
- "outputs": [
101
- {
102
- "name": "stderr",
103
- "output_type": "stream",
104
- "text": [
105
- "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_42934/371090584.py:1: DtypeWarning: Columns (2) have mixed types. Specify dtype option on import or set low_memory=False.\n",
106
- " delivers = pd.read_csv(\"../data/delivers.csv\")\n"
107
- ]
108
- },
109
- {
110
- "data": {
111
- "text/plain": [
112
- "(263613, 12)"
113
- ]
114
- },
115
- "execution_count": 6,
116
- "metadata": {},
117
- "output_type": "execute_result"
118
- }
119
- ],
120
- "source": [
121
- "delivers = pd.read_csv(\"../data/delivers.csv\")\n",
122
- "delivers.shape\n"
123
- ]
124
- },
125
- {
126
- "cell_type": "code",
127
- "execution_count": 7,
128
- "metadata": {},
129
- "outputs": [
130
- {
131
- "data": {
132
- "text/plain": [
133
- "(245092, 6)"
134
- ]
135
- },
136
- "execution_count": 7,
137
- "metadata": {},
138
- "output_type": "execute_result"
139
- }
140
- ],
141
- "source": [
142
- "requests = pd.read_csv(\"../data/requests.csv\")\n",
143
- "requests.columns\n",
144
- "\n",
145
- "requests.shape"
146
- ]
147
- },
148
- {
149
- "cell_type": "code",
150
- "execution_count": 8,
151
- "metadata": {},
152
- "outputs": [
153
- {
154
- "name": "stderr",
155
- "output_type": "stream",
156
- "text": [
157
- "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_42934/3254331204.py:1: DtypeWarning: Columns (7,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
158
- " tools = pd.read_csv(\"../data/tools.csv\")\n"
159
- ]
160
- },
161
- {
162
- "data": {
163
- "text/plain": [
164
- "Index(['request_id', 'request_block', 'prompt_request', 'tool', 'nonce',\n",
165
- " 'trader_address', 'deliver_block', 'error', 'error_message',\n",
166
- " 'prompt_response', 'mech_address', 'p_yes', 'p_no', 'confidence',\n",
167
- " 'info_utility', 'vote', 'win_probability', 'title', 'currentAnswer',\n",
168
- " 'request_time', 'request_month_year', 'request_month_year_week'],\n",
169
- " dtype='object')"
170
- ]
171
- },
172
- "execution_count": 8,
173
- "metadata": {},
174
- "output_type": "execute_result"
175
- }
176
- ],
177
- "source": [
178
- "tools = pd.read_csv(\"../data/tools.csv\")\n",
179
- "tools.columns"
180
- ]
181
- },
182
- {
183
- "cell_type": "code",
184
- "execution_count": 9,
185
- "metadata": {},
186
- "outputs": [
187
- {
188
- "data": {
189
- "text/plain": [
190
- "841"
191
- ]
192
- },
193
- "execution_count": 9,
194
- "metadata": {},
195
- "output_type": "execute_result"
196
- }
197
- ],
198
- "source": [
199
- "tools['request_time'].isna().sum()"
200
- ]
201
- },
202
- {
203
- "cell_type": "code",
204
- "execution_count": 10,
205
- "metadata": {},
206
- "outputs": [],
207
- "source": [
208
- "def block_number_to_timestamp(block_number: int, web3: Web3) -> str:\n",
209
- " \"\"\"Convert a block number to a timestamp.\"\"\"\n",
210
- " block = web3.eth.get_block(block_number)\n",
211
- " timestamp = datetime.utcfromtimestamp(block['timestamp'])\n",
212
- " return timestamp.strftime('%Y-%m-%d %H:%M:%S')\n",
213
- "\n",
214
- "\n",
215
- "def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:\n",
216
- " \"\"\"Parallelize the timestamp conversion.\"\"\"\n",
217
- " block_numbers = df['request_block'].tolist()\n",
218
- " with ThreadPoolExecutor(max_workers=10) as executor:\n",
219
- " results = list(tqdm(executor.map(function, block_numbers), total=len(block_numbers))) \n",
220
- " return results\n"
221
- ]
222
- },
223
- {
224
- "cell_type": "code",
225
- "execution_count": 11,
226
- "metadata": {},
227
- "outputs": [],
228
- "source": [
229
- "rpc = \"https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a\"\n",
230
- "web3 = Web3(Web3.HTTPProvider(rpc))\n",
231
- "\n",
232
- "partial_block_number_to_timestamp = partial(block_number_to_timestamp, web3=web3)"
233
- ]
234
- },
235
- {
236
- "cell_type": "code",
237
- "execution_count": 15,
238
- "metadata": {},
239
- "outputs": [
240
- {
241
- "name": "stderr",
242
- "output_type": "stream",
243
- "text": [
244
- "100%|██████████| 841/841 [00:25<00:00, 33.18it/s]\n"
245
- ]
246
- }
247
- ],
248
- "source": [
249
- "missing_time_indices = tools[tools['request_time'].isna()].index\n",
250
- "if not missing_time_indices.empty:\n",
251
- " partial_block_number_to_timestamp = partial(block_number_to_timestamp, web3=web3)\n",
252
- " missing_timestamps = parallelize_timestamp_conversion(tools.loc[missing_time_indices], partial_block_number_to_timestamp)\n",
253
- " \n",
254
- " # Update the original DataFrame with the missing timestamps\n",
255
- " for i, timestamp in zip(missing_time_indices, missing_timestamps):\n",
256
- " tools.at[i, 'request_time'] = timestamp"
257
- ]
258
- },
259
- {
260
- "cell_type": "code",
261
- "execution_count": 16,
262
- "metadata": {},
263
- "outputs": [
264
- {
265
- "data": {
266
- "text/plain": [
267
- "0"
268
- ]
269
- },
270
- "execution_count": 16,
271
- "metadata": {},
272
- "output_type": "execute_result"
273
- }
274
- ],
275
- "source": [
276
- "tools['request_time'].isna().sum()"
277
- ]
278
- },
279
- {
280
- "cell_type": "code",
281
- "execution_count": 17,
282
- "metadata": {},
283
- "outputs": [],
284
- "source": [
285
- "tools['request_month_year'] = pd.to_datetime(tools['request_time']).dt.strftime('%Y-%m')\n",
286
- "tools['request_month_year_week'] = pd.to_datetime(tools['request_time']).dt.to_period('W').astype(str)"
287
- ]
288
- },
289
- {
290
- "cell_type": "code",
291
- "execution_count": 18,
292
- "metadata": {},
293
- "outputs": [
294
- {
295
- "data": {
296
- "text/plain": [
297
- "0"
298
- ]
299
- },
300
- "execution_count": 18,
301
- "metadata": {},
302
- "output_type": "execute_result"
303
- }
304
- ],
305
- "source": [
306
- "tools['request_month_year_week'].isna().sum()\n"
307
- ]
308
- },
309
- {
310
- "cell_type": "code",
311
- "execution_count": 19,
312
- "metadata": {},
313
- "outputs": [],
314
- "source": [
315
- "tools.to_csv(\"../data/tools.csv\", index=False)"
316
- ]
317
- },
318
- {
319
- "cell_type": "code",
320
- "execution_count": 23,
321
- "metadata": {},
322
- "outputs": [],
323
- "source": [
324
- "with open('../data/t_map.pkl', 'rb') as f:\n",
325
- " t_map = pickle.load(f)\n",
326
- "new_timestamps = tools[['request_block', 'request_time']].dropna().set_index('request_block').to_dict()['request_time']\n",
327
- "t_map.update(new_timestamps)\n",
328
- "\n",
329
- "with open('../data/t_map.pkl', 'wb') as f:\n",
330
- " pickle.dump(t_map, f)\n",
331
- "\n"
332
- ]
333
- },
334
- {
335
- "cell_type": "code",
336
- "execution_count": null,
337
- "metadata": {},
338
- "outputs": [],
339
- "source": []
340
- }
341
- ],
342
- "metadata": {
343
- "kernelspec": {
344
- "display_name": "autogen",
345
- "language": "python",
346
- "name": "python3"
347
- },
348
- "language_info": {
349
- "codemirror_mode": {
350
- "name": "ipython",
351
- "version": 3
352
- },
353
- "file_extension": ".py",
354
- "mimetype": "text/x-python",
355
- "name": "python",
356
- "nbconvert_exporter": "python",
357
- "pygments_lexer": "ipython3",
358
- "version": "3.10.13"
359
- }
360
- },
361
- "nbformat": 4,
362
- "nbformat_minor": 2
363
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test.ipynb DELETED
@@ -1,410 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pandas as pd\n",
10
- "from datetime import datetime\n",
11
- "from tqdm import tqdm\n",
12
- "\n",
13
- "import time\n",
14
- "import requests\n",
15
- "import datetime\n",
16
- "import pandas as pd\n",
17
- "from collections import defaultdict\n",
18
- "from typing import Any, Union, List\n",
19
- "from string import Template\n",
20
- "from enum import Enum\n",
21
- "from tqdm import tqdm\n",
22
- "import numpy as np\n",
23
- "from pathlib import Path\n",
24
- "import pickle"
25
- ]
26
- },
27
- {
28
- "cell_type": "code",
29
- "execution_count": null,
30
- "metadata": {},
31
- "outputs": [],
32
- "source": [
33
- "# trades = pd.read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard_old/data/all_trades_profitability.parquet')\n",
34
- "tools = pd.read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard_old/data/tools.parquet')"
35
- ]
36
- },
37
- {
38
- "cell_type": "code",
39
- "execution_count": null,
40
- "metadata": {},
41
- "outputs": [],
42
- "source": [
43
- "tools.groupby(['request_month_year_week', 'error']).size().unstack()"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": null,
49
- "metadata": {},
50
- "outputs": [],
51
- "source": [
52
- "t_map = pickle.load(open('./data/t_map.pkl', 'rb'))\n",
53
- "tools['request_time'] = tools['request_block'].map(t_map)\n",
54
- "tools.to_parquet('./data/tools.parquet')"
55
- ]
56
- },
57
- {
58
- "cell_type": "code",
59
- "execution_count": null,
60
- "metadata": {},
61
- "outputs": [],
62
- "source": [
63
- "tools['request_time'] = pd.to_datetime(tools['request_time'])\n",
64
- "tools = tools[tools['request_time'] >= pd.to_datetime('2024-05-01')]\n",
65
- "tools['request_block'].max()"
66
- ]
67
- },
68
- {
69
- "cell_type": "code",
70
- "execution_count": null,
71
- "metadata": {},
72
- "outputs": [],
73
- "source": [
74
- "requests = pd.read_parquet(\"./data/requests.parquet\")\n",
75
- "delivers = pd.read_parquet(\"./data/delivers.parquet\")\n",
76
- "print(requests.shape)\n",
77
- "print(delivers.shape)"
78
- ]
79
- },
80
- {
81
- "cell_type": "code",
82
- "execution_count": null,
83
- "metadata": {},
84
- "outputs": [],
85
- "source": [
86
- "requests[requests['request_block'] <= 33714082].reset_index(drop=True).to_parquet(\"./data/requests.parquet\")\n",
87
- "delivers[delivers['deliver_block'] <= 33714082].reset_index(drop=True).to_parquet(\"./data/delivers.parquet\")"
88
- ]
89
- },
90
- {
91
- "cell_type": "code",
92
- "execution_count": null,
93
- "metadata": {},
94
- "outputs": [],
95
- "source": [
96
- "import sys \n",
97
- "\n",
98
- "sys.path.append('./')\n",
99
- "from scripts.tools import *"
100
- ]
101
- },
102
- {
103
- "cell_type": "code",
104
- "execution_count": null,
105
- "metadata": {},
106
- "outputs": [],
107
- "source": [
108
- "RPCs = [\n",
109
- " \"https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a\",\n",
110
- "]\n",
111
- "w3s = [Web3(HTTPProvider(r)) for r in RPCs]\n",
112
- "session = create_session()\n",
113
- "event_to_transformer = {\n",
114
- " MechEventName.REQUEST: transform_request,\n",
115
- " MechEventName.DELIVER: transform_deliver,\n",
116
- "}\n",
117
- "mech_to_info = {\n",
118
- " to_checksum_address(address): (\n",
119
- " os.path.join(CONTRACTS_PATH, filename),\n",
120
- " earliest_block,\n",
121
- " )\n",
122
- " for address, (filename, earliest_block) in MECH_TO_INFO.items()\n",
123
- "}\n",
124
- "event_to_contents = {}\n",
125
- "\n",
126
- "# latest_block = w3s[0].eth.get_block(LATEST_BLOCK_NAME)[BLOCK_DATA_NUMBER]\n",
127
- "latest_block = 34032575\n",
128
- "\n",
129
- "next_start_block = latest_block - 300\n",
130
- "\n",
131
- "events_request = []\n",
132
- "events_deliver = []\n",
133
- "# Loop through events in event_to_transformer\n",
134
- "for event_name, transformer in event_to_transformer.items():\n",
135
- " print(f\"Fetching {event_name.value} events\")\n",
136
- " for address, (abi, earliest_block) in mech_to_info.items():\n",
137
- " # parallelize the fetching of events\n",
138
- " with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:\n",
139
- " futures = []\n",
140
- " for i in range(\n",
141
- " next_start_block, latest_block, BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE\n",
142
- " ):\n",
143
- " futures.append(\n",
144
- " executor.submit(\n",
145
- " get_events,\n",
146
- " random.choice(w3s),\n",
147
- " event_name.value,\n",
148
- " address,\n",
149
- " abi,\n",
150
- " i,\n",
151
- " min(i + BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE, latest_block),\n",
152
- " )\n",
153
- " )\n",
154
- "\n",
155
- " for future in tqdm(\n",
156
- " as_completed(futures),\n",
157
- " total=len(futures),\n",
158
- " desc=f\"Fetching {event_name.value} Events\",\n",
159
- " ):\n",
160
- " current_mech_events = future.result()\n",
161
- " if event_name == MechEventName.REQUEST:\n",
162
- " events_request.extend(current_mech_events)\n",
163
- " elif event_name == MechEventName.DELIVER:\n",
164
- " events_deliver.extend(current_mech_events)\n",
165
- "\n",
166
- " parsed_request = parse_events(events_request)\n",
167
- " parsed_deliver = parse_events(events_deliver)"
168
- ]
169
- },
170
- {
171
- "cell_type": "code",
172
- "execution_count": null,
173
- "metadata": {},
174
- "outputs": [],
175
- "source": [
176
- "contents_request = []\n",
177
- "with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:\n",
178
- " futures = []\n",
179
- " for i in range(0, len(parsed_request), GET_CONTENTS_BATCH_SIZE):\n",
180
- " futures.append(\n",
181
- " executor.submit(\n",
182
- " get_contents,\n",
183
- " session,\n",
184
- " parsed_request[i : i + GET_CONTENTS_BATCH_SIZE],\n",
185
- " MechEventName.REQUEST,\n",
186
- " )\n",
187
- " )\n",
188
- "\n",
189
- " for future in tqdm(\n",
190
- " as_completed(futures),\n",
191
- " total=len(futures),\n",
192
- " desc=f\"Fetching {event_name.value} Contents\",\n",
193
- " ):\n",
194
- " current_mech_contents = future.result()\n",
195
- " contents_request.append(current_mech_contents)\n",
196
- "\n",
197
- "contents_request = pd.concat(contents_request, ignore_index=True)"
198
- ]
199
- },
200
- {
201
- "cell_type": "code",
202
- "execution_count": null,
203
- "metadata": {},
204
- "outputs": [],
205
- "source": [
206
- "contents_deliver = []\n",
207
- "with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:\n",
208
- " futures = []\n",
209
- " for i in range(0, len(parsed_deliver), GET_CONTENTS_BATCH_SIZE):\n",
210
- " futures.append(\n",
211
- " executor.submit(\n",
212
- " get_contents,\n",
213
- " session,\n",
214
- " parsed_deliver[i : i + GET_CONTENTS_BATCH_SIZE],\n",
215
- " MechEventName.DELIVER,\n",
216
- " )\n",
217
- " )\n",
218
- "\n",
219
- " for future in tqdm(\n",
220
- " as_completed(futures),\n",
221
- " total=len(futures),\n",
222
- " desc=f\"Fetching {event_name.value} Contents\",\n",
223
- " ):\n",
224
- " current_mech_contents = future.result()\n",
225
- " contents_deliver.append(current_mech_contents)\n",
226
- "\n",
227
- "contents_deliver = pd.concat(contents_deliver, ignore_index=True)"
228
- ]
229
- },
230
- {
231
- "cell_type": "code",
232
- "execution_count": null,
233
- "metadata": {},
234
- "outputs": [],
235
- "source": [
236
- "full_contents = True\n",
237
- "transformed_request = event_to_transformer[MechEventName.REQUEST](contents_request)\n",
238
- "transformed_deliver = event_to_transformer[MechEventName.DELIVER](contents_deliver, full_contents=full_contents)"
239
- ]
240
- },
241
- {
242
- "cell_type": "code",
243
- "execution_count": null,
244
- "metadata": {},
245
- "outputs": [],
246
- "source": [
247
- "transformed_request.shape"
248
- ]
249
- },
250
- {
251
- "cell_type": "code",
252
- "execution_count": null,
253
- "metadata": {},
254
- "outputs": [],
255
- "source": [
256
- "transformed_deliver.shape"
257
- ]
258
- },
259
- {
260
- "cell_type": "code",
261
- "execution_count": null,
262
- "metadata": {},
263
- "outputs": [],
264
- "source": [
265
- "tools = pd.merge(transformed_request, transformed_deliver, on=REQUEST_ID_FIELD)\n",
266
- "tools.columns"
267
- ]
268
- },
269
- {
270
- "cell_type": "code",
271
- "execution_count": null,
272
- "metadata": {},
273
- "outputs": [],
274
- "source": [
275
- "def store_progress(\n",
276
- " filename: str,\n",
277
- " event_to_contents: Dict[str, pd.DataFrame],\n",
278
- " tools: pd.DataFrame,\n",
279
- ") -> None:\n",
280
- " \"\"\"Store the given progress.\"\"\"\n",
281
- " if filename:\n",
282
- " DATA_DIR.mkdir(parents=True, exist_ok=True) # Ensure the directory exists\n",
283
- " for event_name, content in event_to_contents.items():\n",
284
- " event_filename = gen_event_filename(event_name) # Ensure this function returns a valid filename string\n",
285
- " try:\n",
286
- " if \"result\" in content.columns:\n",
287
- " content = content.drop(columns=[\"result\"]) # Avoid in-place modification\n",
288
- " if 'error' in content.columns:\n",
289
- " content['error'] = content['error'].astype(bool)\n",
290
- " content.to_parquet(DATA_DIR / event_filename, index=False)\n",
291
- " except Exception as e:\n",
292
- " print(f\"Failed to write {event_name}: {e}\")\n",
293
- " try:\n",
294
- " if \"result\" in tools.columns:\n",
295
- " tools = tools.drop(columns=[\"result\"])\n",
296
- " if 'error' in tools.columns:\n",
297
- " tools['error'] = tools['error'].astype(bool)\n",
298
- " tools.to_parquet(DATA_DIR / filename, index=False)\n",
299
- " except Exception as e:\n",
300
- " print(f\"Failed to write tools data: {e}\")"
301
- ]
302
- },
303
- {
304
- "cell_type": "code",
305
- "execution_count": null,
306
- "metadata": {},
307
- "outputs": [],
308
- "source": [
309
- "# store_progress(filename, event_to_contents, tools)"
310
- ]
311
- },
312
- {
313
- "cell_type": "code",
314
- "execution_count": null,
315
- "metadata": {},
316
- "outputs": [],
317
- "source": [
318
- "if 'result' in transformed_deliver.columns:\n",
319
- " transformed_deliver = transformed_deliver.drop(columns=['result'])\n",
320
- "if 'error' in transformed_deliver.columns:\n",
321
- " transformed_deliver['error'] = transformed_deliver['error'].astype(bool)"
322
- ]
323
- },
324
- {
325
- "cell_type": "code",
326
- "execution_count": null,
327
- "metadata": {},
328
- "outputs": [],
329
- "source": [
330
- "transformed_deliver.to_parquet(\"transformed_deliver.parquet\", index=False)"
331
- ]
332
- },
333
- {
334
- "cell_type": "code",
335
- "execution_count": null,
336
- "metadata": {},
337
- "outputs": [],
338
- "source": [
339
- "d = pd.read_parquet(\"transformed_deliver.parquet\")"
340
- ]
341
- },
342
- {
343
- "cell_type": "markdown",
344
- "metadata": {},
345
- "source": [
346
- "### duck db"
347
- ]
348
- },
349
- {
350
- "cell_type": "code",
351
- "execution_count": null,
352
- "metadata": {},
353
- "outputs": [],
354
- "source": [
355
- "import duckdb\n",
356
- "from datetime import datetime, timedelta\n",
357
- "\n",
358
- "# Calculate the date for two months ago\n",
359
- "two_months_ago = (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d')\n",
360
- "\n",
361
- "# Connect to an in-memory DuckDB instance\n",
362
- "con = duckdb.connect(':memory:')\n",
363
- "\n",
364
- "# Perform a SQL query to select data from the past two months directly from the Parquet file\n",
365
- "query = f\"\"\"\n",
366
- "SELECT *\n",
367
- "FROM read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard_old/data/tools.parquet')\n",
368
- "WHERE request_time >= '{two_months_ago}'\n",
369
- "\"\"\"\n",
370
- "\n",
371
- "# Fetch the result as a pandas DataFrame\n",
372
- "df = con.execute(query).fetchdf()\n",
373
- "\n",
374
- "# Close the connection\n",
375
- "con.close()\n",
376
- "\n",
377
- "# Print the DataFrame\n",
378
- "print(df)"
379
- ]
380
- },
381
- {
382
- "cell_type": "code",
383
- "execution_count": null,
384
- "metadata": {},
385
- "outputs": [],
386
- "source": []
387
- }
388
- ],
389
- "metadata": {
390
- "kernelspec": {
391
- "display_name": "akash",
392
- "language": "python",
393
- "name": "python3"
394
- },
395
- "language_info": {
396
- "codemirror_mode": {
397
- "name": "ipython",
398
- "version": 3
399
- },
400
- "file_extension": ".py",
401
- "mimetype": "text/x-python",
402
- "name": "python",
403
- "nbconvert_exporter": "python",
404
- "pygments_lexer": "ipython3",
405
- "version": "3.10.14"
406
- }
407
- },
408
- "nbformat": 4,
409
- "nbformat_minor": 2
410
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
winning_trades_percentage.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:86e278f91e287f692ad257528b82f60a53062ae697adbd911807eecbfb3c8b94
3
- size 26777