cyberosa committed
Commit d81ea9b · 1 Parent(s): 4da34b0

updating scripts

scripts/cleaning_old_info.py CHANGED
@@ -1,5 +1,6 @@
 import pandas as pd
-from profitability import DATA_DIR, summary_analyse
+from profitability import summary_analyse
+from utils import DATA_DIR
 from staking import label_trades_by_staking


@@ -39,6 +40,9 @@ def clean_old_data_from_parquet_files(cutoff_date: str):
         print(f"length before filtering {len(all_trades)}")
         all_trades = all_trades.loc[all_trades["creation_timestamp"] > min_date_utc]
         print(f"length after filtering {len(all_trades)}")
+        all_trades.to_parquet(
+            DATA_DIR / "all_trades_profitability.parquet", index=False
+        )

     except Exception as e:
         print(f"Error cleaning all trades profitability file {e}")
scripts/cloud_storage.py ADDED
@@ -0,0 +1,83 @@
+from minio import Minio
+from minio.error import S3Error
+import os
+import argparse
+
+from utils import HIST_DIR
+
+MINIO_ENDPOINT = "minio.autonolas.tech"
+ACCESS_KEY = os.environ.get("CLOUD_ACCESS_KEY", None)
+SECRET_KEY = os.environ.get("CLOUD_SECRET_KEY", None)
+BUCKET_NAME = "weekly-stats"
+FOLDER_NAME = "historical_data"
+
+
+def initialize_client():
+    # Initialize the MinIO client
+    client = Minio(
+        MINIO_ENDPOINT,
+        access_key=ACCESS_KEY,
+        secret_key=SECRET_KEY,
+        secure=True,  # Set to False if not using HTTPS
+    )
+    return client
+
+
+def upload_file(client, filename: str, file_path: str):
+    """Upload a file to the bucket"""
+    try:
+        OBJECT_NAME = FOLDER_NAME + "/" + filename
+        print(
+            f"filename={filename}, object_name={OBJECT_NAME} and file_path={file_path}"
+        )
+        client.fput_object(
+            BUCKET_NAME, OBJECT_NAME, file_path, part_size=10 * 1024 * 1024
+        )  # 10MB parts
+        print(f"File '{file_path}' uploaded as '{OBJECT_NAME}'.")
+    except S3Error as err:
+        print(f"Error uploading file: {err}")
+
+
+def download_file(client, filename: str, file_path: str):
+    """Download the file back"""
+    try:
+        OBJECT_NAME = FOLDER_NAME + "/" + filename
+        client.fget_object(BUCKET_NAME, OBJECT_NAME, "downloaded_" + file_path)
+        print(f"File '{OBJECT_NAME}' downloaded as 'downloaded_{file_path}'.")
+    except S3Error as err:
+        print(f"Error downloading file: {err}")
+
+
+def load_historical_file(client, filename: str):
+    """Function to load one file into the cloud storage"""
+    file_path = filename
+    upload_file(client, filename, file_path)
+
+
+def process_historical_files(client):
+    """Process all parquet files in historical_data folder"""
+
+    # Walk through all files in the folder
+    for filename in os.listdir(HIST_DIR):
+        # Check if file is a parquet file
+        if filename.endswith(".parquet"):
+            try:
+                load_historical_file(client, filename)
+                print(f"Successfully processed {filename}")
+            except Exception as e:
+                print(f"Error processing {filename}: {str(e)}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Load files to the cloud storage for historical data"
+    )
+    parser.add_argument("param_1", type=str, help="Name of the file to upload")
+
+    # Parse the arguments
+    args = parser.parse_args()
+    filename = args.param_1
+
+    client = initialize_client()
+    # load_historical_file(client, filename)
+    process_historical_files(client)
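
A minimal usage sketch for the new cloud storage helpers, assuming CLOUD_ACCESS_KEY and CLOUD_SECRET_KEY are exported in the environment; the filename below is only an example:

# Sketch: upload a single historical parquet file with the helpers above.
# load_historical_file uses the filename itself as the local path, so run it
# from the directory that contains the file.
from cloud_storage import initialize_client, load_historical_file

client = initialize_client()
load_historical_file(client, "all_trades_profitability_2024-10-13.parquet")  # example filename
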
scripts/daily_data.py CHANGED
@@ -1,7 +1,5 @@
 import logging
-from utils import measure_execution_time
-from pull_data import DATA_DIR
-from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
+from utils import measure_execution_time, DATA_DIR, TMP_DIR
 from profitability import (
     analyse_all_traders,
     label_trades_by_staking,
@@ -15,8 +13,8 @@ logging.basicConfig(level=logging.INFO)
 def prepare_live_metrics(
     tools_filename="new_tools.parquet", trades_filename="new_fpmmTrades.parquet"
 ):
-    fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
-    tools = pd.read_parquet(DATA_DIR / tools_filename)
+    fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
+    tools = pd.read_parquet(TMP_DIR / tools_filename)
     print("Analysing trades...")
     all_trades_df = analyse_all_traders(fpmmTrades, tools, daily_info=True)
scripts/markets.py CHANGED
@@ -21,14 +21,12 @@ import functools
 import warnings
 from string import Template
 from typing import Optional, Generator, Callable
-
 import pandas as pd
 import requests
 from tqdm import tqdm
-
 from typing import List, Dict
-from pathlib import Path
-from utils import SUBGRAPH_API_KEY
+from utils import SUBGRAPH_API_KEY, DATA_DIR, TMP_DIR
+from web3_utils import FPMM_QS_CREATOR, FPMM_PEARL_CREATOR, query_omen_xdai_subgraph
 from queries import (
     FPMMS_QUERY,
     ID_FIELD,
@@ -44,22 +42,16 @@ from queries import (
 
 ResponseItemType = List[Dict[str, str]]
 SubgraphResponseType = Dict[str, ResponseItemType]
-
-
-CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
-PEARL_CREATOR = "0xFfc8029154ECD55ABED15BD428bA596E7D23f557"
 BATCH_SIZE = 1000
-
+DEFAULT_TO_TIMESTAMP = 2147483647  # around year 2038
+DEFAULT_FROM_TIMESTAMP = 0
 OMEN_SUBGRAPH_URL = Template(
     """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
 )
 
 MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
 DEFAULT_FILENAME = "fpmms.parquet"
-SCRIPTS_DIR = Path(__file__).parent
-ROOT_DIR = SCRIPTS_DIR.parent
-DATA_DIR = ROOT_DIR / "data"
-market_creators_map = {"quickstart": CREATOR, "pearl": PEARL_CREATOR}
+market_creators_map = {"quickstart": FPMM_QS_CREATOR, "pearl": FPMM_PEARL_CREATOR}
 
 
 class RetriesExceeded(Exception):
@@ -144,15 +136,72 @@ def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
     return data
 
 
+def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
+    print("Transforming trades dataframe")
+    # convert creator to address
+    df["creator"] = df["creator"].apply(lambda x: x["id"])
+
+    # normalize fpmm column
+    fpmm = pd.json_normalize(df["fpmm"])
+    fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
+    df = pd.concat([df, fpmm], axis=1)
+
+    # drop fpmm column
+    df.drop(["fpmm"], axis=1, inplace=True)
+
+    # change creator to creator_address
+    df.rename(columns={"creator": "trader_address"}, inplace=True)
+    print(df.head())
+    print(df.info())
+    return df
+
+
+def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
+    """Create fpmmTrades for all trades."""
+    # Quickstart trades
+    qs_trades_json = query_omen_xdai_subgraph(
+        trader_category="quickstart",
+        from_timestamp=from_timestamp,
+        to_timestamp=DEFAULT_TO_TIMESTAMP,
+        fpmm_from_timestamp=from_timestamp,
+        fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
+    )
+
+    print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
+
+    # convert to dataframe
+    qs_df = pd.DataFrame(qs_trades_json["data"]["fpmmTrades"])
+    qs_df["market_creator"] = "quickstart"
+    qs_df = transform_fpmmTrades(qs_df)
+
+    # Pearl trades
+    pearl_trades_json = query_omen_xdai_subgraph(
+        trader_category="pearl",
+        from_timestamp=from_timestamp,
+        to_timestamp=DEFAULT_TO_TIMESTAMP,
+        fpmm_from_timestamp=from_timestamp,
+        fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
+    )
+
+    print(f"length of the pearl_trades_json dataset {len(pearl_trades_json)}")
+
+    # convert to dataframe
+    pearl_df = pd.DataFrame(pearl_trades_json["data"]["fpmmTrades"])
+    pearl_df["market_creator"] = "pearl"
+    pearl_df = transform_fpmmTrades(pearl_df)
+
+    return pd.concat([qs_df, pearl_df], ignore_index=True)
+
+
 def fpmms_fetcher(trader_category: str) -> Generator[ResponseItemType, int, None]:
     """An indefinite fetcher for the FPMMs."""
     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
     print(f"omen_subgraph = {omen_subgraph}")
 
     if trader_category == "pearl":
-        creator_id = PEARL_CREATOR
+        creator_id = FPMM_PEARL_CREATOR
     else:  # quickstart
-        creator_id = CREATOR
+        creator_id = FPMM_QS_CREATOR
     while True:
         fpmm_id = yield
         fpmms_query = FPMMS_QUERY.substitute(
@@ -254,7 +303,7 @@ def add_market_creator(tools: pd.DataFrame) -> None:
     # Check if fpmmTrades.parquet is in the same directory
     try:
         trades_filename = "fpmmTrades.parquet"
-        fpmms_trades = pd.read_parquet(DATA_DIR / trades_filename)
+        fpmms_trades = pd.read_parquet(TMP_DIR / trades_filename)
     except FileNotFoundError:
         print("Error: fpmmTrades.parquet not found. No market creator added")
         return
@@ -278,5 +327,21 @@ def add_market_creator(tools: pd.DataFrame) -> None:
     return tools
 
 
+def fpmmTrades_etl(rpc: str, trades_filename: str, from_timestamp: str) -> None:
+    print("Generating the trades file")
+    try:
+        fpmmTrades = create_fpmmTrades(rpc, from_timestamp=from_timestamp)
+    except FileNotFoundError:
+        print(f"Error creating {trades_filename} file .")
+
+    # make sure trader_address is in the columns
+    assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
+
+    # lowercase and strip creator_address
+    fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
+    fpmmTrades.to_parquet(DATA_DIR / trades_filename, index=False)
+    return
+
+
 if __name__ == "__main__":
     etl("all_fpmms.parquet")
scripts/mech_request_utils.py CHANGED
@@ -22,31 +22,27 @@ import json
 import time
 import pickle
 from random import uniform
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, Tuple
 from pathlib import Path
 import requests
 from gql import Client, gql
 from gql.transport.requests import RequestsHTTPTransport
 from tools import (
-    IPFS_POLL_INTERVAL,
     GET_CONTENTS_BATCH_SIZE,
     IRRELEVANT_TOOLS,
     create_session,
     request,
 )
 from tqdm import tqdm
-from markets import PEARL_CREATOR, CREATOR
+from web3_utils import FPMM_QS_CREATOR, FPMM_PEARL_CREATOR, IPFS_POLL_INTERVAL
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from utils import DATA_DIR, JSON_DATA_DIR
 
 NUM_WORKERS = 10
 BLOCKS_CHUNK_SIZE = 10000
 TEXT_ALIGNMENT = 30
 MINIMUM_WRITE_FILE_DELAY_SECONDS = 20
 MECH_FROM_BLOCK_RANGE = 50000
-SCRIPTS_DIR = Path(__file__).parent
-ROOT_DIR = SCRIPTS_DIR.parent
-JSON_DATA_DIR = ROOT_DIR / "json_data"
-DATA_DIR = ROOT_DIR / "data"
 IPFS_ADDRESS = "https://gateway.autonolas.tech/ipfs/"
 THEGRAPH_ENDPOINT = "https://api.studio.thegraph.com/query/57238/mech/0.0.2"
 last_write_time = 0.0
@@ -119,7 +115,7 @@ def collect_all_mech_requests(from_block: int, to_block: int, filename: str) ->
     id_gt = "0x00"
     while True:
         variables = {
-            "sender_not_in": [CREATOR, PEARL_CREATOR],
+            "sender_not_in": [FPMM_QS_CREATOR, FPMM_PEARL_CREATOR],
             "id_gt": id_gt,
             "blockNumber_gte": str(from_block),  # str
            "blockNumber_lte": str(to_block),  # str
@@ -417,6 +413,7 @@ def update_block_request_map(block_request_id_map: dict) -> None:
 
 
 def fix_duplicate_requestIds(requests_filename: str, delivers_filename: str) -> dict:
+    print("Fix duplicated request Ids")
     with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         data_delivers = json.load(file)
 
@@ -455,7 +452,7 @@ def fix_duplicate_requestIds(requests_filename: str, delivers_filename: str) ->
 def merge_requests_delivers(
     requests_filename: str, delivers_filename: str, filename: str
 ) -> None:
-    # TODO
+    print("Merge request delivers")
     """Function to map requests and delivers"""
     with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         mech_delivers = json.load(file)
scripts/profitability.py CHANGED
@@ -18,17 +18,14 @@
 # ------------------------------------------------------------------------------
 
 import time
-import requests
 import datetime
 import pandas as pd
-from collections import defaultdict
 from typing import Any
-from string import Template
 from enum import Enum
 from tqdm import tqdm
 import numpy as np
 import os
-
+from web3_utils import query_conditional_tokens_gc_subgraph
 from get_mech_info import (
     DATETIME_60_DAYS_AGO,
     update_fpmmTrades_parquet,
@@ -36,26 +33,20 @@ from get_mech_info import (
     update_all_trades_parquet,
 )
 from utils import (
-    SUBGRAPH_API_KEY,
     wei_to_unit,
     convert_hex_to_int,
-    _to_content,
     JSON_DATA_DIR,
     DATA_DIR,
 )
-from queries import omen_xdai_trades_query, conditional_tokens_gc_user_query
 from staking import label_trades_by_staking
 
-QUERY_BATCH_SIZE = 1000
 DUST_THRESHOLD = 10000000000000
 INVALID_ANSWER = -1
-FPMM_QS_CREATOR = "0x89c5cc945dd550bcffb72fe42bff002429f46fec"
-FPMM_PEARL_CREATOR = "0xFfc8029154ECD55ABED15BD428bA596E7D23f557"
 DEFAULT_FROM_DATE = "1970-01-01T00:00:00"
 DEFAULT_TO_DATE = "2038-01-19T03:14:07"
-DEFAULT_FROM_TIMESTAMP = 0
+
 DEFAULT_60_DAYS_AGO_TIMESTAMP = (DATETIME_60_DAYS_AGO).timestamp()
-DEFAULT_TO_TIMESTAMP = 2147483647  # around year 2038
+
 WXDAI_CONTRACT_ADDRESS = "0xe91D153E0b41518A2Ce8Dd3D7944Fa863463a97d"
 DEFAULT_MECH_FEE = 0.01
 DUST_THRESHOLD = 10000000000000
@@ -148,107 +139,6 @@ SUMMARY_STATS_DF_COLS = [
     "mean_mech_calls_per_trade",
     "mean_mech_fee_amount_per_trade",
 ]
-headers = {
-    "Accept": "application/json, multipart/mixed",
-    "Content-Type": "application/json",
-}
-
-
-def _query_omen_xdai_subgraph(
-    trader_category: str,
-    from_timestamp: float,
-    to_timestamp: float,
-    fpmm_from_timestamp: float,
-    fpmm_to_timestamp: float,
-) -> dict[str, Any]:
-    """Query the subgraph."""
-    OMEN_SUBGRAPH_URL = Template(
-        """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
-    )
-    omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
-    print(f"omen_subgraph = {omen_subgraph}")
-    grouped_results = defaultdict(list)
-    id_gt = ""
-    if trader_category == "quickstart":
-        creator_id = FPMM_QS_CREATOR.lower()
-    else:  # pearl
-        creator_id = FPMM_PEARL_CREATOR.lower()
-
-    while True:
-        query = omen_xdai_trades_query.substitute(
-            fpmm_creator=creator_id,
-            creationTimestamp_gte=int(from_timestamp),
-            creationTimestamp_lte=int(to_timestamp),
-            fpmm_creationTimestamp_gte=int(fpmm_from_timestamp),
-            fpmm_creationTimestamp_lte=int(fpmm_to_timestamp),
-            first=QUERY_BATCH_SIZE,
-            id_gt=id_gt,
-        )
-        content_json = _to_content(query)
-
-        res = requests.post(omen_subgraph, headers=headers, json=content_json)
-        result_json = res.json()
-        # print(f"result = {result_json}")
-        user_trades = result_json.get("data", {}).get("fpmmTrades", [])
-
-        if not user_trades:
-            break
-
-        for trade in user_trades:
-            fpmm_id = trade.get("fpmm", {}).get("id")
-            grouped_results[fpmm_id].append(trade)
-
-        id_gt = user_trades[len(user_trades) - 1]["id"]
-
-    all_results = {
-        "data": {
-            "fpmmTrades": [
-                trade
-                for trades_list in grouped_results.values()
-                for trade in trades_list
-            ]
-        }
-    }
-
-    return all_results
-
-
-def _query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
-    """Query the subgraph."""
-    SUBGRAPH_URL = Template(
-        """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/7s9rGBffUTL8kDZuxvvpuc46v44iuDarbrADBFw5uVp2"""
-    )
-    subgraph = SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
-    all_results: dict[str, Any] = {"data": {"user": {"userPositions": []}}}
-    userPositions_id_gt = ""
-    while True:
-        query = conditional_tokens_gc_user_query.substitute(
-            id=creator.lower(),
-            first=QUERY_BATCH_SIZE,
-            userPositions_id_gt=userPositions_id_gt,
-        )
-        content_json = {"query": query}
-        print("sending query to subgraph")
-        res = requests.post(subgraph, headers=headers, json=content_json)
-        result_json = res.json()
-        # print(f"result = {result_json}")
-        user_data = result_json.get("data", {}).get("user", {})
-
-        if not user_data:
-            break
-
-        user_positions = user_data.get("userPositions", [])
-
-        if user_positions:
-            all_results["data"]["user"]["userPositions"].extend(user_positions)
-            userPositions_id_gt = user_positions[len(user_positions) - 1]["id"]
-        else:
-            break
-
-    if len(all_results["data"]["user"]["userPositions"]) == 0:
-        return {"data": {"user": None}}
-
-    return all_results
 
 
 def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
@@ -267,68 +157,10 @@ def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
     return False
 
 
-def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
-    print("Transforming trades dataframe")
-    # convert creator to address
-    df["creator"] = df["creator"].apply(lambda x: x["id"])
-
-    # normalize fpmm column
-    fpmm = pd.json_normalize(df["fpmm"])
-    fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
-    df = pd.concat([df, fpmm], axis=1)
-
-    # drop fpmm column
-    df.drop(["fpmm"], axis=1, inplace=True)
-
-    # change creator to creator_address
-    df.rename(columns={"creator": "trader_address"}, inplace=True)
-    print(df.head())
-    print(df.info())
-    return df
-
-
-def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
-    """Create fpmmTrades for all trades."""
-    # Quickstart trades
-    qs_trades_json = _query_omen_xdai_subgraph(
-        trader_category="quickstart",
-        from_timestamp=from_timestamp,
-        to_timestamp=DEFAULT_TO_TIMESTAMP,
-        fpmm_from_timestamp=from_timestamp,
-        fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
-    )
-
-    print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
-
-    # convert to dataframe
-    qs_df = pd.DataFrame(qs_trades_json["data"]["fpmmTrades"])
-    qs_df["market_creator"] = "quickstart"
-    qs_df = transform_fpmmTrades(qs_df)
-
-    # Pearl trades
-    pearl_trades_json = _query_omen_xdai_subgraph(
-        trader_category="pearl",
-        from_timestamp=from_timestamp,
-        to_timestamp=DEFAULT_TO_TIMESTAMP,
-        fpmm_from_timestamp=from_timestamp,
-        fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
-    )
-
-    print(f"length of the pearl_trades_json dataset {len(pearl_trades_json)}")
-
-    # convert to dataframe
-    pearl_df = pd.DataFrame(pearl_trades_json["data"]["fpmmTrades"])
-    pearl_df["market_creator"] = "pearl"
-    pearl_df = transform_fpmmTrades(pearl_df)
-
-    return pd.concat([qs_df, pearl_df], ignore_index=True)
-
-
 def prepare_profitalibity_data(
     rpc: str,
     tools_filename: str,
     trades_filename: str,
-    from_timestamp: float,
 ) -> pd.DataFrame:
     """Prepare data for profitalibity analysis."""
 
@@ -354,10 +186,9 @@ def prepare_profitalibity_data(
     # Check if fpmmTrades.parquet is in the same directory
     print("Generating the trades file")
     try:
-        fpmmTrades = create_fpmmTrades(rpc, from_timestamp=from_timestamp)
-        fpmmTrades.to_parquet(DATA_DIR / trades_filename, index=False)
+        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
     except FileNotFoundError:
-        print(f"Error creating {trades_filename} file .")
+        print(f"Error reading {trades_filename} file .")
 
     # make sure trader_address is in the columns
     assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
@@ -401,7 +232,7 @@ def analyse_trader(
 
     # Fetch user's conditional tokens gc graph
     try:
-        user_json = _query_conditional_tokens_gc_subgraph(trader_address)
+        user_json = query_conditional_tokens_gc_subgraph(trader_address)
     except Exception as e:
         print(f"Error fetching user data: {e}")
         return trades_df
@@ -575,16 +406,13 @@ def run_profitability_analysis(
     rpc: str,
     tools_filename: str,
     trades_filename: str,
-    from_timestamp: float,
     merge: bool = False,
 ):
     """Create all trades analysis."""
 
     # load dfs from data folder for analysis
     print(f"Preparing data with {tools_filename} and {trades_filename}")
-    fpmmTrades = prepare_profitalibity_data(
-        rpc, tools_filename, trades_filename, from_timestamp
-    )
+    fpmmTrades = prepare_profitalibity_data(rpc, tools_filename, trades_filename)
     if merge:
         update_tools_parquet(rpc, tools_filename)
         tools = pd.read_parquet(DATA_DIR / "tools.parquet")
scripts/pull_data.py CHANGED
@@ -1,10 +1,7 @@
 import logging
 from datetime import datetime
 import pandas as pd
-from markets import (
-    etl as mkt_etl,
-    DEFAULT_FILENAME as MARKETS_FILENAME,
-)
+from markets import etl as mkt_etl, DEFAULT_FILENAME as MARKETS_FILENAME, fpmmTrades_etl
 from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
 from profitability import run_profitability_analysis, DEFAULT_60_DAYS_AGO_TIMESTAMP
 from utils import (
@@ -23,6 +20,8 @@ from get_mech_info import (
 from update_tools_accuracy import compute_tools_accuracy
 from cleaning_old_info import clean_old_data_from_parquet_files
 from web3_utils import updating_timestamps
+from manage_space_files import move_files
+from cloud_storage import load_historical_file
 
 
 logging.basicConfig(level=logging.INFO)
@@ -57,7 +56,8 @@ def save_historical_data():
         tools = pd.read_parquet(DATA_DIR / "tools.parquet")
         filename = f"tools_{timestamp}.parquet"
         tools.to_parquet(HIST_DIR / filename, index=False)
-
+        # save into cloud storage
+        # load_historical_file(filename)
     except Exception as e:
         print(f"Error saving tools file in the historical folder {e}")
 
@@ -65,6 +65,8 @@ def save_historical_data():
         all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
         filename = f"all_trades_profitability_{timestamp}.parquet"
         all_trades.to_parquet(HIST_DIR / filename, index=False)
+        # save into cloud storage
+        # load_historical_file(filename)
 
     except Exception as e:
         print(
@@ -81,7 +83,7 @@ def only_new_weekly_analysis():
     mkt_etl(MARKETS_FILENAME)
     logging.info("Markets ETL completed")
 
-    # New tools ETL
+    # Mech events ETL
     logging.info("Generating the mech json files")
     # get only new data
     latest_timestamp = get_mech_events_since_last_run()
@@ -90,6 +92,12 @@ def only_new_weekly_analysis():
         return
     logging.info(f"Finished generating the mech json files from {latest_timestamp}")
 
+    # FpmmTrades ETL
+    fpmmTrades_etl(
+        rpc=rpc,
+        trades_filename="new_fpmmTrades.parquet",
+        from_timestamp=int(latest_timestamp.timestamp()),
+    )
     # Run tools ETL
     logging.info("Generate and parse the tools content")
     # generate only new file
@@ -104,7 +112,6 @@ def only_new_weekly_analysis():
         rpc=rpc,
         tools_filename="new_tools.parquet",
        trades_filename="new_fpmmTrades.parquet",
-        from_timestamp=int(latest_timestamp.timestamp()),
         merge=True,
     )
 
@@ -121,58 +128,14 @@ def only_new_weekly_analysis():
 
     save_historical_data()
 
-    clean_old_data_from_parquet_files("2024-10-06")
-
-    compute_tools_accuracy()
-
-    logging.info("Weekly analysis files generated and saved")
-
-
-def old_weekly_analysis():
-    """Run weekly analysis for the FPMMS project."""
-    rpc = RPC
-    # Run markets ETL
-    logging.info("Running markets ETL")
-    mkt_etl(MARKETS_FILENAME)
-    logging.info("Markets ETL completed")
-
-    # New tools ETL
-    logging.info("Generating the mech json files")
-
-    get_mech_events_last_60_days()
-    logging.info("Finished generating the mech json files")
-
-    # Run tools ETL
-    logging.info("Generate and parse the tools content")
-
-    generate_tools_file("tools_info.json", TOOLS_FILENAME)
-    logging.info("Tools ETL completed")
-    add_current_answer(TOOLS_FILENAME)
-
-    # Run profitability analysis
-    logging.info("Running profitability analysis")
-    run_profitability_analysis(
-        rpc=rpc,
-        tools_filename=TOOLS_FILENAME,
-        trades_filename="fpmmTrades.parquet",
-        from_timestamp=DEFAULT_60_DAYS_AGO_TIMESTAMP,
-    )
-    logging.info("Profitability analysis completed")
-
-    try:
-        updating_timestamps(rpc, TOOLS_FILENAME)
-    except Exception as e:
-        logging.error("Error while updating timestamps of tools")
-        print(e)
+    clean_old_data_from_parquet_files("2024-10-13")
 
     compute_tools_accuracy()
 
+    # move to tmp folder the new generated files
+    move_files()
     logging.info("Weekly analysis files generated and saved")
 
 
 if __name__ == "__main__":
     only_new_weekly_analysis()
-    # weekly_analysis()
-    # rpc = RPC
-    # updating_timestamps(rpc)
-    # compute_tools_accuracy()
scripts/staking.py CHANGED
@@ -1,6 +1,5 @@
 import json
 import sys
-from pathlib import Path
 from typing import Any, List
 from utils import RPC, DATA_DIR
 import requests
scripts/tools.py CHANGED
@@ -44,17 +44,25 @@ from urllib3.exceptions import (
     HTTPError as Urllib3HTTPError,
 )
 from web3 import Web3, HTTPProvider
-from web3.exceptions import MismatchedABI
 from markets import add_market_creator
-from web3.types import BlockParams
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from web3_utils import (
+    read_abi,
+    SLEEP,
+    reduce_window,
+    LATEST_BLOCK,
+    LATEST_BLOCK_NAME,
+    BLOCK_DATA_NUMBER,
+    BLOCKS_CHUNK_SIZE,
+    N_RPC_RETRIES,
+    N_IPFS_RETRIES,
+    RPC_POLL_INTERVAL,
+    IPFS_POLL_INTERVAL,
+)
 from utils import (
     clean,
     BLOCK_FIELD,
     gen_event_filename,
-    read_abi,
-    SLEEP,
-    reduce_window,
     limit_text,
     DATA_DIR,
     JSON_DATA_DIR,
@@ -82,10 +90,7 @@ MECH_TO_INFO = {
     "0x77af31de935740567cf4ff1986d04b2c964a786a": ("new_mech_abi.json", 30776879),
 }
 # optionally set the latest block to stop searching for the delivered events
-LATEST_BLOCK: Optional[int] = None
-LATEST_BLOCK_NAME: BlockParams = "latest"
-BLOCK_DATA_NUMBER = "number"
-BLOCKS_CHUNK_SIZE = 10_000
+
 EVENT_ARGUMENTS = "args"
 DATA = "data"
 IPFS_LINKS_SERIES_NAME = "ipfs_links"
@@ -95,11 +100,7 @@ DEFAULT_FILENAME = "tools.parquet"
 RE_RPC_FILTER_ERROR = r"Filter with id: '\d+' does not exist."
 ABI_ERROR = "The event signature did not match the provided ABI"
 HTTP_TIMEOUT = 10
-N_IPFS_RETRIES = 1
-N_RPC_RETRIES = 100
-RPC_POLL_INTERVAL = 0.05
-# IPFS_POLL_INTERVAL = 0.05  # low speed
-IPFS_POLL_INTERVAL = 0.2  # high speed
+
 IRRELEVANT_TOOLS = [
     "openai-text-davinci-002",
     "openai-text-davinci-003",
scripts/utils.py CHANGED
@@ -1,8 +1,6 @@
-import sys
 import json
 import os
 import time
-from tqdm import tqdm
 from typing import List, Any, Optional, Union, Tuple
 import numpy as np
 import pandas as pd
@@ -20,6 +18,8 @@ SCRIPTS_DIR = Path(__file__).parent
 ROOT_DIR = SCRIPTS_DIR.parent
 DATA_DIR = ROOT_DIR / "data"
 JSON_DATA_DIR = ROOT_DIR / "json_data"
+HIST_DIR = ROOT_DIR / "historical_data"
+TMP_DIR = ROOT_DIR / "tmp"
 BLOCK_FIELD = "block"
 CID_PREFIX = "f01701220"
 REQUEST_ID = "requestId"
@@ -236,31 +236,6 @@ def measure_execution_time(func):
     return wrapper
 
 
-def parse_args() -> str:
-    """Parse the arguments and return the RPC."""
-    if len(sys.argv) != 2:
-        raise ValueError("Expected the RPC as a positional argument.")
-    return sys.argv[1]
-
-
-def read_abi(abi_path: str) -> str:
-    """Read and return the wxDAI contract's ABI."""
-    with open(abi_path) as abi_file:
-        return abi_file.read()
-
-
-def reduce_window(contract_instance, event, from_block, batch_size, latest_block):
-    """Dynamically reduce the batch size window."""
-    keep_fraction = 1 - REDUCE_FACTOR
-    events_filter = contract_instance.events[event].build_filter()
-    events_filter.fromBlock = from_block
-    batch_size = int(batch_size * keep_fraction)
-    events_filter.toBlock = min(from_block + batch_size, latest_block)
-    tqdm.write(f"RPC timed out! Resizing batch size to {batch_size}.")
-    time.sleep(SLEEP)
-    return events_filter, batch_size
-
-
 def limit_text(text: str, limit: int = 200) -> str:
     """Limit the given text"""
     if len(text) > limit:
@@ -314,21 +289,6 @@ def read_n_last_lines(filename: str, n: int = 1) -> str:
     return last_line
 
 
-def get_earliest_block(event_name: MechEventName) -> int:
-    """Get the earliest block number to use when filtering for events."""
-    filename = gen_event_filename(event_name)
-    if not os.path.exists(DATA_DIR / filename):
-        return 0
-
-    df = pd.read_parquet(DATA_DIR / filename)
-    block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
-    earliest_block = int(df[block_field].max())
-    # clean and release all memory
-    del df
-    gc.collect()
-    return earliest_block
-
-
 def get_question(text: str) -> str:
     """Get the question from a text."""
     # Regex to find text within double quotes
@@ -366,18 +326,6 @@ def wei_to_unit(wei: int) -> float:
     return wei / 10**18
 
 
-def measure_execution_time(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        execution_time = end_time - start_time
-        print(f"Execution time: {execution_time:.6f} seconds")
-        return result
-
-    return wrapper
-
-
 def get_vote(p_yes, p_no) -> Optional[str]:
     """Return the vote."""
     if p_no == p_yes:
@@ -420,7 +368,7 @@ def get_prediction_values(params: dict) -> Tuple:
     return p_yes, p_no, confidence, info_utility
 
 
-def _to_content(q: str) -> dict[str, Any]:
+def to_content(q: str) -> dict[str, Any]:
     """Convert the given query string to payload content, i.e., add it under a `queries` key and convert it to bytes."""
     finalized_query = {
         "query": q,
scripts/web3_utils.py ADDED
@@ -0,0 +1,258 @@
+import sys
+import pickle
+import gc
+import time
+import requests
+from functools import partial
+from string import Template
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor
+from collections import defaultdict
+from tqdm import tqdm
+from web3 import Web3
+from typing import Any, Optional
+from web3.types import BlockParams
+from utils import JSON_DATA_DIR, DATA_DIR, SUBGRAPH_API_KEY, to_content
+from queries import conditional_tokens_gc_user_query, omen_xdai_trades_query
+import pandas as pd
+
+REDUCE_FACTOR = 0.25
+SLEEP = 0.5
+QUERY_BATCH_SIZE = 1000
+FPMM_QS_CREATOR = "0x89c5cc945dd550bcffb72fe42bff002429f46fec"
+FPMM_PEARL_CREATOR = "0xFfc8029154ECD55ABED15BD428bA596E7D23f557"
+LATEST_BLOCK: Optional[int] = None
+LATEST_BLOCK_NAME: BlockParams = "latest"
+BLOCK_DATA_NUMBER = "number"
+BLOCKS_CHUNK_SIZE = 10_000
+N_IPFS_RETRIES = 1
+N_RPC_RETRIES = 100
+RPC_POLL_INTERVAL = 0.05
+# IPFS_POLL_INTERVAL = 0.05  # low speed
+IPFS_POLL_INTERVAL = 0.2  # high speed
+
+headers = {
+    "Accept": "application/json, multipart/mixed",
+    "Content-Type": "application/json",
+}
+
+
+def parse_args() -> str:
+    """Parse the arguments and return the RPC."""
+    if len(sys.argv) != 2:
+        raise ValueError("Expected the RPC as a positional argument.")
+    return sys.argv[1]
+
+
+def read_abi(abi_path: str) -> str:
+    """Read and return the wxDAI contract's ABI."""
+    with open(abi_path) as abi_file:
+        return abi_file.read()
+
+
+def update_block_request_map(block_request_id_map: dict) -> None:
+    print("Saving block request id map info")
+    with open(JSON_DATA_DIR / "block_request_id_map.pickle", "wb") as handle:
+        pickle.dump(block_request_id_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+
+def reduce_window(contract_instance, event, from_block, batch_size, latest_block):
+    """Dynamically reduce the batch size window."""
+    keep_fraction = 1 - REDUCE_FACTOR
+    events_filter = contract_instance.events[event].build_filter()
+    events_filter.fromBlock = from_block
+    batch_size = int(batch_size * keep_fraction)
+    events_filter.toBlock = min(from_block + batch_size, latest_block)
+    tqdm.write(f"RPC timed out! Resizing batch size to {batch_size}.")
+    time.sleep(SLEEP)
+    return events_filter, batch_size
+
+
+def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
+    """Convert a block number to a timestamp."""
+    block = web3.eth.get_block(block_number)
+    timestamp = datetime.utcfromtimestamp(block["timestamp"])
+    try:
+        timestamp_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
+        timestamp = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S.%f")
+    except Exception as e:
+        timestamp = datetime.utcfromtimestamp(block["timestamp"])
+    return timestamp.strftime("%Y-%m-%d %H:%M:%S")
+
+
+def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:
+    """Parallelize the timestamp conversion."""
+    block_numbers = df["request_block"].tolist()
+    with ThreadPoolExecutor(max_workers=10) as executor:
+        results = list(
+            tqdm(executor.map(function, block_numbers), total=len(block_numbers))
+        )
+    return results
+
+
+def updating_timestamps(rpc: str, tools_filename: str):
+    web3 = Web3(Web3.HTTPProvider(rpc))
+
+    tools = pd.read_parquet(DATA_DIR / tools_filename)
+
+    # Convert block number to timestamp
+    print("Converting block number to timestamp")
+    t_map = pickle.load(open(DATA_DIR / "t_map.pkl", "rb"))
+    tools["request_time"] = tools["request_block"].map(t_map)
+
+    no_data = tools["request_time"].isna().sum()
+    print(f"Total rows with no request time info = {no_data}")
+
+    # Identify tools with missing request_time and fill them
+    missing_time_indices = tools[tools["request_time"].isna()].index
+    if not missing_time_indices.empty:
+        partial_block_number_to_timestamp = partial(
+            block_number_to_timestamp, web3=web3
+        )
+        missing_timestamps = parallelize_timestamp_conversion(
+            tools.loc[missing_time_indices], partial_block_number_to_timestamp
+        )
+
+        # Update the original DataFrame with the missing timestamps
+        for i, timestamp in zip(missing_time_indices, missing_timestamps):
+            tools.at[i, "request_time"] = timestamp
+
+    tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
+        "%Y-%m"
+    )
+    tools["request_month_year_week"] = (
+        pd.to_datetime(tools["request_time"]).dt.to_period("W").astype(str)
+    )
+
+    # Save the tools data after the updates on the content
+    print(f"Updating file {tools_filename} with timestamps")
+    tools.to_parquet(DATA_DIR / tools_filename, index=False)
+
+    # Update t_map with new timestamps
+    new_timestamps = (
+        tools[["request_block", "request_time"]]
+        .dropna()
+        .set_index("request_block")
+        .to_dict()["request_time"]
+    )
+    t_map.update(new_timestamps)
+
+    with open(DATA_DIR / "t_map.pkl", "wb") as f:
+        pickle.dump(t_map, f)
+
+    # clean and release all memory
+    del tools
+    del t_map
+    gc.collect()
+
+
+def query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
+    """Query the subgraph."""
+    SUBGRAPH_URL = Template(
+        """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/7s9rGBffUTL8kDZuxvvpuc46v44iuDarbrADBFw5uVp2"""
+    )
+    subgraph = SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+    all_results: dict[str, Any] = {"data": {"user": {"userPositions": []}}}
+    userPositions_id_gt = ""
+    while True:
+        query = conditional_tokens_gc_user_query.substitute(
+            id=creator.lower(),
+            first=QUERY_BATCH_SIZE,
+            userPositions_id_gt=userPositions_id_gt,
+        )
+        content_json = {"query": query}
+        print("sending query to subgraph")
+        res = requests.post(subgraph, headers=headers, json=content_json)
+        result_json = res.json()
+        # print(f"result = {result_json}")
+        user_data = result_json.get("data", {}).get("user", {})
+
+        if not user_data:
+            break
+
+        user_positions = user_data.get("userPositions", [])
+
+        if user_positions:
+            all_results["data"]["user"]["userPositions"].extend(user_positions)
+            userPositions_id_gt = user_positions[len(user_positions) - 1]["id"]
+        else:
+            break
+
+    if len(all_results["data"]["user"]["userPositions"]) == 0:
+        return {"data": {"user": None}}
+
+    return all_results
+
+
+def query_omen_xdai_subgraph(
+    trader_category: str,
+    from_timestamp: float,
+    to_timestamp: float,
+    fpmm_from_timestamp: float,
+    fpmm_to_timestamp: float,
+) -> dict[str, Any]:
+    """Query the subgraph."""
+    OMEN_SUBGRAPH_URL = Template(
+        """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
+    )
+    omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+    print(f"omen_subgraph = {omen_subgraph}")
+    grouped_results = defaultdict(list)
+    id_gt = ""
+    if trader_category == "quickstart":
+        creator_id = FPMM_QS_CREATOR.lower()
+    else:  # pearl
+        creator_id = FPMM_PEARL_CREATOR.lower()
+
+    while True:
+        query = omen_xdai_trades_query.substitute(
+            fpmm_creator=creator_id,
+            creationTimestamp_gte=int(from_timestamp),
+            creationTimestamp_lte=int(to_timestamp),
+            fpmm_creationTimestamp_gte=int(fpmm_from_timestamp),
+            fpmm_creationTimestamp_lte=int(fpmm_to_timestamp),
+            first=QUERY_BATCH_SIZE,
+            id_gt=id_gt,
+        )
+        content_json = to_content(query)
+
+        res = requests.post(omen_subgraph, headers=headers, json=content_json)
+        result_json = res.json()
+        # print(f"result = {result_json}")
+        user_trades = result_json.get("data", {}).get("fpmmTrades", [])
+
+        if not user_trades:
+            break
+
+        for trade in user_trades:
+            fpmm_id = trade.get("fpmm", {}).get("id")
+            grouped_results[fpmm_id].append(trade)
+
+        id_gt = user_trades[len(user_trades) - 1]["id"]
+
+    all_results = {
+        "data": {
+            "fpmmTrades": [
+                trade
+                for trades_list in grouped_results.values()
+                for trade in trades_list
+            ]
+        }
+    }
+
+    return all_results
+
+
+# def get_earliest_block(event_name: MechEventName) -> int:
+#     """Get the earliest block number to use when filtering for events."""
+#     filename = gen_event_filename(event_name)
+#     if not os.path.exists(DATA_DIR / filename):
+#         return 0
+
+#     df = pd.read_parquet(DATA_DIR / filename)
+#     block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
+#     earliest_block = int(df[block_field].max())
+#     # clean and release all memory
+#     del df
+#     gc.collect()
+#     return earliest_block
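
A minimal sketch of the relocated subgraph helper, assuming SUBGRAPH_API_KEY is configured in utils; timestamps are plain Unix seconds:

# Sketch: fetch all quickstart trades created since early October 2024.
from web3_utils import query_omen_xdai_subgraph

trades_json = query_omen_xdai_subgraph(
    trader_category="quickstart",
    from_timestamp=1727740800,  # 2024-10-01 00:00:00 UTC
    to_timestamp=2147483647,
    fpmm_from_timestamp=1727740800,
    fpmm_to_timestamp=2147483647,
)
print(len(trades_json["data"]["fpmmTrades"]))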