cyberosa committed
Commit 16f8a8e · Parent: d48b280

Updated all subgraph links and weekly data

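The common thread across the script diffs below is a switch from The Graph's deprecated hosted-service endpoints (`https://api.thegraph.com/subgraphs/name/...`) to gateway endpoints keyed by an API key. A minimal sketch of the pattern these diffs adopt, using the Omen subgraph ID that appears in `scripts/markets.py` below; it assumes the `SUBGRAPH_API_KEY` environment variable is set:

```python
import os
from string import Template

# Read the API key from the environment, as scripts/utils.py does below.
SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)

# Gateway endpoint template; the trailing ID identifies the Omen xDai subgraph.
OMEN_SUBGRAPH_URL = Template(
    "https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}"
    "/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"
)

# Inject the key to build the final query URL.
url = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
```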
data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bc0f6a578ad51a8243244fe2548cb2726dedc03a003a5103faed170353e5294
-size 2350354
+oid sha256:3863cd1823d9c96890cdb0fb06bb5c2b176c5e90161c42963493b5633c7a7135
+size 2035738
data/delivers.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:630af4a00273b0f620531301da650491fac225a96becf6cd9103331d3a40e634
-size 577925918
+oid sha256:a0c51d4db04dd9d767797d82b196815acea957db6ef85cf96f9e9b73dc46643f
+size 494395523
data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb876c1221e2fc4bc493fec651cf167c5243b131bc932e52fc817450d9fdf954
-size 6246723
+oid sha256:fa04be34e99e3b2a632363a174ba6385f7f97b7466b01be9a940ce4a1872c575
+size 6334162
data/fpmms.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f750f72d608999c43ac581b761f153e89d23ecca7aebd4a9b41bc044fb5e429e
-size 341069
+oid sha256:97de347d8c8c9be2960166665eaaa6df3b939583b06208fa46091bb2c6eee122
+size 346572
data/requests.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b534581f7239aa57fa7895859c895f4cfff92f3372683bfc8188b72f8f3803a3
-size 15857748
+oid sha256:4ea345dab01b7fa4da218e43841927beea8d656f6dd18e96bd2364fad86dcf99
+size 15765143
data/summary_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42395ab29e85b1e4c0907132f30895400c20422287be4fb8921e1a884eac0738
-size 42213
+oid sha256:5eb2307c7ba3953abcb7f1217d90117ac113107ff3f2f27d8dbdda71651e20e5
+size 42038
data/t_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c180f7b0823819b45ea78c1956ebc4d33cc0d374ad528c9a86acb1797549d341
-size 9548930
+oid sha256:23679bddce247a4cb21cf6acb06c12d9d6d171d03e0e304420107828d1411e40
+size 9830355
data/tools.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dd77eb7642fb291caab50750842c1509fe121abe1fd873cb43d12643d781626
-size 580056931
+oid sha256:805dc8fd4063d966536a31c19e494315943e8708a7951519cbd0081bc17f376b
+size 495380729
notebooks/analysis.ipynb CHANGED
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2555,10 +2555,169 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "def get_question(text: str) -> str:\n",
+    "    \"\"\"Get the question from a text.\"\"\"\n",
+    "    # Regex to find text within double quotes\n",
+    "    pattern = r'\"([^\"]*)\"'\n",
+    "\n",
+    "    # Find all occurrences\n",
+    "    questions = re.findall(pattern, text)\n",
+    "\n",
+    "    # Assuming you want the first question if there are multiple\n",
+    "    question = questions[0] if questions else None\n",
+    "\n",
+    "    return question"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from web3 import Web3\n",
+    "from typing import Optional\n",
+    "import re\n",
+    "import pickle\n",
+    "\n",
+    "def block_number_to_timestamp(block_number: int, web3: Web3) -> str:\n",
+    "    \"\"\"Convert a block number to a timestamp.\"\"\"\n",
+    "    block = web3.eth.get_block(block_number)\n",
+    "    timestamp = datetime.utcfromtimestamp(block[\"timestamp\"])\n",
+    "    return timestamp.strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "\n",
+    "\n",
+    "def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:\n",
+    "    \"\"\"Parallelize the timestamp conversion.\"\"\"\n",
+    "    block_numbers = df[\"request_block\"].tolist()\n",
+    "    with ThreadPoolExecutor(max_workers=10) as executor:\n",
+    "        results = list(\n",
+    "            tqdm(executor.map(function, block_numbers), total=len(block_numbers))\n",
+    "        )\n",
+    "    return results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:\n",
+    "    \"\"\"Get the current answer for a question.\"\"\"\n",
+    "    row = fpmms[fpmms[\"title\"] == text]\n",
+    "    if row.shape[0] == 0:\n",
+    "        return None\n",
+    "    return row[\"currentAnswer\"].values[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from functools import partial\n",
+    "from concurrent.futures import ThreadPoolExecutor\n",
+    "def weekly_analysis():\n",
+    "    rpc = \"https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a\"\n",
+    "    web3 = Web3(Web3.HTTPProvider(rpc))\n",
+    "    # Get currentAnswer from FPMMS\n",
+    "    fpmms = pd.read_parquet('../data/fpmms.parquet')\n",
+    "    tools = pd.read_parquet('../data/tools.parquet')\n",
+    "\n",
+    "    # Get the question from the tools\n",
+    "    print(\"Getting the question and current answer for the tools\")\n",
+    "    tools[\"title\"] = tools[\"prompt_request\"].apply(lambda x: get_question(x))\n",
+    "    tools[\"currentAnswer\"] = tools[\"title\"].apply(lambda x: current_answer(x, fpmms))\n",
+    "\n",
+    "    tools[\"currentAnswer\"] = tools[\"currentAnswer\"].str.replace(\"yes\", \"Yes\")\n",
+    "    tools[\"currentAnswer\"] = tools[\"currentAnswer\"].str.replace(\"no\", \"No\")\n",
+    "\n",
+    "    # Convert block number to timestamp\n",
+    "    print(\"Converting block number to timestamp\")\n",
+    "    t_map = pickle.load(open(\"../data/t_map.pkl\", \"rb\"))\n",
+    "    tools[\"request_time\"] = tools[\"request_block\"].map(t_map)\n",
+    "\n",
+    "    # Identify tools with missing request_time and fill them\n",
+    "    missing_time_indices = tools[tools[\"request_time\"].isna()].index\n",
+    "    if not missing_time_indices.empty:\n",
+    "        partial_block_number_to_timestamp = partial(\n",
+    "            block_number_to_timestamp, web3=web3\n",
+    "        )\n",
+    "        missing_timestamps = parallelize_timestamp_conversion(\n",
+    "            tools.loc[missing_time_indices], partial_block_number_to_timestamp\n",
+    "        )\n",
+    "\n",
+    "        # Update the original DataFrame with the missing timestamps\n",
+    "        for i, timestamp in zip(missing_time_indices, missing_timestamps):\n",
+    "            tools.at[i, \"request_time\"] = timestamp\n",
+    "\n",
+    "    tools[\"request_month_year\"] = pd.to_datetime(tools[\"request_time\"]).dt.strftime(\n",
+    "        \"%Y-%m\"\n",
+    "    )\n",
+    "    tools[\"request_month_year_week\"] = (\n",
+    "        pd.to_datetime(tools[\"request_time\"]).dt.to_period(\"W\").astype(str)\n",
+    "    )\n",
+    "\n",
+    "    # Save the tools data after the updates on the content\n",
+    "    tools.to_parquet('../data/tools.parquet', index=False)\n",
+    "\n",
+    "    # Update t_map with new timestamps\n",
+    "    new_timestamps = (\n",
+    "        tools[[\"request_block\", \"request_time\"]]\n",
+    "        .dropna()\n",
+    "        .set_index(\"request_block\")\n",
+    "        .to_dict()[\"request_time\"]\n",
+    "    )\n",
+    "    t_map.update(new_timestamps)\n",
+    "\n",
+    "    with open(\"../data/t_map.pkl\", \"wb\") as f:\n",
+    "        pickle.dump(t_map, f)\n",
+    "\n",
+    "    # clean and release all memory\n",
+    "    del tools\n",
+    "    del fpmms\n",
+    "    del t_map\n",
+    "    gc.collect()\n",
+    "\n",
+    "    print(\"Weekly analysis files generated and saved\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Getting the question and current answer for the tools\n",
+      "Converting block number to timestamp\n"
+     ]
+    },
+    {
+     "ename": "NameError",
+     "evalue": "name 'ThreadPoolExecutor' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "NameError                                 Traceback (most recent call last)",
+      "Cell In[46], line 1\n----> 1 weekly_analysis()",
+      "Cell In[45], line 28, in weekly_analysis()\n---> 28 missing_timestamps = parallelize_timestamp_conversion(\n     29     tools.loc[missing_time_indices], partial_block_number_to_timestamp\n     30 )",
+      "Cell In[41], line 16, in parallelize_timestamp_conversion(df, function)\n---> 16 with ThreadPoolExecutor(max_workers=10) as executor:",
+      "NameError: name 'ThreadPoolExecutor' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "weekly_analysis()"
+   ]
  }
 ],
 "metadata": {
scripts/get_mech_info.py CHANGED
@@ -2,9 +2,17 @@ from dataclasses import dataclass
 from string import Template
 from typing import Any
 from datetime import datetime, timedelta, UTC
+from utils import SUBGRAPH_API_KEY
 import requests
 
-MECH_SUBGRAPH_URL = "https://api.thegraph.com/subgraphs/name/stakewise/ethereum-gnosis"
+OLD_MECH_SUBGRAPH_URL = (
+    "https://api.thegraph.com/subgraphs/name/stakewise/ethereum-gnosis"
+)
+# MECH_SUBGRAPH_URL = "https://api.studio.thegraph.com/query/57238/mech/0.0.2"
+NETWORK_SUBGRAPH_URL = Template(
+    """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/FxV6YUix58SpYmLBwc9gEHkwjfkqwe1X5FJQjn8nKPyA"""
+)
+
 SUBGRAPH_HEADERS = {
     "Accept": "application/json, multipart/mixed",
     "Content-Type": "application/json",
@@ -23,7 +31,8 @@ BLOCK_NUMBER = Template(
         timestamp_lte: "${timestamp_to}"
       }
     ){
-      id
+      id,
+      number,
     }
   }
 """
@@ -37,9 +46,11 @@ def fetch_block_number(timestamp_from: int, timestamp_to: int) -> dict:
         timestamp_from=timestamp_from, timestamp_to=timestamp_to
     )
     # print(f"Sending query for the subgraph = {query}")
-
+    network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
+        subgraph_api_key=SUBGRAPH_API_KEY
+    )
     response = requests.post(
-        MECH_SUBGRAPH_URL,
+        network_subgraph_url,
         headers=SUBGRAPH_HEADERS,
         json={"query": query},
         timeout=300,
@@ -64,7 +75,7 @@ def get_mech_info_last_60_days() -> dict[str, Any]:
         timestamp_60_days_ago, timestamp_60_days_ago_plus_margin
     )
     # expecting only one block
-    last_month_block_number = last_month_block_number.get("id", "")
+    last_month_block_number = last_month_block_number.get("number", "")
    if last_month_block_number.isdigit():
        last_month_block_number = int(last_month_block_number)
 
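A note on the block query change above: the code now selects both `id` and `number` and the caller reads `number`, since on the gateway's blocks subgraph `id` is not the decimal block height. A hedged sketch of the response handling, with a hypothetical payload (the `blocks` entity name and values are illustrative, not taken from this diff):

```python
# Hypothetical shape of the subgraph reply to the BLOCK_NUMBER query above.
result = {"data": {"blocks": [{"id": "0xab12...", "number": "33012345"}]}}

block = result["data"]["blocks"][0]
last_month_block_number = block.get("number", "")  # previously block.get("id", "")
if last_month_block_number.isdigit():
    last_month_block_number = int(last_month_block_number)  # 33012345
```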
scripts/markets.py CHANGED
@@ -28,6 +28,7 @@ from tqdm import tqdm
 
 from typing import List, Dict
 from pathlib import Path
+from utils import SUBGRAPH_API_KEY
 
 ResponseItemType = List[Dict[str, str]]
 SubgraphResponseType = Dict[str, ResponseItemType]
@@ -35,7 +36,10 @@ SubgraphResponseType = Dict[str, ResponseItemType]
 
 CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
 BATCH_SIZE = 1000
-OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
+# OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
+OMEN_SUBGRAPH_URL = Template(
+    """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
+)
 FPMMS_FIELD = "fixedProductMarketMakers"
 QUERY_FIELD = "query"
 ERROR_FIELD = "errors"
@@ -158,6 +162,8 @@ def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
 
 def fpmms_fetcher() -> Generator[ResponseItemType, int, None]:
     """An indefinite fetcher for the FPMMs."""
+    omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+    print(f"omen_subgraph = {omen_subgraph}")
     while True:
         fpmm_id = yield
         fpmms_query = FPMMS_QUERY.substitute(
@@ -171,7 +177,8 @@ def fpmms_fetcher() -> Generator[ResponseItemType, int, None]:
             outcomes_field=OUTCOMES_FIELD,
             title_field=TITLE_FIELD,
         )
-        yield query_subgraph(OMEN_SUBGRAPH, fpmms_query, FPMMS_FIELD)
+
+        yield query_subgraph(omen_subgraph, fpmms_query, FPMMS_FIELD)
 
 
 def fetch_fpmms() -> pd.DataFrame:
@@ -225,4 +232,3 @@ def etl(filename: Optional[str] = None) -> pd.DataFrame:
 
 if __name__ == "__main__":
     etl(DEFAULT_FILENAME)
-
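`fpmms_fetcher` is a two-way generator: `fpmm_id = yield` receives the pagination cursor via `send()`, and the second `yield` hands back one batch from the subgraph. A usage sketch under that assumption (the driver loop in `fetch_fpmms` is not shown in this diff):

```python
fetcher = fpmms_fetcher()
next(fetcher)              # prime the generator up to `fpmm_id = yield`
batch = fetcher.send("")   # send the latest FPMM id seen; receive one batch back
next(fetcher)              # step back to the receiving yield before the next send()
```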
scripts/profitability.py CHANGED
@@ -29,6 +29,7 @@ from tqdm import tqdm
 import numpy as np
 from pathlib import Path
 from get_mech_info import DATETIME_60_DAYS_AGO
+from utils import SUBGRAPH_API_KEY
 
 IRRELEVANT_TOOLS = [
     "openai-text-davinci-002",
@@ -250,8 +251,12 @@ def _query_omen_xdai_subgraph(
     fpmm_to_timestamp: float,
 ) -> dict[str, Any]:
     """Query the subgraph."""
-    url = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
-
+    # url = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
+    OMEN_SUBGRAPH_URL = Template(
+        """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
+    )
+    omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+    print(f"omen_subgraph = {omen_subgraph}")
     grouped_results = defaultdict(list)
     id_gt = ""
 
@@ -266,8 +271,10 @@ def _query_omen_xdai_subgraph(
             id_gt=id_gt,
         )
         content_json = _to_content(query)
-        res = requests.post(url, headers=headers, json=content_json)
+
+        res = requests.post(omen_subgraph, headers=headers, json=content_json)
         result_json = res.json()
+        # print(f"result = {result_json}")
         user_trades = result_json.get("data", {}).get("fpmmTrades", [])
 
         if not user_trades:
@@ -294,8 +301,11 @@ def _query_omen_xdai_subgraph(
 
 def _query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
     """Query the subgraph."""
-    url = "https://api.thegraph.com/subgraphs/name/gnosis/conditional-tokens-gc"
-
+    # url = "https://api.thegraph.com/subgraphs/name/gnosis/conditional-tokens-gc"
+    SUBGRAPH_URL = Template(
+        """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/7s9rGBffUTL8kDZuxvvpuc46v44iuDarbrADBFw5uVp2"""
+    )
+    subgraph = SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
     all_results: dict[str, Any] = {"data": {"user": {"userPositions": []}}}
     userPositions_id_gt = ""
     while True:
@@ -305,8 +315,10 @@ def _query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
             userPositions_id_gt=userPositions_id_gt,
         )
         content_json = {"query": query}
-        res = requests.post(url, headers=headers, json=content_json)
+        print("sending query to subgraph")
+        res = requests.post(subgraph, headers=headers, json=content_json)
         result_json = res.json()
+        # print(f"result = {result_json}")
         user_data = result_json.get("data", {}).get("user", {})
 
         if not user_data:
@@ -368,8 +380,12 @@ def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
         fpmm_to_timestamp=DEFAULT_TO_TIMESTAMP,
     )
 
+    print(f"length of the trades_json dataset {len(trades_json)}")
+
     # convert to dataframe
     df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
+    print(df.head())
+    print(df.info())
 
     # convert creator to address
     df["creator"] = df["creator"].apply(lambda x: x["id"])
scripts/pull_data.py CHANGED
@@ -1,12 +1,10 @@
 import logging
-import re
 import os
 import pickle
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
 from tqdm import tqdm
 from web3 import Web3
-from typing import Optional
 import pandas as pd
 from pathlib import Path
 from functools import partial
@@ -20,6 +18,7 @@ from tools import (
     update_tools_accuracy,
 )
 from profitability import run_profitability_analysis
+from utils import get_question, current_answer
 
 import gc
 
@@ -30,28 +29,6 @@ ROOT_DIR = SCRIPTS_DIR.parent
 DATA_DIR = ROOT_DIR / "data"
 
 
-def get_question(text: str) -> str:
-    """Get the question from a text."""
-    # Regex to find text within double quotes
-    pattern = r'"([^"]*)"'
-
-    # Find all occurrences
-    questions = re.findall(pattern, text)
-
-    # Assuming you want the first question if there are multiple
-    question = questions[0] if questions else None
-
-    return question
-
-
-def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
-    """Get the current answer for a question."""
-    row = fpmms[fpmms["title"] == text]
-    if row.shape[0] == 0:
-        return None
-    return row["currentAnswer"].values[0]
-
-
 def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
     """Convert a block number to a timestamp."""
     block = web3.eth.get_block(block_number)
scripts/utils.py CHANGED
@@ -6,6 +6,7 @@ from tqdm import tqdm
 from typing import List, Any, Optional
 import pandas as pd
 import gc
+import re
 from dataclasses import dataclass
 from pathlib import Path
 from enum import Enum
@@ -41,6 +42,8 @@ INC_TOOLS = [
     "prediction-request-reasoning-claude",
 ]
 
+SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)
+
 
 class MechEventName(Enum):
     """The mech's event names."""
@@ -305,3 +308,25 @@ def get_earliest_block(event_name: MechEventName) -> int:
     del df
     gc.collect()
     return earliest_block
+
+
+def get_question(text: str) -> str:
+    """Get the question from a text."""
+    # Regex to find text within double quotes
+    pattern = r'"([^"]*)"'
+
+    # Find all occurrences
+    questions = re.findall(pattern, text)
+
+    # Assuming you want the first question if there are multiple
+    question = questions[0] if questions else None
+
+    return question
+
+
+def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
+    """Get the current answer for a question."""
+    row = fpmms[fpmms["title"] == text]
+    if row.shape[0] == 0:
+        return None
+    return row["currentAnswer"].values[0]
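For reference, `get_question` pulls the first double-quoted span out of a prompt, and `current_answer` looks that title up in the FPMM frame. A small illustrative check of the two helpers moved here (the prompt and market data are made up):

```python
import pandas as pd

# Hypothetical prompt in the style the mech tools receive; only the quoted span matters.
prompt = 'Please estimate the probability that "Will ETH close above $4k on 31 December 2024?"'
assert get_question(prompt) == "Will ETH close above $4k on 31 December 2024?"

# current_answer matches on the market title and returns its currentAnswer, else None.
fpmms = pd.DataFrame(
    {
        "title": ["Will ETH close above $4k on 31 December 2024?"],
        "currentAnswer": ["Yes"],
    }
)
assert current_answer(get_question(prompt), fpmms) == "Yes"
assert current_answer("some unknown question", fpmms) is None
```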
tabs/about.py CHANGED
@@ -8,5 +8,5 @@ about_this_dashboard = """\
 This dashboard is pulling data from the omen subgraph during some specific time window. As the data is distributed by weeks, it is possible that some weeks contain incomplete data not showing the total volume of information.
 This is in particular relevant for:
 * the first week: since we might have started collecting information not from the beginning of the week.
-* the last week: some markets have not been closed yet and the information is not published yet.
+* the last week: some markets have not been closed yet and the information is not published yet.
 """