{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "from datetime import datetime, timezone\n",
    "from functools import partial\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "from web3 import Web3\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Make t_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the whole file in one pass (low_memory=False) so pandas infers a single\n",
    "# dtype per column instead of per chunk, which avoids the mixed-type DtypeWarning.\n",
    "tools = pd.read_csv(\"../data/tools.csv\", low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the column labels of the tools frame.\n",
    "tools.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map each request block number to its request timestamp. Rows with a missing\n",
    "# block or timestamp are dropped so the cache never stores NaN for a block.\n",
    "t_map = (\n",
    "    tools[['request_block', 'request_time']]\n",
    "    .dropna()\n",
    "    .set_index('request_block')['request_time']\n",
    "    .to_dict()\n",
    ")\n",
    "\n",
    "# Persist the mapping so it can be reloaded without rebuilding it.\n",
    "with open('../data/t_map.pkl', 'wb') as f:\n",
    "    pickle.dump(t_map, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the request_block -> request_time mapping from its pickle file.\n",
    "# NOTE: pickle.load can execute arbitrary code; only load files you trust.\n",
    "with open('../data/t_map.pkl', 'rb') as f:\n",
    "    t_map = pickle.load(f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Markets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['id', 'currentAnswer', 'title'], dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the fpmms table and show which columns it provides.\n",
    "fpmms = pd.read_csv(\"../data/fpmms.csv\")\n",
    "fpmms.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_42934/371090584.py:1: DtypeWarning: Columns (2) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  delivers = pd.read_csv(\"../data/delivers.csv\")\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(263613, 12)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# low_memory=False parses the file in one pass so each column gets a single\n",
    "# inferred dtype; the chunked default raised a DtypeWarning on this file.\n",
    "delivers = pd.read_csv(\"../data/delivers.csv\", low_memory=False)\n",
    "delivers.shape\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(245092, 6)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the requests table and report its (rows, columns) shape.\n",
    "requests = pd.read_csv(\"../data/requests.csv\")\n",
    "requests.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_42934/3254331204.py:1: DtypeWarning: Columns (7,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  tools = pd.read_csv(\"../data/tools.csv\")\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['request_id', 'request_block', 'prompt_request', 'tool', 'nonce',\n",
       "       'trader_address', 'deliver_block', 'error', 'error_message',\n",
       "       'prompt_response', 'mech_address', 'p_yes', 'p_no', 'confidence',\n",
       "       'info_utility', 'vote', 'win_probability', 'title', 'currentAnswer',\n",
       "       'request_time', 'request_month_year', 'request_month_year_week'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# low_memory=False parses the file in one pass so each column gets a single\n",
    "# inferred dtype; the chunked default raised a DtypeWarning on this file.\n",
    "# This frame is later written back to the same CSV.\n",
    "tools = pd.read_csv(\"../data/tools.csv\", low_memory=False)\n",
    "tools.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "841"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the rows whose request_time is missing.\n",
    "tools['request_time'].isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def block_number_to_timestamp(block_number: int, web3: Web3) -> str:\n",
    "    \"\"\"Convert a block number to a UTC timestamp string.\n",
    "\n",
    "    Args:\n",
    "        block_number: Number of the block to look up.\n",
    "        web3: Web3 client used to fetch the block.\n",
    "\n",
    "    Returns:\n",
    "        The block's timestamp in UTC, formatted as 'YYYY-MM-DD HH:MM:SS'.\n",
    "    \"\"\"\n",
    "    block = web3.eth.get_block(block_number)\n",
    "    # datetime.utcfromtimestamp is deprecated since Python 3.12; an aware UTC\n",
    "    # datetime produces the same formatted string.\n",
    "    timestamp = datetime.fromtimestamp(block['timestamp'], tz=timezone.utc)\n",
    "    return timestamp.strftime('%Y-%m-%d %H:%M:%S')\n",
    "\n",
    "\n",
    "def parallelize_timestamp_conversion(\n",
    "    df: pd.DataFrame, function: callable, max_workers: int = 10\n",
    ") -> list:\n",
    "    \"\"\"Apply ``function`` to every request block of ``df`` using a thread pool.\n",
    "\n",
    "    Args:\n",
    "        df: Frame with a 'request_block' column.\n",
    "        function: Callable that takes a single block number.\n",
    "        max_workers: Number of worker threads (defaults to 10).\n",
    "\n",
    "    Returns:\n",
    "        The results of ``function``, in the same order as the rows of ``df``.\n",
    "    \"\"\"\n",
    "    block_numbers = df['request_block'].tolist()\n",
    "    with ThreadPoolExecutor(max_workers=max_workers) as executor:\n",
    "        # executor.map yields results in input order, so they align with df rows.\n",
    "        results = list(tqdm(executor.map(function, block_numbers), total=len(block_numbers)))\n",
    "    return results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The RPC endpoint URL embeds an API key, so it is read from the environment\n",
    "# instead of being committed with the notebook.\n",
    "rpc = os.environ.get(\"RPC_URL\")\n",
    "if not rpc:\n",
    "    raise RuntimeError(\"Set the RPC_URL environment variable to the RPC endpoint URL.\")\n",
    "web3 = Web3(Web3.HTTPProvider(rpc))\n",
    "\n",
    "# Bind the client once so the converter only needs a block number.\n",
    "partial_block_number_to_timestamp = partial(block_number_to_timestamp, web3=web3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 841/841 [00:25<00:00, 33.18it/s]\n"
     ]
    }
   ],
   "source": [
    "# Look up a timestamp for every row whose request_time is still missing.\n",
    "missing_time_indices = tools.index[tools['request_time'].isna()]\n",
    "if not missing_time_indices.empty:\n",
    "    missing_timestamps = parallelize_timestamp_conversion(\n",
    "        tools.loc[missing_time_indices], partial_block_number_to_timestamp\n",
    "    )\n",
    "\n",
    "    # Timestamps come back in row order, so assign them in one vectorized step.\n",
    "    tools.loc[missing_time_indices, 'request_time'] = missing_timestamps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-count the rows whose request_time is missing.\n",
    "tools['request_time'].isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse request_time once and derive both period labels from the parsed values.\n",
    "request_dt = pd.to_datetime(tools['request_time'])\n",
    "tools['request_month_year'] = request_dt.dt.strftime('%Y-%m')\n",
    "tools['request_month_year_week'] = request_dt.dt.to_period('W').astype(str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the rows whose request_month_year_week is missing.\n",
    "tools['request_month_year_week'].isna().sum()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the frame back to the CSV it was read from, without the index column.\n",
    "tools.to_csv(\"../data/tools.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge the request_block -> request_time pairs from tools into the pickled map.\n",
    "with open('../data/t_map.pkl', 'rb') as f:\n",
    "    t_map = pickle.load(f)\n",
    "\n",
    "block_times = tools[['request_block', 'request_time']].dropna()\n",
    "new_timestamps = block_times.set_index('request_block')['request_time'].to_dict()\n",
    "t_map.update(new_timestamps)\n",
    "\n",
    "# Write the merged map back to the same file.\n",
    "with open('../data/t_map.pkl', 'wb') as f:\n",
    "    pickle.dump(t_map, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "autogen",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}