diff --git "a/notebooks/outliers_analysis.ipynb" "b/notebooks/outliers_analysis.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/outliers_analysis.ipynb" @@ -0,0 +1,3219 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import gc\n", + "sns.set_style(\"darkgrid\")" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "outliers = pd.read_parquet('../data/outliers.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "all_trades = pd.read_parquet('../data/all_trades_profitability.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "all_trades[\"creation_date\"] = all_trades[\"creation_timestamp\"].dt.date" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_40712/1825242321.py:6: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n", + " all_trades[\"creation_timestamp\"].dt.to_period(\"W\").dt.strftime(\"%b-%d\")\n" + ] + } + ], + "source": [ + "all_trades = all_trades.sort_values(\n", + " by=\"creation_timestamp\", ascending=True\n", + ")\n", + "\n", + "all_trades[\"month_year_week\"] = (\n", + " all_trades[\"creation_timestamp\"].dt.to_period(\"W\").dt.strftime(\"%b-%d\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 2 entries, 24957 to 9513\n", + "Data columns (total 23 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trader_address 2 non-null object \n", + " 1 market_creator 2 non-null object \n", + " 2 trade_id 2 non-null object \n", + " 3 creation_timestamp 2 non-null datetime64[us, UTC]\n", + " 4 title 2 non-null object \n", + " 5 market_status 2 non-null object \n", + " 6 collateral_amount 2 non-null float64 \n", + " 7 outcome_index 2 non-null object \n", + " 8 trade_fee_amount 2 non-null float64 \n", + " 9 outcomes_tokens_traded 2 non-null float64 \n", + " 10 current_answer 2 non-null int64 \n", + " 11 is_invalid 2 non-null bool \n", + " 12 winning_trade 2 non-null int64 \n", + " 13 earnings 2 non-null float64 \n", + " 14 redeemed 2 non-null bool \n", + " 15 redeemed_amount 2 non-null float64 \n", + " 16 num_mech_calls 2 non-null int64 \n", + " 17 mech_fee_amount 2 non-null float64 \n", + " 18 net_earnings 2 non-null float64 \n", + " 19 roi 2 non-null float64 \n", + " 20 staking 2 non-null object \n", + " 21 month_year 2 non-null object \n", + " 22 month_year_week 2 non-null object \n", + "dtypes: bool(2), datetime64[us, UTC](1), float64(8), int64(3), object(9)\n", + "memory usage: 356.0+ bytes\n" + ] + } + ], + "source": [ + "outliers.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trader_addressmarket_creatortrade_idcreation_timestamptitlemarket_statuscollateral_amountoutcome_indextrade_fee_amountoutcomes_tokens_traded...earningsredeemedredeemed_amountnum_mech_callsmech_fee_amountnet_earningsroistakingmonth_yearmonth_year_week
249570x3666da333dadd05083fef9ff6ddee588d26e4307quickstart0x11cf6ec9649097127238ffb789b0703da448d9fa0x36...2024-09-15 02:02:05+00:00Will Apple launch the iPhone 16 by 15 Septembe...CLOSED1.000000e-0502.000000e-070.020738...0.020738True0.02073800.00.0207272.032090e+03non_agent2024-09Sep-15
95130xf21c4230f137ffcee12e69786d854e62a7b4b0aepearl0xa51ffc63bc0afd06e17130ff2e0ebedf0491b1730xf2...2024-10-08 15:20:55+00:00Will Donald Trump visit the city of Valdosta, ...CLOSED2.080980e-0704.161961e-091.368652...1.368652True1.36865200.01.3686526.447997e+06non_agent2024-10Oct-13
\n", + "

2 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " trader_address market_creator \\\n", + "24957 0x3666da333dadd05083fef9ff6ddee588d26e4307 quickstart \n", + "9513 0xf21c4230f137ffcee12e69786d854e62a7b4b0ae pearl \n", + "\n", + " trade_id \\\n", + "24957 0x11cf6ec9649097127238ffb789b0703da448d9fa0x36... \n", + "9513 0xa51ffc63bc0afd06e17130ff2e0ebedf0491b1730xf2... \n", + "\n", + " creation_timestamp \\\n", + "24957 2024-09-15 02:02:05+00:00 \n", + "9513 2024-10-08 15:20:55+00:00 \n", + "\n", + " title market_status \\\n", + "24957 Will Apple launch the iPhone 16 by 15 Septembe... CLOSED \n", + "9513 Will Donald Trump visit the city of Valdosta, ... CLOSED \n", + "\n", + " collateral_amount outcome_index trade_fee_amount \\\n", + "24957 1.000000e-05 0 2.000000e-07 \n", + "9513 2.080980e-07 0 4.161961e-09 \n", + "\n", + " outcomes_tokens_traded ... earnings redeemed redeemed_amount \\\n", + "24957 0.020738 ... 0.020738 True 0.020738 \n", + "9513 1.368652 ... 1.368652 True 1.368652 \n", + "\n", + " num_mech_calls mech_fee_amount net_earnings roi staking \\\n", + "24957 0 0.0 0.020727 2.032090e+03 non_agent \n", + "9513 0 0.0 1.368652 6.447997e+06 non_agent \n", + "\n", + " month_year month_year_week \n", + "24957 2024-09 Sep-15 \n", + "9513 2024-10 Oct-13 \n", + "\n", + "[2 rows x 23 columns]" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outliers" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "24957 0x3666da333dadd05083fef9ff6ddee588d26e4307\n", + "9513 0xf21c4230f137ffcee12e69786d854e62a7b4b0ae\n", + "Name: trader_address, dtype: object" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outliers.trader_address" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "tools = pd.read_parquet('../data/tools.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 331196 entries, 0 to 331195\n", + "Data columns (total 23 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 request_id 331196 non-null object \n", + " 1 request_block 331196 non-null object \n", + " 2 prompt_request 331196 non-null object \n", + " 3 tool 331196 non-null object \n", + " 4 nonce 331196 non-null object \n", + " 5 trader_address 331196 non-null object \n", + " 6 deliver_block 331196 non-null object \n", + " 7 error 331196 non-null int64 \n", + " 8 error_message 3352 non-null object \n", + " 9 prompt_response 330370 non-null object \n", + " 10 mech_address 330462 non-null object \n", + " 11 p_yes 327844 non-null float64 \n", + " 12 p_no 327844 non-null float64 \n", + " 13 confidence 327844 non-null float64 \n", + " 14 info_utility 327844 non-null float64 \n", + " 15 vote 239273 non-null object \n", + " 16 win_probability 327844 non-null float64 \n", + " 17 market_creator 331196 non-null object \n", + " 18 title 331196 non-null object \n", + " 19 currentAnswer 261062 non-null object \n", + " 20 request_time 331196 non-null datetime64[ns, UTC]\n", + " 21 request_month_year 331196 non-null object \n", + " 22 request_month_year_week 331196 non-null object \n", + "dtypes: datetime64[ns, UTC](1), float64(5), int64(1), object(16)\n", + "memory usage: 58.1+ MB\n" + ] + } + ], + "source": [ + "tools.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['request_id', 'request_block', 'prompt_request', 'tool', 'nonce',\n", + " 'trader_address', 'deliver_block', 'error', 'error_message',\n", + " 'prompt_response', 'mech_address', 'p_yes', 'p_no', 'confidence',\n", + " 'info_utility', 'vote', 'win_probability', 'market_creator', 'title',\n", + " 'currentAnswer', 'request_time', 'request_month_year',\n", + " 'request_month_year_week'],\n", + " dtype='object')" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "trader = \"0x87f0fcfe810502555f8d1439793155cbfa2eb583\"\n", + "selected_week = \"Nov-03\"" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "trader_data = all_trades.loc[(all_trades[\"trader_address\"]==trader)&(all_trades[\"month_year_week\"]==selected_week)]" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "trader_data_selected = trader_data.loc[trader_data[\"num_mech_calls\"]>200]" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "title\n", + "Will the U.S. Congress hold a hearing to discuss the security threats faced by former U.S. Presidents before November 1, 2024? 64\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trader_data_selected.title.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "creation_date\n", + "2024-10-29 32\n", + "2024-10-30 29\n", + "2024-10-28 3\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trader_data_selected.creation_date.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trader_address 0x87f0fcfe810502555f8d1439793155cbfa2eb583\n", + "market_creator pearl\n", + "trade_id 0xf58e542f0fa539fa332605a4de8d9affcc24bf0e0x87...\n", + "creation_timestamp 2024-10-28 21:16:00+00:00\n", + "title Will the U.S. Congress hold a hearing to discu...\n", + "market_status CLOSED\n", + "collateral_amount 0.025\n", + "outcome_index 0\n", + "trade_fee_amount 0.00025\n", + "outcomes_tokens_traded 0.048743\n", + "current_answer 1\n", + "is_invalid False\n", + "winning_trade False\n", + "earnings 0.0\n", + "redeemed True\n", + "redeemed_amount 0.0\n", + "num_mech_calls 206\n", + "mech_fee_amount 2.06\n", + "net_earnings -2.08525\n", + "roi -1.0\n", + "staking pearl\n", + "creation_date 2024-10-28\n", + "month_year_week Nov-03\n", + "Name: 26553, dtype: object" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trader_data_selected.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trader_address 0x87f0fcfe810502555f8d1439793155cbfa2eb583\n", + "market_creator pearl\n", + "trade_id 0xf58e542f0fa539fa332605a4de8d9affcc24bf0e0x87...\n", + "creation_timestamp 2024-10-28 21:55:25+00:00\n", + "title Will the U.S. Congress hold a hearing to discu...\n", + "market_status CLOSED\n", + "collateral_amount 0.025\n", + "outcome_index 0\n", + "trade_fee_amount 0.00025\n", + "outcomes_tokens_traded 0.048771\n", + "current_answer 1\n", + "is_invalid False\n", + "winning_trade False\n", + "earnings 0.0\n", + "redeemed True\n", + "redeemed_amount 0.0\n", + "num_mech_calls 206\n", + "mech_fee_amount 2.06\n", + "net_earnings -2.08525\n", + "roi -1.0\n", + "staking pearl\n", + "creation_date 2024-10-28\n", + "month_year_week Nov-03\n", + "Name: 26583, dtype: object" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trader_data_selected.iloc[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "tools_trader_data = tools.loc[tools[\"trader_address\"]==trader]" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "request_month_year_week\n", + "2024-10-28/2024-11-03 417\n", + "2024-10-14/2024-10-20 181\n", + "2024-10-21/2024-10-27 135\n", + "2024-09-23/2024-09-29 125\n", + "2024-10-07/2024-10-13 106\n", + "2024-09-30/2024-10-06 88\n", + "2024-09-16/2024-09-22 83\n", + "2024-11-04/2024-11-10 58\n", + "2024-11-11/2024-11-17 41\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools_trader_data.request_month_year_week.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "title =\"Will the U.S. Congress hold a hearing to discuss the security threats faced by former U.S. Presidents before November 1, 2024?\"" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "selected_week_data = tools_trader_data.loc[(tools_trader_data[\"request_month_year_week\"]==\"2024-10-28/2024-11-03\") & (tools_trader_data[\"title\"]==title)]" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
request_idrequest_blockprompt_requesttoolnoncetrader_addressdeliver_blockerrorerror_messageprompt_response...confidenceinfo_utilityvotewin_probabilitymarket_creatortitlecurrentAnswerrequest_timerequest_month_yearrequest_month_year_week
18826070731428242628112067281008842699423624277259...36773107Please take over the role of a Data Scientist ...prediction-offline-sme5587828f-fcf6-4d3f-9cc3-27983ccc94a70x87f0fcfe810502555f8d1439793155cbfa2eb583367731170None\\nYou are an LLM inside a multi-agent system t......0.600.0Yes0.55pearlWill the U.S. Congress hold a hearing to discu...No2024-10-30 20:12:35+00:002024-102024-10-28/2024-11-03
20179425450168771161496526925826843497586130204397...36754843Please take over the role of a Data Scientist ...prediction-offline-sme030ad748-6d05-475d-99a8-4e19296a97610x87f0fcfe810502555f8d1439793155cbfa2eb583367548560None\\nYou are an LLM inside a multi-agent system t......0.600.0Yes0.65pearlWill the U.S. Congress hold a hearing to discu...No2024-10-29 17:56:05+00:002024-102024-10-28/2024-11-03
24612653517810834386215376709232162100681051077563...36749585Please take over the role of a Data Scientist ...prediction-offline-smed089522e-e585-42db-8d97-6b49f36004090x87f0fcfe810502555f8d1439793155cbfa2eb583367495940None\\nYou are an LLM inside a multi-agent system t......0.600.0Yes0.55pearlWill the U.S. Congress hold a hearing to discu...No2024-10-29 10:20:00+00:002024-102024-10-28/2024-11-03
27371314824321782229000141984840300067466538324042...36753259Please take over the role of a Data Scientist ...prediction-offline-sme8d691113-76d5-4b65-8b18-b61a0499cd740x87f0fcfe810502555f8d1439793155cbfa2eb583367532710None\\nYou are an LLM inside a multi-agent system t......0.650.0Yes0.55pearlWill the U.S. Congress hold a hearing to discu...No2024-10-29 15:40:05+00:002024-102024-10-28/2024-11-03
29101889334396632607880382108537244783614667358553...36770936Please take over the role of a Data Scientist ...prediction-offline-smefc7783d8-80d9-4658-8cdd-c7f1f37ded5f0x87f0fcfe810502555f8d1439793155cbfa2eb583367709530None\\nYou are an LLM inside a multi-agent system t......0.500.0No0.60pearlWill the U.S. Congress hold a hearing to discu...No2024-10-30 17:05:35+00:002024-102024-10-28/2024-11-03
\n", + "

5 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " request_id request_block \\\n", + "1882 6070731428242628112067281008842699423624277259... 36773107 \n", + "2017 9425450168771161496526925826843497586130204397... 36754843 \n", + "2461 2653517810834386215376709232162100681051077563... 36749585 \n", + "2737 1314824321782229000141984840300067466538324042... 36753259 \n", + "2910 1889334396632607880382108537244783614667358553... 36770936 \n", + "\n", + " prompt_request \\\n", + "1882 Please take over the role of a Data Scientist ... \n", + "2017 Please take over the role of a Data Scientist ... \n", + "2461 Please take over the role of a Data Scientist ... \n", + "2737 Please take over the role of a Data Scientist ... \n", + "2910 Please take over the role of a Data Scientist ... \n", + "\n", + " tool nonce \\\n", + "1882 prediction-offline-sme 5587828f-fcf6-4d3f-9cc3-27983ccc94a7 \n", + "2017 prediction-offline-sme 030ad748-6d05-475d-99a8-4e19296a9761 \n", + "2461 prediction-offline-sme d089522e-e585-42db-8d97-6b49f3600409 \n", + "2737 prediction-offline-sme 8d691113-76d5-4b65-8b18-b61a0499cd74 \n", + "2910 prediction-offline-sme fc7783d8-80d9-4658-8cdd-c7f1f37ded5f \n", + "\n", + " trader_address deliver_block error \\\n", + "1882 0x87f0fcfe810502555f8d1439793155cbfa2eb583 36773117 0 \n", + "2017 0x87f0fcfe810502555f8d1439793155cbfa2eb583 36754856 0 \n", + "2461 0x87f0fcfe810502555f8d1439793155cbfa2eb583 36749594 0 \n", + "2737 0x87f0fcfe810502555f8d1439793155cbfa2eb583 36753271 0 \n", + "2910 0x87f0fcfe810502555f8d1439793155cbfa2eb583 36770953 0 \n", + "\n", + " error_message prompt_response ... \\\n", + "1882 None \\nYou are an LLM inside a multi-agent system t... ... \n", + "2017 None \\nYou are an LLM inside a multi-agent system t... ... \n", + "2461 None \\nYou are an LLM inside a multi-agent system t... ... \n", + "2737 None \\nYou are an LLM inside a multi-agent system t... ... \n", + "2910 None \\nYou are an LLM inside a multi-agent system t... ... \n", + "\n", + " confidence info_utility vote win_probability market_creator \\\n", + "1882 0.60 0.0 Yes 0.55 pearl \n", + "2017 0.60 0.0 Yes 0.65 pearl \n", + "2461 0.60 0.0 Yes 0.55 pearl \n", + "2737 0.65 0.0 Yes 0.55 pearl \n", + "2910 0.50 0.0 No 0.60 pearl \n", + "\n", + " title currentAnswer \\\n", + "1882 Will the U.S. Congress hold a hearing to discu... No \n", + "2017 Will the U.S. Congress hold a hearing to discu... No \n", + "2461 Will the U.S. Congress hold a hearing to discu... No \n", + "2737 Will the U.S. Congress hold a hearing to discu... No \n", + "2910 Will the U.S. Congress hold a hearing to discu... No \n", + "\n", + " request_time request_month_year request_month_year_week \n", + "1882 2024-10-30 20:12:35+00:00 2024-10 2024-10-28/2024-11-03 \n", + "2017 2024-10-29 17:56:05+00:00 2024-10 2024-10-28/2024-11-03 \n", + "2461 2024-10-29 10:20:00+00:00 2024-10 2024-10-28/2024-11-03 \n", + "2737 2024-10-29 15:40:05+00:00 2024-10 2024-10-28/2024-11-03 \n", + "2910 2024-10-30 17:05:35+00:00 2024-10 2024-10-28/2024-11-03 \n", + "\n", + "[5 rows x 23 columns]" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "selected_week_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Please take over the role of a Data Scientist to evaluate the given question. With the given question \"Will the U.S. Congress hold a hearing to discuss the security threats faced by former U.S. Presidents before November 1, 2024?\" and the `yes` option represented by `Yes` and the `no` option represented by `No`, what are the respective probabilities of `p_yes` and `p_no` occurring?'" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "selected_week_data.iloc[0].prompt_request" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Please take over the role of a Data Scientist to evaluate the given question. With the given question \"Will the U.S. Congress hold a hearing to discuss the security threats faced by former U.S. Presidents before November 1, 2024?\" and the `yes` option represented by `Yes` and the `no` option represented by `No`, what are the respective probabilities of `p_yes` and `p_no` occurring?'" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "selected_week_data.iloc[0].prompt_request" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(title in selected_week_data.iloc[0].prompt_request)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_40712/3528210695.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " selected_week_data[\"request_date\"] = selected_week_data['request_time'].dt.date\n" + ] + } + ], + "source": [ + "tools[\"request_date\"] = tools['request_time'].dt.date\n", + "selected_week_data[\"request_date\"] = selected_week_data['request_time'].dt.date" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "request_date\n", + "2024-10-30 108\n", + "2024-10-29 92\n", + "2024-10-28 6\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "selected_week_data.request_date.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "request_date\n", + "2024-10-28 6\n", + "2024-10-29 92\n", + "2024-10-30 108\n", + "Name: prompt_request, dtype: int64" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "correct_number_of_mech_calls = selected_week_data.groupby('request_date')['prompt_request'].apply(lambda x: x.apply(lambda y: title in y).sum())\n", + "correct_number_of_mech_calls" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "num_mech_calls = (\n", + " selected_week_data[\"prompt_request\"]\n", + " .apply(lambda x: title in x)\n", + " .sum()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "206" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "num_mech_calls" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 0 entries\n", + "Data columns (total 23 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 request_id 0 non-null object \n", + " 1 request_block 0 non-null object \n", + " 2 prompt_request 0 non-null object \n", + " 3 tool 0 non-null object \n", + " 4 nonce 0 non-null object \n", + " 5 trader_address 0 non-null object \n", + " 6 deliver_block 0 non-null object \n", + " 7 error 0 non-null int64 \n", + " 8 error_message 0 non-null object \n", + " 9 prompt_response 0 non-null object \n", + " 10 mech_address 0 non-null object \n", + " 11 p_yes 0 non-null float64 \n", + " 12 p_no 0 non-null float64 \n", + " 13 confidence 0 non-null float64 \n", + " 14 info_utility 0 non-null float64 \n", + " 15 vote 0 non-null object \n", + " 16 win_probability 0 non-null float64 \n", + " 17 market_creator 0 non-null object \n", + " 18 title 0 non-null object \n", + " 19 currentAnswer 0 non-null object \n", + " 20 request_time 0 non-null datetime64[ns, UTC]\n", + " 21 request_month_year 0 non-null object \n", + " 22 request_month_year_week 0 non-null object \n", + "dtypes: datetime64[ns, UTC](1), float64(5), int64(1), object(16)\n", + "memory usage: 0.0+ bytes\n" + ] + } + ], + "source": [ + "tools_trader_data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tool\n", + "claude-prediction-offline 73070\n", + "prediction-request-reasoning 40602\n", + "prediction-offline-sme 35646\n", + "claude-prediction-online 29455\n", + "prediction-request-rag-claude 24377\n", + "prediction-offline 5765\n", + "prediction-online 1808\n", + "prediction-online-sme 1616\n", + "prediction-request-rag 1599\n", + "prediction-request-reasoning-claude 1551\n", + "prediction-url-cot-claude 1513\n", + "superforcaster 570\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools.tool.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "inc_tools = [\n", + " \"prediction-online\",\n", + " \"prediction-offline\",\n", + " \"claude-prediction-online\",\n", + " \"claude-prediction-offline\",\n", + " \"prediction-offline-sme\",\n", + " \"prediction-online-sme\",\n", + " \"prediction-request-rag\",\n", + " \"prediction-request-reasoning\",\n", + " \"prediction-url-cot-claude\",\n", + " \"prediction-request-rag-claude\",\n", + " \"prediction-request-reasoning-claude\",\n", + " \"superforcaster\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "tools_inc = tools[tools[\"tool\"].isin(inc_tools)]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tool\n", + "claude-prediction-offline 73070\n", + "prediction-request-reasoning 40602\n", + "prediction-offline-sme 35646\n", + "claude-prediction-online 29455\n", + "prediction-request-rag-claude 24377\n", + "prediction-offline 5765\n", + "prediction-online 1808\n", + "prediction-online-sme 1616\n", + "prediction-request-rag 1599\n", + "prediction-request-reasoning-claude 1551\n", + "prediction-url-cot-claude 1513\n", + "superforcaster 570\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools_inc.tool.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# filtering errors\n", + "tools_non_error = tools_inc[tools_inc[\"error\"] != 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "tools_superforcaster = tools_non_error.loc[tools_non_error[\"tool\"]==\"superforcaster\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "currentAnswer\n", + "No 216\n", + "Yes 183\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools_superforcaster.currentAnswer.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "tools_non_error.loc[:, \"currentAnswer\"] = tools_non_error[\"currentAnswer\"].replace(\n", + " {\"no\": \"No\", \"yes\": \"Yes\"}\n", + ")\n", + "tools_non_error = tools_non_error[\n", + " tools_non_error[\"currentAnswer\"].isin([\"Yes\", \"No\"])\n", + "]\n", + "tools_non_error = tools_non_error[tools_non_error[\"vote\"].isin([\"Yes\", \"No\"])]\n", + "tools_non_error[\"win\"] = (\n", + " tools_non_error[\"currentAnswer\"] == tools_non_error[\"vote\"]\n", + ").astype(int)\n", + "tools_non_error.columns = tools_non_error.columns.astype(str)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "wins = tools_non_error.groupby([\"tool\", \"win\"]).size().unstack().fillna(0)\n", + "wins[\"tool_accuracy\"] = (wins[1] / (wins[0] + wins[1])) * 100\n", + "wins.reset_index(inplace=True)\n", + "wins[\"total_requests\"] = wins[0] + wins[1]\n", + "wins.columns = wins.columns.astype(str)\n", + "wins = wins[[\"tool\", \"tool_accuracy\", \"total_requests\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "timeline dataset\n", + " min max\n", + "tool \n", + "claude-prediction-offline 2024-09-05 11:53:05 2024-11-03 10:37:55\n", + "claude-prediction-online 2024-09-05 11:10:25 2024-11-03 21:47:05\n", + "prediction-offline 2024-09-05 10:39:05 2024-11-02 15:50:50\n", + "prediction-offline-sme 2024-09-05 07:36:40 2024-11-03 00:11:45\n", + "prediction-online 2024-09-05 11:43:35 2024-11-02 23:43:20\n" + ] + } + ], + "source": [ + "timeline = tools_non_error.groupby([\"tool\"])[\"request_time\"].agg([\"min\", \"max\"])\n", + "print(\"timeline dataset\")\n", + "print(timeline.head())\n", + "acc_info = wins.merge(timeline, how=\"left\", on=\"tool\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tooltool_accuracytotal_requestsminmax
7prediction-request-rag-claude60.852545206912024-09-05 07:12:402024-11-03 00:02:55
8prediction-request-reasoning59.186747332002024-09-05 07:45:152024-11-03 16:59:10
9prediction-request-reasoning-claude63.79310311602024-09-05 08:52:102024-11-02 19:07:45
10prediction-url-cot-claude62.38902312392024-09-05 08:35:502024-11-02 23:46:25
11superforcaster59.6491233992024-10-25 18:50:252024-11-02 15:56:20
\n", + "
" + ], + "text/plain": [ + " tool tool_accuracy total_requests \\\n", + "7 prediction-request-rag-claude 60.852545 20691 \n", + "8 prediction-request-reasoning 59.186747 33200 \n", + "9 prediction-request-reasoning-claude 63.793103 1160 \n", + "10 prediction-url-cot-claude 62.389023 1239 \n", + "11 superforcaster 59.649123 399 \n", + "\n", + " min max \n", + "7 2024-09-05 07:12:40 2024-11-03 00:02:55 \n", + "8 2024-09-05 07:45:15 2024-11-03 16:59:10 \n", + "9 2024-09-05 08:52:10 2024-11-02 19:07:45 \n", + "10 2024-09-05 08:35:50 2024-11-02 23:46:25 \n", + "11 2024-10-25 18:50:25 2024-11-02 15:56:20 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "acc_info.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "acc_data = pd.read_csv(\"../data/tools_accuracy.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating accuracy information\n", + "New tool superforcaster\n" + ] + } + ], + "source": [ + "print(\"Updating accuracy information\")\n", + "tools_to_update = list(acc_info[\"tool\"].values)\n", + "no_timeline_info = False\n", + "existing_tools = list(acc_data[\"tool\"].values)\n", + "for tool in tools_to_update:\n", + " new_accuracy = acc_info[acc_info[\"tool\"] == tool][\"tool_accuracy\"].values[0]\n", + " new_volume = acc_info[acc_info[\"tool\"] == tool][\"total_requests\"].values[0]\n", + " if no_timeline_info:\n", + " new_min_timeline = None\n", + " new_max_timeline = None\n", + " else:\n", + " new_min_timeline = acc_info[acc_info[\"tool\"] == tool][\"min\"].values[0]\n", + " new_max_timeline = acc_info[acc_info[\"tool\"] == tool][\"max\"].values[0]\n", + " if tool in existing_tools:\n", + " continue\n", + " else:\n", + " # new tool to add to the file\n", + " # tool,tool_accuracy,total_requests,min,max\n", + " print(f\"New tool {tool}\")\n", + " new_row = [{\n", + " \"tool\": tool,\n", + " \"tool_accuracy\": new_accuracy,\n", + " \"total_requests\": new_volume,\n", + " \"min\": new_min_timeline,\n", + " \"max\": new_max_timeline,\n", + " }]\n", + " tools_acc = pd.concat([acc_data, pd.DataFrame(new_row)], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "tools_acc.to_csv(\"../data/tools_accuracy.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HASH of the tools accuracy file: QmNm4BjhAebZjDsTgYhQcRHsobBy33FHzpJGQo9B3fB6jn\n" + ] + } + ], + "source": [ + "import ipfshttpclient\n", + "\n", + "ACCURACY_FILENAME = \"tools_accuracy.csv\"\n", + "IPFS_SERVER = \"/dns/registry.autonolas.tech/tcp/443/https\"\n", + "client = ipfshttpclient.connect(IPFS_SERVER)\n", + "result = client.add(\"../data/tools_accuracy.csv\")\n", + "print(f\"HASH of the tools accuracy file: {result['Hash']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "tools_superforcaster = tools.loc[tools[\"tool\"]==\"superforcaster\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "trades = pd.read_parquet('../json_data/all_trades.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "old_trades = pd.read_parquet(\"../data/fpmmTrades.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "new_trades = pd.read_parquet(\"../data/new_fpmmTrades.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10156 entries, 0 to 10155\n", + "Data columns (total 24 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 collateralAmount 10156 non-null object\n", + " 1 collateralAmountUSD 10156 non-null object\n", + " 2 collateralToken 10156 non-null object\n", + " 3 creationTimestamp 10156 non-null object\n", + " 4 trader_address 10156 non-null object\n", + " 5 feeAmount 10156 non-null object\n", + " 6 id 10156 non-null object\n", + " 7 oldOutcomeTokenMarginalPrice 10156 non-null object\n", + " 8 outcomeIndex 10156 non-null object\n", + " 9 outcomeTokenMarginalPrice 10156 non-null object\n", + " 10 outcomeTokensTraded 10156 non-null object\n", + " 11 title 10156 non-null object\n", + " 12 transactionHash 10156 non-null object\n", + " 13 type 10156 non-null object\n", + " 14 market_creator 10156 non-null object\n", + " 15 fpmm.answerFinalizedTimestamp 6463 non-null object\n", + " 16 fpmm.arbitrationOccurred 10156 non-null bool \n", + " 17 fpmm.currentAnswer 6463 non-null object\n", + " 18 fpmm.id 10156 non-null object\n", + " 19 fpmm.isPendingArbitration 10156 non-null bool \n", + " 20 fpmm.openingTimestamp 10156 non-null object\n", + " 21 fpmm.outcomes 10156 non-null object\n", + " 22 fpmm.title 10156 non-null object\n", + " 23 fpmm.condition.id 10156 non-null object\n", + "dtypes: bool(2), object(22)\n", + "memory usage: 1.7+ MB\n" + ] + } + ], + "source": [ + "new_trades.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 36970 entries, 0 to 36969\n", + "Data columns (total 24 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 collateralAmount 36970 non-null object\n", + " 1 collateralAmountUSD 36970 non-null object\n", + " 2 collateralToken 36970 non-null object\n", + " 3 creationTimestamp 36970 non-null object\n", + " 4 trader_address 36970 non-null object\n", + " 5 feeAmount 36970 non-null object\n", + " 6 id 36970 non-null object\n", + " 7 oldOutcomeTokenMarginalPrice 36970 non-null object\n", + " 8 outcomeIndex 36970 non-null object\n", + " 9 outcomeTokenMarginalPrice 36970 non-null object\n", + " 10 outcomeTokensTraded 36970 non-null object\n", + " 11 title 36970 non-null object\n", + " 12 transactionHash 36970 non-null object\n", + " 13 type 36970 non-null object\n", + " 14 market_creator 36970 non-null object\n", + " 15 fpmm.answerFinalizedTimestamp 33241 non-null object\n", + " 16 fpmm.arbitrationOccurred 36970 non-null bool \n", + " 17 fpmm.currentAnswer 33241 non-null object\n", + " 18 fpmm.id 36970 non-null object\n", + " 19 fpmm.isPendingArbitration 36970 non-null bool \n", + " 20 fpmm.openingTimestamp 36970 non-null object\n", + " 21 fpmm.outcomes 36970 non-null object\n", + " 22 fpmm.title 36970 non-null object\n", + " 23 fpmm.condition.id 36970 non-null object\n", + "dtypes: bool(2), object(22)\n", + "memory usage: 6.3+ MB\n" + ] + } + ], + "source": [ + "old_trades.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "merge_df = pd.concat([old_trades, new_trades], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n", + " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n", + " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n", + " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n", + " 'transactionHash', 'type', 'market_creator',\n", + " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n", + " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n", + " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n", + " 'fpmm.condition.id'],\n", + " dtype='object')" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "merge_df['fpmm.arbitrationOccurred'] = merge_df['fpmm.arbitrationOccurred'].astype(bool)\n", + "merge_df['fpmm.isPendingArbitration'] = merge_df['fpmm.isPendingArbitration'].astype(bool)\n", + "merge_df['fpmm.outcomes'] = merge_df['fpmm.outcomes'].apply(list)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 47126 entries, 0 to 47125\n", + "Data columns (total 24 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 collateralAmount 47126 non-null object\n", + " 1 collateralAmountUSD 47126 non-null object\n", + " 2 collateralToken 47126 non-null object\n", + " 3 creationTimestamp 47126 non-null object\n", + " 4 trader_address 47126 non-null object\n", + " 5 feeAmount 47126 non-null object\n", + " 6 id 47126 non-null object\n", + " 7 oldOutcomeTokenMarginalPrice 47126 non-null object\n", + " 8 outcomeIndex 47126 non-null object\n", + " 9 outcomeTokenMarginalPrice 47126 non-null object\n", + " 10 outcomeTokensTraded 47126 non-null object\n", + " 11 title 47126 non-null object\n", + " 12 transactionHash 47126 non-null object\n", + " 13 type 47126 non-null object\n", + " 14 market_creator 47126 non-null object\n", + " 15 fpmm.answerFinalizedTimestamp 39704 non-null object\n", + " 16 fpmm.arbitrationOccurred 47126 non-null bool \n", + " 17 fpmm.currentAnswer 39704 non-null object\n", + " 18 fpmm.id 47126 non-null object\n", + " 19 fpmm.isPendingArbitration 47126 non-null bool \n", + " 20 fpmm.openingTimestamp 47126 non-null object\n", + " 21 fpmm.outcomes 47126 non-null object\n", + " 22 fpmm.title 47126 non-null object\n", + " 23 fpmm.condition.id 47126 non-null object\n", + "dtypes: bool(2), object(22)\n", + "memory usage: 8.0+ MB\n" + ] + } + ], + "source": [ + "merge_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
collateralAmountcollateralAmountUSDcollateralTokencreationTimestamptrader_addressfeeAmountidoldOutcomeTokenMarginalPriceoutcomeIndexoutcomeTokenMarginalPrice...market_creatorfpmm.answerFinalizedTimestampfpmm.arbitrationOccurredfpmm.currentAnswerfpmm.idfpmm.isPendingArbitrationfpmm.openingTimestampfpmm.outcomesfpmm.titlefpmm.condition.id
09305967650456174080.93059779934117533864348280336664730xe91d153e0b41518a2ce8dd3d7944fa863463a97d17285966050x01274796ce41aa8e8312e05a427ffb4b0d2148f693059676504561740x007068173910cf8719b6f2e66a18b6825c9dde820x01...0.558111979762980196833836180241856400.611825749650855211231211687533889...quickstart1728822710False0x00000000000000000000000000000000000000000000...0x007068173910cf8719b6f2e66a18b6825c9dde82False1728691200[Yes, No]Will the emergency public warning tests planne...0xa610166e379c42404bd27bf12a16119fdb5171990c3e...
110332472347961938001.0332501260033394937910329936745250xe91d153e0b41518a2ce8dd3d7944fa863463a97d17285055750x034c4ad84f7ac6638bf19300d5bbe7d9b981e736103324723479619380x007068173910cf8719b6f2e66a18b6825c9dde820x03...0.660208990298303445124446130809070700.7034159692833852946883644485233207...quickstart1728822710False0x00000000000000000000000000000000000000000000...0x007068173910cf8719b6f2e66a18b6825c9dde82False1728691200[Yes, No]Will the emergency public warning tests planne...0xa610166e379c42404bd27bf12a16119fdb5171990c3e...
212066923688428983001.2066915962481879683670637170788840xe91d153e0b41518a2ce8dd3d7944fa863463a97d17285628950x05e8bbdb89c84a14d05194bbbae81caf2340db72120669236884289830x007068173910cf8719b6f2e66a18b6825c9dde820x05...0.193145918304372186430986421068454610.3033804066591317111055858533563476...quickstart1728822710False0x00000000000000000000000000000000000000000000...0x007068173910cf8719b6f2e66a18b6825c9dde82False1728691200[Yes, No]Will the emergency public warning tests planne...0xa610166e379c42404bd27bf12a16119fdb5171990c3e...
39305982032745443840.93059923757170080912179287297934220xe91d153e0b41518a2ce8dd3d7944fa863463a97d17285966450x17c17ca981b7e244d0bad80b632a082dc1db36e593059820327454430x007068173910cf8719b6f2e66a18b6825c9dde820x17...0.61182574965085521123121168753388900.6579972404391247884756597316198778...quickstart1728822710False0x00000000000000000000000000000000000000000000...0x007068173910cf8719b6f2e66a18b6825c9dde82False1728691200[Yes, No]Will the emergency public warning tests planne...0xa610166e379c42404bd27bf12a16119fdb5171990c3e...
417986959651029184001.7986967959313423139361257822752250xe91d153e0b41518a2ce8dd3d7944fa863463a97d17283377800x1d942103400c1f1657dcbffd5e08904787ea936b179869596510291840x007068173910cf8719b6f2e66a18b6825c9dde820x1d...0.763615736941978768175557728675570300.8080447772492735383356100969932859...quickstart1728822710False0x00000000000000000000000000000000000000000000...0x007068173910cf8719b6f2e66a18b6825c9dde82False1728691200[Yes, No]Will the emergency public warning tests planne...0xa610166e379c42404bd27bf12a16119fdb5171990c3e...
..................................................................
47121250000000000000000.024999798657898049970033532754559420xe91d153e0b41518a2ce8dd3d7944fa863463a97d17308555750xac3ebb0ab2e0dc9aff761a9841e91e02e537cdbf2500000000000000xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xac...0.505275445124332354557575617598941910.5070214284001796010550698533200886...pearl1731371725False0x00000000000000000000000000000000000000000000...0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2dFalse1731283200[Yes, No]Will any government health agency endorse the ...0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e...
47122250000000000000000.024999818399047984114266577521472030xe91d153e0b41518a2ce8dd3d7944fa863463a97d17308629950xaeb8c31302361d42ec806faf406ef0c30b6eba5f2500000000000000xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xae...0.513942914519699615728092386411306410.5156576876136041031258720624505359...pearl1731371725False0x00000000000000000000000000000000000000000000...0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2dFalse1731283200[Yes, No]Will any government health agency endorse the ...0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e...
47123250000000000000000.024999914083826582974372421840209790xe91d153e0b41518a2ce8dd3d7944fa863463a97d17308590100xb42a955a0e06b3e6bdf229c9abfd2fdad20688a72500000000000000xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xb4...0.512221905132587079427043661633185410.5139429145196996157280923864113064...pearl1731371725False0x00000000000000000000000000000000000000000000...0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2dFalse1731283200[Yes, No]Will any government health agency endorse the ...0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e...
47124250000000000000000.024999893440994675884212471977820770xe91d153e0b41518a2ce8dd3d7944fa863463a97d17308545450xce9e38ee41e5e4b20d6670e2cba28c06dcd9470c2500000000000000xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xce...0.510.5017647318434458790959496906595747...pearl1731371725False0x00000000000000000000000000000000000000000000...0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2dFalse1731283200[Yes, No]Will any government health agency endorse the ...0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e...
47125250000000000000000.02499985660634600387505389875173410xe91d153e0b41518a2ce8dd3d7944fa863463a97d17308964500xd0d8e2b90946dc8ac5f5f48a08d9d5e7e5c5b3a02500000000000000xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xd0...0.583886879309206052085215404686561510.5853343937326606060459394462230992...pearl1731371725False0x00000000000000000000000000000000000000000000...0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2dFalse1731283200[Yes, No]Will any government health agency endorse the ...0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e...
\n", + "

47113 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " collateralAmount collateralAmountUSD \\\n", + "0 930596765045617408 0.9305977993411753386434828033666473 \n", + "1 1033247234796193800 1.033250126003339493791032993674525 \n", + "2 1206692368842898300 1.206691596248187968367063717078884 \n", + "3 930598203274544384 0.9305992375717008091217928729793422 \n", + "4 1798695965102918400 1.798696795931342313936125782275225 \n", + "... ... ... \n", + "47121 25000000000000000 0.02499979865789804997003353275455942 \n", + "47122 25000000000000000 0.02499981839904798411426657752147203 \n", + "47123 25000000000000000 0.02499991408382658297437242184020979 \n", + "47124 25000000000000000 0.02499989344099467588421247197782077 \n", + "47125 25000000000000000 0.0249998566063460038750538987517341 \n", + "\n", + " collateralToken creationTimestamp \\\n", + "0 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1728596605 \n", + "1 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1728505575 \n", + "2 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1728562895 \n", + "3 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1728596645 \n", + "4 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1728337780 \n", + "... ... ... \n", + "47121 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1730855575 \n", + "47122 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1730862995 \n", + "47123 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1730859010 \n", + "47124 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1730854545 \n", + "47125 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1730896450 \n", + "\n", + " trader_address feeAmount \\\n", + "0 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6 9305967650456174 \n", + "1 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 10332472347961938 \n", + "2 0x05e8bbdb89c84a14d05194bbbae81caf2340db72 12066923688428983 \n", + "3 0x17c17ca981b7e244d0bad80b632a082dc1db36e5 9305982032745443 \n", + "4 0x1d942103400c1f1657dcbffd5e08904787ea936b 17986959651029184 \n", + "... ... ... \n", + "47121 0xac3ebb0ab2e0dc9aff761a9841e91e02e537cdbf 250000000000000 \n", + "47122 0xaeb8c31302361d42ec806faf406ef0c30b6eba5f 250000000000000 \n", + "47123 0xb42a955a0e06b3e6bdf229c9abfd2fdad20688a7 250000000000000 \n", + "47124 0xce9e38ee41e5e4b20d6670e2cba28c06dcd9470c 250000000000000 \n", + "47125 0xd0d8e2b90946dc8ac5f5f48a08d9d5e7e5c5b3a0 250000000000000 \n", + "\n", + " id \\\n", + "0 0x007068173910cf8719b6f2e66a18b6825c9dde820x01... \n", + "1 0x007068173910cf8719b6f2e66a18b6825c9dde820x03... \n", + "2 0x007068173910cf8719b6f2e66a18b6825c9dde820x05... \n", + "3 0x007068173910cf8719b6f2e66a18b6825c9dde820x17... \n", + "4 0x007068173910cf8719b6f2e66a18b6825c9dde820x1d... \n", + "... ... \n", + "47121 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xac... \n", + "47122 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xae... \n", + "47123 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xb4... \n", + "47124 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xce... \n", + "47125 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d0xd0... \n", + "\n", + " oldOutcomeTokenMarginalPrice outcomeIndex \\\n", + "0 0.5581119797629801968338361802418564 0 \n", + "1 0.6602089902983034451244461308090707 0 \n", + "2 0.1931459183043721864309864210684546 1 \n", + "3 0.611825749650855211231211687533889 0 \n", + "4 0.7636157369419787681755577286755703 0 \n", + "... ... ... \n", + "47121 0.5052754451243323545575756175989419 1 \n", + "47122 0.5139429145196996157280923864113064 1 \n", + "47123 0.5122219051325870794270436616331854 1 \n", + "47124 0.5 1 \n", + "47125 0.5838868793092060520852154046865615 1 \n", + "\n", + " outcomeTokenMarginalPrice ... market_creator \\\n", + "0 0.611825749650855211231211687533889 ... quickstart \n", + "1 0.7034159692833852946883644485233207 ... quickstart \n", + "2 0.3033804066591317111055858533563476 ... quickstart \n", + "3 0.6579972404391247884756597316198778 ... quickstart \n", + "4 0.8080447772492735383356100969932859 ... quickstart \n", + "... ... ... ... \n", + "47121 0.5070214284001796010550698533200886 ... pearl \n", + "47122 0.5156576876136041031258720624505359 ... pearl \n", + "47123 0.5139429145196996157280923864113064 ... pearl \n", + "47124 0.5017647318434458790959496906595747 ... pearl \n", + "47125 0.5853343937326606060459394462230992 ... pearl \n", + "\n", + " fpmm.answerFinalizedTimestamp fpmm.arbitrationOccurred \\\n", + "0 1728822710 False \n", + "1 1728822710 False \n", + "2 1728822710 False \n", + "3 1728822710 False \n", + "4 1728822710 False \n", + "... ... ... \n", + "47121 1731371725 False \n", + "47122 1731371725 False \n", + "47123 1731371725 False \n", + "47124 1731371725 False \n", + "47125 1731371725 False \n", + "\n", + " fpmm.currentAnswer \\\n", + "0 0x00000000000000000000000000000000000000000000... \n", + "1 0x00000000000000000000000000000000000000000000... \n", + "2 0x00000000000000000000000000000000000000000000... \n", + "3 0x00000000000000000000000000000000000000000000... \n", + "4 0x00000000000000000000000000000000000000000000... \n", + "... ... \n", + "47121 0x00000000000000000000000000000000000000000000... \n", + "47122 0x00000000000000000000000000000000000000000000... \n", + "47123 0x00000000000000000000000000000000000000000000... \n", + "47124 0x00000000000000000000000000000000000000000000... \n", + "47125 0x00000000000000000000000000000000000000000000... \n", + "\n", + " fpmm.id fpmm.isPendingArbitration \\\n", + "0 0x007068173910cf8719b6f2e66a18b6825c9dde82 False \n", + "1 0x007068173910cf8719b6f2e66a18b6825c9dde82 False \n", + "2 0x007068173910cf8719b6f2e66a18b6825c9dde82 False \n", + "3 0x007068173910cf8719b6f2e66a18b6825c9dde82 False \n", + "4 0x007068173910cf8719b6f2e66a18b6825c9dde82 False \n", + "... ... ... \n", + "47121 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d False \n", + "47122 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d False \n", + "47123 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d False \n", + "47124 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d False \n", + "47125 0xfff7eca2cfb2e53781a6e3aeed843d23324d1c2d False \n", + "\n", + " fpmm.openingTimestamp fpmm.outcomes \\\n", + "0 1728691200 [Yes, No] \n", + "1 1728691200 [Yes, No] \n", + "2 1728691200 [Yes, No] \n", + "3 1728691200 [Yes, No] \n", + "4 1728691200 [Yes, No] \n", + "... ... ... \n", + "47121 1731283200 [Yes, No] \n", + "47122 1731283200 [Yes, No] \n", + "47123 1731283200 [Yes, No] \n", + "47124 1731283200 [Yes, No] \n", + "47125 1731283200 [Yes, No] \n", + "\n", + " fpmm.title \\\n", + "0 Will the emergency public warning tests planne... \n", + "1 Will the emergency public warning tests planne... \n", + "2 Will the emergency public warning tests planne... \n", + "3 Will the emergency public warning tests planne... \n", + "4 Will the emergency public warning tests planne... \n", + "... ... \n", + "47121 Will any government health agency endorse the ... \n", + "47122 Will any government health agency endorse the ... \n", + "47123 Will any government health agency endorse the ... \n", + "47124 Will any government health agency endorse the ... \n", + "47125 Will any government health agency endorse the ... \n", + "\n", + " fpmm.condition.id \n", + "0 0xa610166e379c42404bd27bf12a16119fdb5171990c3e... \n", + "1 0xa610166e379c42404bd27bf12a16119fdb5171990c3e... \n", + "2 0xa610166e379c42404bd27bf12a16119fdb5171990c3e... \n", + "3 0xa610166e379c42404bd27bf12a16119fdb5171990c3e... \n", + "4 0xa610166e379c42404bd27bf12a16119fdb5171990c3e... \n", + "... ... \n", + "47121 0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e... \n", + "47122 0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e... \n", + "47123 0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e... \n", + "47124 0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e... \n", + "47125 0x63a63448a35cc9ca2e846cca20f1b97209ec360dbf2e... \n", + "\n", + "[47113 rows x 24 columns]" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_df.drop_duplicates(subset=[col for col in merge_df.columns if col != 'fpmm.outcomes'])" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Checking column fpmm.answerFinalizedTimestamp:\n", + "\n", + "Checking column fpmm.arbitrationOccurred:\n", + "\n", + "Checking column fpmm.currentAnswer:\n", + "\n", + "Checking column fpmm.id:\n", + "\n", + "Checking column fpmm.isPendingArbitration:\n", + "\n", + "Checking column fpmm.openingTimestamp:\n", + "\n", + "Checking column fpmm.outcomes:\n", + "\n", + "Checking column fpmm.title:\n", + "\n", + "Checking column fpmm.condition.id:\n", + "\n" + ] + } + ], + "source": [ + "for col in ['fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n", + " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n", + " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n", + " 'fpmm.condition.id']:\n", + " print(f\"Checking column {col}:\")\n", + " print(merge_df[col].iloc[0].__class__)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4861" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(trades)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trader_addressmarket_creatortrade_idcreation_timestamptitlemarket_statuscollateral_amountoutcome_indextrade_fee_amountoutcomes_tokens_tradedcurrent_answeris_invalidwinning_tradeearningsredeemedredeemed_amountnum_mech_callsmech_fee_amountnet_earningsroi
00x034c4ad84f7ac6638bf19300d5bbe7d9b981e736quickstart0x039ec9bcbcd776ce9b105ed981d0594a0b5d5f5b0x03...2024-11-06 12:25:00+00:00Will Wendy's announce any additional restauran...CLOSED1.36037810.0136041.6808580FalseFalse0.000000True0.00000000.00-1.373982-1.000000
10x034c4ad84f7ac6638bf19300d5bbe7d9b981e736quickstart0x08181acebfc1b308fbfecbebd24d060fed0cd84e0x03...2024-11-08 16:31:10+00:00Will any major news outlet report on inaccurac...CLOSED1.69052810.0169052.3776601FalseTrue2.377660True2.37766000.000.6702260.392534
20x034c4ad84f7ac6638bf19300d5bbe7d9b981e736quickstart0x08a5f2e0ca0d721a74662833a83cb634afa65e350x03...2024-11-08 16:35:35+00:00Will Microsoft unveil a new climate initiative...CLOSED2.28249410.0228253.3769360FalseFalse0.000000True0.00000000.00-2.305319-1.000000
30x034c4ad84f7ac6638bf19300d5bbe7d9b981e736quickstart0x1d7c76bc561696cf66c010e66ea035347e7491a80x03...2024-11-08 18:32:30+00:00Will Microsoft complete the construction of it...CLOSED2.21621010.0221623.8432711FalseTrue3.843271True3.84327100.001.6048990.716994
40x034c4ad84f7ac6638bf19300d5bbe7d9b981e736quickstart0x245b1b25aa62caf1b9d4379e7dd393d47e3fe0eb0x03...2024-11-06 14:23:20+00:00Will Wendy's open at least 140 new restaurant ...CLOSED3.74683200.0374686.7191340FalseTrue6.719134True6.71913400.002.9348330.775529
...............................................................
48560x6b5d38596ccb989fd9ef8184b0ba76bde1ae3b4bpearl0xfb91d659d1c6acf665abbb7a42d4f18da4d8ce9e0x6b...2024-11-02 19:13:20+00:00Will Kamala Harris win the state of Michigan i...CLOSED0.02500000.0002500.051386-1TrueFalse0.025000False0.00000010.01-0.010250-0.290780
48570xc5bc3ae599aa5dc2f56faeb074e0544d39193790pearl0xfb91d659d1c6acf665abbb7a42d4f18da4d8ce9e0xc5...2024-11-04 14:10:50+00:00Will Kamala Harris win the state of Michigan i...CLOSED0.02500000.0002500.048030-1TrueFalse0.025000False0.00000020.02-0.020250-0.447514
48580xc5bc3ae599aa5dc2f56faeb074e0544d39193790pearl0xfd8612f9cad2a0672844e2cb794b8e1b7294a9040xc5...2024-11-04 14:20:10+00:00Will any major cybersecurity firm announce new...CLOSED0.02500000.0002500.0460051FalseFalse0.000000False0.00000030.03-0.055250-1.000000
48590xc5bc3ae599aa5dc2f56faeb074e0544d39193790pearl0xfd8612f9cad2a0672844e2cb794b8e1b7294a9040xc5...2024-11-06 17:48:50+00:00Will any major cybersecurity firm announce new...CLOSED0.02500000.0002500.0430491FalseFalse0.000000False0.00000030.03-0.055250-1.000000
48600xc5bc3ae599aa5dc2f56faeb074e0544d39193790pearl0xffdd70e81b9af2aac15bf6820a4085be4d79254d0xc5...2024-11-06 17:45:45+00:00Will any country announce increased military a...CLOSED0.02500000.0002500.0430760FalseTrue0.043076False0.00000010.010.0078260.222004
\n", + "

4861 rows × 20 columns

\n", + "
" + ], + "text/plain": [ + " trader_address market_creator \\\n", + "0 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 quickstart \n", + "1 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 quickstart \n", + "2 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 quickstart \n", + "3 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 quickstart \n", + "4 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 quickstart \n", + "... ... ... \n", + "4856 0x6b5d38596ccb989fd9ef8184b0ba76bde1ae3b4b pearl \n", + "4857 0xc5bc3ae599aa5dc2f56faeb074e0544d39193790 pearl \n", + "4858 0xc5bc3ae599aa5dc2f56faeb074e0544d39193790 pearl \n", + "4859 0xc5bc3ae599aa5dc2f56faeb074e0544d39193790 pearl \n", + "4860 0xc5bc3ae599aa5dc2f56faeb074e0544d39193790 pearl \n", + "\n", + " trade_id \\\n", + "0 0x039ec9bcbcd776ce9b105ed981d0594a0b5d5f5b0x03... \n", + "1 0x08181acebfc1b308fbfecbebd24d060fed0cd84e0x03... \n", + "2 0x08a5f2e0ca0d721a74662833a83cb634afa65e350x03... \n", + "3 0x1d7c76bc561696cf66c010e66ea035347e7491a80x03... \n", + "4 0x245b1b25aa62caf1b9d4379e7dd393d47e3fe0eb0x03... \n", + "... ... \n", + "4856 0xfb91d659d1c6acf665abbb7a42d4f18da4d8ce9e0x6b... \n", + "4857 0xfb91d659d1c6acf665abbb7a42d4f18da4d8ce9e0xc5... \n", + "4858 0xfd8612f9cad2a0672844e2cb794b8e1b7294a9040xc5... \n", + "4859 0xfd8612f9cad2a0672844e2cb794b8e1b7294a9040xc5... \n", + "4860 0xffdd70e81b9af2aac15bf6820a4085be4d79254d0xc5... \n", + "\n", + " creation_timestamp \\\n", + "0 2024-11-06 12:25:00+00:00 \n", + "1 2024-11-08 16:31:10+00:00 \n", + "2 2024-11-08 16:35:35+00:00 \n", + "3 2024-11-08 18:32:30+00:00 \n", + "4 2024-11-06 14:23:20+00:00 \n", + "... ... \n", + "4856 2024-11-02 19:13:20+00:00 \n", + "4857 2024-11-04 14:10:50+00:00 \n", + "4858 2024-11-04 14:20:10+00:00 \n", + "4859 2024-11-06 17:48:50+00:00 \n", + "4860 2024-11-06 17:45:45+00:00 \n", + "\n", + " title market_status \\\n", + "0 Will Wendy's announce any additional restauran... CLOSED \n", + "1 Will any major news outlet report on inaccurac... CLOSED \n", + "2 Will Microsoft unveil a new climate initiative... CLOSED \n", + "3 Will Microsoft complete the construction of it... CLOSED \n", + "4 Will Wendy's open at least 140 new restaurant ... CLOSED \n", + "... ... ... \n", + "4856 Will Kamala Harris win the state of Michigan i... CLOSED \n", + "4857 Will Kamala Harris win the state of Michigan i... CLOSED \n", + "4858 Will any major cybersecurity firm announce new... CLOSED \n", + "4859 Will any major cybersecurity firm announce new... CLOSED \n", + "4860 Will any country announce increased military a... CLOSED \n", + "\n", + " collateral_amount outcome_index trade_fee_amount \\\n", + "0 1.360378 1 0.013604 \n", + "1 1.690528 1 0.016905 \n", + "2 2.282494 1 0.022825 \n", + "3 2.216210 1 0.022162 \n", + "4 3.746832 0 0.037468 \n", + "... ... ... ... \n", + "4856 0.025000 0 0.000250 \n", + "4857 0.025000 0 0.000250 \n", + "4858 0.025000 0 0.000250 \n", + "4859 0.025000 0 0.000250 \n", + "4860 0.025000 0 0.000250 \n", + "\n", + " outcomes_tokens_traded current_answer is_invalid winning_trade \\\n", + "0 1.680858 0 False False \n", + "1 2.377660 1 False True \n", + "2 3.376936 0 False False \n", + "3 3.843271 1 False True \n", + "4 6.719134 0 False True \n", + "... ... ... ... ... \n", + "4856 0.051386 -1 True False \n", + "4857 0.048030 -1 True False \n", + "4858 0.046005 1 False False \n", + "4859 0.043049 1 False False \n", + "4860 0.043076 0 False True \n", + "\n", + " earnings redeemed redeemed_amount num_mech_calls mech_fee_amount \\\n", + "0 0.000000 True 0.000000 0 0.00 \n", + "1 2.377660 True 2.377660 0 0.00 \n", + "2 0.000000 True 0.000000 0 0.00 \n", + "3 3.843271 True 3.843271 0 0.00 \n", + "4 6.719134 True 6.719134 0 0.00 \n", + "... ... ... ... ... ... \n", + "4856 0.025000 False 0.000000 1 0.01 \n", + "4857 0.025000 False 0.000000 2 0.02 \n", + "4858 0.000000 False 0.000000 3 0.03 \n", + "4859 0.000000 False 0.000000 3 0.03 \n", + "4860 0.043076 False 0.000000 1 0.01 \n", + "\n", + " net_earnings roi \n", + "0 -1.373982 -1.000000 \n", + "1 0.670226 0.392534 \n", + "2 -2.305319 -1.000000 \n", + "3 1.604899 0.716994 \n", + "4 2.934833 0.775529 \n", + "... ... ... \n", + "4856 -0.010250 -0.290780 \n", + "4857 -0.020250 -0.447514 \n", + "4858 -0.055250 -1.000000 \n", + "4859 -0.055250 -1.000000 \n", + "4860 0.007826 0.222004 \n", + "\n", + "[4861 rows x 20 columns]" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trades.drop_duplicates()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades[\"creationTimestamp\"]= pd.to_datetime(\n", + " trades[\"creationTimestamp\"]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1730108820'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "max(trades.creationTimestamp)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "str" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(trades.creationTimestamp.iloc[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "all_trades = pd.read_parquet('../data/all_trades_profitability.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 26861 entries, 0 to 26860\n", + "Data columns (total 21 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trader_address 26861 non-null object \n", + " 1 market_creator 26861 non-null object \n", + " 2 trade_id 26861 non-null object \n", + " 3 creation_timestamp 26861 non-null datetime64[ns, UTC]\n", + " 4 title 26861 non-null object \n", + " 5 market_status 26861 non-null object \n", + " 6 collateral_amount 26861 non-null float64 \n", + " 7 outcome_index 26861 non-null object \n", + " 8 trade_fee_amount 26861 non-null float64 \n", + " 9 outcomes_tokens_traded 26861 non-null float64 \n", + " 10 current_answer 26861 non-null int64 \n", + " 11 is_invalid 26861 non-null bool \n", + " 12 winning_trade 26861 non-null bool \n", + " 13 earnings 26861 non-null float64 \n", + " 14 redeemed 26861 non-null bool \n", + " 15 redeemed_amount 26861 non-null float64 \n", + " 16 num_mech_calls 26861 non-null int64 \n", + " 17 mech_fee_amount 26861 non-null float64 \n", + " 18 net_earnings 26861 non-null float64 \n", + " 19 roi 26861 non-null float64 \n", + " 20 staking 26861 non-null object \n", + "dtypes: bool(3), datetime64[ns, UTC](1), float64(8), int64(2), object(7)\n", + "memory usage: 3.8+ MB\n" + ] + } + ], + "source": [ + "all_trades.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "latest = max(all_trades.creation_timestamp)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1729903200" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "int(latest.timestamp())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "\n", + "def extract_title(text: str) -> str:\n", + " question = re.search('\"([^\"]+)\"', text).group(1)\n", + " return question" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "tools[\"title\"] = tools.apply(lambda x: extract_title(x.prompt_request), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
request_idrequest_blockprompt_requesttoolnoncetrader_addressdeliver_blockerrorerror_messageprompt_response...confidenceinfo_utilityvotewin_probabilitymarket_creatortitlecurrentAnswerrequest_timerequest_month_yearrequest_month_year_week
5822220491272998867787999910776935393408411702877...36720164Please take over the role of a Data Scientist ...superforcaster9d205bb8-09d3-42f9-aa92-c58a31a4082e0xd127e0434a284e04034b0e73e891c501f583ad3d367201740None\\nYou are an advanced AI system which has been......0.60.4No0.85quickstartWill any regulatory body impose penalties on C...None2024-10-27 16:05:202024-102024-10-21/2024-10-27
41647954746357421406217625419969909404056225427053...36701081Please take over the role of a Data Scientist ...superforcaster1b609b7e-e0d2-4bb8-ad6b-7d0e6e6610b50x8dd0f0f64e575a356545d9ed096122a1887e64bf367010990None\\nYou are an advanced AI system which has been......0.60.4No0.70quickstartWill any new human rights organizations public...None2024-10-26 12:40:252024-102024-10-21/2024-10-27
60544565494807135187962212494187805259514627250416...36703764Please take over the role of a Data Scientist ...superforcasterc8cabb44-c1e5-4e35-a7a3-005fb3eecc0e0xacb24b20805c6e475d2c17edb2a997c1ba47de79367037770None\\nYou are an advanced AI system which has been......0.60.3No0.90quickstartWill any cybersecurity firm publicly announce ...None2024-10-26 16:31:552024-102024-10-21/2024-10-27
72657132897945614190217838117925907875446836020321...36702802Please take over the role of a Data Scientist ...superforcastera8e456c8-b87a-41fd-ba17-a69d9b2dc1950xacb24b20805c6e475d2c17edb2a997c1ba47de79367028150None\\nYou are an advanced AI system which has been......0.60.4No0.70quickstartWill any of the Caspian Sea countries publicly...None2024-10-26 15:09:352024-102024-10-21/2024-10-27
91492657251108286481564605188898254109837583373485...36708789Please take over the role of a Data Scientist ...superforcaster631c87dc-f7b7-42c7-a7db-4248834bc9410xd11e4a1aa52a75ee7186da44b4c84555f0b9aa95367088040None\\nYou are an advanced AI system which has been......0.60.3No0.95quickstartWill any cybersecurity firm publicly announce ...None2024-10-26 23:43:502024-102024-10-21/2024-10-27
\n", + "

5 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " request_id request_block \\\n", + "582 2220491272998867787999910776935393408411702877... 36720164 \n", + "4164 7954746357421406217625419969909404056225427053... 36701081 \n", + "6054 4565494807135187962212494187805259514627250416... 36703764 \n", + "7265 7132897945614190217838117925907875446836020321... 36702802 \n", + "9149 2657251108286481564605188898254109837583373485... 36708789 \n", + "\n", + " prompt_request tool \\\n", + "582 Please take over the role of a Data Scientist ... superforcaster \n", + "4164 Please take over the role of a Data Scientist ... superforcaster \n", + "6054 Please take over the role of a Data Scientist ... superforcaster \n", + "7265 Please take over the role of a Data Scientist ... superforcaster \n", + "9149 Please take over the role of a Data Scientist ... superforcaster \n", + "\n", + " nonce \\\n", + "582 9d205bb8-09d3-42f9-aa92-c58a31a4082e \n", + "4164 1b609b7e-e0d2-4bb8-ad6b-7d0e6e6610b5 \n", + "6054 c8cabb44-c1e5-4e35-a7a3-005fb3eecc0e \n", + "7265 a8e456c8-b87a-41fd-ba17-a69d9b2dc195 \n", + "9149 631c87dc-f7b7-42c7-a7db-4248834bc941 \n", + "\n", + " trader_address deliver_block error \\\n", + "582 0xd127e0434a284e04034b0e73e891c501f583ad3d 36720174 0 \n", + "4164 0x8dd0f0f64e575a356545d9ed096122a1887e64bf 36701099 0 \n", + "6054 0xacb24b20805c6e475d2c17edb2a997c1ba47de79 36703777 0 \n", + "7265 0xacb24b20805c6e475d2c17edb2a997c1ba47de79 36702815 0 \n", + "9149 0xd11e4a1aa52a75ee7186da44b4c84555f0b9aa95 36708804 0 \n", + "\n", + " error_message prompt_response ... \\\n", + "582 None \\nYou are an advanced AI system which has been... ... \n", + "4164 None \\nYou are an advanced AI system which has been... ... \n", + "6054 None \\nYou are an advanced AI system which has been... ... \n", + "7265 None \\nYou are an advanced AI system which has been... ... \n", + "9149 None \\nYou are an advanced AI system which has been... ... \n", + "\n", + " confidence info_utility vote win_probability market_creator \\\n", + "582 0.6 0.4 No 0.85 quickstart \n", + "4164 0.6 0.4 No 0.70 quickstart \n", + "6054 0.6 0.3 No 0.90 quickstart \n", + "7265 0.6 0.4 No 0.70 quickstart \n", + "9149 0.6 0.3 No 0.95 quickstart \n", + "\n", + " title currentAnswer \\\n", + "582 Will any regulatory body impose penalties on C... None \n", + "4164 Will any new human rights organizations public... None \n", + "6054 Will any cybersecurity firm publicly announce ... None \n", + "7265 Will any of the Caspian Sea countries publicly... None \n", + "9149 Will any cybersecurity firm publicly announce ... None \n", + "\n", + " request_time request_month_year request_month_year_week \n", + "582 2024-10-27 16:05:20 2024-10 2024-10-21/2024-10-27 \n", + "4164 2024-10-26 12:40:25 2024-10 2024-10-21/2024-10-27 \n", + "6054 2024-10-26 16:31:55 2024-10 2024-10-21/2024-10-27 \n", + "7265 2024-10-26 15:09:35 2024-10 2024-10-21/2024-10-27 \n", + "9149 2024-10-26 23:43:50 2024-10 2024-10-21/2024-10-27 \n", + "\n", + "[5 rows x 23 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools_superforcaster.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "superforcaster_trades = pd.merge(all_trades, tools_superforcaster, on=[\"title\",\"trader_address\"], how=\"inner\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 0 entries\n", + "Data columns (total 42 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trader_address 0 non-null object \n", + " 1 market_creator_x 0 non-null object \n", + " 2 trade_id 0 non-null object \n", + " 3 creation_timestamp 0 non-null datetime64[ns, UTC]\n", + " 4 title 0 non-null object \n", + " 5 market_status 0 non-null object \n", + " 6 collateral_amount 0 non-null float64 \n", + " 7 outcome_index 0 non-null object \n", + " 8 trade_fee_amount 0 non-null float64 \n", + " 9 outcomes_tokens_traded 0 non-null float64 \n", + " 10 current_answer 0 non-null int64 \n", + " 11 is_invalid 0 non-null bool \n", + " 12 winning_trade 0 non-null bool \n", + " 13 earnings 0 non-null float64 \n", + " 14 redeemed 0 non-null bool \n", + " 15 redeemed_amount 0 non-null float64 \n", + " 16 num_mech_calls 0 non-null int64 \n", + " 17 mech_fee_amount 0 non-null float64 \n", + " 18 net_earnings 0 non-null float64 \n", + " 19 roi 0 non-null float64 \n", + " 20 staking 0 non-null object \n", + " 21 request_id 0 non-null object \n", + " 22 request_block 0 non-null object \n", + " 23 prompt_request 0 non-null object \n", + " 24 tool 0 non-null object \n", + " 25 nonce 0 non-null object \n", + " 26 deliver_block 0 non-null object \n", + " 27 error 0 non-null int64 \n", + " 28 error_message 0 non-null object \n", + " 29 prompt_response 0 non-null object \n", + " 30 mech_address 0 non-null object \n", + " 31 p_yes 0 non-null float64 \n", + " 32 p_no 0 non-null float64 \n", + " 33 confidence 0 non-null float64 \n", + " 34 info_utility 0 non-null float64 \n", + " 35 vote 0 non-null object \n", + " 36 win_probability 0 non-null float64 \n", + " 37 market_creator_y 0 non-null object \n", + " 38 currentAnswer 0 non-null object \n", + " 39 request_time 0 non-null object \n", + " 40 request_month_year 0 non-null object \n", + " 41 request_month_year_week 0 non-null object \n", + "dtypes: bool(3), datetime64[ns, UTC](1), float64(13), int64(3), object(22)\n", + "memory usage: 132.0+ bytes\n" + ] + } + ], + "source": [ + "superforcaster_trades.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Will the emergency public warning tests planned by Russia on Wednesday be successful?'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_trades.iloc[0].title" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Will any regulatory body impose penalties on CrowdStrike regarding the software update issue by October 31, 2024?'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools_superforcaster.iloc[0].title" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "superforcaster_titles = tools_superforcaster.title.unique()\n", + "for title in superforcaster_titles:\n", + " all_trades_title = all_trades.loc[all_trades[\"title\"]== title]\n", + " superforcaster_data = tools_superforcaster.loc[tools_superforcaster[\"title\"]==title]\n", + " matched_data = pd.merge(all_trades_title,superforcaster_data, on=[\"trader_address\"], how=\"inner\")\n", + " if len(matched_data) > 0 :\n", + " print(matched_data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hf_dashboards", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}