{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import gc\n",
"sns.set_style(\"darkgrid\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"pd.set_option('display.float_format', lambda x: '%.3f' % x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Preparation"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"tools = pd.read_parquet('../data/tools.parquet')\n",
"fpmms = pd.read_parquet('../data/fpmms.parquet')\n",
"summary_traders = pd.read_parquet('../data/summary_profitability.parquet')\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"all_trades = pd.read_parquet('../data/all_trades_profitability.parquet')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" trader_address | \n",
" trade_id | \n",
" creation_timestamp | \n",
" title | \n",
" market_status | \n",
" collateral_amount | \n",
" outcome_index | \n",
" trade_fee_amount | \n",
" outcomes_tokens_traded | \n",
" current_answer | \n",
" is_invalid | \n",
" winning_trade | \n",
" earnings | \n",
" redeemed | \n",
" redeemed_amount | \n",
" num_mech_calls | \n",
" mech_fee_amount | \n",
" net_earnings | \n",
" roi | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 | \n",
" 0x017947579ab51313c31fe1cc562c0f1726ec09c90x02... | \n",
" 2024-05-19 01:26:30+00:00 | \n",
" Will Google's Pixel 9 lineup be officially rel... | \n",
" CLOSED | \n",
" 0.638 | \n",
" 1 | \n",
" 0.013 | \n",
" 1.206 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" 1.206 | \n",
" True | \n",
" 1.206 | \n",
" 0 | \n",
" 0.000 | \n",
" 0.556 | \n",
" 0.854 | \n",
"
\n",
" \n",
" 1 | \n",
" 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 | \n",
" 0x027f6bc849e273477f4a63085192714084917fcc0x02... | \n",
" 2024-06-12 01:16:55+00:00 | \n",
" Will the 2D-animated Paramount Plus show 'Tale... | \n",
" CLOSED | \n",
" 1.000 | \n",
" 1 | \n",
" 0.020 | \n",
" 1.840 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" 1.840 | \n",
" True | \n",
" 1.840 | \n",
" 3 | \n",
" 0.030 | \n",
" 0.790 | \n",
" 0.752 | \n",
"
\n",
" \n",
" 2 | \n",
" 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 | \n",
" 0x027f6bc849e273477f4a63085192714084917fcc0x02... | \n",
" 2024-06-12 15:08:00+00:00 | \n",
" Will the 2D-animated Paramount Plus show 'Tale... | \n",
" CLOSED | \n",
" 0.800 | \n",
" 1 | \n",
" 0.016 | \n",
" 1.518 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" 1.518 | \n",
" True | \n",
" 1.518 | \n",
" 3 | \n",
" 0.030 | \n",
" 0.672 | \n",
" 0.795 | \n",
"
\n",
" \n",
" 3 | \n",
" 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 | \n",
" 0x027f6bc849e273477f4a63085192714084917fcc0x02... | \n",
" 2024-06-13 07:22:55+00:00 | \n",
" Will the 2D-animated Paramount Plus show 'Tale... | \n",
" CLOSED | \n",
" 0.456 | \n",
" 0 | \n",
" 0.009 | \n",
" 1.003 | \n",
" 1 | \n",
" False | \n",
" False | \n",
" 0.000 | \n",
" True | \n",
" 0.000 | \n",
" 3 | \n",
" 0.030 | \n",
" -0.495 | \n",
" -1.000 | \n",
"
\n",
" \n",
" 4 | \n",
" 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 | \n",
" 0x027f6bc849e273477f4a63085192714084917fcc0x02... | \n",
" 2024-06-14 03:01:20+00:00 | \n",
" Will the 2D-animated Paramount Plus show 'Tale... | \n",
" CLOSED | \n",
" 0.704 | \n",
" 0 | \n",
" 0.014 | \n",
" 1.198 | \n",
" 1 | \n",
" False | \n",
" False | \n",
" 0.000 | \n",
" True | \n",
" 0.000 | \n",
" 3 | \n",
" 0.030 | \n",
" -0.748 | \n",
" -1.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" trader_address \\\n",
"0 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 \n",
"1 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 \n",
"2 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 \n",
"3 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 \n",
"4 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 \n",
"\n",
" trade_id \\\n",
"0 0x017947579ab51313c31fe1cc562c0f1726ec09c90x02... \n",
"1 0x027f6bc849e273477f4a63085192714084917fcc0x02... \n",
"2 0x027f6bc849e273477f4a63085192714084917fcc0x02... \n",
"3 0x027f6bc849e273477f4a63085192714084917fcc0x02... \n",
"4 0x027f6bc849e273477f4a63085192714084917fcc0x02... \n",
"\n",
" creation_timestamp \\\n",
"0 2024-05-19 01:26:30+00:00 \n",
"1 2024-06-12 01:16:55+00:00 \n",
"2 2024-06-12 15:08:00+00:00 \n",
"3 2024-06-13 07:22:55+00:00 \n",
"4 2024-06-14 03:01:20+00:00 \n",
"\n",
" title market_status \\\n",
"0 Will Google's Pixel 9 lineup be officially rel... CLOSED \n",
"1 Will the 2D-animated Paramount Plus show 'Tale... CLOSED \n",
"2 Will the 2D-animated Paramount Plus show 'Tale... CLOSED \n",
"3 Will the 2D-animated Paramount Plus show 'Tale... CLOSED \n",
"4 Will the 2D-animated Paramount Plus show 'Tale... CLOSED \n",
"\n",
" collateral_amount outcome_index trade_fee_amount outcomes_tokens_traded \\\n",
"0 0.638 1 0.013 1.206 \n",
"1 1.000 1 0.020 1.840 \n",
"2 0.800 1 0.016 1.518 \n",
"3 0.456 0 0.009 1.003 \n",
"4 0.704 0 0.014 1.198 \n",
"\n",
" current_answer is_invalid winning_trade earnings redeemed \\\n",
"0 1 False True 1.206 True \n",
"1 1 False True 1.840 True \n",
"2 1 False True 1.518 True \n",
"3 1 False False 0.000 True \n",
"4 1 False False 0.000 True \n",
"\n",
" redeemed_amount num_mech_calls mech_fee_amount net_earnings roi \n",
"0 1.206 0 0.000 0.556 0.854 \n",
"1 1.840 3 0.030 0.790 0.752 \n",
"2 1.518 3 0.030 0.672 0.795 \n",
"3 0.000 3 0.030 -0.495 -1.000 \n",
"4 0.000 3 0.030 -0.748 -1.000 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_trades.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" trader_address | \n",
" trade_id | \n",
" creation_timestamp | \n",
" title | \n",
" market_status | \n",
" collateral_amount | \n",
" outcome_index | \n",
" trade_fee_amount | \n",
" outcomes_tokens_traded | \n",
" current_answer | \n",
" is_invalid | \n",
" winning_trade | \n",
" earnings | \n",
" redeemed | \n",
" redeemed_amount | \n",
" num_mech_calls | \n",
" mech_fee_amount | \n",
" net_earnings | \n",
" roi | \n",
"
\n",
" \n",
" \n",
" \n",
" 18936 | \n",
" 0xc8929dd39bb5f685435ab16345929a47caacc46b | \n",
" 0xd068383c5d6d1466d10db660f33524c2725f8fb60xc8... | \n",
" 2024-05-22 19:05:00+00:00 | \n",
" Will Elon Musk's Neuralink successfully test i... | \n",
" CLOSED | \n",
" 0.100 | \n",
" 1 | \n",
" 0.002 | \n",
" 0.200 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" 0.200 | \n",
" False | \n",
" 0.000 | \n",
" 2 | \n",
" 0.020 | \n",
" 0.078 | \n",
" 0.640 | \n",
"
\n",
" \n",
" 18937 | \n",
" 0xc8929dd39bb5f685435ab16345929a47caacc46b | \n",
" 0xec5578e95c71ddbad6aabf8517dcd35cf53da4970xc8... | \n",
" 2024-05-22 17:57:35+00:00 | \n",
" Will Kevin Spacey return to acting by 25 May 2... | \n",
" CLOSED | \n",
" 0.160 | \n",
" 0 | \n",
" 0.003 | \n",
" 0.309 | \n",
" 0 | \n",
" False | \n",
" True | \n",
" 0.309 | \n",
" False | \n",
" 0.000 | \n",
" 2 | \n",
" 0.020 | \n",
" 0.126 | \n",
" 0.686 | \n",
"
\n",
" \n",
" 18938 | \n",
" 0xc8929dd39bb5f685435ab16345929a47caacc46b | \n",
" 0xf2c74ef39065ee2e239bf8551aedddd6b2d6add70xc8... | \n",
" 2024-05-22 20:00:05+00:00 | \n",
" Will Donald Trump testify in the hush money ca... | \n",
" CLOSED | \n",
" 0.100 | \n",
" 1 | \n",
" 0.002 | \n",
" 0.200 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" 0.200 | \n",
" False | \n",
" 0.000 | \n",
" 3 | \n",
" 0.030 | \n",
" 0.068 | \n",
" 0.518 | \n",
"
\n",
" \n",
" 18939 | \n",
" 0xc8929dd39bb5f685435ab16345929a47caacc46b | \n",
" 0xfdf1a762eaae0a4472599f26aeafeae043b37d360xc8... | \n",
" 2024-05-22 19:42:35+00:00 | \n",
" Will a new Marvel Cinematic Universe (MCU) mov... | \n",
" CLOSED | \n",
" 0.100 | \n",
" 0 | \n",
" 0.002 | \n",
" 0.211 | \n",
" 1 | \n",
" False | \n",
" False | \n",
" 0.000 | \n",
" False | \n",
" 0.000 | \n",
" 2 | \n",
" 0.020 | \n",
" -0.122 | \n",
" -1.000 | \n",
"
\n",
" \n",
" 18940 | \n",
" 0x7b2e78d4dfaaba045a167a70da285e30e8fca196 | \n",
" 0xaf8fa4b8e04bbbee6903fede1d27b3aad25b468e0x7b... | \n",
" 2024-07-05 09:10:40+00:00 | \n",
" Will Vice President Kamala Harris be the Democ... | \n",
" CLOSED | \n",
" 1.000 | \n",
" 1 | \n",
" 0.020 | \n",
" 1.717 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" 1.717 | \n",
" False | \n",
" 0.000 | \n",
" 0 | \n",
" 0.000 | \n",
" 0.697 | \n",
" 0.684 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" trader_address \\\n",
"18936 0xc8929dd39bb5f685435ab16345929a47caacc46b \n",
"18937 0xc8929dd39bb5f685435ab16345929a47caacc46b \n",
"18938 0xc8929dd39bb5f685435ab16345929a47caacc46b \n",
"18939 0xc8929dd39bb5f685435ab16345929a47caacc46b \n",
"18940 0x7b2e78d4dfaaba045a167a70da285e30e8fca196 \n",
"\n",
" trade_id \\\n",
"18936 0xd068383c5d6d1466d10db660f33524c2725f8fb60xc8... \n",
"18937 0xec5578e95c71ddbad6aabf8517dcd35cf53da4970xc8... \n",
"18938 0xf2c74ef39065ee2e239bf8551aedddd6b2d6add70xc8... \n",
"18939 0xfdf1a762eaae0a4472599f26aeafeae043b37d360xc8... \n",
"18940 0xaf8fa4b8e04bbbee6903fede1d27b3aad25b468e0x7b... \n",
"\n",
" creation_timestamp \\\n",
"18936 2024-05-22 19:05:00+00:00 \n",
"18937 2024-05-22 17:57:35+00:00 \n",
"18938 2024-05-22 20:00:05+00:00 \n",
"18939 2024-05-22 19:42:35+00:00 \n",
"18940 2024-07-05 09:10:40+00:00 \n",
"\n",
" title market_status \\\n",
"18936 Will Elon Musk's Neuralink successfully test i... CLOSED \n",
"18937 Will Kevin Spacey return to acting by 25 May 2... CLOSED \n",
"18938 Will Donald Trump testify in the hush money ca... CLOSED \n",
"18939 Will a new Marvel Cinematic Universe (MCU) mov... CLOSED \n",
"18940 Will Vice President Kamala Harris be the Democ... CLOSED \n",
"\n",
" collateral_amount outcome_index trade_fee_amount \\\n",
"18936 0.100 1 0.002 \n",
"18937 0.160 0 0.003 \n",
"18938 0.100 1 0.002 \n",
"18939 0.100 0 0.002 \n",
"18940 1.000 1 0.020 \n",
"\n",
" outcomes_tokens_traded current_answer is_invalid winning_trade \\\n",
"18936 0.200 1 False True \n",
"18937 0.309 0 False True \n",
"18938 0.200 1 False True \n",
"18939 0.211 1 False False \n",
"18940 1.717 1 False True \n",
"\n",
" earnings redeemed redeemed_amount num_mech_calls mech_fee_amount \\\n",
"18936 0.200 False 0.000 2 0.020 \n",
"18937 0.309 False 0.000 2 0.020 \n",
"18938 0.200 False 0.000 3 0.030 \n",
"18939 0.000 False 0.000 2 0.020 \n",
"18940 1.717 False 0.000 0 0.000 \n",
"\n",
" net_earnings roi \n",
"18936 0.078 0.640 \n",
"18937 0.126 0.686 \n",
"18938 0.068 0.518 \n",
"18939 -0.122 -1.000 \n",
"18940 0.697 0.684 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_trades.tail()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 17067 entries, 0 to 17066\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 trader_address 17067 non-null object \n",
" 1 trade_id 17067 non-null object \n",
" 2 creation_timestamp 17067 non-null datetime64[ns, UTC]\n",
" 3 title 17067 non-null object \n",
" 4 market_status 17067 non-null object \n",
" 5 collateral_amount 17067 non-null float64 \n",
" 6 outcome_index 17067 non-null object \n",
" 7 trade_fee_amount 17067 non-null float64 \n",
" 8 outcomes_tokens_traded 17067 non-null float64 \n",
" 9 current_answer 17067 non-null int64 \n",
" 10 is_invalid 17067 non-null bool \n",
" 11 winning_trade 17067 non-null bool \n",
" 12 earnings 17067 non-null float64 \n",
" 13 redeemed 17067 non-null bool \n",
" 14 redeemed_amount 17067 non-null float64 \n",
" 15 num_mech_calls 17067 non-null int64 \n",
" 16 mech_fee_amount 17067 non-null float64 \n",
" 17 net_earnings 17067 non-null float64 \n",
" 18 roi 17067 non-null float64 \n",
"dtypes: bool(3), datetime64[ns, UTC](1), float64(8), int64(2), object(5)\n",
"memory usage: 2.1+ MB\n"
]
}
],
"source": [
"all_trades.info()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2024-05-12 00:04:25+0000', tz='UTC')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_trades.creation_timestamp.min()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2024-07-14 01:09:10+0000', tz='UTC')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_trades.creation_timestamp.max()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2024-07-08 02:29:40+0000', tz='UTC')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_trades.creation_timestamp.max()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(339443, 22)\n"
]
},
{
"data": {
"text/plain": [
"(28911882, 34138429)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(tools.shape)\n",
"tools.request_block.min(), tools.request_block.max()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(89518, 22)\n"
]
},
{
"data": {
"text/plain": [
"(33989007, 34993418)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(tools.shape)\n",
"tools.request_block.min(), tools.request_block.max()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Error analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Error analysis only starts from block 321; error messages were not being captured before that point."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['prediction-online', 'prediction-offline', 'normal-sme-generator',\n",
" 'strong-sme-generator', 'prediction-online-sme',\n",
" 'prediction-offline-sme', 'claude-prediction-offline', 'openai',\n",
" 'claude-prediction-online',\n",
" 'prediction-sentence-embedding-conservative',\n",
" 'prediction-online-summarized-info',\n",
" 'prediction-sentence-embedding-bold',\n",
" 'prediction-online-sum-url-content',\n",
" 'openai-gpt-3.5-turbo-instruct',\n",
" 'resolve-market-reasoning-gpt-3.5-turbo',\n",
" 'resolve-market-reasoning-gpt-4', 'prediction-request-rag',\n",
" 'prediction-request-reasoning',\n",
" 'prediction-request-reasoning-claude', 'prediction-url-cot-claude',\n",
" 'prediction-request-rag-claude', 'native_transfer_request',\n",
" 'native_transfer'], dtype=object)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tools.tool.unique()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 339443 entries, 0 to 339442\n",
"Data columns (total 22 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 request_id 339443 non-null object \n",
" 1 request_block 339443 non-null int64 \n",
" 2 prompt_request 339443 non-null object \n",
" 3 tool 339443 non-null object \n",
" 4 nonce 339443 non-null object \n",
" 5 trader_address 339443 non-null object \n",
" 6 deliver_block 339443 non-null int64 \n",
" 7 error 339440 non-null float64\n",
" 8 error_message 56715 non-null object \n",
" 9 prompt_response 252711 non-null object \n",
" 10 mech_address 286960 non-null object \n",
" 11 p_yes 282717 non-null float64\n",
" 12 p_no 282717 non-null float64\n",
" 13 confidence 282717 non-null float64\n",
" 14 info_utility 282717 non-null float64\n",
" 15 vote 256610 non-null object \n",
" 16 win_probability 282717 non-null float64\n",
" 17 title 329340 non-null object \n",
" 18 currentAnswer 267690 non-null object \n",
" 19 request_time 339443 non-null object \n",
" 20 request_month_year 339443 non-null object \n",
" 21 request_month_year_week 339443 non-null object \n",
"dtypes: float64(6), int64(2), object(14)\n",
"memory usage: 57.0+ MB\n"
]
}
],
"source": [
"tools.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2023-07-12 11:58:40'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tools.request_time.min()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"inc_tools = [\n",
" 'prediction-online', \n",
" 'prediction-offline', \n",
" 'claude-prediction-online', \n",
" 'claude-prediction-offline', \n",
" 'prediction-offline-sme',\n",
" 'prediction-online-sme',\n",
" 'prediction-request-rag',\n",
" 'prediction-request-reasoning',\n",
" 'prediction-url-cot-claude', \n",
" 'prediction-request-rag-claude',\n",
" 'prediction-request-reasoning-claude'\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# restrict the requests to the tools listed in inc_tools\n",
"tools_inc = tools.loc[tools['tool'].isin(inc_tools)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Count requests per (tool, week), with one column per value of the `error` flag\n",
"# (0 = success, 1 = error). Rows with a missing `error` flag are left out by groupby.\n",
"error = (\n",
"    tools_inc.groupby(['tool', 'request_month_year_week', 'error'])\n",
"    .size()\n",
"    .unstack()\n",
"    .fillna(0)\n",
")\n",
"# unstack() only creates a column for flag values that actually occur, so make sure\n",
"# both exist; otherwise error[0] / error[1] below would raise a KeyError on data\n",
"# that contains no failures (or no successes) at all.\n",
"for flag in (0, 1):\n",
"    if flag not in error.columns:\n",
"        error[flag] = 0.0\n",
"error = error.reset_index()\n",
"error[\"error_perc\"] = (error[1] / (error[0] + error[1])) * 100\n",
"error[\"total_requests\"] = error[0] + error[1]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Weekly totals over all tools: sum the per-tool success (0) and error (1) counts.\n",
"error_total = (\n",
"    error.groupby(\"request_month_year_week\")\n",
"    .agg({\"total_requests\": \"sum\", 1: \"sum\", 0: \"sum\"})\n",
"    .reset_index()\n",
")\n",
"# Recompute the percentage from the summed counts and round it with the\n",
"# vectorized Series.round instead of a per-element apply.\n",
"error_total[\"error_perc\"] = (\n",
"    (error_total[1] / error_total[\"total_requests\"]) * 100\n",
").round(4)\n",
"# Convert the mixed numeric/str column labels to plain strings.\n",
"error_total.columns = error_total.columns.astype(str)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" request_month_year_week | \n",
" total_requests | \n",
" 1 | \n",
" 0 | \n",
" error_perc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-07-10/2023-07-16 | \n",
" 44.000 | \n",
" 31.000 | \n",
" 13.000 | \n",
" 70.454 | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-07-17/2023-07-23 | \n",
" 56.000 | \n",
" 0.000 | \n",
" 56.000 | \n",
" 0.000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-07-24/2023-07-30 | \n",
" 48.000 | \n",
" 5.000 | \n",
" 43.000 | \n",
" 10.417 | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-07-31/2023-08-06 | \n",
" 922.000 | \n",
" 203.000 | \n",
" 719.000 | \n",
" 22.017 | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-08-07/2023-08-13 | \n",
" 313.000 | \n",
" 9.000 | \n",
" 304.000 | \n",
" 2.875 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error request_month_year_week total_requests 1 0 error_perc\n",
"0 2023-07-10/2023-07-16 44.000 31.000 13.000 70.454\n",
"1 2023-07-17/2023-07-23 56.000 0.000 56.000 0.000\n",
"2 2023-07-24/2023-07-30 48.000 5.000 43.000 10.417\n",
"3 2023-07-31/2023-08-06 922.000 203.000 719.000 22.017\n",
"4 2023-08-07/2023-08-13 313.000 9.000 304.000 2.875"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"error_total.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Error Percentage by Month-Year')"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Weekly error rate over all included tools.\n",
"plt.figure(figsize=(10, 6))\n",
"ax = sns.lineplot(data=error_total, x='request_month_year_week', y='error_perc', color=\"green\")\n",
"# Rotate through tick_params: set_xticklabels(get_xticklabels()) re-labels ticks whose\n",
"# locator is not fixed, which matplotlib warns about and which can mislabel the axis.\n",
"ax.tick_params(axis='x', labelrotation=90)\n",
"plt.xlabel('Month-Year')\n",
"plt.ylabel('Error Percentage')\n",
"plt.title('Error Percentage by Month-Year')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" tool | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" prediction-request-rag-claude | \n",
" 13.498 | \n",
" 1704.000 | \n",
"
\n",
" \n",
" prediction-request-rag | \n",
" 12.041 | \n",
" 490.000 | \n",
"
\n",
" \n",
" prediction-online-sme | \n",
" 9.076 | \n",
" 2457.000 | \n",
"
\n",
" \n",
" prediction-online | \n",
" 4.769 | \n",
" 2516.000 | \n",
"
\n",
" \n",
" prediction-request-reasoning | \n",
" 3.247 | \n",
" 5883.000 | \n",
"
\n",
" \n",
" prediction-request-reasoning-claude | \n",
" 1.408 | \n",
" 639.000 | \n",
"
\n",
" \n",
" claude-prediction-offline | \n",
" 0.000 | \n",
" 107.000 | \n",
"
\n",
" \n",
" claude-prediction-online | \n",
" 0.000 | \n",
" 241.000 | \n",
"
\n",
" \n",
" prediction-offline | \n",
" 0.000 | \n",
" 1453.000 | \n",
"
\n",
" \n",
" prediction-url-cot-claude | \n",
" 0.000 | \n",
" 293.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error error_perc total_requests\n",
"tool \n",
"prediction-request-rag-claude 13.498 1704.000\n",
"prediction-request-rag 12.041 490.000\n",
"prediction-online-sme 9.076 2457.000\n",
"prediction-online 4.769 2516.000\n",
"prediction-request-reasoning 3.247 5883.000\n",
"prediction-request-reasoning-claude 1.408 639.000\n",
"claude-prediction-offline 0.000 107.000\n",
"claude-prediction-online 0.000 241.000\n",
"prediction-offline 0.000 1453.000\n",
"prediction-url-cot-claude 0.000 293.000"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-tool error rate and request volume for one week, highest error rate first\n",
"(\n",
"    error[error['request_month_year_week'] == '2024-05-20/2024-05-26']\n",
"    .groupby('tool')\n",
"    .agg({'error_perc': 'mean', 'total_requests': 'sum'})\n",
"    .sort_values('error_perc', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 19 | \n",
" claude-prediction-offline | \n",
" 2024-05-20/2024-05-26 | \n",
" 107.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 107.000 | \n",
"
\n",
" \n",
" 18 | \n",
" claude-prediction-offline | \n",
" 2024-05-13/2024-05-19 | \n",
" 203.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 203.000 | \n",
"
\n",
" \n",
" 17 | \n",
" claude-prediction-offline | \n",
" 2024-05-06/2024-05-12 | \n",
" 156.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 156.000 | \n",
"
\n",
" \n",
" 16 | \n",
" claude-prediction-offline | \n",
" 2024-04-29/2024-05-05 | \n",
" 531.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 531.000 | \n",
"
\n",
" \n",
" 15 | \n",
" claude-prediction-offline | \n",
" 2024-04-22/2024-04-28 | \n",
" 816.000 | \n",
" 5.000 | \n",
" 0.609 | \n",
" 821.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"19 claude-prediction-offline 2024-05-20/2024-05-26 107.000 0.000 \n",
"18 claude-prediction-offline 2024-05-13/2024-05-19 203.000 0.000 \n",
"17 claude-prediction-offline 2024-05-06/2024-05-12 156.000 0.000 \n",
"16 claude-prediction-offline 2024-04-29/2024-05-05 531.000 0.000 \n",
"15 claude-prediction-offline 2024-04-22/2024-04-28 816.000 5.000 \n",
"\n",
"error error_perc total_requests \n",
"19 0.000 107.000 \n",
"18 0.000 203.000 \n",
"17 0.000 156.000 \n",
"16 0.000 531.000 \n",
"15 0.609 821.000 "
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# claude-prediction-offline: weekly error stats, sorted by week in descending order\n",
"claude_prediction_offline = (\n",
"    error.loc[error['tool'] == 'claude-prediction-offline']\n",
"    .sort_values('request_month_year_week', ascending=False)\n",
")\n",
"claude_prediction_offline.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 54 | \n",
" claude-prediction-online | \n",
" 2024-05-20/2024-05-26 | \n",
" 241.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 241.000 | \n",
"
\n",
" \n",
" 53 | \n",
" claude-prediction-online | \n",
" 2024-05-13/2024-05-19 | \n",
" 37.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 37.000 | \n",
"
\n",
" \n",
" 52 | \n",
" claude-prediction-online | \n",
" 2024-05-06/2024-05-12 | \n",
" 176.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 176.000 | \n",
"
\n",
" \n",
" 51 | \n",
" claude-prediction-online | \n",
" 2024-04-29/2024-05-05 | \n",
" 192.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 192.000 | \n",
"
\n",
" \n",
" 50 | \n",
" claude-prediction-online | \n",
" 2024-04-22/2024-04-28 | \n",
" 1937.000 | \n",
" 155.000 | \n",
" 7.409 | \n",
" 2092.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"54 claude-prediction-online 2024-05-20/2024-05-26 241.000 0.000 \n",
"53 claude-prediction-online 2024-05-13/2024-05-19 37.000 0.000 \n",
"52 claude-prediction-online 2024-05-06/2024-05-12 176.000 0.000 \n",
"51 claude-prediction-online 2024-04-29/2024-05-05 192.000 0.000 \n",
"50 claude-prediction-online 2024-04-22/2024-04-28 1937.000 155.000 \n",
"\n",
"error error_perc total_requests \n",
"54 0.000 241.000 \n",
"53 0.000 37.000 \n",
"52 0.000 176.000 \n",
"51 0.000 192.000 \n",
"50 7.409 2092.000 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# claude-prediction-online: weekly error stats, sorted by week in descending order\n",
"claude_prediction_online = (\n",
"    error.loc[error['tool'] == 'claude-prediction-online']\n",
"    .sort_values('request_month_year_week', ascending=False)\n",
")\n",
"claude_prediction_online.head()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 84 | \n",
" prediction-offline | \n",
" 2024-05-20/2024-05-26 | \n",
" 1453.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 1453.000 | \n",
"
\n",
" \n",
" 83 | \n",
" prediction-offline | \n",
" 2024-05-13/2024-05-19 | \n",
" 4270.000 | \n",
" 1.000 | \n",
" 0.023 | \n",
" 4271.000 | \n",
"
\n",
" \n",
" 82 | \n",
" prediction-offline | \n",
" 2024-05-06/2024-05-12 | \n",
" 2500.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 2500.000 | \n",
"
\n",
" \n",
" 81 | \n",
" prediction-offline | \n",
" 2024-04-29/2024-05-05 | \n",
" 1825.000 | \n",
" 2.000 | \n",
" 0.109 | \n",
" 1827.000 | \n",
"
\n",
" \n",
" 80 | \n",
" prediction-offline | \n",
" 2024-04-22/2024-04-28 | \n",
" 381.000 | \n",
" 375.000 | \n",
" 49.603 | \n",
" 756.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"84 prediction-offline 2024-05-20/2024-05-26 1453.000 0.000 \n",
"83 prediction-offline 2024-05-13/2024-05-19 4270.000 1.000 \n",
"82 prediction-offline 2024-05-06/2024-05-12 2500.000 0.000 \n",
"81 prediction-offline 2024-04-29/2024-05-05 1825.000 2.000 \n",
"80 prediction-offline 2024-04-22/2024-04-28 381.000 375.000 \n",
"\n",
"error error_perc total_requests \n",
"84 0.000 1453.000 \n",
"83 0.023 4271.000 \n",
"82 0.000 2500.000 \n",
"81 0.109 1827.000 \n",
"80 49.603 756.000 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prediction-offline: weekly error stats, sorted by week in descending order\n",
"prediction_offline = (\n",
"    error.loc[error['tool'] == 'prediction-offline']\n",
"    .sort_values('request_month_year_week', ascending=False)\n",
")\n",
"prediction_offline.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 139 | \n",
" prediction-online | \n",
" 2024-05-20/2024-05-26 | \n",
" 2396.000 | \n",
" 120.000 | \n",
" 4.769 | \n",
" 2516.000 | \n",
"
\n",
" \n",
" 138 | \n",
" prediction-online | \n",
" 2024-05-13/2024-05-19 | \n",
" 2642.000 | \n",
" 393.000 | \n",
" 12.949 | \n",
" 3035.000 | \n",
"
\n",
" \n",
" 137 | \n",
" prediction-online | \n",
" 2024-05-06/2024-05-12 | \n",
" 2840.000 | \n",
" 266.000 | \n",
" 8.564 | \n",
" 3106.000 | \n",
"
\n",
" \n",
" 136 | \n",
" prediction-online | \n",
" 2024-04-29/2024-05-05 | \n",
" 2155.000 | \n",
" 24.000 | \n",
" 1.101 | \n",
" 2179.000 | \n",
"
\n",
" \n",
" 135 | \n",
" prediction-online | \n",
" 2024-04-22/2024-04-28 | \n",
" 252.000 | \n",
" 153.000 | \n",
" 37.778 | \n",
" 405.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"139 prediction-online 2024-05-20/2024-05-26 2396.000 120.000 \n",
"138 prediction-online 2024-05-13/2024-05-19 2642.000 393.000 \n",
"137 prediction-online 2024-05-06/2024-05-12 2840.000 266.000 \n",
"136 prediction-online 2024-04-29/2024-05-05 2155.000 24.000 \n",
"135 prediction-online 2024-04-22/2024-04-28 252.000 153.000 \n",
"\n",
"error error_perc total_requests \n",
"139 4.769 2516.000 \n",
"138 12.949 3035.000 \n",
"137 8.564 3106.000 \n",
"136 1.101 2179.000 \n",
"135 37.778 405.000 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prediction-online: weekly error stats, sorted by week in descending order\n",
"prediction_online = (\n",
"    error.loc[error['tool'] == 'prediction-online']\n",
"    .sort_values('request_month_year_week', ascending=False)\n",
")\n",
"prediction_online.head()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 104 | \n",
" prediction-offline-sme | \n",
" 2024-04-29/2024-05-05 | \n",
" 8.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 8.000 | \n",
"
\n",
" \n",
" 103 | \n",
" prediction-offline-sme | \n",
" 2024-04-22/2024-04-28 | \n",
" 159.000 | \n",
" 2.000 | \n",
" 1.242 | \n",
" 161.000 | \n",
"
\n",
" \n",
" 102 | \n",
" prediction-offline-sme | \n",
" 2024-04-15/2024-04-21 | \n",
" 717.000 | \n",
" 2.000 | \n",
" 0.278 | \n",
" 719.000 | \n",
"
\n",
" \n",
" 101 | \n",
" prediction-offline-sme | \n",
" 2024-04-08/2024-04-14 | \n",
" 4.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 4.000 | \n",
"
\n",
" \n",
" 100 | \n",
" prediction-offline-sme | \n",
" 2024-04-01/2024-04-07 | \n",
" 197.000 | \n",
" 1.000 | \n",
" 0.505 | \n",
" 198.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"104 prediction-offline-sme 2024-04-29/2024-05-05 8.000 0.000 \n",
"103 prediction-offline-sme 2024-04-22/2024-04-28 159.000 2.000 \n",
"102 prediction-offline-sme 2024-04-15/2024-04-21 717.000 2.000 \n",
"101 prediction-offline-sme 2024-04-08/2024-04-14 4.000 0.000 \n",
"100 prediction-offline-sme 2024-04-01/2024-04-07 197.000 1.000 \n",
"\n",
"error error_perc total_requests \n",
"104 0.000 8.000 \n",
"103 1.242 161.000 \n",
"102 0.278 719.000 \n",
"101 0.000 4.000 \n",
"100 0.505 198.000 "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prediction-offline-sme: weekly error stats, sorted by week in descending order\n",
"prediction_offline_sme = (\n",
"    error.loc[error['tool'] == 'prediction-offline-sme']\n",
"    .sort_values('request_month_year_week', ascending=False)\n",
")\n",
"prediction_offline_sme.head()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 175 | \n",
" prediction-online-sme | \n",
" 2024-05-20/2024-05-26 | \n",
" 2234.000 | \n",
" 223.000 | \n",
" 9.076 | \n",
" 2457.000 | \n",
"
\n",
" \n",
" 174 | \n",
" prediction-online-sme | \n",
" 2024-05-13/2024-05-19 | \n",
" 3141.000 | \n",
" 668.000 | \n",
" 17.537 | \n",
" 3809.000 | \n",
"
\n",
" \n",
" 173 | \n",
" prediction-online-sme | \n",
" 2024-05-06/2024-05-12 | \n",
" 3799.000 | \n",
" 562.000 | \n",
" 12.887 | \n",
" 4361.000 | \n",
"
\n",
" \n",
" 172 | \n",
" prediction-online-sme | \n",
" 2024-04-29/2024-05-05 | \n",
" 2534.000 | \n",
" 6.000 | \n",
" 0.236 | \n",
" 2540.000 | \n",
"
\n",
" \n",
" 171 | \n",
" prediction-online-sme | \n",
" 2024-04-22/2024-04-28 | \n",
" 2679.000 | \n",
" 1075.000 | \n",
" 28.636 | \n",
" 3754.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"175 prediction-online-sme 2024-05-20/2024-05-26 2234.000 223.000 \n",
"174 prediction-online-sme 2024-05-13/2024-05-19 3141.000 668.000 \n",
"173 prediction-online-sme 2024-05-06/2024-05-12 3799.000 562.000 \n",
"172 prediction-online-sme 2024-04-29/2024-05-05 2534.000 6.000 \n",
"171 prediction-online-sme 2024-04-22/2024-04-28 2679.000 1075.000 \n",
"\n",
"error error_perc total_requests \n",
"175 9.076 2457.000 \n",
"174 17.537 3809.000 \n",
"173 12.887 4361.000 \n",
"172 0.236 2540.000 \n",
"171 28.636 3754.000 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prediction-online-sme\n",
"prediction_online_sme = error[error['tool'] == 'prediction-online-sme']\n",
"prediction_online_sme = prediction_online_sme.sort_values('request_month_year_week', ascending=False)\n",
"prediction_online_sme.head()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 188 | \n",
" prediction-request-rag | \n",
" 2024-05-20/2024-05-26 | \n",
" 431.000 | \n",
" 59.000 | \n",
" 12.041 | \n",
" 490.000 | \n",
"
\n",
" \n",
" 187 | \n",
" prediction-request-rag | \n",
" 2024-05-13/2024-05-19 | \n",
" 355.000 | \n",
" 55.000 | \n",
" 13.415 | \n",
" 410.000 | \n",
"
\n",
" \n",
" 186 | \n",
" prediction-request-rag | \n",
" 2024-05-06/2024-05-12 | \n",
" 470.000 | \n",
" 125.000 | \n",
" 21.008 | \n",
" 595.000 | \n",
"
\n",
" \n",
" 185 | \n",
" prediction-request-rag | \n",
" 2024-04-29/2024-05-05 | \n",
" 544.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 544.000 | \n",
"
\n",
" \n",
" 184 | \n",
" prediction-request-rag | \n",
" 2024-04-22/2024-04-28 | \n",
" 2011.000 | \n",
" 881.000 | \n",
" 30.463 | \n",
" 2892.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"188 prediction-request-rag 2024-05-20/2024-05-26 431.000 59.000 \n",
"187 prediction-request-rag 2024-05-13/2024-05-19 355.000 55.000 \n",
"186 prediction-request-rag 2024-05-06/2024-05-12 470.000 125.000 \n",
"185 prediction-request-rag 2024-04-29/2024-05-05 544.000 0.000 \n",
"184 prediction-request-rag 2024-04-22/2024-04-28 2011.000 881.000 \n",
"\n",
"error error_perc total_requests \n",
"188 12.041 490.000 \n",
"187 13.415 410.000 \n",
"186 21.008 595.000 \n",
"185 0.000 544.000 \n",
"184 30.463 2892.000 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prediction-request-rag\n",
"prediction_request_rag = error[error['tool'] == 'prediction-request-rag']\n",
"prediction_request_rag = prediction_request_rag.sort_values('request_month_year_week', ascending=False)\n",
"prediction_request_rag.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 214 | \n",
" prediction-request-reasoning-claude | \n",
" 2024-05-20/2024-05-26 | \n",
" 630.000 | \n",
" 9.000 | \n",
" 1.408 | \n",
" 639.000 | \n",
"
\n",
" \n",
" 213 | \n",
" prediction-request-reasoning-claude | \n",
" 2024-05-13/2024-05-19 | \n",
" 309.000 | \n",
" 205.000 | \n",
" 39.883 | \n",
" 514.000 | \n",
"
\n",
" \n",
" 212 | \n",
" prediction-request-reasoning-claude | \n",
" 2024-05-06/2024-05-12 | \n",
" 478.000 | \n",
" 54.000 | \n",
" 10.150 | \n",
" 532.000 | \n",
"
\n",
" \n",
" 211 | \n",
" prediction-request-reasoning-claude | \n",
" 2024-04-29/2024-05-05 | \n",
" 218.000 | \n",
" 8.000 | \n",
" 3.540 | \n",
" 226.000 | \n",
"
\n",
" \n",
" 210 | \n",
" prediction-request-reasoning-claude | \n",
" 2024-04-22/2024-04-28 | \n",
" 2053.000 | \n",
" 575.000 | \n",
" 21.880 | \n",
" 2628.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 \\\n",
"214 prediction-request-reasoning-claude 2024-05-20/2024-05-26 630.000 \n",
"213 prediction-request-reasoning-claude 2024-05-13/2024-05-19 309.000 \n",
"212 prediction-request-reasoning-claude 2024-05-06/2024-05-12 478.000 \n",
"211 prediction-request-reasoning-claude 2024-04-29/2024-05-05 218.000 \n",
"210 prediction-request-reasoning-claude 2024-04-22/2024-04-28 2053.000 \n",
"\n",
"error 1.000 error_perc total_requests \n",
"214 9.000 1.408 639.000 \n",
"213 205.000 39.883 514.000 \n",
"212 54.000 10.150 532.000 \n",
"211 8.000 3.540 226.000 \n",
"210 575.000 21.880 2628.000 "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prediction-request-reasoning-claude\n",
"prediction_request_reasoning_claude = error[error['tool'] == 'prediction-request-reasoning-claude']\n",
"prediction_request_reasoning_claude = prediction_request_reasoning_claude.sort_values('request_month_year_week', ascending=False)\n",
"prediction_request_reasoning_claude.head()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 196 | \n",
" prediction-request-rag-claude | \n",
" 2024-05-20/2024-05-26 | \n",
" 1474.000 | \n",
" 230.000 | \n",
" 13.498 | \n",
" 1704.000 | \n",
"
\n",
" \n",
" 195 | \n",
" prediction-request-rag-claude | \n",
" 2024-05-13/2024-05-19 | \n",
" 2378.000 | \n",
" 274.000 | \n",
" 10.332 | \n",
" 2652.000 | \n",
"
\n",
" \n",
" 194 | \n",
" prediction-request-rag-claude | \n",
" 2024-05-06/2024-05-12 | \n",
" 2850.000 | \n",
" 777.000 | \n",
" 21.423 | \n",
" 3627.000 | \n",
"
\n",
" \n",
" 193 | \n",
" prediction-request-rag-claude | \n",
" 2024-04-29/2024-05-05 | \n",
" 1313.000 | \n",
" 8.000 | \n",
" 0.606 | \n",
" 1321.000 | \n",
"
\n",
" \n",
" 192 | \n",
" prediction-request-rag-claude | \n",
" 2024-04-22/2024-04-28 | \n",
" 1113.000 | \n",
" 345.000 | \n",
" 23.663 | \n",
" 1458.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 \\\n",
"196 prediction-request-rag-claude 2024-05-20/2024-05-26 1474.000 \n",
"195 prediction-request-rag-claude 2024-05-13/2024-05-19 2378.000 \n",
"194 prediction-request-rag-claude 2024-05-06/2024-05-12 2850.000 \n",
"193 prediction-request-rag-claude 2024-04-29/2024-05-05 1313.000 \n",
"192 prediction-request-rag-claude 2024-04-22/2024-04-28 1113.000 \n",
"\n",
"error 1.000 error_perc total_requests \n",
"196 230.000 13.498 1704.000 \n",
"195 274.000 10.332 2652.000 \n",
"194 777.000 21.423 3627.000 \n",
"193 8.000 0.606 1321.000 \n",
"192 345.000 23.663 1458.000 "
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prediction_request_rag_claude = error[error['tool'] == 'prediction-request-rag-claude']\n",
"prediction_request_rag_claude = prediction_request_rag_claude.sort_values('request_month_year_week', ascending=False)\n",
"prediction_request_rag_claude.head()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" error | \n",
" tool | \n",
" request_month_year_week | \n",
" 0.000 | \n",
" 1.000 | \n",
" error_perc | \n",
" total_requests | \n",
"
\n",
" \n",
" \n",
" \n",
" 222 | \n",
" prediction-url-cot-claude | \n",
" 2024-05-20/2024-05-26 | \n",
" 293.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 293.000 | \n",
"
\n",
" \n",
" 221 | \n",
" prediction-url-cot-claude | \n",
" 2024-05-13/2024-05-19 | \n",
" 93.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 93.000 | \n",
"
\n",
" \n",
" 220 | \n",
" prediction-url-cot-claude | \n",
" 2024-05-06/2024-05-12 | \n",
" 225.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 225.000 | \n",
"
\n",
" \n",
" 219 | \n",
" prediction-url-cot-claude | \n",
" 2024-04-29/2024-05-05 | \n",
" 270.000 | \n",
" 0.000 | \n",
" 0.000 | \n",
" 270.000 | \n",
"
\n",
" \n",
" 218 | \n",
" prediction-url-cot-claude | \n",
" 2024-04-22/2024-04-28 | \n",
" 1506.000 | \n",
" 65.000 | \n",
" 4.137 | \n",
" 1571.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"error tool request_month_year_week 0.000 1.000 \\\n",
"222 prediction-url-cot-claude 2024-05-20/2024-05-26 293.000 0.000 \n",
"221 prediction-url-cot-claude 2024-05-13/2024-05-19 93.000 0.000 \n",
"220 prediction-url-cot-claude 2024-05-06/2024-05-12 225.000 0.000 \n",
"219 prediction-url-cot-claude 2024-04-29/2024-05-05 270.000 0.000 \n",
"218 prediction-url-cot-claude 2024-04-22/2024-04-28 1506.000 65.000 \n",
"\n",
"error error_perc total_requests \n",
"222 0.000 293.000 \n",
"221 0.000 93.000 \n",
"220 0.000 225.000 \n",
"219 0.000 270.000 \n",
"218 4.137 1571.000 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prediction_url_cot_claude = error[error['tool'] == 'prediction-url-cot-claude']\n",
"prediction_url_cot_claude = prediction_url_cot_claude.sort_values('request_month_year_week', ascending=False)\n",
"prediction_url_cot_claude.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Free the frames that only the error analysis needed.\n",
"# `tools_inc` is deliberately kept: the win analysis in the next section still reads it,\n",
"# so deleting it here breaks a top-to-bottom run. Free it once nothing further needs it.\n",
"# Popping from globals() instead of using `del` keeps this cell safe to re-run:\n",
"# a name that is already gone is simply skipped rather than raising NameError.\n",
"for _name in ('tools', 'error', 'error_total'):\n",
"    globals().pop(_name, None)\n",
"\n",
"gc.collect()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Win analysis"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Build the frame used for the win analysis: requests that did not error and whose\n",
"# answer and vote are both binary (Yes/No).\n",
"#\n",
"# The lower-case answers are normalised on a separate Series and written back with\n",
"# assign(). Calling replace(..., inplace=True) on a column selected from a filtered\n",
"# frame is chained assignment: it raises SettingWithCopyWarning and is silently\n",
"# dropped under pandas Copy-on-Write, which would discard every 'yes'/'no' row below.\n",
"binary_labels = ['Yes', 'No']\n",
"normalised_answer = tools_inc['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
"\n",
"keep_rows = (\n",
"    (tools_inc['error'] != 1)                 # only include non error requests\n",
"    & normalised_answer.isin(binary_labels)   # answer is Yes or No\n",
"    & tools_inc['vote'].isin(binary_labels)   # vote is Yes or No\n",
")\n",
"\n",
"# assign() returns a fresh, independent frame, so later column additions do not\n",
"# touch tools_inc and do not warn.\n",
"tools_non_error = tools_inc.loc[keep_rows].assign(currentAnswer=normalised_answer[keep_rows])"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"# win is 1 when the `currentAnswer` column equals the `vote` column, otherwise 0\n",
"tools_non_error['win'] = tools_non_error['currentAnswer'].eq(tools_non_error['vote']).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Count rows per (tool, week) for each value of `win`: column 0 holds the win == 0 count,\n",
"# column 1 the win == 1 count. Combinations that never occur are filled with 0.\n",
"wins = (\n",
"    tools_non_error\n",
"    .groupby(['tool', 'request_month_year_week', 'win'])\n",
"    .size()\n",
"    .unstack()\n",
"    .fillna(0)\n",
")\n",
"# Share of win == 1 rows in each (tool, week) group, as a percentage\n",
"wins['win_perc'] = (wins[1] / (wins[0] + wins[1])) * 100\n",
"# Turn the (tool, week) index back into ordinary columns\n",
"wins = wins.reset_index()\n",
"wins['total_request'] = wins[0] + wins[1]"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['claude-prediction-offline', 'claude-prediction-online',\n",
" 'prediction-offline', 'prediction-offline-sme',\n",
" 'prediction-online', 'prediction-online-sme',\n",
" 'prediction-request-rag', 'prediction-request-rag-claude',\n",
" 'prediction-request-reasoning',\n",
" 'prediction-request-reasoning-claude', 'prediction-url-cot-claude'],\n",
" dtype=object)"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wins['tool'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" 0 | \n",
" 1 | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" request_month_year_week | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2023-07-17/2023-07-23 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 50.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
" 2023-07-24/2023-07-30 | \n",
" 12.000 | \n",
" 12.000 | \n",
" 50.000 | \n",
" 24.000 | \n",
"
\n",
" \n",
" 2023-07-31/2023-08-06 | \n",
" 360.000 | \n",
" 229.000 | \n",
" 41.979 | \n",
" 589.000 | \n",
"
\n",
" \n",
" 2023-08-07/2023-08-13 | \n",
" 177.000 | \n",
" 110.000 | \n",
" 44.123 | \n",
" 287.000 | \n",
"
\n",
" \n",
" 2023-08-14/2023-08-20 | \n",
" 784.000 | \n",
" 559.000 | \n",
" 41.623 | \n",
" 1343.000 | \n",
"
\n",
" \n",
" 2023-08-21/2023-08-27 | \n",
" 596.000 | \n",
" 502.000 | \n",
" 45.719 | \n",
" 1098.000 | \n",
"
\n",
" \n",
" 2023-08-28/2023-09-03 | \n",
" 958.000 | \n",
" 502.000 | \n",
" 34.384 | \n",
" 1460.000 | \n",
"
\n",
" \n",
" 2023-09-04/2023-09-10 | \n",
" 1609.000 | \n",
" 1418.000 | \n",
" 46.845 | \n",
" 3027.000 | \n",
"
\n",
" \n",
" 2023-09-11/2023-09-17 | \n",
" 1171.000 | \n",
" 1380.000 | \n",
" 54.096 | \n",
" 2551.000 | \n",
"
\n",
" \n",
" 2023-09-18/2023-09-24 | \n",
" 2150.000 | \n",
" 2307.000 | \n",
" 60.968 | \n",
" 4457.000 | \n",
"
\n",
" \n",
" 2023-09-25/2023-10-01 | \n",
" 992.000 | \n",
" 817.000 | \n",
" 47.635 | \n",
" 1809.000 | \n",
"
\n",
" \n",
" 2023-10-02/2023-10-08 | \n",
" 1625.000 | \n",
" 1842.000 | \n",
" 54.240 | \n",
" 3467.000 | \n",
"
\n",
" \n",
" 2023-10-09/2023-10-15 | \n",
" 1594.000 | \n",
" 2096.000 | \n",
" 57.281 | \n",
" 3690.000 | \n",
"
\n",
" \n",
" 2023-10-16/2023-10-22 | \n",
" 1291.000 | \n",
" 1623.000 | \n",
" 55.496 | \n",
" 2914.000 | \n",
"
\n",
" \n",
" 2023-10-23/2023-10-29 | \n",
" 1018.000 | \n",
" 1084.000 | \n",
" 50.802 | \n",
" 2102.000 | \n",
"
\n",
" \n",
" 2023-10-30/2023-11-05 | \n",
" 541.000 | \n",
" 825.000 | \n",
" 64.848 | \n",
" 1366.000 | \n",
"
\n",
" \n",
" 2023-11-06/2023-11-12 | \n",
" 1545.000 | \n",
" 1776.000 | \n",
" 69.014 | \n",
" 3321.000 | \n",
"
\n",
" \n",
" 2023-11-13/2023-11-19 | \n",
" 1825.000 | \n",
" 2056.000 | \n",
" 55.202 | \n",
" 3881.000 | \n",
"
\n",
" \n",
" 2023-11-20/2023-11-26 | \n",
" 1567.000 | \n",
" 1874.000 | \n",
" 58.482 | \n",
" 3441.000 | \n",
"
\n",
" \n",
" 2023-11-27/2023-12-03 | \n",
" 1555.000 | \n",
" 1773.000 | \n",
" 67.721 | \n",
" 3328.000 | \n",
"
\n",
" \n",
" 2023-12-04/2023-12-10 | \n",
" 1245.000 | \n",
" 1470.000 | \n",
" 33.705 | \n",
" 2715.000 | \n",
"
\n",
" \n",
" 2023-12-11/2023-12-17 | \n",
" 1462.000 | \n",
" 1788.000 | \n",
" 52.404 | \n",
" 3250.000 | \n",
"
\n",
" \n",
" 2023-12-18/2023-12-24 | \n",
" 1332.000 | \n",
" 1557.000 | \n",
" 46.687 | \n",
" 2889.000 | \n",
"
\n",
" \n",
" 2023-12-25/2023-12-31 | \n",
" 1397.000 | \n",
" 1257.000 | \n",
" 48.222 | \n",
" 2654.000 | \n",
"
\n",
" \n",
" 2024-01-01/2024-01-07 | \n",
" 2159.000 | \n",
" 1713.000 | \n",
" 43.436 | \n",
" 3872.000 | \n",
"
\n",
" \n",
" 2024-01-08/2024-01-14 | \n",
" 1034.000 | \n",
" 890.000 | \n",
" 41.597 | \n",
" 1924.000 | \n",
"
\n",
" \n",
" 2024-01-15/2024-01-21 | \n",
" 2228.000 | \n",
" 1758.000 | \n",
" 40.827 | \n",
" 3986.000 | \n",
"
\n",
" \n",
" 2024-01-22/2024-01-28 | \n",
" 2036.000 | \n",
" 1970.000 | \n",
" 31.617 | \n",
" 4006.000 | \n",
"
\n",
" \n",
" 2024-01-29/2024-02-04 | \n",
" 2303.000 | \n",
" 1791.000 | \n",
" 37.106 | \n",
" 4094.000 | \n",
"
\n",
" \n",
" 2024-02-05/2024-02-11 | \n",
" 2149.000 | \n",
" 2189.000 | \n",
" 49.808 | \n",
" 4338.000 | \n",
"
\n",
" \n",
" 2024-02-12/2024-02-18 | \n",
" 1979.000 | \n",
" 1956.000 | \n",
" 55.949 | \n",
" 3935.000 | \n",
"
\n",
" \n",
" 2024-02-19/2024-02-25 | \n",
" 1788.000 | \n",
" 2002.000 | \n",
" 57.697 | \n",
" 3790.000 | \n",
"
\n",
" \n",
" 2024-02-26/2024-03-03 | \n",
" 2299.000 | \n",
" 2350.000 | \n",
" 42.051 | \n",
" 4649.000 | \n",
"
\n",
" \n",
" 2024-03-04/2024-03-10 | \n",
" 4523.000 | \n",
" 3500.000 | \n",
" 44.989 | \n",
" 8023.000 | \n",
"
\n",
" \n",
" 2024-03-11/2024-03-17 | \n",
" 4516.000 | \n",
" 4705.000 | \n",
" 56.713 | \n",
" 9221.000 | \n",
"
\n",
" \n",
" 2024-03-18/2024-03-24 | \n",
" 5561.000 | \n",
" 5581.000 | \n",
" 52.903 | \n",
" 11142.000 | \n",
"
\n",
" \n",
" 2024-03-25/2024-03-31 | \n",
" 5200.000 | \n",
" 6965.000 | \n",
" 54.644 | \n",
" 12165.000 | \n",
"
\n",
" \n",
" 2024-04-01/2024-04-07 | \n",
" 2923.000 | \n",
" 4258.000 | \n",
" 61.323 | \n",
" 7181.000 | \n",
"
\n",
" \n",
" 2024-04-08/2024-04-14 | \n",
" 1331.000 | \n",
" 3412.000 | \n",
" 69.522 | \n",
" 4743.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win 0 1 win_perc total_request\n",
"request_month_year_week \n",
"2023-07-17/2023-07-23 1.000 1.000 50.000 2.000\n",
"2023-07-24/2023-07-30 12.000 12.000 50.000 24.000\n",
"2023-07-31/2023-08-06 360.000 229.000 41.979 589.000\n",
"2023-08-07/2023-08-13 177.000 110.000 44.123 287.000\n",
"2023-08-14/2023-08-20 784.000 559.000 41.623 1343.000\n",
"2023-08-21/2023-08-27 596.000 502.000 45.719 1098.000\n",
"2023-08-28/2023-09-03 958.000 502.000 34.384 1460.000\n",
"2023-09-04/2023-09-10 1609.000 1418.000 46.845 3027.000\n",
"2023-09-11/2023-09-17 1171.000 1380.000 54.096 2551.000\n",
"2023-09-18/2023-09-24 2150.000 2307.000 60.968 4457.000\n",
"2023-09-25/2023-10-01 992.000 817.000 47.635 1809.000\n",
"2023-10-02/2023-10-08 1625.000 1842.000 54.240 3467.000\n",
"2023-10-09/2023-10-15 1594.000 2096.000 57.281 3690.000\n",
"2023-10-16/2023-10-22 1291.000 1623.000 55.496 2914.000\n",
"2023-10-23/2023-10-29 1018.000 1084.000 50.802 2102.000\n",
"2023-10-30/2023-11-05 541.000 825.000 64.848 1366.000\n",
"2023-11-06/2023-11-12 1545.000 1776.000 69.014 3321.000\n",
"2023-11-13/2023-11-19 1825.000 2056.000 55.202 3881.000\n",
"2023-11-20/2023-11-26 1567.000 1874.000 58.482 3441.000\n",
"2023-11-27/2023-12-03 1555.000 1773.000 67.721 3328.000\n",
"2023-12-04/2023-12-10 1245.000 1470.000 33.705 2715.000\n",
"2023-12-11/2023-12-17 1462.000 1788.000 52.404 3250.000\n",
"2023-12-18/2023-12-24 1332.000 1557.000 46.687 2889.000\n",
"2023-12-25/2023-12-31 1397.000 1257.000 48.222 2654.000\n",
"2024-01-01/2024-01-07 2159.000 1713.000 43.436 3872.000\n",
"2024-01-08/2024-01-14 1034.000 890.000 41.597 1924.000\n",
"2024-01-15/2024-01-21 2228.000 1758.000 40.827 3986.000\n",
"2024-01-22/2024-01-28 2036.000 1970.000 31.617 4006.000\n",
"2024-01-29/2024-02-04 2303.000 1791.000 37.106 4094.000\n",
"2024-02-05/2024-02-11 2149.000 2189.000 49.808 4338.000\n",
"2024-02-12/2024-02-18 1979.000 1956.000 55.949 3935.000\n",
"2024-02-19/2024-02-25 1788.000 2002.000 57.697 3790.000\n",
"2024-02-26/2024-03-03 2299.000 2350.000 42.051 4649.000\n",
"2024-03-04/2024-03-10 4523.000 3500.000 44.989 8023.000\n",
"2024-03-11/2024-03-17 4516.000 4705.000 56.713 9221.000\n",
"2024-03-18/2024-03-24 5561.000 5581.000 52.903 11142.000\n",
"2024-03-25/2024-03-31 5200.000 6965.000 54.644 12165.000\n",
"2024-04-01/2024-04-07 2923.000 4258.000 61.323 7181.000\n",
"2024-04-08/2024-04-14 1331.000 3412.000 69.522 4743.000"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wins.groupby('request_month_year_week').agg({\n",
" 0: 'sum',\n",
" 1: 'sum',\n",
" 'win_perc': 'mean',\n",
" 'total_request': 'sum'\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 11 | \n",
" 2023-09-18/2023-09-24 | \n",
" 100.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 12 | \n",
" 2023-09-25/2023-10-01 | \n",
" 58.333 | \n",
" 48.000 | \n",
"
\n",
" \n",
" 13 | \n",
" 2023-10-02/2023-10-08 | \n",
" 61.783 | \n",
" 157.000 | \n",
"
\n",
" \n",
" 14 | \n",
" 2023-10-09/2023-10-15 | \n",
" 60.588 | \n",
" 680.000 | \n",
"
\n",
" \n",
" 15 | \n",
" 2023-10-16/2023-10-22 | \n",
" 58.791 | \n",
" 364.000 | \n",
"
\n",
" \n",
" 16 | \n",
" 2023-10-23/2023-10-29 | \n",
" 47.143 | \n",
" 70.000 | \n",
"
\n",
" \n",
" 17 | \n",
" 2023-10-30/2023-11-05 | \n",
" 67.647 | \n",
" 34.000 | \n",
"
\n",
" \n",
" 18 | \n",
" 2023-11-20/2023-11-26 | \n",
" 100.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 19 | \n",
" 2023-11-27/2023-12-03 | \n",
" 57.143 | \n",
" 7.000 | \n",
"
\n",
" \n",
" 20 | \n",
" 2023-12-04/2023-12-10 | \n",
" 66.667 | \n",
" 6.000 | \n",
"
\n",
" \n",
" 21 | \n",
" 2023-12-11/2023-12-17 | \n",
" 50.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
" 22 | \n",
" 2023-12-25/2023-12-31 | \n",
" 55.814 | \n",
" 43.000 | \n",
"
\n",
" \n",
" 23 | \n",
" 2024-01-01/2024-01-07 | \n",
" 28.400 | \n",
" 250.000 | \n",
"
\n",
" \n",
" 24 | \n",
" 2024-01-08/2024-01-14 | \n",
" 35.789 | \n",
" 190.000 | \n",
"
\n",
" \n",
" 25 | \n",
" 2024-01-15/2024-01-21 | \n",
" 36.986 | \n",
" 292.000 | \n",
"
\n",
" \n",
" 26 | \n",
" 2024-01-22/2024-01-28 | \n",
" 45.387 | \n",
" 271.000 | \n",
"
\n",
" \n",
" 27 | \n",
" 2024-01-29/2024-02-04 | \n",
" 29.555 | \n",
" 247.000 | \n",
"
\n",
" \n",
" 28 | \n",
" 2024-02-05/2024-02-11 | \n",
" 49.064 | \n",
" 267.000 | \n",
"
\n",
" \n",
" 29 | \n",
" 2024-02-12/2024-02-18 | \n",
" 63.300 | \n",
" 297.000 | \n",
"
\n",
" \n",
" 30 | \n",
" 2024-02-19/2024-02-25 | \n",
" 65.362 | \n",
" 690.000 | \n",
"
\n",
" \n",
" 31 | \n",
" 2024-03-18/2024-03-24 | \n",
" 71.575 | \n",
" 781.000 | \n",
"
\n",
" \n",
" 32 | \n",
" 2024-03-25/2024-03-31 | \n",
" 69.052 | \n",
" 3648.000 | \n",
"
\n",
" \n",
" 33 | \n",
" 2024-04-01/2024-04-07 | \n",
" 60.991 | \n",
" 2402.000 | \n",
"
\n",
" \n",
" 34 | \n",
" 2024-04-08/2024-04-14 | \n",
" 62.205 | \n",
" 635.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"11 2023-09-18/2023-09-24 100.000 1.000\n",
"12 2023-09-25/2023-10-01 58.333 48.000\n",
"13 2023-10-02/2023-10-08 61.783 157.000\n",
"14 2023-10-09/2023-10-15 60.588 680.000\n",
"15 2023-10-16/2023-10-22 58.791 364.000\n",
"16 2023-10-23/2023-10-29 47.143 70.000\n",
"17 2023-10-30/2023-11-05 67.647 34.000\n",
"18 2023-11-20/2023-11-26 100.000 1.000\n",
"19 2023-11-27/2023-12-03 57.143 7.000\n",
"20 2023-12-04/2023-12-10 66.667 6.000\n",
"21 2023-12-11/2023-12-17 50.000 2.000\n",
"22 2023-12-25/2023-12-31 55.814 43.000\n",
"23 2024-01-01/2024-01-07 28.400 250.000\n",
"24 2024-01-08/2024-01-14 35.789 190.000\n",
"25 2024-01-15/2024-01-21 36.986 292.000\n",
"26 2024-01-22/2024-01-28 45.387 271.000\n",
"27 2024-01-29/2024-02-04 29.555 247.000\n",
"28 2024-02-05/2024-02-11 49.064 267.000\n",
"29 2024-02-12/2024-02-18 63.300 297.000\n",
"30 2024-02-19/2024-02-25 65.362 690.000\n",
"31 2024-03-18/2024-03-24 71.575 781.000\n",
"32 2024-03-25/2024-03-31 69.052 3648.000\n",
"33 2024-04-01/2024-04-07 60.991 2402.000\n",
"34 2024-04-08/2024-04-14 62.205 635.000"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# select only claude-prediction-online and plot request_month_year_week vs win_perc\n",
"claude_prediction_online = wins[wins['tool'] == 'claude-prediction-online']\n",
"claude_prediction_online = claude_prediction_online[['request_month_year_week', 'win_perc', 'total_request']]\n",
"claude_prediction_online = claude_prediction_online.sort_values(by='request_month_year_week')\n",
"\n",
"claude_prediction_online.head()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-09-18/2023-09-24 | \n",
" 16.667 | \n",
" 6.000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-09-25/2023-10-01 | \n",
" 53.205 | \n",
" 156.000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-10-02/2023-10-08 | \n",
" 53.333 | \n",
" 285.000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-10-09/2023-10-15 | \n",
" 60.477 | \n",
" 377.000 | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-10-16/2023-10-22 | \n",
" 57.854 | \n",
" 522.000 | \n",
"
\n",
" \n",
" 5 | \n",
" 2023-10-23/2023-10-29 | \n",
" 56.383 | \n",
" 376.000 | \n",
"
\n",
" \n",
" 6 | \n",
" 2023-10-30/2023-11-05 | \n",
" 72.000 | \n",
" 75.000 | \n",
"
\n",
" \n",
" 7 | \n",
" 2023-11-06/2023-11-12 | \n",
" 100.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 8 | \n",
" 2023-11-13/2023-11-19 | \n",
" 100.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
" 9 | \n",
" 2023-12-18/2023-12-24 | \n",
" 20.000 | \n",
" 5.000 | \n",
"
\n",
" \n",
" 10 | \n",
" 2024-03-25/2024-03-31 | \n",
" 100.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"0 2023-09-18/2023-09-24 16.667 6.000\n",
"1 2023-09-25/2023-10-01 53.205 156.000\n",
"2 2023-10-02/2023-10-08 53.333 285.000\n",
"3 2023-10-09/2023-10-15 60.477 377.000\n",
"4 2023-10-16/2023-10-22 57.854 522.000\n",
"5 2023-10-23/2023-10-29 56.383 376.000\n",
"6 2023-10-30/2023-11-05 72.000 75.000\n",
"7 2023-11-06/2023-11-12 100.000 1.000\n",
"8 2023-11-13/2023-11-19 100.000 2.000\n",
"9 2023-12-18/2023-12-24 20.000 5.000\n",
"10 2024-03-25/2024-03-31 100.000 2.000"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# select only claude-prediction-offline and plot request_month_year_week vs win_perc\n",
"claude_prediction_offline = wins[wins['tool'] == 'claude-prediction-offline']\n",
"claude_prediction_offline = claude_prediction_offline[['request_month_year_week', 'win_perc', 'total_request']]\n",
"claude_prediction_offline = claude_prediction_offline.sort_values(by='request_month_year_week')\n",
"\n",
"claude_prediction_offline.head()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 72 | \n",
" 2023-07-17/2023-07-23 | \n",
" 50.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
" 73 | \n",
" 2023-07-24/2023-07-30 | \n",
" 50.000 | \n",
" 24.000 | \n",
"
\n",
" \n",
" 74 | \n",
" 2023-07-31/2023-08-06 | \n",
" 38.306 | \n",
" 543.000 | \n",
"
\n",
" \n",
" 75 | \n",
" 2023-08-07/2023-08-13 | \n",
" 38.246 | \n",
" 285.000 | \n",
"
\n",
" \n",
" 76 | \n",
" 2023-08-14/2023-08-20 | \n",
" 41.623 | \n",
" 1343.000 | \n",
"
\n",
" \n",
" 77 | \n",
" 2023-08-21/2023-08-27 | \n",
" 45.719 | \n",
" 1098.000 | \n",
"
\n",
" \n",
" 78 | \n",
" 2023-08-28/2023-09-03 | \n",
" 34.384 | \n",
" 1460.000 | \n",
"
\n",
" \n",
" 79 | \n",
" 2023-09-04/2023-09-10 | \n",
" 46.845 | \n",
" 3027.000 | \n",
"
\n",
" \n",
" 80 | \n",
" 2023-09-11/2023-09-17 | \n",
" 54.096 | \n",
" 2551.000 | \n",
"
\n",
" \n",
" 81 | \n",
" 2023-09-18/2023-09-24 | \n",
" 51.602 | \n",
" 4246.000 | \n",
"
\n",
" \n",
" 82 | \n",
" 2023-09-25/2023-10-01 | \n",
" 43.876 | \n",
" 743.000 | \n",
"
\n",
" \n",
" 83 | \n",
" 2023-10-02/2023-10-08 | \n",
" 50.538 | \n",
" 837.000 | \n",
"
\n",
" \n",
" 84 | \n",
" 2023-10-09/2023-10-15 | \n",
" 50.976 | \n",
" 973.000 | \n",
"
\n",
" \n",
" 85 | \n",
" 2023-10-16/2023-10-22 | \n",
" 56.146 | \n",
" 903.000 | \n",
"
\n",
" \n",
" 86 | \n",
" 2023-10-23/2023-10-29 | \n",
" 48.822 | \n",
" 594.000 | \n",
"
\n",
" \n",
" 87 | \n",
" 2023-10-30/2023-11-05 | \n",
" 60.392 | \n",
" 664.000 | \n",
"
\n",
" \n",
" 88 | \n",
" 2023-11-06/2023-11-12 | \n",
" 52.533 | \n",
" 1757.000 | \n",
"
\n",
" \n",
" 89 | \n",
" 2023-11-13/2023-11-19 | \n",
" 53.892 | \n",
" 2004.000 | \n",
"
\n",
" \n",
" 90 | \n",
" 2023-11-20/2023-11-26 | \n",
" 53.202 | \n",
" 1780.000 | \n",
"
\n",
" \n",
" 91 | \n",
" 2023-11-27/2023-12-03 | \n",
" 54.253 | \n",
" 1058.000 | \n",
"
\n",
" \n",
" 92 | \n",
" 2023-12-04/2023-12-10 | \n",
" 47.500 | \n",
" 80.000 | \n",
"
\n",
" \n",
" 93 | \n",
" 2023-12-11/2023-12-17 | \n",
" 52.174 | \n",
" 23.000 | \n",
"
\n",
" \n",
" 94 | \n",
" 2023-12-18/2023-12-24 | \n",
" 69.863 | \n",
" 73.000 | \n",
"
\n",
" \n",
" 95 | \n",
" 2023-12-25/2023-12-31 | \n",
" 41.509 | \n",
" 53.000 | \n",
"
\n",
" \n",
" 96 | \n",
" 2024-01-01/2024-01-07 | \n",
" 0.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
" 97 | \n",
" 2024-01-22/2024-01-28 | \n",
" 0.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 98 | \n",
" 2024-03-25/2024-03-31 | \n",
" 0.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"72 2023-07-17/2023-07-23 50.000 2.000\n",
"73 2023-07-24/2023-07-30 50.000 24.000\n",
"74 2023-07-31/2023-08-06 38.306 543.000\n",
"75 2023-08-07/2023-08-13 38.246 285.000\n",
"76 2023-08-14/2023-08-20 41.623 1343.000\n",
"77 2023-08-21/2023-08-27 45.719 1098.000\n",
"78 2023-08-28/2023-09-03 34.384 1460.000\n",
"79 2023-09-04/2023-09-10 46.845 3027.000\n",
"80 2023-09-11/2023-09-17 54.096 2551.000\n",
"81 2023-09-18/2023-09-24 51.602 4246.000\n",
"82 2023-09-25/2023-10-01 43.876 743.000\n",
"83 2023-10-02/2023-10-08 50.538 837.000\n",
"84 2023-10-09/2023-10-15 50.976 973.000\n",
"85 2023-10-16/2023-10-22 56.146 903.000\n",
"86 2023-10-23/2023-10-29 48.822 594.000\n",
"87 2023-10-30/2023-11-05 60.392 664.000\n",
"88 2023-11-06/2023-11-12 52.533 1757.000\n",
"89 2023-11-13/2023-11-19 53.892 2004.000\n",
"90 2023-11-20/2023-11-26 53.202 1780.000\n",
"91 2023-11-27/2023-12-03 54.253 1058.000\n",
"92 2023-12-04/2023-12-10 47.500 80.000\n",
"93 2023-12-11/2023-12-17 52.174 23.000\n",
"94 2023-12-18/2023-12-24 69.863 73.000\n",
"95 2023-12-25/2023-12-31 41.509 53.000\n",
"96 2024-01-01/2024-01-07 0.000 2.000\n",
"97 2024-01-22/2024-01-28 0.000 1.000\n",
"98 2024-03-25/2024-03-31 0.000 1.000"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# select only prediction-online and plot request_month_year_week vs win_perc\n",
"prediction_online = wins[wins['tool'] == 'prediction-online']\n",
"prediction_online = prediction_online[['request_month_year_week', 'win_perc', 'total_request']]\n",
"prediction_online = prediction_online.sort_values(by='request_month_year_week')\n",
"\n",
"prediction_online.head()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 35 | \n",
" 2023-07-31/2023-08-06 | \n",
" 45.652 | \n",
" 46.000 | \n",
"
\n",
" \n",
" 36 | \n",
" 2023-08-07/2023-08-13 | \n",
" 50.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
" 37 | \n",
" 2023-09-18/2023-09-24 | \n",
" 51.128 | \n",
" 133.000 | \n",
"
\n",
" \n",
" 38 | \n",
" 2023-09-25/2023-10-01 | \n",
" 36.864 | \n",
" 236.000 | \n",
"
\n",
" \n",
" 39 | \n",
" 2023-10-02/2023-10-08 | \n",
" 50.077 | \n",
" 651.000 | \n",
"
\n",
" \n",
" 40 | \n",
" 2023-10-09/2023-10-15 | \n",
" 52.392 | \n",
" 418.000 | \n",
"
\n",
" \n",
" 41 | \n",
" 2023-10-16/2023-10-22 | \n",
" 52.658 | \n",
" 395.000 | \n",
"
\n",
" \n",
" 42 | \n",
" 2023-10-23/2023-10-29 | \n",
" 45.503 | \n",
" 189.000 | \n",
"
\n",
" \n",
" 43 | \n",
" 2023-10-30/2023-11-05 | \n",
" 75.000 | \n",
" 40.000 | \n",
"
\n",
" \n",
" 44 | \n",
" 2023-11-13/2023-11-19 | \n",
" 50.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
" 45 | \n",
" 2023-11-20/2023-11-26 | \n",
" 33.333 | \n",
" 3.000 | \n",
"
\n",
" \n",
" 46 | \n",
" 2023-11-27/2023-12-03 | \n",
" 88.235 | \n",
" 17.000 | \n",
"
\n",
" \n",
" 47 | \n",
" 2023-12-04/2023-12-10 | \n",
" 0.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 48 | \n",
" 2023-12-18/2023-12-24 | \n",
" 50.000 | \n",
" 6.000 | \n",
"
\n",
" \n",
" 49 | \n",
" 2024-01-01/2024-01-07 | \n",
" 100.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 50 | \n",
" 2024-03-11/2024-03-17 | \n",
" 62.808 | \n",
" 406.000 | \n",
"
\n",
" \n",
" 51 | \n",
" 2024-03-18/2024-03-24 | \n",
" 54.453 | \n",
" 2448.000 | \n",
"
\n",
" \n",
" 52 | \n",
" 2024-03-25/2024-03-31 | \n",
" 58.729 | \n",
" 2360.000 | \n",
"
\n",
" \n",
" 53 | \n",
" 2024-04-01/2024-04-07 | \n",
" 57.055 | \n",
" 652.000 | \n",
"
\n",
" \n",
" 54 | \n",
" 2024-04-08/2024-04-14 | \n",
" 75.641 | \n",
" 468.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"35 2023-07-31/2023-08-06 45.652 46.000\n",
"36 2023-08-07/2023-08-13 50.000 2.000\n",
"37 2023-09-18/2023-09-24 51.128 133.000\n",
"38 2023-09-25/2023-10-01 36.864 236.000\n",
"39 2023-10-02/2023-10-08 50.077 651.000\n",
"40 2023-10-09/2023-10-15 52.392 418.000\n",
"41 2023-10-16/2023-10-22 52.658 395.000\n",
"42 2023-10-23/2023-10-29 45.503 189.000\n",
"43 2023-10-30/2023-11-05 75.000 40.000\n",
"44 2023-11-13/2023-11-19 50.000 2.000\n",
"45 2023-11-20/2023-11-26 33.333 3.000\n",
"46 2023-11-27/2023-12-03 88.235 17.000\n",
"47 2023-12-04/2023-12-10 0.000 1.000\n",
"48 2023-12-18/2023-12-24 50.000 6.000\n",
"49 2024-01-01/2024-01-07 100.000 1.000\n",
"50 2024-03-11/2024-03-17 62.808 406.000\n",
"51 2024-03-18/2024-03-24 54.453 2448.000\n",
"52 2024-03-25/2024-03-31 58.729 2360.000\n",
"53 2024-04-01/2024-04-07 57.055 652.000\n",
"54 2024-04-08/2024-04-14 75.641 468.000"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-offline' tool,\n",
"# ordered by request_month_year_week (this cell only builds the table; it does not plot).\n",
"prediction_offline = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-offline',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_offline"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 55 | \n",
" 2023-09-18/2023-09-24 | \n",
" 83.333 | \n",
" 6.000 | \n",
"
\n",
" \n",
" 56 | \n",
" 2023-09-25/2023-10-01 | \n",
" 45.545 | \n",
" 303.000 | \n",
"
\n",
" \n",
" 57 | \n",
" 2023-10-02/2023-10-08 | \n",
" 54.208 | \n",
" 701.000 | \n",
"
\n",
" \n",
" 58 | \n",
" 2023-10-09/2023-10-15 | \n",
" 58.883 | \n",
" 591.000 | \n",
"
\n",
" \n",
" 59 | \n",
" 2023-10-16/2023-10-22 | \n",
" 54.407 | \n",
" 329.000 | \n",
"
\n",
" \n",
" 60 | \n",
" 2023-10-23/2023-10-29 | \n",
" 51.064 | \n",
" 517.000 | \n",
"
\n",
" \n",
" 61 | \n",
" 2023-10-30/2023-11-05 | \n",
" 60.265 | \n",
" 302.000 | \n",
"
\n",
" \n",
" 62 | \n",
" 2023-11-13/2023-11-19 | \n",
" 20.000 | \n",
" 10.000 | \n",
"
\n",
" \n",
" 63 | \n",
" 2023-11-20/2023-11-26 | \n",
" 50.000 | \n",
" 14.000 | \n",
"
\n",
" \n",
" 64 | \n",
" 2023-11-27/2023-12-03 | \n",
" 86.667 | \n",
" 15.000 | \n",
"
\n",
" \n",
" 65 | \n",
" 2023-12-04/2023-12-10 | \n",
" 0.000 | \n",
" 1.000 | \n",
"
\n",
" \n",
" 66 | \n",
" 2023-12-18/2023-12-24 | \n",
" 40.000 | \n",
" 5.000 | \n",
"
\n",
" \n",
" 67 | \n",
" 2024-03-11/2024-03-17 | \n",
" 60.947 | \n",
" 169.000 | \n",
"
\n",
" \n",
" 68 | \n",
" 2024-03-18/2024-03-24 | \n",
" 44.016 | \n",
" 493.000 | \n",
"
\n",
" \n",
" 69 | \n",
" 2024-03-25/2024-03-31 | \n",
" 60.000 | \n",
" 10.000 | \n",
"
\n",
" \n",
" 70 | \n",
" 2024-04-01/2024-04-07 | \n",
" 61.039 | \n",
" 77.000 | \n",
"
\n",
" \n",
" 71 | \n",
" 2024-04-08/2024-04-14 | \n",
" 50.000 | \n",
" 2.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"55 2023-09-18/2023-09-24 83.333 6.000\n",
"56 2023-09-25/2023-10-01 45.545 303.000\n",
"57 2023-10-02/2023-10-08 54.208 701.000\n",
"58 2023-10-09/2023-10-15 58.883 591.000\n",
"59 2023-10-16/2023-10-22 54.407 329.000\n",
"60 2023-10-23/2023-10-29 51.064 517.000\n",
"61 2023-10-30/2023-11-05 60.265 302.000\n",
"62 2023-11-13/2023-11-19 20.000 10.000\n",
"63 2023-11-20/2023-11-26 50.000 14.000\n",
"64 2023-11-27/2023-12-03 86.667 15.000\n",
"65 2023-12-04/2023-12-10 0.000 1.000\n",
"66 2023-12-18/2023-12-24 40.000 5.000\n",
"67 2024-03-11/2024-03-17 60.947 169.000\n",
"68 2024-03-18/2024-03-24 44.016 493.000\n",
"69 2024-03-25/2024-03-31 60.000 10.000\n",
"70 2024-04-01/2024-04-07 61.039 77.000\n",
"71 2024-04-08/2024-04-14 50.000 2.000"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-offline-sme' tool,\n",
"# ordered by request_month_year_week (this cell only builds the table; it does not plot).\n",
"prediction_offline_sme = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-offline-sme',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_offline_sme"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 99 | \n",
" 2023-09-18/2023-09-24 | \n",
" 63.077 | \n",
" 65.000 | \n",
"
\n",
" \n",
" 100 | \n",
" 2023-09-25/2023-10-01 | \n",
" 47.988 | \n",
" 323.000 | \n",
"
\n",
" \n",
" 101 | \n",
" 2023-10-02/2023-10-08 | \n",
" 55.502 | \n",
" 836.000 | \n",
"
\n",
" \n",
" 102 | \n",
" 2023-10-09/2023-10-15 | \n",
" 60.369 | \n",
" 651.000 | \n",
"
\n",
" \n",
" 103 | \n",
" 2023-10-16/2023-10-22 | \n",
" 53.117 | \n",
" 401.000 | \n",
"
\n",
" \n",
" 104 | \n",
" 2023-10-23/2023-10-29 | \n",
" 55.899 | \n",
" 356.000 | \n",
"
\n",
" \n",
" 105 | \n",
" 2023-10-30/2023-11-05 | \n",
" 53.785 | \n",
" 251.000 | \n",
"
\n",
" \n",
" 106 | \n",
" 2023-11-06/2023-11-12 | \n",
" 54.511 | \n",
" 1563.000 | \n",
"
\n",
" \n",
" 107 | \n",
" 2023-11-13/2023-11-19 | \n",
" 52.120 | \n",
" 1863.000 | \n",
"
\n",
" \n",
" 108 | \n",
" 2023-11-20/2023-11-26 | \n",
" 55.873 | \n",
" 1643.000 | \n",
"
\n",
" \n",
" 109 | \n",
" 2023-11-27/2023-12-03 | \n",
" 52.308 | \n",
" 2231.000 | \n",
"
\n",
" \n",
" 110 | \n",
" 2023-12-04/2023-12-10 | \n",
" 54.359 | \n",
" 2627.000 | \n",
"
\n",
" \n",
" 111 | \n",
" 2023-12-11/2023-12-17 | \n",
" 55.039 | \n",
" 3225.000 | \n",
"
\n",
" \n",
" 112 | \n",
" 2023-12-18/2023-12-24 | \n",
" 53.571 | \n",
" 2800.000 | \n",
"
\n",
" \n",
" 113 | \n",
" 2023-12-25/2023-12-31 | \n",
" 47.342 | \n",
" 2558.000 | \n",
"
\n",
" \n",
" 114 | \n",
" 2024-01-01/2024-01-07 | \n",
" 45.344 | \n",
" 3619.000 | \n",
"
\n",
" \n",
" 115 | \n",
" 2024-01-08/2024-01-14 | \n",
" 47.405 | \n",
" 1734.000 | \n",
"
\n",
" \n",
" 116 | \n",
" 2024-01-15/2024-01-21 | \n",
" 44.667 | \n",
" 3694.000 | \n",
"
\n",
" \n",
" 117 | \n",
" 2024-01-22/2024-01-28 | \n",
" 49.464 | \n",
" 3734.000 | \n",
"
\n",
" \n",
" 118 | \n",
" 2024-01-29/2024-02-04 | \n",
" 44.658 | \n",
" 3847.000 | \n",
"
\n",
" \n",
" 119 | \n",
" 2024-02-05/2024-02-11 | \n",
" 50.553 | \n",
" 4071.000 | \n",
"
\n",
" \n",
" 120 | \n",
" 2024-02-12/2024-02-18 | \n",
" 48.598 | \n",
" 3638.000 | \n",
"
\n",
" \n",
" 121 | \n",
" 2024-02-19/2024-02-25 | \n",
" 50.032 | \n",
" 3100.000 | \n",
"
\n",
" \n",
" 122 | \n",
" 2024-02-26/2024-03-03 | \n",
" 51.717 | \n",
" 4368.000 | \n",
"
\n",
" \n",
" 123 | \n",
" 2024-03-04/2024-03-10 | \n",
" 54.806 | \n",
" 3454.000 | \n",
"
\n",
" \n",
" 124 | \n",
" 2024-03-11/2024-03-17 | \n",
" 55.848 | \n",
" 3044.000 | \n",
"
\n",
" \n",
" 125 | \n",
" 2024-03-18/2024-03-24 | \n",
" 48.639 | \n",
" 2535.000 | \n",
"
\n",
" \n",
" 126 | \n",
" 2024-03-25/2024-03-31 | \n",
" 41.345 | \n",
" 1398.000 | \n",
"
\n",
" \n",
" 127 | \n",
" 2024-04-01/2024-04-07 | \n",
" 59.435 | \n",
" 1097.000 | \n",
"
\n",
" \n",
" 128 | \n",
" 2024-04-08/2024-04-14 | \n",
" 68.281 | \n",
" 413.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"99 2023-09-18/2023-09-24 63.077 65.000\n",
"100 2023-09-25/2023-10-01 47.988 323.000\n",
"101 2023-10-02/2023-10-08 55.502 836.000\n",
"102 2023-10-09/2023-10-15 60.369 651.000\n",
"103 2023-10-16/2023-10-22 53.117 401.000\n",
"104 2023-10-23/2023-10-29 55.899 356.000\n",
"105 2023-10-30/2023-11-05 53.785 251.000\n",
"106 2023-11-06/2023-11-12 54.511 1563.000\n",
"107 2023-11-13/2023-11-19 52.120 1863.000\n",
"108 2023-11-20/2023-11-26 55.873 1643.000\n",
"109 2023-11-27/2023-12-03 52.308 2231.000\n",
"110 2023-12-04/2023-12-10 54.359 2627.000\n",
"111 2023-12-11/2023-12-17 55.039 3225.000\n",
"112 2023-12-18/2023-12-24 53.571 2800.000\n",
"113 2023-12-25/2023-12-31 47.342 2558.000\n",
"114 2024-01-01/2024-01-07 45.344 3619.000\n",
"115 2024-01-08/2024-01-14 47.405 1734.000\n",
"116 2024-01-15/2024-01-21 44.667 3694.000\n",
"117 2024-01-22/2024-01-28 49.464 3734.000\n",
"118 2024-01-29/2024-02-04 44.658 3847.000\n",
"119 2024-02-05/2024-02-11 50.553 4071.000\n",
"120 2024-02-12/2024-02-18 48.598 3638.000\n",
"121 2024-02-19/2024-02-25 50.032 3100.000\n",
"122 2024-02-26/2024-03-03 51.717 4368.000\n",
"123 2024-03-04/2024-03-10 54.806 3454.000\n",
"124 2024-03-11/2024-03-17 55.848 3044.000\n",
"125 2024-03-18/2024-03-24 48.639 2535.000\n",
"126 2024-03-25/2024-03-31 41.345 1398.000\n",
"127 2024-04-01/2024-04-07 59.435 1097.000\n",
"128 2024-04-08/2024-04-14 68.281 413.000"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-online-sme' tool,\n",
"# ordered by request_month_year_week (this cell only builds the table; it does not plot).\n",
"prediction_online_sme = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-online-sme',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_online_sme.head()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 129 | \n",
" 2024-02-26/2024-03-03 | \n",
" 32.384 | \n",
" 281.000 | \n",
"
\n",
" \n",
" 130 | \n",
" 2024-03-04/2024-03-10 | \n",
" 35.172 | \n",
" 4569.000 | \n",
"
\n",
" \n",
" 131 | \n",
" 2024-03-11/2024-03-17 | \n",
" 47.251 | \n",
" 5602.000 | \n",
"
\n",
" \n",
" 132 | \n",
" 2024-03-18/2024-03-24 | \n",
" 45.834 | \n",
" 4885.000 | \n",
"
\n",
" \n",
" 133 | \n",
" 2024-03-25/2024-03-31 | \n",
" 51.273 | \n",
" 4006.000 | \n",
"
\n",
" \n",
" 134 | \n",
" 2024-04-01/2024-04-07 | \n",
" 57.021 | \n",
" 1289.000 | \n",
"
\n",
" \n",
" 135 | \n",
" 2024-04-08/2024-04-14 | \n",
" 70.376 | \n",
" 692.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"129 2024-02-26/2024-03-03 32.384 281.000\n",
"130 2024-03-04/2024-03-10 35.172 4569.000\n",
"131 2024-03-11/2024-03-17 47.251 5602.000\n",
"132 2024-03-18/2024-03-24 45.834 4885.000\n",
"133 2024-03-25/2024-03-31 51.273 4006.000\n",
"134 2024-04-01/2024-04-07 57.021 1289.000\n",
"135 2024-04-08/2024-04-14 70.376 692.000"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-request-rag' tool,\n",
"# ordered by request_month_year_week (this cell only builds the table; it does not plot).\n",
"prediction_request_rag = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-request-rag',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_request_rag.head()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 138 | \n",
" 2024-03-25/2024-03-31 | \n",
" 56.757 | \n",
" 740.000 | \n",
"
\n",
" \n",
" 139 | \n",
" 2024-04-01/2024-04-07 | \n",
" 58.025 | \n",
" 1458.000 | \n",
"
\n",
" \n",
" 140 | \n",
" 2024-04-08/2024-04-14 | \n",
" 73.679 | \n",
" 1003.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"138 2024-03-25/2024-03-31 56.757 740.000\n",
"139 2024-04-01/2024-04-07 58.025 1458.000\n",
"140 2024-04-08/2024-04-14 73.679 1003.000"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-request-reasoning' tool,\n",
"# ordered by request_month_year_week.\n",
"prediction_request_reasoning = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-request-reasoning',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_request_reasoning"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 141 | \n",
" 2024-04-01/2024-04-07 | \n",
" 68.387 | \n",
" 155.000 | \n",
"
\n",
" \n",
" 142 | \n",
" 2024-04-08/2024-04-14 | \n",
" 78.514 | \n",
" 619.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"141 2024-04-01/2024-04-07 68.387 155.000\n",
"142 2024-04-08/2024-04-14 78.514 619.000"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-request-reasoning-claude' tool,\n",
"# ordered by request_month_year_week.\n",
"prediction_request_reasoning_claude = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-request-reasoning-claude',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_request_reasoning_claude"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 136 | \n",
" 2024-04-01/2024-04-07 | \n",
" 68.627 | \n",
" 51.000 | \n",
"
\n",
" \n",
" 137 | \n",
" 2024-04-08/2024-04-14 | \n",
" 74.184 | \n",
" 337.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"136 2024-04-01/2024-04-07 68.627 51.000\n",
"137 2024-04-08/2024-04-14 74.184 337.000"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-request-rag-claude' tool,\n",
"# ordered by request_month_year_week.\n",
"prediction_request_rag_claude = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-request-rag-claude',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_request_rag_claude"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" win | \n",
" request_month_year_week | \n",
" win_perc | \n",
" total_request | \n",
"
\n",
" \n",
" \n",
" \n",
" 143 | \n",
" 2024-04-08/2024-04-14 | \n",
" 72.822 | \n",
" 574.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"win request_month_year_week win_perc total_request\n",
"143 2024-04-08/2024-04-14 72.822 574.000"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly win percentage and request volume for the 'prediction-url-cot-claude' tool,\n",
"# ordered by request_month_year_week.\n",
"prediction_url_cot_claude = (\n",
"    wins.loc[\n",
"        wins['tool'] == 'prediction-url-cot-claude',\n",
"        ['request_month_year_week', 'win_perc', 'total_request'],\n",
"    ]\n",
"    .sort_values(by='request_month_year_week')\n",
")\n",
"\n",
"prediction_url_cot_claude.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Profitability analysis"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Make sure creation_timestamp is a datetime column (assigned in place on all_trades).\n",
"all_trades['creation_timestamp'] = pd.to_datetime(all_trades['creation_timestamp'])\n",
"\n",
"# Keep only trades whose current_answer is 0, 1 or -1, then renumber the rows from 0.\n",
"# NOTE(review): -1 presumably marks an invalid market -- confirm against the data pipeline.\n",
"all_trades = (\n",
"    all_trades\n",
"    .loc[lambda trades: trades['current_answer'].isin([0., 1., -1.])]\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of traders: 184\n",
"Number of trades: 18,941\n"
]
}
],
"source": [
"# Headline counts: rows in summary_traders and distinct trade_id values in all_trades.\n",
"# NOTE(review): the trader count assumes summary_traders holds one row per trader -- confirm.\n",
"print(\n",
"    f\"Number of traders: {len(summary_traders):,}\",\n",
"    f\"Number of trades: {all_trades['trade_id'].nunique():,}\",\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Number of trades')"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"