{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import requests\n",
    "\n",
    "# Content identifier of the accuracy CSV and the gateway used to download it.\n",
    "# NOTE(review): `hash` shadows the Python builtin; the name is kept because cells outside this view may read it.\n",
    "hash = \"QmR8etyW3TPFadNtNrW54vfnFqmh8vBrMARWV76EmxCZyk\"\n",
    "ipfs_address = \"https://gateway.autonolas.tech/ipfs/\"\n",
    "\n",
    "accuracy_link = ipfs_address + hash\n",
    "# Bound the wait: without a timeout requests can block indefinitely on an unresponsive gateway.\n",
    "response = requests.get(accuracy_link, timeout=60)\n",
    "print(response)\n",
    "# Stop here on an HTTP error instead of letting the next cell parse an error page as CSV.\n",
    "response.raise_for_status()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "headers = ['tool', 'tool_accuracy', 'total_requests', 'min', 'max']\n"
     ]
    }
   ],
   "source": [
    "from io import StringIO\n",
    "\n",
    "# Map each tool name to [total_requests, tool_accuracy], both kept as the raw CSV strings.\n",
    "accuracy_store = {}\n",
    "data = StringIO(response.text)\n",
    "csv_reader = csv.reader(data, delimiter=',')\n",
    "for row in csv_reader:\n",
    "    # csv.reader yields [] for blank lines; indexing such a row would raise IndexError.\n",
    "    if not row:\n",
    "        continue\n",
    "    if row[0] == \"tool\":\n",
    "        print(f\"headers = {row}\")\n",
    "        continue\n",
    "    accuracy_store[row[0]] = [\n",
    "        row[2],\n",
    "        row[1],\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'claude-prediction-offline': ['481', '57.380457380457386'], 'claude-prediction-online': ['1055', '61.137440758293835'], 'prediction-offline': ['4465', '67.41321388577828'], 'prediction-offline-sme': ['61', '70.49180327868852'], 'prediction-online': ['9490', '66.00632244467862'], 'prediction-online-sme': ['14642', '65.67408823931157'], 'prediction-request-rag': ['2691', '63.58231140839836'], 'prediction-request-rag-claude': ['7428', '65.64351103931072'], 'prediction-request-reasoning': ['17372', '67.11374625834677'], 'prediction-request-reasoning-claude': ['2470', '66.72064777327935'], 'prediction-url-cot-claude': ['1596', '61.904761904761905']}\n"
     ]
    }
   ],
   "source": [
    "# Show the parsed mapping: tool name -> [total_requests, tool_accuracy].\n",
    "print(accuracy_store)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the three parquet inputs in one pass; the unpacking order matches the path order.\n",
    "fpmms, tools, trades = (\n",
    "    pd.read_parquet(parquet_path)\n",
    "    for parquet_path in (\n",
    "        '../data/fpmms.parquet',\n",
    "        '../data/tools.parquet',\n",
    "        '../data/all_trades_profitability.parquet',\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tools included in the accuracy comparison; rows for any other tool are filtered out\n",
    "# with `isin(INC_TOOLS)` before accuracy is computed.\n",
    "INC_TOOLS = [\n",
    "    \"prediction-online\",\n",
    "    \"prediction-offline\",\n",
    "    \"claude-prediction-online\",\n",
    "    \"claude-prediction-offline\",\n",
    "    \"prediction-offline-sme\",\n",
    "    \"prediction-online-sme\",\n",
    "    \"prediction-request-rag\",\n",
    "    \"prediction-request-reasoning\",\n",
    "    \"prediction-url-cot-claude\",\n",
    "    \"prediction-request-rag-claude\",\n",
    "    \"prediction-request-reasoning-claude\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>win</th>\n",
       "      <th>tool</th>\n",
       "      <th>tool_accuracy</th>\n",
       "      <th>total_requests</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>66.308244</td>\n",
       "      <td>279</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>claude-prediction-online</td>\n",
       "      <td>58.914027</td>\n",
       "      <td>1105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>67.717915</td>\n",
       "      <td>2283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-offline-sme</td>\n",
       "      <td>55.555556</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>65.459066</td>\n",
       "      <td>5631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>67.417656</td>\n",
       "      <td>8167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>64.217072</td>\n",
       "      <td>1769</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>69.554566</td>\n",
       "      <td>4490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>68.813594</td>\n",
       "      <td>9828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>68.910256</td>\n",
       "      <td>2184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>64.584980</td>\n",
       "      <td>1265</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "win                                 tool  tool_accuracy  total_requests\n",
       "0              claude-prediction-offline      66.308244             279\n",
       "1               claude-prediction-online      58.914027            1105\n",
       "2                     prediction-offline      67.717915            2283\n",
       "3                 prediction-offline-sme      55.555556              18\n",
       "4                      prediction-online      65.459066            5631\n",
       "5                  prediction-online-sme      67.417656            8167\n",
       "6                 prediction-request-rag      64.217072            1769\n",
       "7          prediction-request-rag-claude      69.554566            4490\n",
       "8           prediction-request-reasoning      68.813594            9828\n",
       "9    prediction-request-reasoning-claude      68.910256            2184\n",
       "10             prediction-url-cot-claude      64.584980            1265"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the tools under study and drop requests flagged as errors.\n",
    "tools_inc = tools[tools['tool'].isin(INC_TOOLS)]\n",
    "# .copy() so the column assignments below write to an independent frame, not a filtered slice.\n",
    "tools_non_error = tools_inc[tools_inc['error'] != 1].copy()\n",
    "# Normalise lowercase answers so they compare equal to the 'Yes'/'No' votes.\n",
    "tools_non_error['currentAnswer'] = tools_non_error['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
    "# Only rows where both the answer and the vote are 'Yes'/'No' can be scored.\n",
    "tools_non_error = tools_non_error[tools_non_error['currentAnswer'].isin(['Yes', 'No'])]\n",
    "tools_non_error = tools_non_error[tools_non_error['vote'].isin(['Yes', 'No'])].copy()\n",
    "tools_non_error['win'] = (tools_non_error['currentAnswer'] == tools_non_error['vote']).astype(int)\n",
    "tools_non_error.columns = tools_non_error.columns.astype(str)\n",
    "\n",
    "# Count losses (win == 0) and wins (win == 1) per tool.\n",
    "wins = tools_non_error.groupby(['tool', 'win']).size().unstack(fill_value=0)\n",
    "# If every scored request had the same outcome, one count column is absent; add it as zeros\n",
    "# so the lookups below cannot raise KeyError.\n",
    "for outcome in (0, 1):\n",
    "    if outcome not in wins.columns:\n",
    "        wins[outcome] = 0\n",
    "wins['total_requests'] = wins[0] + wins[1]\n",
    "# Accuracy is expressed as a percentage (0-100).\n",
    "wins['tool_accuracy'] = (wins[1] / wins['total_requests']) * 100\n",
    "wins = wins.reset_index()\n",
    "wins.columns = wins.columns.astype(str)\n",
    "wins = wins[[\"tool\", \"tool_accuracy\", \"total_requests\"]]\n",
    "wins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tool</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>claude-prediction-offline</th>\n",
       "      <td>2024-04-23 13:09:30</td>\n",
       "      <td>2024-06-10 00:31:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>claude-prediction-online</th>\n",
       "      <td>2024-04-12 12:24:20</td>\n",
       "      <td>2024-06-09 21:41:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-offline</th>\n",
       "      <td>2024-04-12 12:20:10</td>\n",
       "      <td>2024-06-08 23:45:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-offline-sme</th>\n",
       "      <td>2024-04-16 07:58:45</td>\n",
       "      <td>2024-04-29 20:45:15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-online</th>\n",
       "      <td>2024-04-16 05:52:40</td>\n",
       "      <td>2024-06-09 21:47:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-online-sme</th>\n",
       "      <td>2024-04-12 11:51:30</td>\n",
       "      <td>2024-06-10 00:06:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-rag</th>\n",
       "      <td>2024-04-12 11:39:40</td>\n",
       "      <td>2024-06-09 21:17:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-rag-claude</th>\n",
       "      <td>2024-04-12 11:14:30</td>\n",
       "      <td>2024-06-07 11:42:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-reasoning</th>\n",
       "      <td>2024-04-12 11:57:05</td>\n",
       "      <td>2024-06-09 21:50:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-reasoning-claude</th>\n",
       "      <td>2024-04-12 11:53:55</td>\n",
       "      <td>2024-06-05 05:00:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-url-cot-claude</th>\n",
       "      <td>2024-04-12 11:37:15</td>\n",
       "      <td>2024-06-05 05:21:10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                     min                  max\n",
       "tool                                                                         \n",
       "claude-prediction-offline            2024-04-23 13:09:30  2024-06-10 00:31:30\n",
       "claude-prediction-online             2024-04-12 12:24:20  2024-06-09 21:41:20\n",
       "prediction-offline                   2024-04-12 12:20:10  2024-06-08 23:45:00\n",
       "prediction-offline-sme               2024-04-16 07:58:45  2024-04-29 20:45:15\n",
       "prediction-online                    2024-04-16 05:52:40  2024-06-09 21:47:20\n",
       "prediction-online-sme                2024-04-12 11:51:30  2024-06-10 00:06:00\n",
       "prediction-request-rag               2024-04-12 11:39:40  2024-06-09 21:17:45\n",
       "prediction-request-rag-claude        2024-04-12 11:14:30  2024-06-07 11:42:30\n",
       "prediction-request-reasoning         2024-04-12 11:57:05  2024-06-09 21:50:45\n",
       "prediction-request-reasoning-claude  2024-04-12 11:53:55  2024-06-05 05:00:10\n",
       "prediction-url-cot-claude            2024-04-12 11:37:15  2024-06-05 05:21:10"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# `tools_non_error` (error-free requests with a 'Yes'/'No' answer and vote) is built by the\n",
    "# accuracy cell directly above; reuse it instead of repeating the whole filter pipeline here.\n",
    "# For each tool, take the earliest and latest `request_time` among those rows.\n",
    "timeline = tools_non_error.groupby(['tool'])[\"request_time\"].agg([\"min\",\"max\"])\n",
    "timeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>tool_accuracy</th>\n",
       "      <th>total_requests</th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>66.308244</td>\n",
       "      <td>279</td>\n",
       "      <td>2024-04-23 13:09:30</td>\n",
       "      <td>2024-06-10 00:31:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>claude-prediction-online</td>\n",
       "      <td>58.914027</td>\n",
       "      <td>1105</td>\n",
       "      <td>2024-04-12 12:24:20</td>\n",
       "      <td>2024-06-09 21:41:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>67.717915</td>\n",
       "      <td>2283</td>\n",
       "      <td>2024-04-12 12:20:10</td>\n",
       "      <td>2024-06-08 23:45:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-offline-sme</td>\n",
       "      <td>55.555556</td>\n",
       "      <td>18</td>\n",
       "      <td>2024-04-16 07:58:45</td>\n",
       "      <td>2024-04-29 20:45:15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>65.459066</td>\n",
       "      <td>5631</td>\n",
       "      <td>2024-04-16 05:52:40</td>\n",
       "      <td>2024-06-09 21:47:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>67.417656</td>\n",
       "      <td>8167</td>\n",
       "      <td>2024-04-12 11:51:30</td>\n",
       "      <td>2024-06-10 00:06:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>64.217072</td>\n",
       "      <td>1769</td>\n",
       "      <td>2024-04-12 11:39:40</td>\n",
       "      <td>2024-06-09 21:17:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>69.554566</td>\n",
       "      <td>4490</td>\n",
       "      <td>2024-04-12 11:14:30</td>\n",
       "      <td>2024-06-07 11:42:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>68.813594</td>\n",
       "      <td>9828</td>\n",
       "      <td>2024-04-12 11:57:05</td>\n",
       "      <td>2024-06-09 21:50:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>68.910256</td>\n",
       "      <td>2184</td>\n",
       "      <td>2024-04-12 11:53:55</td>\n",
       "      <td>2024-06-05 05:00:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>64.584980</td>\n",
       "      <td>1265</td>\n",
       "      <td>2024-04-12 11:37:15</td>\n",
       "      <td>2024-06-05 05:21:10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   tool  tool_accuracy  total_requests  \\\n",
       "0             claude-prediction-offline      66.308244             279   \n",
       "1              claude-prediction-online      58.914027            1105   \n",
       "2                    prediction-offline      67.717915            2283   \n",
       "3                prediction-offline-sme      55.555556              18   \n",
       "4                     prediction-online      65.459066            5631   \n",
       "5                 prediction-online-sme      67.417656            8167   \n",
       "6                prediction-request-rag      64.217072            1769   \n",
       "7         prediction-request-rag-claude      69.554566            4490   \n",
       "8          prediction-request-reasoning      68.813594            9828   \n",
       "9   prediction-request-reasoning-claude      68.910256            2184   \n",
       "10            prediction-url-cot-claude      64.584980            1265   \n",
       "\n",
       "                    min                  max  \n",
       "0   2024-04-23 13:09:30  2024-06-10 00:31:30  \n",
       "1   2024-04-12 12:24:20  2024-06-09 21:41:20  \n",
       "2   2024-04-12 12:20:10  2024-06-08 23:45:00  \n",
       "3   2024-04-16 07:58:45  2024-04-29 20:45:15  \n",
       "4   2024-04-16 05:52:40  2024-06-09 21:47:20  \n",
       "5   2024-04-12 11:51:30  2024-06-10 00:06:00  \n",
       "6   2024-04-12 11:39:40  2024-06-09 21:17:45  \n",
       "7   2024-04-12 11:14:30  2024-06-07 11:42:30  \n",
       "8   2024-04-12 11:57:05  2024-06-09 21:50:45  \n",
       "9   2024-04-12 11:53:55  2024-06-05 05:00:10  \n",
       "10  2024-04-12 11:37:15  2024-06-05 05:21:10  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attach each tool's min/max request time to its accuracy row; the left join keeps every row of `wins`.\n",
    "total = pd.merge(wins, timeline, how=\"left\", on=\"tool\")\n",
    "total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# index=False: the accuracy CSV parsed at the top of this notebook has 'tool' as its first\n",
    "# column (tool, tool_accuracy, total_requests, min, max). Writing the RangeIndex would add an\n",
    "# unnamed leading column and shift every field by one for a reader that indexes by position.\n",
    "# NOTE(review): presumably this file is what gets published as that CSV - confirm.\n",
    "total.to_csv(\"accuracy_info.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_question(text):\n",
    "    \"\"\"Return the first quoted question found in ``text``.\n",
    "\n",
    "    The match is the first run of non-quote characters that ends in ``?`` and sits\n",
    "    between two double-quote characters. If there is no such run, ``text`` is\n",
    "    returned unchanged. Non-string input (for example ``None`` or NaN from a\n",
    "    missing prompt) is also returned unchanged rather than raising ``TypeError``.\n",
    "    \"\"\"\n",
    "    # re.search only accepts str/bytes; pass anything else through untouched.\n",
    "    if not isinstance(text, str):\n",
    "        return text\n",
    "    pattern = r'\"([^\"]+\\?)\"'\n",
    "    match = re.search(pattern, text)\n",
    "    if match:\n",
    "        return match.group(1)\n",
    "    return text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_current_answer(q):\n",
    "    \"\"\"Return the distinct ``current_answer`` values of the trades whose title equals ``q``.\n",
    "\n",
    "    Reads the global ``trades`` frame; the result is whatever ``Series.unique()``\n",
    "    yields for the matching rows (empty when no title matches).\n",
    "    \"\"\"\n",
    "    title_matches = trades['title'] == q\n",
    "    return trades.loc[title_matches, 'current_answer'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# only select trades in May 2024\n",
    "trades['creation_timestamp'] = pd.to_datetime(trades['creation_timestamp'])\n",
    "created_at = trades['creation_timestamp']\n",
    "trades = trades[(created_at.dt.month == 5) & (created_at.dt.year == 2024)]\n",
    "\n",
    "# make a column for winning_vote\n",
    "# The accuracy cell maps lowercase 'yes'/'no' answers to 'Yes'/'No' before comparing them with\n",
    "# the vote; do the same here so such rows are not marked as losses purely because of casing.\n",
    "# The stored `currentAnswer` column itself is left unchanged.\n",
    "normalised_answer = tools['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
    "tools['winning_vote'] = (tools['vote'] == normalised_answer)\n",
    "tools = tools[tools['tool']!= 'resolve-market-reasoning-gpt-4'].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace each prompt with the value `extract_question` returns for it.\n",
    "tools['prompt_request'] = tools['prompt_request'].map(extract_question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count trades per (title, winning_trade) pair, then pivot the outcome into columns;\n",
    "# combinations that never occurred become 0.\n",
    "outcome_counts = trades.groupby(['title', 'winning_trade']).size()\n",
    "trades_grouped = outcome_counts.unstack().fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Share of winning trades per title (a 0-1 fraction despite the column name),\n",
    "# together with the number of trades behind it.\n",
    "trades_per_title = trades_grouped.sum(axis=1)\n",
    "winning_trades_percentage = (trades_grouped[True] / trades_per_title).reset_index()\n",
    "winning_trades_percentage.columns = ['title', 'winning_trade_percentage']\n",
    "winning_trades_percentage['num_trades'] = list(trades_per_title.values)\n",
    "\n",
    "# Rank once, best first, then take the last and the first 50 rows of that ranking.\n",
    "ranked_titles = winning_trades_percentage.sort_values(by='winning_trade_percentage', ascending=False)\n",
    "winning_trades_percentage_bottom_50 = ranked_titles.tail(50).reset_index(drop=True)\n",
    "winning_trades_percentage_top_50 = ranked_titles.head(50).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# winning_trades_percentage.sort_values(by='winning_trade_percentage', ascending=False).reset_index(drop=True).to_csv('winning_trades_percentage.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Will Kylian Mbappe leave Paris St-Germain at the end of the season by 16 May 2024?',\n",
       " 'Will BlizzCon be reinstated on or by 1 May 2024 after its cancellation in 2024?',\n",
       " 'Will Joe Biden approve more weapons for Ukraine by 4 May 2024?',\n",
       " \"Will FiiO's new custom in-ear monitors become the top-selling wireless earbuds by 9 May 2024?\",\n",
       " 'Will Mohamed Salah leave Liverpool on 7 May 2024?',\n",
       " \"Will Ryan Gosling accept a 'dark' role in a film by 14 May 2024?\",\n",
       " 'Will the Philadelphia 76ers win the NBA play-offs on 7 May 2024?',\n",
       " 'Will the Panamanian presidential election result in a clear victor by 12 May 2024?',\n",
       " 'Will the Museum of Old and New Art in Tasmania be allowed to keep its exhibit women-only by 14 May 2024?',\n",
       " \"Will Diego Maradona's 'Stolen' Golden Ball be auctioned off on 14 May 2024?\",\n",
       " 'Will the Mercedes G-Wagen release an electric version on 1 May 2024?',\n",
       " 'Will the Israeli government lift the broadcast ban on Al Jazeera on or before 13 May 2024?',\n",
       " 'Will Intel release its Core Ultra 200 Arrow Lake CPUs by 16 May 2024?',\n",
       " 'Will the Atlanta City Council pay $3.8 million to settle a lawsuit by the family of a church deacon who died in a struggle with a city police officer by 13 May 2024?',\n",
       " 'Will Voyager-1 continue to send readable data until 1 May 2024?',\n",
       " 'Will the Amber Alert issued in New Mexico result in the discovery of the missing 10-month-old baby by 13 May 2024?',\n",
       " \"Will Florida's ban on lab-grown meat be overturned by 12 May 2024?\",\n",
       " \"Will the US government successfully distribute the $138.7 million payout to Larry Nassar's victims by 1 May 2024?\",\n",
       " 'Will a new sport be officially added to the Olympics programme on 16 May 2024?',\n",
       " \"Will Kristi Noem be announced as Donald Trump's vice presidential running mate by 6 May 2024?\",\n",
       " 'Will the United Auto Workers union strike against Daimler Truck on or by 7 May 2024?',\n",
       " 'Will the World Snooker Championship 2024 conclude with Judd Trump or Tom Ford as the winner by May 5, 2024?',\n",
       " \"Will Maria Georgas be announced as the next 'Bachelorette' lead on 9 May 2024?\",\n",
       " 'Will Apple release new iPads at their event on May 7, 2024?',\n",
       " 'Will Joe Biden still be the President of the United States on 11 May 2024?',\n",
       " \"Will the world's biggest 3D printer be used to make parts of houses by 2 May 2024?\",\n",
       " \"Will Anthony Edwards be named NBA's MVP on 11 May 2024?\",\n",
       " 'Will a winner be declared in the Eurovision 2024 grand final by 19 May 2024?',\n",
       " \"Will a new mission be launched to explore the moon's 'hidden side' by 12 May 2024?\",\n",
       " 'Will Mike Tyson win his bout against Jake Paul on 7 May 2024?',\n",
       " 'Will the bird flu outbreak be declared a global pandemic by 12 May 2024?',\n",
       " 'Will the new Apple Pencil Pro be revealed by 15 May 2024?',\n",
       " \"Will the amateur angler who landed UK's 'biggest fish' in Essex catch another record-breaking fish by 7 May 2024?\",\n",
       " \"Will Saul 'Canelo' Alvarez successfully defend his WBA, WBC, WBO, and IBF titles again by 13 May 2024?\",\n",
       " \"Will Taylor Swift's 'The Tortured Poets Department' album reach number 1 on Billboard 200 on 3 May 2024?\",\n",
       " 'Will Joe Biden attend the White House Correspondents Dinner on 5 May 2024?',\n",
       " 'Will King Charles perform public duties on 5 May 2024, after his progress in cancer treatment?',\n",
       " \"Will LinkedIn's new puzzle games Pinpoint, Queens, and Crossclimb be successful on their platform by 9 May 2024?\",\n",
       " 'Will South Dakota Governor Kristi Noem resign over the puppy killing controversy by 15 May 2024?',\n",
       " 'Will Apple announce the release of a new M4 chip by 13 May 2024?',\n",
       " 'Will Eric Adams still be the mayor of New York City on 10 May 2024?',\n",
       " \"Will the livestream video 'portals' connecting New York City and Dublin still be operational on 19 May 2024?\",\n",
       " 'Will there be more pro-Palestinian protests on US university campuses on 6 May 2024?',\n",
       " 'Will Google Pixel 8a be released at Google I/O 2024 on 14 May?',\n",
       " 'Will Apple announce more than just a spec bump at the May 2024 iPad event?',\n",
       " \"Will Apple's new Magic Keyboard for the iPad Pro M4 be released by 15 May 2024?\",\n",
       " 'Will the UEFA Champions League final be between PSG and Borussia Dortmund on 13 May 2024?',\n",
       " 'Will the FBI report an increase in scams targeting Americans older than 60 in 2024?',\n",
       " 'Will Erik ten Hag remain as Manchester United manager on 17 May 2024?',\n",
       " 'Will Jofra Archer be a part of the England squad for T20 World Cup in June 2024?']"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Titles of the 50 rows with the highest winning-trade share.\n",
    "winning_trades_percentage_top_50['title'].tolist()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[\"Will 'Scavengers Reign' be renewed for a second season on Netflix by 19 May 2024?\",\n",
       " 'Will Fiona Harvey officially file a lawsuit against Netflix and Richard Gadd by 17 May 2024?',\n",
       " 'Will the final report on the Baltimore bridge collapse be released by 20 May 2024?',\n",
       " 'Will the Autonomous Racing League successfully hold their second race by May 3, 2024?',\n",
       " 'Will Trent Staggs win the Senatorial race to replace Sen. Mitt Romney (R-UT) on 5 May 2024?',\n",
       " 'Will the Houston area experience flooding conditions on 11 May 2024?',\n",
       " \"Will 'Wednesday' season 2 be released on Netflix by 1 May 2024?\",\n",
       " 'Will Arsenal win against Bournemouth in the Premier League match on 12 May 2024?',\n",
       " 'Will Qualcomm release its Snapdragon X Plus laptop chip by 1 May 2024?',\n",
       " \"Will Feyenoord's Arne Slot become the new manager of Liverpool by 1 May 2024?\",\n",
       " 'Will the FCC receive additional funding for replacing Huawei gear by 10 May 2024?',\n",
       " 'Will there be any major cyber attack on an organization using AI before 2 May 2024?',\n",
       " 'Will Sony complete the takeover of Paramount by 11 May 2024?',\n",
       " \"Will 'Hell's Kitchen' win the Tony Awards for Best Musical on 7 May 2024?\",\n",
       " 'Will Tesla announce reinstating any laid off supercharger workers by 11 May 2024?',\n",
       " 'Will there be another tornado in Nebraska and Iowa on 6 May 2024?',\n",
       " 'Will the DJI drones be officially banned in the United States by 4 May 2024?',\n",
       " 'Will OpenAI debut a multimodal AI digital assistant by 19 May 2024?',\n",
       " 'Will TikTok be purchased by a Wall Street or Tech billionaire by 2 May 2024?',\n",
       " \"Will the 'Lost' Gustav Klimt painting be sold at the auction in Vienna on 3 May 2024?\",\n",
       " \"Will the Federal Communications Commission levy fines against AT&T, Sprint, T-Mobile, and Verizon for illegally sharing customers' location data by 9 May 2024?\",\n",
       " 'Will the Manchester City win the WSL title on 14 May 2024?',\n",
       " 'Will Meta start making profit from generative AI by 3 May 2024?',\n",
       " 'Will Apple launch an AI-powered iOS 18 on or by 1 May 2024?',\n",
       " 'Will iOS 18 receive a major AI overhaul by 6 May 2024?',\n",
       " 'Will Ippei Mizuhara be sentenced for bank fraud by 15 May 2024?',\n",
       " 'Will Tesla lay off nearly 2,700 workers at its Austin, Texas factory by 1 May 2024?',\n",
       " 'Will Manchester City win the Premier League title on 11 May 2024?',\n",
       " 'Will there be another deadly pandemic by 8 May 2024?',\n",
       " 'Will China successfully collect samples from the far side of the Moon on 10 May 2024?',\n",
       " \"Will the American Airlines correct their system's error of mistaking 101-year-old passenger for a baby by 7 May 2024?\",\n",
       " 'Will the Boeing Starliner capsule successfully complete its first astronaut-crewed flight to the International Space Station by 13 May 2024?',\n",
       " \"Will the Technics' special-edition turntable in collaboration with Lamborghini be released by 17 May 2024?\",\n",
       " 'Will the Florida Panthers win against the Boston Bruins in the Game 3 on 17 May 2024?',\n",
       " 'Will Harvard Yard be free from Anti-Israel protests by 2 May 2024?',\n",
       " \"Will Samsung's latest jibe have any impact on Apple's sales by 11 May 2024?\",\n",
       " \"Will the Miss USA organization respond to the call for 'full transparency' from contestants by 16 May 2024?\",\n",
       " 'Will Tom Daley win a medal at the Paris Olympics 2024 by 14 May 2024?',\n",
       " \"Will Liverpool win any more trophies in Jurgen Klopp's final season?\",\n",
       " 'Will Liverpool win any more trophies by 2 May 2024?',\n",
       " 'Will Caitlin Clark score more than 20 points in her next NBA game by 10 May 2024?',\n",
       " 'Will the statues of civil rights leader Daisy Bates and singer Johnny Cash replace the Arkansas statues at the U.S Capitol by 14 May 2024?',\n",
       " \"Will the season 6 of Netflix's Cobra Kai be released in 3 parts by 12 May 2024?\",\n",
       " \"Will the 'Don't Say Gay' education restrictions bill be implemented in Alabama on or before 1 May 2024?\",\n",
       " \"Will the 'lost' Gustav Klimt painting be successfully auctioned by 3 May 2024?\",\n",
       " 'Will the Kansas City Chiefs win their next game on or before May 15, 2024?',\n",
       " 'Will Lando Norris win another F1 race by 15 May 2024?',\n",
       " 'Will Pennsylvania be a red state by 6 May 2024?',\n",
       " 'Will Tesla face significant financial troubles by 11 May 2024?',\n",
       " 'Will the BattlerGC Pro be released for the GameCube on or by 3 May 2024?']"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show every title in winning_trades_percentage_bottom_50 as a plain Python list.\n",
    "winning_trades_percentage_bottom_50.loc[:, 'title'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "def losing_percentage(q):\n",
    "    \"\"\"Return each tool's share of losing votes for one market question.\n",
    "\n",
    "    Selects the rows of the notebook-level `tools` frame whose\n",
    "    `prompt_request` contains `q`, counts them per (tool, winning_vote)\n",
    "    and divides the False count by the False + True count.\n",
    "\n",
    "    Args:\n",
    "        q: Question title to search for; matched as a literal substring,\n",
    "            not as a regular expression.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with columns `tool`, `losing_percentage` and `num_calls`\n",
    "        (rows matched for that tool), sorted by `losing_percentage`,\n",
    "        highest first.\n",
    "    \"\"\"\n",
    "    print(f\"Losing percentage for: {q}\")\n",
    "    # regex=False: titles end in '?' and may hold other regex metacharacters,\n",
    "    # which str.contains would otherwise interpret as a pattern.\n",
    "    # na=False: rows with a missing prompt count as non-matching instead of\n",
    "    # producing a NaN mask that cannot be used for boolean indexing.\n",
    "    is_match = tools['prompt_request'].str.contains(q, regex=False, na=False)\n",
    "    q_losing = tools[is_match].groupby(['tool', 'winning_vote']).size().unstack().fillna(0)\n",
    "    # unstack() only creates columns for outcomes that occur; add the missing\n",
    "    # one with zero counts so the lookups below cannot raise KeyError.\n",
    "    for outcome in (False, True):\n",
    "        if outcome not in q_losing.columns:\n",
    "            q_losing[outcome] = 0\n",
    "    q_losing_perc = q_losing[False] / (q_losing[False] + q_losing[True])\n",
    "    q_losing_perc = q_losing_perc.reset_index()\n",
    "    q_losing_perc.columns = ['tool', 'losing_percentage']\n",
    "    # Row-wise sum over the outcome columns = total matched rows per tool.\n",
    "    q_losing_perc['num_calls'] = list(q_losing.sum(axis=1).values)\n",
    "    q_losing_perc = q_losing_perc.sort_values(by='losing_percentage', ascending=False)\n",
    "    return q_losing_perc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will 'Scavengers Reign' be renewed for a second season on Netflix by 19 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.656716</td>\n",
       "      <td>67.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.571429</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.538462</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.185185</td>\n",
       "      <td>27.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0                   prediction-offline           1.000000       40.0\n",
       "4        prediction-request-rag-claude           1.000000       17.0\n",
       "7            prediction-url-cot-claude           1.000000        2.0\n",
       "2                prediction-online-sme           0.656716       67.0\n",
       "6  prediction-request-reasoning-claude           0.571429        7.0\n",
       "5         prediction-request-reasoning           0.538462       52.0\n",
       "3               prediction-request-rag           0.250000        4.0\n",
       "1                    prediction-online           0.185185       27.0"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# have confirmed market resolution was correct\n",
    "# Per-tool losing share for the title stored under index label 0.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[0, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will 'Scavengers Reign' be renewed for a second season on Netflix by 19 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.656716</td>\n",
       "      <td>67.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.571429</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.538462</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.185185</td>\n",
       "      <td>27.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0                   prediction-offline           1.000000       40.0\n",
       "4        prediction-request-rag-claude           1.000000       17.0\n",
       "7            prediction-url-cot-claude           1.000000        2.0\n",
       "2                prediction-online-sme           0.656716       67.0\n",
       "6  prediction-request-reasoning-claude           0.571429        7.0\n",
       "5         prediction-request-reasoning           0.538462       52.0\n",
       "3               prediction-request-rag           0.250000        4.0\n",
       "1                    prediction-online           0.185185       27.0"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# have confirmed currentAnswer\n",
    "# NOTE(review): same index label (0) as the preceding cell - confirm this repeat is intentional.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[0, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will Fiona Harvey officially file a lawsuit against Netflix and Richard Gadd by 17 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.977273</td>\n",
       "      <td>44.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.975000</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>0.677419</td>\n",
       "      <td>31.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.534483</td>\n",
       "      <td>58.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>0.223881</td>\n",
       "      <td>67.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "7            prediction-url-cot-claude           1.000000        1.0\n",
       "2                prediction-online-sme           0.977273       44.0\n",
       "1                    prediction-online           0.975000       40.0\n",
       "0                   prediction-offline           0.677419       31.0\n",
       "5         prediction-request-reasoning           0.534483       58.0\n",
       "4        prediction-request-rag-claude           0.223881       67.0\n",
       "6  prediction-request-reasoning-claude           0.200000        5.0\n",
       "3               prediction-request-rag           0.000000        8.0"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# have confirmed currentAnswer\n",
    "# Per-tool losing share for the title stored under index label 1.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[1, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will the final report on the Baltimore bridge collapse be released by 20 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>claude-prediction-online</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>87.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.951220</td>\n",
       "      <td>41.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.833333</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.714286</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.437500</td>\n",
       "      <td>48.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.394366</td>\n",
       "      <td>71.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0            claude-prediction-offline           1.000000        5.0\n",
       "1             claude-prediction-online           1.000000        1.0\n",
       "2                   prediction-offline           1.000000       87.0\n",
       "6        prediction-request-rag-claude           1.000000       25.0\n",
       "9            prediction-url-cot-claude           1.000000        1.0\n",
       "3                    prediction-online           0.951220       41.0\n",
       "8  prediction-request-reasoning-claude           0.833333        6.0\n",
       "5               prediction-request-rag           0.714286        7.0\n",
       "7         prediction-request-reasoning           0.437500       48.0\n",
       "4                prediction-online-sme           0.394366       71.0"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# have confirmed currentAnswer\n",
    "# Per-tool losing share for the title stored under index label 2.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[2, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will the Autonomous Racing League successfully hold their second race by May 3, 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.0</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>1.0</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>1.0</td>\n",
       "      <td>18.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.0</td>\n",
       "      <td>18.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0            claude-prediction-offline                1.0        2.0\n",
       "1                   prediction-offline                1.0       23.0\n",
       "2                    prediction-online                1.0       14.0\n",
       "3                prediction-online-sme                1.0       18.0\n",
       "4               prediction-request-rag                1.0        5.0\n",
       "5        prediction-request-rag-claude                1.0        8.0\n",
       "8            prediction-url-cot-claude                1.0        6.0\n",
       "6         prediction-request-reasoning                0.0       18.0\n",
       "7  prediction-request-reasoning-claude                0.0        3.0"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# have confirmed currentAnswer\n",
    "# Per-tool losing share for the title stored under index label 3.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[3, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will the Houston area experience flooding conditions on 11 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>claude-prediction-online</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>58.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>39.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>0.754717</td>\n",
       "      <td>53.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.369048</td>\n",
       "      <td>84.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>72.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0            claude-prediction-offline           1.000000        2.0\n",
       "1             claude-prediction-online           1.000000        6.0\n",
       "2                   prediction-offline           1.000000       58.0\n",
       "4                prediction-online-sme           1.000000       39.0\n",
       "5               prediction-request-rag           1.000000        4.0\n",
       "8  prediction-request-reasoning-claude           1.000000        8.0\n",
       "9            prediction-url-cot-claude           1.000000        5.0\n",
       "6        prediction-request-rag-claude           0.754717       53.0\n",
       "7         prediction-request-reasoning           0.369048       84.0\n",
       "3                    prediction-online           0.166667       72.0"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-tool losing share for the title stored under index label 5.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[5, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will 'Wednesday' season 2 be released on Netflix by 1 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.750000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.750000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.400000</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-online</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "1                prediction-online-sme           0.750000        4.0\n",
       "5  prediction-request-reasoning-claude           0.750000        4.0\n",
       "2               prediction-request-rag           0.666667        6.0\n",
       "3        prediction-request-rag-claude           0.500000        2.0\n",
       "4         prediction-request-reasoning           0.400000        5.0\n",
       "0             claude-prediction-online           0.000000        1.0"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-tool losing share for the title stored under index label 6.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[6, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will Arsenal win against Bournemouth in the Premier League match on 12 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>11.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>30.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>45.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.874016</td>\n",
       "      <td>127.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0                   prediction-offline           1.000000       11.0\n",
       "1                    prediction-online           1.000000       17.0\n",
       "2                prediction-online-sme           1.000000       30.0\n",
       "4        prediction-request-rag-claude           1.000000       45.0\n",
       "5         prediction-request-reasoning           0.874016      127.0\n",
       "3               prediction-request-rag           0.250000        4.0\n",
       "6  prediction-request-reasoning-claude           0.000000        2.0"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-tool losing share for the title stored under index label 7.\n",
    "losing_percentage(q=winning_trades_percentage_bottom_50.loc[7, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will Qualcomm release its Snapdragon X Plus laptop chip by 1 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.941176</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.800000</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.652174</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0            claude-prediction-offline           1.000000        7.0\n",
       "1                   prediction-offline           1.000000        1.0\n",
       "3                prediction-online-sme           1.000000       19.0\n",
       "5        prediction-request-rag-claude           1.000000       15.0\n",
       "4               prediction-request-rag           0.941176       17.0\n",
       "2                    prediction-online           0.800000        5.0\n",
       "7  prediction-request-reasoning-claude           0.666667       15.0\n",
       "6         prediction-request-reasoning           0.652174       23.0\n",
       "8            prediction-url-cot-claude           0.333333        3.0"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "losing_percentage(winning_trades_percentage_bottom_50.loc[8, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will Feyenoord's Arne Slot become the new manager of Liverpool by 1 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.916667</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.714286</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>0.454545</td>\n",
       "      <td>11.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0            claude-prediction-offline           1.000000        4.0\n",
       "1                   prediction-offline           1.000000        2.0\n",
       "8            prediction-url-cot-claude           1.000000        2.0\n",
       "6         prediction-request-reasoning           0.916667       12.0\n",
       "7  prediction-request-reasoning-claude           0.900000       10.0\n",
       "4               prediction-request-rag           0.714286       14.0\n",
       "3                prediction-online-sme           0.666667        9.0\n",
       "2                    prediction-online           0.500000        2.0\n",
       "5        prediction-request-rag-claude           0.454545       11.0"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "losing_percentage(winning_trades_percentage_bottom_50.loc[9, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Losing percentage for: Will the FCC receive additional funding for replacing Huawei gear by 10 May 2024?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>claude-prediction-online</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>36.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>50.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.986486</td>\n",
       "      <td>74.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.947368</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.910714</td>\n",
       "      <td>56.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>0.777778</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.465753</td>\n",
       "      <td>73.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  tool  losing_percentage  num_calls\n",
       "0            claude-prediction-offline           1.000000        6.0\n",
       "1             claude-prediction-online           1.000000        3.0\n",
       "2                   prediction-offline           1.000000       36.0\n",
       "6        prediction-request-rag-claude           1.000000       50.0\n",
       "4                prediction-online-sme           0.986486       74.0\n",
       "5               prediction-request-rag           0.947368       19.0\n",
       "3                    prediction-online           0.910714       56.0\n",
       "9            prediction-url-cot-claude           0.777778        9.0\n",
       "7         prediction-request-reasoning           0.465753       73.0\n",
       "8  prediction-request-reasoning-claude           0.071429       14.0"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "losing_percentage(winning_trades_percentage_bottom_50.loc[10, 'title'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_q = winning_trades_percentage_bottom_50['title'].unique().tolist()\n",
    "q_losing = tools[tools['prompt_request'].isin(all_q)]\n",
    "q_losing = q_losing.groupby(['tool'])['winning_vote'].value_counts().unstack().fillna(0)\n",
    "q_losing_perc = q_losing[False] / (q_losing[False] + q_losing[True])\n",
    "q_losing_perc = q_losing_perc.reset_index()\n",
    "q_losing_perc.columns = ['tool', 'losing_percentage']\n",
    "q_losing_perc['num_calls'] = list(q_losing.sum(axis=1).values)\n",
    "q_losing_perc = q_losing_perc.sort_values(by='losing_percentage', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tool</th>\n",
       "      <th>losing_percentage</th>\n",
       "      <th>num_calls</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>prediction-offline-sme</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>prediction-request-rag-claude</td>\n",
       "      <td>0.913007</td>\n",
       "      <td>1184.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>prediction-offline</td>\n",
       "      <td>0.893281</td>\n",
       "      <td>1012.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>prediction-request-rag</td>\n",
       "      <td>0.889881</td>\n",
       "      <td>336.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>prediction-online-sme</td>\n",
       "      <td>0.857143</td>\n",
       "      <td>1722.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>prediction-online</td>\n",
       "      <td>0.853553</td>\n",
       "      <td>1154.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>prediction-request-reasoning</td>\n",
       "      <td>0.847451</td>\n",
       "      <td>2727.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>prediction-url-cot-claude</td>\n",
       "      <td>0.846154</td>\n",
       "      <td>130.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>claude-prediction-online</td>\n",
       "      <td>0.735849</td>\n",
       "      <td>53.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>prediction-request-reasoning-claude</td>\n",
       "      <td>0.659664</td>\n",
       "      <td>238.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>claude-prediction-offline</td>\n",
       "      <td>0.591549</td>\n",
       "      <td>142.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   tool  losing_percentage  num_calls\n",
       "3                prediction-offline-sme           1.000000        2.0\n",
       "7         prediction-request-rag-claude           0.913007     1184.0\n",
       "2                    prediction-offline           0.893281     1012.0\n",
       "6                prediction-request-rag           0.889881      336.0\n",
       "5                 prediction-online-sme           0.857143     1722.0\n",
       "4                     prediction-online           0.853553     1154.0\n",
       "8          prediction-request-reasoning           0.847451     2727.0\n",
       "10            prediction-url-cot-claude           0.846154      130.0\n",
       "1              claude-prediction-online           0.735849       53.0\n",
       "9   prediction-request-reasoning-claude           0.659664      238.0\n",
       "0             claude-prediction-offline           0.591549      142.0"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q_losing_perc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>confidence</th>\n",
       "      <th>0.00</th>\n",
       "      <th>0.10</th>\n",
       "      <th>0.20</th>\n",
       "      <th>0.30</th>\n",
       "      <th>0.40</th>\n",
       "      <th>0.50</th>\n",
       "      <th>0.55</th>\n",
       "      <th>0.60</th>\n",
       "      <th>0.65</th>\n",
       "      <th>0.70</th>\n",
       "      <th>0.75</th>\n",
       "      <th>0.80</th>\n",
       "      <th>0.85</th>\n",
       "      <th>0.90</th>\n",
       "      <th>0.95</th>\n",
       "      <th>0.99</th>\n",
       "      <th>1.00</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tool</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>claude-prediction-offline</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>claude-prediction-online</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-offline</th>\n",
       "      <td>0.0</td>\n",
       "      <td>267.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>302.0</td>\n",
       "      <td>189.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>231.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-offline-sme</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-online</th>\n",
       "      <td>0.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>670.0</td>\n",
       "      <td>99.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-online-sme</th>\n",
       "      <td>1.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>234.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>149.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>109.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>39.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-rag</th>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-rag-claude</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>175.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>513.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>209.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-reasoning</th>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>103.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>315.0</td>\n",
       "      <td>176.0</td>\n",
       "      <td>441.0</td>\n",
       "      <td>317.0</td>\n",
       "      <td>339.0</td>\n",
       "      <td>159.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>97.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-request-reasoning-claude</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prediction-url-cot-claude</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "confidence                           0.00   0.10   0.20  0.30   0.40   0.50  \\\n",
       "tool                                                                          \n",
       "claude-prediction-offline             0.0    0.0    5.0  46.0    4.0    0.0   \n",
       "claude-prediction-online              0.0    0.0    2.0  10.0    7.0    3.0   \n",
       "prediction-offline                    0.0  267.0    2.0  13.0  302.0  189.0   \n",
       "prediction-offline-sme                0.0    0.0    0.0   0.0    0.0    0.0   \n",
       "prediction-online                     0.0   22.0    4.0   5.0   43.0   23.0   \n",
       "prediction-online-sme                 1.0   27.0   10.0   0.0   71.0    2.0   \n",
       "prediction-request-rag                0.0    3.0    2.0   0.0    4.0    4.0   \n",
       "prediction-request-rag-claude         0.0    0.0    1.0  32.0    0.0    0.0   \n",
       "prediction-request-reasoning          0.0    3.0  103.0   1.0   58.0   97.0   \n",
       "prediction-request-reasoning-claude   0.0    0.0    0.0   3.0    4.0    0.0   \n",
       "prediction-url-cot-claude             0.0    2.0    1.0   2.0    0.0    0.0   \n",
       "\n",
       "confidence                           0.55   0.60   0.65   0.70   0.75   0.80  \\\n",
       "tool                                                                           \n",
       "claude-prediction-offline             0.0   87.0    0.0    0.0    0.0    0.0   \n",
       "claude-prediction-online              0.0   30.0    0.0    0.0    0.0    0.0   \n",
       "prediction-offline                    0.0  231.0    3.0    0.0    0.0    0.0   \n",
       "prediction-offline-sme                0.0    0.0    0.0    0.0    2.0    0.0   \n",
       "prediction-online                     8.0  670.0   99.0    2.0   76.0   28.0   \n",
       "prediction-online-sme                 0.0  679.0  234.0   39.0  149.0   76.0   \n",
       "prediction-request-rag                0.0   25.0    5.0   48.0   11.0   36.0   \n",
       "prediction-request-rag-claude         0.0  175.0    0.0  513.0    0.0  209.0   \n",
       "prediction-request-reasoning          0.0  315.0  176.0  441.0  317.0  339.0   \n",
       "prediction-request-reasoning-claude   0.0   27.0    0.0   38.0    4.0   76.0   \n",
       "prediction-url-cot-claude             0.0   40.0    0.0   60.0    0.0   22.0   \n",
       "\n",
       "confidence                            0.85  0.90  0.95  0.99  1.00  \n",
       "tool                                                                \n",
       "claude-prediction-offline              0.0   0.0   0.0   0.0   0.0  \n",
       "claude-prediction-online               0.0   1.0   0.0   0.0   0.0  \n",
       "prediction-offline                     1.0   2.0   0.0   0.0   1.0  \n",
       "prediction-offline-sme                 0.0   0.0   0.0   0.0   0.0  \n",
       "prediction-online                     55.0  25.0  11.0   0.0  20.0  \n",
       "prediction-online-sme                109.0  80.0   6.0   0.0  39.0  \n",
       "prediction-request-rag                57.0  16.0  11.0   1.0  20.0  \n",
       "prediction-request-rag-claude          3.0  40.0   3.0   0.0   0.0  \n",
       "prediction-request-reasoning         159.0  44.0  58.0   0.0  97.0  \n",
       "prediction-request-reasoning-claude    0.0   8.0   1.0   0.0   2.0  \n",
       "prediction-url-cot-claude              0.0   3.0   0.0   0.0   0.0  "
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_q = winning_trades_percentage_bottom_50['title'].unique().tolist()\n",
    "q_losing = tools[tools['prompt_request'].isin(all_q)]\n",
    "q_losing.groupby(['tool'])['confidence'].value_counts().unstack().fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_question(text: str) -> str:\n",
    "    \"\"\"Get the question from a text.\"\"\"\n",
    "    # Regex to find text within double quotes\n",
    "    pattern = r'\"([^\"]*)\"'\n",
    "\n",
    "    # Find all occurrences\n",
    "    questions = re.findall(pattern, text)\n",
    "\n",
    "    # Assuming you want the first question if there are multiple\n",
    "    question = questions[0] if questions else None\n",
    "\n",
    "    return question"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "from web3 import Web3\n",
    "from typing import Optional\n",
    "import re\n",
    "import pickle\n",
    "\n",
    "def block_number_to_timestamp(block_number: int, web3: Web3) -> str:\n",
    "    \"\"\"Convert a block number to a timestamp.\"\"\"\n",
    "    block = web3.eth.get_block(block_number)\n",
    "    timestamp = datetime.utcfromtimestamp(block[\"timestamp\"])\n",
    "    return timestamp.strftime(\"%Y-%m-%d %H:%M:%S\")\n",
    "\n",
    "\n",
    "def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:\n",
    "    \"\"\"Parallelize the timestamp conversion.\"\"\"\n",
    "    block_numbers = df[\"request_block\"].tolist()\n",
    "    with ThreadPoolExecutor(max_workers=10) as executor:\n",
    "        results = list(\n",
    "            tqdm(executor.map(function, block_numbers), total=len(block_numbers))\n",
    "        )\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:\n",
    "    \"\"\"Get the current answer for a question.\"\"\"\n",
    "    row = fpmms[fpmms[\"title\"] == text]\n",
    "    if row.shape[0] == 0:\n",
    "        return None\n",
    "    return row[\"currentAnswer\"].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "from functools import partial\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "def weekly_analysis():\n",
    "    \"\"\"Enrich the tools dataset with answers and request times, then persist it.\n",
    "\n",
    "    Steps:\n",
    "        1. Load ``../data/fpmms.parquet`` and ``../data/tools.parquet``.\n",
    "        2. Derive ``title`` from ``prompt_request`` and look up ``currentAnswer``.\n",
    "        3. Map ``request_block`` to ``request_time`` through the pickled cache\n",
    "           ``../data/t_map.pkl``; blocks absent from the cache are resolved\n",
    "           over the RPC endpoint.\n",
    "        4. Add the ``request_month_year`` and ``request_month_year_week`` columns.\n",
    "        5. Overwrite ``tools.parquet`` and the cache with the updated data.\n",
    "\n",
    "    Returns:\n",
    "        None. All results are written to disk.\n",
    "    \"\"\"\n",
    "    # Imported locally so the final gc.collect() no longer raises NameError\n",
    "    # when no earlier cell has imported gc.\n",
    "    import gc\n",
    "\n",
    "    tools_path = '../data/tools.parquet'\n",
    "    t_map_path = \"../data/t_map.pkl\"\n",
    "\n",
    "    # NOTE(review): this URL embeds an API key; consider loading it from an\n",
    "    # environment variable instead of committing it to the notebook.\n",
    "    rpc = \"https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a\"\n",
    "    web3 = Web3(Web3.HTTPProvider(rpc))\n",
    "    # Get currentAnswer from FPMMS\n",
    "    fpmms = pd.read_parquet('../data/fpmms.parquet')\n",
    "    tools = pd.read_parquet(tools_path)\n",
    "\n",
    "    # Get the question from the tools\n",
    "    print(\"Getting the question and current answer for the tools\")\n",
    "    tools[\"title\"] = tools[\"prompt_request\"].apply(get_question)\n",
    "    tools[\"currentAnswer\"] = tools[\"title\"].apply(lambda x: current_answer(x, fpmms))\n",
    "\n",
    "    # Capitalise only values that are exactly \"yes\" / \"no\". The previous\n",
    "    # substring-based .str.replace also rewrote any other value that merely\n",
    "    # contained those letters.\n",
    "    tools[\"currentAnswer\"] = tools[\"currentAnswer\"].replace(\n",
    "        {\"yes\": \"Yes\", \"no\": \"No\"}\n",
    "    )\n",
    "\n",
    "    # Convert block number to timestamp\n",
    "    print(\"Converting block number to timestamp\")\n",
    "    # NOTE(review): pickle can execute arbitrary code on load; only point this\n",
    "    # at a cache file produced by this pipeline.\n",
    "    with open(t_map_path, \"rb\") as f:\n",
    "        t_map = pickle.load(f)\n",
    "    tools[\"request_time\"] = tools[\"request_block\"].map(t_map)\n",
    "\n",
    "    # Identify tools with missing request_time and fill them\n",
    "    missing_time_indices = tools[tools[\"request_time\"].isna()].index\n",
    "    if not missing_time_indices.empty:\n",
    "        partial_block_number_to_timestamp = partial(\n",
    "            block_number_to_timestamp, web3=web3\n",
    "        )\n",
    "        missing_timestamps = parallelize_timestamp_conversion(\n",
    "            tools.loc[missing_time_indices], partial_block_number_to_timestamp\n",
    "        )\n",
    "\n",
    "        # Update the original DataFrame with the missing timestamps\n",
    "        for i, timestamp in zip(missing_time_indices, missing_timestamps):\n",
    "            tools.at[i, \"request_time\"] = timestamp\n",
    "\n",
    "    # Parse the timestamps once and derive both period columns from the result.\n",
    "    request_dt = pd.to_datetime(tools[\"request_time\"])\n",
    "    tools[\"request_month_year\"] = request_dt.dt.strftime(\"%Y-%m\")\n",
    "    tools[\"request_month_year_week\"] = request_dt.dt.to_period(\"W\").astype(str)\n",
    "\n",
    "    # Save the tools data after the updates on the content\n",
    "    tools.to_parquet(tools_path, index=False)\n",
    "\n",
    "    # Update t_map with new timestamps\n",
    "    new_timestamps = (\n",
    "        tools[[\"request_block\", \"request_time\"]]\n",
    "        .dropna()\n",
    "        .set_index(\"request_block\")\n",
    "        .to_dict()[\"request_time\"]\n",
    "    )\n",
    "    t_map.update(new_timestamps)\n",
    "\n",
    "    with open(t_map_path, \"wb\") as f:\n",
    "        pickle.dump(t_map, f)\n",
    "\n",
    "    # clean and release all memory\n",
    "    del tools\n",
    "    del fpmms\n",
    "    del t_map\n",
    "    gc.collect()\n",
    "\n",
    "    print(\"Weekly analysis files generated and saved\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Getting the question and current answer for the tools\n",
      "Converting block number to timestamp\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/11690 [00:00<?, ?it/s]/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_28372/2484496282.py:9: DeprecationWarning: datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.fromtimestamp(timestamp, datetime.UTC).\n",
      "  timestamp = datetime.utcfromtimestamp(block[\"timestamp\"])\n",
      "100%|██████████| 11690/11690 [01:40<00:00, 116.87it/s]\n"
     ]
    },
    {
     "ename": "NameError",
     "evalue": "name 'gc' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[50], line 10\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfunctools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m partial\n\u001b[0;32m---> 10\u001b[0m \u001b[43mweekly_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[48], line 63\u001b[0m, in \u001b[0;36mweekly_analysis\u001b[0;34m()\u001b[0m\n\u001b[1;32m     61\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m fpmms\n\u001b[1;32m     62\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m t_map\n\u001b[0;32m---> 63\u001b[0m \u001b[43mgc\u001b[49m\u001b[38;5;241m.\u001b[39mcollect()\n\u001b[1;32m     65\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWeekly analysis files generated and saved\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'gc' is not defined"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pickle\n",
    "from datetime import datetime\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "from tqdm import tqdm\n",
    "from web3 import Web3\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "from functools import partial\n",
    "# Run the pipeline: weekly_analysis reads and rewrites the files under ../data.\n",
    "weekly_analysis()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1187"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import gc\n",
    "# Force a full garbage-collection pass; the cell output is the value returned by gc.collect().\n",
    "gc.collect()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "akash",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}