{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# middleschool-cardlist\n",
    "\n",
    "## Prepare the data\n",
    "\n",
    "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each `!` command runs in its own subshell, so a `!cd data` line would not\n",
    "# change the directory for the commands after it. Point every command at the\n",
    "# data directory explicitly instead.\n",
    "# !mkdir -p data\n",
    "# !wget -P data \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
    "# !bunzip2 data/AllPrintings.json.bz2\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The raw data is very large, so let's extract a separate JSON file for each relevant set\n",
    "\n",
    "Note: this cell can take a couple of minutes to run\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set codes of every Middle School legal set\n",
    "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
    "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
    "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
    "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
    "           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
    "# The loop variable is `set_code`, not `set`: a module-level `set` would shadow\n",
    "# the builtin for every cell that runs afterwards in this kernel.\n",
    "for set_code in setlist:\n",
    "    # Write a separate JSON document for each Middle School legal set.\n",
    "    # jq reads the input file directly (no `cat` pipe needed); the set code is\n",
    "    # quoted inside the filter because some codes start with a digit (e.g. 4ED).\n",
    "    jq_filter = f'.data.\"{set_code}\".cards'\n",
    "    command = f\"jq '{jq_filter}' data/AllPrintings.json > data/set_{set_code}.json\"\n",
    "    !{command}\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate all set files into `middleschool.json`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Slurp (-s) every per-set card array and `add` them into one combined array\n",
    "set_files = \"data/set_*\"\n",
    "command = f\"jq -s add {set_files} > data/middleschool.json\"\n",
    "!{command}\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a list with each card's oracle ID, English name, and Japanese name\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5800 cards found\n",
      "These are the first and last 5 cards\n",
      "                              oracle_id               name    name_ja\n",
      "0  8adbba6e-03ef-4278-aec5-8a4496b377a8       Abandon Hope         断念\n",
      "0  5a70ccfa-d12d-4e62-a1a4-f05cda2fd442  Abandoned Outpost  見捨てられた前哨地\n",
      "0  c208b959-d0e4-4a9a-8255-2c7cc7596767    Abbey Gargoyles  修道院のガーゴイル\n",
      "0  62e3f285-886c-414e-b4ff-403a7c01c23a       Abbey Matron       None\n",
      "0  d0e1904e-1a37-41f6-8582-b9ea794bb886          Abduction         誘拐\n",
      "                              oracle_id                      name    name_ja\n",
      "0  ae8773a3-05f2-4074-9a53-033b0c127235  Zuo Ci, the Mocking Sage  嘲笑する仙人 左慈\n",
      "0  c6eaa147-3566-43a9-999a-d58b877496f5            Zur's Weirding   ズアーの運命支配\n",
      "0  ee0f883f-d7c9-4acf-a78f-f733b6f268d3           Zuran Enchanter       None\n",
      "0  08cb8a30-9cb4-4517-bee5-8848aa60d1a2                 Zuran Orb       None\n",
      "0  bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd         Zuran Spellcaster       None\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "# The card data contains Japanese text, so read the file as UTF-8 explicitly\n",
    "# instead of relying on the platform's default encoding\n",
    "with open(\"data/middleschool.json\", encoding=\"utf-8\") as json_data:\n",
    "    cards = json.load(json_data)\n",
    "\n",
    "# Create a pandas DataFrame with all cards from all legal sets.\n",
    "# Records are collected in a list and turned into a DataFrame in one call:\n",
    "# concatenating a one-row frame per card is quadratic in the number of cards\n",
    "# and leaves every row with the same index label (0).\n",
    "column_names = [\"oracle_id\", \"name\", \"name_ja\"]\n",
    "records = []\n",
    "for card in cards:\n",
    "    # A card without a `foreignData` entry is treated like a card that has\n",
    "    # no Japanese printing\n",
    "    lang_ja = [\n",
    "        lang\n",
    "        for lang in (card.get(\"foreignData\") or [])\n",
    "        if lang[\"language\"] == \"Japanese\"\n",
    "    ]\n",
    "    records.append(\n",
    "        {\n",
    "            \"oracle_id\": card[\"identifiers\"][\"scryfallOracleId\"],\n",
    "            \"name\": card[\"name\"],\n",
    "            # Some cards do not have a Japanese name\n",
    "            \"name_ja\": lang_ja[0][\"name\"] if lang_ja else None,\n",
    "        }\n",
    "    )\n",
    "middleschool_df = pd.DataFrame(records, columns=column_names)\n",
    "\n",
    "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
    "# (missing values are sorted last)\n",
    "middleschool_df = middleschool_df.sort_values(by=[\"name\", \"name_ja\"])\n",
    "# For cards with multiple occurrences, delete all rows except for the top one\n",
    "middleschool_df = middleschool_df.drop_duplicates(subset=[\"oracle_id\"])\n",
    "print(middleschool_df.shape[0], \"cards found\")\n",
    "print(\"These are the first and last 5 cards\")\n",
    "print(middleschool_df.head())\n",
    "print(middleschool_df.tail())\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remove Japanese card names that are wrong on MTGJSON\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Before:\n",
      "                              oracle_id             name          name_ja\n",
      "0  0fe602b7-9f88-4d3d-af24-7790df867ed5   Aether Barrier    Æther Barrier\n",
      "0  1e33f39b-a61a-4a09-a541-16cc1bd53d02     Aether Burst      Æther Burst\n",
      "0  15e83068-6253-4c65-8679-7295f3dc2075    Aether Charge     Æther Charge\n",
      "0  a3c35742-e306-49b6-b042-db4f685c6f86     Aether Flash      Æther Flash\n",
      "0  6697fe5b-90ac-4321-aa2f-cdc6ec283cb4  Aether Mutation  Aether Mutation\n",
      "0  61105cb5-d7a1-4021-a006-dd1b947dfa68     Aether Sting      Æther Sting\n",
      "0  ff4297d3-3d96-4bd6-a606-1bdc20a6df2b     Aether Storm      Æther Storm\n",
      "0  2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e      Aether Tide       Æther Tide\n",
      "0  a61ceda1-5993-479e-945f-15753eeb7049   Tainted Aether    Tainted Æther\n",
      "0  05a7ca83-e820-433f-b9e9-151e817d3708  Tar Pit Warrior  Tar Pit Warrior\n",
      "After:\n",
      "                              oracle_id             name name_ja\n",
      "0  0fe602b7-9f88-4d3d-af24-7790df867ed5   Aether Barrier    None\n",
      "0  1e33f39b-a61a-4a09-a541-16cc1bd53d02     Aether Burst    None\n",
      "0  15e83068-6253-4c65-8679-7295f3dc2075    Aether Charge    None\n",
      "0  a3c35742-e306-49b6-b042-db4f685c6f86     Aether Flash    None\n",
      "0  6697fe5b-90ac-4321-aa2f-cdc6ec283cb4  Aether Mutation    None\n",
      "0  61105cb5-d7a1-4021-a006-dd1b947dfa68     Aether Sting    None\n",
      "0  ff4297d3-3d96-4bd6-a606-1bdc20a6df2b     Aether Storm    None\n",
      "0  2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e      Aether Tide    None\n",
      "0  a61ceda1-5993-479e-945f-15753eeb7049   Tainted Aether    None\n",
      "0  05a7ca83-e820-433f-b9e9-151e817d3708  Tar Pit Warrior    None\n"
     ]
    }
   ],
   "source": [
    "# English names of the cards whose MTGJSON Japanese name must be discarded\n",
    "wrongnames = [\n",
    "    \"Aether Barrier\",\n",
    "    \"Aether Burst\",\n",
    "    \"Aether Charge\",\n",
    "    \"Aether Flash\",\n",
    "    \"Aether Mutation\",\n",
    "    \"Aether Sting\",\n",
    "    \"Aether Storm\",\n",
    "    \"Aether Tide\",\n",
    "    \"Tainted Aether\",\n",
    "    \"Tar Pit Warrior\",\n",
    "]\n",
    "# The row selector depends only on the `name` column, which is not modified\n",
    "# here, so it is built once and reused for both printouts and the update\n",
    "has_wrong_name = middleschool_df[\"name\"].isin(wrongnames)\n",
    "print(\"Before:\")\n",
    "print(middleschool_df.loc[has_wrong_name])\n",
    "# Blank out the Japanese name so these cards count as not having one\n",
    "middleschool_df.loc[has_wrong_name, \"name_ja\"] = None\n",
    "print(\"After:\")\n",
    "print(middleschool_df.loc[has_wrong_name])\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
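    "Find Japanese names for cards that were not released in Japanese in Middle School legal sets by looking them up on [Wisdom Guild](http://whisper.wisdom-guild.net/)\n",
    "\n",
    "Note: this cell makes one request per card and pauses one second between requests, so it can take several minutes to run\n"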
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "......................"
     ]
    }
   ],
   "source": [
    "import time\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "\n",
    "\n",
    "def _japanese_name_from_title(title, name):\n",
    "    \"\"\"Return the Japanese name at the start of a page title, or None.\n",
    "\n",
    "    The Japanese name is expected to come before the English name, separated\n",
    "    from it by a single character (presumably '/' -- TODO confirm against the\n",
    "    site's title format).\n",
    "\n",
    "    Args:\n",
    "        title: Text of the page's <title> element.\n",
    "        name: English card name to locate inside the title.\n",
    "\n",
    "    Returns:\n",
    "        The text in front of the English name (or in front of the first '/'\n",
    "        when the English name is not in the title), or None when that text\n",
    "        is empty or neither marker is found.\n",
    "    \"\"\"\n",
    "    # Find the position of the English card name within the title\n",
    "    idx = title.find(name)\n",
    "    if idx == -1:\n",
    "        # If the exact English card name can't be found, we look for a '/'\n",
    "        idx = title.find(\"/\")\n",
    "        # No '/' means no Japanese name\n",
    "        if idx == -1:\n",
    "            return None\n",
    "        name_ja = title[:idx]\n",
    "    else:\n",
    "        # Drop the single separator character in front of the English name;\n",
    "        # max() keeps the slice end from turning negative when idx is 0\n",
    "        name_ja = title[: max(idx - 1, 0)]\n",
    "    # An empty prefix means there is no Japanese name, so report None rather\n",
    "    # than an empty string that would no longer count as missing\n",
    "    return name_ja or None\n",
    "\n",
    "\n",
    "def find_japanese_name(name, timeout=10):\n",
    "    \"\"\"Look up the Japanese name of a card on whisper.wisdom-guild.net.\n",
    "\n",
    "    Args:\n",
    "        name: English card name; it is inserted into the URL path as is.\n",
    "        timeout: Seconds to wait for the server before giving up.\n",
    "\n",
    "    Returns:\n",
    "        The Japanese name as a string, or None when the response is an error\n",
    "        page, has no <title> element, or its title holds no Japanese name.\n",
    "\n",
    "    Raises:\n",
    "        Whatever `session.get` raises for network failures, including a\n",
    "        timeout after `timeout` seconds.\n",
    "    \"\"\"\n",
    "    url = \"http://whisper.wisdom-guild.net/card/\" + name + \"/\"\n",
    "    # Without a timeout a stalled connection would block the caller forever\n",
    "    r = session.get(url, timeout=timeout)\n",
    "    # The title of an error page is not a card name\n",
    "    if not r.ok:\n",
    "        return None\n",
    "    # first=True gives None instead of an empty list when there is no <title>\n",
    "    title_element = r.html.find(\"title\", first=True)\n",
    "    if title_element is None:\n",
    "        return None\n",
    "    return _japanese_name_from_title(title_element.text, name)\n",
    "\n",
    "\n",
    "# Only cards that still lack a Japanese name need a lookup\n",
    "english_only_cards = middleschool_df[middleschool_df[\"name_ja\"].isnull()]\n",
    "name_list = english_only_cards[\"name\"].to_list()\n",
    "for idx, name in enumerate(name_list):\n",
    "    translated = find_japanese_name(name)\n",
    "    middleschool_df.loc[middleschool_df[\"name\"] == name, \"name_ja\"] = translated\n",
    "    # Progress indicator: one dot per card, with a line break after every 80th dot\n",
    "    print(\".\", end=\"\\n\" if idx % 80 == 79 else \"\")\n",
    "    # Pause for a second before the next request\n",
    "    time.sleep(1)\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Exclude all cards banned in Middle School\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cards legal by set: 5800\n",
      "Banned cards: 25\n",
      "Cards legal by set and not banned: 5775\n"
     ]
    }
   ],
   "source": [
    "# English names of the cards banned in Middle School\n",
    "banlist = [\n",
    "    \"Amulet of Quoz\",\n",
    "    \"Balance\",\n",
    "    \"Brainstorm\",\n",
    "    \"Bronze Tablet\",\n",
    "    \"Channel\",\n",
    "    \"Dark Ritual\",\n",
    "    \"Demonic Consultation\",\n",
    "    \"Flash\",\n",
    "    \"Goblin Recruiter\",\n",
    "    \"Imperial Seal\",\n",
    "    \"Jeweled Bird\",\n",
    "    \"Mana Crypt\",\n",
    "    \"Mana Vault\",\n",
    "    \"Memory Jar\",\n",
    "    \"Mind's Desire\",\n",
    "    \"Mind Twist\",\n",
    "    \"Rebirth\",\n",
    "    \"Strip Mine\",\n",
    "    \"Tempest Efreet\",\n",
    "    \"Timmerian Fiends\",\n",
    "    \"Tolarian Academy\",\n",
    "    \"Vampiric Tutor\",\n",
    "    \"Windfall\",\n",
    "    \"Yawgmoth's Bargain\",\n",
    "    \"Yawgmoth's Will\",\n",
    "]\n",
    "print(\"Cards legal by set:\", middleschool_df.shape[0])\n",
    "# Find the rows with the banned cards. The mask is a plain boolean array so it\n",
    "# is applied by position and does not depend on the DataFrame's index labels.\n",
    "is_banned = middleschool_df[\"name\"].isin(banlist).to_numpy()\n",
    "banned_df = middleschool_df[is_banned]\n",
    "print(\"Banned cards:\", banned_df.shape[0])\n",
    "# A banlist entry that matches no row (typo, renamed card) would otherwise\n",
    "# go unnoticed, so report it\n",
    "found_names = banned_df[\"name\"].tolist()\n",
    "unmatched = [banned for banned in banlist if banned not in found_names]\n",
    "if unmatched:\n",
    "    print(\"Banlist entries not found in the card list:\", unmatched)\n",
    "# Keep only the rows that are not banned\n",
    "middleschool_df = middleschool_df[~is_banned]\n",
    "print(\"Cards legal by set and not banned:\", middleschool_df.shape[0])\n",
    "middleschool_df = middleschool_df.reset_index(drop=True)\n",
    "middleschool_df = middleschool_df[[\"oracle_id\", \"name\", \"name_ja\"]]\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the list to a CSV file and a JSON file\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Writing fails if the target directory does not exist, so create it first;\n",
    "# exist_ok=True makes this a no-op when the directory is already there\n",
    "os.makedirs(\"output\", exist_ok=True)\n",
    "middleschool_df.to_csv(\"output/middleschool.csv\")\n",
    "middleschool_df.to_json(\"output/middleschool.json\")\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feel free to delete everything in the `data` directory after you are done\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}