{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# middleschool-cardlist\n", "\n", "## Prepare the data\n", "\n", "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !cd data\n", "# !wget \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n", "# !bunzip2 AllPrintings.json.bz2\n", "# !cd -\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "The Raw data is very large, so let's make JSON files for all relevant sets\n", "\n", "Note: this cell can take a couple minutes to run\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n", " 'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n", " '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n", " 'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n", " 'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n", "for set in setlist:\n", " # Write a separate JSON document for each Middle School legal set\n", " command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n", " set + '\\\".cards\\' > data/set_' + set + '.json'\n", " !{command}\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Concatenate all set files into `middleschool.json`\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "command = \"jq -s add data/set_* > data/middleschool.json\"\n", "!{command}\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Create a list with each card's oracle ID, English name, and Japanese name\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5800 cards found\n", "These are the first and last 5 cards\n", 
" oracle_id name name_ja\n", "0 8adbba6e-03ef-4278-aec5-8a4496b377a8 Abandon Hope 断念\n", "0 5a70ccfa-d12d-4e62-a1a4-f05cda2fd442 Abandoned Outpost 見捨てられた前哨地\n", "0 c208b959-d0e4-4a9a-8255-2c7cc7596767 Abbey Gargoyles 修道院のガーゴイル\n", "0 62e3f285-886c-414e-b4ff-403a7c01c23a Abbey Matron None\n", "0 d0e1904e-1a37-41f6-8582-b9ea794bb886 Abduction 誘拐\n", " oracle_id name name_ja\n", "0 ae8773a3-05f2-4074-9a53-033b0c127235 Zuo Ci, the Mocking Sage 嘲笑する仙人 左慈\n", "0 c6eaa147-3566-43a9-999a-d58b877496f5 Zur's Weirding ズアーの運命支配\n", "0 ee0f883f-d7c9-4acf-a78f-f733b6f268d3 Zuran Enchanter None\n", "0 08cb8a30-9cb4-4517-bee5-8848aa60d1a2 Zuran Orb None\n", "0 bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd Zuran Spellcaster None\n" ] } ], "source": [ "import json\n", "import pandas as pd\n", "\n", "with open(\"data/middleschool.json\") as json_data:\n", " cards = json.loads(json_data.read())\n", "\n", "# Create a pandas DataFrame with all cards from all legal sets\n", "column_names = [\"oracle_id\", \"name\", \"name_ja\"]\n", "middleschool_df = pd.DataFrame(columns=column_names)\n", "for card in cards:\n", " oracle_id = card[\"identifiers\"][\"scryfallOracleId\"]\n", " name = card[\"name\"]\n", " lang_ja = [lang for lang in card[\"foreignData\"] if lang[\"language\"] == \"Japanese\"]\n", " # Some cards do not have a Japanese name\n", " if len(lang_ja) > 0:\n", " name_ja = lang_ja[0][\"name\"]\n", " else:\n", " name_ja = None\n", " temporary_df = pd.DataFrame(\n", " {\"oracle_id\": [oracle_id], \"name\": [name], \"name_ja\": [name_ja]}\n", " )\n", " middleschool_df = pd.concat([middleschool_df, temporary_df])\n", "\n", "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n", "middleschool_df = middleschool_df.sort_values(by=[\"name\", \"name_ja\"])\n", "# For cards with multiple occurrences, delete all rows except for the top one\n", "middleschool_df = middleschool_df.drop_duplicates(subset=[\"oracle_id\"])\n", "print(middleschool_df.shape[0], \"cards 
found\")\n", "print(\"These are the first and last 5 cards\")\n", "print(middleschool_df.head())\n", "print(middleschool_df.tail())\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Remove Japanese card names that are wrong on MTGJSON\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Before:\n", " oracle_id name name_ja\n", "0 0fe602b7-9f88-4d3d-af24-7790df867ed5 Aether Barrier Æther Barrier\n", "0 1e33f39b-a61a-4a09-a541-16cc1bd53d02 Aether Burst Æther Burst\n", "0 15e83068-6253-4c65-8679-7295f3dc2075 Aether Charge Æther Charge\n", "0 a3c35742-e306-49b6-b042-db4f685c6f86 Aether Flash Æther Flash\n", "0 6697fe5b-90ac-4321-aa2f-cdc6ec283cb4 Aether Mutation Aether Mutation\n", "0 61105cb5-d7a1-4021-a006-dd1b947dfa68 Aether Sting Æther Sting\n", "0 ff4297d3-3d96-4bd6-a606-1bdc20a6df2b Aether Storm Æther Storm\n", "0 2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e Aether Tide Æther Tide\n", "0 a61ceda1-5993-479e-945f-15753eeb7049 Tainted Aether Tainted Æther\n", "0 05a7ca83-e820-433f-b9e9-151e817d3708 Tar Pit Warrior Tar Pit Warrior\n", "After:\n", " oracle_id name name_ja\n", "0 0fe602b7-9f88-4d3d-af24-7790df867ed5 Aether Barrier None\n", "0 1e33f39b-a61a-4a09-a541-16cc1bd53d02 Aether Burst None\n", "0 15e83068-6253-4c65-8679-7295f3dc2075 Aether Charge None\n", "0 a3c35742-e306-49b6-b042-db4f685c6f86 Aether Flash None\n", "0 6697fe5b-90ac-4321-aa2f-cdc6ec283cb4 Aether Mutation None\n", "0 61105cb5-d7a1-4021-a006-dd1b947dfa68 Aether Sting None\n", "0 ff4297d3-3d96-4bd6-a606-1bdc20a6df2b Aether Storm None\n", "0 2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e Aether Tide None\n", "0 a61ceda1-5993-479e-945f-15753eeb7049 Tainted Aether None\n", "0 05a7ca83-e820-433f-b9e9-151e817d3708 Tar Pit Warrior None\n" ] } ], "source": [ "wrongnames = [\n", " \"Aether Barrier\",\n", " \"Aether Burst\",\n", " \"Aether Charge\",\n", " \"Aether Flash\",\n", " \"Aether Mutation\",\n", " 
\"Aether Sting\",\n", " \"Aether Storm\",\n", " \"Aether Tide\",\n", " \"Tainted Aether\",\n", " \"Tar Pit Warrior\",\n", "]\n", "print(\"Before:\")\n", "print(middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames)])\n", "middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames), \"name_ja\"] = None\n", "print(\"After:\")\n", "print(middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames)])\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Find Japanese names for cards that were not released in Japanese in Middle School legal sets\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "................................................................................\n", "................................................................................\n", "................................................................................\n", "................................................................................\n", "................................................................................\n", "................................................................................\n", "......................" 
import time
from requests_html import HTMLSession

session = HTMLSession()

# Seconds to wait for an answer before a request is aborted; without a timeout
# a stalled connection would block the notebook indefinitely
REQUEST_TIMEOUT = 30


def _japanese_name_from_title(title, name):
    """Extract the Japanese card name from the text of a page title.

    The Japanese name is expected in front of the English name. Returns None
    when the title starts with the English name, or when it contains neither
    the English name nor a '/'.
    """
    # Find the position of the English card name within the title
    position = title.find(name)
    # The Japanese name should be before the English name,
    # so if the position is 0, there is no Japanese name
    if position == 0:
        return None
    if position == -1:
        # If the exact English card name can't be found, we look for a '/'
        position = title.find("/")
        # No '/' means no Japanese name
        if position == -1:
            return None
        # Take only the text in front of the '/'
        return title[:position]
    # Take only the text in front of the English name; the character directly
    # before it is dropped as well (presumably a separator, verify against
    # a real page title)
    return title[: position - 1]


def find_japanese_name(name):
    """Look up the Japanese name of a card by its English name.

    Requests the card's page from whisper.wisdom-guild.net and reads the
    Japanese name from the page title. Returns None when the page has no
    title element or the title holds no Japanese name.
    """
    url = "http://whisper.wisdom-guild.net/card/" + name + "/"
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    # Find the <title> element in the HTML document
    title_elements = response.html.find("title")
    if not title_elements:
        # A page without a title cannot provide a name
        return None
    return _japanese_name_from_title(title_elements[0].text, name)


english_only_cards = middleschool_df[middleschool_df["name_ja"].isnull()]
name_list = english_only_cards["name"].to_list()
for request_count, name in enumerate(name_list, start=1):
    middleschool_df.loc[
        middleschool_df["name"] == name, "name_ja"
    ] = find_japanese_name(name)
    # Progress indicator: one dot per looked-up card, 80 dots per line
    print(".", end="")
    if request_count % 80 == 0:
        print()
    # Pause between two requests
    time.sleep(1)
\"Demonic Consultation\",\n", " \"Flash\",\n", " \"Goblin Recruiter\",\n", " \"Imperial Seal\",\n", " \"Jeweled Bird\",\n", " \"Mana Crypt\",\n", " \"Mana Vault\",\n", " \"Memory Jar\",\n", " \"Mind's Desire\",\n", " \"Mind Twist\",\n", " \"Rebirth\",\n", " \"Strip Mine\",\n", " \"Tempest Efreet\",\n", " \"Timmerian Fiends\",\n", " \"Tolarian Academy\",\n", " \"Vampiric Tutor\",\n", " \"Windfall\",\n", " \"Yawgmoth's Bargain\",\n", " \"Yawgmoth's Will\",\n", "]\n", "print(\"Cards legal by set:\", middleschool_df.shape[0])\n", "# Find the rows with the banned cards\n", "banned_df = middleschool_df[\n", " pd.DataFrame(middleschool_df.name.tolist()).isin(banlist).any(axis=1).values\n", "]\n", "print(\"Banned cards:\", banned_df.shape[0])\n", "# Append the banned cards to the main Middle School DataFrame,\n", "# then remove any rows that appear twice,\n", "# effectively leaving only the legal cards\n", "middleschool_df = pd.concat([middleschool_df, banned_df]).drop_duplicates(keep=False)\n", "print(\"Cards legal by set and not banned:\", middleschool_df.shape[0])\n", "middleschool_df = middleschool_df.reset_index(drop=True)\n", "middleschool_df = middleschool_df[[\"oracle_id\", \"name\", \"name_ja\"]]\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Save the list to a CSV file and a JSON file\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "middleschool_df.to_csv(\"output/middleschool.csv\")\n", "middleschool_df.to_json(\"output/middleschool.json\")\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Feel free to delete everything in the `data` directory after you are done\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", "version": "3.11.4" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" } } }, "nbformat": 4, "nbformat_minor": 2 }