{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# middleschool-cardlist\n",
    "\n",
    "Build the list of cards that are legal in the Middle School format, with each card's Scryfall oracle ID, English name, and Japanese name. The list is saved to `output/middleschool.csv` and `output/middleschool.json`.\n",
    "\n",
    "Requirements: the `jq` command-line tool (plus `wget` and `bunzip2` for the download step) and the `pandas` and `requests_html` Python packages.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import time\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "# Working directory for the raw and intermediate JSON files\n",
    "DATA_DIR = Path('data')\n",
    "# Directory that receives the final CSV and JSON card lists\n",
    "OUTPUT_DIR = Path('output')\n",
    "DATA_DIR.mkdir(exist_ok=True)\n",
    "OUTPUT_DIR.mkdir(exist_ok=True)\n",
    "\n",
    "# Card pages used to look up missing Japanese names\n",
    "WHISPER_CARD_URL = 'http://whisper.wisdom-guild.net/card/'\n",
    "# Seconds to wait after each lookup request\n",
    "REQUEST_DELAY_SECONDS = 1\n",
    "# Seconds before an unanswered lookup request is abandoned\n",
    "REQUEST_TIMEOUT_SECONDS = 30\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare the data\n",
    "\n",
    "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every `!` line runs in its own subshell, so `!cd data` would not carry over\n",
    "# to the lines after it. The target directory is passed explicitly instead.\n",
    "# !wget -P {DATA_DIR} \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
    "# !bunzip2 {DATA_DIR}/AllPrintings.json.bz2\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The raw data is very large, so let's make a JSON file for each relevant set.\n",
    "\n",
    "Note: this cell can take a couple of minutes to run.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set codes of all Middle School legal sets\n",
    "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
    "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
    "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
    "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
    "           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
    "all_printings_path = DATA_DIR / 'AllPrintings.json'\n",
    "for set_code in setlist:\n",
    "    # Write a separate JSON document for each Middle School legal set\n",
    "    set_path = DATA_DIR / f'set_{set_code}.json'\n",
    "    jq_filter = f'.data.\"{set_code}\".cards'\n",
    "    command = f\"jq '{jq_filter}' {all_printings_path} > {set_path}\"\n",
    "    !{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate all set files into `middleschool.json`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "command = f'jq -s add {DATA_DIR}/set_*.json > {DATA_DIR}/middleschool.json'\n",
    "!{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the card list\n",
    "\n",
    "Create a list with each card's oracle ID, English name, and Japanese name. The last recorded run found 5800 cards.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def japanese_name(card):\n",
    "    \"\"\"Return the name of the card's first Japanese `foreignData` entry.\n",
    "\n",
    "    Returns None when the card has no Japanese entry at all.\n",
    "    \"\"\"\n",
    "    for foreign in card.get('foreignData', []):\n",
    "        if foreign['language'] == 'Japanese':\n",
    "            return foreign.get('name')\n",
    "    # Some cards do not have a Japanese name\n",
    "    return None\n",
    "\n",
    "\n",
    "# The file contains Japanese text, so the encoding is stated explicitly\n",
    "with open(DATA_DIR / 'middleschool.json', encoding='utf-8') as json_data:\n",
    "    cards = json.load(json_data)\n",
    "\n",
    "# One record per printing; the DataFrame is built once from the full list\n",
    "records = [\n",
    "    {\n",
    "        'oracle_id': card['identifiers']['scryfallOracleId'],\n",
    "        'name': card['name'],\n",
    "        'name_ja': japanese_name(card),\n",
    "    }\n",
    "    for card in cards\n",
    "]\n",
    "column_names = ['oracle_id', 'name', 'name_ja']\n",
    "middleschool_df = (\n",
    "    pd.DataFrame(records, columns=column_names)\n",
    "    # For cards with multiple occurrences, put the rows that have the\n",
    "    # Japanese name on top (missing names sort last)\n",
    "    .sort_values(by=['name', 'name_ja'], na_position='last')\n",
    "    # For cards with multiple occurrences, delete all rows except for the top one\n",
    "    .drop_duplicates(subset=['oracle_id'])\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "print(middleschool_df.shape[0], 'cards found')\n",
    "print('These are the first and last 5 cards')\n",
    "pd.concat([middleschool_df.head(), middleschool_df.tail()])\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find Japanese names for cards that were not released in Japanese in Middle School legal sets. Each name is looked up on Wisdom Guild, one request at a time; the last recorded run made 491 lookups.\n",
    "\n",
    "If the lookup cell is interrupted, run it again: only cards that still have no Japanese name are looked up.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "session = HTMLSession()\n",
    "\n",
    "\n",
    "def extract_japanese_name(title, name):\n",
    "    \"\"\"Return the part of a page title that precedes the English card name.\n",
    "\n",
    "    The title is expected to hold the Japanese name first, then a one-character\n",
    "    separator, then the English name. Returns None when nothing precedes the\n",
    "    English name, or when the English name is absent and the title has no '/'.\n",
    "    \"\"\"\n",
    "    # Find the position of the English card name within the title\n",
    "    idx = title.find(name)\n",
    "    # The Japanese name should be before the English name,\n",
    "    # so if idx is 0, there is no Japanese name\n",
    "    if idx == 0:\n",
    "        return None\n",
    "    if idx == -1:\n",
    "        # If the exact English card name can't be found, we look for a '/'\n",
    "        idx = title.find('/')\n",
    "        # No '/' means no Japanese name\n",
    "        if idx == -1:\n",
    "            return None\n",
    "        name_ja = title[:idx]\n",
    "    else:\n",
    "        # Drop the separator character in front of the English name\n",
    "        name_ja = title[:idx - 1]\n",
    "    # An empty string carries no name, so report it as missing\n",
    "    return name_ja or None\n",
    "\n",
    "\n",
    "def find_japanese_name(name):\n",
    "    \"\"\"Look up the Japanese name of the card with the given English name.\n",
    "\n",
    "    Fetches the card's page and reads the name from the page title.\n",
    "    Returns None when the page has no title or the title holds no Japanese name.\n",
    "    Network errors and timeouts raised by the request are not caught.\n",
    "    \"\"\"\n",
    "    url = WHISPER_CARD_URL + name + '/'\n",
    "    response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)\n",
    "    # Find the <title> element of the HTML document\n",
    "    title_element = response.html.find('title', first=True)\n",
    "    if title_element is None:\n",
    "        return None\n",
    "    return extract_japanese_name(title_element.text, name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "english_only_cards = middleschool_df[middleschool_df['name_ja'].isnull()]\n",
    "name_list = english_only_cards['name'].to_list()\n",
    "for idx, name in enumerate(name_list):\n",
    "    name_ja = find_japanese_name(name)\n",
    "    middleschool_df.loc[middleschool_df['name'] == name, 'name_ja'] = name_ja\n",
    "    # Progress indicator: one dot per card, 80 dots per line\n",
    "    print('.', end='')\n",
    "    if idx % 80 == 79:\n",
    "        print()\n",
    "    # Pause after every request to avoid overloading the server\n",
    "    time.sleep(REQUEST_DELAY_SECONDS)\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Apply the banlist\n",
    "\n",
    "Exclude all cards banned in Middle School. In the last recorded run, 26 of the 5800 cards were banned, leaving 5774.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "banlist = [\"Amulet of Quoz\",\n",
    "           \"Balance\",\n",
    "           \"Brainstorm\",\n",
    "           \"Bronze Tablet\",\n",
    "           \"Channel\",\n",
    "           \"Dark Ritual\",\n",
    "           \"Demonic Consultation\",\n",
    "           \"Flash\",\n",
    "           \"Goblin Recruiter\",\n",
    "           \"Imperial Seal\",\n",
    "           \"Jeweled Bird\",\n",
    "           \"Lion's Eye Diamond\",\n",
    "           \"Mana Crypt\",\n",
    "           \"Mana Vault\",\n",
    "           \"Memory Jar\",\n",
    "           \"Mind's Desire\",\n",
    "           \"Mind Twist\",\n",
    "           \"Rebirth\",\n",
    "           \"Strip Mine\",\n",
    "           \"Tempest Efreet\",\n",
    "           \"Timmerian Fiends\",\n",
    "           \"Tolarian Academy\",\n",
    "           \"Vampiric Tutor\",\n",
    "           \"Windfall\",\n",
    "           \"Yawgmoth's Bargain\",\n",
    "           \"Yawgmoth's Will\"]\n",
    "print('Cards legal by set:', middleschool_df.shape[0])\n",
    "# Find the rows with the banned cards\n",
    "is_banned = middleschool_df['name'].isin(banlist)\n",
    "banned_df = middleschool_df[is_banned]\n",
    "print('Banned cards:', banned_df.shape[0])\n",
    "# A banlist entry that matches no card usually means a misspelled name\n",
    "unmatched = sorted(set(banlist) - set(banned_df['name']))\n",
    "if unmatched:\n",
    "    print('Banlist entries not found in the card list:', unmatched)\n",
    "# Keep only the cards that are not banned\n",
    "legal_df = (\n",
    "    middleschool_df.loc[~is_banned, ['oracle_id', 'name', 'name_ja']]\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "print('Cards legal by set and not banned:', legal_df.shape[0])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save the results\n",
    "\n",
    "Save the list to a CSV file and a JSON file.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "legal_df.to_csv(OUTPUT_DIR / 'middleschool.csv')\n",
    "legal_df.to_json(OUTPUT_DIR / 'middleschool.json')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feel free to delete everything in the `data` directory after you are done."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}