{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# middleschool-cardlist\n", "\n", "Note: this notebook is only for introductory purposes, and is not the way our list is compiled anymore. See the README for more information.\n", "\n", "## Prepare the data\n", "\n", "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !cd data\n", "# !wget \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n", "# !bunzip2 AllPrintings.json.bz2\n", "# !cd -" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "The Raw data is very large, so let's make JSON files for all relevant sets\n", "\n", "Note: this cell can take a couple minutes to run\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n", " 'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n", " '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n", " 'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n", " 'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n", "for set in setlist:\n", " # Write a separate JSON document for each Middle School legal set\n", " command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n", " set + '\\\".cards\\' > data/set_' + set + '.json'\n", " !{command}\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Concatenate all set files into `middleschool.json`\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "command = \"jq -s add data/set_* > data/middleschool.json\"\n", "!{command}\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Create a list with each card's oracle ID, English name, and Japanese name\n" ] }, { "cell_type": "code", "execution_count": 
7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5800 cards found\n", "These are the first and last 5 cards\n", " oracle_id name name_ja\n", "0 8adbba6e-03ef-4278-aec5-8a4496b377a8 Abandon Hope 断念\n", "0 5a70ccfa-d12d-4e62-a1a4-f05cda2fd442 Abandoned Outpost 見捨てられた前哨地\n", "0 c208b959-d0e4-4a9a-8255-2c7cc7596767 Abbey Gargoyles 修道院のガーゴイル\n", "0 62e3f285-886c-414e-b4ff-403a7c01c23a Abbey Matron None\n", "0 d0e1904e-1a37-41f6-8582-b9ea794bb886 Abduction 誘拐\n", " oracle_id name name_ja\n", "0 ae8773a3-05f2-4074-9a53-033b0c127235 Zuo Ci, the Mocking Sage 嘲笑する仙人 左慈\n", "0 c6eaa147-3566-43a9-999a-d58b877496f5 Zur's Weirding ズアーの運命支配\n", "0 ee0f883f-d7c9-4acf-a78f-f733b6f268d3 Zuran Enchanter None\n", "0 08cb8a30-9cb4-4517-bee5-8848aa60d1a2 Zuran Orb None\n", "0 bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd Zuran Spellcaster None\n" ] } ], "source": [ "import json\n", "import pandas as pd\n", "\n", "with open(\"data/middleschool.json\") as json_data:\n", " cards = json.loads(json_data.read())\n", "\n", "# Create a pandas DataFrame with all cards from all legal sets\n", "column_names = [\"oracle_id\", \"name\", \"name_ja\"]\n", "middleschool_df = pd.DataFrame(columns=column_names)\n", "for card in cards:\n", " oracle_id = card[\"identifiers\"][\"scryfallOracleId\"]\n", " name = card[\"name\"]\n", " lang_ja = [lang for lang in card[\"foreignData\"] if lang[\"language\"] == \"Japanese\"]\n", " # Some cards do not have a Japanese name\n", " if len(lang_ja) > 0:\n", " name_ja = lang_ja[0][\"name\"]\n", " else:\n", " name_ja = None\n", " temporary_df = pd.DataFrame(\n", " {\"oracle_id\": [oracle_id], \"name\": [name], \"name_ja\": [name_ja]}\n", " )\n", " middleschool_df = pd.concat([middleschool_df, temporary_df])\n", "\n", "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n", "middleschool_df = middleschool_df.sort_values(by=[\"name\", \"name_ja\"])\n", "# For cards with multiple occurrences, delete all rows 
except for the top one\n", "middleschool_df = middleschool_df.drop_duplicates(subset=[\"oracle_id\"])\n", "print(middleschool_df.shape[0], \"cards found\")\n", "print(\"These are the first and last 5 cards\")\n", "print(middleschool_df.head())\n", "print(middleschool_df.tail())" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Remove Japanese card names that are wrong on MTGJSON\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Before:\n", " oracle_id name name_ja\n", "0 0fe602b7-9f88-4d3d-af24-7790df867ed5 Aether Barrier Æther Barrier\n", "0 1e33f39b-a61a-4a09-a541-16cc1bd53d02 Aether Burst Æther Burst\n", "0 15e83068-6253-4c65-8679-7295f3dc2075 Aether Charge Æther Charge\n", "0 a3c35742-e306-49b6-b042-db4f685c6f86 Aether Flash Æther Flash\n", "0 6697fe5b-90ac-4321-aa2f-cdc6ec283cb4 Aether Mutation Aether Mutation\n", "0 61105cb5-d7a1-4021-a006-dd1b947dfa68 Aether Sting Æther Sting\n", "0 ff4297d3-3d96-4bd6-a606-1bdc20a6df2b Aether Storm Æther Storm\n", "0 2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e Aether Tide Æther Tide\n", "0 a61ceda1-5993-479e-945f-15753eeb7049 Tainted Aether Tainted Æther\n", "0 05a7ca83-e820-433f-b9e9-151e817d3708 Tar Pit Warrior Tar Pit Warrior\n", "After:\n", " oracle_id name name_ja\n", "0 0fe602b7-9f88-4d3d-af24-7790df867ed5 Aether Barrier None\n", "0 1e33f39b-a61a-4a09-a541-16cc1bd53d02 Aether Burst None\n", "0 15e83068-6253-4c65-8679-7295f3dc2075 Aether Charge None\n", "0 a3c35742-e306-49b6-b042-db4f685c6f86 Aether Flash None\n", "0 6697fe5b-90ac-4321-aa2f-cdc6ec283cb4 Aether Mutation None\n", "0 61105cb5-d7a1-4021-a006-dd1b947dfa68 Aether Sting None\n", "0 ff4297d3-3d96-4bd6-a606-1bdc20a6df2b Aether Storm None\n", "0 2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e Aether Tide None\n", "0 a61ceda1-5993-479e-945f-15753eeb7049 Tainted Aether None\n", "0 05a7ca83-e820-433f-b9e9-151e817d3708 Tar Pit Warrior None\n" ] } ], "source": [ 
# English names of the cards whose Japanese name is wrong on MTGJSON
wrongnames = [
    "Aether Barrier", "Aether Burst", "Aether Charge", "Aether Flash",
    "Aether Mutation", "Aether Sting", "Aether Storm", "Aether Tide",
    "Tainted Aether", "Tar Pit Warrior",
]

# Row mask for the affected cards. It depends only on the "name" column, which
# the assignment below does not touch, so one mask serves all three lookups.
has_wrong_name = middleschool_df["name"].isin(wrongnames)

print("Before:")
print(middleschool_df.loc[has_wrong_name])

# Blank out the wrong Japanese names
middleschool_df.loc[has_wrong_name, "name_ja"] = None

print("After:")
print(middleschool_df.loc[has_wrong_name])