# Download raw data from MTGJSON (uncomment and run only once).
# Each `!` line runs in its own subshell, so a bare `!cd data` would not carry
# over to the following lines; the target directory is passed explicitly.
# !wget -P data "https://mtgjson.com/api/v5/AllPrintings.json.bz2"
# !bunzip2 data/AllPrintings.json.bz2

import subprocess
from pathlib import Path

DATA_DIR = Path("data")

# Codes of every Middle School legal set.
setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',
           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',
           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',
           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',
           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']

# Write a separate JSON document for each Middle School legal set.
# The loop variable is deliberately not called `set`: notebook variables live on
# in the kernel, and shadowing the builtin would break later `set(...)` calls.
# Note: this can take a couple of minutes, jq re-reads the raw file per set.
for set_code in setlist:
    set_path = DATA_DIR / ('set_' + set_code + '.json')
    with open(set_path, 'wb') as set_file:
        # jq reads the file directly (no `cat |`) and the filter is passed as a
        # single argument, so no shell quoting is involved. check=True turns a
        # jq failure into an exception instead of silently leaving a bad file.
        subprocess.run(
            ['jq', '.data."' + set_code + '".cards',
             str(DATA_DIR / 'AllPrintings.json')],
            stdout=set_file,
            check=True,
        )

# Concatenate all set files into `middleschool.json`.
# Only the files of the sets listed above are merged, so a stale set_*.json left
# over from an earlier run with a different list cannot leak into the result.
set_paths = [str(DATA_DIR / ('set_' + set_code + '.json')) for set_code in setlist]
with open(DATA_DIR / 'middleschool.json', 'wb') as combined_file:
    subprocess.run(['jq', '-s', 'add', *set_paths],
                   stdout=combined_file, check=True)
import json
import pandas as pd


def _japanese_name(card):
    """Return the first Japanese name listed for `card`, or None.

    `card` is one card dict from data/middleschool.json. Its `foreignData`
    entries are scanned in order and the `name` of the first entry whose
    `language` is 'Japanese' is returned.
    """
    # Some cards do not have a Japanese name; a missing or empty foreignData
    # list is treated the same way.
    for foreign in card.get('foreignData') or []:
        if foreign['language'] == 'Japanese':
            return foreign['name']
    return None


# The file holds Japanese card names; read it as UTF-8 (the JSON interchange
# encoding) instead of relying on the platform's default encoding.
with open("data/middleschool.json", encoding="utf-8") as json_data:
    cards = json.load(json_data)

# Create a pandas DataFrame with all cards from all legal sets.
# Rows are collected first and converted once: concatenating a one-row frame
# per card copies the whole table on every iteration and labels every row 0.
column_names = ['oracle_id', 'name', 'name_ja']
card_rows = [
    (card['identifiers']['scryfallOracleId'], card['name'], _japanese_name(card))
    for card in cards
]
middleschool_df = pd.DataFrame(card_rows, columns=column_names)

middleschool_df = (
    middleschool_df
    # For cards with multiple occurrences, put the rows that have the Japanese
    # name on top (missing names sort last)
    .sort_values(by=['name', 'name_ja'], na_position='last')
    # For cards with multiple occurrences, delete all rows except for the top one
    .drop_duplicates(subset=['oracle_id'])
    # Give every remaining card its own row label
    .reset_index(drop=True)
)
print(middleschool_df.shape[0], 'cards found')
print('These are the first and last 5 cards')
print(middleschool_df.head())
print(middleschool_df.tail())

# Remove Japanese card names that are wrong on MTGJSON
wrongnames = ['Aether Barrier', 'Aether Burst', 'Aether Charge', 'Aether Flash', 'Aether Mutation',
              'Aether Sting', 'Aether Storm', 'Aether Tide', 'Tainted Aether', 'Tar Pit Warrior']
# The English names are not modified below, so one mask serves all three uses.
has_wrong_name = middleschool_df['name'].isin(wrongnames)
print('Before:')
print(middleschool_df.loc[has_wrong_name])
middleschool_df.loc[has_wrong_name, 'name_ja'] = None
print('After:')
print(middleschool_df.loc[has_wrong_name])