Spaces:

alecrem
/

middleschool

Running

File size: 11,053 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# middleschool-cardlist\n",
    "\n",
    "## Prepare the data\n",
    "\n",
    "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !cd data\n",
    "# !wget \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
    "# !bunzip2 AllPrintings.json.bz2\n",
    "# !cd -\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The Raw data is very large, so let's make JSON files for all relevant sets\n",
    "\n",
    "Note: this cell can take a couple minutes to run\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
    "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
    "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
    "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
    "           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
    "for set in setlist:\n",
    "    # Write a separate JSON document for each Middle School legal set\n",
    "    command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
    "        set + '\\\".cards\\' > data/set_' + set + '.json'\n",
    "    !{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate all set files into `middleschool.json`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "command = \"jq -s add data/set_* > data/middleschool.json\"\n",
    "!{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a list with each card's oracle ID, English name, and Japanese name\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5800 cards found\n",
      "These are the first and last 5 cards\n",
      "                              oracle_id               name    name_ja\n",
      "0  8adbba6e-03ef-4278-aec5-8a4496b377a8       Abandon Hope         断念\n",
      "0  5a70ccfa-d12d-4e62-a1a4-f05cda2fd442  Abandoned Outpost  見捨てられた前哨地\n",
      "0  c208b959-d0e4-4a9a-8255-2c7cc7596767    Abbey Gargoyles  修道院のガーゴイル\n",
      "0  62e3f285-886c-414e-b4ff-403a7c01c23a       Abbey Matron       None\n",
      "0  d0e1904e-1a37-41f6-8582-b9ea794bb886          Abduction         誘拐\n",
      "                              oracle_id                      name    name_ja\n",
      "0  ae8773a3-05f2-4074-9a53-033b0c127235  Zuo Ci, the Mocking Sage  嘲笑する仙人 左慈\n",
      "0  c6eaa147-3566-43a9-999a-d58b877496f5            Zur's Weirding   ズアーの運命支配\n",
      "0  ee0f883f-d7c9-4acf-a78f-f733b6f268d3           Zuran Enchanter       None\n",
      "0  08cb8a30-9cb4-4517-bee5-8848aa60d1a2                 Zuran Orb       None\n",
      "0  bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd         Zuran Spellcaster       None\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "with open(\"data/middleschool.json\") as json_data:\n",
    "    cards = json.loads(json_data.read())\n",
    "\n",
    "# Create a pandas DataFrame with all cards from all legal sets\n",
    "column_names = ['oracle_id', 'name', 'name_ja']\n",
    "middleschool_df = pd.DataFrame(columns=column_names)\n",
    "for card in cards:\n",
    "    oracle_id = card['identifiers']['scryfallOracleId']\n",
    "    name = card['name']\n",
    "    lang_ja = [lang for lang in card['foreignData']\n",
    "               if lang['language'] == 'Japanese']\n",
    "    # Some cards do not have a Japanese name\n",
    "    if (len(lang_ja) > 0):\n",
    "        name_ja = lang_ja[0]['name']\n",
    "    else:\n",
    "        name_ja = None\n",
    "    temporary_df = pd.DataFrame({\n",
    "        'oracle_id': [oracle_id],\n",
    "        'name':      [name],\n",
    "        'name_ja':   [name_ja]\n",
    "    })\n",
    "    middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
    "\n",
    "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
    "middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
    "# For cards with multiple occurrences, delete all rows except for the top one\n",
    "middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
    "print(middleschool_df.shape[0], 'cards found')\n",
    "print('These are the first and last 5 cards')\n",
    "print(middleschool_df.head())\n",
    "print(middleschool_df.tail())\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find Japanese names for cards that were not released in Japanese in Middle School legal sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "..........."
     ]
    }
   ],
   "source": [
    "import time\n",
    "from requests_html import HTMLSession\n",
    "session = HTMLSession()\n",
    "\n",
    "\n",
    "def find_japanese_name(name):\n",
    "    url = 'http://whisper.wisdom-guild.net/card/' + name + '/'\n",
    "    r = session.get(url)\n",
    "    # Find the text on the <title> element in the HTML document\n",
    "    title = r.html.find('title')[0].text\n",
    "    # Find the position of the English card name within the title\n",
    "    idx = title.find(name)\n",
    "    # The Japanese name should be before the English name,\n",
    "    # so if idx is 0, there is no Japanese name\n",
    "    if idx == 0:\n",
    "        return None\n",
    "    # If the exact English card name can't be found, we look for a '/'\n",
    "    if idx == -1:\n",
    "        idx = title.find('/')\n",
    "        # No '/' means no Japanese name\n",
    "        if idx == -1:\n",
    "            return None\n",
    "        # Take only the Japanese name from the title\n",
    "        name_ja = title[0:idx]\n",
    "    else:\n",
    "        # Take only the Japanese name from the title\n",
    "        name_ja = title[0:idx - 1]\n",
    "    return name_ja\n",
    "\n",
    "\n",
    "english_only_cards = middleschool_df[middleschool_df['name_ja'].isnull()]\n",
    "name_list = english_only_cards['name'].to_list()\n",
    "for idx, name in enumerate(name_list):\n",
    "    middleschool_df.loc[middleschool_df['name'] ==\n",
    "                        name, 'name_ja'] = find_japanese_name(name)\n",
    "    # print(middleschool_df.loc[middleschool_df['name'] == name])\n",
    "    print('.', end='')\n",
    "    if idx % 80 == 79:\n",
    "        print()\n",
    "    time.sleep(1)\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Exclude all cards banned in Middle School"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cards legal by set: 5800\n",
      "Banned cards: 26\n",
      "Cards legal by set and not banned: 5774\n"
     ]
    }
   ],
   "source": [
    "banlist = [\"Amulet of Quoz\",\n",
    "           \"Balance\",\n",
    "           \"Brainstorm\",\n",
    "           \"Bronze Tablet\",\n",
    "           \"Channel\",\n",
    "           \"Dark Ritual\",\n",
    "           \"Demonic Consultation\",\n",
    "           \"Flash\",\n",
    "           \"Goblin Recruiter\",\n",
    "           \"Imperial Seal\",\n",
    "           \"Jeweled Bird\",\n",
    "           \"Lion's Eye Diamond\",\n",
    "           \"Mana Crypt\",\n",
    "           \"Mana Vault\",\n",
    "           \"Memory Jar\",\n",
    "           \"Mind's Desire\",\n",
    "           \"Mind Twist\",\n",
    "           \"Rebirth\",\n",
    "           \"Strip Mine\",\n",
    "           \"Tempest Efreet\",\n",
    "           \"Timmerian Fiends\",\n",
    "           \"Tolarian Academy\",\n",
    "           \"Vampiric Tutor\",\n",
    "           \"Windfall\",\n",
    "           \"Yawgmoth's Bargain\",\n",
    "           \"Yawgmoth's Will\"]\n",
    "print('Cards legal by set:', middleschool_df.shape[0])\n",
    "# Find the rows with the banned cards\n",
    "banned_df = middleschool_df[pd.DataFrame(\n",
    "    middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
    "print('Banned cards:', banned_df.shape[0])\n",
    "# Append the banned cards to the main Middle School DataFrame,\n",
    "# then remove any rows that appear twice,\n",
    "# effectively leaving only the legal cards\n",
    "middleschool_df = pd.concat(\n",
    "    [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
    "print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
    "middleschool_df = middleschool_df.reset_index(drop=True)\n",
    "middleschool_df = middleschool_df[['oracle_id', 'name', 'name_ja']]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the list to a CSV file and a JSON file\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "middleschool_df.to_csv('output/middleschool.csv')\n",
    "middleschool_df.to_json('output/middleschool.json')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feel free to delete everything in the `data` directory after you are done"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}