Spaces:

alecrem
/

middleschool

Running

File size: 13,864 Bytes

952f04c
 
 
32deb70
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
41aa31c
952f04c
dd85b1e
952f04c
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
41aa31c
dd85b1e
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd85b1e
6ca52a1
952f04c
 
6ca52a1
 
 
dd85b1e
6ca52a1
 
952f04c
 
6ca52a1
 
 
952f04c
 
dd85b1e
6ca52a1
dd85b1e
6ca52a1
 
 
952f04c
 
 
 
004feb9
 
 
 
 
6ca52a1
004feb9
 
 
 
32deb70
004feb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ca52a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
004feb9
 
952f04c
dd85b1e
952f04c
 
 
6ca52a1
952f04c
 
106cfa9
 
32deb70
106cfa9
 
 
 
 
 
6789de9
 
 
 
 
 
004feb9
106cfa9
 
 
 
 
 
6ca52a1
106cfa9
 
 
 
6ca52a1
106cfa9
dd85b1e
6ca52a1
dd85b1e
106cfa9
6789de9
dd85b1e
106cfa9
 
6789de9
 
6ca52a1
6789de9
 
 
 
 
106cfa9
dd85b1e
6ca52a1
6789de9
106cfa9
 
6ca52a1
 
6789de9
6ca52a1
 
 
6789de9
6ca52a1
6789de9
 
 
106cfa9
 
dd85b1e
 
 
 
 
6ca52a1
dd85b1e
 
952f04c
 
32deb70
952f04c
 
 
 
 
 
41aa31c
32deb70
 
41aa31c
952f04c
 
 
6ca52a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd85b1e
6ca52a1
 
 
 
dd85b1e
 
 
6ca52a1
 
952f04c
6ca52a1
952f04c
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
6ca52a1
 
952f04c
29b06fb
 
32deb70
29b06fb
 
 
6ca52a1
29b06fb
952f04c
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
6ca52a1
952f04c

{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# middleschool-cardlist\n",
    "\n",
    "## Prepare the data\n",
    "\n",
    "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each `!` command runs in its own subshell, so `!cd data` would not affect the\n",
    "# commands that follow; the %cd magic changes the notebook's working directory.\n",
    "# %cd data\n",
    "# !wget \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
    "# !bunzip2 AllPrintings.json.bz2\n",
    "# %cd -\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The raw data is very large, so let's write a separate JSON file for each relevant set\n",
    "\n",
    "Note: this cell can take a couple of minutes to run\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
    "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
    "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
    "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
    "           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
    "# Write a separate JSON document for each Middle School legal set.\n",
    "# The loop variable is deliberately not called `set`: cell variables live in the\n",
    "# kernel's global namespace, so that name would shadow the builtin in later cells.\n",
    "for set_code in setlist:\n",
    "    # The set code is a quoted key inside the jq filter (several codes start with a digit)\n",
    "    jq_filter = f'.data.\"{set_code}\".cards'\n",
    "    # jq reads the input file directly, so there is no need to pipe it through cat\n",
    "    command = f\"jq '{jq_filter}' data/AllPrintings.json > data/set_{set_code}.json\"\n",
    "    !{command}\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate all set files into `middleschool.json`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Slurp every per-set card array and add them together into one array\n",
    "!jq -s add data/set_* > data/middleschool.json\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a list with each card's oracle ID, English name, and Japanese name\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5800 cards found\n",
      "These are the first and last 5 cards\n",
      "                              oracle_id               name    name_ja\n",
      "0  8adbba6e-03ef-4278-aec5-8a4496b377a8       Abandon Hope         断念\n",
      "0  5a70ccfa-d12d-4e62-a1a4-f05cda2fd442  Abandoned Outpost  見捨てられた前哨地\n",
      "0  c208b959-d0e4-4a9a-8255-2c7cc7596767    Abbey Gargoyles  修道院のガーゴイル\n",
      "0  62e3f285-886c-414e-b4ff-403a7c01c23a       Abbey Matron       None\n",
      "0  d0e1904e-1a37-41f6-8582-b9ea794bb886          Abduction         誘拐\n",
      "                              oracle_id                      name    name_ja\n",
      "0  ae8773a3-05f2-4074-9a53-033b0c127235  Zuo Ci, the Mocking Sage  嘲笑する仙人 左慈\n",
      "0  c6eaa147-3566-43a9-999a-d58b877496f5            Zur's Weirding   ズアーの運命支配\n",
      "0  ee0f883f-d7c9-4acf-a78f-f733b6f268d3           Zuran Enchanter       None\n",
      "0  08cb8a30-9cb4-4517-bee5-8848aa60d1a2                 Zuran Orb       None\n",
      "0  bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd         Zuran Spellcaster       None\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "# The card data contains Japanese text, so the encoding is stated explicitly\n",
    "# rather than left to the platform default\n",
    "with open(\"data/middleschool.json\", encoding=\"utf-8\") as json_data:\n",
    "    cards = json.load(json_data)\n",
    "\n",
    "# Collect one record per printing, then build the DataFrame in a single step;\n",
    "# concatenating row by row inside the loop copies the whole frame every time\n",
    "records = []\n",
    "for card in cards:\n",
    "    # Some cards do not have a Japanese name; those get None.\n",
    "    # When there are several Japanese entries, the first one is used.\n",
    "    name_ja = next(\n",
    "        (\n",
    "            foreign[\"name\"]\n",
    "            for foreign in (card.get(\"foreignData\") or [])\n",
    "            if foreign[\"language\"] == \"Japanese\"\n",
    "        ),\n",
    "        None,\n",
    "    )\n",
    "    records.append(\n",
    "        {\n",
    "            \"oracle_id\": card[\"identifiers\"][\"scryfallOracleId\"],\n",
    "            \"name\": card[\"name\"],\n",
    "            \"name_ja\": name_ja,\n",
    "        }\n",
    "    )\n",
    "\n",
    "# Create a pandas DataFrame with all cards from all legal sets\n",
    "column_names = [\"oracle_id\", \"name\", \"name_ja\"]\n",
    "middleschool_df = pd.DataFrame(records, columns=column_names)\n",
    "\n",
    "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
    "# (missing values sort last), keep only the top row for each oracle ID,\n",
    "# and renumber the rows so that the index is unique\n",
    "middleschool_df = (\n",
    "    middleschool_df.sort_values(by=[\"name\", \"name_ja\"])\n",
    "    .drop_duplicates(subset=[\"oracle_id\"])\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "print(middleschool_df.shape[0], \"cards found\")\n",
    "print(\"These are the first and last 5 cards\")\n",
    "print(middleschool_df.head())\n",
    "print(middleschool_df.tail())\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remove Japanese card names that are wrong in MTGJSON (for these cards the Japanese name field just holds an English name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Before:\n",
      "                              oracle_id             name          name_ja\n",
      "0  0fe602b7-9f88-4d3d-af24-7790df867ed5   Aether Barrier    Æther Barrier\n",
      "0  1e33f39b-a61a-4a09-a541-16cc1bd53d02     Aether Burst      Æther Burst\n",
      "0  15e83068-6253-4c65-8679-7295f3dc2075    Aether Charge     Æther Charge\n",
      "0  a3c35742-e306-49b6-b042-db4f685c6f86     Aether Flash      Æther Flash\n",
      "0  6697fe5b-90ac-4321-aa2f-cdc6ec283cb4  Aether Mutation  Aether Mutation\n",
      "0  61105cb5-d7a1-4021-a006-dd1b947dfa68     Aether Sting      Æther Sting\n",
      "0  ff4297d3-3d96-4bd6-a606-1bdc20a6df2b     Aether Storm      Æther Storm\n",
      "0  2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e      Aether Tide       Æther Tide\n",
      "0  a61ceda1-5993-479e-945f-15753eeb7049   Tainted Aether    Tainted Æther\n",
      "0  05a7ca83-e820-433f-b9e9-151e817d3708  Tar Pit Warrior  Tar Pit Warrior\n",
      "After:\n",
      "                              oracle_id             name name_ja\n",
      "0  0fe602b7-9f88-4d3d-af24-7790df867ed5   Aether Barrier    None\n",
      "0  1e33f39b-a61a-4a09-a541-16cc1bd53d02     Aether Burst    None\n",
      "0  15e83068-6253-4c65-8679-7295f3dc2075    Aether Charge    None\n",
      "0  a3c35742-e306-49b6-b042-db4f685c6f86     Aether Flash    None\n",
      "0  6697fe5b-90ac-4321-aa2f-cdc6ec283cb4  Aether Mutation    None\n",
      "0  61105cb5-d7a1-4021-a006-dd1b947dfa68     Aether Sting    None\n",
      "0  ff4297d3-3d96-4bd6-a606-1bdc20a6df2b     Aether Storm    None\n",
      "0  2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e      Aether Tide    None\n",
      "0  a61ceda1-5993-479e-945f-15753eeb7049   Tainted Aether    None\n",
      "0  05a7ca83-e820-433f-b9e9-151e817d3708  Tar Pit Warrior    None\n"
     ]
    }
   ],
   "source": [
    "# English names of the cards whose Japanese name is discarded below\n",
    "wrongnames = [\n",
    "    \"Aether Barrier\", \"Aether Burst\", \"Aether Charge\", \"Aether Flash\",\n",
    "    \"Aether Mutation\", \"Aether Sting\", \"Aether Storm\", \"Aether Tide\",\n",
    "    \"Tainted Aether\", \"Tar Pit Warrior\",\n",
    "]\n",
    "# The name column is not modified in this cell, so one mask selects\n",
    "# the same rows both before and after the update\n",
    "has_wrong_name = middleschool_df[\"name\"].isin(wrongnames)\n",
    "print(\"Before:\")\n",
    "print(middleschool_df.loc[has_wrong_name])\n",
    "middleschool_df.loc[has_wrong_name, \"name_ja\"] = None\n",
    "print(\"After:\")\n",
    "print(middleschool_df.loc[has_wrong_name])\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find Japanese names for cards that were not released in Japanese in Middle School legal sets\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "......................"
     ]
    }
   ],
   "source": [
    "import time\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "\n",
    "\n",
    "def _japanese_name_from_title(title, name):\n",
    "    \"\"\"Return the part of a page title that precedes the English card name, or None.\"\"\"\n",
    "    # Find the position of the English card name within the title\n",
    "    idx = title.find(name)\n",
    "    # The Japanese name should be before the English name,\n",
    "    # so if idx is 0, there is no Japanese name\n",
    "    if idx == 0:\n",
    "        return None\n",
    "    if idx == -1:\n",
    "        # The exact English card name can't be found, so look for a '/' instead\n",
    "        idx = title.find(\"/\")\n",
    "        # No '/' means no Japanese name\n",
    "        if idx == -1:\n",
    "            return None\n",
    "        name_ja = title[:idx]\n",
    "    else:\n",
    "        # Also drop the single separator character right before the English name\n",
    "        name_ja = title[: idx - 1]\n",
    "    # An empty string is not a name, so report it as missing\n",
    "    return name_ja or None\n",
    "\n",
    "\n",
    "def find_japanese_name(name):\n",
    "    \"\"\"Look up the Japanese name of a card on whisper.wisdom-guild.net.\n",
    "\n",
    "    Args:\n",
    "        name: English card name; it is inserted into the card page URL as is.\n",
    "\n",
    "    Returns:\n",
    "        The text that precedes the English name in the page title, or None\n",
    "        when the page has no title element or the title holds no such text.\n",
    "    \"\"\"\n",
    "    url = \"http://whisper.wisdom-guild.net/card/\" + name + \"/\"\n",
    "    # The timeout keeps one stalled request from hanging the whole lookup loop\n",
    "    r = session.get(url, timeout=30)\n",
    "    # first=True yields None instead of an empty list when the page has no title\n",
    "    title_element = r.html.find(\"title\", first=True)\n",
    "    if title_element is None:\n",
    "        return None\n",
    "    return _japanese_name_from_title(title_element.text, name)\n",
    "\n",
    "\n",
    "# Look up every card that still has no Japanese name, one request per second\n",
    "english_only_cards = middleschool_df[middleschool_df[\"name_ja\"].isnull()]\n",
    "name_list = english_only_cards[\"name\"].to_list()\n",
    "for position, card_name in enumerate(name_list, start=1):\n",
    "    same_name = middleschool_df[\"name\"] == card_name\n",
    "    middleschool_df.loc[same_name, \"name_ja\"] = find_japanese_name(card_name)\n",
    "    # Progress indicator: one dot per card, 80 dots per line\n",
    "    print(\".\", end=\"\")\n",
    "    if position % 80 == 0:\n",
    "        print()\n",
    "    time.sleep(1)\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Exclude all cards banned in Middle School\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cards legal by set: 5800\n",
      "Banned cards: 25\n",
      "Cards legal by set and not banned: 5775\n"
     ]
    }
   ],
   "source": [
    "banlist = [\n",
    "    \"Amulet of Quoz\",\n",
    "    \"Balance\",\n",
    "    \"Brainstorm\",\n",
    "    \"Bronze Tablet\",\n",
    "    \"Channel\",\n",
    "    \"Dark Ritual\",\n",
    "    \"Demonic Consultation\",\n",
    "    \"Flash\",\n",
    "    \"Goblin Recruiter\",\n",
    "    \"Imperial Seal\",\n",
    "    \"Jeweled Bird\",\n",
    "    \"Mana Crypt\",\n",
    "    \"Mana Vault\",\n",
    "    \"Memory Jar\",\n",
    "    \"Mind's Desire\",\n",
    "    \"Mind Twist\",\n",
    "    \"Rebirth\",\n",
    "    \"Strip Mine\",\n",
    "    \"Tempest Efreet\",\n",
    "    \"Timmerian Fiends\",\n",
    "    \"Tolarian Academy\",\n",
    "    \"Vampiric Tutor\",\n",
    "    \"Windfall\",\n",
    "    \"Yawgmoth's Bargain\",\n",
    "    \"Yawgmoth's Will\",\n",
    "]\n",
    "print(\"Cards legal by set:\", middleschool_df.shape[0])\n",
    "# Find the rows with the banned cards. The mask is a plain boolean array,\n",
    "# so it selects rows by position whatever the DataFrame index looks like\n",
    "is_banned = middleschool_df[\"name\"].isin(banlist).to_numpy()\n",
    "banned_df = middleschool_df[is_banned]\n",
    "print(\"Banned cards:\", banned_df.shape[0])\n",
    "# Keep only the rows that are not banned. The inverted mask removes exactly\n",
    "# the banned rows; appending banned_df and dropping every duplicated row\n",
    "# would also discard any other rows that happen to be identical\n",
    "middleschool_df = middleschool_df[~is_banned]\n",
    "print(\"Cards legal by set and not banned:\", middleschool_df.shape[0])\n",
    "middleschool_df = middleschool_df.reset_index(drop=True)\n",
    "middleschool_df = middleschool_df[[\"oracle_id\", \"name\", \"name_ja\"]]\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the list to a CSV file and a JSON file\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Create the output directory if it is missing, so that the save does not fail\n",
    "# after the long-running lookup cells have already finished\n",
    "os.makedirs(\"output\", exist_ok=True)\n",
    "middleschool_df.to_csv(\"output/middleschool.csv\")\n",
    "middleschool_df.to_json(\"output/middleschool.json\")\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feel free to delete everything in the `data` directory after you are done\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}