File size: 11,053 Bytes
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
41aa31c
952f04c
dd85b1e
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6789de9
952f04c
 
 
 
 
 
41aa31c
dd85b1e
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd85b1e
952f04c
 
 
 
 
 
 
dd85b1e
952f04c
 
 
 
 
 
 
 
 
 
 
dd85b1e
41aa31c
dd85b1e
952f04c
 
dd85b1e
952f04c
 
 
 
 
dd85b1e
952f04c
 
 
dd85b1e
952f04c
 
106cfa9
 
6789de9
106cfa9
 
 
 
 
 
6789de9
 
 
 
 
 
 
106cfa9
 
 
 
 
 
 
 
 
 
6789de9
106cfa9
dd85b1e
106cfa9
dd85b1e
106cfa9
6789de9
dd85b1e
106cfa9
 
6789de9
 
 
 
 
 
 
 
106cfa9
dd85b1e
106cfa9
6789de9
106cfa9
 
 
6789de9
 
 
 
 
 
 
 
 
106cfa9
 
dd85b1e
 
 
 
 
 
 
 
952f04c
 
6789de9
952f04c
 
 
 
 
 
41aa31c
952f04c
41aa31c
 
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd85b1e
952f04c
00af251
952f04c
dd85b1e
 
 
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6789de9
952f04c
 
 
 
 
 
29b06fb
 
 
 
 
 
 
952f04c
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# middleschool-cardlist\n",
    "\n",
    "## Prepare the data\n",
    "\n",
    "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !cd data\n",
    "# !wget \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
    "# !bunzip2 AllPrintings.json.bz2\n",
    "# !cd -\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The Raw data is very large, so let's make JSON files for all relevant sets\n",
    "\n",
    "Note: this cell can take a couple minutes to run\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
    "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
    "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
    "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
    "           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
    "for set in setlist:\n",
    "    # Write a separate JSON document for each Middle School legal set\n",
    "    command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
    "        set + '\\\".cards\\' > data/set_' + set + '.json'\n",
    "    !{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate all set files into `middleschool.json`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "command = \"jq -s add data/set_* > data/middleschool.json\"\n",
    "!{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a list with each card's oracle ID, English name, and Japanese name\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5800 cards found\n",
      "These are the first and last 5 cards\n",
      "                              oracle_id               name    name_ja\n",
      "0  8adbba6e-03ef-4278-aec5-8a4496b377a8       Abandon Hope         断念\n",
      "0  5a70ccfa-d12d-4e62-a1a4-f05cda2fd442  Abandoned Outpost  見捨てられた前哨地\n",
      "0  c208b959-d0e4-4a9a-8255-2c7cc7596767    Abbey Gargoyles  修道院のガーゴイル\n",
      "0  62e3f285-886c-414e-b4ff-403a7c01c23a       Abbey Matron       None\n",
      "0  d0e1904e-1a37-41f6-8582-b9ea794bb886          Abduction         誘拐\n",
      "                              oracle_id                      name    name_ja\n",
      "0  ae8773a3-05f2-4074-9a53-033b0c127235  Zuo Ci, the Mocking Sage  嘲笑する仙人 左慈\n",
      "0  c6eaa147-3566-43a9-999a-d58b877496f5            Zur's Weirding   ズアーの運命支配\n",
      "0  ee0f883f-d7c9-4acf-a78f-f733b6f268d3           Zuran Enchanter       None\n",
      "0  08cb8a30-9cb4-4517-bee5-8848aa60d1a2                 Zuran Orb       None\n",
      "0  bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd         Zuran Spellcaster       None\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "with open(\"data/middleschool.json\") as json_data:\n",
    "    cards = json.loads(json_data.read())\n",
    "\n",
    "# Create a pandas DataFrame with all cards from all legal sets\n",
    "column_names = ['oracle_id', 'name', 'name_ja']\n",
    "middleschool_df = pd.DataFrame(columns=column_names)\n",
    "for card in cards:\n",
    "    oracle_id = card['identifiers']['scryfallOracleId']\n",
    "    name = card['name']\n",
    "    lang_ja = [lang for lang in card['foreignData']\n",
    "               if lang['language'] == 'Japanese']\n",
    "    # Some cards do not have a Japanese name\n",
    "    if (len(lang_ja) > 0):\n",
    "        name_ja = lang_ja[0]['name']\n",
    "    else:\n",
    "        name_ja = None\n",
    "    temporary_df = pd.DataFrame({\n",
    "        'oracle_id': [oracle_id],\n",
    "        'name':      [name],\n",
    "        'name_ja':   [name_ja]\n",
    "    })\n",
    "    middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
    "\n",
    "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
    "middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
    "# For cards with multiple occurrences, delete all rows except for the top one\n",
    "middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
    "print(middleschool_df.shape[0], 'cards found')\n",
    "print('These are the first and last 5 cards')\n",
    "print(middleschool_df.head())\n",
    "print(middleschool_df.tail())\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find Japanese names for cards that were not released in Japanese in Middle School legal sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "..........."
     ]
    }
   ],
   "source": [
    "import time\n",
    "from requests_html import HTMLSession\n",
    "session = HTMLSession()\n",
    "\n",
    "\n",
    "def find_japanese_name(name):\n",
    "    url = 'http://whisper.wisdom-guild.net/card/' + name + '/'\n",
    "    r = session.get(url)\n",
    "    # Find the text on the <title> element in the HTML document\n",
    "    title = r.html.find('title')[0].text\n",
    "    # Find the position of the English card name within the title\n",
    "    idx = title.find(name)\n",
    "    # The Japanese name should be before the English name,\n",
    "    # so if idx is 0, there is no Japanese name\n",
    "    if idx == 0:\n",
    "        return None\n",
    "    # If the exact English card name can't be found, we look for a '/'\n",
    "    if idx == -1:\n",
    "        idx = title.find('/')\n",
    "        # No '/' means no Japanese name\n",
    "        if idx == -1:\n",
    "            return None\n",
    "        # Take only the Japanese name from the title\n",
    "        name_ja = title[0:idx]\n",
    "    else:\n",
    "        # Take only the Japanese name from the title\n",
    "        name_ja = title[0:idx - 1]\n",
    "    return name_ja\n",
    "\n",
    "\n",
    "english_only_cards = middleschool_df[middleschool_df['name_ja'].isnull()]\n",
    "name_list = english_only_cards['name'].to_list()\n",
    "for idx, name in enumerate(name_list):\n",
    "    middleschool_df.loc[middleschool_df['name'] ==\n",
    "                        name, 'name_ja'] = find_japanese_name(name)\n",
    "    # print(middleschool_df.loc[middleschool_df['name'] == name])\n",
    "    print('.', end='')\n",
    "    if idx % 80 == 79:\n",
    "        print()\n",
    "    time.sleep(1)\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Exclude all cards banned in Middle School"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cards legal by set: 5800\n",
      "Banned cards: 26\n",
      "Cards legal by set and not banned: 5774\n"
     ]
    }
   ],
   "source": [
    "banlist = [\"Amulet of Quoz\",\n",
    "           \"Balance\",\n",
    "           \"Brainstorm\",\n",
    "           \"Bronze Tablet\",\n",
    "           \"Channel\",\n",
    "           \"Dark Ritual\",\n",
    "           \"Demonic Consultation\",\n",
    "           \"Flash\",\n",
    "           \"Goblin Recruiter\",\n",
    "           \"Imperial Seal\",\n",
    "           \"Jeweled Bird\",\n",
    "           \"Lion's Eye Diamond\",\n",
    "           \"Mana Crypt\",\n",
    "           \"Mana Vault\",\n",
    "           \"Memory Jar\",\n",
    "           \"Mind's Desire\",\n",
    "           \"Mind Twist\",\n",
    "           \"Rebirth\",\n",
    "           \"Strip Mine\",\n",
    "           \"Tempest Efreet\",\n",
    "           \"Timmerian Fiends\",\n",
    "           \"Tolarian Academy\",\n",
    "           \"Vampiric Tutor\",\n",
    "           \"Windfall\",\n",
    "           \"Yawgmoth's Bargain\",\n",
    "           \"Yawgmoth's Will\"]\n",
    "print('Cards legal by set:', middleschool_df.shape[0])\n",
    "# Find the rows with the banned cards\n",
    "banned_df = middleschool_df[pd.DataFrame(\n",
    "    middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
    "print('Banned cards:', banned_df.shape[0])\n",
    "# Append the banned cards to the main Middle School DataFrame,\n",
    "# then remove any rows that appear twice,\n",
    "# effectively leaving only the legal cards\n",
    "middleschool_df = pd.concat(\n",
    "    [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
    "print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
    "middleschool_df = middleschool_df.reset_index(drop=True)\n",
    "middleschool_df = middleschool_df[['oracle_id', 'name', 'name_ja']]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the list to a CSV file and a JSON file\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "middleschool_df.to_csv('output/middleschool.csv')\n",
    "middleschool_df.to_json('output/middleschool.json')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feel free to delete everything in the `data` directory after you are done"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}