File size: 13,864 Bytes
952f04c
 
 
32deb70
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
41aa31c
952f04c
dd85b1e
952f04c
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
 
 
 
41aa31c
dd85b1e
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd85b1e
6ca52a1
952f04c
 
6ca52a1
 
 
dd85b1e
6ca52a1
 
952f04c
 
6ca52a1
 
 
952f04c
 
dd85b1e
6ca52a1
dd85b1e
6ca52a1
 
 
952f04c
 
 
 
004feb9
 
 
 
 
6ca52a1
004feb9
 
 
 
32deb70
004feb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ca52a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
004feb9
 
952f04c
dd85b1e
952f04c
 
 
6ca52a1
952f04c
 
106cfa9
 
32deb70
106cfa9
 
 
 
 
 
6789de9
 
 
 
 
 
004feb9
106cfa9
 
 
 
 
 
6ca52a1
106cfa9
 
 
 
6ca52a1
106cfa9
dd85b1e
6ca52a1
dd85b1e
106cfa9
6789de9
dd85b1e
106cfa9
 
6789de9
 
6ca52a1
6789de9
 
 
 
 
106cfa9
dd85b1e
6ca52a1
6789de9
106cfa9
 
6ca52a1
 
6789de9
6ca52a1
 
 
6789de9
6ca52a1
6789de9
 
 
106cfa9
 
dd85b1e
 
 
 
 
6ca52a1
dd85b1e
 
952f04c
 
32deb70
952f04c
 
 
 
 
 
41aa31c
32deb70
 
41aa31c
952f04c
 
 
6ca52a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd85b1e
6ca52a1
 
 
 
dd85b1e
 
 
6ca52a1
 
952f04c
6ca52a1
952f04c
 
 
32deb70
952f04c
 
 
 
 
 
 
 
32deb70
952f04c
 
 
6ca52a1
 
952f04c
29b06fb
 
32deb70
29b06fb
 
 
6ca52a1
29b06fb
952f04c
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
6ca52a1
952f04c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# middleschool-cardlist\n",
    "\n",
    "## Prepare the data\n",
    "\n",
    "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each `!` command runs in its own subshell, so a separate `!cd data` line\n",
    "# would not affect the commands after it; name the directory explicitly.\n",
    "# !wget -P data \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
    "# !bunzip2 data/AllPrintings.json.bz2\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The raw data is very large, so let's make a separate JSON file for each relevant set\n",
    "\n",
    "Note: this cell can take a couple of minutes to run\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
    "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
    "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
    "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
    "           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
    "# The loop variable is called `set_code` so that the built-in `set`\n",
    "# is not shadowed for the rest of the kernel session\n",
    "for set_code in setlist:\n",
    "    # Write a separate JSON document for each Middle School legal set.\n",
    "    # jq reads the input file itself, so no `cat` pipe is needed.\n",
    "    command = (\n",
    "        f\"jq '.data.\\\"{set_code}\\\".cards' data/AllPrintings.json\"\n",
    "        f\" > data/set_{set_code}.json\"\n",
    "    )\n",
    "    !{command}\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate all set files into `middleschool.json`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Slurp every per-set file and add them together into one JSON document\n",
    "!jq -s add data/set_* > data/middleschool.json\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a list with each card's oracle ID, English name, and Japanese name\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5800 cards found\n",
      "These are the first and last 5 cards\n",
      "                              oracle_id               name    name_ja\n",
      "0  8adbba6e-03ef-4278-aec5-8a4496b377a8       Abandon Hope         断念\n",
      "0  5a70ccfa-d12d-4e62-a1a4-f05cda2fd442  Abandoned Outpost  見捨てられた前哨地\n",
      "0  c208b959-d0e4-4a9a-8255-2c7cc7596767    Abbey Gargoyles  修道院のガーゴイル\n",
      "0  62e3f285-886c-414e-b4ff-403a7c01c23a       Abbey Matron       None\n",
      "0  d0e1904e-1a37-41f6-8582-b9ea794bb886          Abduction         誘拐\n",
      "                              oracle_id                      name    name_ja\n",
      "0  ae8773a3-05f2-4074-9a53-033b0c127235  Zuo Ci, the Mocking Sage  嘲笑する仙人 左慈\n",
      "0  c6eaa147-3566-43a9-999a-d58b877496f5            Zur's Weirding   ズアーの運命支配\n",
      "0  ee0f883f-d7c9-4acf-a78f-f733b6f268d3           Zuran Enchanter       None\n",
      "0  08cb8a30-9cb4-4517-bee5-8848aa60d1a2                 Zuran Orb       None\n",
      "0  bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd         Zuran Spellcaster       None\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "# Card names include Japanese text, so read the file as UTF-8 explicitly\n",
    "# rather than relying on the platform's default encoding\n",
    "with open(\"data/middleschool.json\", encoding=\"utf-8\") as json_data:\n",
    "    cards = json.load(json_data)\n",
    "\n",
    "# Collect one record per card entry and build the DataFrame in a single step;\n",
    "# concatenating inside the loop copies the whole frame on every iteration\n",
    "# and leaves every row with the same index label\n",
    "column_names = [\"oracle_id\", \"name\", \"name_ja\"]\n",
    "records = []\n",
    "for card in cards:\n",
    "    # Some cards do not have a Japanese name (or any foreign data at all);\n",
    "    # those get None\n",
    "    name_ja = next(\n",
    "        (\n",
    "            lang[\"name\"]\n",
    "            for lang in card.get(\"foreignData\") or []\n",
    "            if lang[\"language\"] == \"Japanese\"\n",
    "        ),\n",
    "        None,\n",
    "    )\n",
    "    records.append(\n",
    "        {\n",
    "            \"oracle_id\": card[\"identifiers\"][\"scryfallOracleId\"],\n",
    "            \"name\": card[\"name\"],\n",
    "            \"name_ja\": name_ja,\n",
    "        }\n",
    "    )\n",
    "middleschool_df = pd.DataFrame(records, columns=column_names)\n",
    "\n",
    "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
    "# (missing values sort last)\n",
    "middleschool_df = middleschool_df.sort_values(by=[\"name\", \"name_ja\"])\n",
    "# For cards with multiple occurrences, delete all rows except for the top one\n",
    "middleschool_df = middleschool_df.drop_duplicates(subset=[\"oracle_id\"])\n",
    "print(middleschool_df.shape[0], \"cards found\")\n",
    "print(\"These are the first and last 5 cards\")\n",
    "print(middleschool_df.head())\n",
    "print(middleschool_df.tail())\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remove Japanese card names that are wrong on MTGJSON\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Before:\n",
      "                              oracle_id             name          name_ja\n",
      "0  0fe602b7-9f88-4d3d-af24-7790df867ed5   Aether Barrier    Æther Barrier\n",
      "0  1e33f39b-a61a-4a09-a541-16cc1bd53d02     Aether Burst      Æther Burst\n",
      "0  15e83068-6253-4c65-8679-7295f3dc2075    Aether Charge     Æther Charge\n",
      "0  a3c35742-e306-49b6-b042-db4f685c6f86     Aether Flash      Æther Flash\n",
      "0  6697fe5b-90ac-4321-aa2f-cdc6ec283cb4  Aether Mutation  Aether Mutation\n",
      "0  61105cb5-d7a1-4021-a006-dd1b947dfa68     Aether Sting      Æther Sting\n",
      "0  ff4297d3-3d96-4bd6-a606-1bdc20a6df2b     Aether Storm      Æther Storm\n",
      "0  2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e      Aether Tide       Æther Tide\n",
      "0  a61ceda1-5993-479e-945f-15753eeb7049   Tainted Aether    Tainted Æther\n",
      "0  05a7ca83-e820-433f-b9e9-151e817d3708  Tar Pit Warrior  Tar Pit Warrior\n",
      "After:\n",
      "                              oracle_id             name name_ja\n",
      "0  0fe602b7-9f88-4d3d-af24-7790df867ed5   Aether Barrier    None\n",
      "0  1e33f39b-a61a-4a09-a541-16cc1bd53d02     Aether Burst    None\n",
      "0  15e83068-6253-4c65-8679-7295f3dc2075    Aether Charge    None\n",
      "0  a3c35742-e306-49b6-b042-db4f685c6f86     Aether Flash    None\n",
      "0  6697fe5b-90ac-4321-aa2f-cdc6ec283cb4  Aether Mutation    None\n",
      "0  61105cb5-d7a1-4021-a006-dd1b947dfa68     Aether Sting    None\n",
      "0  ff4297d3-3d96-4bd6-a606-1bdc20a6df2b     Aether Storm    None\n",
      "0  2fbf95b4-bcf4-4b5e-b5dc-0294f2b48d3e      Aether Tide    None\n",
      "0  a61ceda1-5993-479e-945f-15753eeb7049   Tainted Aether    None\n",
      "0  05a7ca83-e820-433f-b9e9-151e817d3708  Tar Pit Warrior    None\n"
     ]
    }
   ],
   "source": [
    "wrongnames = [\n",
    "    \"Aether Barrier\",\n",
    "    \"Aether Burst\",\n",
    "    \"Aether Charge\",\n",
    "    \"Aether Flash\",\n",
    "    \"Aether Mutation\",\n",
    "    \"Aether Sting\",\n",
    "    \"Aether Storm\",\n",
    "    \"Aether Tide\",\n",
    "    \"Tainted Aether\",\n",
    "    \"Tar Pit Warrior\",\n",
    "]\n",
    "# Select the rows whose English name is on the list above\n",
    "has_wrong_name = middleschool_df[\"name\"].isin(wrongnames)\n",
    "print(\"Before:\")\n",
    "print(middleschool_df.loc[has_wrong_name])\n",
    "# Clear the Japanese name on those rows\n",
    "middleschool_df.loc[has_wrong_name, \"name_ja\"] = None\n",
    "print(\"After:\")\n",
    "print(middleschool_df.loc[has_wrong_name])\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find Japanese names for cards that were not released in Japanese in Middle School legal sets\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "................................................................................\n",
      "......................"
     ]
    }
   ],
   "source": [
    "import time\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "\n",
    "\n",
    "def find_japanese_name(name):\n",
    "    \"\"\"Look up a card's Japanese name on whisper.wisdom-guild.net.\n",
    "\n",
    "    Fetches the page for the English card `name` with the module-level\n",
    "    `session` and parses its <title>, where the Japanese name is expected\n",
    "    to come before the English name.\n",
    "\n",
    "    Returns the Japanese name, or None when the page has no <title> or no\n",
    "    Japanese name can be found in it. A request that gets no answer within\n",
    "    30 seconds raises instead of blocking the notebook.\n",
    "    \"\"\"\n",
    "    url = \"http://whisper.wisdom-guild.net/card/\" + name + \"/\"\n",
    "    # Without a timeout a stalled connection would hang the cell indefinitely\n",
    "    r = session.get(url, timeout=30)\n",
    "    # Find the <title> element in the HTML document\n",
    "    titles = r.html.find(\"title\")\n",
    "    # A page without a <title> cannot provide a name\n",
    "    if not titles:\n",
    "        return None\n",
    "    title = titles[0].text\n",
    "    # Find the position of the English card name within the title\n",
    "    idx = title.find(name)\n",
    "    # The Japanese name should be before the English name,\n",
    "    # so if idx is 0, there is no Japanese name\n",
    "    if idx == 0:\n",
    "        return None\n",
    "    # If the exact English card name can't be found, we look for a '/'\n",
    "    if idx == -1:\n",
    "        idx = title.find(\"/\")\n",
    "        # No '/' means no Japanese name\n",
    "        if idx == -1:\n",
    "            return None\n",
    "        # Take only the Japanese name from the title\n",
    "        name_ja = title[0:idx]\n",
    "    else:\n",
    "        # Take only the Japanese name from the title,\n",
    "        # dropping the one character that separates it from the English name\n",
    "        name_ja = title[0 : idx - 1]\n",
    "    return name_ja\n",
    "\n",
    "\n",
    "# Only cards that still have no Japanese name need a lookup\n",
    "english_only_cards = middleschool_df[middleschool_df[\"name_ja\"].isnull()]\n",
    "name_list = english_only_cards[\"name\"].to_list()\n",
    "for idx, name in enumerate(name_list):\n",
    "    middleschool_df.loc[\n",
    "        middleschool_df[\"name\"] == name, \"name_ja\"\n",
    "    ] = find_japanese_name(name)\n",
    "    # Progress indicator: one dot per card, 80 dots per line\n",
    "    print(\".\", end=\"\")\n",
    "    if idx % 80 == 79:\n",
    "        print()\n",
    "    # Wait between requests so the site is not flooded\n",
    "    time.sleep(1)\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Exclude all cards banned in Middle School\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cards legal by set: 5800\n",
      "Banned cards: 25\n",
      "Cards legal by set and not banned: 5775\n"
     ]
    }
   ],
   "source": [
    "banlist = [\n",
    "    \"Amulet of Quoz\",\n",
    "    \"Balance\",\n",
    "    \"Brainstorm\",\n",
    "    \"Bronze Tablet\",\n",
    "    \"Channel\",\n",
    "    \"Dark Ritual\",\n",
    "    \"Demonic Consultation\",\n",
    "    \"Flash\",\n",
    "    \"Goblin Recruiter\",\n",
    "    \"Imperial Seal\",\n",
    "    \"Jeweled Bird\",\n",
    "    \"Mana Crypt\",\n",
    "    \"Mana Vault\",\n",
    "    \"Memory Jar\",\n",
    "    \"Mind's Desire\",\n",
    "    \"Mind Twist\",\n",
    "    \"Rebirth\",\n",
    "    \"Strip Mine\",\n",
    "    \"Tempest Efreet\",\n",
    "    \"Timmerian Fiends\",\n",
    "    \"Tolarian Academy\",\n",
    "    \"Vampiric Tutor\",\n",
    "    \"Windfall\",\n",
    "    \"Yawgmoth's Bargain\",\n",
    "    \"Yawgmoth's Will\",\n",
    "]\n",
    "print(\"Cards legal by set:\", middleschool_df.shape[0])\n",
    "# Flag the rows whose English name is on the banlist; a plain boolean array\n",
    "# is used so the selection is positional and does not depend on index labels\n",
    "is_banned = middleschool_df[\"name\"].isin(banlist).to_numpy()\n",
    "banned_df = middleschool_df[is_banned]\n",
    "print(\"Banned cards:\", banned_df.shape[0])\n",
    "# Keep every row that is not banned, leaving only the legal cards\n",
    "middleschool_df = middleschool_df[~is_banned]\n",
    "print(\"Cards legal by set and not banned:\", middleschool_df.shape[0])\n",
    "middleschool_df = middleschool_df.reset_index(drop=True)\n",
    "middleschool_df = middleschool_df[[\"oracle_id\", \"name\", \"name_ja\"]]\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the list to a CSV file and a JSON file\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "# pandas does not create missing directories, so make sure `output` exists\n",
    "Path(\"output\").mkdir(parents=True, exist_ok=True)\n",
    "middleschool_df.to_csv(\"output/middleschool.csv\")\n",
    "middleschool_df.to_json(\"output/middleschool.json\")\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feel free to delete everything in the `data` directory after you are done\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}