File size: 9,590 Bytes
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00af251
952f04c
 
 
 
 
 
41aa31c
 
 
 
 
 
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41aa31c
 
952f04c
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
106cfa9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
952f04c
 
00af251
952f04c
 
 
 
 
 
41aa31c
952f04c
41aa31c
 
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00af251
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00af251
952f04c
 
 
 
 
 
29b06fb
 
 
 
 
 
 
952f04c
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
 
 
41aa31c
952f04c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# middleschool-cardlist\n",
    "\n",
    "## Prepare the data\n",
    "\n",
    "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !cd data\n",
    "# !wget \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
    "# !bunzip2 AllPrintings.json.bz2\n",
    "# !cd -\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The Raw data is very large, so let's make JSON files for all relevant sets\n",
    "\n",
    "Note: this cell can take a couple minutes to run\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
    "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
    "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
    "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
    "           'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
    "for set in setlist:\n",
    "    command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
    "        set + '\\\".cards\\' > data/set_' + set + '.json'\n",
    "    !{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate all set files into `middleschool.json`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "command = \"jq -s add data/set_* > data/middleschool.json\"\n",
    "!{command}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a list with each card's oracle ID, English name, and Japanese name\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                              oracle_id           name name_ja\n",
      "0  5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982  Arc Lightning   弧状の稲妻\n",
      "0  5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982  Arc Lightning    None\n",
      "                              oracle_id           name name_ja\n",
      "0  5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982  Arc Lightning   弧状の稲妻\n",
      "5800 cards found\n",
      "                              oracle_id               name    name_ja\n",
      "0  8adbba6e-03ef-4278-aec5-8a4496b377a8       Abandon Hope         断念\n",
      "0  5a70ccfa-d12d-4e62-a1a4-f05cda2fd442  Abandoned Outpost  見捨てられた前哨地\n",
      "0  c208b959-d0e4-4a9a-8255-2c7cc7596767    Abbey Gargoyles  修道院のガーゴイル\n",
      "0  62e3f285-886c-414e-b4ff-403a7c01c23a       Abbey Matron       None\n",
      "0  d0e1904e-1a37-41f6-8582-b9ea794bb886          Abduction         誘拐\n",
      "                              oracle_id                      name    name_ja\n",
      "0  ae8773a3-05f2-4074-9a53-033b0c127235  Zuo Ci, the Mocking Sage  嘲笑する仙人 左慈\n",
      "0  c6eaa147-3566-43a9-999a-d58b877496f5            Zur's Weirding   ズアーの運命支配\n",
      "0  ee0f883f-d7c9-4acf-a78f-f733b6f268d3           Zuran Enchanter       None\n",
      "0  08cb8a30-9cb4-4517-bee5-8848aa60d1a2                 Zuran Orb       None\n",
      "0  bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd         Zuran Spellcaster       None\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "with open(\"data/middleschool.json\") as json_data:\n",
    "    cards = json.loads(json_data.read())\n",
    "\n",
    "column_names = ['oracle_id', 'name', 'name_ja']\n",
    "middleschool_df = pd.DataFrame(columns=column_names)\n",
    "for card in cards:\n",
    "    oracle_id = card['identifiers']['scryfallOracleId']\n",
    "    name = card['name']\n",
    "    lang_ja = [lang for lang in card['foreignData']\n",
    "               if lang['language'] == 'Japanese']\n",
    "    if (len(lang_ja) > 0):\n",
    "        name_ja = lang_ja[0]['name']\n",
    "    else:\n",
    "        name_ja = None\n",
    "    temporary_df = pd.DataFrame({\n",
    "        'oracle_id': [oracle_id],\n",
    "        'name':      [name],\n",
    "        'name_ja':   [name_ja]\n",
    "    })\n",
    "    middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
    "\n",
    "middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
    "print(middleschool_df[middleschool_df['name'] == 'Arc Lightning'])\n",
    "middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
    "print(middleschool_df[middleschool_df['name'] == 'Arc Lightning'])\n",
    "print(middleschool_df.shape[0], 'cards found')\n",
    "print(middleschool_df.head())\n",
    "print(middleschool_df.tail())\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Exclude all cards banned in Middle School\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Abbey Matron → None\n",
      "Zuran Spellcaster → ズアーの投呪士\n",
      "Sea Eagle → 海の鷲\n",
      "Yavimaya Ants → ヤヴィマヤの蟻\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "from requests_html import HTMLSession\n",
    "session = HTMLSession()\n",
    "\n",
    "\n",
    "def find_japanese_name(name):\n",
    "    url = 'http://whisper.wisdom-guild.net/search.php?q=' + name\n",
    "    r = session.get(url)\n",
    "    title = r.html.find('title')[0].text\n",
    "    idx = title.find(name)\n",
    "    if idx == 0:\n",
    "        return None\n",
    "    else:\n",
    "        name_ja = title[0:idx - 1]\n",
    "        return name_ja\n",
    "\n",
    "\n",
    "english_only_cards = middleschool_df[middleschool_df['name_ja'].isnull()]\n",
    "name = english_only_cards.iloc[0]['name']\n",
    "print(name, '', find_japanese_name(name))\n",
    "time.sleep(1)\n",
    "name = english_only_cards.iloc[-1]['name']\n",
    "print(name, '', find_japanese_name(name))\n",
    "time.sleep(1)\n",
    "name = \"Sea Eagle\"\n",
    "print(name, '', find_japanese_name(name))\n",
    "time.sleep(1)\n",
    "name = \"Yavimaya Ants\"\n",
    "print(name, '', find_japanese_name(name))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cards legal by set: 5800\n",
      "Banned cards: 26\n",
      "Cards legal by set and not banned: 5774\n"
     ]
    }
   ],
   "source": [
    "banlist = [\"Amulet of Quoz\",\n",
    "           \"Balance\",\n",
    "           \"Brainstorm\",\n",
    "           \"Bronze Tablet\",\n",
    "           \"Channel\",\n",
    "           \"Dark Ritual\",\n",
    "           \"Demonic Consultation\",\n",
    "           \"Flash\",\n",
    "           \"Goblin Recruiter\",\n",
    "           \"Imperial Seal\",\n",
    "           \"Jeweled Bird\",\n",
    "           \"Lion's Eye Diamond\",\n",
    "           \"Mana Crypt\",\n",
    "           \"Mana Vault\",\n",
    "           \"Memory Jar\",\n",
    "           \"Mind's Desire\",\n",
    "           \"Mind Twist\",\n",
    "           \"Rebirth\",\n",
    "           \"Strip Mine\",\n",
    "           \"Tempest Efreet\",\n",
    "           \"Timmerian Fiends\",\n",
    "           \"Tolarian Academy\",\n",
    "           \"Vampiric Tutor\",\n",
    "           \"Windfall\",\n",
    "           \"Yawgmoth's Bargain\",\n",
    "           \"Yawgmoth's Will\"]\n",
    "print('Cards legal by set:', middleschool_df.shape[0])\n",
    "banned_df = middleschool_df[pd.DataFrame(\n",
    "    middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
    "print('Banned cards:', banned_df.shape[0])\n",
    "middleschool_df = pd.concat(\n",
    "    [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
    "print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
    "middleschool_df = middleschool_df.reset_index(drop=True)\n",
    "middleschool_df = middleschool_df[['oracle_id', 'name', 'name_ja']]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the list to a CSV file and a JSON file\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "middleschool_df.to_csv('output/middleschool.csv')\n",
    "middleschool_df.to_json('output/middleschool.json')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feel free to delete everything in the `data` directory after you are done"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}