Spaces:
Running
Running
Alejandro Cremades
committed on
Commit
•
17dcef2
1
Parent(s):
b2d9d69
Compile card list with sequential scripts
Browse files- .gitignore +1 -0
- list_scripts/1_download_mtgjson.sh +10 -0
- list_scripts/2_per_set_json_files.py +57 -0
- list_scripts/4_compile_from_legal_sets.py +30 -0
- list_scripts/5_remove_wrong_names.py +20 -0
- list_scripts/6_find_remaining_japanese_names.py +46 -0
- list_scripts/7_remove_banned_cards.py +51 -0
- output/middleschool.csv +0 -0
- output/middleschool.json +0 -0
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
data/*
|
|
|
|
1 |
data/*
|
2 |
+
list_scripts/3_separate_json_files_per_set.sh
|
list_scripts/1_download_mtgjson.sh
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env bash
# Download mtgjson data and extract it on the `data` directory
# Feel free to make the file available in any other way

# Important: run this script from the parent directory
# (the root directory in this repository)

# Abort immediately if any step fails (missing data dir, failed download,
# failed decompression) instead of running the remaining commands anyway.
set -euo pipefail

cd data
wget "https://mtgjson.com/api/v5/AllPrintings.json.bz2"
bunzip2 AllPrintings.json.bz2
cd -
|
list_scripts/2_per_set_json_files.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Important: run this script from the parent directory
## (the root directory in this repository)
#
# python3 list_scripts/2_per_set_json_files.py

# The raw data is very large, so let's make JSON files for all relevant sets
# Note: this can take a couple minutes to run

# MTGJSON codes for every set that is legal in Middle School
setlist = [
    "4ED",
    "ICE",
    "CHR",
    "HML",
    "ALL",
    "MIR",
    "VIS",
    "5ED",
    "WTH",
    "POR",
    "TMP",
    "STH",
    "EXO",
    "P02",
    "USG",
    "ULG",
    "6ED",
    "UDS",
    "PTK",
    "S99",
    "MMQ",
    "NEM",
    "PCY",
    "S00",
    "INV",
    "PLS",
    "7ED",
    "APC",
    "ODY",
    "TOR",
    "JUD",
    "ONS",
    "LGN",
    "SCG",
    "PDRC",
    "PHPR",
    "ATH",
    "BRB",
    "BTD",
    "DKM",
]

# Emit a shell script (step 3 of the pipeline) that extracts each set's cards
# with jq, then merges them into one combined JSON document.
with open("list_scripts/3_separate_json_files_per_set.sh", "w") as f:
    # `set_code` (not `set`) so we don't shadow the `set` builtin
    for set_code in setlist:
        # Write a separate JSON document for each Middle School legal set
        line = (
            f"cat data/AllPrintings.json | jq '.data.\"{set_code}\".cards'"
            f" > data/set_{set_code}.json"
        )
        f.write(line + "\n")
    line = "jq -s add data/set_* > data/middleschool.json"
    f.write(line + "\n")
|
list_scripts/4_compile_from_legal_sets.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json

import pandas as pd

# Compile one row per card (oracle_id, English name, Japanese name) from the
# merged per-set JSON produced by the earlier pipeline steps.
with open("data/middleschool.json") as json_data:
    cards = json.loads(json_data.read())

# Collect plain dicts and build the DataFrame once at the end: concatenating a
# one-row DataFrame per card inside the loop is quadratic in the number of cards.
rows = []
for card in cards:
    oracle_id = card["identifiers"]["scryfallOracleId"]
    name = card["name"]
    lang_ja = [lang for lang in card["foreignData"] if lang["language"] == "Japanese"]
    # Some cards do not have a Japanese name
    name_ja = lang_ja[0]["name"] if len(lang_ja) > 0 else None
    rows.append({"oracle_id": oracle_id, "name": name, "name_ja": name_ja})

# Create a pandas DataFrame with all cards from all legal sets
middleschool_df = pd.DataFrame(rows, columns=["oracle_id", "name", "name_ja"])

# For cards with multiple occurrences, put the rows that have the Japanese name
# on top (missing values sort last by default)
middleschool_df = middleschool_df.sort_values(by=["name", "name_ja"])
# For cards with multiple occurrences, delete all rows except for the top one
middleschool_df = middleschool_df.drop_duplicates(subset=["oracle_id"])

# Write a CSV file
middleschool_df.to_csv("data/middleschool_all_sets.csv")
|
list_scripts/5_remove_wrong_names.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd

# Remove Japanese card names that are wrong on MTGJSON
wrongnames = [
    "Aether Barrier",
    "Aether Burst",
    "Aether Charge",
    "Aether Flash",
    "Aether Mutation",
    "Aether Sting",
    "Aether Storm",
    "Aether Tide",
    "Tainted Aether",
    "Tar Pit Warrior",
]

middleschool_df = pd.read_csv("data/middleschool_all_sets.csv")

# Bug fix: the original version defined `wrongnames` but never used it, so the
# wrong Japanese names were written back unchanged. Blank them out here so the
# next pipeline step (which fills in missing `name_ja` values) re-fetches them.
middleschool_df.loc[middleschool_df["name"].isin(wrongnames), "name_ja"] = None

# Write a CSV file
middleschool_df.to_csv("data/middleschool_all_sets_removed_wrong_names.csv")
|
list_scripts/6_find_remaining_japanese_names.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
import time
from requests_html import HTMLSession

middleschool_df = pd.read_csv("data/middleschool_all_sets_removed_wrong_names.csv")

session = HTMLSession()


def find_japanese_name(name):
    """Look up *name* on wisdom-guild and return its Japanese name, or None."""
    url = "http://whisper.wisdom-guild.net/card/" + name + "/"
    response = session.get(url)
    # Text of the page's <title> element; it appears to start with the
    # Japanese name followed by the English name (assumption — TODO confirm)
    page_title = response.html.find("title")[0].text
    position = page_title.find(name)
    # English name right at the start means there is no Japanese name
    if position == 0:
        print(f"{name} ->")
        return None
    if position > 0:
        # Exact English name found: everything before its separator char
        # is the Japanese name
        name_ja = page_title[0 : position - 1]
    else:
        # English name not found verbatim; fall back to the '/' separator
        position = page_title.find("/")
        if position == -1:
            # No '/' means no Japanese name
            return None
        name_ja = page_title[0:position]
    print(f"{name} -> {name_ja}")
    return name_ja


# Fill in a Japanese name for every card that still lacks one,
# pausing between requests to be polite to the remote server.
missing_ja = middleschool_df[middleschool_df["name_ja"].isnull()]
for name in missing_ja["name"].to_list():
    matches = middleschool_df["name"] == name
    middleschool_df.loc[matches, "name_ja"] = find_japanese_name(name)
    time.sleep(1)

# Write a CSV file
middleschool_df.to_csv("data/middleschool_all_sets_added_japanese_names.csv")
|
list_scripts/7_remove_banned_cards.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd

# Remove cards that are banned in the format
banlist = [
    "Amulet of Quoz",
    "Balance",
    "Brainstorm",
    "Bronze Tablet",
    "Channel",
    "Dark Ritual",
    "Demonic Consultation",
    "Flash",
    "Goblin Recruiter",
    "Imperial Seal",
    "Jeweled Bird",
    "Mana Crypt",
    "Mana Vault",
    "Memory Jar",
    "Mind's Desire",
    "Mind Twist",
    "Rebirth",
    "Strip Mine",
    "Tempest Efreet",
    "Timmerian Fiends",
    "Tolarian Academy",
    "Vampiric Tutor",
    "Windfall",
    "Yawgmoth's Bargain",
    "Yawgmoth's Will",
]

middleschool_df = pd.read_csv("data/middleschool_all_sets_added_japanese_names.csv")

print("Cards legal by set:", middleschool_df.shape[0])
# Find the rows with the banned cards. A plain Series.isin mask replaces the
# previous DataFrame/any(axis=1) construction, and filtering with ~mask
# replaces the concat + drop_duplicates(keep=False) trick, which would also
# have silently dropped any duplicated *legal* rows.
banned_mask = middleschool_df["name"].isin(banlist)
banned_df = middleschool_df[banned_mask]
print("Banned cards:", banned_df.shape[0])
# Keep only the legal (not banned) cards
middleschool_df = middleschool_df[~banned_mask]
print("Cards legal by set and not banned:", middleschool_df.shape[0])
middleschool_df = middleschool_df.reset_index(drop=True)
middleschool_df = middleschool_df[["oracle_id", "name", "name_ja"]]
middleschool_df = middleschool_df.sort_values(by=["name", "name_ja"])

# Write a CSV file
middleschool_df.to_csv("output/middleschool.csv")
middleschool_df.to_json("output/middleschool.json")
|
output/middleschool.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
output/middleschool.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|