Spaces:
Sleeping
Sleeping
File size: 1,501 Bytes
17dcef2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import pandas as pd
import time
from requests_html import HTMLSession
# Card table loaded from CSV; the code below reads its "name" and "name_ja"
# columns and fills in missing "name_ja" values.
middleschool_df = pd.read_csv("data/middleschool_all_sets_removed_wrong_names.csv")
# Single HTML session shared by every lookup in find_japanese_name().
session = HTMLSession()
def find_japanese_name(name):
    """Look up the Japanese name of a card on whisper.wisdom-guild.net.

    Fetches the card page with the module-level ``session`` and inspects the
    text of the page's ``<title>`` element. Whatever precedes the English
    name in the title is treated as the Japanese name.

    Args:
        name: English card name. It is appended to the card URL as-is and
            searched for verbatim in the page title.

    Returns:
        The Japanese name as a string, or ``None`` when the page has no
        ``<title>`` element, when the title starts with the English name,
        or when the title contains neither the English name nor a ``/``.
    """
    url = "http://whisper.wisdom-guild.net/card/" + name + "/"
    r = session.get(url)

    # Find the <title> element in the HTML document. A response without one
    # has nothing to extract; returning None here (instead of indexing into
    # an empty list) keeps one odd page from aborting a long scraping run.
    titles = r.html.find("title")
    if not titles:
        return None
    title = titles[0].text

    # Find the position of the English card name within the title
    idx = title.find(name)

    # The Japanese name should be before the English name,
    # so if idx is 0, there is no Japanese name
    if idx == 0:
        print(f"{name} ->")
        return None

    # If the exact English card name can't be found, we look for a '/'
    if idx == -1:
        idx = title.find("/")
        # No '/' means no Japanese name
        if idx == -1:
            return None
        # Take only the Japanese name from the title (everything before '/')
        name_ja = title[0:idx]
    else:
        # Take only the Japanese name from the title. The character right
        # before the English name is dropped as well -- presumably the
        # separator between the two names; confirm against the site's
        # title format.
        name_ja = title[0 : idx - 1]

    print(f"{name} -> {name_ja}")
    return name_ja
# Rows without a Japanese name are the ones that still need a lookup.
english_only_cards = middleschool_df[middleschool_df["name_ja"].isnull()]
# Look up each distinct name only once: the assignment below fills every row
# that shares the name, so repeating the request for duplicate names would
# only add load on the remote site and lengthen the run.
name_list = english_only_cards["name"].drop_duplicates().to_list()
for name in name_list:
    middleschool_df.loc[
        middleschool_df["name"] == name, "name_ja"
    ] = find_japanese_name(name)
    # Pause between lookups so requests are spaced one second apart.
    time.sleep(1)
# Write a CSV file
# NOTE(review): to_csv also writes the DataFrame index as an extra first
# column by default; pass index=False if downstream readers should not see
# it -- confirm before changing.
middleschool_df.to_csv("data/middleschool_all_sets_added_japanese_names.csv")
|