Spaces:
Running
Running
import pandas as pd | |
import time | |
from requests_html import HTMLSession | |
middleschool_df = pd.read_csv("data/middleschool_all_sets_removed_wrong_names.csv") | |
session = HTMLSession() | |
def find_japanese_name(name): | |
url = "http://whisper.wisdom-guild.net/card/" + name + "/" | |
r = session.get(url) | |
# Find the text on the <title> element in the HTML document | |
title = r.html.find("title")[0].text | |
# Find the position of the English card name within the title | |
idx = title.find(name) | |
# The Japanese name should be before the English name, | |
# so if idx is 0, there is no Japanese name | |
if idx == 0: | |
print(f"{name} ->") | |
return None | |
# If the exact English card name can't be found, we look for a '/' | |
if idx == -1: | |
idx = title.find("/") | |
# No '/' means no Japanese name | |
if idx == -1: | |
return None | |
# Take only the Japanese name from the title | |
name_ja = title[0:idx] | |
else: | |
# Take only the Japanese name from the title | |
name_ja = title[0 : idx - 1] | |
print(f"{name} -> {name_ja}") | |
return name_ja | |
english_only_cards = middleschool_df[middleschool_df["name_ja"].isnull()] | |
name_list = english_only_cards["name"].to_list() | |
for idx, name in enumerate(name_list): | |
middleschool_df.loc[ | |
middleschool_df["name"] == name, "name_ja" | |
] = find_japanese_name(name) | |
time.sleep(1) | |
# Write a CSV file | |
middleschool_df.to_csv("data/middleschool_all_sets_added_japanese_names.csv") | |