Spaces:

alecrem
/

middleschool

Running

middleschool / list_scripts /6_find_remaining_japanese_names.py

Alejandro Cremades

Compile card list with sequential scripts

17dcef2 about 1 year ago

1.5 kB

	import pandas as pd
	import time
	from requests_html import HTMLSession

	middleschool_df = pd.read_csv("data/middleschool_all_sets_removed_wrong_names.csv")

	session = HTMLSession()


	def find_japanese_name(name):
	url = "http://whisper.wisdom-guild.net/card/" + name + "/"
	r = session.get(url)
	# Find the text on the <title> element in the HTML document
	title = r.html.find("title")[0].text
	# Find the position of the English card name within the title
	idx = title.find(name)
	# The Japanese name should be before the English name,
	# so if idx is 0, there is no Japanese name
	if idx == 0:
	print(f"{name} ->")
	return None
	# If the exact English card name can't be found, we look for a '/'
	if idx == -1:
	idx = title.find("/")
	# No '/' means no Japanese name
	if idx == -1:
	return None
	# Take only the Japanese name from the title
	name_ja = title[0:idx]
	else:
	# Take only the Japanese name from the title
	name_ja = title[0 : idx - 1]
	print(f"{name} -> {name_ja}")
	return name_ja


	english_only_cards = middleschool_df[middleschool_df["name_ja"].isnull()]
	name_list = english_only_cards["name"].to_list()
	for idx, name in enumerate(name_list):
	middleschool_df.loc[
	middleschool_df["name"] == name, "name_ja"
	] = find_japanese_name(name)
	time.sleep(1)

	# Write a CSV file
	middleschool_df.to_csv("data/middleschool_all_sets_added_japanese_names.csv")