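"""Scrape champion meta statistics from op.gg (tier, win/pick/ban rates, and
counter champions) for each role and save the results to util/data/meta_stats.csv."""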
import re
import os
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.core.os_manager import ChromeType
# Constants
ROLES = ["top", "jungle", "mid", "adc", "support"]
BASE_URL = "https://www.op.gg/champions?position={role}"
TIER_COLOR_MAPPING = {
"#0093FF": 1, # Blue
"#00BBA3": 2, # Teal
"#FFB900": 3, # Yellow
"#9AA4AF": 4, # Gray
}
'''Original driver
def setup_driver():
"""Setup and return a configured Chrome WebDriver with optimized settings"""
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-logging")
chrome_options.add_argument("--log-level=3")
chrome_options.add_argument("--silent")
chrome_options.add_argument(
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
# Remove log_level parameter from ChromeDriverManager
service = Service(ChromeDriverManager().install())
return webdriver.Chrome(service=service, options=chrome_options)
'''
# Active driver setup (handles both Hugging Face Spaces and local runs)
def setup_driver():
    """Set up and return a configured Chrome WebDriver with optimized settings."""
# Define chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-logging")
chrome_options.add_argument("--log-level=3")
chrome_options.add_argument("--silent")
chrome_options.add_argument(
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
    # Check whether we're running in a Hugging Face Space or locally
    if 'HF_SPACE' in os.environ:
        # Hugging Face Space detected: install the standard Chrome driver
        print("Running on Hugging Face Space.")
        chromedriver_path = ChromeDriverManager().install()
    else:
        # Local environment: install the Chromium build of the driver
        print("Running local Chromium webdriver.")
        chromedriver_path = ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()
# Create the Service object using the installed chromedriver
service = Service(executable_path=chromedriver_path)
# Return the configured WebDriver instance
driver = webdriver.Chrome(service=service, options=chrome_options)
return driver
def parse_rate(rate_str):
    """Convert a percentage string such as "51.3%" to a float fraction."""
    try:
        return float(rate_str.strip().rstrip('%')) / 100
    except (ValueError, AttributeError):
        # Fall back to 0.0 for empty or malformed cells
        return 0.0
def extract_counter_champions(counter_column):
    """Extract up to three counter champion names from a table column."""
    counter_champions = []
    try:
        counter_list = counter_column.find_elements(By.TAG_NAME, "a")
        for counter in counter_list[:3]:
            img_element = counter.find_element(By.TAG_NAME, "img")
            champion_name = img_element.get_attribute("alt")
            counter_champions.append(champion_name)
    except Exception:
        # If the column layout changes, fall back to empty counter slots
        pass
    # Pad the result to exactly three entries
    return counter_champions + [""] * (3 - len(counter_champions))
def get_champion_table_data(driver, url, role):
"""Extract champion data from a specific role page with optimized parsing"""
try:
        driver.get(url)
        # Wait for the champion stats table to render (selector is tied to op.gg's current page layout)
        table = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#content-container > div.flex.gap-2.md\\:mx-auto.md\\:w-width-limit.mt-2.flex-col.overflow-hidden > div.flex.flex-row-reverse.gap-2 > main > div:nth-child(2) > table"))
        )
champions_data = []
        for row in table.find_elements(By.TAG_NAME, "tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            # Skip header and spacer rows that have no data cells
            if len(cols) <= 1:
                continue
            # Get tier value from the fill color of the tier icon (default to 5)
            tier = 5
            svg_elements = cols[2].find_elements(By.TAG_NAME, "svg")
            if svg_elements:
                for path in svg_elements[0].find_elements(By.TAG_NAME, "path"):
                    fill_color = path.get_attribute("fill")
                    if fill_color in TIER_COLOR_MAPPING:
                        tier = TIER_COLOR_MAPPING[fill_color]
                        break
            # Extract ban rate from the cell HTML, stripping embedded comment markers before matching the number
            ban_rate_html = cols[6].get_attribute("innerHTML").strip()
            ban_rate_match = re.search(r"([\d.]+)", ban_rate_html.replace("<!-- -->", ""))
            ban_rate = float(ban_rate_match.group(1)) / 100 if ban_rate_match else 0.0
# Get counter champions
counter1, counter2, counter3 = extract_counter_champions(cols[7])
champions_data.append({
"rank": cols[0].text.strip(),
"champion": cols[1].text.strip(),
"tier": tier,
"role": role,
"win_rate": parse_rate(cols[4].text),
"pick_rate": parse_rate(cols[5].text),
"ban_rate": ban_rate,
"counter1": counter1,
"counter2": counter2,
"counter3": counter3,
})
return champions_data
except Exception as e:
print(f"Error extracting table data for {role}: {e}")
return []
def get_meta_stats():
"""Main function to scrape champion data with improved error handling and logging"""
driver = None
print("================== inside get_meta_stats ========================\n")
try:
driver = setup_driver()
all_roles_data = []
for role in ROLES:
role_url = BASE_URL.format(role=role)
role_data = get_champion_table_data(driver, role_url, role)
all_roles_data.extend(role_data)
if not all_roles_data:
print("No data was collected from any role")
return pd.DataFrame()
df = pd.DataFrame(all_roles_data)
# Save data
save_dir = os.path.join("util", "data")
os.makedirs(save_dir, exist_ok=True)
filepath = os.path.join(save_dir, "meta_stats.csv")
df.to_csv(filepath, index=False)
print(f"Saved meta stats to {filepath}")
print("================== Exiting get_meta_stats ========================\n")
return df
except Exception as e:
print(f"Error in get_meta_stats: {e}")
return pd.DataFrame()
finally:
if driver:
driver.quit()
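
# Example entry point: a minimal sketch for running the scraper directly
# (assumes this file is executed as a script; the exact invocation depends on
# how the Space wires up the module).
if __name__ == "__main__":
    stats_df = get_meta_stats()
    if not stats_df.empty:
        print(f"Collected {len(stats_df)} rows covering {stats_df['role'].nunique()} roles.")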