"""Scrape model names from the Hugging Face model listing pages.

The listing is filtered (via the URL query string) to the ``text-generation``
pipeline tag and the ``pytorch`` library. Pages are fetched concurrently and
the model names are read from the model cards found in each page's HTML.
"""

import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Generator, Optional

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

SUPPORTED_MODEL_NAME_PAGES_FORMAT = "https://huggingface.co/models?pipeline_tag=text-generation&library=pytorch"
MAX_WORKERS = 10
# Number of listing pages requested by default (page indices 0 .. MAX_PAGES - 1).
MAX_PAGES = 100
# requests applies no timeout unless one is given; without it a stalled
# connection would block a worker thread indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# CSS class string of the <h4> element that holds the model name inside a card.
_MODEL_NAME_H4_CLASS = (
    "text-md truncate font-mono text-black "
    "dark:group-hover:text-yellow-500 group-hover:text-indigo-600"
)


def _find_model_name_tag(model_card: BeautifulSoup):
    """Return the <h4> tag carrying the model name, or None if the card has none."""
    return model_card.find("h4", class_=_MODEL_NAME_H4_CLASS)


def get_model_name(model_card: BeautifulSoup) -> str:
    """Return the text of the model-name <h4> tag inside *model_card*.

    Args:
        model_card: Parsed HTML of a single model card.

    Returns:
        The ``.text`` of the matching ``<h4>`` element, unmodified.

    Raises:
        AttributeError: If the card contains no matching ``<h4>`` element.
    """
    h4_tag = _find_model_name_tag(model_card)
    if h4_tag is None:
        # Same exception type the bare `None.text` access used to produce,
        # but with a message that says what is actually wrong.
        raise AttributeError("model card does not contain a model-name <h4> tag")
    return h4_tag.text


def get_page(page_index: int) -> Optional[BeautifulSoup]:
    """Fetch and parse one listing page.

    Args:
        page_index: Value sent as the ``p`` query parameter of the listing URL.

    Returns:
        The parsed page, or ``None`` when the request fails (network error or
        timeout) or the server answers with a status other than 200.
    """
    curr_page_url = f"{SUPPORTED_MODEL_NAME_PAGES_FORMAT}&p={page_index}"
    try:
        response = requests.get(curr_page_url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        # A failed request is handled like a non-200 answer: the page is
        # skipped so one bad page does not abort the whole scrape.
        logger.warning("Request for page %d failed: %s", page_index, exc)
        return None

    if response.status_code != 200:
        logger.warning(
            "Page %d returned HTTP status %d", page_index, response.status_code
        )
        return None
    return BeautifulSoup(response.content, "html.parser")


def get_model_names(soup: BeautifulSoup) -> list[str]:
    """Return the model names of all model cards found in *soup*.

    Cards are the ``<article>`` elements with class
    ``overview-card-wrapper group``. Cards without a model-name ``<h4>`` tag
    are skipped rather than raising.
    """
    model_cards = soup.find_all(
        "article", class_="overview-card-wrapper group", recursive=True
    )
    return [
        name_tag.text
        for model_card in model_cards
        if (name_tag := _find_model_name_tag(model_card)) is not None
    ]


def generate_supported_model_names(
    max_pages: int = MAX_PAGES,
) -> Generator[str, None, None]:
    """Yield model names from the first *max_pages* listing pages.

    Pages are fetched concurrently by up to ``MAX_WORKERS`` threads and
    processed in completion order, so the order of yielded names is not
    deterministic. The same name may be yielded more than once. Pages that
    could not be fetched are skipped.

    Args:
        max_pages: Number of pages to request (indices ``0 .. max_pages - 1``).
    """
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [executor.submit(get_page, index) for index in range(max_pages)]
        for future in as_completed(futures):
            soup = future.result()
            if soup:
                yield from get_model_names(soup)


def get_supported_model_names(max_pages: int = MAX_PAGES) -> set[str]:
    """Return the distinct model names found on the first *max_pages* listing pages."""
    return set(generate_supported_model_names(max_pages))