yonikremer committed on
Commit
da1451c
·
1 Parent(s): 3a9aacf

parameterized min number of likes and downloads

Browse files
Files changed (1) hide show
  1. supported_models.py +38 -11
supported_models.py CHANGED
@@ -59,7 +59,12 @@ def get_page(page_index: int):
59
  return None
60
 
61
 
62
- def card_filter(model_card: Tag, model_name: str) -> bool:
 
 
 
 
 
63
  if model_name in BLACKLISTED_MODEL_NAMES:
64
  return False
65
  numeric_contents = get_numeric_contents(model_card)
@@ -68,33 +73,55 @@ def card_filter(model_card: Tag, model_name: str) -> bool:
68
  # It means that he doesn't have any downloads/likes, so it's not a valid model card.
69
  return False
70
  number_of_downloads = convert_to_int(numeric_contents[0])
71
- if number_of_downloads < MIN_NUMBER_OF_DOWNLOADS:
72
  return False
73
  number_of_likes = convert_to_int(numeric_contents[1])
74
- if number_of_likes < MIN_NUMBER_OF_LIKES:
75
  return False
76
  return True
77
 
78
 
79
- def get_model_names(soup: BeautifulSoup):
 
 
 
 
80
  model_cards: List[Tag] = soup.find_all("article", class_="overview-card-wrapper group", recursive=True)
81
  for model_card in model_cards:
82
  model_name = get_model_name(model_card)
83
- if card_filter(model_card, model_name):
 
 
 
 
 
84
  yield model_name
85
 
86
 
87
- def generate_supported_model_names() -> Generator[str, None, None]:
 
 
 
88
  with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
89
  future_to_index = {executor.submit(get_page, index): index for index in range(100)}
90
  for future in as_completed(future_to_index):
91
  soup = future.result()
92
  if soup:
93
- yield from get_model_names(soup)
94
-
95
-
96
- def get_supported_model_names() -> Set[str]:
97
- return set(generate_supported_model_names())
 
 
 
 
 
 
 
 
 
 
98
 
99
 
100
  if __name__ == "__main__":
 
59
  return None
60
 
61
 
62
+ def card_filter(
63
+ model_card: Tag,
64
+ model_name: str,
65
+ min_number_of_downloads: int,
66
+ min_number_of_likes: int,
67
+ ) -> bool:
68
  if model_name in BLACKLISTED_MODEL_NAMES:
69
  return False
70
  numeric_contents = get_numeric_contents(model_card)
 
73
  # It means that he doesn't have any downloads/likes, so it's not a valid model card.
74
  return False
75
  number_of_downloads = convert_to_int(numeric_contents[0])
76
+ if number_of_downloads < min_number_of_downloads:
77
  return False
78
  number_of_likes = convert_to_int(numeric_contents[1])
79
+ if number_of_likes < min_number_of_likes:
80
  return False
81
  return True
82
 
83
 
84
+ def get_model_names(
85
+ soup: BeautifulSoup,
86
+ min_number_of_downloads: int,
87
+ min_number_of_likes: int,
88
+ ) -> Generator[str, None, None]:
89
  model_cards: List[Tag] = soup.find_all("article", class_="overview-card-wrapper group", recursive=True)
90
  for model_card in model_cards:
91
  model_name = get_model_name(model_card)
92
+ if card_filter(
93
+ model_card=model_card,
94
+ model_name=model_name,
95
+ min_number_of_downloads=min_number_of_downloads,
96
+ min_number_of_likes=min_number_of_likes
97
+ ):
98
  yield model_name
99
 
100
 
101
+ def generate_supported_model_names(
102
+ min_number_of_downloads: int,
103
+ min_number_of_likes: int,
104
+ ) -> Generator[str, None, None]:
105
  with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
106
  future_to_index = {executor.submit(get_page, index): index for index in range(100)}
107
  for future in as_completed(future_to_index):
108
  soup = future.result()
109
  if soup:
110
+ yield from get_model_names(
111
+ soup=soup,
112
+ min_number_of_downloads=min_number_of_downloads,
113
+ min_number_of_likes=min_number_of_likes,
114
+ )
115
+
116
+
117
+ def get_supported_model_names(
118
+ min_number_of_downloads: int = MIN_NUMBER_OF_DOWNLOADS,
119
+ min_number_of_likes: int = MIN_NUMBER_OF_LIKES,
120
+ ) -> Set[str]:
121
+ return set(generate_supported_model_names(
122
+ min_number_of_downloads=min_number_of_downloads,
123
+ min_number_of_likes=min_number_of_likes,
124
+ ))
125
 
126
 
127
  if __name__ == "__main__":