yonikremer
committed on
Commit
•
b1dd47e
1
Parent(s):
15bf463
Added a check that the model is supported
Browse files- app.py +8 -8
- hanlde_form_submit.py +10 -0
- requirements.txt +4 -3
- supported_models.py +33 -40
app.py
CHANGED
@@ -12,7 +12,6 @@ from on_server_start import main as on_server_start_main
|
|
12 |
|
13 |
on_server_start_main()
|
14 |
|
15 |
-
AVAILABLE_MODEL_NAMES = "https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads"
|
16 |
|
17 |
st.title("Grouped Sampling Demo")
|
18 |
|
@@ -22,8 +21,6 @@ with st.form("request_form"):
|
|
22 |
label="Model name",
|
23 |
value="gpt2",
|
24 |
help=f"The name of the model to use."
|
25 |
-
f" Must be a model from this list:"
|
26 |
-
f" {AVAILABLE_MODEL_NAMES}"
|
27 |
)
|
28 |
|
29 |
output_length: int = st.number_input(
|
@@ -35,18 +32,21 @@ with st.form("request_form"):
|
|
35 |
)
|
36 |
|
37 |
submitted_prompt: str = st.text_area(
|
38 |
-
label="Input for the model",
|
39 |
help="Enter the prompt for the model. The model will generate a response based on this prompt.",
|
40 |
max_chars=16384,
|
|
|
41 |
)
|
42 |
|
43 |
submitted: bool = st.form_submit_button(
|
44 |
label="Generate",
|
45 |
help="Generate the output text.",
|
46 |
-
disabled=False
|
47 |
-
|
48 |
)
|
49 |
|
50 |
if submitted:
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
12 |
|
13 |
on_server_start_main()
|
14 |
|
|
|
15 |
|
16 |
st.title("Grouped Sampling Demo")
|
17 |
|
|
|
21 |
label="Model name",
|
22 |
value="gpt2",
|
23 |
help=f"The name of the model to use."
|
|
|
|
|
24 |
)
|
25 |
|
26 |
output_length: int = st.number_input(
|
|
|
32 |
)
|
33 |
|
34 |
submitted_prompt: str = st.text_area(
|
35 |
+
label="Input for the model, It is highly recommended to write an English prompt.",
|
36 |
help="Enter the prompt for the model. The model will generate a response based on this prompt.",
|
37 |
max_chars=16384,
|
38 |
+
min_chars=16,
|
39 |
)
|
40 |
|
41 |
submitted: bool = st.form_submit_button(
|
42 |
label="Generate",
|
43 |
help="Generate the output text.",
|
44 |
+
disabled=False,
|
|
|
45 |
)
|
46 |
|
47 |
if submitted:
|
48 |
+
try:
|
49 |
+
output = on_form_submit(selected_model_name, output_length, submitted_prompt)
|
50 |
+
st.write(f"Generated text: {output}")
|
51 |
+
except ValueError as e:
|
52 |
+
st.error(e)
|
hanlde_form_submit.py
CHANGED
@@ -2,6 +2,12 @@ import streamlit as st
|
|
2 |
from grouped_sampling import GroupedSamplingPipeLine
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def create_pipeline(model_name: str, group_size) -> GroupedSamplingPipeLine:
|
6 |
"""
|
7 |
Creates a pipeline with the given model name and group size.
|
@@ -25,6 +31,10 @@ def on_form_submit(model_name: str, group_size: int, prompt: str) -> str:
|
|
25 |
:param prompt: The prompt to use.
|
26 |
:return: The output of the model.
|
27 |
"""
|
|
|
|
|
|
|
|
|
28 |
pipeline = create_pipeline(
|
29 |
model_name,
|
30 |
group_size,
|
|
|
2 |
from grouped_sampling import GroupedSamplingPipeLine
|
3 |
|
4 |
|
5 |
+
from supported_models import get_supported_model_names
|
6 |
+
|
7 |
+
|
8 |
+
SUPPORTED_MODEL_NAMES = get_supported_model_names()
|
9 |
+
|
10 |
+
|
11 |
def create_pipeline(model_name: str, group_size) -> GroupedSamplingPipeLine:
|
12 |
"""
|
13 |
Creates a pipeline with the given model name and group size.
|
|
|
31 |
:param prompt: The prompt to use.
|
32 |
:return: The output of the model.
|
33 |
"""
|
34 |
+
if model_name not in SUPPORTED_MODEL_NAMES:
|
35 |
+
raise ValueError(f"The selected model {model_name} is not supported."
|
36 |
+
f"Supported models are all the models in:"
|
37 |
+
f" https://huggingface.co/models?pipeline_tag=text-generation&library=pytorch")
|
38 |
pipeline = create_pipeline(
|
39 |
model_name,
|
40 |
group_size,
|
requirements.txt
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
grouped-sampling>=1.0.4
|
2 |
streamlit==1.17.0
|
3 |
torch>1.12.1
|
4 |
-
transformers
|
5 |
hatchling
|
6 |
-
beautifulsoup4
|
7 |
-
urllib3
|
|
|
|
1 |
grouped-sampling>=1.0.4
|
2 |
streamlit==1.17.0
|
3 |
torch>1.12.1
|
4 |
+
transformers~=4.26.0
|
5 |
hatchling
|
6 |
+
beautifulsoup4~=4.11.2
|
7 |
+
urllib3
|
8 |
+
requests~=2.28.2
|
supported_models.py
CHANGED
@@ -1,48 +1,41 @@
|
|
1 |
-
from typing import
|
2 |
|
3 |
-
|
4 |
-
import
|
|
|
5 |
|
6 |
-
SUPPORTED_MODEL_NAME_PAGES_FORMAT
|
|
|
7 |
|
8 |
|
9 |
-
def get_model_name(model_card:
|
10 |
-
"""
|
11 |
-
Gets the model name from the model card.
|
12 |
-
:param model_card: The model card to get the model name from.
|
13 |
-
:return: The model name.
|
14 |
-
"""
|
15 |
h4_class = "text-md truncate font-mono text-black dark:group-hover:text-yellow-500 group-hover:text-indigo-600"
|
16 |
-
h4_tag
|
17 |
return h4_tag.text
|
18 |
|
19 |
|
20 |
-
def
|
21 |
-
""
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
if __name__ == "__main__":
|
47 |
-
for model_name in get_supported_model_names():
|
48 |
-
print(model_name)
|
|
|
1 |
+
from typing import Generator
|
2 |
|
3 |
+
import requests
|
4 |
+
from bs4 import BeautifulSoup
|
5 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
6 |
|
7 |
+
# Listing URL for all PyTorch text-generation models on the Hugging Face hub;
# a "&p=<page index>" query parameter is appended when scraping page by page.
SUPPORTED_MODEL_NAME_PAGES_FORMAT = "https://huggingface.co/models?pipeline_tag=text-generation&library=pytorch"
# Number of threads used to download listing pages concurrently.
MAX_WORKERS = 10
|
9 |
|
10 |
|
11 |
+
def get_model_name(model_card: "BeautifulSoup") -> str:
    """
    Extract the model name from a single model-card element.

    :param model_card: The parsed HTML of one model card (an <article>
        element from the hub's model-listing page).
    :return: The model name, e.g. "gpt2".
    :raises ValueError: If the expected <h4> tag is missing, which
        usually means the hub changed its page markup.
    """
    # The model name lives in an <h4> with this styling class; this is
    # brittle by nature since it depends on the site's current CSS.
    h4_class = "text-md truncate font-mono text-black dark:group-hover:text-yellow-500 group-hover:text-indigo-600"
    h4_tag = model_card.find("h4", class_=h4_class)
    if h4_tag is None:
        # Fail loudly with context instead of an opaque AttributeError.
        raise ValueError("Could not find the model-name <h4> tag in the model card.")
    return h4_tag.text
|
15 |
|
16 |
|
17 |
+
def get_page(page_index: int) -> "BeautifulSoup | None":
    """
    Download and parse one page of the hub's text-generation model listing.

    :param page_index: Zero-based index of the listing page to fetch.
    :return: The parsed page on success, or None when the request fails
        or returns a non-200 status.
    """
    curr_page_url = f"{SUPPORTED_MODEL_NAME_PAGES_FORMAT}&p={page_index}"
    try:
        # A timeout is essential here: without one, a stuck connection
        # would hang the ThreadPoolExecutor-based scrape indefinitely.
        response = requests.get(curr_page_url, timeout=10)
    except requests.RequestException:
        # Network failures follow the same "no page" contract as a bad
        # status code, instead of re-raising through future.result().
        return None
    if response.status_code == 200:
        return BeautifulSoup(response.content, "html.parser")
    return None
|
24 |
+
|
25 |
+
|
26 |
+
def get_model_names(soup: "BeautifulSoup") -> "list[str]":
    """
    Extract the name of every model listed on one parsed listing page.

    :param soup: A parsed model-listing page.
    :return: The model names found on the page, in page order.
    """
    # Each model is rendered as an <article> card; the CSS class is part of
    # the hub's current markup and may break if the site layout changes.
    model_cards = soup.find_all("article", class_="overview-card-wrapper group", recursive=True)
    return [get_model_name(model_card) for model_card in model_cards]
|
29 |
+
|
30 |
+
|
31 |
+
def generate_supported_model_names(max_pages: int = 100) -> Generator[str, None, None]:
    """
    Lazily yield the names of all supported models by scraping the hub's
    text-generation listing pages concurrently.

    :param max_pages: Upper bound on the number of listing pages to fetch
        (previously a hard-coded constant of 100; the default preserves
        that behavior).
    :return: A generator of model names; pages that failed to download
        (get_page returned None) are silently skipped.
    """
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_index = {
            executor.submit(get_page, index): index for index in range(max_pages)
        }
        # as_completed yields in completion order, so the resulting name
        # order is nondeterministic; callers collect the names into a set.
        for future in as_completed(future_to_index):
            soup = future.result()
            if soup:
                yield from get_model_names(soup)
|
38 |
+
|
39 |
+
|
40 |
+
def get_supported_model_names() -> set[str]:
    """
    Collect the names of every supported model.

    :return: A deduplicated set of model names scraped from the hub's
        text-generation listing pages.
    """
    all_names = generate_supported_model_names()
    return set(all_names)
|
|
|
|
|
|
|
|