yonikremer
committed
Commit 7068818
1 Parent(s): df273ff
restoring the last working version

- .dockerignore +0 -8
- .gitignore +1 -0
- .streamlit/config.toml +1 -7
- Dockerfile +0 -20
- README.md +6 -5
- app.py +7 -45
- available_models.py +2 -2
- download_repo.py +45 -0
- hanlde_form_submit.py +58 -4
- tests.py +7 -6
.dockerignore DELETED
@@ -1,8 +0,0 @@
-tests.py
-.gitattributes
-.gitignore
-start_server.py
-.git/
-.idea/
-.pytest_cache/
-__pycache__/
.gitignore CHANGED
@@ -5505,3 +5505,4 @@ Mercury.modules
 !/.streamlit/config.toml
 !/Dockerfile
 !/.dockerignore
+!/download_repo.py
.streamlit/config.toml CHANGED
@@ -1,8 +1,2 @@
 [browser]
-gatherUsageStats = false
-[server]
-port = 7860
-[logger]
-level = "error"
-[theme]
-base = "dark"
+gatherUsageStats = false
Dockerfile DELETED
@@ -1,20 +0,0 @@
-FROM bitnami/pytorch
-
-RUN mkdir --mode 777 /app/my_streamlit_app
-WORKDIR /app/my_stramlit_app
-
-COPY ./requirements.txt /app/my_streamlit_app/requirements.txt
-
-RUN pip install --no-cache-dir -r /app/my_streamlit_app/requirements.txt
-
-RUN mkdir --mode 777 "/app/my_streamlit_app/.cache/"
-RUN mkdir --mode 777 "/app/my_streamlit_app/.cache/huggingface/"
-ENV HUGGINGFACE_HUB_CACHE="/app/my_streamlit_app/.cache/huggingface"
-RUN mkdir --mode 777 "/app/my_streamlit_app/.cache/transformers/"
-ENV TRANSFORMERS_CACHE="/app/my_streamlit_app/.cache/transformers"
-
-ENV TOKENIZERS_PARALLELISM=false
-
-COPY . /app/my_streamlit_app/
-
-CMD ["streamlit", "run", "--server.port", "7860", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false", "--browser.gatherUsageStats", "false", "--theme.base", "dark", "--server.maxUploadSize", "1000", "/app/my_streamlit_app/app.py"]
README.md CHANGED
@@ -3,12 +3,13 @@ title: Grouped Sampling Demo
 emoji: 🐠
 colorFrom: pink
 colorTo: purple
-sdk: …
-…
-…
+sdk: streamlit
+sdk_version: 1.17.0
+app_file: app.py
+pinned: false
 fullWidth: true
-…
-…
+tags: [text-generation, pytorch, transformers, streamlit]
+pinned: true
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -3,61 +3,27 @@ The Streamlit app for the project demo.
 In the demo, the user can write a prompt
 and the model will generate a response using the grouped sampling algorithm.
 """
-import os
-from time import time
 
 import streamlit as st
-from grouped_sampling import GroupedSamplingPipeLine
 from torch.cuda import CudaError
-from huggingface_hub import logging as hf_hub_logging
 
 from available_models import AVAILABLE_MODELS
 from hanlde_form_submit import on_form_submit
 
 
-…
-    """
-    Creates a pipeline with the given model name and group size.
-    :param model_name: The name of the model to use.
-    :param group_size: The size of the groups to use.
-    :return: A pipeline with the given model name and group size.
-    """
-    st.write(f"Starts creating pipeline with model: {model_name}")
-    pipeline_start_time = time()
-    pipeline = GroupedSamplingPipeLine(
-        model_name=model_name,
-        group_size=group_size,
-        end_of_sentence_stop=False,
-        top_k=50,
-    )
-    pipeline_end_time = time()
-    pipeline_time = pipeline_end_time - pipeline_start_time
-    st.write(f"Finished creating pipeline with model: {model_name} in {pipeline_time:,.2f} seconds.")
-    return pipeline
-
-
-hf_hub_logging.set_verbosity_error()
-
-st.set_page_config(
-    page_title="דגימה בקבוצות - שימוש יעיל במודלי שפה סיבתיים",
-    layout="wide",
-)
-
-pipelines = {
-    model_name: create_pipeline(model_name, 1024) for model_name in AVAILABLE_MODELS[1:]
-}
+st.title("דגימה בקבוצות - שימוש יעיל במודלי שפה סיבתיים")
 
 with st.form("request_form"):
     selected_model_name: str = st.selectbox(
         label="בחרו מודל",
         options=AVAILABLE_MODELS,
-        help="…
+        help="opt-iml-max-30b generates better texts but is slower",
     )
 
     output_length: int = st.number_input(
-        label="כמות המילים המקסימלית בפלט - בין 1 ל-…
+        label="כמות המילים המקסימלית בפלט - בין 1 ל-4096",
         min_value=1,
-        max_value=…
+        max_value=4096,
         value=5,
     )
 
@@ -65,7 +31,7 @@ with st.form("request_form"):
         label="הקלט לאלוגריתם (באנגלית בלבד)",
         value="Instruction: Answer in yes or no.\n"
               "Question: Is the sky blue?\n"
-              "Answer:",
+              "Answer: ",
         max_chars=2048,
     )
 
@@ -77,7 +43,7 @@ with st.form("request_form"):
     if submitted:
         try:
             output = on_form_submit(
-                …
+                selected_model_name,
                 output_length,
                 submitted_prompt,
             )
@@ -89,10 +55,6 @@ with st.form("request_form"):
         st.write(f"Generated text: {output}")
 
 
-…
-    os.path.dirname(__file__),
-    "user_instructions_hebrew.md",
-)
-with open(user_instructions_file, "r") as fh:
+with open("user_instructions_hebrew.md", "r") as fh:
     long_description = fh.read()
     st.markdown(long_description)
available_models.py CHANGED
@@ -1,4 +1,4 @@
 AVAILABLE_MODELS = (
-    "…
-    "…
+    "facebook/opt-iml-max-1.3b",
+    "facebook/opt-iml-max-30b",
 )
download_repo.py ADDED
@@ -0,0 +1,45 @@
+import urllib3
+
+from huggingface_hub import snapshot_download
+
+from available_models import AVAILABLE_MODELS
+
+
+def change_default_timeout(new_timeout: int) -> None:
+    """
+    Changes the default timeout for downloading repositories from the Hugging Face Hub.
+    Prevents the following errors:
+    urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='huggingface.co', port=443):
+    Read timed out. (read timeout=10)
+    """
+    urllib3.util.timeout.DEFAULT_TIMEOUT = new_timeout
+
+
+def download_pytorch_model(name: str) -> None:
+    """
+    Downloads a pytorch model and all the small files from the model's repository.
+    Other model formats (tensorflow, tflite, safetensors, msgpack, ot...) are not downloaded.
+    """
+    number_of_seconds_in_a_year: int = 60 * 60 * 24 * 365
+    change_default_timeout(number_of_seconds_in_a_year)
+    snapshot_download(
+        repo_id=name,
+        etag_timeout=number_of_seconds_in_a_year,
+        resume_download=True,
+        repo_type="model",
+        library_name="pt",
+        # h5, tflite, safetensors, msgpack and ot models files are not needed
+        ignore_patterns=[
+            "*.h5",
+            "*.tflite",
+            "*.safetensors",
+            "*.msgpack",
+            "*.ot",
+            "*.md"
+        ],
+    )
+
+
+if __name__ == "__main__":
+    for model_name in AVAILABLE_MODELS:
+        download_pytorch_model(model_name)
hanlde_form_submit.py CHANGED
@@ -1,8 +1,51 @@
+import os
 from time import time
 
 import streamlit as st
 from grouped_sampling import GroupedSamplingPipeLine
 
+from download_repo import download_pytorch_model
+
+
+def is_downloaded(model_name: str) -> bool:
+    """
+    Checks if the model is downloaded.
+    :param model_name: The name of the model to check.
+    :return: True if the model is downloaded, False otherwise.
+    """
+    models_dir = "/root/.cache/huggingface/hub"
+    model_dir = os.path.join(models_dir, f"models--{model_name.replace('/', '--')}")
+    return os.path.isdir(model_dir)
+
+
+def create_pipeline(model_name: str, group_size: int) -> GroupedSamplingPipeLine:
+    """
+    Creates a pipeline with the given model name and group size.
+    :param model_name: The name of the model to use.
+    :param group_size: The size of the groups to use.
+    :return: A pipeline with the given model name and group size.
+    """
+    if not is_downloaded(model_name):
+        download_repository_start_time = time()
+        st.write(f"Starts downloading model: {model_name} from the internet.")
+        download_pytorch_model(model_name)
+        download_repository_end_time = time()
+        download_time = download_repository_end_time - download_repository_start_time
+        st.write(f"Finished downloading model: {model_name} from the internet in {download_time:,.2f} seconds.")
+    st.write(f"Starts creating pipeline with model: {model_name}")
+    pipeline_start_time = time()
+    pipeline = GroupedSamplingPipeLine(
+        model_name=model_name,
+        group_size=group_size,
+        end_of_sentence_stop=False,
+        top_k=50,
+        load_in_8bit=False,
+    )
+    pipeline_end_time = time()
+    pipeline_time = pipeline_end_time - pipeline_start_time
+    st.write(f"Finished creating pipeline with model: {model_name} in {pipeline_time:,.2f} seconds.")
+    return pipeline
+
 
 def generate_text(
     pipeline: GroupedSamplingPipeLine,
@@ -25,13 +68,13 @@ def generate_text(
 
 
 def on_form_submit(
-    …
+    model_name: str,
     output_length: int,
     prompt: str,
 ) -> str:
     """
     Called when the user submits the form.
-    :param …
+    :param model_name: The name of the model to use.
     :param output_length: The size of the groups to use.
     :param prompt: The prompt to use.
     :return: The output of the model.
@@ -43,8 +86,16 @@ def on_form_submit(
     """
     if len(prompt) == 0:
         raise ValueError("The prompt must not be empty.")
+    st.write(f"Loading model: {model_name}...")
+    loading_start_time = time()
+    pipeline = create_pipeline(
+        model_name=model_name,
+        group_size=output_length,
+    )
+    loading_end_time = time()
+    loading_time = loading_end_time - loading_start_time
+    st.write(f"Finished loading model: {model_name} in {loading_time:,.2f} seconds.")
     st.write("Generating text...")
-    print("Generating text...")
    generation_start_time = time()
     generated_text = generate_text(
         pipeline=pipeline,
@@ -54,5 +105,8 @@ def on_form_submit(
     generation_end_time = time()
     generation_time = generation_end_time - generation_start_time
     st.write(f"Finished generating text in {generation_time:,.2f} seconds.")
-    …
+    if not isinstance(generated_text, str):
+        raise RuntimeError(f"The model {model_name} did not generate any text.")
+    if len(generated_text) == 0:
+        raise RuntimeError(f"The model {model_name} did not generate any text.")
     return generated_text
tests.py CHANGED
@@ -1,6 +1,7 @@
 import pytest as pytest
-from grouped_sampling import GroupedSamplingPipeLine
+from grouped_sampling import GroupedSamplingPipeLine
 
+from available_models import AVAILABLE_MODELS
 from hanlde_form_submit import create_pipeline, on_form_submit
 
 
@@ -14,13 +15,13 @@ def test_on_form_submit():
     empty_prompt = ""
     with pytest.raises(ValueError):
         on_form_submit(model_name, output_length, empty_prompt)
-    unsupported_model_name = "unsupported_model_name"
-    with pytest.raises(UnsupportedModelNameException):
-        on_form_submit(unsupported_model_name, output_length, prompt)
 
 
-…
-    model_name …
+@pytest.mark.parametrize(
+    "model_name",
+    AVAILABLE_MODELS,
+)
+def test_create_pipeline(model_name: str):
     pipeline: GroupedSamplingPipeLine = create_pipeline(model_name, 5)
     assert pipeline is not None
     assert pipeline.model_name == model_name