Spaces:

seanpedrickcase
/

topic_modelling

Running

seanpedrickcase committed on Nov 20, 2024

Commit

49e0db8

1 Parent(s): 08eb30d

Added and replaced relevant files to download in download_model.py to allow for app use on AWS

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -2,7 +2,7 @@
 FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm AS builder
 # Install Lambda web adapter in case you want to run with an AWS Lambda function URL (not essential if not using Lambda)
-#COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
 # Install system dependencies
 RUN apt-get update && rm -rf /var/lib/apt/lists/*
@@ -15,7 +15,7 @@ WORKDIR /src
 # Copy requirements file and install dependencies. Sentence transformers and Bertopic are installed without dependencies so that torch is not reinstalled.
 COPY requirements_aws.txt .
-RUN pip install torch==2.4.0+cpu --target=/install --index-url https://download.pytorch.org/whl/cpu \
 && pip install --no-cache-dir --target=/install sentence-transformers==3.2.0 --no-deps \
 && pip install --no-cache-dir --target=/install bertopic==0.16.4 --no-deps \
 && pip install --no-cache-dir --target=/install -r requirements_aws.txt \

 FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm AS builder
 # Install Lambda web adapter in case you want to run with an AWS Lambda function URL (not essential if not using Lambda)
+#COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.4 /lambda-adapter /opt/extensions/lambda-adapter
 # Install system dependencies
 RUN apt-get update && rm -rf /var/lib/apt/lists/*
 # Copy requirements file and install dependencies. Sentence transformers and Bertopic are installed without dependencies so that torch is not reinstalled.
 COPY requirements_aws.txt .
+RUN pip install torch==2.5.1+cpu --target=/install --index-url https://download.pytorch.org/whl/cpu \
 && pip install --no-cache-dir --target=/install sentence-transformers==3.2.0 --no-deps \
 && pip install --no-cache-dir --target=/install bertopic==0.16.4 --no-deps \
 && pip install --no-cache-dir --target=/install -r requirements_aws.txt \

download_model.py CHANGED Viewed

@@ -1,15 +1,22 @@
 from huggingface_hub import hf_hub_download
 # Define the repository and files to download
-repo_id = "mixedbread-ai/mxbai-embed-xsmall-v1" #"sentence-transformers/all-MiniLM-L6-v2" #"BAAI/bge-small-en-v1.5"
 files_to_download = [
     "config.json",
-    "pytorch_model.bin",
     "tokenizer_config.json",
     "vocab.txt"
 ]
 # Download each file and save it to the /model/embed directory
 for file_name in files_to_download:
     print("Checking for file", file_name)
-    hf_hub_download(repo_id=repo_id, filename=file_name, local_dir="/model/embed") #"/model/bge"

 from huggingface_hub import hf_hub_download
 # Define the repository and files to download
+repo_id = "mixedbread-ai/mxbai-embed-xsmall-v1" #"sentence-transformers/all-MiniLM-L6-v2"
 files_to_download = [
     "config.json",
+    "config_sentence_transformers.json",
+    "model.safetensors",
+    "tokenizer.json",
+    "special_tokens_map.json",
+    "angle_config.json",
+    "modules.json",
     "tokenizer_config.json",
     "vocab.txt"
 ]
+#"pytorch_model.bin",
 # Download each file and save it to the /model/embed directory
 for file_name in files_to_download:
     print("Checking for file", file_name)
+    hf_hub_download(repo_id=repo_id, filename=file_name, local_dir="/model/embed")