Commit
·
dce6100
1
Parent(s):
72a4f68
Added TLDExtract cache files so that an internet connection is not required
Browse files- Dockerfile +2 -0
- app.py +5 -2
- tld/.tld_set_snapshot +0 -0
Dockerfile
CHANGED
@@ -26,6 +26,7 @@ RUN useradd -m -u 1000 user
|
|
26 |
|
27 |
# Make output folder
|
28 |
RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output
|
|
|
29 |
|
30 |
# Switch to the "user" user
|
31 |
USER user
|
@@ -40,6 +41,7 @@ ENV HOME=/home/user \
|
|
40 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
41 |
GRADIO_SERVER_PORT=7860 \
|
42 |
GRADIO_THEME=huggingface \
|
|
|
43 |
#GRADIO_TEMP_DIR=$HOME/tmp \
|
44 |
#GRADIO_ROOT_PATH=/address-match \
|
45 |
# gunicorn keep alive timeout limit extended for GUI-based work - https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker?tab=readme-ov-file#timeout
|
|
|
26 |
|
27 |
# Make output folder
|
28 |
RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output
|
29 |
+
RUN mkdir -p /home/user/app/tld && chown -R user:user /home/user/app/tld
|
30 |
|
31 |
# Switch to the "user" user
|
32 |
USER user
|
|
|
41 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
42 |
GRADIO_SERVER_PORT=7860 \
|
43 |
GRADIO_THEME=huggingface \
|
44 |
+
TLDEXTRACT_CACHE=$HOME/app/tld/.tld_set_snapshot \
|
45 |
#GRADIO_TEMP_DIR=$HOME/tmp \
|
46 |
#GRADIO_ROOT_PATH=/address-match \
|
47 |
# gunicorn keep alive timeout limit extended for GUI-based work - https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker?tab=readme-ov-file#timeout
|
app.py
CHANGED
@@ -1,8 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from tools.file_redaction import choose_and_run_redactor
|
2 |
from tools.file_conversion import prepare_image_or_text_pdf, convert_text_pdf_to_img_pdf
|
3 |
from tools.aws_functions import load_data_from_aws
|
4 |
-
from typing import List
|
5 |
-
import pandas as pd
|
6 |
import gradio as gr
|
7 |
|
8 |
#file_path = "examples/Lambeth_2030-Our_Future_Our_Lambeth_foreword.pdf" #"examples/skills-based-cv-example.pdf" # "examples/graduate-job-example-cover-letter.pdf" #
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
# By default, TLDExtract tries to download its suffix list from the internet. A snapshot of that file is stored locally instead, so no internet connection is required.
|
4 |
+
os.environ['TLDEXTRACT_CACHE'] = 'tld/.tld_set_snapshot'
|
5 |
+
|
6 |
from tools.file_redaction import choose_and_run_redactor
|
7 |
from tools.file_conversion import prepare_image_or_text_pdf, convert_text_pdf_to_img_pdf
|
8 |
from tools.aws_functions import load_data_from_aws
|
|
|
|
|
9 |
import gradio as gr
|
10 |
|
11 |
#file_path = "examples/Lambeth_2030-Our_Future_Our_Lambeth_foreword.pdf" #"examples/skills-based-cv-example.pdf" # "examples/graduate-job-example-cover-letter.pdf" #
|
tld/.tld_set_snapshot
ADDED
The diff for this file is too large to render.
See raw diff
|
|