diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 9c433dbb6741feedd1bac830e7314969b43c1ae5..0000000000000000000000000000000000000000 --- a/.dockerignore +++ /dev/null @@ -1,21 +0,0 @@ - - -#--- ignore select binary files/folders -bin/images/sample* -#---bin/models/*.pth -bin/models/*.zip -bin/models/deeplabv3*vhflip30/*.pth -bin/testing -_ignore -.vscode -cicd_workflows - -#--- ignore all local data files; preserve/recreate folder structure -data/demo_tiles/*.tiff -data/tiles -data/wsi - -#--- ignore all doc files -docs -notebooks -preso diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index f3469dcef5283f83448abf6c541c740fb27a198b..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,46 +0,0 @@ -#--- Note: omdena github does not allow git lfs. Disable. -#--- Note: huggingface does not allow _any_ binaries; git lfs has to be used -#bin/models/*.pth filter=lfs diff=lfs merge=lfs -text -#bin/models/deeplab*vhflip30/model_a* filter=lfs diff=lfs merge=lfs -text -#bin/images/* filter=lfs diff=lfs merge=lfs -text -#data/demo_tiles/raw/* filter=lfs diff=lfs merge=lfs -text -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text -*.tiff filter=lfs diff=lfs merge=lfs -text -data/demo_tiles/raw/*.tiff filter=lfs diff=lfs merge=lfs -text -bin/models/deeplabv3*vhflip30/model_a* filter=lfs diff=lfs merge=lfs -text -*model_a* filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -bin/models/model.pth filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile index d50daa1b42563bb4b69f0a294ab850635fbdc6d4..7b1ad21eb069659dee1f8587135887cfb6a03015 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,4 +21,8 @@ EXPOSE 39500 # docker run -it -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaHcc_demo kidcoconut73/img_stm_omdenasaudi_hcc:demo #--- start an existing container -# docker start -it ctr_stmOmdenaHcc_demo \ No newline at end of file +# docker start -it ctr_stmOmdenaHcc_demo + +#--- deploy to huggingspace +# overwrite Dockerfile; cp Dockerfile.hug Dockerfile +# push 
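The deleted .gitattributes above routes every common binary format through Git LFS because, as its header notes, the Hugging Face remote rejects raw binary blobs. The following is a hedged pre-push audit sketch, not part of the project: it walks a working tree and flags binary files that are not stored as LFS pointer stubs. The extension subset and the pointer-size heuristic are assumptions.

#--- lfs_audit.py: flag binary files that would be pushed as raw blobs instead of LFS pointers
#--- assumption: a real LFS pointer is a tiny text stub beginning with the spec line below
from pathlib import Path

LFS_SUFFIXES = {".pth", ".pt", ".pkl", ".pickle", ".zip", ".tar", ".tgz", ".gz",
                ".h5", ".onnx", ".parquet", ".npy", ".npz", ".tiff", ".safetensors"}
POINTER_PREFIX = b"version https://git-lfs.github.com/spec/v1"

def is_lfs_pointer(path: Path) -> bool:
    #--- pointer files are only a few hundred bytes; anything large or non-matching is a raw binary
    if path.stat().st_size > 1024:
        return False
    return path.read_bytes().startswith(POINTER_PREFIX)

def audit(repo_root: str = ".") -> list[Path]:
    offenders = []
    for path in Path(repo_root).rglob("*"):
        if path.is_file() and path.suffix.lower() in LFS_SUFFIXES and not is_lfs_pointer(path):
            offenders.append(path)
    return offenders

if __name__ == "__main__":
    for bad in audit():
        print("WARN: not tracked as an LFS pointer:", bad)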
to Huggingspace; \ No newline at end of file diff --git a/Dockerfile.hug b/Dockerfile.hug deleted file mode 100644 index 7b1ad21eb069659dee1f8587135887cfb6a03015..0000000000000000000000000000000000000000 --- a/Dockerfile.hug +++ /dev/null @@ -1,28 +0,0 @@ -#--- build the image from dockerHub -FROM kidcoconut73/img_stm_omdenasaudi_hcc:0.1.4 -#FROM kidcoconut73/img_stm_omdenasaudi_hcc:demo - - -#--- for streamlit; external 49400; internal 39400 -#--- for fastapi; external 49500; internal 39500 -#--- for huggingface; assume 1:1 mapping between internal and external ports; and only one port can truly be exposed -EXPOSE 39400 -EXPOSE 39500 - -#--- build/rebuild the image from an alt Dockerfile -# docker build -t foo/bar -f /path/to/Dockerfile /path/to/ -# docker build -t img_stm_omdenasaudi_hcc:0.1.4 -f Dockerfile.hub ./ - -#--- create a container from an image (without running it) -# docker create -it -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaHcc_demo kidcoconut73/img_stm_omdenasaudi_hcc:demo - -#--- to run the container (interactive) from the image; specific port mapping (-p) vs any available port mapping (-P) -# docker run -it -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaHcc_demo img_stm_omdenasaudi_hcc:0.1.4 -# docker run -it -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaHcc_demo kidcoconut73/img_stm_omdenasaudi_hcc:demo - -#--- start an existing container -# docker start -it ctr_stmOmdenaHcc_demo - -#--- deploy to huggingspace -# overwrite Dockerfile; cp Dockerfile.hug Dockerfile -# push to Huggingspace; \ No newline at end of file diff --git a/Dockerfile.local b/Dockerfile.local deleted file mode 100644 index 5e187c7b7486d401279a9ea137798810008f05af..0000000000000000000000000000000000000000 --- a/Dockerfile.local +++ /dev/null @@ -1,86 +0,0 @@ -#--- PREREQS: -# - create a local folder dedicated to WSI image mgmt: (docker pwd)/data -# - populate the folder with raw data, wsi and tiles -# - docker run --name -v - -#--- utilize a light linux distro for python apps -FROM python:3.10.9-slim-bullseye - -RUN pip install --no-cache-dir --upgrade pip -RUN apt-get update -y && apt-get install -y ffmpeg libsm6 libxext6 - -# Set up a new user named "user" with user ID 1000 -# Switch to the "user" user -RUN useradd -m -u 1000 user -USER user - -# Set home to the user's home directory -ENV HOME=/home/user \ - PATH=/home/user/.local/bin:$PATH - -#--- set docker image working directory to /app -WORKDIR $HOME/app - -# Try and run pip command after setting the user with `USER user` to avoid permission issues with Python -#RUN pip install --no-cache-dir --upgrade pip - - -#--- copy only the requirements.txt file -#--- set docker image working directory to /app -#--- Not: this is reorg'd in an attempt to reduce the rebuilding of layers -COPY --chown=user ./requirements.txt $HOME/app/requirements.txt -COPY --chown=user ./packages.txt $HOME/app/packages.txt - -#--- install all lib dependencies into the image -#RUN pip3 install -r $HOME/app/requirements.txt -RUN pip install --no-cache-dir --upgrade -r $HOME/app/requirements.txt -#RUN apt-get update -y && apt-get install -y ffmpeg libsm6 libxext6 - -#--- copy all files from the local pwd to the docker image /app folder -#--- .dockerignore: ensure no local data folders or files (images) are copied into the docker image/container -COPY --chown=user . 
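The Dockerfile and Dockerfile.hug comments above close with the manual Space deploy recipe: overwrite Dockerfile with Dockerfile.hug, then push to the Hugging Face Space. Below is a sketch that automates those two steps; it assumes the repo already has a git remote named "space" pointing at the Space and that git-lfs is initialised for the patterns in .gitattributes.

#--- deploy_space.py: sketch of the "cp Dockerfile.hug Dockerfile; push" recipe from the comments above
import shutil
import subprocess

def deploy_to_space(remote: str = "space", branch: str = "main") -> None:
    #--- 1. overwrite the Docker Hub build file with the Space variant
    shutil.copyfile("Dockerfile.hug", "Dockerfile")

    #--- 2. commit and push; check=True stops the flow on any git error (e.g. nothing to commit)
    subprocess.run(["git", "add", "Dockerfile"], check=True)
    subprocess.run(["git", "commit", "-m", "deploy: use Dockerfile.hug for the Space"], check=True)
    subprocess.run(["git", "push", remote, branch], check=True)

if __name__ == "__main__":
    deploy_to_space()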
$HOME/app - -#--- for streamlit; external 49400; internal 39400 -# localExec: (from root folder) streamlit run app.py --server.port=39400 --server.maxUploadSize=2000 -EXPOSE 49400 -#CMD ["streamlit", "run", "app.py", "--server.port=39400", "--server.maxUploadSize=2000"] - -#--- for fastapi; external 49500; internal 39500 -# localExec: (from root folder) uvicorn main:app --reload --workers 1 --host 0.0.0.0 --port 39500 -EXPOSE 49500 -#CMD ["uvicorn", "main:app", "--reload", "--host=0.0.0.0", "--port=39500"] - -#--- start streamlit and fastapi from a helper utility script -#CMD ./util_startLocal_streamlitFastApi.sh -CMD $HOME/app/scripts/docker/util_docker_preRun.sh - - -#--- to build/rebuild the image; make sure you stop and remove the container if you are replacing/upgrading; or change the version tag# from 0.1 -# docker build -t img_stm_omdenasaudi_hcc:0.1 . -# docker build -t img_stm_omdenasaudi_hcc:0.1.3 . - -#--- build/rebuild the image from an alt Dockerfile -# docker build -t foo/bar -f /path/to/Dockerfile /path/to/ -# docker build -t img_stm_omdenasaudi_hcc:0.1.4 -f Dockerfile.hub ./ - -#--- to tag the image prior to push to DockerHub; docker login and then register user/image:tag -#--- to push this image to DockerHub, example based on the repo: kidcoconut73/img_stm_omdenasaudi_hcc -# docker tag img_stm_omdenasaudi_hcc:0.1 kidcoconut73/img_stm_omdenasaudi_hcc:demo -# docker tag img_stm_omdenasaudi_hcc:0.1 kidcoconut73/img_stm_omdenasaudi_hcc:0.1 -# docker push kidcoconut73/img_stm_omdenasaudi_hcc:demo - -#--- to run the container from the image; specific port mapping (-p) vs any available port mapping (-P) -# docker run -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaSaudiHcc -v ./data:/app/data img_stm_omdenasaudi_hcc:0.1 -# docker run -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaSaudiHcc img_stm_omdenasaudi_hcc:0.1 -# docker run -P --name ctr_stmOmdenaHcc img_stm_omdenasaudi_hcc:0.1 #--- open all ports defined by Docker EXPOSE - -#--- to run the container in interactive mode (local debug) -# docker run -it -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaSaudiHcc_013 img_stm_omdenasaudi_hcc:0.1.3 - -#--- ISSUE: uvicorn bug does not allow ctl-C break of fastapi through terminal -#--- WORKAROUND: you have to run a docker or docker compose kill cmd; eg docker kill - - -#--- Docker build log -# from python:3.10.9-slim-bullseye size: 4.21gb time: >yyys -# from python:3.10.9-slim-bullseye size: 4.21gb time: >3500s :0.13 expose streamlit 49400 diff --git a/app.py b/app.py deleted file mode 100644 index 23f7cc0ec504cb9f59f9eb3ed31b1d503f44abfa..0000000000000000000000000000000000000000 --- a/app.py +++ /dev/null @@ -1,23 +0,0 @@ -''' - toExecute: (from root app folder) ... streamlit run app.py -''' -import streamlit as st -import uix.lit_sidebar as litSideBar - - -#--- streamlit: specify title and logo -st.set_page_config( - page_title='Omdena Saudi Arabia - Liver HCC Diagnosis with XAI', - #page_icon='https://cdn.freebiesupply.com/logos/thumbs/1x/nvidia-logo.png', - layout="wide") -st.header('\ - Detecting Liver Cancer from Histopathology WSI \ - using Deep Learning and Explainability (XAI)\ -') -st.markdown('#### Dr. 
Shaista Hussain (Saudi Arabia Chapter Lead)') -st.markdown("##### Iain McKone (Deployment Lead) [LinkedIn](%s)" % "https://linkedin.com/in/iainmckone") -st.markdown('---') - - -#--- streamlit: add a sidebar -litSideBar.init() diff --git a/bin/images/logo_omdena_saudi.png b/bin/images/logo_omdena_saudi.png deleted file mode 100644 index 86ed85b3be5b436f344578286dbbba8a15c05fe3..0000000000000000000000000000000000000000 Binary files a/bin/images/logo_omdena_saudi.png and /dev/null differ diff --git a/bin/models/__init__.py b/bin/models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/bin/models/deeplabv3_r50_full_train_dataset_80-20_split_10epochs_no-norm_vhflip30/version b/bin/models/deeplabv3_r50_full_train_dataset_80-20_split_10epochs_no-norm_vhflip30/version deleted file mode 100644 index 00750edc07d6415dcc07ae0351e9397b0222b7ba..0000000000000000000000000000000000000000 --- a/bin/models/deeplabv3_r50_full_train_dataset_80-20_split_10epochs_no-norm_vhflip30/version +++ /dev/null @@ -1 +0,0 @@ -3 diff --git a/bin/models/model.pth b/bin/models/model.pth deleted file mode 100644 index 2b980685db7dd69e0c47545469147c956728a160..0000000000000000000000000000000000000000 --- a/bin/models/model.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9029cfef04ad8f66b1b59f7417ec0f2eb6721198fb8a31f32dea75d87e1168a8 -size 168385213 diff --git a/config.toml b/config.toml deleted file mode 100644 index c99a40792054ed4f9756781907f0d0ebe2e20b3c..0000000000000000000000000000000000000000 --- a/config.toml +++ /dev/null @@ -1,3 +0,0 @@ -[server] - -maxUploadSize = 2000 #--- increased from default 200MB to 2000MB diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0105_svsvt_chunks_10240_13312.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0105_svsvt_chunks_10240_13312.tiff deleted file mode 100644 index 8be16180366a834292c0ce06927d8a36ade2fd85..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0105_svsvt_chunks_10240_13312.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f7f1ead30f809502867630618dc072714d204c3ad191662f5837f70d083dbf76 -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0105_svswt_chunks_38912_5632.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0105_svswt_chunks_38912_5632.tiff deleted file mode 100644 index 2dbb0158ad9bfcc3e409d9c9d017fde490c9c416..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0105_svswt_chunks_38912_5632.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c4d4c9a4c03c433098076160a3a9a6142242336475b113d1b423449f56c4736 -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0140_svsvt_chunks_45056_8192.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0140_svsvt_chunks_45056_8192.tiff deleted file mode 100644 index 5f95871c41f1fca212d65e2472723e6603489841..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0140_svsvt_chunks_45056_8192.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
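The deleted app.py above hands navigation off to uix.lit_sidebar.init(), a module that is not part of this diff. Purely as an illustration of the shape such a module could take (the page names and layout here are invented, not the project's actual sidebar):

#--- illustrative only: a minimal uix/lit_sidebar.py-style module of the kind app.py imports
import streamlit as st

def init() -> None:
    #--- render a simple navigation sidebar; app.py calls this once after set_page_config
    with st.sidebar:
        page = st.radio("Navigation", ["About", "Tile Diagnosis", "WSI Diagnosis"])
        st.markdown("---")
    #--- remember the selection so individual page modules can react to it
    st.session_state["active_page"] = page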
sha256:4283367c661dc21d397cb42c0ed07c7d0668bed4a531c159be46f8e3283b99fe -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0141_svsvt_chunks_39936_26624.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0141_svsvt_chunks_39936_26624.tiff deleted file mode 100644 index 1a1427f334017a9eaaa3af093fc6c1ec744d7d66..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0141_svsvt_chunks_39936_26624.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32beafb1e6da830177c68d41d3595346cf6d005b59518c9ff76338465a09f76a -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0141_svswt_chunks_46080_46080.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0141_svswt_chunks_46080_46080.tiff deleted file mode 100644 index 0881e3f22f835fcc2c2a4fff650c5a9d367b1225..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0141_svswt_chunks_46080_46080.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:384304ab2e113ef190b025a7706913f2dd305591aa262cb90fbdde4635c90bcc -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0142_svswt_chunks_41472_21504.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0142_svswt_chunks_41472_21504.tiff deleted file mode 100644 index 0ee079ac8e3845c79318b2caaf00aee6acc2ff0d..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0142_svswt_chunks_41472_21504.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:57e67d534e0bce3f305fca0037b3e5b069dab0dd5b7100b259ae47c21d410345 -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0145_svsvt_chunks_5632_28672.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0145_svsvt_chunks_5632_28672.tiff deleted file mode 100644 index 1a5cbd299d6c1fb7660aad7d2f71cac414860ac6..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0145_svsvt_chunks_5632_28672.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:31d1a4b039f036a019f2a11e0f3c0002f9c83125b32c8a868aab4c5a996ce1ff -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svsvt_chunks_37888_35840.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svsvt_chunks_37888_35840.tiff deleted file mode 100644 index 1585006a976ef74df4929e9778b59d28a980a0ee..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svsvt_chunks_37888_35840.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02e14838208a8afa44c5d7a94a19e95b5ba348e4f243e2445cab00d2d0fe202c -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svsvt_chunks_44032_38912.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svsvt_chunks_44032_38912.tiff deleted file mode 100644 index 
a39ce4ec4909e59ef3c3891c0a3489af455cf8e3..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svsvt_chunks_44032_38912.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a004d358b77bcb115e7a7178e5cea2d11539076069fe557075a07913520db62 -size 1048864 diff --git a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svswt_chunks_46080_19456.tiff b/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svswt_chunks_46080_19456.tiff deleted file mode 100644 index 982f1e9b420345b1ec74efd1f94b1047178308bc..0000000000000000000000000000000000000000 --- a/data/demo_tiles/raw/sample/Dataset_PAIP2019_Omdena_Validation_Image_01_01_0161_svswt_chunks_46080_19456.tiff +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7450a97f433fda64a85e21883f9b53c9f38f36be18ead29aaac1ee03d6d096ff -size 1048864 diff --git a/lib/__init__.py b/lib/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/lib/fctTile.py b/lib/fctTile.py deleted file mode 100644 index aa2415d9b5f6b221e14f3bceb9553deaf61418fc..0000000000000000000000000000000000000000 --- a/lib/fctTile.py +++ /dev/null @@ -1 +0,0 @@ -#--- factory class for tile operations \ No newline at end of file diff --git a/lib/fctWsi.py b/lib/fctWsi.py deleted file mode 100644 index 4255ca99779a2353c0505d966d4dff61ed7883c9..0000000000000000000000000000000000000000 --- a/lib/fctWsi.py +++ /dev/null @@ -1 +0,0 @@ -#--- factory class for wsi operations \ No newline at end of file diff --git a/lib/models/__init__.py b/lib/models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/lib/models/mdl_autoenc.py b/lib/models/mdl_autoenc.py deleted file mode 100644 index fad488f7da6d44c651fa49aaf4a0564d89b5b478..0000000000000000000000000000000000000000 --- a/lib/models/mdl_autoenc.py +++ /dev/null @@ -1,55 +0,0 @@ -import pandas as pd -import numpy as np -from sklearn.decomposition import PCA -import lib.utils as libPaths -import pickle - - -m_kstrFile = __file__ -m_kstrDataPath = libPaths.pth_data -m_kstrBinModelPath = libPaths.pth_binModels -m_kstrPcaModelPath = m_kstrBinModelPath + 'pca_unsuperv_colab.pkl' -m_kstrEncModelPath = m_kstrBinModelPath + 'enc_keras_seq/' - - -#--- Supervised: autoencoder - Principal Component Analysis -def load_encFromKeras(): - from tensorflow import keras - mdlAnoms = keras.models.load_model(m_kstrEncModelPath) - return mdlAnoms - - -def load_pcaFromPkl(): - with open(m_kstrPcaModelPath, 'rb') as filPkl: - # load using pickle de-serializer - mdlAnoms = pickle.load(filPkl) - return mdlAnoms - - -def save_encToKeras(mdlAnoms): - mdlAnoms.save(m_kstrEncModelPath) - - - -def predict(pdfScaled): - - #--- Pre: Transforming train and test dataframes based on PCA - mdlPCA = load_pcaFromPkl() #--- this is a pre-fit model based on training - npaPca = mdlPCA.transform(pdfScaled) - print("INFO (" + m_kstrFile + ".predict) npaPca.shape: ", npaPca.shape) - - - #--- predict on unseen data - mdlEnc = load_encFromKeras() - npaPredict = mdlEnc.predict(npaPca[:,:29]) - print("INFO (" + m_kstrFile + ".predict) npaPredict.shape: ", npaPredict.shape) - #--- expected: 297, 29? 
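The mdl_autoenc.predict flow above projects the scaled frame through the pickled PCA and feeds the first 29 components to the Keras autoencoder, returning the reconstruction. A hedged sketch of how a caller might turn that reconstruction into per-row anomaly scores, assuming the same module and its PCA/Keras artifacts are available on disk; the 0.05 threshold is an assumption, not a project value.

#--- score_anomalies: reconstruction-error scoring on top of mdl_autoenc.predict
import numpy as np
import lib.models.mdl_autoenc as mdl_autoenc

def score_anomalies(pdfScaled, threshold: float = 0.05):
    #--- reproduce the PCA projection that predict() applies internally, so the
    #--- reconstruction can be compared against the same 29-component input
    npaPca = mdl_autoenc.load_pcaFromPkl().transform(pdfScaled)[:, :29]
    npaRecon = mdl_autoenc.predict(pdfScaled)

    #--- mean squared reconstruction error per row; larger error => more anomalous
    npaMse = np.mean(np.square(npaPca - npaRecon), axis=1)
    return npaMse, npaMse > threshold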
- return npaPredict - - -""" -def train(pdfTrainData): - mdlAnoms = PCA() #---- TODO: this is Keras Sequential - mdlAnoms.fit(pdfTrainData.values) - save_encToKeras(mdlAnoms) - return mdlAnoms """ \ No newline at end of file diff --git a/lib/models/mdl_kmeans.py b/lib/models/mdl_kmeans.py deleted file mode 100644 index 38eb9ba61d7389e26fc6dd89441c3b29287f9224..0000000000000000000000000000000000000000 --- a/lib/models/mdl_kmeans.py +++ /dev/null @@ -1,155 +0,0 @@ -from sklearn.cluster import KMeans -import lib.utils as libPaths -import pickle -import pandas as pd - - -m_kstrFile = __file__ -m_kstrDataPath = libPaths.pth_data -m_kstrBinModelPath = libPaths.pth_binModels - -#m_kstrPcaModelPath = m_kstrBinModelPath + 'pca_kmeans_unsuperv_colab.pkl' -#m_kstrPcaModelPath = m_kstrBinModelPath + 'pca_kmeans_unsuperv_colab_v1.2.1.pkl' -m_kstrPcaModelPath_111 = m_kstrBinModelPath + 'claims_pca_v1.1.1_27cols.pkl' #--- ERROR: __randomstate_ctor() takes from 0 to 1 positional arguments but 2 were given -m_kstrPcaModelPath_121 = m_kstrBinModelPath + 'claims_pca_v1.2.1_27cols.pkl' -m_kstrPcaModelPath_claims_py3816_sk111hp = m_kstrBinModelPath + 'claims_pca_py3816_sk111hp_27cols.pkl' -m_kstrPcaModelPath = m_kstrPcaModelPath_claims_py3816_sk111hp - -#m_kstrKmeansModelPath = m_kstrBinModelPath + 'kmeans_unsuperv_colab.pkl' -#m_kstrKmeansModelPath = m_kstrBinModelPath + 'kmn_unsuperv_colab_v1.2.1.pkl' -m_kstrModelPath_111 = m_kstrBinModelPath + 'claims_kmn_v1.1.1_22cols.pkl' #--- ERROR: __randomstate_ctor() takes from 0 to 1 positional arguments but 2 were given -m_kstrModelPath_121 = m_kstrBinModelPath + 'claims_kmn_v1.2.1_22cols.pkl' -m_kstrModelPath_claims_py3816_sk111hp = m_kstrBinModelPath + 'claims_kmn_py3816_sk111hp_22cols.pkl' -m_kstrKmeansModelPath = m_kstrModelPath_claims_py3816_sk111hp - -m_blnTraceOn = True - - -#--- unsupervised: Logistic Regession -def load_pcaFromPkl(): - with open(m_kstrPcaModelPath, 'rb') as filPkl: - mdlAnoms = pickle.load(filPkl) - return mdlAnoms - - -#--- unsupervised: KMeans -def load_kmeansFromPkl(): - with open(m_kstrKmeansModelPath, 'rb') as filPkl: - mdlAnoms = pickle.load(filPkl) - return mdlAnoms - - -def save_pcaToPkl(mdlAnoms): - with open(m_kstrPcaModelPath, 'wb') as filPkl: - pickle.dump(mdlAnoms, filPkl) - return mdlAnoms - - -def save_kmeansToPkl(mdlAnoms): - with open(m_kstrKmeansModelPath, 'wb') as filPkl: - pickle.dump(mdlAnoms, filPkl) - return mdlAnoms - - - -#--- determine which points can be labelled against which clusters -def predict(pdfScaled): - #--- load a persisted fit kmeans model - #--- predict will assign labels onto a similarly scaled data frame - - - #--- Note: reverse chron through the code ... - #--- 4. KMeans was fit on X-reduced (22 cols) - #--- 3. X_reduced was a reduced column set of X-scaled (27 -> 22; Dropped 5 cols: DeadOrNot; and hotEncoded Gender and Race) - #--- 2. x_scaled was transformed through stdScaler - #--- 1. 
StdScaler was fit on X to produce X-scaled (X has 27 cols) - pdfReduced = pdfScaled[['InscClaimAmtReimbursed', 'DeductibleAmtPaid', - 'AdmittedDays', 'RenalDiseaseIndicator', 'NoOfMonths_PartACov', - 'NoOfMonths_PartBCov', 'ChronicCond_Alzheimer', - 'ChronicCond_Heartfailure', 'ChronicCond_KidneyDisease', - 'ChronicCond_Cancer', 'ChronicCond_ObstrPulmonary', - 'ChronicCond_Depression', 'ChronicCond_Diabetes', - 'ChronicCond_IschemicHeart', 'ChronicCond_Osteoporasis', - 'ChronicCond_rheumatoidarthritis', 'ChronicCond_stroke', - 'IPAnnualReimbursementAmt', 'IPAnnualDeductibleAmt', - 'OPAnnualReimbursementAmt', 'OPAnnualDeductibleAmt', 'Age']] - - #--- prefit Kmeans clustering - was fit on trained pdfReduced - #--- Note: if we want to understand how kmeans performs on test/prod data, we need to predict - mdlKMeans = load_kmeansFromPkl() - #ndaPredict = mdlKMeans.predict(pdfScaled) #20230208: ValueError: X has 27 features, but KMeans is expecting 22 features as input. - ndaPredict = mdlKMeans.predict(pdfReduced) #ValueError: X has 22 features, but KMeans is expecting 27 features as input. - return ndaPredict - - -#--- feat eng -def do_featEng(pdfLoaded, blnIsTrain=False, hasGroupByProviderCols=True): - print("INFO (mdl_kmeans.doFeatEng): blnIsTrain, ", blnIsTrain) - - #--- columns_to_remove - aryColsToDrop = ['BeneID', 'ClaimID', 'ClaimStartDt','ClaimEndDt','AttendingPhysician', - 'OperatingPhysician', 'OtherPhysician', 'ClmDiagnosisCode_1', - 'ClmDiagnosisCode_2', 'ClmDiagnosisCode_3', 'ClmDiagnosisCode_4', - 'ClmDiagnosisCode_5', 'ClmDiagnosisCode_6', 'ClmDiagnosisCode_7', - 'ClmDiagnosisCode_8', 'ClmDiagnosisCode_9', 'ClmDiagnosisCode_10', - 'ClmProcedureCode_1', 'ClmProcedureCode_2', 'ClmProcedureCode_3', - 'ClmProcedureCode_4', 'ClmProcedureCode_5', 'ClmProcedureCode_6', - 'ClmAdmitDiagnosisCode', 'AdmissionDt', - 'DischargeDt', 'DiagnosisGroupCode','DOB', 'DOD', - 'State', 'County'] - pdfFeatEng = pdfLoaded.drop(columns=aryColsToDrop, axis=1) - - #--- flag categorical cols - pdfFeatEng.Gender = pdfFeatEng.Gender.astype('category') - pdfFeatEng.Race = pdfFeatEng.Race.astype('category') - - #--- one-hot-encoding - pdfFeatEng = pd.get_dummies(pdfFeatEng, columns=['Gender', 'Race'], drop_first=True) - if (blnIsTrain): - #--- one-hot encode the potential fraud column (for training data only) - try: - #print("INFO (claims.doFeatEng): one-hot encoding potential fraud") - pdfFeatEng.loc[pdfFeatEng['PotentialFraud'] == 'Yes', 'PotentialFraud'] = 1 - pdfFeatEng.loc[pdfFeatEng['PotentialFraud'] == 'No', 'PotentialFraud'] = 0 - except KeyError: - #--- likely column not found; invalid fxn call - print("ERROR (claims.doFeatEng): Potential Fraud col not found") - - pdfFeatEng.loc[pdfFeatEng['RenalDiseaseIndicator'] == 'Y', 'RenalDiseaseIndicator'] = 1 - pdfFeatEng['DeductibleAmtPaid'].fillna(0, inplace=True) - pdfFeatEng['AdmittedDays'].fillna(0, inplace=True) - - #--- check for correlated cols - - #--- add new features to assist with predictions - if (hasGroupByProviderCols): - pdfFeatEng['InscClaimReimbursement_ProviderAvg'] = pdfFeatEng.groupby(['Provider'])['InscClaimAmtReimbursed'].transform('mean') - pdfFeatEng['DeductibleAmtPaid_ProviderAvg'] = pdfFeatEng.groupby(['Provider'])['DeductibleAmtPaid'].transform('mean') - - pdfFeatEng['IPAnnualReimbursementAmt_ProviderAvg'] = pdfFeatEng.groupby(['Provider'])['IPAnnualReimbursementAmt'].transform('mean') - pdfFeatEng['IPAnnualDeductibleAmt_ProviderAvg'] = pdfFeatEng.groupby(['Provider'])['IPAnnualDeductibleAmt'].transform('mean') - - 
pdfFeatEng['OPAnnualReimbursementAmt_ProviderAvg'] = pdfFeatEng.groupby(['Provider'])['OPAnnualReimbursementAmt'].transform('mean') - pdfFeatEng['OPAnnualDeductibleAmt_ProviderAvg'] = pdfFeatEng.groupby(['Provider'])['OPAnnualDeductibleAmt'].transform('mean') - return pdfFeatEng - - -def fit(pdfScaled): - #--- determine the centroids of the kmeans clusters - #--- refit kmeans clustering according to the pre-scaled data provided - #--- note: this all assumes that the nature of the data and the number of clusters remain unchanged - m_klngNumClusters = 3 - if (m_blnTraceOn): print("TRACE (" + m_kstrFile + ".fit) instantiate KMeans ...") - mdlKMeans = KMeans(n_clusters=m_klngNumClusters, max_iter=50, random_state=2022) #--- #clusters was learned from training - - if (m_blnTraceOn): print("TRACE (" + m_kstrFile + ".fit) fitting data (scaled) ...") - mdlKMeans.fit(pdfScaled) #--- fit on test/prod data - - return mdlKMeans #--- this ibject will give us all results based on kmeans - - -def train(pdfTrainData): - mdlAnoms = KMeans(n_clusters=3, max_iter=50, random_state=2022) - mdlAnoms.fit(pdfTrainData.values) - save_kmeansToPkl(mdlAnoms) - return mdlAnoms \ No newline at end of file diff --git a/lib/models/mdl_logR.py b/lib/models/mdl_logR.py deleted file mode 100644 index d3546a2b881db9b387a9d1bedca3aefda1a8860d..0000000000000000000000000000000000000000 --- a/lib/models/mdl_logR.py +++ /dev/null @@ -1,41 +0,0 @@ -from sklearn.linear_model import LogisticRegressionCV -import lib.utils as libPaths -import pickle - - -m_kstrFile = __file__ -m_kstrDataPath = libPaths.pth_data -m_kstrBinModelPath = libPaths.pth_binModels -m_kstrModelPath = m_kstrBinModelPath + 'lgr_model_colab.pkl' - - -#--- Supervised: Logistic Regession -def load_fromPkl(): - with open(m_kstrModelPath, 'rb') as filPkl: - mdlAnoms = pickle.load(filPkl) - return mdlAnoms - - - -def save_toPkl(mdlAnoms): - with open(m_kstrModelPath, 'wb') as filPkl: - pickle.dump(mdlAnoms, filPkl) - return mdlAnoms - - - -def predict(npaData): - #--- input: numpy.ndarray of feature eng, and scaled data - mdlAnoms = load_fromPkl() - npaPredict = mdlAnoms.predict(npaData) - - print("INFO (npaPredict.shape): ", npaPredict.shape) - return npaPredict - - - -def train(pdfTrainData): - mdlAnoms = LogisticRegressionCV() - mdlAnoms.fit(pdfTrainData.values) - save_toPkl(mdlAnoms) - return mdlAnoms \ No newline at end of file diff --git a/lib/models/mdl_svm.py b/lib/models/mdl_svm.py deleted file mode 100644 index 1921dd3b87a29a9f62fd81288d850900bf588cd7..0000000000000000000000000000000000000000 --- a/lib/models/mdl_svm.py +++ /dev/null @@ -1,40 +0,0 @@ -from sklearn.svm import LinearSVC -import lib.utils as libPaths -import pickle - - -m_kstrFile = __file__ -m_kstrDataPath = libPaths.pth_data -m_kstrBinModelPath = libPaths.pth_binModels -m_kstrModelPath = m_kstrBinModelPath + 'svm_model_colab.pkl' - - -#--- Supervised: Support Vector Machines -def load_fromPkl(): - with open(m_kstrModelPath, 'rb') as filPkl: - mdlAnoms = pickle.load(filPkl) - return mdlAnoms - - - -def save_toPkl(mdlAnoms): - with open(m_kstrModelPath, 'wb') as filPkl: - pickle.dump(mdlAnoms, filPkl) - return mdlAnoms - - - -def predict(npaData): - #--- input: numpy.ndarray of feature eng, and scaled data - mdlAnoms = load_fromPkl() - npaPredict = mdlAnoms.predict(npaData) - print("INFO (" + m_kstrFile + ".predict) npaPredict.shape: ", npaPredict.shape) - return npaPredict - - - -def train(pdfTrainData): - mdlAnoms = LinearSVC() - mdlAnoms.fit(pdfTrainData.values) - save_toPkl(mdlAnoms) - 
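The commented-out errors in mdl_kmeans.predict above ("X has 27 features, but KMeans is expecting 22 features as input") come down to one rule: a fitted KMeans model only accepts the exact column set it was fit on, which is why predict drops to the 22-column reduced frame. A self-contained illustration with stand-in random data (the cluster count, max_iter and random_state mirror the module's own values):

#--- feature-count pitfall: predict must see the same 22 columns used at fit time
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(2022)
X_scaled = rng.normal(size=(100, 27))      #--- stand-in for the 27-column scaled frame
X_reduced = X_scaled[:, :22]               #--- stand-in for the 22-column reduced frame

mdlKMeans = KMeans(n_clusters=3, max_iter=50, random_state=2022, n_init=10).fit(X_reduced)

print(mdlKMeans.predict(X_reduced)[:10])   #--- OK: same 22 features as at fit time
try:
    mdlKMeans.predict(X_scaled)            #--- raises: X has 27 features, but KMeans is expecting 22
except ValueError as err:
    print("ValueError:", err)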
return mdlAnoms \ No newline at end of file diff --git a/lib/models/mdl_utils.py b/lib/models/mdl_utils.py deleted file mode 100644 index 9becb4041a751a7c8bcd3e2c8261961e84772d32..0000000000000000000000000000000000000000 --- a/lib/models/mdl_utils.py +++ /dev/null @@ -1,256 +0,0 @@ -import pandas as pd -import pickle -import lib.utils as libPaths - -m_blnTraceOn = False - -#--- load, merge data from file -m_kstrDataPath = libPaths.pth_data -m_kstrModelPath = libPaths.pth_model -m_kstrBinModelPath = libPaths.pth_binModels - -#m_kstrScalerPath_claims = m_kstrBinModelPath + 'stdClaims_scaler_colab.pkl' #--- does not work for scaling claims data; from v1.0.2; using 1.1.1 -#m_kstrScalerPath_claims2 = m_kstrBinModelPath + 'std_scaler_unsuperv_colab.pkl' #--- does not work; expects 32 features -#m_kstrScalerPath_claims = m_kstrBinModelPath + 'stdClaims_scaler_colab_v1.2.1.pkl' -m_kstrScalerPath_claims111 = m_kstrBinModelPath + 'claims_stdScaler_v1.1.1_27cols.pkl' -m_kstrScalerPath_claims121 = m_kstrBinModelPath + 'claims_stdScaler_v1.2.1_27cols.pkl' -m_kstrScalerPath_claims_py3816_sk111hp = m_kstrBinModelPath + 'claims_stdScl_py3816_sk111hp_27cols.pkl' -m_kstrScalerPath_claims = m_kstrScalerPath_claims_py3816_sk111hp - -m_kstrScalerPath_providers111 = m_kstrBinModelPath + 'prov_stdScaler_v1.1.1_32cols.pkl' -m_kstrScalerPath_providers121 = m_kstrBinModelPath + 'prov_stdScaler_v1.2.1_32cols.pkl' -m_kstrScalerPath_prov_py3816_sk111 = m_kstrBinModelPath + 'prov_stdScl_py3816_sk111_32cols.pkl' -m_kstrScalerPath_prov_py3816_sk111hp = m_kstrBinModelPath + 'prov_stdScl_py3816_sk111hp_32cols.pkl' -m_kstrScalerPath_prov = m_kstrScalerPath_prov_py3816_sk111hp - -m_kstrScalerPath_providers_superv = m_kstrBinModelPath + 'gbc_scaler.pkl' -m_kstrScalerPath_providers_train = m_kstrBinModelPath + "stdProvider_scaler.pkl" - - - -def doProviders_stdScaler(pdfFeatEng, blnIsTrain=False, hasGroupByProviderCols=True): - print("INFO (claims.do_stdScaler): blnIsTrain, ", blnIsTrain) - - #--- Note: prediction runs on X_val - ''' - #--- WARN: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. - # In a future version, numeric_only will default to False. Either specify - # numeric_only or select only columns which should be valid for the function. - ''' - - #--- WARN: this code groups all data by provider; any predictions will also be by provider - pdfGroupBy = pdfFeatEng - if (hasGroupByProviderCols): - pdfGroupBy = pdfFeatEng.groupby(['Provider'], as_index=False).agg('sum') - - X = pdfGroupBy - - try: - X = X.drop(columns=['Provider'], axis=1) #--- cannot scale; text - except KeyError: - #--- likely column not found; invalid fxn call - print("ERROR (mdlUtils.doProviders_stdScaler): Provider col not found") - - try: - X = X.drop(columns=['PotentialFraud'], axis=1) - except KeyError: - #--- likely column not found; invalid fxn call - if (blnIsTrain): print("ERROR (mdlUtils.doProviders_stdScaler): Potential Fraud col not found") - - - #--- apply std scaler - #--- WARN: scaling is also grouped by provider - if (m_blnTraceOn): print("INFO (mdlUtils.doProviders_stdScaler) cols: ", X.columns) #--- 32cols - X_std = fitProviders_txfStdScaler(X, blnIsTrain) - return X_std - - - -def doClaims_stdScaler(pdfFeatEng, blnIsTrain=False): - print("INFO (mdlUtils.doClaims_stdScaler): blnIsTrain, ", blnIsTrain) - - #--- Note: prediction runs on X_val - ''' - #--- WARN: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. - # In a future version, numeric_only will default to False. 
Either specify - # numeric_only or select only columns which should be valid for the function. - ''' - - #--- WARN: this code groups all data by provider; any predictions will also be by provider - X = pdfFeatEng - - try: - X = X.drop(columns=['Provider'], axis=1) #--- cannot scale; text - except KeyError: - #--- likely column not found; invalid fxn call - print("ERROR (mdlUtils.do_stdScaler): Provider col not found") - - try: - X = X.drop(columns=['PotentialFraud'], axis=1) - except KeyError: - #--- likely column not found; invalid fxn call - if (blnIsTrain): print("ERROR (mdlUtils.do_stdScaler): Potential Fraud col not found") - - - #--- apply std scaler - #--- WARN: scaling is also grouped by provider - #print("INFO (mdlUtils.doClaims_stdScaler) cols: ", X.columns) - X_std = fitClaims_txfStdScaler(X, blnIsTrain) - return X_std - - - -def doProviders_stdScaler_toPdf(npaScaled): - #--- NOTE: the list of cols came from doProvider_stdScaler; print(X.columns) - aryCols = ['InscClaimAmtReimbursed', 'DeductibleAmtPaid', 'AdmittedDays', - 'NoOfMonths_PartACov', 'NoOfMonths_PartBCov', 'ChronicCond_Alzheimer', - 'ChronicCond_Heartfailure', 'ChronicCond_KidneyDisease', - 'ChronicCond_Cancer', 'ChronicCond_ObstrPulmonary', - 'ChronicCond_Depression', 'ChronicCond_Diabetes', - 'ChronicCond_IschemicHeart', 'ChronicCond_Osteoporasis', - 'ChronicCond_rheumatoidarthritis', 'ChronicCond_stroke', - 'IPAnnualReimbursementAmt', 'IPAnnualDeductibleAmt', - 'OPAnnualReimbursementAmt', 'OPAnnualDeductibleAmt', 'Age', 'DeadOrNot', - 'Gender_2', 'Race_2', 'Race_3', 'Race_5', - 'ClaimReimbursement_ProviderAvg', - 'ClaimReimbursement_AttendingPhysician', - 'ClaimReimbursement_OperatingPhysician', - 'DeductibleAmtPaid_ProviderAvg', 'DeductibleAmtPaid_AttendingPhysician', - 'DeductibleAmtPaid_OperatingPhysician'] - - #npaScaled = do_stdScaler(pdfFeatEng) - pdfScaled = pd.DataFrame(npaScaled, columns=aryCols) - return pdfScaled - - - -def doClaims_stdScaler_toPdf(npaScaled): - #--- NOTE: the list of cols came from doClaims_stdScaler; print(X.columns) - aryCols = ['InscClaimAmtReimbursed', 'DeductibleAmtPaid', 'AdmittedDays', - 'RenalDiseaseIndicator', 'NoOfMonths_PartACov', 'NoOfMonths_PartBCov', 'ChronicCond_Alzheimer', - 'ChronicCond_Heartfailure', 'ChronicCond_KidneyDisease', - 'ChronicCond_Cancer', 'ChronicCond_ObstrPulmonary', - 'ChronicCond_Depression', 'ChronicCond_Diabetes', - 'ChronicCond_IschemicHeart', 'ChronicCond_Osteoporasis', - 'ChronicCond_rheumatoidarthritis', 'ChronicCond_stroke', - 'IPAnnualReimbursementAmt', 'IPAnnualDeductibleAmt', - 'OPAnnualReimbursementAmt', 'OPAnnualDeductibleAmt', 'Age', 'DeadOrNot', - 'Gender_2', 'Race_2', 'Race_3', 'Race_5'] - - #npaScaled = do_stdScaler(pdfFeatEng) - pdfScaled = pd.DataFrame(npaScaled, columns=aryCols) - return pdfScaled - - - - -def fitClaims_stdScaler(pdfData, blnIsTrain=False): - #--- apply scaler - #--- WARN: scaling is not grouped by provider - from sklearn.preprocessing import StandardScaler - - #--- note: this is a numpy.ndarray - #--- we need to fit the scaler, and then save as a pkl file - #strScalerPath = m_kstrScalerPath_claims - strScalerPath = m_kstrScalerPath_claims -# strScalerPath = m_kstrBinModelPath + "stdClaims_scaler_colab.pkl" - if (m_blnTraceOn): print("INFO (lib.model.fitClaims_stdScalar): ", strScalerPath) - if (blnIsTrain): - scaler = StandardScaler() - sclFit = scaler.fit(pdfData) - #--- if we train locally; write out to gbc_scalar.pkl - #--- we do not want to overwrite the colab version used for test - strScalerPath = 
m_kstrBinModelPath + "stdClaims_scaler.pkl" - if (m_blnTraceOn): print("INFO (lib.model.fit_stdScalar) Using local pkl for Train: ", strScalerPath) - with open(strScalerPath, 'wb') as filPkl: - pickle.dump(sclFit, filPkl) - else: - #--- we need to load the pkl file - import sklearn - if (m_blnTraceOn): print("INFO (lib.model.fit_stdScalar) Using colab pkl for Test: ", strScalerPath) - with open(strScalerPath, 'rb') as filPkl: - sclFit = pickle.load(filPkl) - if (m_blnTraceOn): print("TRACE (libModel.fitClaims_stdScalar) sclFit.type: ", type(sclFit)) - - #--- testing - scaler = StandardScaler() - if (m_blnTraceOn): print("TRACE (libModel.fitClaims_stdScalar) StdScaler.version: ", scaler.__getstate__()['_sklearn_version']) - if (m_blnTraceOn): print("TRACE (libModel.fitClaims_stdScalar) sclFit.version: " , sclFit.__getstate__()['_sklearn_version']) - if (m_blnTraceOn): print("TRACE (libModel.fitClaims_stdScalar) sklearn.version: " , sklearn.__version__) - return sclFit - - - -def fitProviders_stdScaler(pdfData, blnIsTrain=False): - #--- apply scaler - #--- WARN: scaling is also grouped by provider - from sklearn.preprocessing import StandardScaler - - #--- note: this is a numpy.ndarray - #--- we need to fit the scaler, and then save as a pkl file - #strScalerPath = m_kstrScalerPath_providers - #strScalerPath = m_kstrScalerPath_providers_train - strScalerPath = m_kstrScalerPath_prov - print("INFO (libModel.fitProviders_stdScalar): ", strScalerPath) - if (blnIsTrain): - scaler = StandardScaler() - sclFit = scaler.fit(pdfData) - #--- if we train locally; write out to gbc_scalar.pkl - #--- we do not want to overwrite the colab version used for test - strScalerPath = m_kstrScalerPath_providers_train #--- works for provider training - if (m_blnTraceOn): print("TRACE (libModel.fitProviders_stdScalar) Using local pkl for Train: ", strScalerPath) - with open(strScalerPath, 'wb') as filPkl: - pickle.dump(sclFit, filPkl) - else: - #--- we need to load the pkl file - if (m_blnTraceOn): print("TRACE (libModel.fitProviders_stdScalar) Using colab pkl for Test: ", strScalerPath) - with open(strScalerPath, 'rb') as filPkl: - sclFit = pickle.load(filPkl) - if (m_blnTraceOn): print("TRACE (libModel.fitProviders_stdScalar) sclFit.type: ", type(sclFit)) - return sclFit - - - -def fitProviders_stdScalerSuperv(pdfData, blnIsTrain=False): - #--- apply scaler - #--- WARN: scaling is also grouped by provider - from sklearn.preprocessing import StandardScaler - - #--- note: this is a numpy.ndarray - #--- we need to fit the scaler, and then save as a pkl file - strScalerPath = m_kstrScalerPath_prov - if (m_blnTraceOn): print("TRACE (libModel.fitProviders_stdScalar): ", strScalerPath) - if (blnIsTrain): - scaler = StandardScaler() - sclFit = scaler.fit(pdfData) - #--- if we train locally; write out to gbc_scalar.pkl - #--- we do not want to overwrite the colab version used for test - strScalerPath = m_kstrBinModelPath + "stdProvider_scaler.pkl" - if (m_blnTraceOn): print("TRACE (libModel.fitProviders_stdScalar) Using local pkl for Train: ", strScalerPath) - with open(strScalerPath, 'wb') as filPkl: - pickle.dump(sclFit, filPkl) - else: - #--- we need to load the pkl file - if (m_blnTraceOn): print("TRACE (libModel.fitProviders_stdScalar) Using colab pkl for Test: ", strScalerPath) - with open(strScalerPath, 'rb') as filPkl: - sclFit = pickle.load(filPkl) - if (m_blnTraceOn): print("TRACE (libModel.fitProviders_stdScalar) sclFit.type: ", type(sclFit)) - return sclFit - - - -def fitProviders_txfStdScaler(pdfData, 
blnIsTrain=False): - from sklearn.preprocessing import StandardScaler - sclFit = fitProviders_stdScaler(pdfData, blnIsTrain) - X_std = sclFit.transform(pdfData) - return X_std - - - -def fitClaims_txfStdScaler(pdfData, blnIsTrain=False): - from sklearn.preprocessing import StandardScaler - sclFit = fitClaims_stdScaler(pdfData, blnIsTrain) - - - X_std = sclFit.transform(pdfData) - return X_std \ No newline at end of file diff --git a/lib/models/mdl_xgb.py b/lib/models/mdl_xgb.py deleted file mode 100644 index 0f8a78a9aa6f103da2003046ae118082c69fcb6b..0000000000000000000000000000000000000000 --- a/lib/models/mdl_xgb.py +++ /dev/null @@ -1,66 +0,0 @@ -import pandas as pd -from sklearn.ensemble import GradientBoostingClassifier -import lib.utils as libPaths -import pickle -import sys - - -m_kstrFile = __file__ -m_kstrDataPath = libPaths.pth_data -m_kstrBinModelPath = libPaths.pth_binModels -m_kstrModelPath_gbc = m_kstrBinModelPath + 'gbc_model_colab.pkl' -m_kstrModelPath_prov111 = m_kstrBinModelPath + 'prov_gbc_v1.1.1_32cols.pkl' #--- ERROR: __randomstate_ctor() takes from 0 to 1 positional arguments but 2 were given -m_kstrModelPath_prov121 = m_kstrBinModelPath + 'prov_gbc_v1.2.1_32cols.pkl' -m_kstrModelPath_prov_py3816_sk111hp = m_kstrBinModelPath + 'prov_gbc_py3816_sk111hp_32cols.pkl' -m_kstrModelPath = m_kstrModelPath_prov_py3816_sk111hp - -m_blnTraceOn = True - - - -#--- Supervised: xg boost; gradient boosting classifier -def load_fromPkl(): - try: - with open(m_kstrModelPath, 'rb') as filPkl: - mdlAnoms = pickle.load(filPkl) - return mdlAnoms - - except: - e = sys.exc_info() - print("ERROR (mdl_xgb.load_fromPkl_genError): ", e) - - - -def save_toPkl(mdlAnoms): - with open(m_kstrModelPath, 'wb') as filPkl: - pickle.dump(mdlAnoms, filPkl) - return mdlAnoms - - - -def predict(npaData): - - try: - #--- input: numpy.ndarray of feature eng, and scaled data - mdlAnoms = load_fromPkl() - if (m_blnTraceOn): print("TRACE (mdl_xgb.predict): data loaded ... ") - npaPredict = mdlAnoms.predict(npaData) - - except: - e = sys.exc_info() - print("ERROR (mdl_xgb.predict_genError1): ", e) - - - #--- AttributeError: 'GradientBoostingClassifier' object has no attribute '_loss' - #--- version of scikit-learn? Monika: ?.?.? ; Iain: 1.2.0 - - #print("INFO (type.npaPredict): ", type(npaPredict)) - #if (m_blnTraceOn): print("TRACE (mdl_xgb.predict) npaPredict.shape: ", npaPredict.shape) - return npaPredict - - -def train(pdfTrainData): - mdlAnoms = GradientBoostingClassifier() - mdlAnoms.fit(pdfTrainData.values) - save_toPkl(mdlAnoms) - return mdlAnoms diff --git a/lib/utils.py b/lib/utils.py deleted file mode 100644 index 14be94365bfe707a174d2dde194cde38af3fa657..0000000000000000000000000000000000000000 --- a/lib/utils.py +++ /dev/null @@ -1,38 +0,0 @@ -#--- notes: -# - this file is loaded by fastapi and streamlit, so keep it independant of those libs -# - all path are relative to the appl working folder: the parent of the lib folder; ie ..\.. to this file - -from pathlib import Path - -pth_pwd = Path(__file__).resolve().parent #--- should be \lib -pth_appRoot = pth_pwd.parent #--- .. 
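The mdl_utils helpers above all follow the same pattern: fit a StandardScaler once on training data, pickle the fitted object, and at inference time reload and only transform, so the models keep seeing the distribution they were trained on. A minimal self-contained sketch of that pattern; the pickle file name here is hypothetical.

#--- fit-once / reuse-at-inference scaler pattern, as in fitClaims_stdScaler and fitProviders_stdScaler
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler

strScalerPath = "std_scaler_example.pkl"   #--- hypothetical path for this sketch

#--- training side: fit on the training frame and persist the fitted scaler
X_train = np.random.default_rng(0).normal(loc=5.0, scale=2.0, size=(200, 4))
sclFit = StandardScaler().fit(X_train)
with open(strScalerPath, "wb") as filPkl:
    pickle.dump(sclFit, filPkl)

#--- inference side: reload the exact scaler fit at training time and only transform;
#--- refitting on test/prod data would silently shift the features the downstream models expect
with open(strScalerPath, "rb") as filPkl:
    sclLoaded = pickle.load(filPkl)
X_test_std = sclLoaded.transform(np.random.default_rng(1).normal(loc=5.0, scale=2.0, size=(10, 4)))
print(X_test_std.shape)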
- -pth_root = str(pth_appRoot) + "/" - -pth_bin = pth_root + "bin/" -pth_data = pth_root + "data/" -pth_lib = pth_root + "lib/" -pth_routes = pth_root + "routes/" -pth_templ = pth_root + "templ/" -pth_uix = pth_root + "uix/" - -#--- bin paths -pth_binImages = pth_bin + "images/" -pth_binModels = pth_bin + "models/" - -#--- data paths -pth_dtaApp = pth_data #--- working folders for app data; for docker, should be mapped to local host mount -pth_dtaDemoTiles = pth_data + "demo_tiles/" #--- dedicated area for demo data -pth_dtaTiles = pth_data + "tiles/" -pth_dtaWsi = pth_data + "wsi/" -pth_dtaTileSamples = pth_dtaDemoTiles + "raw/sample/" - -#--- lib paths -pth_libModels = pth_lib + "models/" - -#--- route paths -pth_rteApi = pth_routes + "api/" -pth_rteQa = pth_routes + "qa/" - -m_klngMaxRecords = 100 -m_klngSampleSize = 25 diff --git a/main.py b/main.py deleted file mode 100644 index 6440f2638d704b85fb89d1bfbb0fd440152f613d..0000000000000000000000000000000000000000 --- a/main.py +++ /dev/null @@ -1,92 +0,0 @@ -''' - purpose: fastAPI routing -''' - -from fastapi import FastAPI -from fastapi.responses import HTMLResponse -from fastapi import APIRouter, Request, Response -from fastapi.templating import Jinja2Templates -import uvicorn - -#--- import custom libraries -import lib.utils as libUtils - - -#--- imported route handlers -from routes.api.rte_api import rteApi -from routes.api.rte_wsi import rteWsi -from routes.api.rte_tiles import rteTiles - - -#--- fastAPI self doc descriptors -description = """ - Omdena Saudi Arabia: Liver Cancer HCC Diagnosis with XAI - - - - ## key business benefit #1 - ## key business benefit #2 - ## key business benefit #3 - - You will be able to: - * key feature #1 - * key feature #2 - * key feature #3 -""" - -app = FastAPI( - title="App: Omdena Saudi Arabia - Liver Cancer HCC Diagnosis with XAI", - description=description, - version="0.0.1", - terms_of_service="http://example.com/terms/", - contact={ - "name": "Iain McKone", - "email": "iain.mckone@gmail.com", - }, - license_info={ - "name": "Apache 2.0", - "url": "https://www.apache.org/licenses/LICENSE-2.0.html", - }, -) - - -#--- configure route handlers -app.include_router(rteWsi, prefix="/api/wsi") -app.include_router(rteTiles, prefix="/api/tiles") -app.include_router(rteApi, prefix="/api") - -#app.include_router(rteQa, prefix="/qa") - - -m_kstrPath_templ = libUtils.pth_templ -m_templRef = Jinja2Templates(directory=str(m_kstrPath_templ)) - - -def get_jinja2Templ(request: Request, pdfResults, strParamTitle, lngNumRecords, blnIsTrain=False, blnIsSample=False): - lngNumRecords = min(lngNumRecords, libUtils.m_klngMaxRecords) - if (blnIsTrain): strParamTitle = strParamTitle + " - Training Data" - if (not blnIsTrain): strParamTitle = strParamTitle + " - Test Data" - if (blnIsSample): lngNumRecords = libUtils.m_klngSampleSize - strParamTitle = strParamTitle + " - max " + str(lngNumRecords) + " rows" - - kstrTempl = 'templ_showDataframe.html' - jsonContext = {'request': request, - 'paramTitle': strParamTitle, - 'paramDataframe': pdfResults.sample(lngNumRecords).to_html(classes='table table-striped') - } - result = m_templRef.TemplateResponse(kstrTempl, jsonContext) - return result - - -#--- get main ui/ux entry point -@app.get('/') -def index(): - return { - "message": "Landing page: Omdena Saudi Arabia - Liver HCC Diagnosis with XAI" - } - - - -if __name__ == '__main__': - uvicorn.run("main:app", host="0.0.0.0", port=49300, reload=True) -#CMD ["uvicorn", "main:app", "--host=0.0.0.0", "--reload"] diff --git 
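The deleted main.py above mounts the three routers under /api, /api/wsi and /api/tiles and serves a landing message at the root. A hedged sketch of exercising those routes with Starlette's TestClient; it assumes it is run from the app root so "import main" resolves and that the test-client dependencies are installed.

#--- smoke-test the FastAPI routes wired up in main.py
from fastapi.testclient import TestClient
import main

client = TestClient(main.app)

print(client.get("/").json())            #--- landing message from index()
print(client.get("/api/").json())        #--- rte_api entry point
print(client.get("/api/wsi/").json())    #--- rte_wsi entry point
print(client.get("/api/tiles/").json())  #--- rte_tiles entry point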
a/packages.txt b/packages.txt deleted file mode 100644 index 4649f711d1c528342fa2dc4bd39ab6730af6dbde..0000000000000000000000000000000000000000 --- a/packages.txt +++ /dev/null @@ -1,3 +0,0 @@ -ffmpeg -libsm6 -libxext6 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 82b446d173e1ce7da91666b82458c5f1e96b1ab9..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1,87 +0,0 @@ -#--- 20240625: attempt to set the minimum version based on a working v0.1.2 -#--- 20230530: commented out all secondary packages as they were causing the huggingfaceSpace to fail - -altair<=5.0.1 -anyio<=3.7.1 -attrs<=23.1.0 -#backports.zoneinfo==0.2.1 - -#blinker==1.6.2 -#cachetools==5.3.0 -#certifi==2023.5.7 -#charset-normalizer==3.1.0 -#click==8.1.3 - -#decorator==5.1.1 -#entrypoints==0.4 - -fastapi<=0.95.2 - -#gitdb==4.0.10 -#git-lfs - -#GitPython==3.1.31 -grad-cam<=1.4.8 -#h11==0.14.0 -#idna==3.4 -#importlib-metadata==6.6.0 -#importlib-resources==5.12.0 -#ipython-genutils==0.2.0 - -Jinja2<=3.1.2 -joblib<=1.2.0 -jsonschema<=4.17.3 - -#markdown-it-py==2.2.0 -#MarkupSafe==2.1.2 - -matplotlib<=3.7.2 - -#mdurl==0.1.2 - -numpy<=1.24.3 -#packaging==23.1 - -pandas<=1.5.3 -#Pillow==9.5.0 -pip<=22.3.1 -#pkgutil_resolve_name==1.3.10 - -plotly<=5.14.1 -#protobuf==4.23.3 -#pyarrow==12.0.1 -#pydantic==1.10.11 -#pydeck==0.8.1b0 -#Pygments==2.15.1 -#Pympler==1.0.1 -#pyrsistent==0.19.3 -#python-dateutil==2.8.2 -#pytz==2023.3 -#PyYAML==6.0 -#requests==2.31.0 -#rich==13.4.2 - -scikit-learn==1.1.1 -#scipy<=1.11.1 -#six<=1.16.0 -#smmap==5.0.0 -#sniffio==1.3.0 -#starlette==0.27.0 -streamlit==1.24.0 - -#tenacity==8.2.2 -#threadpoolctl==3.1.0 -#toml==0.10.2 -#toolz==0.12.0 - -torch<=2.0.1 -torchvision<=0.15.2 -#tornado==6.3.2 - -#typing_extensions==4.6.2 -#tzlocal==5.0.1 -#urllib3==2.0.2 -uvicorn<=0.22.0 -#validators==0.20.0 -#watchdog==3.0.0 -#zipp==3.15.0 diff --git a/routes/__init__.py b/routes/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/routes/api/__init__.py b/routes/api/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/routes/api/rte_api.py b/routes/api/rte_api.py deleted file mode 100644 index c74d0041bbe645968740a8755b7d34fb142e1b30..0000000000000000000000000000000000000000 --- a/routes/api/rte_api.py +++ /dev/null @@ -1,79 +0,0 @@ -from fastapi import APIRouter, Request, Response -from fastapi.responses import JSONResponse - -import pandas as pd -import json - -#import lib.claims as libClaims -#from lib.models import mdl_utils, mdl_xgb - - -rteApi = APIRouter() - - -#--- -@rteApi.get('/') -def api_entry(): - return { - "message": "api routing - welcome to Omdena Saudi HCC api" - } - - - -''' -#--- >>> SAMPLE CODE BELOW -#--- return json for claims data (merged) -#--- note: current is kaggle, but future could include from yyyymm filter -@rteApi.get('/claims', response_class = JSONResponse) -def api_getClaims(request: Request, response: Response): - pdfClaims = libClaims.load_claims() - jsonSample = pdfClaims.head(50).to_json(orient="records", indent=4) - result = json.loads(jsonSample) - return result - - -#--- return json for featEng -@rteApi.get('/claims/doFeatEng/', response_class = JSONResponse) -def tst_claims_featEng(): - pdfClaims = libClaims.load_claims() - pdfFeatEng = libClaims.do_featEng(pdfClaims) - jsonSample = 
pdfClaims.head(50).to_json(orient="records", indent=4) - result = json.loads(jsonSample) - return result - - -@rteApi.get('/claims/doStdScaling/', response_class = JSONResponse) -def tst_claims_stdScaling(): - pdfClaims = libClaims.load_claims() - pdfFeatEng = libClaims.do_featEng(pdfClaims) - pdfScaled = mdl_utils.doClaims_stdScaler_toPdf(pdfFeatEng) - - jsonSample = pdfClaims.head(50).to_json(orient="records", indent=4) - result = json.loads(jsonSample) - return result - - -@rteApi.get('/claims/predict/superv', response_class = JSONResponse) -@rteApi.get('/claims/predict/xgb', response_class = JSONResponse) -def predict_xgb(): - #--- load test data - pdfClaims = libClaims.load_claims() - pdfFeatEng = libClaims.do_featEng(pdfClaims) - - npaScaled = mdl_utils.do_stdScaler(pdfFeatEng) - pdfScaled = mdl_utils.do_stdScaler_toPdf(npaScaled) - - ndaPredict = mdl_xgb.predict(npaScaled) - pdfPredict = pd.DataFrame(ndaPredict) - - #--- stitch the grouped data with the labels - pdfResults = pdfScaled.copy() - pdfResults.insert(0, "hasAnom?", pdfPredict[0]) - - #--- filter to only those rows that are flagged with an anomaly - pdfResults = pdfResults[pdfResults['hasAnom?'] > 0] - - jsonSample = pdfResults.head(50).to_json(orient="records", indent=4) - result = json.loads(jsonSample) - return result -''' \ No newline at end of file diff --git a/routes/api/rte_tiles.py b/routes/api/rte_tiles.py deleted file mode 100644 index 5d1b2349f815a91a043ff9b0cd804a74a7b99744..0000000000000000000000000000000000000000 --- a/routes/api/rte_tiles.py +++ /dev/null @@ -1,198 +0,0 @@ -from fastapi import APIRouter, Request, Response -from fastapi.responses import HTMLResponse -import numpy as np -import cv2 -import os - - -import main as libMain -from lib import utils as libUtils - - -m_kstrFile = __file__ -m_blnTraceOn = True - -m_kstrPath_templ = libUtils.pth_templ - - -rteTiles = APIRouter() - - -#--- -@rteTiles.get('/') -def api_tiles(): - return { - "message": "tiles api endpoint - welcome to the endpoint for tile image processing" - } - - -#--- -@rteTiles.get('/raw/upload') -def api_tilesRawUpload(): - ''' - process an array of uploaded raw Tiles (from external app path) - - cleanup all old raw images in /data/tiles/raw - - save uploads to /data/tiles/raw - - create tile class obj; capture file path, size, zoomMagnif, etc - - create array of tile class objs - - return(s) json - - ack tile/raw uploads with info/attribs - ''' - return { - "message": "tilesRawUpload endpoint - file processing of raw tile images" - } - - -@rteTiles.get('/raw/norm') -def api_tilesRawNormalize(strPthTile): - ''' - process an array of uploaded raw Tiles (from internal app path) - - cleanup all old norm images in /data/tiles/norm - - process tile normalization ops - - save norm tiles to /data/tiles/norm - - create tile class obj; capture file path, size, zoomMagnif, etc - - return(s) json - - ack tile/norms with info/attribs - ''' - #--- get file attributes - strFilPath, strFilName = os.path.split(strPthTile) - strPthRaw = strPthTile - - #--- load the tile as a binary object - with open(strPthRaw,"rb") as filRaw: - imgRaw = filRaw.read() - - #--- Resize Tiles to 256x256 - #--- Note: imgTile is a buffer object. 
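The normalize handler that continues below decodes the uploaded tile bytes and resizes them to 256x256, but its save step, as written, builds the output path as a ("data/tiles/norm", name) tuple and writes imgResized.buffer, an attribute NumPy arrays do not expose. A hedged corrected sketch of the decode-resize-save sequence, using cv2.imwrite for the encoding; the output folder comes from the handler's own comments.

#--- decode -> resize -> save for one raw tile; fixes the tuple path and the ndarray write below
import os
import cv2
import numpy as np

def normalize_tile(strPthRaw: str, strDirNorm: str = "data/tiles/norm") -> str:
    with open(strPthRaw, "rb") as filRaw:
        imgRaw = filRaw.read()

    #--- decode the byte buffer into a BGR image and resize to the 256x256 tile size
    aryNp = np.frombuffer(imgRaw, np.uint8)
    imgTemp = cv2.imdecode(aryNp, cv2.IMREAD_COLOR)
    imgResized = cv2.resize(imgTemp, (256, 256))

    #--- join folder and file name properly and let OpenCV handle the file encoding
    os.makedirs(strDirNorm, exist_ok=True)
    strPthNorm = os.path.join(strDirNorm, os.path.basename(strPthRaw))
    cv2.imwrite(strPthNorm, imgResized)
    return strPthNorm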
- aryNp = np.frombuffer(imgRaw, np.uint8) - imgTemp = cv2.imdecode(aryNp, cv2.IMREAD_COLOR) - imgResized = cv2.resize(imgTemp, (256, 256)) - - #--- save the normalized file - imgNorm = imgResized - strPthNorm = "data/tiles/norm", strFilName - with open(os.path.join(strPthNorm),"wb") as filNorm: - filNorm.write(imgResized.buffer) - return strPthNorm - """ return { - "message": "tileRawNorm endpoint - normalization of raw tile images" - } - """ - -@rteTiles.get('/norm/upload') -def api_tilesNormUpload(): - ''' - process an array of uploaded norm Tiles (from external app path) - - cleanup all old norm images in /data/tiles/norm - - save uploads to /data/tiles/norm - - create tile class obj; capture file path, size, zoomMagnif, etc - - create array of tile class objs - - return(s) json - - ack tile/norm uploads with info/attribs - ''' - return { - "message": "tilesNormUpload endpoint - file processing of norm tile images" - } - - -@rteTiles.get('/norm/preprocess') -def api_tilesNormPreprocess(): - ''' - preprocess an array of uploaded norm Tiles (from internal app path) - - perform remaining pre-processing of tiles prior to model prediction - - cleanup all old preproc images in /data/tiles/preproc - - save preproc tiles to /data/tiles/preproc - - create tile class obj; capture file path, size, zoomMagnif, etc - - return(s) json - - ack tile/preproc with info/attribs - ''' - return { - "message": "tileNormPreprocess endpoint - preprocessing of normalized tile images" - } - - -@rteTiles.get('/preproc/upload') -def api_tilesPreprocUpload(): - ''' - process an array of uploaded preprocessed Tiles (from external app path) - - cleanup all old preproc images in /data/tiles/preproc - - save uploads to /data/tiles/preproc - - create tile class obj; capture file path, size, zoomMagnif, etc - - create array of tile class objs - - return(s) json - - ack tile/preproc uploads with info/attribs - ''' - return { - "message": "tilesPreprocUpload endpoint - manage upload of preprocessed tile images, in prep for modelling/prdictions" - } - - -@rteTiles.get('/preproc/augment') -def api_tilesPreprocAugment(): - ''' - process an array of uploaded preprocessed tiles (from internal app path) - - cleanup all old augmented tiles in /data/tiles/augm - - perform augments of tiles prior to model prediction (translation, rotation, transforms) - - save augmented tiles to /data/tiles/augm - - create tile class obj; capture file path, size, zoomMagnif, etc - - return(s) json - - ack tile/augm with info/attribs - ''' - return { - "message": "tilePreprocAugment endpoint - augment tile images" - } - - -@rteTiles.get('/augm/upload') -def api_tilesAugmUpload(): - ''' - process an array of augmented tiles (from external app path) - - cleanup all old augm images in /data/tiles/augm - - save uploads to /data/tiles/augm - - create tile class obj; capture file path, size, zoomMagnif, etc - - create array of tile class objs - - return(s) json - - ack tile/augm uploads with info/attribs - ''' - return { - "message": "tilesAugmUpload endpoint - manage upload of augmented tile images, in prep for modelling/predictions" - } - - -#--- -@rteTiles.get('/raw/predict') -def api_tileRawPredict(): - return { - "message": "tile_rawPredict api endpoint - welcome to the endpoint for tile predictions" - } - - -#--- -@rteTiles.get('/norm/segment') -def api_tileNormPredict(): - return { - "message": "tile_normPredict api endpoint - welcome to the endpoint for tile predictions" - } - -#--- -@rteTiles.get('/norm/predict') -def api_tileNormPredict(): - 
return { - "message": "tile_normPredict api endpoint - welcome to the endpoint for tile predictions" - } - - -#--- -@rteTiles.get('/preproc/predict') -def api_tilePreprocPredict(): - return { - "message": "tile_preprocPredict api endpoint - welcome to the endpoint for tile predictions" - } - - -#--- -@rteTiles.get('/augm/predict') -def api_tileAugmPredict(): - return { - "message": "tile_augmPredict api endpoint - welcome to the endpoint for tile predictions" - } \ No newline at end of file diff --git a/routes/api/rte_wsi.py b/routes/api/rte_wsi.py deleted file mode 100644 index 1176222931bc5fb05190a6770eeab3477421f32d..0000000000000000000000000000000000000000 --- a/routes/api/rte_wsi.py +++ /dev/null @@ -1,56 +0,0 @@ -from fastapi import APIRouter, Request, Response -from fastapi.responses import HTMLResponse - - -import main as libMain -from lib import utils as libUtils - - -m_kstrFile = __file__ -m_blnTraceOn = True - -m_kstrPath_templ = libUtils.pth_templ - - -rteWsi = APIRouter() - - -#--- -@rteWsi.get('/') -def api_wsi(): - return { - "message": "wsi api endpoint - welcome to the endpoint for wsi image processing" - } - - -#--- -@rteWsi.get('/upload') -def api_wsiUpload(): - ''' - process a single uploaded WSI image (from external app path) - - cleanup all old WSI images in /data/wsi/raw - - save upload to /data/wsi/raw - - create wsi class obj; capture file path, size, zoomMagnif, etc - - return(s) json - - ack wsi upload with info/attribs - ''' - return { - "message": "wsiUpload endpoint - file processing of one uploaded wsi image" - } - - -#--- -@rteWsi.get('/chunk') -def api_wsiChunk(): - ''' - process a single WSI image (from internal app path) - - create wsi class obj; capture file path, size, zoomMagnif, etc - - kick off tile chunking process; - - save tiles to /data/tiles/raw - - return(s) json - - ack wsi upload with info/attribs - - ack of tiles created: total count; names, paths, attribs (dimensions) - ''' - return { - "message": "wsiLoad endpoint - for chunking of wsi image to one or more tiles" - } diff --git a/routes/qa/__init__.py b/routes/qa/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/routes/qa/rte_qa.py b/routes/qa/rte_qa.py deleted file mode 100644 index 479196c9d05cc50ed68d82cc0ca588a2cec88285..0000000000000000000000000000000000000000 --- a/routes/qa/rte_qa.py +++ /dev/null @@ -1,17 +0,0 @@ -from fastapi import APIRouter - - -m_kstrFile = __file__ -m_blnTraceOn = True - - -rteQa = APIRouter() - - -@rteQa.get('/') -@rteQa.get('/verif') -@rteQa.get('/valid') -def qa_entry(): - return { - "message": "qa routing - welcome to Omdena Saudi HCC qa" - } diff --git a/scripts/docker/util_docker_preRun.sh b/scripts/docker/util_docker_preRun.sh deleted file mode 100755 index 3232f6f220154ca378fc5f39cc441b62b909704e..0000000000000000000000000000000000000000 --- a/scripts/docker/util_docker_preRun.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -#--- Note: this file is designed to run locally as well as within docker to prep the environment -#--- Entry: this script is assumed to run from the /app root folder -#--- Usage: ./scripts/docker/util_docker_preRun.sh - -#--- for volume initialization; ensure folders are in place; assume: we are in the /app folder - - -< /dev/null && pwd ) -strpth_scrHome="${strpth_scriptLoc}/../" -strpth_appHome="${strpth_scrHome}../" -strpth_scrModels="${strpth_scrHome}models/" - -echo "strpth_appHome = ${strpth_appHome}" - -#--- recreate single model file 
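#--- editor note: the deleted route modules (rte_tiles.py, rte_wsi.py, rte_qa.py) each expose a FastAPI APIRouter. A hedged sketch of how such routers are typically mounted in the application entry point follows; the import paths match the deleted files, but the app title and URL prefixes are assumptions, since main.py is not shown in this part of the diff.

```python
from fastapi import FastAPI

from routes.api.rte_tiles import rteTiles
from routes.api.rte_wsi import rteWsi
from routes.qa.rte_qa import rteQa

app = FastAPI(title="Omdena Saudi HCC demo")            # title is illustrative

# Prefixes below are assumed for illustration; the real ones live in the project's main.py.
app.include_router(rteWsi, prefix="/api/wsi", tags=["wsi"])
app.include_router(rteTiles, prefix="/api/tiles", tags=["tiles"])
app.include_router(rteQa, prefix="/qa", tags=["qa"])
```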
from its parts, stored within a specific model version folder -strpth_binModels="${strpth_appHome}bin/models/" -echo "strpth_binModels = ${strpth_binModels}" -#$("'${strpth_scrModels}util_joinModel.sh' '${strpth_binModels}deeplabv3*vhflip30/model_a*' '${strpth_binModels}model.pth'") -#---eval "'${strpth_scrModels}/util_joinModel.sh' '${strpth_binModels}/deeplabv3*vhflip30/model_a*' '${strpth_binModels}/model.pth'" - -#--- run streamlit/fastapi -eval "'${strpth_scrHome}/streamlitFastApi/util_local_runStreamlitFastApi.sh'" \ No newline at end of file diff --git a/scripts/docker/util_local_buildDockerDemo.sh b/scripts/docker/util_local_buildDockerDemo.sh deleted file mode 100755 index 8d315e80472f3cf778460a5e73d8ba21212a4cce..0000000000000000000000000000000000000000 --- a/scripts/docker/util_local_buildDockerDemo.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -#--- Note: this file is designed to run locally to build the docker image -#--- Entry: this script is assumed to run from the /app root folder -#--- Usage: ./scripts/docker/util_local_buildDockerDemo.sh -#--- Assume: any associated containers are not running - -< img_stm_omdenasaudi_hcc:demo -> ctr_stm_omdenasaudi_hcc:demo -blockComment - - -#--- initialize/configuration -echo "TRACE: Initializing ..." -kstr_defDkrHubId="kidcoconut73" -kstr_defDkrImageName="img_stm_omdenasaudi_hcc" -kstr_defDkrTagVersion="0.1.3" -kstr_defDkrTagStage="demo" - -strpth_pwd=$(pwd) -strpth_scriptLoc=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -strpth_scrHome="${strpth_scriptLoc}/../" -strpth_appHome="${strpth_scrHome}/../" - -#--- declarations -echo "TRACE: Declarations ..." - -#strUtl_scriptLoc="$(utl_getScriptLoc)" -source ${strpth_scrHome}/util.sh - -#kstr_dkrImg="kidcoconut73/img_stm_omdenasaudi_hcc:demo" -#kstr_dkrCtr="kidcoconut73/ctr_stm_omdenasaudi_hcc:demo" -kstr_dkrHubImg="${kstr_defDkrHubId}/${kstr_defDkrImageName}:${kstr_defDkrTagStage}" -kstr_dkrImg="${kstr_defDkrImageName}:${kstr_defDkrTagVersion}" -kstr_dkrCtr="${kstr_dkrImg/img_/ctr_}" #--- bash replace one occurrence - - - -function utl_trace_config { - #echo "" - local kbln_enableLog=true - utl_logInfo $kbln_enableLog 0 "(utl_trace_config) ... echo configuration" - - utl_trace_var "strpth_pwd" $strpth_pwd - utl_trace_var "strpth_scriptLoc" $strpth_scriptLoc - echo "" - utl_trace_var "kstr_defDkrHubId" $kstr_defDkrHubId - utl_trace_var "kstr_defDkrImageName" $kstr_defDkrImageName - utl_trace_var "kstr_defDkrTagVersion" $kstr_defDkrTagVersion - utl_trace_var "kstr_defDkrTagStage" $kstr_defDkrTagStage - echo "" - utl_trace_var "kstr_dkrHubImg" $kstr_dkrHubImg - utl_trace_var "kstr_dkrImg" $kstr_dkrImg - utl_trace_var "kstr_dkrCtr" $kstr_dkrCtr - echo "" -} - -#echo -e "\nTRACE: Echo config ..." -utl_trace_config - - -#--- to build/rebuild the image; make sure you stop and remove the container if you are replacing/upgrading; or change the version tag# from 0.1 -#--- stop the container if it is running -#--- delete container if it exists -echo -e "\nTRACE: Stop and remove container if it exists ..." -docker stop $kstr_dkrCtr -docker rm $kstr_dkrCtr - -#--- build the docker image -echo -e "\nTRACE: Build the docker image ..." -docker build -t $kstr_dkrImg . 
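#--- editor note: the build script above derives the container name from the image name with the bash substitution ${kstr_dkrImg/img_/ctr_} and composes the Docker Hub tag from the hub id, image name and stage. A small Python sketch of that same naming scheme, purely as an illustration of the convention (not part of the repo):

```python
# Mirror of the naming convention used in util_local_buildDockerDemo.sh (illustrative only).
kstr_defDkrHubId = "kidcoconut73"
kstr_defDkrImageName = "img_stm_omdenasaudi_hcc"
kstr_defDkrTagVersion = "0.1.3"
kstr_defDkrTagStage = "demo"

kstr_dkrImg = f"{kstr_defDkrImageName}:{kstr_defDkrTagVersion}"      # img_stm_omdenasaudi_hcc:0.1.3
kstr_dkrCtr = kstr_dkrImg.replace("img_", "ctr_", 1)                 # ctr_stm_omdenasaudi_hcc:0.1.3
kstr_dkrHubImg = f"{kstr_defDkrHubId}/{kstr_defDkrImageName}:{kstr_defDkrTagStage}"

print(kstr_dkrImg, kstr_dkrCtr, kstr_dkrHubImg)
```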
- - -#--- to tag the image prior to push to DockerHub; docker login and then register user/image:tag -#--- to push this image to DockerHub, example based on the repo: kidcoconut73/img_stm_omdenasaudi_hcc -# docker tag img_omdenasaudi_hcc:0.1 kidcoconut73/img_stm_omdenasaudi_hcc:demo -# docker tag img_omdenasaudi_hcc:0.1 kidcoconut73/img_stm_omdenasaudi_hcc:0.1 -#--- tag the image -echo -e "\nTRACE: Tag the image ..." -docker tag ${kstr_dkrImg} $kstr_dkrHubImg -docker tag ${kstr_dkrImg} "${kstr_defDkrHubId}/${kstr_defDkrImageName}:${kstr_defDkrTagVersion}" - - -#--- prereq: make sure you are signed in through Docker Desktop -#--- push the image to dockerHub -docker push kidcoconut73/img_stm_omdenasaudi_hcc:demo diff --git a/scripts/docker/util_local_runDockerDemo.sh b/scripts/docker/util_local_runDockerDemo.sh deleted file mode 100755 index 4af8a34d2c8ebff8cd64123fcda6ee736fc3b062..0000000000000000000000000000000000000000 --- a/scripts/docker/util_local_runDockerDemo.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -#--- Note: this file is designed to run locally to launch docker -#--- Entry: this script is assumed to run from the /app root folder -#--- Usage: ./scripts/util_local_runDockerDemo.sh -#--- Assume: docker image has been built; container is not running - -< Dockerfile -> util_dockerPreRun -> util_local_runStreamlitFastApi -blockComment - - -#--- initialize/config -kstr_defDkrHubId="kidcoconut73" -kstr_defDkrImageName="img_stm_omdenasaudi_hcc" -kstr_defDkrCtrName=${kstr_defDkrImageName/img_/ctr_} -kstr_defDkrTagVersion="0.1.3" -kstr_defDkrTagStage="demo" - -kstr_dkrImg="${kstr_defDkrImageName}:${kstr_defDkrTagVersion}" -kstr_dkrCtr="${kstr_defDkrImageName/img_/ctr_}" #--- bash replace one occurrence - -#--- stop the container if it is running -docker stop $kstr_dkrCtr - -#--- delete container if it exists -docker rm $kstr_dkrCtr - -#--- to run the container from the image; specific port mapping (-p) vs any available port mapping (-P) -# docker run -p 49400:39400 -p 49500:39500 --name ctr_stmOmdenaSaudiHcc -v ./data:/app/data img_stm_omdenasaudi_hcc:0.1 - -#--- run docker demo locally -docker run -p 49400:39400 -p 49500:39500 --name $kstr_dkrCtr -v ./data:/app/data $kstr_dkrImg diff --git a/scripts/huggingface/util_local_deploy_toHugSpace.sh b/scripts/huggingface/util_local_deploy_toHugSpace.sh new file mode 100755 index 0000000000000000000000000000000000000000..f52864c504bbd07a9f99c6c3dde611f31e70ca80 --- /dev/null +++ b/scripts/huggingface/util_local_deploy_toHugSpace.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +#--- Note: this file is designed to run locally to ready the deploy branch for hugspace +#--- Entry: this script is assumed to run from the /app root folder +#--- Usage: ./scripts/huggingface/util_local_deploy_toHugSpace.sh + +#--- Prereqs: +# - deploy_dockerLocal branch image and container has been tested and tagged +# - deploy_dockerHub branch has been merged from deploy_dockerLocal, tagged and pushed an image to kidcoconut73/img_stm_omdenasaudi_hcc: +# - deploy_hugspace branch has been merged from deploy_dockerLocal, readyToDeployScript run, and pushed to hugspace + +< -# - deploy_hugspace branch has been merged from deploy_dockerLocal, readyToDeployScript run, and pushed to hugspace - -< git - git: local/task-5-deployment -> omdena/deploy_hugspace_streamlit -> hugspace/main -blockComment - - -#--- initialize/configuration -echo "TRACE: Initializing ..." 
-kstr_hugspaceId="kidcoconut" - - -#--- git checkout deploy_hugspace_streamlit -#--- git merge task-5-deployment -#--- delete all unnecessary files -< /dev/null && pwd ) - - - #--- declarations - echo "TRACE: Declarations ..." - - #strUtl_scriptLoc="$(utl_getScriptLoc)" - source ${strpth_scriptLoc}/util.sh - - #kstr_dkrImg="kidcoconut73/img_stm_omdenasaudi_hcc:demo" - #kstr_dkrCtr="kidcoconut73/ctr_stm_omdenasaudi_hcc:demo" - kstr_dkrHubImg="${kstr_defDkrHubId}/${kstr_defDkrImageName}:${kstr_defDkrTagStage}" - kstr_dkrImg="${kstr_defDkrImageName}:${kstr_defDkrTagVersion}" - kstr_dkrCtr="${kstr_dkrImg/img_/ctr_}" #--- bash replace one occurrence - - - - function utl_trace_config () { - echo "" - utl_trace_var "strpth_pwd" $strpth_pwd - utl_trace_var "strpth_scriptLoc" $strpth_scriptLoc - echo "" - utl_trace_var "kstr_defDkrHubId" $kstr_defDkrHubId - utl_trace_var "kstr_defDkrImageName" $kstr_defDkrImageName - utl_trace_var "kstr_defDkrTagVersion" $kstr_defDkrTagVersion - utl_trace_var "kstr_defDkrTagStage" $kstr_defDkrTagStage - echo "" - utl_trace_var "kstr_dkrHubImg" $kstr_dkrHubImg - utl_trace_var "kstr_dkrImg" $kstr_dkrImg - utl_trace_var "kstr_dkrCtr" $kstr_dkrCtr - echo "" - } - - echo -e "\nTRACE: Echo config ...\n" - utl_trace_config - - - #--- to build/rebuild the image; make sure you stop and remove the container if you are replacing/upgrading; or change the version tag# from 0.1 - #--- stop the container if it is running - #--- delete container if it exists - echo -e "\nTRACE: Stop and remove container if it exists ..." - docker stop $kstr_dkrCtr - docker rm $kstr_dkrCtr - - #--- build the docker image - echo -e "\nTRACE: Build the docker image ..." - docker build -t $kstr_dkrImg . - - - #--- to tag the image prior to push to DockerHub; docker login and then register user/image:tag - #--- to push this image to DockerHub, example based on the repo: kidcoconut73/img_stm_omdenasaudi_hcc - # docker tag img_omdenasaudi_hcc:0.1 kidcoconut73/img_stm_omdenasaudi_hcc:demo - # docker tag img_omdenasaudi_hcc:0.1 kidcoconut73/img_stm_omdenasaudi_hcc:0.1 - #--- tag the image - echo -e "\nTRACE: Tag the image ..." - docker tag ${kstr_dkrImg} $kstr_dkrHubImg - docker tag ${kstr_dkrImg} "${kstr_defDkrHubId}/${kstr_defDkrImageName}:${kstr_defDkrTagVersion}" - - - #--- push the image to dockerHub - # docker push kidcoconut73/img_stm_omdenasaudi_hcc:demo -deadCode \ No newline at end of file diff --git a/scripts/models/util_joinModel.sh b/scripts/models/util_joinModel.sh deleted file mode 100755 index 9fb7d520c64fb333b7f669611272bacf1c18a963..0000000000000000000000000000000000000000 --- a/scripts/models/util_joinModel.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -< - - the first arg has to be wrapped in single quotes to ensure that bash does not expand wildcards -Prereqs: a model folder within bin/models; containing a split pyTorch model.pth as 1 or more model_nn files -Todo: get the parent folder name and use this as the name for the model file -blkHeader - -#--- dependencies -#none - - -#--- initialize/configuration -#--- $1: first arg; source pattern match; eg './bin/models/deeplabv3*vhflip30/model_a*'; Note that this is wildcarded so must be in quotes -#--- $n: last arg; dest model file; eg. ./bin/models/model.pth -echo -e "INFO(util_joinModel):\t Initializing ..." -strPth_patternMatch=$1 -if [ -z "$strPth_patternMatch" ]; then - echo "WARN: no args provided. Exiting script." 
- exit -fi - -strPth_filMatch=( $strPth_patternMatch ) #--- expand the pattern match; get the first value of the pattern match -strPth_parentFld=$(dirname $strPth_filMatch) #--- get the parent dir of the first file match -strPth_mdlFile=${@: -1} #--- Note: this gets the last arg; otherwise the 2nd arg would be an iteration of the 1st arg wildcard - -strpth_pwd=$(pwd) -strpth_scriptLoc=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -strpth_scrHome="${strpth_scriptLoc}/../" -strpth_appHome="${strpth_scrHome}/../" - -#echo "TRACE: strPth_patternMatch= $strPth_patternMatch" -#echo "TRACE: strPth_filMatch= $strPth_filMatch" -#echo "TRACE: strPth_parentFld= $strPth_parentFld" -#echo "TRACE: strPth_mdlFile= $strPth_mdlFile" - -#--- reconstitute model -#--- Note: cat command does not work with single-quote literals; do not reapply single quotes -#echo "cat ${strPth_patternMatch} > ${strPth_mdlFile}" -echo -e "INFO:\t Joining model binary ..." -cat ${strPth_patternMatch} > ${strPth_mdlFile} -echo -e "INFO:\t Done ...\n" \ No newline at end of file diff --git a/scripts/models/util_splitModel.sh b/scripts/models/util_splitModel.sh deleted file mode 100755 index 1d7f9dc8065629196aeb24ab97a161597fc2bc41..0000000000000000000000000000000000000000 --- a/scripts/models/util_splitModel.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -< - - the first arg has to be wrapped in single quotes to ensure that bash does not expand wildcards -Prereqs: a pytorch model file -Todo: get the parent folder name and use this as the name for the model file -blkHeader - -#--- dependencies -#none - - -#--- initialization/configuration -#--- $1: first arg; the source model file; eg ./bin/models/model.pth -#--- $n: last arg; dest model path; eg. ./test_model_folder -strPth_mdlFile=$1 -strPth_mdlFolder=$2 -strPrefix='/model_' - -if [ -z "$strPth_mdlFile" ] || [ -z "$strPth_mdlFolder" ]; then - echo "WARN: no args provided. Exiting script." - exit -fi - -strpth_pwd=$(pwd) -strpth_scriptLoc=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -strpth_scrHome="${strpth_scriptLoc}/../" -#strpth_ignHome="${strpth_scrHome}/../" -strpth_appHome="${strpth_scrHome}/../" - -#echo "TRACE: strPth_mdlFile= $strPth_mdlFile" -echo "TRACE: strPth_mdlFolder= $strPth_mdlFolder" - -#--- ensure the target dir exists -mkdir -p $strPth_mdlFolder - -#--- split the model into smaller chunks -echo "split -b 10M $strPth_mdlFile $strPth_mdlFolder$strPrefix" -split -b 10M $strPth_mdlFile $strPth_mdlFolder$strPrefix - -echo -e "INFO:\t Done ...\n" \ No newline at end of file diff --git a/scripts/streamlitFastApi/util_local_runStreamlitFastApi.sh b/scripts/streamlitFastApi/util_local_runStreamlitFastApi.sh deleted file mode 100755 index d949e9186f32b75fa91d810c23d63ddc19ad8ce5..0000000000000000000000000000000000000000 --- a/scripts/streamlitFastApi/util_local_runStreamlitFastApi.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -#--- Note: this file is designed to run locally and within docker to prep the environment -#--- Entry: this script is assumed to run from the /app root folder -#--- Usage: ./scripts/util_local_runStreamlitFastApi.sh -echo -e "INFO(util_local_runStreamlitFastApi):\t Initializing ..." 
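#--- editor note: util_joinModel.sh and util_splitModel.sh above work around binary-size limits by storing model.pth as ~10 MB chunks (split -b 10M) and concatenating them back (cat ... > model.pth) at run time. A rough Python equivalent of both directions is sketched below; it assumes, as the shell scripts do, that the chunk filenames sort lexicographically in the correct order.

```python
import glob
import os

def split_model(strPth_mdlFile: str, strPth_mdlFolder: str, intChunkBytes: int = 10 * 1024 * 1024) -> None:
    """Split a model binary into fixed-size chunks, mirroring `split -b 10M`."""
    os.makedirs(strPth_mdlFolder, exist_ok=True)
    with open(strPth_mdlFile, "rb") as filIn:
        for idx, chunk in enumerate(iter(lambda: filIn.read(intChunkBytes), b"")):
            with open(os.path.join(strPth_mdlFolder, f"model_{idx:03d}"), "wb") as filOut:
                filOut.write(chunk)

def join_model(strPattern: str, strPth_mdlFile: str) -> None:
    """Concatenate sorted chunk files back into one model binary, mirroring `cat parts > model.pth`."""
    with open(strPth_mdlFile, "wb") as filOut:
        for strPth_part in sorted(glob.glob(strPattern)):
            with open(strPth_part, "rb") as filPart:
                filOut.write(filPart.read())
```

#--- for example, join_model('bin/models/deeplabv3*vhflip30/model_a*', 'bin/models/model.pth') would reproduce the reconstitution that util_docker_preRun.sh performs through the shell script.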
- -#--- for volume initialization; ensure folders are in place; assume: we are in the /app folder -mkdir -p data/demo_tiles/raw -mkdir -p data/tiles/raw data/tiles/pred data/tiles/grad_bg data/tiles/grad_wt data/tiles/grad_vt -mkdir -p data/wsi/raw - -#--- for streamlit; external 49400; internal 39400 -echo "INFO: starting streamlit ..." -streamlit run app.py --server.port=39400 --server.maxUploadSize=2000 & - -#--- for fastapi; external 49500; internal 39500 -echo "INFO: starting fastapi ..." - -#--- uvicorn main:app --reload --workers 1 --host 0.0.0.0 --port 39500 & -uvicorn main:app --reload --workers 1 --host 0.0.0.0 --port 39500 - -#--- wait for any process to exit -#wait -n - -#--- Exit with status of process that exited first -#exit $? \ No newline at end of file diff --git a/templ/templ_results.html b/templ/templ_results.html deleted file mode 100644 index 869fc19b1478a53258a247255e17ee4ba6a72adb..0000000000000000000000000000000000000000 --- a/templ/templ_results.html +++ /dev/null @@ -1,4 +0,0 @@ - - - {{ dataframe | safe }} - \ No newline at end of file diff --git a/templ/templ_showDataframe.html b/templ/templ_showDataframe.html deleted file mode 100644 index 47ff53b5cfd8549e8f1ec797083aafa99f7eb4d7..0000000000000000000000000000000000000000 --- a/templ/templ_showDataframe.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - - Fourthbrain Capstone: Healthcare Anomalies - - - - -

{{ paramTitle }}:

- - - {{ paramDataframe | safe }} - - \ No newline at end of file diff --git a/uix/__init__.py b/uix/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/uix/lit_packages.py b/uix/lit_packages.py deleted file mode 100644 index 875b49858210da4b4e55f78a266ce92252085a9e..0000000000000000000000000000000000000000 --- a/uix/lit_packages.py +++ /dev/null @@ -1,31 +0,0 @@ -import importlib - - -#--- return a list of streamlit packages/pages to render -def packages(): - #--- - ary_pkg = [] - ary_pkg.extend(['lit_continentData', - 'lit_countryData' - ]) - return ary_pkg - - - -def get_aryPkgDescr(): - #--- load list of pages to display - aryDescr = [] - aryPkgs = [] - - aryModules = packages() - for modname in aryModules: - m = importlib.import_module('.'+ modname,'uix') - aryPkgs.append(m) - - #--- use the module description attribute if it exists - #--- otherwise use the module name - try: - aryDescr.append(m.description) - except: - aryDescr.append(modname) - return [aryDescr, aryPkgs] \ No newline at end of file diff --git a/uix/lit_sidebar.py b/uix/lit_sidebar.py deleted file mode 100644 index 4dcf6910f3c0e1688ce1d00ee4146df1ba81f536..0000000000000000000000000000000000000000 --- a/uix/lit_sidebar.py +++ /dev/null @@ -1,43 +0,0 @@ -import streamlit as st -import importlib -from uix import lit_packages - -from uix.pages import lit_home, lit_about, lit_diagnosis -from uix.pages import lit_qaConfigCheck - -m_kblnTraceOn=False - - -#--- alt define sidebar pages -m_aryPages = { - "Home": lit_home, #--- TODO: update - "Diagnosis: Single Tile": lit_diagnosis, - #"QA: File Check": lit_qaConfigCheck, - "About": lit_about -} - - -#--- define module-level vars -m_aryModNames = lit_packages.packages() -m_aryDescr = [] -m_aryMods = [] - - -def init(): - #--- upper panel - with st.sidebar: - kstrUrl_image = "bin/images/logo_omdena_saudi.png" - st.sidebar.image(kstrUrl_image, width=200) - - #--- get radio selection - strKey = st.sidebar.radio("rdoPageSel", list(m_aryPages.keys()), label_visibility="hidden") - pagSel = m_aryPages[strKey] - writePage(pagSel) - - -def writePage(uixFile): - #--- writes out the page for the selected combo - - # _reload_module(page) - uixFile.run() - diff --git a/uix/pages/__init__.py b/uix/pages/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/uix/pages/lit_about.py b/uix/pages/lit_about.py deleted file mode 100644 index 55451b9c2935c90b44a7e5ba9b44c8fbe055ebe1..0000000000000000000000000000000000000000 --- a/uix/pages/lit_about.py +++ /dev/null @@ -1,26 +0,0 @@ -#--- about page -import streamlit as st - -description = "About" -def run(): - - print("\nINFO (lit_about.run) loading ", description, " page ...") - - #--- - #st.experimental_memo.clear() #--- try to clear cache each time this page is hit - #st.cache_data.clear() - - #st.markdown('### About') - #st.markdown('### Omdena Saudi Arabia') - #st.markdown('### Detecting Liver Cancer from Histopathology WSI Using Deep Learning and Explainability') - #st.markdown('#### Dr. 
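#--- editor note: lit_packages.get_aryPkgDescr() above loads each Streamlit page module by name with importlib and falls back to the module name when no description attribute exists. A condensed sketch of that pattern follows; the module names in the usage line come from the deleted uix/pages package, while the helper itself is illustrative.

```python
import importlib

def load_pages(aryModNames, strPackage="uix"):
    """Import each page module and pair it with its display description."""
    aryDescr, aryPkgs = [], []
    for strModName in aryModNames:
        mod = importlib.import_module("." + strModName, strPackage)
        aryPkgs.append(mod)
        # Prefer the module's own `description` attribute; fall back to its name.
        aryDescr.append(getattr(mod, "description", strModName))
    return aryDescr, aryPkgs

# e.g. load_pages(["lit_home", "lit_about"], strPackage="uix.pages")  # hypothetical call against the layout above
```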
Shaista Hussain (Saudi Arabia Chapter Lead)') - #st.markdown('##### Deployment Lead: Iain McKone') - st.markdown('##### Project Url: https://github.com/OmdenaAI/saudi-arabia-histopathology-detection') - ''' - st.markdown( - """ - About page - """, - unsafe_allow_html=True, - ) - ''' \ No newline at end of file diff --git a/uix/pages/lit_diagnosis.py b/uix/pages/lit_diagnosis.py deleted file mode 100644 index 8f4efb2fa59413c3473a319fbad17a5a063baeb5..0000000000000000000000000000000000000000 --- a/uix/pages/lit_diagnosis.py +++ /dev/null @@ -1,536 +0,0 @@ -#--- anomaly detection - supervised page -import streamlit as st -import pandas as pd -import plotly.express as px -import plotly.graph_objects as go -import numpy as np - -import matplotlib.pyplot as plt -from PIL import Image -import torch -import torch.nn as nn -import torch.nn.functional as F -from torchvision.models.segmentation import deeplabv3_resnet50 -from torchvision.transforms.functional import to_tensor -from pytorch_grad_cam import GradCAM -from pytorch_grad_cam.utils.image import show_cam_on_image - -import lib.utils as libUtils - -import sys -import os, random, io - -description = "Diagnosis" -m_kblnTraceOn = True #--- enable/disable module level tracing - - -#--- model initializations -#data_batch_size = 3 #--- decrease the number of images loaded, processed if the notebook crashes due to limited RAM -#NUM_EPOCHS = 10 # 50 -#BATCH_SIZE = data_batch_size -NUM_CLASSES = 3 - -# path to save model weights -#BESTMODEL_PATH = r"model_deeplabv3_r50_full_training_dataset_80-20_split_10epochs_no-norm_vhflip30.pth" #--- path to save model weights -BESTMODEL_PATH = r"model.pth" -MODEL_FULLPATH = 'bin/models/' + BESTMODEL_PATH -model_path = MODEL_FULLPATH - -DEFAULT_DEVICE_TYPE = ('cuda' if torch.cuda.is_available() else 'cpu') #--- cuda if gpu; cpu if on Colab Free -DEFAULT_BACKBONE_MODEL = 'r50' -backbone_model_name = DEFAULT_BACKBONE_MODEL - - - -def image_toBytesIO(image: Image) -> bytes: - #--- BytesIO is a file-like buffer stored in memory - imgByteArr = io.BytesIO() - - #--- image.save expects a file-like as a argument - image.save(imgByteArr, format=image.format) - - return imgByteArr - - -def image_toByteArray(image: Image) -> bytes: - #--- convert image to bytesIO - imgByteArr = image_toBytesIO(image) - - #--- Turn the BytesIO object back into a bytes object - imgByteArr = imgByteArr.getvalue() - return imgByteArr - - -def run(): - #--- note: in python, you need to specify global scope for fxns to access module-level variables - global m_kbln_TraceOn - print("\nINFO (litDiagnosis.run) loading ", description, " page ...") - - - #--- page settings - if (m_kblnTraceOn): print("TRACE1 (litDiagnosis.run): Initialize Page Settings ...") - #st.header("Single Tile Diagnosis") - st.markdown("#### Single Tile Diagnosis") - - #--- allow the user to select a random sample - imgUploaded = None - if st.button("Random Sample"): - #--- get a random sample file - strPth_sample = libUtils.pth_dtaTileSamples - strFil_sample = random.choice(os.listdir(strPth_sample)) - strFullPth_sample = os.path.join(strPth_sample, strFil_sample) - - print("INFO (litDiagnosis.run): sample file selected ... 
", strFullPth_sample) - - #--- display; convert file image to bytesIO - imgSample = Image.open(strFullPth_sample) - imgSample = image_toBytesIO(imgSample) - imgUploaded = imgSample - imgUploaded.name = strFil_sample - imgUploaded.type = os.path.splitext(strFil_sample)[1] - - - #--- provide file drag/drop capability - m_blnDisableDragDrop = False - #if(not m_blnDisableDragDrop): - #btnSave = st.button("Save") - imgDropped = st.file_uploader("Upload a single Tile", - type=["png", "jpg", "tif", "tiff", "img"], - accept_multiple_files=False ) - #m_blnDisableDragDrop = (imgDropped is None) - #--- - if (imgDropped is not None): - imgUploaded = imgDropped - - - if (imgUploaded is None): - if (m_kblnTraceOn): print("ERROR (litDiagnosis.run): imgUploaded is None ...") - else: - try: - #--- display uploaded file details - if (m_kblnTraceOn): print("TRACE1 (litDiagnosis.run): Print uploaded file details ...") - st.write("FileName:", "   ", imgUploaded.name) - st.write("FileType:", "   ", imgUploaded.type) - - #--- show: - #if (m_kblnTraceOn): print("TRACE (litDiagnosis.run): load WSI ...") - #if (m_blnDisableDragDrop): - #--- load wsi - # print("") - #else: - - #--- display diagnosis results ... format (vertical) - #showDiagnosis_vert(imgUploaded) - showDiagnosis_horiz(imgUploaded) - - except TypeError as e: - print("ERROR (litDiagnosis.run_typeError1): ", e) - - except: - e = sys.exc_info() - print("ERROR (litDiagnosis.run_genError1): ", e) - - - try: - - #--- display WSI - #showImg_wsi(img) - #st.image("bin/images/sample_wsi.png", use_column_width=True) - - print("") - - except TypeError as e: - print("ERROR (litDiagnosis.run_typeError2): ", e) - - except: - e = sys.exc_info() - print("ERROR (litDiagnosis.run_genError2): ", e) - - -def showImg_wsi(img): - print("") - - -def readyModel_getPreds(imgDropped): - print("TRACE: readyModel_getPreds ...") - print("INFO: save raw tile ...") - strPth_tilRaw = save_tilRaw(imgDropped) - - #--- ready the model - print("INFO: ready base model ...") - mdlBase = readyBaseModel() - print("INFO: ready model with weights ...") - mdlWeights = readyModelWithWeights(mdlBase) - print("INFO: ready model with xai ...") - mdlXai = readyModelWithXAI(mdlWeights) - - #--- get the XAI weighted prediction - print("INFO: get xai weighted pred ...") - output_pred, tns_batch = predXai_tile(mdlXai, strPth_tilRaw) - - #--- get the GRADCAM predictions - print("INFO: get GRADCAM preds ...") - cam_img_bg, cam_img_wt, cam_img_vt = predGradCam_tile(output_pred, mdlXai, tns_batch) - - print("TRACE: return readyModel_getPreds ...") - return strPth_tilRaw, output_pred, cam_img_bg, cam_img_wt, cam_img_vt - - -def showDiagnosis_horiz(imgDropped): - - #--- copy the uploaded file to data/tiles/raw - st.write("#") - - #--- ready the model, get predictions - print("TRACE2: ready model ...") - strPth_tilRaw, xai_pred, cam_img_bg, cam_img_wt, cam_img_vt = readyModel_getPreds(imgDropped) - - #--- display the raw prediction: headers - print("TRACE2: display raw preds, headers ...") - colRaw, colPred, colGradBack, colGradWhole, colGradViable = st.columns(5) - colRaw.write("Raw Tile") - colPred.write("Prediction") - colGradBack.write("GradCAM: Background") - colGradWhole.write("GradCAM: Whole Tumor") - colGradViable.write("GradCAM: Viable Tumor") - - #--- display the raw prediction: images - colRaw, colPred, colGradBack, colGradWhole, colGradViable = st.columns(5) - showCol_rawTil(colRaw, strPth_tilRaw) - showCol_predTil(colPred, xai_pred[0], strPth_tilRaw) - showCol_gradCamImg("imgGradCam_bg", 
colGradBack, cam_img_bg[0]) - showCol_gradCamImg("imgGradCam_wt", colGradWhole, cam_img_wt[0]) - showCol_gradCamImg("imgGradCam_vt", colGradViable, cam_img_vt[0]) - - -def showCol_rawTil(colRaw, strPth_tilRaw): - print("TRACE3: showCol_rawTil ...") - colRaw.image(strPth_tilRaw, width=400, use_column_width=True) - -#--- Dark blue -> Background -# Brown -> Whole tumor -# Green/Aqua -> Viable tumor -def showCol_predTil(colPred, xai_pred, strPth_tilRaw): - kstrPth_tilePred = "data/tiles/pred/" - strFilName = os.path.basename(strPth_tilRaw) - strFil_tilePred = kstrPth_tilePred + strFilName - - #--- make sure the dir exists - ensureDirExists(kstrPth_tilePred) - - print("TRACE3: showCol_predTil2 ... ", strFil_tilePred) - argmax_mask = torch.argmax(xai_pred, dim=0) - preds = argmax_mask.cpu().squeeze().numpy() - - cmap = plt.cm.get_cmap('tab10', 3) # Choose a colormap with 3 colors - print("TRACE3: typeOf(preds) ...", type(preds)) - - print("TRACE3: save pred image ...") - plt.imsave(strFil_tilePred, preds, cmap=cmap, vmin=0, vmax=2) - - print("TRACE3: load image ...", strFil_tilePred) - colPred.image(strFil_tilePred, width=400, use_column_width=True) - - -def showCol_gradCamImg(strImgContext, colGradCam, cam_img): - print("TRACE3: showCol_gradImg ... ", strImgContext) - imgGradCam = Image.fromarray(cam_img) - colGradCam.image(imgGradCam, width=400, use_column_width=True) - - -def showDiagnosis_vert(imgDropped): - - #--- copy the uploaded file to data/tiles/raw - st.write("#") - - #--- ready the model, get predictions - strPth_tilRaw, xai_pred, cam_img_bg, cam_img_wt, cam_img_vt = readyModel_getPreds(imgDropped) - - #--- display all predictions - ''' - strPth_tilPred = save_tilPred(output_pred) - strPth_tilGradBg = save_tilGradBg(cam_img_bg) - strPth_tilGradWt = None - strPth_tilGradVt = None - ''' - - #--- display the raw image - lstDescr = ["Raw Tile", "Prediction", "GradCam: Background", "GradCam: Whole Tumor", "GradCam: Viable Tumor"] - lstImages = [strPth_tilRaw, strPth_tilRaw, strPth_tilRaw, strPth_tilRaw, strPth_tilRaw] - - #--- display the raw prediction - for imgIdx in range(len(lstImages)): - colDescr, colImage = st.columns([0.25, 0.75]) - colDescr.write(lstDescr[imgIdx]) - colImage.image(lstImages[imgIdx], width=400, use_column_width=True) - - -def ensureDirExists(strPth): - blnExists = os.path.exists(strPth) - if not blnExists: - os.makedirs(strPth) - print("TRACE: creating dir ... ", strPth) - - -def save_tilRaw(imgDropped): - print("TRACE: save_tilRaw ...") - #--- copy the uploaded raw Tile to data/tiles/raw - kstrPth_tileRaw = "data/tiles/raw/" - strFil_tileRaw = kstrPth_tileRaw + imgDropped.name - print("TRACE: save_tilRaw.file ... ", strFil_tileRaw) - - #--- make sure the dir exists - ensureDirExists(kstrPth_tileRaw) - - #--- check if the file already exists; delete - if (os.path.isfile(strFil_tileRaw)): - print("WARN: save_tilRaw.file exists; delete ... ", strFil_tileRaw) - os.remove(strFil_tileRaw) - - with open(strFil_tileRaw,"wb") as filUpload: - filUpload.write(imgDropped.getbuffer()) - print("TRACE: uploaded file saved to ", strFil_tileRaw) - return strFil_tileRaw - - -def prepare_model(backbone_model="mbv3", num_classes=2): - - # Initialize model with pre-trained weights. 
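#--- editor note: showCol_predTil above turns the model's per-class logits into a class-index mask with torch.argmax and writes it out through matplotlib's tab10 colormap (dark blue = background, brown = whole tumor, green/aqua = viable tumor, per the comment above). A standalone sketch of that rendering step, assuming a (C, H, W) logits tensor; plt.get_cmap is used here where the deleted code called plt.cm.get_cmap, which newer matplotlib releases drop.

```python
import matplotlib.pyplot as plt
import torch

def save_pred_mask(tnsLogits: torch.Tensor, strPth_out: str, num_classes: int = 3) -> None:
    """Collapse per-class logits (C, H, W) into a class-index mask and save it as a colour image."""
    preds = torch.argmax(tnsLogits, dim=0).cpu().numpy()    # (H, W), values 0..num_classes-1
    cmap = plt.get_cmap("tab10", num_classes)               # one distinct colour per class
    plt.imsave(strPth_out, preds, cmap=cmap, vmin=0, vmax=num_classes - 1)
```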
- weights = 'DEFAULT' - if backbone_model == "mbv3": - model = None - #model = deeplabv3_mobilenet_v3_large(weights=weights) - - elif backbone_model == "r50": - model = deeplabv3_resnet50(weights=weights) - - elif backbone_model == "r101": - model = None - #model = deeplabv3_resnet101(weights=weights) - - else: - raise ValueError("Wrong backbone model passed. Must be one of 'mbv3', 'r50' and 'r101' ") - - # Update the number of output channels for the output layer. - # This will remove the pre-trained weights for the last layer. - model.classifier[-1] = nn.Conv2d(model.classifier[-1].in_channels, num_classes, kernel_size=1) - model.aux_classifier[-1] = nn.Conv2d(model.aux_classifier[-1].in_channels, num_classes, kernel_size=1) - return model - - -# computes IoU or Dice index -def intermediate_metric_calculation(predictions, targets, use_dice=False, - smooth=1e-6, dims=(2, 3)): - # dims corresponding to image height and width: [B, C, H, W]. - - # Intersection: |G ∩ P|. Shape: (batch_size, num_classes) - intersection = (predictions * targets).sum(dim=dims) + smooth - - # Summation: |G| + |P|. Shape: (batch_size, num_classes). - summation = (predictions.sum(dim=dims) + targets.sum(dim=dims)) + smooth - - if use_dice: - # Dice Shape: (batch_size, num_classes) - metric = (2.0 * intersection) / summation - else: - # Union. Shape: (batch_size, num_classes) - union = summation - intersection + smooth - - # IoU Shape: (batch_size, num_classes) - metric = intersection / union - - # Compute the mean over the remaining axes (batch and classes). - # Shape: Scalar - total = metric.mean() - - #print(f"iou = {total}") - return total - - -def convert_2_onehot(matrix, num_classes=3): - ''' - Perform one-hot encoding across the channel dimension. - ''' - matrix = matrix.permute(0, 2, 3, 1) - matrix = torch.argmax(matrix, dim=-1) - matrix = torch.nn.functional.one_hot(matrix, num_classes=num_classes) - matrix = matrix.permute(0, 3, 1, 2) - return matrix - - -#--- I'm using just categorical cross_entropy for now -class Loss(nn.Module): - def __init__(self): - super().__init__() - - def forward(self, predictions, targets): - # predictions --> (B, #C, H, W) unnormalized - # targets --> (B, #C, H, W) one-hot encoded - targets = torch.argmax(targets, dim=1) - pixel_loss = F.cross_entropy(predictions, targets, reduction="mean") - - return pixel_loss - - -class Metric(nn.Module): - def __init__(self, num_classes=3, smooth=1e-6, use_dice=False): - super().__init__() - self.num_classes = num_classes - self.smooth = smooth - self.use_dice = use_dice - - def forward(self, predictions, targets): - # predictions --> (B, #C, H, W) unnormalized - # targets --> (B, #C, H, W) one-hot encoded - - # Converting unnormalized predictions into one-hot encoded across channels. - # Shape: (B, #C, H, W) - predictions = convert_2_onehot(predictions, num_classes=self.num_classes) # one hot encoded - metric = intermediate_metric_calculation(predictions, targets, use_dice=self.use_dice, smooth=self.smooth) - - # Compute the mean over the remaining axes (batch and classes). 
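#--- editor note: as a quick numeric check of the IoU branch of intermediate_metric_calculation above (smoothing term ignored for clarity), take one image, two one-hot class planes and 2x2 masks; the per-class intersection, summation and union then work out as in the comments below.

```python
import torch

# Toy example: batch of 1, two one-hot class planes, 2x2 spatial size.
preds  = torch.tensor([[[[1., 1.], [0., 0.]],    # predicted class-0 plane
                        [[0., 0.], [1., 1.]]]])  # predicted class-1 plane -> shape (1, 2, 2, 2)
target = torch.tensor([[[[1., 0.], [0., 0.]],    # ground-truth class-0 plane
                        [[0., 1.], [1., 1.]]]])

dims = (2, 3)                                              # sum over H and W
intersection = (preds * target).sum(dim=dims)              # tensor([[1., 2.]])
summation    = preds.sum(dim=dims) + target.sum(dim=dims)  # tensor([[3., 5.]])
union        = summation - intersection                    # tensor([[2., 3.]])
print((intersection / union).mean())                       # (1/2 + 2/3) / 2 ~= 0.5833
```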
Shape: Scalar - return metric - - -def get_default_device(): - return torch.device('cuda' if torch.cuda.is_available() else 'cpu') - - -def readyBaseModel(): - - #--- prep model conditions - device = get_default_device() - model = prepare_model(backbone_model=backbone_model_name, num_classes=NUM_CLASSES) - - metric_name = "iou" - use_dice = (True if metric_name == "dice" else False) - metric_fn = Metric(num_classes=NUM_CLASSES, use_dice=use_dice).to(device) - loss_fn = Loss().to(device) - optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) - - return model - - -def readyModelWithWeights(mdlBase): - print("TRACE: loading model with weights ... ", model_path) - mdlBase.load_state_dict(torch.load(model_path, map_location=torch.device('cpu'))) - model_with_weights = mdlBase - model_with_weights.eval() - return model_with_weights - - -class SegmentationModelOutputWrapper(torch.nn.Module): - def __init__(self, model): - super(SegmentationModelOutputWrapper, self).__init__() - self.model = model - - def forward(self, x): - return self.model(x)["out"] - - -def readyModelWithXAI(mdlWeighted): - model_xai = SegmentationModelOutputWrapper(mdlWeighted) - - model_xai.eval() - model_xai.to('cpu') - return model_xai - - -#--- demo: process a single file for validation/demo -def val_filToTensor(strPth_fil): - img_fil = Image.open(strPth_fil) - img_fil = img_fil.convert("RGB") - img_fil = np.asarray(img_fil)/255 - return to_tensor(img_fil).unsqueeze(0) - - -#--- TODO demo: process a batch of files for validation/demo -def val_aryToTensor(pth_fil, ary_fils): - aryTensor = [] - for str_filName in ary_fils: - aryTensor.append(val_filToTensor(pth_fil, str_filName)) - return aryTensor - - -def predXai_tile(mdl_xai, strPth_tileRaw): - #--- run a prediction for a single - print("TRACE: get tensor from single file ... 
", strPth_tileRaw) - val_tensorFil = val_filToTensor(strPth_tileRaw) - val_tensorBatch = val_tensorFil - - print("TRACE: get mdl_xai prediction ...") - output = mdl_xai(val_tensorBatch.float().to('cpu')) - - print("TRACE: predXai_tile return ...") - return output, val_tensorBatch - - -class SemanticSegmentationTarget: - def __init__(self, category, mask): - self.category = category - self.mask = torch.from_numpy(mask) - if torch.cuda.is_available(): - self.mask = self.mask.cuda() - - def __call__(self, model_output): - return (model_output[self.category, :, : ] * self.mask).sum() - - -def predGradCam_tile(output_xaiPred, mdl_xai, val_image_batch): - print("TRACE: predGradCam initialize ...") - cam_img_bg = [] - cam_img_wt = [] - cam_img_vt = [] - - sem_classes = ['__background__', 'whole_tumor', 'viable_tumor'] - sem_class_to_idx = {cls: idx for (idx, cls) in enumerate(sem_classes)} - - argmax_mask = torch.argmax(output_xaiPred, dim=1) - argmax_mask_np = argmax_mask.cpu().squeeze().numpy() - preds = argmax_mask_np - - seg_mask = preds - bg_category = sem_class_to_idx["__background__"] - bg_mask_float = np.float32(seg_mask == bg_category) - wt_category = sem_class_to_idx["whole_tumor"] - wt_mask_float = np.float32(seg_mask == wt_category) - vt_category = sem_class_to_idx["viable_tumor"] - vt_mask_float = np.float32(seg_mask == vt_category) - - target_layers = [mdl_xai.model.backbone.layer4] - - for i in range(len(val_image_batch)): - rgb_img = np.float32(val_image_batch[i].permute(1, 2, 0)) - rgb_tensor = val_image_batch[i].unsqueeze(0).float() - - print("TRACE: process the background ...") - targets = [SemanticSegmentationTarget(bg_category, bg_mask_float[i])] - with GradCAM(model=mdl_xai, - target_layers=target_layers, - use_cuda=torch.cuda.is_available()) as cam: - - grayscale_cam = cam(input_tensor = rgb_tensor, - targets = targets)[0, :] - cam_img_bg.append(show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)) - - print("TRACE: process whole tumors ...") - targets = [SemanticSegmentationTarget(wt_category, wt_mask_float[i])] - with GradCAM(model=mdl_xai, - target_layers=target_layers, - use_cuda=torch.cuda.is_available()) as cam: - - grayscale_cam = cam(input_tensor = rgb_tensor, - targets = targets)[0, :] - cam_img_wt.append(show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)) - - print("TRACE: process viable tumors ...") - targets = [SemanticSegmentationTarget(vt_category, vt_mask_float[i])] - with GradCAM(model=mdl_xai, - target_layers=target_layers, - use_cuda=torch.cuda.is_available()) as cam: - - grayscale_cam = cam(input_tensor = rgb_tensor, - targets = targets)[0, :] - cam_img_vt.append(show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)) - - return cam_img_bg, cam_img_wt, cam_img_vt diff --git a/uix/pages/lit_home.py b/uix/pages/lit_home.py deleted file mode 100644 index 0666b235214f827ea8e131163e30aeba66e64e5e..0000000000000000000000000000000000000000 --- a/uix/pages/lit_home.py +++ /dev/null @@ -1,76 +0,0 @@ -#--- about page -import streamlit as st - -description = "Home" -def run(): - - print("\nINFO (lit_home.run) loading ", description, " page ...") - - - #st.markdown('### Home') - #st.markdown('### Omdena Saudi Arabia') - #st.markdown('### Detecting Liver Cancer from Histopathology WSI Using Deep Learning and Explainability') - st.markdown('#### Background ') - st.markdown('\ - Hepatocellular Carcinoma (HCC) is a primary liver malignancy, with \ - alarming global impact. 
It is the 4th most common cause of cancer \ - mortality worldwide, and the 6th most common malignancy overall. \ - \ - A patient\'s prognosis increases markedly with the speed of diagnosis \ - and treatment, however the rates of occurrence are increasing at an \ - alarming rate which will commensurately challenge the medical \ - community. \ - \ - There are already several tools and technologies available to assist \ - pathologists, however the current approach is ultimately constrained by \ - a number of factors including: the rising demand, a limited supply \ - of skilled specialists, the time required to grow/replenish this talent \ - pool, and human factors which influence quality, accuracy, consistency, \ - and speed (timeliness). \ - ') - - st.markdown('#### Claim ') - st.markdown('\ - It is the desire of this project team to increase the prognosis of \ - hepatocellular cancer patients.\ - \ - Machine Learning techniques, specifically Deep Learning and \ - Explainability (XAI) show promise in mimic\'ing the role of the \ - pathologist. \ - \ - MLOps promises to establish a baseline for performance\ - and a basis for continuous process improvement. This could greatly \ - reduce human factor elements while accelerating the times and \ - increasing the volumes of response.\ - \ - As a minimum, an ML application can serve as a supplement to the\ - pathologist, a teaching aide, a verification tool, or as a framework\ - for community collaboration and the advancement of quality diagnosis.\ - ') - - st.markdown('#### Objectives ') - st.markdown('\ - A key objective of this project is to produce a deployed app that will\ - enable pathologists to upload a digital liver histopathology slide\ - image and then receive an output that classifies the segment as\ - malignant (or not). \ - \ - The utilization of Machine Learning and Explainability Techniques \ - to the traditional process of Liver Histopathology and HCC Diagnosis \ - could serve to greatly reduce the time to diagnosis and treatment. 
\ - \ - ') - ''' - st.markdown( - """ - - Home page - - """, - unsafe_allow_html=True, - ) - - ''' \ No newline at end of file diff --git a/uix/pages/lit_qaConfigCheck.py b/uix/pages/lit_qaConfigCheck.py deleted file mode 100644 index a7bb8872241c02260a27cb7254313c1db904df00..0000000000000000000000000000000000000000 --- a/uix/pages/lit_qaConfigCheck.py +++ /dev/null @@ -1,88 +0,0 @@ -#--- about page -import streamlit as st -import sys, os -import pandas as pd - -import lib.utils as libUtils - - -description = "QA: Config Check" -def run(): - - print("\nINFO (lit_config.run) loading ", description, " page ...") - - #--- - #st.experimental_memo.clear() #--- try to clear cache each time this page is hit - #st.cache_data.clear() - - st.markdown('### Configuration Check') - - #--- check that base folders exist - #--- list raw WSIs - lstWSI = os.listdir(libUtils.pth_dtaWsi + "raw/") - print("TRACE: ", lstWSI) - st.dataframe( - pd.DataFrame({"Raw WSI": lstWSI,}), - use_container_width=True - ) - - #--- list raw Tiles - lstTiles = os.listdir(libUtils.pth_dtaTiles + "raw/") - print("TRACE: ", lstTiles) - st.dataframe( - pd.DataFrame({"Raw Tiles": lstTiles,}), - use_container_width=True - ) - - #--- list raw demo Tiles - lstDemo = os.listdir(libUtils.pth_dtaDemoTiles + "raw/") - print("TRACE: ", lstDemo) - st.dataframe( - pd.DataFrame({"Raw Demo Tiles": lstDemo,}), - use_container_width=True - ) - - - st.markdown(''' - - ''', unsafe_allow_html=True) - - -# st.markdown( - # st.footer( - # """ - # Configuration Check page - # """, - # unsafe_allow_html=True, - # ) - - cssFooter=""" - - - """ - st.markdown(cssFooter, unsafe_allow_html=True) \ No newline at end of file