In [1]:
# Install the pinned dependencies with the %pip magic so they are installed into
# the environment of the running kernel rather than whichever pip is first on
# the shell PATH.
%pip install --upgrade pip
%pip install "sagemaker==2.116.0" "huggingface_hub==0.10.1" --upgrade --quiet

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [5]:
from distutils.dir_util import copy_tree
from pathlib import Path
from huggingface_hub import snapshot_download
import random
import os
import tarfile
import time
import sagemaker
from datetime import datetime
from sagemaker.s3 import S3Uploader
import boto3
from sagemaker.huggingface.model import HuggingFaceModel
from threading import Thread
import subprocess
import shutil

In [2]:
# ------------------------------------------------
# Configuration
# ------------------------------------------------
# STAGE is the prefix of every generated endpoint name below.
STAGE = "prod"
# One dict per model to package and deploy:
#   "inference_2"   - when True, create_model_tar() replaces code/inference.py
#                     with code/inference2.py inside the model directory.
#   "path"          - local model directory; also the base name of the
#                     "<path>.tar.gz" archive that is built and uploaded.
#   "endpoint_name" - name passed to HuggingFaceModel.deploy(). The active
#                     entries append a minute-resolution timestamp taken when
#                     this cell is evaluated.
# Commented-out entries and fixed endpoint names are alternates kept for reference.
model_configs = [
 # {
 # "inference_2": False, 
 # "path": "icbinp",
 # "endpoint_name": "gamma-10000-2023-05-16-14-55"
 # #"endpoint_name": f"{STAGE}-10000-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 # },
 # {
 # "inference_2": False, 
 # "path": "icb_with_epi",
 # "endpoint_name": "gamma-10000-2023-05-16-14-55"
 # # "endpoint_name": f"{STAGE}-10000-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 # },
 {
 "inference_2": False, 
 "path": "model_v9",
 # "endpoint_name": "gamma-10000-2023-05-16-14-55"
 "endpoint_name": f"{STAGE}-10000-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 },
 {
 "inference_2": False, 
 "path": "model_v8",
 #"endpoint_name": "gamma-10001-2023-05-08-06-14"
 "endpoint_name": f"{STAGE}-10001-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 },
 # {
 # "inference_2": False, 
 # "path": "model_v5_anime",
 # "endpoint_name": "gamma-10001-2023-05-08-06-14"
 # #"endpoint_name": f"{STAGE}-10001-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 # },
 # {
 # "inference_2": False, 
 # "path": "model_v5.3_comic",
 # #"endpoint_name": "gamma-10002-2023-05-08-07-22"
 # "endpoint_name": f"{STAGE}-10002-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 # },
 {
 "inference_2": False, 
 "path": "model_v10",
 # "endpoint_name": "gamma-10002-2023-05-08-07-22"
 "endpoint_name": f"{STAGE}-10002-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 },
 {
 "inference_2": True, 
 "path": "model_v5.2_other",
 # "endpoint_name": "gamma-other-2023-05-04-09-33"
 "endpoint_name": f"{STAGE}-other-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 }
 # {
 # "inference_2": False, 
 # "path": "model_v6_bheem",
 # "endpoint_name": f"{STAGE}-10003-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 # },
 # {
 # "inference_2": False, 
 # "path": "model_v12",
 # "endpoint_name": "gamma-10003-2023-05-04-05-20"
 # # "endpoint_name": f"{STAGE}-10003-" + datetime.now().strftime("%Y-%m-%d-%H-%M")
 # }
]

# Network placement handed to HuggingFaceModel through its vpc_config argument:
# the subnets and the security group to attach.
VpcConfig = dict(
    Subnets=[
        "subnet-0df3f71df4c7b29e5",
        "subnet-0d753b7fc74b5ee68",
    ],
    SecurityGroupIds=[
        "sg-033a7948e79a501cd",
    ],
)

In [3]:
def compress(tar_dir=None, output_file="model.tar.gz"):
    """Pack the contents of ``tar_dir`` into a gzip-compressed tar archive.

    Every top-level entry of ``tar_dir`` is added at the root of the archive
    (the directory itself is not part of the stored names), and the name of
    each entry is printed as it is added.

    The process-wide working directory is never changed, so an error while
    archiving cannot leave the caller stranded in ``tar_dir``.

    Args:
        tar_dir: Directory whose contents are archived, resolved against the
            current working directory.
        output_file: Path of the archive to create, resolved against the
            current working directory.

    Raises:
        TypeError: If ``tar_dir`` is None.
        OSError: If ``tar_dir`` cannot be listed or the archive cannot be
            written.
    """
    base_dir = os.getcwd()
    source_dir = os.path.join(base_dir, tar_dir)
    archive_path = os.path.join(base_dir, output_file)

    # List the source first so a missing directory fails before an empty
    # archive file is created.
    entries = os.listdir(source_dir)

    with tarfile.open(archive_path, "w:gz") as tar:
        for item in entries:
            print("- " + item)
            # arcname keeps the stored path relative to the archive root.
            tar.add(os.path.join(source_dir, item), arcname=item)

 
def create_model_tar(config):
    """Stage the inference code inside the model directory and build its tarball.

    All paths are relative to the current working directory. The steps are:

    1. Replace ``<path>/code`` with a fresh copy of ``code/``.
    2. Copy ``laur_style/`` into ``<path>/laur_style``, merging into an
       existing directory.
    3. When ``config["inference_2"]`` is truthy, make ``code/inference2.py``
       the archive's ``code/inference.py``.
    4. Delete any previous ``<path>.tar.gz`` and compress the model directory
       into a new one.

    Args:
        config: Mapping with ``"path"`` (model directory name) and optionally
            ``"inference_2"``.
    """
    model_name = config.get("path")
    print("Copying inference 'code': " + model_name)

    model_dir = Path(model_name)
    code_dir = model_dir.joinpath("code")
    out_tar = model_name + ".tar.gz"

    # Start from a clean code directory so files removed upstream do not linger.
    if code_dir.exists():
        shutil.rmtree(code_dir)
    model_dir.mkdir(exist_ok=True)

    # shutil.copytree replaces distutils' copy_tree: distutils remembers the
    # directories it created for the lifetime of the process, so removing the
    # destination and copying again in the same kernel fails, and distutils is
    # deprecated (removed in Python 3.12).
    shutil.copytree("code/", str(code_dir), dirs_exist_ok=True)
    shutil.copytree("laur_style/", str(model_dir.joinpath("laur_style")), dirs_exist_ok=True)

    if config.get("inference_2"):
        # os.replace overwrites the destination in a single step and also works
        # when code/ ships no inference.py.
        os.replace(code_dir.joinpath("inference2.py"), code_dir.joinpath("inference.py"))

    print("Compressing: " + model_name)

    if os.path.exists(out_tar):
        os.remove(out_tar)

    compress(str(model_dir), out_tar)
 
def upload_to_s3(config):
    """Upload ``<path>.tar.gz`` to the model prefix on S3.

    Args:
        config: Mapping whose ``"path"`` gives the archive's base name.

    Returns:
        Whatever ``S3Uploader.upload`` returns for the uploaded archive.
    """
    # NOTE(review): the prefix ends with "/", which may produce a "//" in the
    # resulting URI — confirm before changing it, since that alters object keys.
    destination = "s3://comic-assets/stable-diffusion-v1-4/v2/"
    archive = config.get("path") + ".tar.gz"
    print("Uploading model to S3: " + archive)
    return S3Uploader.upload(local_path=archive, desired_s3_uri=destination)
 
 
def deploy_and_create_endpoint(config, s3_model_uri):
    """Create a Hugging Face model from an S3 archive and deploy it to an endpoint.

    Args:
        config: Mapping whose ``"endpoint_name"`` names the endpoint to create.
        s3_model_uri: S3 URI of the model archive (weights and inference code).

    Returns:
        The predictor returned by ``HuggingFaceModel.deploy``. Earlier versions
        returned nothing, so existing callers that ignore the result still work.
    """
    # Resolve the session's default bucket (used for data, models and logs),
    # then build the session that is bound to it.
    sagemaker_session_bucket = sagemaker.Session().default_bucket()
    sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

    try:
        role = sagemaker.get_execution_role()
    except ValueError:
        # Fall back to looking the execution role up by name through IAM.
        iam = boto3.client('iam')
        role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

    huggingface_model = HuggingFaceModel(
        model_data=s3_model_uri,       # path to the model and inference script
        role=role,                     # IAM role allowed to create an endpoint
        transformers_version="4.17",   # transformers version used
        pytorch_version="1.10",        # pytorch version used
        py_version='py38',             # python version used
        vpc_config=VpcConfig,
        sagemaker_session=sess,        # use the session configured above
    )

    print("Creating endpoint: " + config.get("endpoint_name"))

    return huggingface_model.deploy(
        initial_instance_count=1,
        instance_type="ml.g4dn.xlarge",
        endpoint_name=config.get("endpoint_name"),
    )

 
def start_process(config):
    """Package, upload and deploy one model; report instead of raising on failure.

    Any exception raised by a step is caught and printed together with the
    model's ``"path"``, so the caller is never interrupted.

    Args:
        config: One entry of ``model_configs``.
    """
    try:
        create_model_tar(config)
        # To redeploy an archive that is already on S3, pass its URI instead of
        # uploading, e.g.
        # "s3://comic-assets/stable-diffusion-v1-4/v2//model_v5.2_other.tar.gz"
        deploy_and_create_endpoint(config, upload_to_s3(config))
    except Exception as err:
        report = "\n".join(["Failed to deploy: " + config.get("path"), str(err)])
        print(report)

In [4]:
# Deploy every entry of model_configs and report the total elapsed time.
threads = []

# Relative paths used by the deployment helpers resolve against this directory.
os.chdir("/home/ec2-user/SageMaker")

start_time = time.time()

for config in model_configs:
 thread = Thread(target=start_process, args=(config,))
 thread.start()
 # NOTE(review): joining immediately after start() makes the deployments run
 # one at a time (only one worker thread is alive at any moment) — confirm
 # whether that is intentional before removing it.
 thread.join()
 threads.append(thread)

# Every thread was already joined inside the loop above, so this second pass
# returns immediately.
for thread in threads:
 thread.join()
 
print("\n\nCompleted in : " + str(time.time() - start_time) + "s")

# For redeploying gamma endpoints or promoting gamma endpoints to prod

# thread1 = Thread(target=deploy_and_create_endpoint, args=(model_configs[0],"s3://comic-assets/stable-diffusion-v1-4/v2//model_v9.tar.gz",))
# thread2 = Thread(target=deploy_and_create_endpoint, args=(model_configs[1],"s3://comic-assets/stable-diffusion-v1-4/v2//anime_mode_with_lora.tar.gz",))
# thread3 = Thread(target=deploy_and_create_endpoint, args=(model_configs[0],"s3://comic-assets/stable-diffusion-v1-4/v2//model_v5.3_comic.tar.gz",))
# thread4 = Thread(target=deploy_and_create_endpoint, args=(model_configs[3],"s3://comic-assets/stable-diffusion-v1-4/v2//model_v5.2_other.tar.gz",))

# thread1.start()
# thread2.start()
# thread3.start()
# thread4.start()

# thread1.join()
# thread2.join()
# thread3.join()
# thread4.join()

# print("Done")


Copying inference 'code': model_v9
Compressing: model_v9
- scheduler
- vae
- .ipynb_checkpoints
- feature_extractor
- tokenizer
- text_encoder
- model_index.json
- laur_style
- code
- unet
- args.json
Uploading model to S3: model_v9.tar.gz
Creating endpoint: gamma-10000-2023-05-16-14-55
-----------------!

Completed in : 992.3517553806305s


In [None]:

# import sagemaker
# import boto3
# import time 

# start = time.time()

# sess = sagemaker.Session()
# # sagemaker session bucket -> used for uploading data, models and logs
# # sagemaker will automatically create this bucket if it does not exist
# sagemaker_session_bucket=None
# if sagemaker_session_bucket is None and sess is not None:
# # set to default bucket if a bucket name is not given
# sagemaker_session_bucket = sess.default_bucket()

# try:
# role = sagemaker.get_execution_role()
# except ValueError:
# iam = boto3.client('iam')
# role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# print(f"sagemaker role arn: {role}")
# print(f"sagemaker bucket: {sess.default_bucket()}")
# print(f"sagemaker session region: {sess.boto_region_name}")
# print(sagemaker.get_execution_role())

# from sagemaker.s3 import S3Uploader

# print("Uploading model to S3")

# # upload model.tar.gz to s3
# s3_model_uri=S3Uploader.upload(local_path="model.tar.gz", desired_s3_uri=f"s3://comic-assets/stable-diffusion-v1-4/v2/")

# print(f"model uploaded to: {s3_model_uri}")


# from sagemaker.huggingface.model import HuggingFaceModel

# VpcConfig = {
# "Subnets": [
# "subnet-0df3f71df4c7b29e5",
# "subnet-0d753b7fc74b5ee68"
# ],
# "SecurityGroupIds": [
# "sg-033a7948e79a501cd"
# ]
# }

# # create Hugging Face Model Class
# huggingface_model = HuggingFaceModel(
# model_data=s3_model_uri, # path to your model and script
# role=role, # iam role with permissions to create an Endpoint
# transformers_version="4.17", # transformers version used
# pytorch_version="1.10", # pytorch version used
# py_version='py38',# python version used
# vpc_config=VpcConfig,
# )

# print("Deploying model")

# predictor = huggingface_model.deploy(
# initial_instance_count=1,
# instance_type="ml.g4dn.xlarge",
# # endpoint_name=endpoint_name
# )

# print(f"Done {time.time() - start}")

In [None]:
# Downloads the packagecloud git-lfs RPM setup script and pipes it straight into
# a root shell (presumably to register the git-lfs package repository).
# NOTE(review): this runs unverified remote code as root — review the script or
# verify a checksum before executing it.
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash

In [None]:
# Install git-lfs. -y answers yum's confirmation prompt, which cannot be
# answered interactively from a notebook cell.
!sudo yum install -y git-lfs