"""Entry point for the Colorizer pipeline.

Orchestrates data ingestion, model creation, training, training tests,
and Gradio-based serving, selected via command-line flags.
"""
import argparse
import os
import torch
import mlflow
from data_ingestion import create_dataloaders, test_data_ingestion
from model import Generator, Discriminator, init_weights, test_models
from train import train, test_training
from app import setup_gradio_app
EXPERIMENT_NAME = "Colorizer_Experiment"


def setup_mlflow():
    """Return the MLflow experiment id for ``EXPERIMENT_NAME``.

    Looks the experiment up by name and creates it on first use, so the
    pipeline can be run against a fresh MLflow backend.
    """
    existing = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
    if existing is not None:
        return existing.experiment_id
    return mlflow.create_experiment(EXPERIMENT_NAME)
def _ensure_dataloader(train_loader, batch_size, purpose):
    """Return *train_loader* if already built, else try to create one.

    Prints a failure message and returns None when creation fails, so the
    caller can abort the stage. *purpose* names the stage in the messages
    ("training" / "testing").
    """
    if train_loader is not None:
        return train_loader
    print(f"Creating dataloader for {purpose}...")
    loader = create_dataloaders(batch_size=batch_size)
    if loader is None:
        print(f"Failed to create dataloader for {purpose}.")
    return loader


def _build_models(device):
    """Build a fresh (generator, discriminator) pair on *device*.

    Both networks get the project's standard weight initialization applied
    before they are returned.
    """
    generator = Generator().to(device)
    discriminator = Discriminator().to(device)
    generator.apply(init_weights)
    discriminator.apply(init_weights)
    return generator, discriminator


def run_pipeline(args):
    """Run the pipeline stages selected by the parsed CLI *args*.

    Stages (each gated by its flag, or all by --run_all): data ingestion,
    model creation/testing, training, training-process testing, and Gradio
    serving. Returns early (None) when a required stage fails.
    """
    device = torch.device(args.device)
    print(f"Using device: {device}")
    # Ensure the MLflow experiment exists before any run is logged; the id
    # itself is not needed here.
    setup_mlflow()

    train_loader = None
    if args.ingest_data or args.run_all:
        print("Starting data ingestion...")
        train_loader = create_dataloaders(batch_size=args.batch_size)
        if train_loader is None:
            print("Data ingestion failed.")
            return

    generator = None
    discriminator = None
    if args.create_model or args.train or args.run_all:
        print("Creating and testing models...")
        generator, discriminator = _build_models(device)
        if not test_models():
            print("Model creation or testing failed.")
            return

    if args.train or args.run_all:
        print("Starting model training...")
        train_loader = _ensure_dataloader(train_loader, args.batch_size, "training")
        if train_loader is None:
            return
        if generator is None or discriminator is None:
            print("Creating models for training...")
            generator, discriminator = _build_models(device)
        run_id = train(generator, discriminator, train_loader,
                       num_epochs=args.num_epochs, device=device)
        if run_id:
            print(f"Training completed. Run ID: {run_id}")
            # Persist the run id so a later --serve can find the model
            # without an explicit --run_id.
            with open("latest_run_id.txt", "w") as f:
                f.write(run_id)
        else:
            print("Training failed.")
            return

    if args.test_training:
        print("Testing training process...")
        train_loader = _ensure_dataloader(train_loader, args.batch_size, "testing")
        if train_loader is None:
            return
        if generator is None or discriminator is None:
            print("Creating models for testing...")
            generator, discriminator = _build_models(device)
        # A failed training test is reported but does not abort serving.
        if test_training(generator, discriminator, train_loader, device):
            print("Training process test passed.")
        else:
            print("Training process test failed.")

    if args.serve or args.run_all:
        print("Setting up Gradio app for serving...")
        if not args.run_id:
            # Fall back to the run id saved by the most recent training.
            try:
                with open("latest_run_id.txt", "r") as f:
                    args.run_id = f.read().strip()
            except FileNotFoundError:
                print("No run ID provided and couldn't find latest_run_id.txt")
                return
        iface = setup_gradio_app(args.run_id, device)
        iface.launch(share=args.share)
if __name__ == "__main__":
    # Build the CLI: a few value options plus a table of boolean stage flags.
    parser = argparse.ArgumentParser(description="Run Colorizer Pipeline")
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    parser.add_argument("--device", type=str, default=default_device,
                        help="Device to use (cuda/cpu)")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size for training")
    parser.add_argument("--num_epochs", type=int, default=50, help="Number of epochs to train")
    parser.add_argument("--run_id", type=str, help="MLflow run ID of the trained model for inference")
    for flag, description in (
        ("--ingest_data", "Run data ingestion"),
        ("--create_model", "Create and test the model"),
        ("--train", "Train the model"),
        ("--test_training", "Test the training process"),
        ("--serve", "Serve the model using Gradio"),
        ("--run_all", "Run all steps"),
        ("--share", "Share the Gradio app publicly"),
    ):
        parser.add_argument(flag, action="store_true", help=description)
    run_pipeline(parser.parse_args())