# diffai/__main__.py
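"""Command-line driver for DiffAI: builds networks from `models`, wraps them
in abstract training domains from `goals`/`scheduling`, and runs the
train/test loop configured by the flags defined below."""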
import copy
from timeit import default_timer as timer
from datetime import datetime
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
from torch.utils.data import Dataset
import decimal
import torch.onnx
import inspect
from inspect import getfullargspec  # getargspec was removed in Python 3.11
import os
import helpers as h
from helpers import Timer
import random
from components import *
import models
import goals
import scheduling
from goals import *
from scheduling import *
import math
import warnings
from torch.serialization import SourceChangeWarning
POINT_DOMAINS = [m for m in h.getMethods(goals) if issubclass(m, goals.Point)]
SYMMETRIC_DOMAINS = [goals.Box] + POINT_DOMAINS
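# Placeholder attribute so later lookups on torchvision.datasets do not fail
# when an Imagenet12 dataset class is unavailable (presumably replaced by a
# real loader elsewhere, e.g. in helpers).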
datasets.Imagenet12 = None
class Top(nn.Module):
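    """Wrapper pairing a concrete net with the abstract domain used to train it.

    `ty` is an abstract goal (e.g. goals.Box or a Point domain); `getSpec`
    resolves --spec to either boxSpec or curveSpec below.
    """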
def __init__(self, args, net, ty = Point):
super(Top, self).__init__()
self.net = net
self.ty = ty
self.w = args.width
self.global_num = 0
self.getSpec = getattr(self, args.spec)
self.sub_batch_size = args.sub_batch_size
self.curve_width = args.curve_width
self.regularize = args.regularize
self.speedCount = 0
self.speed = 0.0
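    # Maintain a running average of per-example training time in seconds.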
def addSpeed(self, s):
self.speed = (s + self.speed * self.speedCount) / (self.speedCount + 1)
self.speedCount += 1
def forward(self, x):
return self.net(x)
def clip_norm(self):
self.net.clip_norm()
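    # Build a width-self.w box (interval) around each input and lift it into
    # the abstract domain; this is the default training spec.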
def boxSpec(self, x, target, **kargs):
return [(self.ty.box(x, w = self.w, model=self, target=target, untargeted=True, **kargs).to_dtype(), target)]
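    # For non-symmetric domains: pair each example with its nearest (L1)
    # neighbor of the same label and build a line segment between the two,
    # split into sub-batches of size --sub-batch-size.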
def curveSpec(self, x, target, **kargs):
        if self.ty.__class__ in SYMMETRIC_DOMAINS:
            return self.boxSpec(x, target, **kargs)
batch_size = x.size()[0]
newTargs = [ None for i in range(batch_size) ]
newSpecs = [ None for i in range(batch_size) ]
bestSpecs = [ None for i in range(batch_size) ]
for i in range(batch_size):
newTarg = target[i]
newTargs[i] = newTarg
newSpec = x[i]
best_x = newSpec
best_dist = float("inf")
for j in range(batch_size):
potTarg = target[j]
potSpec = x[j]
if (not newTarg.data.equal(potTarg.data)) or i == j:
continue
curr_dist = (newSpec - potSpec).norm(1).item() # must experiment with the type of norm here
                if curr_dist <= best_dist:
                    best_dist = curr_dist  # track the best distance so the true nearest neighbor wins
                    best_x = potSpec
newSpecs[i] = newSpec
bestSpecs[i] = best_x
new_batch_size = self.sub_batch_size
batchedTargs = h.chunks(newTargs, new_batch_size)
batchedSpecs = h.chunks(newSpecs, new_batch_size)
batchedBest = h.chunks(bestSpecs, new_batch_size)
def batch(t,s,b):
t = h.lten(t)
s = torch.stack(s)
b = torch.stack(b)
            if h.use_cuda:
                t = t.cuda()  # Tensor.cuda() is not in-place; rebind the results
                s = s.cuda()
                b = b.cuda()
m = self.ty.line(s, b, w = self.curve_width, **kargs)
return (m , t)
return [batch(t,s,b) for t,s,b in zip(batchedTargs, batchedSpecs, batchedBest)]
def regLoss(self):
if self.regularize is None or self.regularize <= 0.0:
return 0
r = self.net.regularize(2)
return self.regularize * r
def aiLoss(self, dom, target, **args):
r = self(dom)
return self.regLoss() + r.loss(target = target, **args)
def printNet(self, f):
self.net.printNet(f)
# Training settings
parser = argparse.ArgumentParser(description='PyTorch DiffAI Example', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--batch-size', type=int, default=10, metavar='N', help='input batch size for training')
parser.add_argument('--test-first', type=h.str2bool, nargs='?', const=True, default=True, help='test first')
parser.add_argument('--test-freq', type=int, default=1, metavar='N', help='number of epochs to skip before testing')
parser.add_argument('--test-batch-size', type=int, default=10, metavar='N', help='input batch size for testing')
parser.add_argument('--sub-batch-size', type=int, default=3, metavar='N', help='input batch size for curve specs')
parser.add_argument('--custom-schedule', type=str, default="", metavar='net', help='Learning rate scheduling for lr-multistep. Defaults to [200,250,300] for CIFAR10 and [15,25] for everything else.')
parser.add_argument('--test', type=str, default=None, metavar='net', help='Saved net to use, in addition to any other nets you specify with -n')
parser.add_argument('--update-test-net', type=h.str2bool, nargs='?', const=True, default=False, help="should update test net")
parser.add_argument('--sgd',type=h.str2bool, nargs='?', const=True, default=False, help="use sgd instead of adam")
parser.add_argument('--onyx', type=h.str2bool, nargs='?', const=True, default=False, help="should export the net in ONNX format")
parser.add_argument('--save-dot-net', type=h.str2bool, nargs='?', const=True, default=False, help="should output in .net")
parser.add_argument('--update-test-net-name', type=str, choices = h.getMethodNames(models), default=None, help="update test net name")
parser.add_argument('--normalize-layer', type=h.str2bool, nargs='?', const=True, default=True, help="should include a training set specific normalization layer")
parser.add_argument('--clip-norm', type=h.str2bool, nargs='?', const=True, default=False, help="should clip the normal and use normal decomposition for weights")
parser.add_argument('--epochs', type=int, default=1000, metavar='N', help='number of epochs to train')
parser.add_argument('--log-freq', type=int, default=10, metavar='N', help='The frequency with which log statistics are printed')
parser.add_argument('--save-freq', type=int, default=1, metavar='N', help='The frequency with which nets and images are saved, in terms of number of test passes')
parser.add_argument('--number-save-images', type=int, default=0, metavar='N', help='The number of images to save. Should be smaller than test-size.')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate')
parser.add_argument('--lr-multistep', type=h.str2bool, nargs='?', const=True, default=False, help='learning rate multistep scheduling')
parser.add_argument('--threshold', type=float, default=-0.01, metavar='TH', help='threshold for lr schedule')
parser.add_argument('--patience', type=int, default=0, metavar='PT', help='patience for lr schedule')
parser.add_argument('--factor', type=float, default=0.5, metavar='R', help='reduction multiplier for lr schedule')
parser.add_argument('--max-norm', type=float, default=10000, metavar='MN', help='the maximum norm allowed in weight distribution')
parser.add_argument('--curve-width', type=float, default=None, metavar='CW', help='the width of the curve spec')
parser.add_argument('--width', type=float, default=0.01, metavar='W', help='the width of either the line or box')
parser.add_argument('--spec', choices = [ x for x in dir(Top) if x[-4:] == "Spec" and len(getfullargspec(getattr(Top, x)).args) == 3]
                    , default="boxSpec", help='picks which spec builder function to use for training')
parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed')
parser.add_argument("--use-schedule", type=h.str2bool, nargs='?',
const=True, default=False,
help="activate learning rate schedule")
parser.add_argument('-d', '--domain', sub_choices = None, action = h.SubAct
, default=[], help='picks which abstract goals to use for training', required=True)
parser.add_argument('-t', '--test-domain', sub_choices = None, action = h.SubAct
, default=[], help='picks which abstract goals to use for testing. Examples include ' + str(goals), required=True)
parser.add_argument('-n', '--net', choices = h.getMethodNames(models), action = 'append'
, default=[], help='picks which net to use for training') # one net for now
parser.add_argument('-D', '--dataset', choices = [n for (n,k) in inspect.getmembers(datasets, inspect.isclass) if issubclass(k, Dataset)]
, default="MNIST", help='picks which dataset to use.')
parser.add_argument('-o', '--out', default="out", help='output directory for logs, checkpoints, and images')
parser.add_argument('--dont-write', type=h.str2bool, nargs='?', const=True, default=False, help='do not write any output files if this flag is on')
parser.add_argument('--write-first', type=h.str2bool, nargs='?', const=True, default=False, help='write the initial net. Useful for comparing algorithms, a pain for testing.')
parser.add_argument('--test-size', type=int, default=2000, help='number of examples to test with')
parser.add_argument('-r', '--regularize', type=float, default=None, help='use regularization')
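# Example invocation (hypothetical net/domain names; actual choices come from
# models.py and goals.py):
#   python -m diffai -D MNIST -n FFNN -d Box -t Box --epochs 10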
args = parser.parse_args()
largest_domain = max([len(h.catStrs(d)) for d in (args.domain)] )
largest_test_domain = max([len(h.catStrs(d)) for d in (args.test_domain)] )
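# Print a training log line roughly args.log_freq times per epoch (assumes
# ~50k training examples, as in MNIST/CIFAR).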
args.log_interval = max(1, int(50000 / (args.batch_size * args.log_freq)))  # guard against a zero interval for large batch sizes
h.max_c_for_norm = args.max_norm
if h.use_cuda:
torch.cuda.manual_seed(1 + args.seed)
else:
torch.manual_seed(args.seed)
train_loader = h.loadDataset(args.dataset, args.batch_size, True, False)
test_loader = h.loadDataset(args.dataset, args.test_batch_size, False, False)
input_dims = train_loader.dataset[0][0].size()
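# Note: older torchvision exposes MNIST/CIFAR labels as `train_labels` and
# SVHN labels as `labels`; newer releases use `targets` instead.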
num_classes = int(max(getattr(train_loader.dataset, 'train_labels' if args.dataset != "SVHN" else 'labels'))) + 1
print("input_dims: ", input_dims)
print("Num classes: ", num_classes)
vargs = vars(args)
total_batches_seen = 0
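# One epoch of training: build abstract specs per batch, backprop the
# abstract loss per sub-spec, clip gradients, and replace any NaNs in
# gradients or weights with fresh Gaussian noise.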
def train(epoch, models):
global total_batches_seen
for model in models:
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
total_batches_seen += 1
time = float(total_batches_seen) / len(train_loader)
if h.use_cuda:
data, target = data.cuda(), target.cuda()
for model in models:
model.global_num += data.size()[0]
timer = Timer("train a sample from " + model.name + " with " + model.ty.name, data.size()[0], False)
lossy = 0
with timer:
for s in model.getSpec(data.to_dtype(),target, time = time):
model.optimizer.zero_grad()
loss = model.aiLoss(*s, time = time, **vargs).mean(dim=0)
lossy += loss.detach().item()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
for p in model.parameters():
if p is not None and torch.isnan(p).any():
print("Such nan in vals")
if p is not None and p.grad is not None and torch.isnan(p.grad).any():
print("Such nan in postmagic")
stdv = 1 / math.sqrt(h.product(p.data.shape))
p.grad = torch.where(torch.isnan(p.grad), torch.normal(mean=h.zeros(p.grad.shape), std=stdv), p.grad)
model.optimizer.step()
for p in model.parameters():
if p is not None and torch.isnan(p).any():
print("Such nan in vals after grad")
stdv = 1 / math.sqrt(h.product(p.data.shape))
p.data = torch.where(torch.isnan(p.data), torch.normal(mean=h.zeros(p.data.shape), std=stdv), p.data)
if args.clip_norm:
model.clip_norm()
for p in model.parameters():
if p is not None and torch.isnan(p).any():
raise Exception("Such nan in vals after clip")
model.addSpeed(timer.getUnitTime())
if batch_idx % args.log_interval == 0:
print(('Train Epoch {:12} {:'+ str(largest_domain) +'}: {:3} [{:7}/{} ({:.0f}%)] \tAvg sec/ex {:1.8f}\tLoss: {:.6f}').format(
model.name, model.ty.name,
epoch,
batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader),
model.speed,
lossy))
num_tests = 0
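# Evaluate all models: plain accuracy plus, per test domain, the provable
# robustness rate ("proved"), the provably-correct rate ("safe"), average
# certified width, and timing; also periodically checkpoints nets and images.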
def test(models, epoch, f = None):
global num_tests
num_tests += 1
class MStat:
def __init__(self, model):
model.eval()
self.model = model
self.correct = 0
class Stat:
def __init__(self, d, dnm):
self.domain = d
self.name = dnm
self.width = 0
self.max_eps = None
self.safe = 0
self.proved = 0
self.time = 0
self.domains = [ Stat(h.parseValues(d, goals), h.catStrs(d)) for d in args.test_domain ]
model_stats = [ MStat(m) for m in models ]
num_its = 0
saved_data_target = []
for data, target in test_loader:
if num_its >= args.test_size:
break
if num_tests == 1:
saved_data_target += list(zip(list(data), list(target)))
num_its += data.size()[0]
if h.use_cuda:
data, target = data.cuda().to_dtype(), target.cuda()
for m in model_stats:
with torch.no_grad():
pred = m.model(data).vanillaTensorPart().max(1, keepdim=True)[1] # get the index of the max log-probability
m.correct += pred.eq(target.data.view_as(pred)).sum()
for stat in m.domains:
timer = Timer(shouldPrint = False)
with timer:
def calcData(data, target):
box = stat.domain.box(data, w = m.model.w, model=m.model, untargeted = True, target=target).to_dtype()
with torch.no_grad():
bs = m.model(box)
org = m.model(data).vanillaTensorPart().max(1,keepdim=True)[1]
stat.width += bs.diameter().sum().item() # sum up batch loss
stat.proved += bs.isSafe(org).sum().item()
stat.safe += bs.isSafe(target).sum().item()
# stat.max_eps += 0 # TODO: calculate max_eps
                    if m.model.net.neuronCount() < 5000 or stat.domain.__class__ in SYMMETRIC_DOMAINS:  # match the class-membership check used in curveSpec
calcData(data, target)
else:
for d,t in zip(data, target):
calcData(d.unsqueeze(0),t.unsqueeze(0))
stat.time += timer.getUnitTime()
l = num_its # len(test_loader.dataset)
for m in model_stats:
if args.lr_multistep:
m.model.lrschedule.step()
pr_corr = float(m.correct) / float(l)
if args.use_schedule:
m.model.lrschedule.step(1 - pr_corr)
h.printBoth(('Test: {:12} trained with {:'+ str(largest_domain) +'} - Avg sec/ex {:1.12f}, Accuracy: {}/{} ({:3.1f}%)').format(
m.model.name, m.model.ty.name,
m.model.speed,
m.correct, l, 100. * pr_corr), f = f)
model_stat_rec = ""
for stat in m.domains:
pr_safe = stat.safe / l
pr_proved = stat.proved / l
pr_corr_given_proved = pr_safe / pr_proved if pr_proved > 0 else 0.0
h.printBoth(("\t{:" + str(largest_test_domain)+"} - Width: {:<36.16f} Pr[Proved]={:<1.3f} Pr[Corr and Proved]={:<1.3f} Pr[Corr|Proved]={:<1.3f} {}Time = {:<7.5f}" ).format(
stat.name,
stat.width / l,
pr_proved,
pr_safe, pr_corr_given_proved,
"AvgMaxEps: {:1.10f} ".format(stat.max_eps / l) if stat.max_eps is not None else "",
stat.time), f = f)
model_stat_rec += "{}_{:1.3f}_{:1.3f}_{:1.3f}__".format(stat.name, pr_proved, pr_safe, pr_corr_given_proved)
        preppedname = m.model.ty.name.replace(" ", "_").replace(",", "").replace("(", "_").replace(")", "_").replace("=", "_")
        net_file = os.path.join(out_dir, m.model.name + "__" + preppedname + "_checkpoint_" + str(epoch) + "_with_{:1.3f}".format(pr_corr))
h.printBoth("\tSaving netfile: {}\n".format(net_file + ".pynet"), f = f)
if (num_tests % args.save_freq == 1 or args.save_freq == 1) and not args.dont_write and (num_tests > 1 or args.write_first):
print("Actually Saving")
torch.save(m.model.net, net_file + ".pynet")
if args.save_dot_net:
with h.mopen(args.dont_write, net_file + ".net", "w") as f2:
m.model.net.printNet(f2)
if args.onyx:
                net_copy = copy.deepcopy(m.model.net)  # renamed from `nn` to avoid shadowing torch.nn
                net_copy.remove_norm()
                torch.onnx.export(net_copy, h.zeros([1] + list(input_dims)), net_file + ".onyx",
                                  verbose=False, input_names=["actual_input"] + ["param" + str(i) for i in range(len(list(net_copy.parameters())))], output_names=["output"])
if num_tests == 1 and not args.dont_write:
img_dir = os.path.join(out_dir, "images")
if not os.path.exists(img_dir):
os.makedirs(img_dir)
for img_num,(img,target) in zip(range(args.number_save_images), saved_data_target[:args.number_save_images]):
sz = ""
for s in img.size():
sz += str(s) + "x"
sz = sz[:-1]
img_file = os.path.join(img_dir, args.dataset + "_" + sz + "_"+ str(img_num))
if img_num == 0:
print("Saving image to: ", img_file + ".img")
with open(img_file + ".img", "w") as imgfile:
flatimg = img.view(h.product(img.size()))
for t in flatimg.cpu():
print(decimal.Decimal(float(t)).__format__("f"), file=imgfile)
with open(img_file + ".class" , "w") as imgfile:
print(int(target.item()), file=imgfile)
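# Build a Top model from a (loaded-weights net, constructor) pair: copy the
# weights into a freshly constructed net, attach the abstract domain, and set
# up the optimizer (SGD or Adam) and learning-rate schedule from the flags.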
def createModel(net, domain, domain_name):
net_weights, net_create = net
domain.name = domain_name
net = net_create()
m = {}
for (k,v) in net_weights.state_dict().items():
m[k] = v.to_dtype()
net.load_state_dict(m)
model = Top(args, net, domain)
if args.clip_norm:
model.clip_norm()
if h.use_cuda:
model.cuda()
if args.sgd:
model.optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
else:
model.optimizer = optim.Adam(model.parameters(), lr=args.lr)
if args.lr_multistep:
model.lrschedule = optim.lr_scheduler.MultiStepLR(
model.optimizer,
gamma = 0.1,
milestones = eval(args.custom_schedule) if args.custom_schedule != "" else ([200, 250, 300] if args.dataset == "CIFAR10" else [15, 25]))
else:
model.lrschedule = optim.lr_scheduler.ReduceLROnPlateau(
model.optimizer,
'min',
patience=args.patience,
threshold= args.threshold,
min_lr=0.000001,
factor=args.factor,
verbose=True)
net.name = net_create.__name__
model.name = net_create.__name__
return model
out_dir = os.path.join(args.out, args.dataset, str(args.net)[1:-1].replace(", ","_").replace("'",""),
args.spec, "width_"+str(args.width), h.file_timestamp() )
print("Saving to:", out_dir)
if not os.path.exists(out_dir) and not args.dont_write:
os.makedirs(out_dir)
print("Starting Training with:")
with h.mopen(args.dont_write, os.path.join(out_dir, "config.txt"), "w") as f:
for k in sorted(vars(args)):
h.printBoth("\t"+k+": "+str(getattr(args,k)), f = f)
print("")
def buildNet(n):
n = n(num_classes)
if args.normalize_layer:
if args.dataset in ["MNIST"]:
n = Seq(Normalize([0.1307], [0.3081] ), n)
elif args.dataset in ["CIFAR10", "CIFAR100"]:
n = Seq(Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]), n)
elif args.dataset in ["SVHN"]:
n = Seq(Normalize([0.5,0.5,0.5], [0.2, 0.2, 0.2]), n)
elif args.dataset in ["Imagenet12"]:
n = Seq(Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]), n)
n = n.infer(input_dims)
if args.clip_norm:
n.clip_norm()
return n
if args.test is not None:
test_name = None
def loadedNet():
if test_name is not None:
n = getattr(models,test_name)
n = buildNet(n)
if args.clip_norm:
n.clip_norm()
return n
else:
with warnings.catch_warnings():
warnings.simplefilter("ignore", SourceChangeWarning)
return torch.load(args.test)
net = loadedNet().double() if h.dtype == torch.float64 else loadedNet().float()
if args.update_test_net_name is not None:
test_name = args.update_test_net_name
elif args.update_test_net and '__name__' in dir(net):
test_name = net.__name__
if test_name is not None:
loadedNet.__name__ = test_name
nets = [ (net, loadedNet) ]
elif args.net == []:
raise Exception("Need to specify at least one net with either -n or --test")
else:
nets = []
for n in args.net:
m = getattr(models,n)
        net_create = (lambda m: lambda: buildNet(m))(m)  # thunk that binds m by value; Python closures capture variables by reference, so without it every entry would build the last net
net_create.__name__ = n
net = buildNet(m)
net.__name__ = n
nets += [ (net, net_create) ]
print("Name: ", net_create.__name__)
print("Number of Neurons (relus): ", net.neuronCount())
print("Number of Parameters: ", sum([h.product(s.size()) for s in net.parameters()]))
print("Depth (relu layers): ", net.depth())
print()
net.showNet()
print()
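# One model per (net, training domain) pair; if no -d domain is given, fall
# back to a plain Box of the configured width.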
if args.domain == []:
models = [ createModel(net, goals.Box(args.width), "Box") for net in nets]
else:
models = h.flat([[createModel(net, h.parseValues(d, goals, scheduling), h.catStrs(d)) for net in nets] for d in args.domain])
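# Main loop: test (and checkpoint) every --test-freq epochs, then train,
# mirroring all console output to log.txt.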
with h.mopen(args.dont_write, os.path.join(out_dir, "log.txt"), "w") as f:
startTime = timer()
for epoch in range(1, args.epochs + 1):
if f is not None:
f.flush()
if (epoch - 1) % args.test_freq == 0 and (epoch > 1 or args.test_first):
with Timer("test all models before epoch "+str(epoch), 1):
test(models, epoch, f)
if f is not None:
f.flush()
h.printBoth("Elapsed-Time: {:.2f}s\n".format(timer() - startTime), f = f)
if args.epochs <= args.test_freq:
break
with Timer("train all models in epoch", 1, f = f):
train(epoch, models)