# This code is based on https://github.com/Mael-zys/T2M-GPT.git
import torch.nn as nn

from models.encdec import Encoder, Decoder
from models.quantize_cnn import QuantizeEMAReset, Quantizer, QuantizeEMA, QuantizeReset


class VQVAE_251(nn.Module):
    def __init__(self,
                 args,
                 nb_code=1024,
                 code_dim=512,
                 output_emb_width=512,
                 down_t=3,
                 stride_t=2,
                 width=512,
                 depth=3,
                 dilation_growth_rate=3,
                 activation='relu',
                 norm=None):
        super().__init__()
        self.code_dim = code_dim
        self.num_code = nb_code
        self.quant = args.quantizer
        # KIT-ML motions have 251 feature dims per frame; HumanML3D has 263.
        input_dim = 251 if args.dataname == 'kit' else 263
        self.encoder = Encoder(input_dim, output_emb_width, down_t, stride_t, width, depth,
                               dilation_growth_rate, activation=activation, norm=norm)
        self.decoder = Decoder(input_dim, output_emb_width, down_t, stride_t, width, depth,
                               dilation_growth_rate, activation=activation, norm=norm)
        if args.quantizer == "ema_reset":
            self.quantizer = QuantizeEMAReset(nb_code, code_dim, args)
        elif args.quantizer == "orig":
            self.quantizer = Quantizer(nb_code, code_dim, 1.0)
        elif args.quantizer == "ema":
            self.quantizer = QuantizeEMA(nb_code, code_dim, args)
        elif args.quantizer == "reset":
            self.quantizer = QuantizeReset(nb_code, code_dim, args)
        else:
            # Fail early instead of hitting an AttributeError at call time.
            raise ValueError(f"Unknown quantizer: {args.quantizer}")

    def preprocess(self, x):
        # (bs, T, Jx3) -> (bs, Jx3, T)
        x = x.permute(0, 2, 1).float()
        return x

    def postprocess(self, x):
        # (bs, Jx3, T) -> (bs, T, Jx3)
        x = x.permute(0, 2, 1)
        return x

    def encode(self, x):
        N, T, _ = x.shape
        x_in = self.preprocess(x)
        x_encoder = self.encoder(x_in)
        x_encoder = self.postprocess(x_encoder)
        x_encoder = x_encoder.contiguous().view(-1, x_encoder.shape[-1])  # (N*T', C), T' = downsampled length
        code_idx = self.quantizer.quantize(x_encoder)
        code_idx = code_idx.view(N, -1)  # (N, T') codebook indices
        return code_idx

    def encode_x(self, x):
        N, T, _ = x.shape
        x_in = self.preprocess(x)
        x_encoder = self.encoder(x_in)
        x_encoder = self.postprocess(x_encoder)
        x_encoder = x_encoder.contiguous().view(-1, x_encoder.shape[-1])  # (N*T', C)
        return x_encoder  # pre-quantization encoder features, (N*T', 512)

    def forward(self, x):
        x_in = self.preprocess(x)
        # encode
        x_encoder = self.encoder(x_in)
        # quantize
        x_quantized, loss, perplexity = self.quantizer(x_encoder)
        # decode
        x_decoder = self.decoder(x_quantized)
        x_out = self.postprocess(x_decoder)
        return x_out, loss, perplexity

    def forward_decoder(self, x):
        # x: flat sequence of codebook indices; a batch size of 1 is assumed here.
        x_d = self.quantizer.dequantize(x)
        x_d = x_d.view(1, -1, self.code_dim).permute(0, 2, 1).contiguous()
        # decode
        x_decoder = self.decoder(x_d)
        x_out = self.postprocess(x_decoder)
        return x_out


class HumanVQVAE(nn.Module):
    def __init__(self,
                 args,
                 nb_code=512,
                 code_dim=512,
                 output_emb_width=512,
                 down_t=3,
                 stride_t=2,
                 width=512,
                 depth=3,
                 dilation_growth_rate=3,
                 activation='relu',
                 norm=None):
        super().__init__()
        self.nb_joints = 21 if args.dataname == 'kit' else 22
        self.vqvae = VQVAE_251(args, nb_code, code_dim, output_emb_width, down_t, stride_t,
                               width, depth, dilation_growth_rate, activation=activation, norm=norm)

    def encode(self, x):
        b, t, c = x.size()
        quants = self.vqvae.encode(x)  # (N, T') codebook indices
        return quants

    def encode_x(self, x):
        b, t, c = x.size()
        feats = self.vqvae.encode_x(x)  # (N*T', C) pre-quantization features
        return feats

    def forward(self, x):
        x_out, loss, perplexity = self.vqvae(x)
        return x_out, loss, perplexity

    def forward_decoder(self, x):
        x_out = self.vqvae.forward_decoder(x)
        return x_out
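

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the upstream T2M-GPT code). It
# assumes an `args` namespace exposing the two fields this module reads,
# `dataname` and `quantizer`; the real training script passes many more.
# The "orig" quantizer is chosen because it reads no extra args fields.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import torch
    from types import SimpleNamespace

    args = SimpleNamespace(dataname="t2m", quantizer="orig")  # hypothetical minimal args
    model = HumanVQVAE(args, nb_code=512, code_dim=512)

    # HumanML3D motions are (batch, frames, 263) feature vectors; with the
    # defaults (down_t=3, stride_t=2) the encoder downsamples time by 8x.
    motion = torch.randn(2, 64, 263)
    recon, commit_loss, perplexity = model(motion)  # reconstruction pass
    codes = model.encode(motion)                    # (2, 8) token indices
    print(recon.shape, codes.shape)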