Spaces:

NCTCMumbai
/

NCTC

Running

App Files Files Community

NCTC / models /research /lfads /utils.py

NCTCMumbai

Upload 2571 files

0b8359d over 1 year ago

raw

history blame

12.4 kB

	# Copyright 2017 Google Inc. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	# ==============================================================================
	from __future__ import print_function

	import os
	import h5py
	import json

	import numpy as np
	import tensorflow as tf


	def log_sum_exp(x_k):
	  """Compute log(sum_i exp(x_i)) without overflowing the exponential.

	  The maximum m = max_i x_i is subtracted before exponentiating and added
	  back afterwards, using the identity
	    log(sum_i exp(x_i)) = log(sum_i exp(x_i - m)) + m.

	  Args:
	    x_k: The tensor of values to combine. Both the max and the sum are taken
	      over every element (no axis is passed to either reduction).

	  Returns:
	    The tensorflow op yielding the log-sum-exp of all elements of x_k.
	  """
	  peak = tf.reduce_max(x_k)
	  # Every shifted exponent is <= 0, so exp() cannot overflow.
	  shifted_total = tf.reduce_sum(tf.exp(x_k - peak))
	  return tf.log(shifted_total) + peak


	def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
	           normalized=False, name=None, collections=None):
	  r"""Linear (affine) transformation, y = x W + b, for a variety of
	  configurations.

	  The variables W and b are created by init_linear; this function only wires
	  them to the input.

	  Args:
	    x: The input tensor to transform. Its second dimension (x.shape[1]) must
	      be statically known, since it is used as the input size.
	    out_size: The integer size of non-batch output dimension.
	    do_bias (optional): Add a learnable bias vector to the operation.
	    alpha (optional): A multiplicative scaling for the weight initialization
	      of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
	    identity_if_possible (optional): If x.shape[1] == out_size, create no
	      variables and return tf.identity(x) instead.
	    normalized (optional): Option to divide out by the norms of the rows of W.
	    name (optional): The name prefix to add to variables.
	    collections (optional): List of additional collections. (Placed in
	      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)

	  Returns:
	    In the equation, y = x W + b, returns the tensorflow op that yields y.
	  """
	  in_size = int(x.get_shape()[1])  # from Dimension(10) -> 10

	  if identity_if_possible and in_size == out_size:
	    # Sometimes linear layers are nothing more than size adapters.
	    wname = (name + "/W") if name else "/W"
	    return tf.identity(x, name=(wname + '_ident'))

	  # The weight initializer is built inside init_linear from alpha and
	  # in_size, so nothing needs to be precomputed here.
	  W, b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
	                     normalized=normalized, name=name,
	                     collections=collections)

	  if do_bias:
	    return tf.matmul(x, W) + b
	  return tf.matmul(x, W)


	def init_linear(in_size, out_size, do_bias=True, mat_init_value=None,
	                bias_init_value=None, alpha=1.0, identity_if_possible=False,
	                normalized=False, name=None, collections=None, trainable=True):
	  """Create the parameters (W, b) of an affine map y = x W + b.

	  Args:
	    in_size: The integer size of the non-batch input dimension (rows of W).
	    out_size: The integer size of the non-batch output dimension (columns
	      of W).
	    do_bias (optional): Create a bias variable; if False, b is None.
	    mat_init_value (optional): Numpy array of shape (in_size, out_size) used
	      as the initial value of W. If None, W is drawn from a zero-mean normal
	      with standard deviation alpha / sqrt(in_size).
	    bias_init_value (optional): Numpy array of shape (1, out_size) used as
	      the initial value of b. If None, b starts at zero.
	    alpha (optional): Multiplicative scaling of the random initialization.
	    identity_if_possible (optional): If True and in_size == out_size, no
	      variables are created; a constant float32 identity matrix and
	      tf.zeros(in_size) are returned instead (regardless of do_bias).
	    normalized (optional): Also register W in the "norm-variables"
	      collection and return tf.nn.l2_normalize(W, dim=0) in place of W.
	    name (optional): The name prefix to add to variables.
	    collections (optional): List of additional collections. (Placed in
	      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
	    trainable (optional): Forwarded to the variable constructors.

	  Returns:
	    In the equation, y = x W + b, returns the pair (W, b).

	  Raises:
	    ValueError: If mat_init_value or bias_init_value has the wrong shape.
	  """
	  weight_shape = (in_size, out_size)
	  if mat_init_value is not None and mat_init_value.shape != weight_shape:
	    raise ValueError(
	        'Provided mat_init_value must have shape [%d, %d].'%(in_size, out_size))
	  bias_shape = (1, out_size)
	  if bias_init_value is not None and bias_init_value.shape != bias_shape:
	    raise ValueError(
	        'Provided bias_init_value must have shape [1,%d].'%(out_size,))

	  if mat_init_value is None:
	    mat_init = tf.random_normal_initializer(
	        0.0, alpha/np.sqrt(float(in_size)))

	  if identity_if_possible and in_size == out_size:
	    identity = tf.constant(np.eye(in_size).astype(np.float32))
	    return (identity, tf.zeros(in_size))

	  # tf.Variable is used when an explicit initial value was supplied;
	  # tf.get_variable (with an initializer) is used otherwise.
	  wname = (name + "/W") if name else "/W"
	  w_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
	  if normalized:
	    w_collections.append("norm-variables")
	  if collections:
	    w_collections += collections

	  if mat_init_value is None:
	    w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
	                        collections=w_collections, trainable=trainable)
	  else:
	    w = tf.Variable(mat_init_value, name=wname, collections=w_collections,
	                    trainable=trainable)
	  if normalized:
	    w = tf.nn.l2_normalize(w, dim=0)  # x W, so xW_j = sum_i x_bi W_ij

	  if not do_bias:
	    return (w, None)

	  bname = (name + "/b") if name else "/b"
	  b_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
	  if collections:
	    b_collections += collections

	  if bias_init_value is not None:
	    b = tf.Variable(bias_init_value, name=bname,
	                    collections=b_collections,
	                    trainable=trainable)
	  else:
	    b = tf.get_variable(bname, [1, out_size],
	                        initializer=tf.zeros_initializer(),
	                        collections=b_collections,
	                        trainable=trainable)

	  return (w, b)


	def write_data(data_fname, data_dict, use_json=False, compression=None):
	  """Write a dictionary of data to disk, as HDF5 (default) or as JSON.

	  The directory part of data_fname is created if it does not exist yet.
	  In HDF5 mode every '/' in a key is replaced by '_' before the dataset is
	  created, and a line is printed for each saved variable.

	  Args:
	    data_fname: The filename of the file in which to write the data.
	    data_dict: The dictionary of data to write. The keys are strings
	      and the values are numpy arrays (or, with use_json, anything that
	      json.dump can serialize).
	    use_json (optional): human readable format for simple items
	    compression (optional): The compression to use for h5py (disabled by
	      default because the library borks on scalars, otherwise try 'gzip').

	  Raises:
	    IOError: Re-raised after printing a message if the HDF5 file cannot be
	      written.
	  """
	  dir_name = os.path.dirname(data_fname)
	  # dirname is '' for a bare filename, and os.makedirs('') would raise.
	  if dir_name and not os.path.exists(dir_name):
	    os.makedirs(dir_name)

	  if use_json:
	    # json.dump emits text, so the file has to be opened in text mode; the
	    # with-block also closes the handle if serialization fails.
	    with open(data_fname, 'w') as the_file:
	      json.dump(data_dict, the_file)
	    return

	  try:
	    with h5py.File(data_fname, 'w') as hf:
	      for k, v in data_dict.items():
	        clean_k = k.replace('/', '_')
	        # Compare by value; identity of equal strings is not guaranteed.
	        if clean_k != k:
	          print('Warning: saving variable with name: ', k, ' as ', clean_k)
	        else:
	          print('Saving variable with name: ', clean_k)
	        hf.create_dataset(clean_k, data=v, compression=compression)
	  except IOError:
	    print("Cannot open %s for writing." % data_fname)
	    raise


	def read_data(data_fname):
	  """Load every top-level item of an HDF5 file into a dictionary.

	  Args:
	    data_fname: The filename of the file from which to read the data.

	  Returns:
	    A dictionary with one entry per top-level item of the file; each value
	    is the item converted with np.array. Which keys are present depends on
	    the file that was written.

	  Raises:
	    IOError: Re-raised after printing a message if the file cannot be read.
	  """
	  try:
	    loaded = {}
	    with h5py.File(data_fname, 'r') as hf:
	      for key, item in hf.items():
	        # Materialize while the file is still open.
	        loaded[key] = np.array(item)
	    return loaded
	  except IOError:
	    print("Cannot open %s for reading." % data_fname)
	    raise


	def write_datasets(data_path, data_fname_stem, dataset_dict, compression=None):
	  """Write several datasets, one file per dataset.

	  dataset_dict maps a dataset name (string) to a data dictionary. Each data
	  dictionary is handed to write_data, and the file it is written to is
	  <data_path>/<data_fname_stem>_<dataset name>.

	  Args:
	    data_path: The path to the save directory.
	    data_fname_stem: The filename stem of the files in which to write the
	      data.
	    dataset_dict: The dictionary of datasets. The keys are strings
	      and the values are data dictionaries (str -> numpy array).
	    compression (optional): The compression to use for h5py (disabled by
	      default because the library borks on scalars, otherwise try 'gzip').
	  """
	  for dataset_name, dataset in dataset_dict.items():
	    target = os.path.join(data_path, data_fname_stem) + "_" + dataset_name
	    write_data(target, dataset, compression=compression)


	def read_datasets(data_path, data_fname_stem):
	  """Load every dataset file in a directory whose name starts with a stem.

	  Each file in data_path whose name starts with data_fname_stem is loaded
	  with read_data. The dataset key is the part of the filename that follows
	  the stem and one separator character. Two entries are added to every
	  loaded dictionary, taken from the shape of its 'train_data' array:
	  'data_dim' (shape[2]) and 'num_steps' (shape[1]).

	  Args:
	    data_path: The path to the directory holding the dataset files.
	    data_fname_stem: The filename stem shared by the files to read.

	  Returns:
	    A dictionary mapping dataset key -> data dictionary.

	  Raises:
	    ValueError: If no file in data_path starts with data_fname_stem.
	  """
	  candidates = os.listdir(data_path)
	  print ('loading data from ' + data_path + ' with stem ' + data_fname_stem)

	  # Skip the stem plus the single character separating it from the key.
	  key_start = len(data_fname_stem) + 1
	  loaded = {}
	  for fname in candidates:
	    if not fname.startswith(data_fname_stem):
	      continue
	    data_dict = read_data(os.path.join(data_path,fname))
	    train_shape = data_dict['train_data'].shape
	    data_dict['data_dim'] = train_shape[2]
	    data_dict['num_steps'] = train_shape[1]
	    loaded[fname[key_start:]] = data_dict

	  if not loaded:
	    raise ValueError("Failed to load any datasets, are you sure that the "
	                     "'--data_dir' and '--data_filename_stem' flag values "
	                     "are correct?")

	  print (str(len(loaded)) + ' datasets loaded')
	  return loaded


	# NUMPY utility functions
	def list_t_bxn_to_list_b_txn(values_t_bxn):
	  """Regroup a length T list of BxN arrays into a length B list of TxN arrays.

	  B and N are taken from the shape of the first list element. Each output
	  array is freshly allocated with np.zeros, so it has numpy's default float
	  dtype whatever the input dtype is.

	  Args:
	    values_t_bxn: The length T list of BxN numpy tensors.

	  Returns:
	    The length B list of TxN numpy tensors.
	  """
	  num_steps = len(values_t_bxn)
	  batch_size, width = values_t_bxn[0].shape

	  def _gather_example(b):
	    # Collect row b of every time step into one TxN array.
	    example_txn = np.zeros([num_steps, width])
	    for t, step_bxn in enumerate(values_t_bxn):
	      example_txn[t,:] = step_bxn[b,:]
	    return example_txn

	  return [_gather_example(b) for b in range(batch_size)]


	def list_t_bxn_to_tensor_bxtxn(values_t_bxn):
	  """Stack a length T list of BxN arrays into a single BxTxN array.

	  B and N are taken from the shape of the first list element. The result is
	  allocated with np.zeros, so it has numpy's default float dtype.

	  Args:
	    values_t_bxn: The length T list of BxN numpy tensors.

	  Returns:
	    values_bxtxn: The BxTxN numpy tensor.
	  """
	  batch_size, width = values_t_bxn[0].shape
	  stacked_bxtxn = np.zeros([batch_size, len(values_t_bxn), width])
	  for t, step_bxn in enumerate(values_t_bxn):
	    # Time step t fills the t-th slice along the middle axis.
	    stacked_bxtxn[:,t,:] = step_bxn

	  return stacked_bxtxn


	def tensor_bxtxn_to_list_t_bxn(tensor_bxtxn):
	  """Split a BxTxN array along its middle axis into a length T list.

	  Each list element is np.squeeze(tensor_bxtxn[:, t, :]), i.e. a BxN array
	  from which any size-1 axis has been dropped (so B == 1 or N == 1 yields a
	  lower-rank array).

	  Args:
	    tensor_bxtxn: The BxTxN numpy tensor.

	  Returns:
	    A length T list of numpy tensors with shape BxN (after squeezing).
	  """
	  # Unpacking three values also rejects inputs that are not 3-dimensional.
	  _, num_steps, _ = tensor_bxtxn.shape
	  return [np.squeeze(tensor_bxtxn[:,t,:]) for t in range(num_steps)]


	def flatten(list_of_lists):
	  """Flatten one level of nesting and record where each item ended up.

	  Items that are lists contribute all of their elements; any other item is
	  treated as a single value. Only one level is flattened: a list nested
	  inside an inner list is kept as an element.

	  Args:
	    list_of_lists: List whose items are either lists or single values.

	  Returns:
	    flat_list: Flattened list.
	    flat_list_idxs: One list of integer positions per input item, giving the
	      indices in flat_list occupied by that item's elements. Every entry is
	      a real list (also under Python 3, where range() alone would yield a
	      range object), and is empty for an empty inner list.
	  """
	  flat_list = []
	  flat_list_idxs = []
	  for item in list_of_lists:
	    start_idx = len(flat_list)
	    if isinstance(item, list):
	      flat_list.extend(item)
	    else:  # a value
	      flat_list.append(item)
	    # Positions run from where this item started to the current end.
	    flat_list_idxs.append(list(range(start_idx, len(flat_list))))

	  return flat_list, flat_list_idxs