# avenir-python: Machine Learning
# Author: Pranab Ghosh
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You may
# obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# implied. See the License for the specific language governing
# permissions and limitations under the License.
# Package imports
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib
# Command line handling: exactly six arguments are required. Without them the
# positional lookups below would fail, so print the usage text and stop.
if len(sys.argv) != 7:
    print("usage: <num_hidden_units> <data_set_size> <noise_in_data> <iteration_count> <learning_rate> <training_mode> ")
    sys.exit(1)

# number of hidden units
nn_hdim = int(sys.argv[1])

# data set size
dsize = int(sys.argv[2])

# noise in training data
noise_level = float(sys.argv[3])

# iteration count
it_count = int(sys.argv[4])

# learning rate
epsilon = float(sys.argv[5])

# training mode ("batch" or "incr")
training_mode = sys.argv[6]

# when True the periodic loss is computed on the validation data,
# otherwise on the training data
use_validation_data = True
# Generate a dataset. The rows in [vlo, vup) are later held out as validation
# data; the slice holds one fifth of the requested data set size.
# NOTE: assumes dsize is large enough for [vlo, vup) to lie inside the data
# set; otherwise the validation slice is truncated or empty.
vlo = 100
# floor division keeps the slice bound an integer on Python 2 and Python 3
vup = vlo + dsize // 5
vsize = vup - vlo
# vsize is the size of the held-out validation slice, not of the training data
print("validation data size %d" % (vsize))

XC, yc = sklearn.datasets.make_moons(dsize, noise=noise_level)
print("complete data set generated")
def print_array(X, y):
    """Print the feature array X followed by the label array y."""
    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print(X)
    print(y)
# Validation data: the rows [vlo, vup) of the complete data set
XV = XC[vlo:vup:1]
yv = yc[vlo:vup:1]
print("validation data generated")

# Training data: everything outside the validation slice
X = np.delete(XC, np.s_[vlo:vup:1], 0)
y = np.delete(yc, np.s_[vlo:vup:1], 0)
print("training data generated")
print(X)
print(y)

# Parameters
num_examples = len(X)  # training set size
nn_input_dim = 2  # input layer dimensionality
nn_output_dim = 2  # output layer dimensionality

# training data indices
tr_data_indices = np.arange(num_examples)

# Gradient descent parameters; the learning rate (epsilon) comes from the
# command line
reg_lambda = 0.01  # regularization strength
# Helper function to evaluate the total loss on the dataset
def calculate_loss(X, y, model):
    """Return the mean regularized cross-entropy loss of the model on a data set.

    X     -- input matrix, one example per row
    y     -- integer class label for each row of X
    model -- dict holding the parameters under 'W1', 'b1', 'W2' and 'b2'

    The L2 penalty uses the module-level reg_lambda. The sum of the data loss
    and the penalty is divided by the number of examples.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    size = len(X)

    # Forward propagation to calculate our predictions
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    # Subtracting the row maximum leaves the softmax unchanged but keeps
    # exp() from overflowing on large scores.
    exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    # Negative log probability assigned to the correct class of each example
    correct_logprobs = -np.log(probs[np.arange(size), y])
    data_loss = np.sum(correct_logprobs)

    # Add regularization term to loss
    data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./size * data_loss
# Helper function to predict an output (0 or 1)
def predict(model, x):
    """Return the predicted class index for every row of x.

    model -- dict holding the parameters under 'W1', 'b1', 'W2' and 'b2'
    x     -- input matrix, one example per row

    The result is an array with the index of the most probable output unit
    for each row.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']

    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    # Subtracting the row maximum leaves the softmax unchanged but keeps
    # exp() from overflowing on large scores.
    exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
# This function learns parameters for the neural network in batch mode and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - validation_interval: The loss is printed every validation_interval iterations
def build_model_batch(nn_hdim, num_passes=10000, validation_interval=50):
    """Train the network with full-batch gradient descent and return the model.

    nn_hdim             -- number of nodes in the hidden layer
    num_passes          -- number of gradient descent iterations, each using
                           the whole training set
    validation_interval -- the loss is computed and printed every
                           validation_interval iterations

    Training data (X, y, num_examples), the layer sizes, the learning rate
    epsilon, reg_lambda and the validation data (XV, yv) are read from module
    level. Returns a dict with the parameters 'W1', 'b1', 'W2' and 'b2'.
    """
    # Initialize the parameters to random values. We need to learn these.
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end. The arrays are updated in place
    # below, so the dict always refers to the current parameters.
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    # Gradient descent. For each batch...
    for i in range(num_passes):
        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        # Subtracting the row maximum leaves the softmax unchanged but keeps
        # exp() from overflowing on large scores.
        exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # Back propagation
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2

        # This is expensive because it uses the whole dataset, so we don't
        # want to do it too often.
        if i % validation_interval == 0:
            if use_validation_data:
                cur_loss = calculate_loss(XV, yv, model)
            else:
                cur_loss = calculate_loss(X, y, model)
            print("Loss after iteration %i: %.8f" % (i, cur_loss))

    return model
# This function learns parameters for the neural network in incremental mode and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - validation_interval: The loss is printed every validation_interval iterations
def build_model_incr(nn_hdim, num_passes=10000, validation_interval=50):
    """Train the network one example at a time and return the model.

    nn_hdim             -- number of nodes in the hidden layer
    num_passes          -- number of passes through the training data
    validation_interval -- the loss is computed and printed every
                           validation_interval passes

    Training data (X, y, tr_data_indices), the layer sizes, the learning rate
    epsilon, reg_lambda and the validation data (XV, yv) are read from module
    level. Returns a dict with the parameters 'W1', 'b1', 'W2' and 'b2'.
    """
    # Initialize the parameters to random values. We need to learn these.
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end. The arrays are updated in place
    # below, so the dict always refers to the current parameters.
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    # Gradient descent. For each pass...
    for i in range(num_passes):
        # shuffle training data indices; permutation() returns a shuffled
        # copy, so the module-level index array is left untouched
        shuffled_indices = np.random.permutation(tr_data_indices)

        # all training data
        for j in shuffled_indices:
            Xi = X[j].reshape(1, 2)
            yi = y[j].reshape(1)

            # Forward propagation
            z1 = Xi.dot(W1) + b1
            a1 = np.tanh(z1)
            z2 = a1.dot(W2) + b2
            # Subtracting the row maximum leaves the softmax unchanged but
            # keeps exp() from overflowing on large scores.
            exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
            probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

            # Back propagation
            delta3 = probs
            delta3[0, yi] -= 1
            dW2 = (a1.T).dot(delta3)
            db2 = np.sum(delta3, axis=0, keepdims=True)
            delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
            dW1 = np.dot(Xi.T, delta2)
            db1 = np.sum(delta2, axis=0)

            # Add regularization terms (b1 and b2 don't have regularization terms)
            dW2 += reg_lambda * W2
            dW1 += reg_lambda * W1

            # Gradient descent parameter update
            W1 += -epsilon * dW1
            b1 += -epsilon * db1
            W2 += -epsilon * dW2
            b2 += -epsilon * db2

        # This is expensive because it uses the whole dataset, so we don't
        # want to do it too often.
        if i % validation_interval == 0:
            if use_validation_data:
                cur_loss = calculate_loss(XV, yv, model)
            else:
                cur_loss = calculate_loss(X, y, model)
            print("Loss after iteration %i: %.8f" % (i, cur_loss))

    return model
# Build a model with nn_hdim hidden units using the requested training mode
# Train with the requested mode; an unknown mode leaves no model to report,
# so stop after the error message.
if training_mode == "batch":
    model = build_model_batch(nn_hdim, num_passes=it_count, validation_interval=1)
elif training_mode == "incr":
    model = build_model_incr(nn_hdim, num_passes=it_count, validation_interval=1)
else:
    print("invalid learning mode")
    sys.exit(1)

# Report the learned parameters, one row per line
print("hidden layer")
for row in model['W1']:
    print(row)

print("hidden layer bias")
for row in model['b1']:
    print(row)

print("output layer")
for row in model['W2']:
    print(row)

print("output layer bias")
for row in model['b2']:
    print(row)