Gradient-checking code

Carl Pearson
2016-11-17 08:01:40 -06:00
parent 3591a9421c
commit 4a07ed1dd7


@@ -1,3 +1,4 @@
+import copy
 import numpy as np
 import numpy.random as npr
 import random
@@ -16,7 +17,7 @@ def dataset_get_sin():
     data[:, 0] = np.linspace(0.0, 4 * np.pi, num=NUM) # inputs
     data[:, 1] = np.sin(data[:, 0]) # outputs
     npr.shuffle(data)
-    training, test = data[:SPLIT, :], data[SPLIT:, :]
+    training, test = data[:SPLIT,:], data[SPLIT:,:]
     return training, test
@@ -28,7 +29,7 @@ def dataset_get_linear():
     data[:, 0] = np.linspace(0.0, 2 * np.pi, num=NUM) # inputs
     data[:, 1] = 2 * data[:, 0] # outputs
     npr.shuffle(data)
-    training, test = data[:SPLIT, :], data[SPLIT:, :]
+    training, test = data[:SPLIT,:], data[SPLIT:,:]
     return training, test
@@ -79,6 +80,10 @@ class Model(object):
         self.b1_v = np.zeros(self.b1.shape)
         self.b2_v = np.zeros(self.b2.shape)
+    def L(self, x, y):
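+        # squared-error loss: L = 0.5 * (f(x) - y)^2, so dL/df = f(x) - y,
+        # matching what dLdf() returns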
+        f_x = self.f(x)
+        return 0.5 * (f_x - y) * (f_x - y)
     def z1(self, x):
         return self.w1 * x + self.b1
@@ -92,8 +97,7 @@ class Model(object):
         return self.f(x) - y
     def dLdb2(self, x, y):
         return self.dLdf(x, y)
-        return self.dLdf(x, y)
     def dfda(self): # how f changes with ith element of a
         return self.w2
@@ -112,7 +116,7 @@ class Model(object):
     def dLdw2(self, x, y):
         """Compute dL/dw2 for an input x and expected output y"""
-        return self.dLdf(x, y) * self.a(x) #df/dw2
+        return self.dLdf(x, y) * self.a(x) # df/dw2
     def dLdb1(self, x, y):
         return self.dLdf(x, y) * np.dot(self.dfda(), self.dadz1(x))
@@ -142,6 +146,8 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]
+            self.grad_checker(10e-4, sample_input, sample_output)
             b2_grad += self.dLdb2(sample_input, sample_output)
             w2_grad += self.dLdw2(sample_input, sample_output)
             b1_grad += self.dLdb1(sample_input, sample_output)
@@ -159,27 +165,63 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]
-            self.b2_v = alpha * self.b2_v + ETA * self.dLdb2(sample_input, sample_output)
-            self.w2_v = alpha * self.w2_v + ETA * self.dLdw2(sample_input, sample_output)
-            self.b1_v = alpha * self.b1_v + ETA * self.dLdb1(sample_input, sample_output)
-            self.w1_v = alpha * self.w1_v + ETA * self.dLdw1(sample_input, sample_output)
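+            # classical momentum: v <- alpha * v + ETA * dL/dtheta,
+            # then theta <- theta - v (applied below)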
+            self.b2_v = alpha * self.b2_v + ETA * \
+                self.dLdb2(sample_input, sample_output)
+            self.w2_v = alpha * self.w2_v + ETA * \
+                self.dLdw2(sample_input, sample_output)
+            self.b1_v = alpha * self.b1_v + ETA * \
+                self.dLdb1(sample_input, sample_output)
+            self.w1_v = alpha * self.w1_v + ETA * \
+                self.dLdw1(sample_input, sample_output)
         self.b2 -= self.b2_v
         self.b1 -= self.b1_v
         self.w2 -= self.w2_v
         self.w1 -= self.w1_v
         return
+    def grad_checker(self, eps, x, y):
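+        # Central-difference check: perturb a parameter by +/- eps and
+        # compare (L(theta + eps) - L(theta - eps)) / (2 * eps) against
+        # the analytic gradient.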
+        # Check b2
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.b2 = self.b2 + eps
+        # dec_model.b2 = self.b2 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdb2(x, y)
+        # print "b2:", np.linalg.norm(grad_estimate - grad_actual)
+        # Check b1
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.b1 = self.b1 + eps
+        # dec_model.b1 = self.b1 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdb1(x, y)
+        # print "b1:", np.linalg.norm(grad_estimate - grad_actual)
+        # Check w2
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.w2 = self.w2 + eps
+        dec_model.w2 = self.w2 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdw2(x, y)
+        print "w2:", np.linalg.norm(grad_estimate - grad_actual)
+        # Check w1
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.w1 = self.w1 + eps
+        # dec_model.w1 = self.w1 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdw1(x, y)
+        # print "w1:", np.linalg.norm(grad_estimate - grad_actual)
 def evaluate(model, samples):
-    """Report the loss function over the data"""
-    loss_acc = 0.0
+    """Report the average loss function over the data"""
+    cost_acc = 0.0
     for sample in samples:
-        guess = model.f(sample[0])
-        actual = sample[1]
-        loss_acc += L(guess, actual)
-    return loss_acc / len(samples)
+        cost_acc += model.L(sample[0], sample[1])
+    return cost_acc / len(samples)
 TRAIN_DATA, TEST_DATA = dataset_get_sin()
 # TRAIN_DATA, TEST_DATA = dataset_get_linear()
@@ -212,7 +254,7 @@ for training_iter in range(TRAINING_ITERS):
     # Apply backprop with minibatch
     BATCH_SIZE = 1
     for i in range(0, len(training_subset), BATCH_SIZE):
-        batch = training_subset[i:min(i+BATCH_SIZE, len(training_subset))]
+        batch = training_subset[i:min(i + BATCH_SIZE, len(training_subset))]
         # print batch
         MODEL.backward_minibatch(batch, LEARNING_RATE)
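
The new grad_checker implements the standard central-difference test: nudge a parameter up and down by eps and compare (L(theta + eps) - L(theta - eps)) / (2 * eps) against the analytic derivative (note the 10e-4 passed to it is 1e-3, not 1e-4). For reference, here is a minimal standalone sketch of the same idea in Python 3; gradient_check, loss, grad, and theta are illustrative names, not from this repository:

import numpy as np

def gradient_check(loss, grad, theta, eps=1e-4):
    # loss: callable mapping a parameter vector to a scalar
    # grad: callable mapping a parameter vector to dloss/dtheta
    # Returns the norm of the difference between the analytic gradient
    # and a central-difference estimate built one coordinate at a time.
    estimate = np.zeros_like(theta)
    for i in range(theta.size):
        step = np.zeros_like(theta)
        step[i] = eps
        estimate[i] = (loss(theta + step) - loss(theta - step)) / (2 * eps)
    return np.linalg.norm(estimate - grad(theta))

# Toy check: L(theta) = 0.5 * ||theta||^2 has gradient theta exactly.
theta0 = np.array([1.0, -2.0, 3.0])
print("error:", gradient_check(lambda t: 0.5 * t @ t, lambda t: t, theta0))

One difference worth noting: the sketch perturbs one coordinate at a time, recovering each partial derivative separately, while the commit's grad_checker adds eps to every element of w2 at once, so its estimate approximates the sum of the partials (the directional derivative along the all-ones vector). That still catches gross sign and scale errors, but it can mask mistakes in individual elements.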