Gradient-checking code
network.py
@@ -1,3 +1,4 @@
+import copy
 import numpy as np
 import numpy.random as npr
 import random
@@ -79,6 +80,10 @@ class Model(object):
         self.b1_v = np.zeros(self.b1.shape)
         self.b2_v = np.zeros(self.b2.shape)

+    def L(self, x, y):
+        f_x = self.f(x)
+        return 0.5 * (f_x - y) * (f_x - y)
+
     def z1(self, x):
         return self.w1 * x + self.b1

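Note: the new Model.L is the per-sample squared-error loss, L(x, y) = 0.5 * (f(x) - y)^2, whose derivative with respect to the network output is dL/df = f(x) - y; since b2 enters f additively, that is why dLdb2 (next hunk) can simply return dLdf(x, y), and grad_checker (below) uses L itself for its finite-difference estimates. Putting the loss on the model also lets evaluate (last hunk) average model.L directly instead of calling a separate L(guess, actual). A minimal standalone sketch of the loss/derivative relationship; the helper names and sample values are illustrative, not from the commit:

    def loss(f_x, y):
        return 0.5 * (f_x - y) * (f_x - y)  # same form as Model.L

    def dloss_df(f_x, y):
        return f_x - y  # analytic derivative of the loss w.r.t. the output

    f_x, y, eps = 1.3, 1.0, 1e-6
    numeric = (loss(f_x + eps, y) - loss(f_x - eps, y)) / (2 * eps)
    print("%g" % abs(numeric - dloss_df(f_x, y)))  # ~0: numeric matches analytic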
@@ -94,7 +99,6 @@ class Model(object):
     def dLdb2(self, x, y):
         return self.dLdf(x, y)

-
     def dfda(self):  # how f changes with ith element of a
         return self.w2

@@ -142,6 +146,8 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]

+            self.grad_checker(10e-4, sample_input, sample_output)
+
             b2_grad += self.dLdb2(sample_input, sample_output)
             w2_grad += self.dLdw2(sample_input, sample_output)
             b1_grad += self.dLdb1(sample_input, sample_output)
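Note: in Python the literal 10e-4 means 10 * 10**-4 = 0.001, so the checker runs with eps = 1e-3 rather than the 1e-4 the spelling suggests; if 1e-4 was intended it should be written literally. The call also sits inside the per-sample training loop, and each invocation deep-copies the model twice, so this reads as temporary debug instrumentation rather than something to leave in the hot path. A two-line illustration of the literal (stock Python, nothing from the commit):

    print(10e-4)  # 0.001  -- '10e-4' is 10 * 10**-4
    print(1e-4)   # 0.0001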
@@ -159,27 +165,63 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]

-            self.b2_v = alpha * self.b2_v + ETA * self.dLdb2(sample_input, sample_output)
-            self.w2_v = alpha * self.w2_v + ETA * self.dLdw2(sample_input, sample_output)
-            self.b1_v = alpha * self.b1_v + ETA * self.dLdb1(sample_input, sample_output)
-            self.w1_v = alpha * self.w1_v + ETA * self.dLdw1(sample_input, sample_output)
+            self.b2_v = alpha * self.b2_v + ETA * \
+                self.dLdb2(sample_input, sample_output)
+            self.w2_v = alpha * self.w2_v + ETA * \
+                self.dLdw2(sample_input, sample_output)
+            self.b1_v = alpha * self.b1_v + ETA * \
+                self.dLdb1(sample_input, sample_output)
+            self.w1_v = alpha * self.w1_v + ETA * \
+                self.dLdw1(sample_input, sample_output)
             self.b2 -= self.b2_v
             self.b1 -= self.b1_v
             self.w2 -= self.w2_v
             self.w1 -= self.w1_v
         return

+    def grad_checker(self, eps, x, y):
+        # Check b2
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.b2 = self.b2 + eps
+        # dec_model.b2 = self.b2 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdb2(x, y)
+        # print "b2:", np.linalg.norm(grad_estimate - grad_actual)
+
+        # Check b1
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.b1 = self.b1 + eps
+        # dec_model.b1 = self.b1 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdb1(x, y)
+        # print "b1:", np.linalg.norm(grad_estimate - grad_actual)
+
+        # Check w2
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.w2 = self.w2 + eps
+        dec_model.w2 = self.w2 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdw2(x, y)
+        print "w2:", np.linalg.norm(grad_estimate - grad_actual)
+
+        # Check w1
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.w1 = self.w1 + eps
+        # dec_model.w1 = self.w1 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdw1(x, y)
+        # print "w1:", np.linalg.norm(grad_estimate - grad_actual)
+

 def evaluate(model, samples):
-    """Report the loss function over the data"""
-    loss_acc = 0.0
+    """Report the average loss function over the data"""
+    cost_acc = 0.0
     for sample in samples:
-        guess = model.f(sample[0])
-        actual = sample[1]
-        loss_acc += L(guess, actual)
-    return loss_acc / len(samples)
+        cost_acc += model.L(sample[0], sample[1])
+    return cost_acc / len(samples)


 TRAIN_DATA, TEST_DATA = dataset_get_sin()
 # TRAIN_DATA, TEST_DATA = dataset_get_linear()
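Note: the active w2 block is a standard central-difference check, (L(theta + eps) - L(theta - eps)) / (2 * eps), whose error shrinks as O(eps^2), so the printed norm should be tiny when dLdw2 is correct. Two caveats: the b2, b1, and w1 blocks are commented-out copies of the same code, and adding eps to an array parameter perturbs every component at once, so the scalar estimate approximates the sum of the partials (the directional derivative along the all-ones vector), not the per-component gradient that dLdw2 returns. A hedged sketch of folding the four blocks into one loop with a well-defined comparison; check_all, its parameter table, and the summed-partials comparison are illustrative choices, not part of the commit:

    import copy
    import numpy as np

    def check_all(model, x, y, eps=1e-4):
        # Pair each parameter name with the model's analytic gradient method.
        checks = [("b1", model.dLdb1), ("b2", model.dLdb2),
                  ("w1", model.dLdw1), ("w2", model.dLdw2)]
        for name, grad_fn in checks:
            inc_model = copy.deepcopy(model)
            dec_model = copy.deepcopy(model)
            setattr(inc_model, name, getattr(model, name) + eps)
            setattr(dec_model, name, getattr(model, name) - eps)
            # Perturbing the whole array shifts every component by eps, so the
            # central difference estimates the SUM of the partial derivatives.
            estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
            print("%s: %g" % (name, abs(estimate - np.sum(grad_fn(x, y)))))

Called once per epoch instead of once per sample, the eight deepcopy calls stay cheap.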