Gradient-checking code
network.py
@@ -1,3 +1,4 @@
+import copy
 import numpy as np
 import numpy.random as npr
 import random
@@ -79,6 +80,10 @@ class Model(object):
         self.b1_v = np.zeros(self.b1.shape)
         self.b2_v = np.zeros(self.b2.shape)

+    def L(self, x, y):
+        f_x = self.f(x)
+        return 0.5 * (f_x - y) * (f_x - y)
+
     def z1(self, x):
         return self.w1 * x + self.b1

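Note: the new Model.L is the per-sample squared-error loss, L(x, y) = 0.5 * (f(x) - y)^2, whose derivative with respect to the network output is dL/df = f(x) - y; since b2 enters f additively, that is why dLdb2 (next hunk) can simply return dLdf(x, y), and grad_checker (below) uses L itself for its finite-difference estimates. Putting the loss on the model also lets evaluate (last hunk) average model.L directly instead of calling a separate L(guess, actual). A minimal standalone sketch of the loss/derivative relationship; the helper names and sample values are illustrative, not from the commit:

    def loss(f_x, y):
        return 0.5 * (f_x - y) * (f_x - y)  # same form as Model.L

    def dloss_df(f_x, y):
        return f_x - y  # analytic derivative of the loss w.r.t. the output

    f_x, y, eps = 1.3, 1.0, 1e-6
    numeric = (loss(f_x + eps, y) - loss(f_x - eps, y)) / (2 * eps)
    print("%g" % abs(numeric - dloss_df(f_x, y)))  # ~0: numeric matches analytic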
@@ -94,7 +99,6 @@ class Model(object):
     def dLdb2(self, x, y):
         return self.dLdf(x, y)

-
     def dfda(self):  # how f changes with ith element of a
         return self.w2

@@ -142,6 +146,8 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]

+            self.grad_checker(10e-4, sample_input, sample_output)
+
             b2_grad += self.dLdb2(sample_input, sample_output)
             w2_grad += self.dLdw2(sample_input, sample_output)
             b1_grad += self.dLdb1(sample_input, sample_output)
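Note: in Python the literal 10e-4 means 10 * 10**-4 = 0.001, so the checker runs with eps = 1e-3 rather than the 1e-4 the spelling suggests; if 1e-4 was intended it should be written literally. The call also sits inside the per-sample training loop, and each invocation deep-copies the model twice, so this reads as temporary debug instrumentation rather than something to leave in the hot path. A two-line illustration of the literal (stock Python, nothing from the commit):

    print(10e-4)  # 0.001  -- '10e-4' is 10 * 10**-4
    print(1e-4)   # 0.0001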
@@ -159,27 +165,63 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]

-            self.b2_v = alpha * self.b2_v + ETA * self.dLdb2(sample_input, sample_output)
-            self.w2_v = alpha * self.w2_v + ETA * self.dLdw2(sample_input, sample_output)
-            self.b1_v = alpha * self.b1_v + ETA * self.dLdb1(sample_input, sample_output)
-            self.w1_v = alpha * self.w1_v + ETA * self.dLdw1(sample_input, sample_output)
+            self.b2_v = alpha * self.b2_v + ETA * \
+                self.dLdb2(sample_input, sample_output)
+            self.w2_v = alpha * self.w2_v + ETA * \
+                self.dLdw2(sample_input, sample_output)
+            self.b1_v = alpha * self.b1_v + ETA * \
+                self.dLdb1(sample_input, sample_output)
+            self.w1_v = alpha * self.w1_v + ETA * \
+                self.dLdw1(sample_input, sample_output)
             self.b2 -= self.b2_v
             self.b1 -= self.b1_v
             self.w2 -= self.w2_v
             self.w1 -= self.w1_v
         return

+    def grad_checker(self, eps, x, y):
+        # Check b2
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.b2 = self.b2 + eps
+        # dec_model.b2 = self.b2 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdb2(x, y)
+        # print "b2:", np.linalg.norm(grad_estimate - grad_actual)
+
+        # Check b1
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.b1 = self.b1 + eps
+        # dec_model.b1 = self.b1 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdb1(x, y)
+        # print "b1:", np.linalg.norm(grad_estimate - grad_actual)
+
+        # Check w2
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.w2 = self.w2 + eps
+        dec_model.w2 = self.w2 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdw2(x, y)
+        print "w2:", np.linalg.norm(grad_estimate - grad_actual)
+
+        # Check w1
+        # inc_model = copy.deepcopy(self)
+        # dec_model = copy.deepcopy(self)
+        # inc_model.w1 = self.w1 + eps
+        # dec_model.w1 = self.w1 - eps
+        # grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        # grad_actual = self.dLdw1(x, y)
+        # print "w1:", np.linalg.norm(grad_estimate - grad_actual)
+

 def evaluate(model, samples):
-    """Report the loss function over the data"""
-    loss_acc = 0.0
+    """Report the average loss function over the data"""
+    cost_acc = 0.0
     for sample in samples:
-        guess = model.f(sample[0])
-        actual = sample[1]
-        loss_acc += L(guess, actual)
-    return loss_acc / len(samples)
+        cost_acc += model.L(sample[0], sample[1])
+    return cost_acc / len(samples)


 TRAIN_DATA, TEST_DATA = dataset_get_sin()
 # TRAIN_DATA, TEST_DATA = dataset_get_linear()
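Note: the active w2 block is a standard central-difference check, (L(theta + eps) - L(theta - eps)) / (2 * eps), whose error shrinks as O(eps^2), so the printed norm should be tiny when dLdw2 is correct. Two caveats: the b2, b1, and w1 blocks are commented-out copies of the same code, and adding eps to an array parameter perturbs every component at once, so the scalar estimate approximates the sum of the partials (the directional derivative along the all-ones vector), not the per-component gradient that dLdw2 returns. A hedged sketch of folding the four blocks into one loop with a well-defined comparison; check_all, its parameter table, and the summed-partials comparison are illustrative choices, not part of the commit:

    import copy
    import numpy as np

    def check_all(model, x, y, eps=1e-4):
        # Pair each parameter name with the model's analytic gradient method.
        checks = [("b1", model.dLdb1), ("b2", model.dLdb2),
                  ("w1", model.dLdw1), ("w2", model.dLdw2)]
        for name, grad_fn in checks:
            inc_model = copy.deepcopy(model)
            dec_model = copy.deepcopy(model)
            setattr(inc_model, name, getattr(model, name) + eps)
            setattr(dec_model, name, getattr(model, name) - eps)
            # Perturbing the whole array shifts every component by eps, so the
            # central difference estimates the SUM of the partial derivatives.
            estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
            print("%s: %g" % (name, abs(estimate - np.sum(grad_fn(x, y)))))

Called once per epoch instead of once per sample, the eight deepcopy calls stay cheap.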