Compare commits

...

10 Commits

Author        SHA1        Message                                   Date
Carl Pearson  84ede8312c  Update readme                             2017-02-20 06:07:08 -06:00
Carl Pearson  6dee452bd2  Print test cost for network.py            2017-02-20 06:04:03 -06:00
Carl Pearson  ca585d51af  pylintrc -> .pylintrc, add numpy.random   2017-02-20 05:59:14 -06:00
Carl Pearson  7e408c5b27  Add pylintrc file                         2017-02-20 05:55:26 -06:00
Carl Pearson  39e53619d3  Update readme                             2017-02-20 05:54:09 -06:00
Carl Pearson  1eddee5ec7  .                                         2016-11-17 09:42:11 -06:00
Carl Pearson  7318b04c14  .                                         2016-11-17 09:16:53 -06:00
Carl Pearson  4a07ed1dd7  Gradient-checking code                    2016-11-17 08:01:40 -06:00
Carl Pearson  3591a9421c  Add notes                                 2016-11-16 17:44:20 -06:00
Carl Pearson  578774561c  Add patience, save figure                 2016-11-16 17:44:12 -06:00
5 changed files with 116 additions and 38 deletions

.pylintrc Normal file

@@ -0,0 +1,2 @@
+[TYPECHECK]
+ignored-modules = numpy, numpy.random
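
The new [TYPECHECK] section tells pylint to skip member checks on numpy and numpy.random, C-extension modules whose attributes pylint cannot discover statically, so calls such as npr.uniform in network.py stop being flagged as no-member errors. A sketch of the equivalent one-off invocation, assuming pylint also accepts the option on the command line:

    pylint --ignored-modules=numpy,numpy.random network.py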

README.md

@@ -1 +1,19 @@
-# ece408-backprop-demo
+# ece408-backprop-demo
+
+# Setup
+
+Python 2.7
+scikit-learn
+numpy
+
+Depending on your configuration, you could try
+
+    pip install --user scikit-learn numpy matplotlib
+
+# Running
+
+    python network.py
+
+# Other Stuff
+
+`network2.py` has a different network implementation with a bug somewhere.

BIN
demo.odp Normal file

Binary file not shown.

network.py

@@ -1,3 +1,4 @@
+import copy
 import numpy as np
 import numpy.random as npr
 import random
@@ -9,11 +10,11 @@ DATA_TYPE = np.float32
 def dataset_get_sin():
-    NUM = 1000
-    RATIO = 0.5
+    NUM = 200
+    RATIO = 0.7
     SPLIT = int(NUM * RATIO)
     data = np.zeros((NUM, 2), DATA_TYPE)
-    data[:, 0] = np.linspace(0.0, 4 * np.pi, num=NUM)  # inputs
+    data[:, 0] = np.linspace(0.0, 1 * np.pi, num=NUM)  # inputs
     data[:, 1] = np.sin(data[:, 0])  # outputs
     npr.shuffle(data)
     training, test = data[:SPLIT, :], data[SPLIT:, :]
@@ -21,7 +22,7 @@ def dataset_get_sin():
 def dataset_get_linear():
-    NUM = 100
+    NUM = 1000
     RATIO = 0.8
     SPLIT = int(NUM * RATIO)
     data = np.zeros((NUM, 2), DATA_TYPE)
@@ -62,8 +63,8 @@ class Model(object):
     def __init__(self, layer_size, h, dh, data_type):
         self.w1 = npr.uniform(0, 1, layer_size)
+        self.b1 = npr.uniform(0, 1, layer_size)
         self.w2 = npr.uniform(0, 1, (1, layer_size))
-        self.b1 = npr.uniform(0, 1, layer_size)
         self.b2 = npr.uniform(0, 1, 1)
         # self.w1 = preprocessing.scale(self.w1)
@@ -79,6 +80,10 @@ class Model(object):
         self.b1_v = np.zeros(self.b1.shape)
         self.b2_v = np.zeros(self.b2.shape)

+    def L(self, x, y):
+        f_x = self.f(x)
+        return 0.5 * (f_x - y) * (f_x - y)
+
     def z1(self, x):
         return self.w1 * x + self.b1
@@ -92,8 +97,7 @@ class Model(object):
         return self.f(x) - y

     def dLdb2(self, x, y):
-        return self.dLdf(x, y)
+        return self.dLdf(x, y)

     def dfda(self):  # how f changes with ith element of a
         return self.w2
@@ -112,7 +116,7 @@ class Model(object):
     def dLdw2(self, x, y):
         """Compute dL/dw2 for an input x and expected output y"""
-        return self.dLdf(x, y) * self.a(x) #df/dw2
+        return self.dLdf(x, y) * np.sum(self.a(x))  # df/dw2

     def dLdb1(self, x, y):
         return self.dLdf(x, y) * np.dot(self.dfda(), self.dadz1(x))
@@ -142,6 +146,8 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]
+
+            # self.grad_checker(10e-4, sample_input, sample_output)
             b2_grad += self.dLdb2(sample_input, sample_output)
             w2_grad += self.dLdw2(sample_input, sample_output)
             b1_grad += self.dLdb1(sample_input, sample_output)
@@ -159,44 +165,87 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]
-            self.b2_v = alpha * self.b2_v + ETA * self.dLdb2(sample_input, sample_output)
-            self.w2_v = alpha * self.w2_v + ETA * self.dLdw2(sample_input, sample_output)
-            self.b1_v = alpha * self.b1_v + ETA * self.dLdb1(sample_input, sample_output)
-            self.w1_v = alpha * self.w1_v + ETA * self.dLdw1(sample_input, sample_output)
+            self.b2_v = alpha * self.b2_v + ETA * \
+                self.dLdb2(sample_input, sample_output)
+            self.w2_v = alpha * self.w2_v + ETA * \
+                self.dLdw2(sample_input, sample_output)
+            self.b1_v = alpha * self.b1_v + ETA * \
+                self.dLdb1(sample_input, sample_output)
+            self.w1_v = alpha * self.w1_v + ETA * \
+                self.dLdw1(sample_input, sample_output)
             self.b2 -= self.b2_v
             self.b1 -= self.b1_v
             self.w2 -= self.w2_v
             self.w1 -= self.w1_v
         return

+    def grad_checker(self, eps, x, y):
+        # Check b2
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.b2 = self.b2 + eps
+        dec_model.b2 = self.b2 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdb2(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "b2"
+
+        # Check b1
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.b1 = self.b1 + eps
+        dec_model.b1 = self.b1 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdb1(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "b1"
+
+        # Check w2
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.w2 = self.w2 + eps
+        dec_model.w2 = self.w2 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdw2(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "w2"
+
+        # Check w1
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.w1 = self.w1 + eps
+        dec_model.w1 = self.w1 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdw1(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "w1"

 def evaluate(model, samples):
-    """Report the loss function over the data"""
-    loss_acc = 0.0
+    """Report the average loss function over the data"""
+    cost_acc = 0.0
     for sample in samples:
-        guess = model.f(sample[0])
-        actual = sample[1]
-        loss_acc += L(guess, actual)
-    return loss_acc / len(samples)
+        cost_acc += model.L(sample[0], sample[1])
+    return cost_acc / len(samples)

 TRAIN_DATA, TEST_DATA = dataset_get_sin()
 # TRAIN_DATA, TEST_DATA = dataset_get_linear()

-MODEL = Model(60, sigmoid, d_sigmoid, DATA_TYPE)
-# MODEL = Model(10, relu, d_relu, DATA_TYPE)
+MODEL = Model(8, sigmoid, d_sigmoid, DATA_TYPE)
+# MODEL = Model(20, relu, d_relu, DATA_TYPE)

 # Train the model with some training data
-TRAINING_ITERS = 5000
-LEARNING_RATE = 0.005
+MAX_EPOCHS = 2000
 TRAINING_SUBSET_SIZE = len(TRAIN_DATA)
+PATIENCE = 200

-print TRAINING_SUBSET_SIZE
+print "Epoch\tTraining Cost Function\tTest Cost Function"

 best_rate = np.inf
-rates = [["iter", "training_rate", "test_rate"]]
-for training_iter in range(TRAINING_ITERS):
+best_model = None
+for epoch in range(MAX_EPOCHS):
     # Create a training sample
     training_subset_indices = npr.choice(
         range(len(TRAIN_DATA)), size=TRAINING_SUBSET_SIZE, replace=False)
@@ -207,38 +256,47 @@ for training_iter in range(TRAINING_ITERS):
     # MODEL.backward(training_subset, LEARNING_RATE)

     # Apply backpropagation
-    # MODEL.SGDm(training_subset, LEARNING_RATE)
+    # MODEL.SGDm(training_subset, 0.00004)

     # Apply backprop with minibatch
-    BATCH_SIZE = 1
+    BATCH_SIZE = 4
+    LEARNING_RATE = 0.05
     for i in range(0, len(training_subset), BATCH_SIZE):
-        batch = training_subset[i:min(i+BATCH_SIZE, len(training_subset))]
-        # print batch
+        batch = training_subset[i:min(i + BATCH_SIZE, len(training_subset))]
         MODEL.backward_minibatch(batch, LEARNING_RATE)

-    # Evaluate accuracy against training data
+    # Evaluate accuracy against training data and test data
     training_rate = evaluate(MODEL, training_subset)
     test_rate = evaluate(MODEL, TEST_DATA)
-    rates += [[training_iter, training_rate, test_rate]]
-    print training_iter, "positive rates:", training_rate, test_rate,
+    print epoch, training_rate, test_rate,

     # If it's the best one so far, store it
     if training_rate < best_rate:
         print "(new best)"
         best_rate = training_rate
         best_model = copy.deepcopy(MODEL)
+        patience = PATIENCE
     else:
         print ""
+        patience -= 1
+        print patience

-TEST_OUTPUT = np.vectorize(MODEL.f)(TEST_DATA[:, 0])
-TRAIN_OUTPUT = np.vectorize(MODEL.f)(TRAIN_DATA[:, 0])
+        if patience <= 0:
+            print PATIENCE, "iterations without improvement"
+            break
+
+test_rate = evaluate(MODEL, TEST_DATA)
+print "Test cost:", test_rate
+
+TEST_OUTPUT = np.vectorize(best_model.f)(TEST_DATA[:, 0])
+TRAIN_OUTPUT = np.vectorize(best_model.f)(TRAIN_DATA[:, 0])

 scatter_train, = plt.plot(
-    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Real Data")
+    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', markersize=2, label="Real Data")
 scatter_train_out, = plt.plot(
     TRAIN_DATA[:, 0], TRAIN_OUTPUT, 'go', label="Network output on training data")
 scatter_test_out, = plt.plot(
     TEST_DATA[:, 0], TEST_OUTPUT, 'bo', label="Network output on test data")
 plt.legend(handles=[scatter_train, scatter_train_out, scatter_test_out])
+plt.savefig("results.png", bbox_inches="tight")
 plt.show()
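
For reference, the grad_checker added above is a central-difference gradient check: it estimates dL/dtheta as (L(theta + eps) - L(theta - eps)) / (2 * eps) and prints a parameter's name when the estimate and the analytic gradient disagree by more than the threshold. Below is a minimal standalone sketch of the textbook per-coordinate version; the toy loss, point, and tolerance are illustrative only, and note that grad_checker instead perturbs a whole parameter array by eps at once, which estimates a directional derivative rather than individual partials:

    import numpy as np

    def loss(theta):
        # toy scalar loss: L(theta) = 0.5 * ||theta||^2
        return 0.5 * np.dot(theta, theta)

    def grad(theta):
        # analytic gradient of the toy loss: dL/dtheta = theta
        return theta

    theta = np.array([0.3, -1.2, 2.0])
    eps = 1e-4
    estimate = np.zeros_like(theta)
    for i in range(len(theta)):
        step = np.zeros_like(theta)
        step[i] = eps  # perturb one coordinate at a time
        estimate[i] = (loss(theta + step) - loss(theta - step)) / (2 * eps)

    # central differences have O(eps**2) truncation error, so the match is tight
    assert np.linalg.norm(estimate - grad(theta)) < 1e-6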

network2.py

@@ -114,7 +114,7 @@ class Model(object):
     def dLdb2(self, x, y):
         return self.dLdf(x, y) * self.dfdb2(x)

-    def dz2dw2(self, x): # how z2 changes with a row of w2
+    def dz2dw2(self, x):  # how z2 changes with a row of w2
         return np.sum(self.a1(x))

     def da2dw2(self, x):