Compare commits

...

10 Commits

Author        SHA1        Message                                   Date
Carl Pearson  84ede8312c  Update readme                             2017-02-20 06:07:08 -06:00
Carl Pearson  6dee452bd2  Print test cost for network.py            2017-02-20 06:04:03 -06:00
Carl Pearson  ca585d51af  pylintrc -> .pylintrc, add numpy.random   2017-02-20 05:59:14 -06:00
Carl Pearson  7e408c5b27  Add pylintrc file                         2017-02-20 05:55:26 -06:00
Carl Pearson  39e53619d3  Update readme                             2017-02-20 05:54:09 -06:00
Carl Pearson  1eddee5ec7  .                                         2016-11-17 09:42:11 -06:00
Carl Pearson  7318b04c14  .                                         2016-11-17 09:16:53 -06:00
Carl Pearson  4a07ed1dd7  Gradient-checking code                    2016-11-17 08:01:40 -06:00
Carl Pearson  3591a9421c  Add notes                                 2016-11-16 17:44:20 -06:00
Carl Pearson  578774561c  Add patience, save figure                 2016-11-16 17:44:12 -06:00
5 changed files with 116 additions and 38 deletions

.pylintrc Normal file

@@ -0,0 +1,2 @@
+[TYPECHECK]
+ignored-modules = numpy, numpy.random
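
The new [TYPECHECK] section tells pylint to skip member checks on numpy and numpy.random, C-extension modules whose attributes pylint cannot discover statically, so calls such as npr.uniform in network.py stop being flagged as no-member errors. A sketch of the equivalent one-off invocation, assuming pylint also accepts the option on the command line:

    pylint --ignored-modules=numpy,numpy.random network.py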

README.md

@@ -1 +1,19 @@
-# ece408-backprop-demo
+# ece408-backprop-demo
+
+# Setup
+
+Python 2.7
+scikit-learn
+numpy
+
+Depending on your configuration, you could try
+
+    pip install --user scikit-learn numpy matplotlib
+
+# Running
+
+    python network.py
+
+# Other Stuff
+
+`network2.py` has a different network implementation with a bug somewhere.

BIN
demo.odp Normal file

Binary file not shown.

network.py

@@ -1,3 +1,4 @@
+import copy
 import numpy as np
 import numpy.random as npr
 import random
@@ -9,11 +10,11 @@ DATA_TYPE = np.float32
 def dataset_get_sin():
-    NUM = 1000
-    RATIO = 0.5
+    NUM = 200
+    RATIO = 0.7
     SPLIT = int(NUM * RATIO)
     data = np.zeros((NUM, 2), DATA_TYPE)
-    data[:, 0] = np.linspace(0.0, 4 * np.pi, num=NUM)  # inputs
+    data[:, 0] = np.linspace(0.0, 1 * np.pi, num=NUM)  # inputs
     data[:, 1] = np.sin(data[:, 0])  # outputs
     npr.shuffle(data)
     training, test = data[:SPLIT, :], data[SPLIT:, :]
@@ -21,7 +22,7 @@ def dataset_get_sin():
 def dataset_get_linear():
-    NUM = 100
+    NUM = 1000
     RATIO = 0.8
     SPLIT = int(NUM * RATIO)
     data = np.zeros((NUM, 2), DATA_TYPE)
@@ -62,8 +63,8 @@ class Model(object):
     def __init__(self, layer_size, h, dh, data_type):
         self.w1 = npr.uniform(0, 1, layer_size)
+        self.b1 = npr.uniform(0, 1, layer_size)
         self.w2 = npr.uniform(0, 1, (1, layer_size))
-        self.b1 = npr.uniform(0, 1, layer_size)
         self.b2 = npr.uniform(0, 1, 1)
         # self.w1 = preprocessing.scale(self.w1)
@@ -79,6 +80,10 @@ class Model(object):
         self.b1_v = np.zeros(self.b1.shape)
         self.b2_v = np.zeros(self.b2.shape)

+    def L(self, x, y):
+        f_x = self.f(x)
+        return 0.5 * (f_x - y) * (f_x - y)
+
     def z1(self, x):
         return self.w1 * x + self.b1
@@ -92,8 +97,7 @@ class Model(object):
         return self.f(x) - y

     def dLdb2(self, x, y):
-        return self.dLdf(x, y)
+        return self.dLdf(x, y)

     def dfda(self):  # how f changes with ith element of a
         return self.w2
@@ -112,7 +116,7 @@ class Model(object):
     def dLdw2(self, x, y):
         """Compute dL/dw2 for an input x and expected output y"""
-        return self.dLdf(x, y) * self.a(x) #df/dw2
+        return self.dLdf(x, y) * np.sum(self.a(x))  # df/dw2

     def dLdb1(self, x, y):
         return self.dLdf(x, y) * np.dot(self.dfda(), self.dadz1(x))
@@ -142,6 +146,8 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]
+
+            # self.grad_checker(10e-4, sample_input, sample_output)
             b2_grad += self.dLdb2(sample_input, sample_output)
             w2_grad += self.dLdw2(sample_input, sample_output)
             b1_grad += self.dLdb1(sample_input, sample_output)
@@ -159,44 +165,87 @@ class Model(object):
             sample_input = sample[0]
             sample_output = sample[1]
-            self.b2_v = alpha * self.b2_v + ETA * self.dLdb2(sample_input, sample_output)
-            self.w2_v = alpha * self.w2_v + ETA * self.dLdw2(sample_input, sample_output)
-            self.b1_v = alpha * self.b1_v + ETA * self.dLdb1(sample_input, sample_output)
-            self.w1_v = alpha * self.w1_v + ETA * self.dLdw1(sample_input, sample_output)
+            self.b2_v = alpha * self.b2_v + ETA * \
+                self.dLdb2(sample_input, sample_output)
+            self.w2_v = alpha * self.w2_v + ETA * \
+                self.dLdw2(sample_input, sample_output)
+            self.b1_v = alpha * self.b1_v + ETA * \
+                self.dLdb1(sample_input, sample_output)
+            self.w1_v = alpha * self.w1_v + ETA * \
+                self.dLdw1(sample_input, sample_output)
             self.b2 -= self.b2_v
             self.b1 -= self.b1_v
             self.w2 -= self.w2_v
             self.w1 -= self.w1_v
         return

+    def grad_checker(self, eps, x, y):
+        # Check b2
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.b2 = self.b2 + eps
+        dec_model.b2 = self.b2 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdb2(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "b2"
+
+        # Check b1
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.b1 = self.b1 + eps
+        dec_model.b1 = self.b1 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdb1(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "b1"
+
+        # Check w2
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.w2 = self.w2 + eps
+        dec_model.w2 = self.w2 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdw2(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "w2"
+
+        # Check w1
+        inc_model = copy.deepcopy(self)
+        dec_model = copy.deepcopy(self)
+        inc_model.w1 = self.w1 + eps
+        dec_model.w1 = self.w1 - eps
+        grad_estimate = (inc_model.L(x, y) - dec_model.L(x, y)) / (2 * eps)
+        grad_actual = self.dLdw1(x, y)
+        if np.linalg.norm(grad_estimate - grad_actual) > 10e-5:
+            print "w1"

 def evaluate(model, samples):
-    """Report the loss function over the data"""
-    loss_acc = 0.0
+    """Report the average loss function over the data"""
+    cost_acc = 0.0
     for sample in samples:
-        guess = model.f(sample[0])
-        actual = sample[1]
-        loss_acc += L(guess, actual)
-    return loss_acc / len(samples)
+        cost_acc += model.L(sample[0], sample[1])
+    return cost_acc / len(samples)

 TRAIN_DATA, TEST_DATA = dataset_get_sin()
 # TRAIN_DATA, TEST_DATA = dataset_get_linear()

-MODEL = Model(60, sigmoid, d_sigmoid, DATA_TYPE)
-# MODEL = Model(10, relu, d_relu, DATA_TYPE)
+MODEL = Model(8, sigmoid, d_sigmoid, DATA_TYPE)
+# MODEL = Model(20, relu, d_relu, DATA_TYPE)

 # Train the model with some training data
-TRAINING_ITERS = 5000
-LEARNING_RATE = 0.005
+MAX_EPOCHS = 2000
 TRAINING_SUBSET_SIZE = len(TRAIN_DATA)
+PATIENCE = 200

-print TRAINING_SUBSET_SIZE
+print "Epoch\tTraining Cost Function\tTest Cost Function"

 best_rate = np.inf
-rates = [["iter", "training_rate", "test_rate"]]
-for training_iter in range(TRAINING_ITERS):
+best_model = None
+for epoch in range(MAX_EPOCHS):
     # Create a training sample
     training_subset_indices = npr.choice(
         range(len(TRAIN_DATA)), size=TRAINING_SUBSET_SIZE, replace=False)
@@ -207,38 +256,47 @@ for training_iter in range(TRAINING_ITERS):
     # MODEL.backward(training_subset, LEARNING_RATE)

     # Apply backpropagation
-    # MODEL.SGDm(training_subset, LEARNING_RATE)
+    # MODEL.SGDm(training_subset, 0.00004)

     # Apply backprop with minibatch
-    BATCH_SIZE = 1
+    BATCH_SIZE = 4
+    LEARNING_RATE = 0.05
     for i in range(0, len(training_subset), BATCH_SIZE):
-        batch = training_subset[i:min(i+BATCH_SIZE, len(training_subset))]
-        # print batch
+        batch = training_subset[i:min(i + BATCH_SIZE, len(training_subset))]
         MODEL.backward_minibatch(batch, LEARNING_RATE)

-    # Evaluate accuracy against training data
+    # Evaluate accuracy against training data and test data
     training_rate = evaluate(MODEL, training_subset)
     test_rate = evaluate(MODEL, TEST_DATA)
-    rates += [[training_iter, training_rate, test_rate]]
-    print training_iter, "positive rates:", training_rate, test_rate,
+    print epoch, training_rate, test_rate,

     # If it's the best one so far, store it
     if training_rate < best_rate:
         print "(new best)"
         best_rate = training_rate
         best_model = copy.deepcopy(MODEL)
+        patience = PATIENCE
     else:
         print ""
+        patience -= 1
+        print patience

-TEST_OUTPUT = np.vectorize(MODEL.f)(TEST_DATA[:, 0])
-TRAIN_OUTPUT = np.vectorize(MODEL.f)(TRAIN_DATA[:, 0])
+        if patience <= 0:
+            print PATIENCE, "iterations without improvement"
+            break
+
+test_rate = evaluate(MODEL, TEST_DATA)
+print "Test cost:", test_rate
+
+TEST_OUTPUT = np.vectorize(best_model.f)(TEST_DATA[:, 0])
+TRAIN_OUTPUT = np.vectorize(best_model.f)(TRAIN_DATA[:, 0])

 scatter_train, = plt.plot(
-    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Real Data")
+    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', markersize=2, label="Real Data")
 scatter_train_out, = plt.plot(
     TRAIN_DATA[:, 0], TRAIN_OUTPUT, 'go', label="Network output on training data")
 scatter_test_out, = plt.plot(
     TEST_DATA[:, 0], TEST_OUTPUT, 'bo', label="Network output on test data")
 plt.legend(handles=[scatter_train, scatter_train_out, scatter_test_out])
+plt.savefig("results.png", bbox_inches="tight")
 plt.show()
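
For reference, the grad_checker added above is a central-difference gradient check: it estimates dL/dtheta as (L(theta + eps) - L(theta - eps)) / (2 * eps) and prints a parameter's name when the estimate and the analytic gradient disagree by more than the threshold. Below is a minimal standalone sketch of the textbook per-coordinate version; the toy loss, point, and tolerance are illustrative only, and note that grad_checker instead perturbs a whole parameter array by eps at once, which estimates a directional derivative rather than individual partials:

    import numpy as np

    def loss(theta):
        # toy scalar loss: L(theta) = 0.5 * ||theta||^2
        return 0.5 * np.dot(theta, theta)

    def grad(theta):
        # analytic gradient of the toy loss: dL/dtheta = theta
        return theta

    theta = np.array([0.3, -1.2, 2.0])
    eps = 1e-4
    estimate = np.zeros_like(theta)
    for i in range(len(theta)):
        step = np.zeros_like(theta)
        step[i] = eps  # perturb one coordinate at a time
        estimate[i] = (loss(theta + step) - loss(theta - step)) / (2 * eps)

    # central differences have O(eps**2) truncation error, so the match is tight
    assert np.linalg.norm(estimate - grad(theta)) < 1e-6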

network2.py

@@ -114,7 +114,7 @@ class Model(object):
     def dLdb2(self, x, y):
         return self.dLdf(x, y) * self.dfdb2(x)

-    def dz2dw2(self, x): # how z2 changes with a row of w2
+    def dz2dw2(self, x):  # how z2 changes with a row of w2
         return np.sum(self.a1(x))

     def da2dw2(self, x):