This commit is contained in:
Carl Pearson
2016-11-16 14:02:16 -06:00
parent 7bc3ed028f
commit 23bf172b96
2 changed files with 54 additions and 23 deletions

View File

@@ -1,6 +1,7 @@
 import numpy as np
 import numpy.random as npr
 import random
+from sklearn import preprocessing
 import matplotlib.pyplot as plt
@@ -9,7 +10,7 @@ DATA_TYPE = np.float32
 def dataset_get_sin():
     NUM = 1000
-    RATIO = 0.8
+    RATIO = 0.5
     SPLIT = int(NUM * RATIO)
     data = np.zeros((NUM, 2), DATA_TYPE)
     data[:, 0] = np.linspace(0.0, 2 * np.pi, num=NUM)  # inputs
@@ -39,7 +40,7 @@ def relu(x):
 def d_relu(x):
     res = x
     res[res >= 0] = 1
-    res[res < 0] = 0
+    res[res < 0] = 0.01
     return res
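Note: with the negative branch set to 0.01, d_relu is now the derivative of a leaky ReLU rather than of the plain relu it presumably still pairs with (the forward function is not shown in this hunk), so forward pass and gradient disagree for negative inputs. A minimal sketch of a matched forward/derivative pair, with hypothetical helper names that are not part of this commit; unlike the committed d_relu, it copies rather than mutates its argument:

    import numpy as np

    def leaky_relu(x, alpha=0.01):
        # elementwise max(alpha * x, x)
        return np.where(x >= 0, x, alpha * x)

    def d_leaky_relu(x, alpha=0.01):
        # 1 where x >= 0, alpha elsewhere; built on a copy, not in place
        res = np.ones_like(x)
        res[x < 0] = alpha
        return res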
@@ -60,15 +61,15 @@ def L(x, y):
 class Model(object):
     def __init__(self, layer_size, h, dh, data_type):
-        self.w1 = npr.rand(layer_size).astype(data_type)
-        self.b1 = npr.rand(layer_size).astype(data_type)
-        self.w2 = npr.rand(1, layer_size).astype(data_type)
-        self.b2 = npr.rand(1).astype(data_type)
-        self.w1 /= np.sum(self.w1)
-        self.w2 /= np.sum(self.w2)
-        self.b1 /= np.sum(self.b1)
-        self.b2 /= np.sum(self.b2)
+        self.w1 = npr.uniform(-1, 1, layer_size)
+        self.b1 = npr.uniform(-1, 1, layer_size)
+        self.w2 = npr.uniform(-1, 1, (1, layer_size))
+        self.b2 = npr.uniform(-1, 1, 1)
+        self.w1 = preprocessing.scale(self.w1)
+        self.w2 = preprocessing.scale(self.w2)
+        self.b1 = preprocessing.scale(self.b1)
+        self.b2 = preprocessing.scale(self.b2)
         self.h = h
         self.dh = dh
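Note: sklearn.preprocessing.scale standardizes along axis 0 by default, so the 1-D w1 and b1 come out with zero mean and unit variance, but each column of the (1, layer_size) w2 and the length-1 b2 holds a single value, which should standardize to all zeros. The new initializers also drop the .astype(data_type) cast, so parameters default to float64. A plain-NumPy sketch that standardizes over all entries instead (hypothetical helper, not part of this commit):

    import numpy as np
    import numpy.random as npr

    def scaled_uniform(shape):
        # draw from U(-1, 1), then standardize over all entries;
        # size-1 arrays are left as drawn (their std is 0)
        w = npr.uniform(-1, 1, shape)
        if w.size > 1:
            w = (w - w.mean()) / w.std()
        return w

    # e.g. self.w2 = scaled_uniform((1, layer_size)).astype(data_type)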
@@ -126,7 +127,6 @@ class Model(object):
         return self.dLdf(x, y) * np.sum(self.dfda() * self.dadz1(x) * self.dz1db1())

     def backward(self, training_samples, ETA):
-        """Do backpropagation with stochastic gradient descent on the model using training_samples"""
         for sample in training_samples:
             sample_input = sample[0]
             sample_output = sample[1]
@@ -139,6 +139,29 @@ class Model(object):
             self.b1 -= ETA * b1_grad
             self.w2 -= ETA * w2_grad
             self.w1 -= ETA * w1_grad
+        return
+
+    def backward_minibatch(self, batch, ETA):
+        # accumulate per-sample gradients, then take one averaged step
+        b2_grad = np.zeros(self.b2.shape)
+        b1_grad = np.zeros(self.b1.shape)
+        w2_grad = np.zeros(self.w2.shape)
+        w1_grad = np.zeros(self.w1.shape)
+        for sample in batch:
+            sample_input = sample[0]
+            sample_output = sample[1]
+            b2_grad += self.dLdb2(sample_input, sample_output)
+            w2_grad += self.dLdw2(sample_input, sample_output)
+            b1_grad += self.dLdb1(sample_input, sample_output)
+            w1_grad += self.dLdw1(sample_input, sample_output)
+        self.b2 -= ETA * b2_grad / len(batch)
+        self.b1 -= ETA * b1_grad / len(batch)
+        self.w2 -= ETA * w2_grad / len(batch)
+        self.w1 -= ETA * w1_grad / len(batch)
         return
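Note: dividing the accumulated gradients by len(batch) makes this an averaged minibatch update, so the effective step size stays independent of BATCH_SIZE. The same rule in generic form, as a sketch with hypothetical names (not part of this commit):

    import numpy as np

    def sgd_minibatch_step(theta, per_sample_grads, eta):
        # theta: parameter array; per_sample_grads: list of gradient arrays
        # averaging keeps the step scale independent of the batch size
        return theta - eta * np.mean(per_sample_grads, axis=0)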
@@ -154,12 +177,12 @@ def evaluate(model, samples):
 TRAIN_DATA, TEST_DATA = dataset_get_sin()
 # TRAIN_DATA, TEST_DATA = dataset_get_linear()
-MODEL = Model(6, sigmoid, d_sigmoid, DATA_TYPE)
+MODEL = Model(10, sigmoid, d_sigmoid, DATA_TYPE)
 # MODEL = Model(10, relu, d_relu, DATA_TYPE)

 # Train the model with some training data
-TRAINING_ITERS = 500
-LEARNING_RATE = 0.006
+TRAINING_ITERS = 1000
+LEARNING_RATE = 0.005
 TRAINING_SUBSET_SIZE = len(TRAIN_DATA)
 print TRAINING_SUBSET_SIZE
@@ -174,7 +197,14 @@ for training_iter in range(TRAINING_ITERS):
     random.shuffle(training_subset)

     # Apply backpropagation
-    MODEL.backward(training_subset, LEARNING_RATE)
+    # MODEL.backward(training_subset, LEARNING_RATE)
+
+    # Apply backprop with minibatch
+    BATCH_SIZE = 2
+    for i in range(0, len(training_subset), BATCH_SIZE):
+        batch = training_subset[i:min(i+BATCH_SIZE, len(training_subset))]
+        # print batch
+        MODEL.backward_minibatch(batch, LEARNING_RATE)

     # Evaluate accuracy against training data
     training_rate = evaluate(MODEL, training_subset)
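Note: Python slices already clamp at the end of a list, so training_subset[i:i+BATCH_SIZE] is sufficient and the min() call is redundant. A minimal batching helper in the same spirit (hypothetical, not part of this commit):

    def minibatches(samples, batch_size):
        # slicing clamps at len(samples), so the last batch may be short
        for i in range(0, len(samples), batch_size):
            yield samples[i:i + batch_size]

    # for batch in minibatches(training_subset, BATCH_SIZE):
    #     MODEL.backward_minibatch(batch, LEARNING_RATE)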
@@ -194,11 +224,11 @@ TEST_OUTPUT = np.vectorize(MODEL.f)(TEST_DATA[:, 0])
 TRAIN_OUTPUT = np.vectorize(MODEL.f)(TRAIN_DATA[:, 0])

 scatter_train, = plt.plot(
-    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Training data")
+    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Real Data")
 scatter_train_out, = plt.plot(
-    TRAIN_DATA[:, 0], TRAIN_OUTPUT, 'go', label="Training output")
+    TRAIN_DATA[:, 0], TRAIN_OUTPUT, 'go', label="Network output on training data")
 scatter_test_out, = plt.plot(
-    TEST_DATA[:, 0], TEST_OUTPUT, 'bo', label="Test output")
+    TEST_DATA[:, 0], TEST_OUTPUT, 'bo', label="Network output on test data")
 plt.legend(handles=[scatter_train, scatter_train_out, scatter_test_out])
 plt.show()

View File

@@ -98,7 +98,7 @@ class Model(object):
         return 2.0 * (self.f(x) - y)

     def dLdb3(self, x, y):
-        return self.dLdf(x, y) * np.ones(self.b3.shape)
+        return self.dLdf(x, y)

     def dLdw3(self, x, y):
         return self.dLdf(x, y) * np.sum(self.a2(x))
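Note: the simplification is consistent with the chain rule when the output is affine in b3. Assuming f(x) = w_3 a_2(x) + b_3, which is inferred from the surrounding identifiers rather than shown in this diff:

    \frac{\partial L}{\partial b_3}
      = \frac{\partial L}{\partial f} \cdot \frac{\partial f}{\partial b_3}
      = \frac{\partial L}{\partial f} \cdot 1
      = \frac{\partial L}{\partial f}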
@@ -114,17 +114,18 @@ class Model(object):
     def dLdb2(self, x, y):
         return self.dLdf(x, y) * self.dfdb2(x)

-    def dz2dw2(self, x):
-        return np.sum(self.a2(x))
+    def dz2dw2(self, x):  # how z2 changes with a row of w2
+        return np.sum(self.a1(x))

     def da2dw2(self, x):
         return self.dh(self.z2(x)) * self.dz2dw2(x)

     def dfdw2(self, x):
-        return np.dot(self.w3, self.da2dw2(x))
+        # print self.dfdz2(x).shape
+        return np.dot(self.dfdz2(x), self.dz2dw2(x))

     def dLdw2(self, x, y):
-        return self.dLdf(x, y) * self.dfdw2(x)
+        return self.dLdf(x, y) * np.sum(self.dfdw2(x))

     # First layer updates
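Note: the switch from a2 to a1 matches the chain rule. Assuming z_2 = w_2 a_1(x) + b_2 (inferred from the identifier names, not shown in this diff), z2 varies with w2 through the previous layer's activation a1, not this layer's own output a2:

    \frac{\partial z_2}{\partial w_2} = a_1(x),
    \qquad
    \frac{\partial L}{\partial w_2}
      = \frac{\partial L}{\partial f}\,
        \frac{\partial f}{\partial z_2}\,
        \frac{\partial z_2}{\partial w_2}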