From 532064b6ba1d943f0ec49d26f623e07b9c829c57 Mon Sep 17 00:00:00 2001
From: Carl Pearson
Date: Wed, 16 Nov 2016 09:03:15 -0600
Subject: [PATCH] Single-layer network

---
 .vscode/settings.json |   5 +
 network.py            | 218 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 223 insertions(+)
 create mode 100644 .vscode/settings.json
 create mode 100644 network.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..63cc31a
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+// Place your settings in this file to overwrite default and user settings.
+{
+    "python.formatting.provider": "autopep8",
+    "python.formatting.formatOnSave": true
+}
\ No newline at end of file
diff --git a/network.py b/network.py
new file mode 100644
index 0000000..082d9e8
--- /dev/null
+++ b/network.py
@@ -0,0 +1,218 @@
+import numpy as np
+import numpy.random as npr
+import random
+
+import matplotlib.pyplot as plt
+
+DATA_TYPE = np.float32
+
+
+def dataset_get_sin():
+    """Generate a shuffled y = sin(x) dataset, split into training and test sets"""
+    NUM = 100
+    RATIO = 0.8
+    SPLIT = int(NUM * RATIO)
+    data = np.zeros((NUM, 2), DATA_TYPE)
+    data[:, 0] = np.linspace(0.0, 4.0 * np.pi, num=NUM)  # inputs
+    data[:, 1] = np.sin(data[:, 0])  # outputs
+    npr.shuffle(data)
+    training, test = data[:SPLIT, :], data[SPLIT:, :]
+    return training, test
+
+
+def dataset_get_linear():
+    """Generate a shuffled y = 2x dataset, split into training and test sets"""
+    NUM = 100
+    RATIO = 0.8
+    SPLIT = int(NUM * RATIO)
+    data = np.zeros((NUM, 2), DATA_TYPE)
+    data[:, 0] = np.linspace(0.0, 4.0 * np.pi, num=NUM)  # inputs
+    data[:, 1] = 2 * data[:, 0]  # outputs
+    npr.shuffle(data)
+    training, test = data[:SPLIT, :], data[SPLIT:, :]
+    return training, test
+
+
+def relu(x):
+    """Apply a rectified linear unit to x"""
+    return np.maximum(x, 0)
+
+
+def d_relu(x):
+    """Derivative of relu: 1 where x > 0, 0 elsewhere"""
+    return (x > 0).astype(x.dtype)
+
+
+def sigmoid(vec):
+    """Apply sigmoid to vec"""
+    return 1.0 / (1.0 + np.exp(-1 * vec))
+
+
+def d_sigmoid(vec):
+    """Derivative of sigmoid: s * (1 - s)"""
+    s = sigmoid(vec)
+    return s * (1 - s)
+
+
+def L(x, y):
+    """Squared-error loss between a guess x and a true value y"""
+    return (x - y) * (x - y)
+
+
+class Model(object):
+    """A one-hidden-layer network: f(x) = w2 . h(w1 * x + b1) + b2"""
+
+    def __init__(self, layer_size, data_type):
+        self.w1 = npr.rand(layer_size).astype(data_type)
+        self.b1 = npr.rand(layer_size).astype(data_type)
+        self.w2 = npr.rand(1, layer_size).astype(data_type)
+        self.b2 = npr.rand(1).astype(data_type)
+
+        self.w1 /= np.sum(self.w1)
+        self.w2 /= np.sum(self.w2)
+        self.b1 /= np.sum(self.b1)
+        self.b2 /= np.sum(self.b2)
+
+    def h(self, vec):
+        return relu(vec)
+
+    def dh(self, vec):
+        return d_relu(vec)
+
+    def Z1(self, x):
+        """Apply the first linear layer to an input x"""
+        return self.w1 * x + self.b1
+
+    def A(self, x):
+        """Compute the hidden activations A for an input x"""
+        return self.h(self.Z1(x))
+
+    def Z2(self, x):
+        """Compute Z2 for an input x"""
+        return self.w2.dot(self.A(x)) + self.b2
+
+    def forward(self, x):
+        """Evaluate the model on an input x, returning a scalar"""
+        return self.Z2(x)[0]
+
+    def dLdf(self, x, y):
+        """Compute dL/df for an input x and expected output y"""
+        return 2.0 * (self.forward(x) - y)
+
+    def dfdb2(self):
+        return 1.0
+
+    def dLdb2(self, x, y):
+        """Compute dL/db2 for an input x and expected output y"""
+        return self.dLdf(x, y) * self.dfdb2()
+
+    def dfdw2(self, x):
+        """Compute df/dw2 for an input x"""
+        return self.A(x)
+
+    def dfda(self):
+        """df/da is the second-layer weight vector w2"""
+        return self.w2.flatten()
+
+    def dadz(self, x):
+        """Compute da/dz1 for an input x"""
+        return self.dh(self.Z1(x))
+
+    def dLdz(self, x, y):
+        """Compute dL/dz1 for an input x and expected output y"""
+        return self.dLdf(x, y) * self.dfda() * self.dadz(x)
+
+    def dzdw1(self, x):
+        return x
+
+    def dLdw1(self, x, y):
+        """Compute dL/dw1 for an input x and expected output y"""
+        return self.dLdz(x, y) * self.dzdw1(x)
+
+    def dLdw2(self, x, y):
+        """Compute dL/dw2 for an input x and expected output y"""
+        return self.dLdf(x, y) * self.dfdw2(x)
+
+    def dzdb1(self):
+        return 1.0
+
+    def dLdb1(self, x, y):
+        return self.dLdz(x, y) * self.dzdb1()
+
+    def backward(self, training_samples, ETA):
+        """Do backpropagation with stochastic gradient descent on the model using training_samples"""
+        for sample in training_samples:
+            sample_input = sample[0]
+            sample_output = sample[1]
+
+            b2_grad = self.dLdb2(sample_input, sample_output)
+            w2_grad = self.dLdw2(sample_input, sample_output)
+            b1_grad = self.dLdb1(sample_input, sample_output)
+            w1_grad = self.dLdw1(sample_input, sample_output)
+            self.b2 -= ETA * b2_grad
+            self.b1 -= ETA * b1_grad
+            self.w2 -= ETA * w2_grad
+            self.w1 -= ETA * w1_grad
+        return
+
+
+def evaluate(model, samples):
+    """Report the mean loss over the data"""
+    loss_acc = 0.0
+    for sample in samples:
+        guess = model.forward(sample[0])
+        actual = sample[1]
+        loss_acc += L(guess, actual)
+    return loss_acc / len(samples)
+
+
+# TRAIN_DATA, TEST_DATA = dataset_get_sin()
+TRAIN_DATA, TEST_DATA = dataset_get_linear()
+
+MODEL = Model(10, DATA_TYPE)
+
+# Train the model with some training data
+TRAINING_ITERS = 100
+LEARNING_RATE = 0.001
+TRAINING_SUBSET_SIZE = len(TRAIN_DATA)
+
+print(TRAINING_SUBSET_SIZE)
+
+best_rate = np.inf
+rates = [["iter", "training_rate", "test_rate"]]
+for training_iter in range(TRAINING_ITERS):
+    # Draw a training subset
+    training_subset_indices = npr.choice(
+        range(len(TRAIN_DATA)), size=TRAINING_SUBSET_SIZE, replace=False)
+    training_subset = [TRAIN_DATA[i] for i in training_subset_indices]
+    random.shuffle(training_subset)
+
+    # Apply backpropagation
+    MODEL.backward(training_subset, LEARNING_RATE)
+
+    # Evaluate the loss on the training and test data
+    training_rate = evaluate(MODEL, training_subset)
+    test_rate = evaluate(MODEL, TEST_DATA)
+    rates += [[training_iter, training_rate, test_rate]]
+
+    print(training_iter, "mean loss (train, test):", training_rate, test_rate, end=" ")
+
+    # If it's the best one so far, remember it
+    if training_rate < best_rate:
+        print("(new best)")
+        best_rate = training_rate
+    else:
+        print()
+
+TEST_OUTPUT = np.vectorize(MODEL.forward)(TEST_DATA[:, 0])
+TRAIN_OUTPUT = np.vectorize(MODEL.forward)(TRAIN_DATA[:, 0])
+
+scatter_train, = plt.plot(
+    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Training data")
+scatter_train_out, = plt.plot(
+    TRAIN_DATA[:, 0], TRAIN_OUTPUT, 'go', label="Training output")
+scatter_test_out, = plt.plot(
+    TEST_DATA[:, 0], TEST_OUTPUT, 'bo', label="Test output")
+plt.legend(handles=[scatter_train, scatter_train_out, scatter_test_out])
+
+plt.show()
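
The hand-derived chain-rule gradients in Model (dLdw1, dLdw2, dLdb1, dLdb2) are easy to get subtly wrong, so it is worth comparing them against finite differences. The sketch below is not part of the patch: it assumes the Model class and loss L from network.py are in scope (note that importing network.py as-is also runs its training script and plot), and numeric_grad is a hypothetical helper written for this check.

import numpy as np

def numeric_grad(model, param, i, x, y, eps=1e-4):
    """Central-difference estimate of dL/dparam.flat[i] at the sample (x, y)."""
    old = param.flat[i]
    param.flat[i] = old + eps
    plus = L(model.forward(x), y)
    param.flat[i] = old - eps
    minus = L(model.forward(x), y)
    param.flat[i] = old  # restore the parameter
    return (plus - minus) / (2 * eps)

model = Model(10, np.float64)  # float64 keeps the difference quotient stable
x, y = 1.5, 3.0  # an arbitrary input/output sample
analytic = model.dLdw1(x, y)
numeric = np.array([numeric_grad(model, model.w1, i, x, y)
                    for i in range(model.w1.size)])
print(np.max(np.abs(analytic - numeric)))  # should be near zero (~1e-8)

If the two disagree, the mismatch pinpoints which dL/d* method is wrong; the same loop works for w2, b1, and b2. The check can be slightly off when some hidden pre-activation sits within eps of the relu kink at zero.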