network.py
@@ -1,6 +1,7 @@
 import numpy as np
 import numpy.random as npr
 import random
+from sklearn import preprocessing
 
 import matplotlib.pyplot as plt
 
@@ -9,7 +10,7 @@ DATA_TYPE = np.float32
 
 def dataset_get_sin():
     NUM = 1000
-    RATIO = 0.8
+    RATIO = 0.5
     SPLIT = int(NUM * RATIO)
     data = np.zeros((NUM, 2), DATA_TYPE)
     data[:, 0] = np.linspace(0.0, 2 * np.pi, num=NUM)  # inputs
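The hunk cuts off mid-function, so the rest of dataset_get_sin() is not shown in this diff. A minimal sketch of how such a function typically finishes, assuming the elided body fills in sin targets and splits the rows at SPLIT (the actual continuation is not part of this commit):

    # hypothetical continuation, not shown in the diff
    data[:, 1] = np.sin(data[:, 0])    # outputs: sin of each input
    npr.shuffle(data)                  # shuffle rows before splitting
    return data[:SPLIT], data[SPLIT:]  # train split, test split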
@@ -39,7 +40,7 @@ def relu(x):
 def d_relu(x):
     res = x
     res[res >= 0] = 1
-    res[res < 0] = 0
+    res[res < 0] = 0.01
     return res
 
 
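Changing the negative branch from 0 to 0.01 turns d_relu into the derivative of a leaky ReLU (slope 0.01 for negative inputs), which keeps a small gradient flowing instead of producing dead units. Note that d_relu also mutates its argument, since res = x merely aliases the input array. A non-mutating sketch of the matching forward/derivative pair, with hypothetical names that are not in the commit:

    LEAK = 0.01

    def leaky_relu(x):
        # forward: identity for x >= 0, small slope LEAK for x < 0
        return np.where(x >= 0, x, LEAK * x)

    def d_leaky_relu(x):
        # derivative: 1 for x >= 0, LEAK for x < 0; returns a new array
        # instead of writing into x the way d_relu above does
        return np.where(x >= 0, 1.0, LEAK)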
@@ -60,15 +61,15 @@ def L(x, y):
 class Model(object):
 
     def __init__(self, layer_size, h, dh, data_type):
-        self.w1 = npr.rand(layer_size).astype(data_type)
-        self.b1 = npr.rand(layer_size).astype(data_type)
-        self.w2 = npr.rand(1, layer_size).astype(data_type)
-        self.b2 = npr.rand(1).astype(data_type)
+        self.w1 = npr.uniform(-1, 1, layer_size)
+        self.b1 = npr.uniform(-1, 1, layer_size)
+        self.w2 = npr.uniform(-1, 1, (1, layer_size))
+        self.b2 = npr.uniform(-1, 1, 1)
 
-        self.w1 /= np.sum(self.w1)
-        self.w2 /= np.sum(self.w2)
-        self.b1 /= np.sum(self.b1)
-        self.b2 /= np.sum(self.b2)
+        self.w1 = preprocessing.scale(self.w1)
+        self.w2 = preprocessing.scale(self.w2)
+        self.b1 = preprocessing.scale(self.b1)
+        self.b2 = preprocessing.scale(self.b2)
 
         self.h = h
         self.dh = dh
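Two side effects of this initialization change are worth flagging. First, the npr.uniform draws drop the .astype(data_type) calls, so the parameters silently become float64. Second, preprocessing.scale standardizes along axis 0, so the (1, layer_size)-shaped w2 and the single-element b2 each have zero variance per column and come back as all zeros. A sketch that standardizes the flattened draw instead (an illustrative helper, not part of the commit):

    def init_standardized(shape, data_type):
        # draw uniform weights, then shift/scale the whole tensor to
        # mean 0, std 1; skip the division for single-element tensors,
        # whose std is 0
        w = npr.uniform(-1, 1, shape)
        w -= w.mean()
        if w.size > 1:
            w /= w.std()
        return w.astype(data_type)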
@@ -126,7 +127,6 @@ class Model(object):
         return self.dLdf(x, y) * np.sum(self.dfda() * self.dadz1(x) * self.dz1db1())
-
     def backward(self, training_samples, ETA):
         """Do backpropagation with stochastic gradient descent on the model using training_samples"""
         for sample in training_samples:
             sample_input = sample[0]
             sample_output = sample[1]
@@ -139,6 +139,29 @@ class Model(object):
             self.b1 -= ETA * b1_grad
             self.w2 -= ETA * w2_grad
             self.w1 -= ETA * w1_grad
 
         return
 
+    def backward_minibatch(self, batch, ETA):
+        b2_grad = np.zeros(self.b2.shape)
+        b1_grad = np.zeros(self.b1.shape)
+        w2_grad = np.zeros(self.w2.shape)
+        w1_grad = np.zeros(self.w1.shape)
+
+        for sample in batch:
+            sample_input = sample[0]
+            sample_output = sample[1]
+
+            b2_grad += self.dLdb2(sample_input, sample_output)
+            w2_grad += self.dLdw2(sample_input, sample_output)
+            b1_grad += self.dLdb1(sample_input, sample_output)
+            w1_grad += self.dLdw1(sample_input, sample_output)
+
+        self.b2 -= ETA * b2_grad / len(batch)
+        self.b1 -= ETA * b1_grad / len(batch)
+        self.w2 -= ETA * w2_grad / len(batch)
+        self.w1 -= ETA * w1_grad / len(batch)
+
+        return
+
+
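The new backward_minibatch is plain minibatch gradient descent: it sums each parameter's per-sample gradient over the batch and takes one step with the average, so the effective step size does not grow with BATCH_SIZE. The same update rule written generically, as a sketch over a hypothetical params dict rather than the commit's actual attributes:

    def minibatch_step(params, grads_for, batch, eta):
        # params: dict name -> ndarray; grads_for(x, y) returns a dict of
        # per-sample gradients with matching shapes (stand-ins for the
        # model's dLdw1/dLdb1/... methods)
        total = {k: np.zeros_like(v) for k, v in params.items()}
        for x, y in batch:
            g = grads_for(x, y)
            for k in total:
                total[k] += g[k]
        for k in params:
            params[k] -= eta * total[k] / len(batch)  # averaged gradient step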
@@ -154,12 +177,12 @@ def evaluate(model, samples):
 TRAIN_DATA, TEST_DATA = dataset_get_sin()
 # TRAIN_DATA, TEST_DATA = dataset_get_linear()
 
-MODEL = Model(6, sigmoid, d_sigmoid, DATA_TYPE)
+MODEL = Model(10, sigmoid, d_sigmoid, DATA_TYPE)
 # MODEL = Model(10, relu, d_relu, DATA_TYPE)
 
 # Train the model with some training data
-TRAINING_ITERS = 500
-LEARNING_RATE = 0.006
+TRAINING_ITERS = 1000
+LEARNING_RATE = 0.005
 TRAINING_SUBSET_SIZE = len(TRAIN_DATA)
 
 print TRAINING_SUBSET_SIZE
@@ -174,7 +197,14 @@ for training_iter in range(TRAINING_ITERS):
     random.shuffle(training_subset)
 
     # Apply backpropagation
-    MODEL.backward(training_subset, LEARNING_RATE)
+    # MODEL.backward(training_subset, LEARNING_RATE)
+
+    # Apply backprop with minibatch
+    BATCH_SIZE = 2
+    for i in range(0, len(training_subset), BATCH_SIZE):
+        batch = training_subset[i:min(i+BATCH_SIZE, len(training_subset))]
+        # print batch
+        MODEL.backward_minibatch(batch, LEARNING_RATE)
 
     # Evaluate accuracy against training data
     training_rate = evaluate(MODEL, training_subset)
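One detail in the new training loop: Python slices already clamp to the sequence length, so the min() guard in training_subset[i:min(i+BATCH_SIZE, len(training_subset))] is redundant; training_subset[i:i+BATCH_SIZE] yields the same (possibly shorter) final batch. An equivalent helper, as a sketch:

    def iter_batches(samples, batch_size):
        # slicing clamps at the end, so the last batch may be shorter
        for i in range(0, len(samples), batch_size):
            yield samples[i:i + batch_size]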
@@ -194,11 +224,11 @@ TEST_OUTPUT = np.vectorize(MODEL.f)(TEST_DATA[:, 0])
 TRAIN_OUTPUT = np.vectorize(MODEL.f)(TRAIN_DATA[:, 0])
 
 scatter_train, = plt.plot(
-    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Training data")
+    TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Real Data")
 scatter_train_out, = plt.plot(
-    TRAIN_DATA[:, 0], TRAIN_OUTPUT, 'go', label="Training output")
+    TRAIN_DATA[:, 0], TRAIN_OUTPUT, 'go', label="Network output on training data")
 scatter_test_out, = plt.plot(
-    TEST_DATA[:, 0], TEST_OUTPUT, 'bo', label="Test output")
+    TEST_DATA[:, 0], TEST_OUTPUT, 'bo', label="Network output on test data")
 plt.legend(handles=[scatter_train, scatter_train_out, scatter_test_out])
 
 plt.show()
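A note on the plotting context: np.vectorize is a convenience wrapper, not a performance primitive; it calls MODEL.f once per element in a Python loop. An equivalent comprehension (same result, assuming MODEL.f maps a scalar input to a scalar output):

    TEST_OUTPUT = np.array([MODEL.f(x) for x in TEST_DATA[:, 0]])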