Updates
This commit is contained in:
85
network.py
85
network.py
@@ -8,11 +8,11 @@ DATA_TYPE = np.float32
|
|||||||
|
|
||||||
|
|
||||||
def dataset_get_sin():
|
def dataset_get_sin():
|
||||||
NUM = 100
|
NUM = 1000
|
||||||
RATIO = 0.8
|
RATIO = 0.8
|
||||||
SPLIT = int(NUM * RATIO)
|
SPLIT = int(NUM * RATIO)
|
||||||
data = np.zeros((NUM, 2), DATA_TYPE)
|
data = np.zeros((NUM, 2), DATA_TYPE)
|
||||||
data[:, 0] = np.linspace(0.0, 4.0 * np.pi, num=NUM) # inputs
|
data[:, 0] = np.linspace(0.0, 2 * np.pi, num=NUM) # inputs
|
||||||
data[:, 1] = np.sin(data[:, 0]) # outputs
|
data[:, 1] = np.sin(data[:, 0]) # outputs
|
||||||
npr.shuffle(data)
|
npr.shuffle(data)
|
||||||
training, test = data[:SPLIT, :], data[SPLIT:, :]
|
training, test = data[:SPLIT, :], data[SPLIT:, :]
|
||||||
@@ -24,7 +24,7 @@ def dataset_get_linear():
|
|||||||
RATIO = 0.8
|
RATIO = 0.8
|
||||||
SPLIT = int(NUM * RATIO)
|
SPLIT = int(NUM * RATIO)
|
||||||
data = np.zeros((NUM, 2), DATA_TYPE)
|
data = np.zeros((NUM, 2), DATA_TYPE)
|
||||||
data[:, 0] = np.linspace(0.0, 4.0 * np.pi, num=NUM) # inputs
|
data[:, 0] = np.linspace(0.0, 2 * np.pi, num=NUM) # inputs
|
||||||
data[:, 1] = 2 * data[:, 0] # outputs
|
data[:, 1] = 2 * data[:, 0] # outputs
|
||||||
npr.shuffle(data)
|
npr.shuffle(data)
|
||||||
training, test = data[:SPLIT, :], data[SPLIT:, :]
|
training, test = data[:SPLIT, :], data[SPLIT:, :]
|
||||||
@@ -32,14 +32,14 @@ def dataset_get_linear():
|
|||||||
|
|
||||||
|
|
||||||
def relu(x):
|
def relu(x):
|
||||||
"""Apply a rectified linear until to x"""
|
"""Apply a rectified linear unit to x"""
|
||||||
return np.maximum(x, 0, x)
|
return np.maximum(0, x)
|
||||||
|
|
||||||
|
|
||||||
def d_relu(x):
|
def d_relu(x):
|
||||||
res = x
|
res = x
|
||||||
res[res < 0] = 0
|
|
||||||
res[res >= 0] = 1
|
res[res >= 0] = 1
|
||||||
|
res[res < 0] = 0
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ def L(x, y):
|
|||||||
|
|
||||||
class Model(object):
|
class Model(object):
|
||||||
|
|
||||||
def __init__(self, layer_size, data_type):
|
def __init__(self, layer_size, h, dh, data_type):
|
||||||
self.w1 = npr.rand(layer_size).astype(data_type)
|
self.w1 = npr.rand(layer_size).astype(data_type)
|
||||||
self.b1 = npr.rand(layer_size).astype(data_type)
|
self.b1 = npr.rand(layer_size).astype(data_type)
|
||||||
self.w2 = npr.rand(1, layer_size).astype(data_type)
|
self.w2 = npr.rand(1, layer_size).astype(data_type)
|
||||||
@@ -70,70 +70,60 @@ class Model(object):
|
|||||||
self.b1 /= np.sum(self.b1)
|
self.b1 /= np.sum(self.b1)
|
||||||
self.b2 /= np.sum(self.b2)
|
self.b2 /= np.sum(self.b2)
|
||||||
|
|
||||||
def h(self, vec):
|
self.h = h
|
||||||
return relu(vec)
|
self.dh = dh
|
||||||
|
|
||||||
def dh(self, vec):
|
def z1(self, x):
|
||||||
return d_relu(vec)
|
|
||||||
|
|
||||||
def Z1(self, x):
|
|
||||||
"""Apply the first linear layer to an input x"""
|
|
||||||
return self.w1 * x + self.b1
|
return self.w1 * x + self.b1
|
||||||
|
|
||||||
def A(self, x):
|
def a(self, x):
|
||||||
"""Compute A for an input x"""
|
|
||||||
return self.h(self.Z1(x))
|
|
||||||
|
|
||||||
def Z2(self, x):
|
return self.h(self.z1(x))
|
||||||
"""Compute Z2 for an input x"""
|
|
||||||
return self.w2.dot(self.A(x)) + self.b2
|
|
||||||
|
|
||||||
def forward(self, x):
|
def f(self, x):
|
||||||
"""Evaluate the model on an input x"""
|
|
||||||
return self.Z2(x)
|
return self.w2.dot(self.a(x)) + self.b2
|
||||||
|
|
||||||
def dLdf(self, x, y):
|
def dLdf(self, x, y):
|
||||||
"""Compute dL/df for an input x"""
|
return 2.0 * (self.f(x) - y)
|
||||||
return 2.0 * (self.forward(x) - y)
|
|
||||||
|
|
||||||
def dfdb2(self):
|
def dfdb2(self):
|
||||||
return 1.0
|
return np.array([1.0])
|
||||||
|
|
||||||
def dLdb2(self, x, y):
|
def dLdb2(self, x, y):
|
||||||
"""Evaluate dL/db2 for an input x and expected output y"""
|
|
||||||
return self.dLdf(x, y) * self.dfdb2()
|
return self.dLdf(x, y) * self.dfdb2()
|
||||||
|
|
||||||
def dfdw2(self, x):
|
def dfdw2(self, x):
|
||||||
"""Evaluate df/dw2 using an input sample x"""
|
return np.sum(self.a(x))
|
||||||
return self.A(x)
|
|
||||||
|
|
||||||
def dfda(self):
|
def dfda(self): # how f changes with ith element of a
|
||||||
return np.sum(self.w2)
|
return self.w2
|
||||||
|
|
||||||
def dadz(self, x):
|
def dadz1(self, x): # how a[i] changes with z1[i]
|
||||||
"""Compute da/dz1 for an input x"""
|
"""Compute da/dz1 for an input x"""
|
||||||
return self.dh(self.Z1(x))
|
return self.dh(self.z1(x))
|
||||||
|
|
||||||
def dLdz(self, x, y):
|
def dLdz1(self, x, y):
|
||||||
"""Compute dL/dz1 for an input x and expected output y"""
|
"""Compute dL/dz1 for an input x and expected output y"""
|
||||||
return self.dLdf(x, y) * self.dfda() * self.dadz(x)
|
return self.dLdf(x, y) * np.sum(self.dfda() * self.dadz1(x))
|
||||||
|
|
||||||
def dzdw1(self, x):
|
def dz1dw1(self, x):
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def dLdw1(self, x, y):
|
def dLdw1(self, x, y):
|
||||||
"""Compute dL/dw1 for an input x and expected output y"""
|
"""Compute dL/dw1 for an input x and expected output y"""
|
||||||
return self.dLdz(x, y) * self.dzdw1(x)
|
return self.dLdf(x, y) * np.sum(self.dfda() * self.dadz1(x) * self.dz1dw1(x))
|
||||||
|
|
||||||
def dLdw2(self, x, y):
|
def dLdw2(self, x, y):
|
||||||
"""Compute dL/dw2 for an input x and expected output y"""
|
"""Compute dL/dw2 for an input x and expected output y"""
|
||||||
return self.dLdf(x, y) * self.dfdw2(x)
|
return self.dLdf(x, y) * self.dfdw2(x)
|
||||||
|
|
||||||
def dzdb1(self):
|
def dz1db1(self):
|
||||||
return 1.0
|
return np.ones(self.b1.shape)
|
||||||
|
|
||||||
def dLdb1(self, x, y):
|
def dLdb1(self, x, y):
|
||||||
return self.dLdz(x, y) * self.dzdb1()
|
return self.dLdf(x, y) * np.sum(self.dfda() * self.dadz1(x) * self.dz1db1())
|
||||||
|
|
||||||
def backward(self, training_samples, ETA):
|
def backward(self, training_samples, ETA):
|
||||||
"""Do backpropagation with stochastic gradient descent on the model using training_samples"""
|
"""Do backpropagation with stochastic gradient descent on the model using training_samples"""
|
||||||
@@ -156,19 +146,20 @@ def evaluate(model, samples):
|
|||||||
"""Report the loss function over the data"""
|
"""Report the loss function over the data"""
|
||||||
loss_acc = 0.0
|
loss_acc = 0.0
|
||||||
for sample in samples:
|
for sample in samples:
|
||||||
guess = model.forward(sample[0])
|
guess = model.f(sample[0])
|
||||||
actual = sample[1]
|
actual = sample[1]
|
||||||
loss_acc += L(guess, actual)
|
loss_acc += L(guess, actual)
|
||||||
return loss_acc / len(samples)
|
return loss_acc / len(samples)
|
||||||
|
|
||||||
# TRAIN_DATA, TEST_DATA = dataset_get_sin()
|
TRAIN_DATA, TEST_DATA = dataset_get_sin()
|
||||||
TRAIN_DATA, TEST_DATA = dataset_get_linear()
|
# TRAIN_DATA, TEST_DATA = dataset_get_linear()
|
||||||
|
|
||||||
MODEL = Model(10, DATA_TYPE)
|
MODEL = Model(6, sigmoid, d_sigmoid, DATA_TYPE)
|
||||||
|
# MODEL = Model(10, relu, d_relu, DATA_TYPE)
|
||||||
|
|
||||||
# Train the model with some training data
|
# Train the model with some training data
|
||||||
TRAINING_ITERS = 100
|
TRAINING_ITERS = 500
|
||||||
LEARNING_RATE = 0.001
|
LEARNING_RATE = 0.006
|
||||||
TRAINING_SUBSET_SIZE = len(TRAIN_DATA)
|
TRAINING_SUBSET_SIZE = len(TRAIN_DATA)
|
||||||
|
|
||||||
print TRAINING_SUBSET_SIZE
|
print TRAINING_SUBSET_SIZE
|
||||||
@@ -199,8 +190,8 @@ for training_iter in range(TRAINING_ITERS):
|
|||||||
else:
|
else:
|
||||||
print ""
|
print ""
|
||||||
|
|
||||||
TEST_OUTPUT = np.vectorize(MODEL.forward)(TEST_DATA[:, 0])
|
TEST_OUTPUT = np.vectorize(MODEL.f)(TEST_DATA[:, 0])
|
||||||
TRAIN_OUTPUT = np.vectorize(MODEL.forward)(TRAIN_DATA[:, 0])
|
TRAIN_OUTPUT = np.vectorize(MODEL.f)(TRAIN_DATA[:, 0])
|
||||||
|
|
||||||
scatter_train, = plt.plot(
|
scatter_train, = plt.plot(
|
||||||
TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Training data")
|
TRAIN_DATA[:, 0], TRAIN_DATA[:, 1], 'ro', label="Training data")
|
||||||
|
|||||||
Reference in New Issue
Block a user