Backpropagation of error is a learning algorithm for artificial neural networks (typically feedforward) popularized by Geoffery Hinton (PhD, University of Toronto). Although it is not biologically plausible, it serves as a powerful algorithm for solving complex problems with artificial neural networks.
Below is an implementation of a backpropagation softmax network in python. The script would run faster if numpy arrays are used. More detailed descriptions will be added later.
#!/usr/bin/env python -w
import math
import random
class Data:
""" Data """
def __init__(self):
self.inputs = []
self.targets = []
def readMatrixFromFile(f):
mat = []
for line in f:
strNums = line.split(' ')
mat.append( [float(a) for a in strNums if a != ''] )
return mat
def matrixMult(a, b):
"""Matrix muplication of a and b"""
n1, m1, n2, m2 = len(a), len(a[0]), len(b), len(b[0])
if m1 != n2: raise ValueError, 'Invalid dimensions: m of a and n of b must agree'
c = [[0] * m2 for i in range(n1)]
for i in range(n1):
for k in range(m2):
c[i][k] = sum([a[i][j]*b[j][k] for j in range(m1)])
return c
def matrixMultT1(a, b):
"""Matrix muplication of a^T and b"""
n1, m1, n2, m2 = len(a), len(a[0]), len(b), len(b[0])
if n1 != n2: raise ValueError, 'Invalid dimensions: n of a and n of b must agree'
c = [[0] * m2 for i in range(m1)]
for i in range(m1):
for k in range(m2):
c[i][k] = sum([a[j][i]*b[j][k] for j in range(n1)])
return c
def matrixMultT2(a, b):
"""Matrix mulplication of a and b^T"""
n1, m1, n2, m2 = len(a), len(a[0]), len(b), len(b[0])
if m1 != m2: raise ValueError, 'Invalid dimensions: m of a and m of b must agree'
c = [[0] * n2 for i in range(n1)]
for i in range(n1):
for k in range(n2):
c[i][k] = sum([a[i][j]*b[k][j] for j in range(m1)])
return c
class Net:
"""Backpropagation network (batch learning)"""
def __init__(self, trainData, testData):
self.maxEpoch = 1000
self.epsilon = 0.005
self.numHidden = 5
self.initWeightSd = 0.01
self.errorPrintFreq = 10
self.weightPrintFreq = 500
self.tiny = math.exp(-200)
self.trainData = trainData
self.testData = testData
self.outputs = []
print 'numHidden=%5d initW=%5.3f epsilon=%5.3f' % (self.numHidden, self.initWeightSd, self.epsilon)
self.numInputs = len(trainData.inputs[0])
self.numOutputs = len(trainData.targets[0])
# weights from inputs to hidden; w_ij is weight from input_i to hidden_j
self.inHidW = [ [random.gauss(0, self.initWeightSd) for j in range(self.numHidden)] for i in range(self.numInputs) ]
# weights from hidden to outputs
self.hidOutW = [ [random.gauss(0, self.initWeightSd) for j in range(self.numOutputs)] for i in range(self.numHidden) ]
self.hidB = [random.gauss(0, self.initWeightSd) for i in range(self.numHidden)]
self.outB = [random.gauss(0, self.initWeightSd) for i in range(self.numOutputs)]
self.errors = []
self.E = []
self.testErrors = []
self.testE = []
def forwardPass(self, inputs, targets):
"""Forward Pass
Run forward passes for all the cases in parallel, using one case per row
"""
numCases = len(inputs)
# Calculate the activities of hidden nodes
self.hidSum = matrixMult(inputs, self.inHidW)
self.hidSum = [ [self.hidSum[i][j] + self.hidB[j] for j in range(self.numHidden)] for i in range(numCases) ]
self.hidAct = [ [1 / (1 + math.exp(-self.hidSum[i][j])) for j in range(self.numHidden)] for i in range(numCases) ]
# Calculate the activities of output nodes, with softmax
self.outSum = matrixMult(self.hidAct, self.hidOutW)
self.outSum = [ [self.outSum[i][j] + self.outB[j] for j in range(self.numOutputs)] for i in range(numCases) ]
preOutputs = [ [math.exp(self.outSum[i][j]) for j in range(self.numOutputs)] for i in range(numCases) ]
rowSum = [ sum(preOutputs[i]) for i in range(numCases) ]
self.outputs = [ [preOutputs[i][j] / rowSum[i] for j in range(self.numOutputs)] for i in range(numCases) ]
def backwardPass(self, inputs, targets):
numCases = len(inputs)
# Error derivatives w.r.t. the summed input to each output node (softmax) (size: numCases, numOutputs)
self.dE_dOutSum = [ [self.outputs[i][j] - targets[i][j] for j in range(self.numOutputs)] for i in range(numCases) ]
# Error derivatives w.r.t. the hidden-to-output weights (size: numHidden, numOutputs)
self.dE_dHidOut = matrixMultT1(self.hidAct, self.dE_dOutSum)
# Error derivatives w.r.t. the hidden node activities (size: numCases, numHidden)
self.dE_dHidAct = matrixMultT2(self.dE_dOutSum, self.hidOutW)
# Error derivatives w.r.t. the summed input to each hidden node (size: numCases, numHidden)
self.dE_dHidSum = [ [self.dE_dHidAct[i][j]*self.hidAct[i][j]*(1-self.hidAct[i][j]) for j in range(self.numHidden)] for i in range(numCases)]
# Error derivatives w.r.t. the bias for each output node summed over all cases (size: numOutputs)
self.dE_dOutB = [0] * self.numOutputs;
for i in range(numCases):
for j in range(self.numOutputs):
self.dE_dOutB[j] += self.dE_dOutSum[i][j] # since activity from bias node is 1
# Error derivatives w.r.t. the bias for each hidden node summed over all cases (size: numHidden)
self.dE_dHidB = [0] * self.numHidden;
for i in range(numCases):
for j in range(self.numHidden):
self.dE_dHidB[j] += self.dE_dHidSum[i][j] # since activity from bias node is 1
# Error derivatives w.r.t. the input-to-hidden weights (size: numInputs, numHidden)
self.dE_dInHid = matrixMultT1(inputs, self.dE_dHidSum)
# Update weights and biases
self.inHidW = [ [self.inHidW[i][j] - self.epsilon * self.dE_dInHid[i][j] for j in range(self.numHidden)] for i in range(self.numInputs) ]
self.hidOutW = [ [self.hidOutW[i][j] - self.epsilon * self.dE_dHidOut[i][j] for j in range(self.numOutputs)] for i in range(self.numHidden) ]
self.hidB = [self.hidB[j] - self.epsilon * self.dE_dHidB[j] for j in range(self.numHidden)]
self.outB = [self.outB[j] - self.epsilon * self.dE_dOutB[j] for j in range(self.numOutputs)]
def getErrors(self, targets):
numCases = len(targets)
# Cross entropy
E = 0
for i in range(numCases):
for j in range(self.numOutputs):
E += targets[i][j] * math.log(self.tiny + self.outputs[i][j])
# Error residual
# Local representation of network output
# Output of the network is represented by the node with the highest activity
guesses = [ [ (self.outputs[i][j] - max(self.outputs[i])) >= 0 for j in range(self.numOutputs)] for i in range(numCases) ]
#print self.outputs
#print guesses
#print targets
error = 0
for i in range(numCases):
for j in range(self.numOutputs):
error += targets[i][j] - targets[i][j]*guesses[i][j]
return error, E
def backprop(self):
for epoch in range(self.maxEpoch):
inputs, targets = self.trainData.inputs, self.trainData.targets
self.forwardPass(inputs, targets)
self.backwardPass(inputs, targets)
if (epoch+1) % self.errorPrintFreq == 0:
error, E = self.getErrors(targets)
print 'epoch=%5d errors=%5.3f E=%5.3f' % (epoch+1, error, E),
self.errors.append(error)
self.E.append(E)
inputs, targets = self.testData.inputs, self.testData.targets
self.forwardPass(inputs, targets)
tError, tE = self.getErrors(targets)
print 'tErrors=%5.3f tE=%5.3f' % (tError, tE)
self.testErrors.append(tError)
self.testE.append(tE)
self.printResults()
def printResults(self):
numCases = len(self.testData.targets)
for i in range(numCases):
print 'Case %d:' % i
print 'targets:',
for b in self.testData.targets[i]:
print '%4d' % b,
print '\noutputs:',
for b in self.outputs[i]:
print '%4.3f' % b,
print '\nsoftmax:',
guesses = [ [ (self.outputs[i][j] - max(self.outputs[i])) >= 0 for j in range(self.numOutputs)] for i in range(numCases) ]
for b in guesses[i]:
print '%4d' % b,
print '\n'
trainData = Data()
#trainData.inputs = [[0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 1, 1]]
f = open('train-inputs.txt', 'r')
trainData.inputs = readMatrixFromFile(f)
f.close()
#trainData.targets = [[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]
f = open('train-targets.txt', 'r')
trainData.targets = readMatrixFromFile(f)
f.close()
testData = Data()
#testData.inputs = [[1, 0, 0], [1, 0, 1]]
f = open('test-inputs.txt', 'r')
testData.inputs = readMatrixFromFile(f)
f.close()
#testData.targets = [[0, 0, 1], [0, 1, 0]]
f = open('test-targets.txt', 'r')
testData.targets = readMatrixFromFile(f)
f.close()
net = Net(trainData, testData)
net.backprop()