Backpropagation of Error

Backpropagation of error is a learning algorithm for artificial neural networks (typically feedforward) popularized by Geoffery Hinton (PhD, University of Toronto). Although it is not biologically plausible, it serves as a powerful algorithm for solving complex problems with artificial neural networks.

Below is an implementation of a backpropagation softmax network in python. The script would run faster if numpy arrays are used. More detailed descriptions will be added later.

#!/usr/bin/env python -w

import math
import random

class Data:
  """ Data """
  def __init__(self):
    self.inputs = []
    self.targets = []

def readMatrixFromFile(f):
  mat = []
  for line in f:
    strNums = line.split('  ')
    mat.append( [float(a) for a in strNums if a != ''] )
  return mat

def matrixMult(a, b):
  """Matrix muplication of a and b"""
  n1, m1, n2, m2 = len(a), len(a[0]), len(b), len(b[0])
  if m1 != n2: raise ValueError, 'Invalid dimensions: m of a and n of b must agree'
  c = [[0] * m2 for i in range(n1)]
  for i in range(n1):
    for k in range(m2):
      c[i][k] = sum([a[i][j]*b[j][k] for j in range(m1)])
  return c

def matrixMultT1(a, b):
  """Matrix muplication of a^T and b"""
  n1, m1, n2, m2 = len(a), len(a[0]), len(b), len(b[0])
  if n1 != n2: raise ValueError, 'Invalid dimensions: n of a and n of b must agree'
  c = [[0] * m2 for i in range(m1)]
  for i in range(m1):
    for k in range(m2):
      c[i][k] = sum([a[j][i]*b[j][k] for j in range(n1)])
  return c

def matrixMultT2(a, b):
  """Matrix mulplication of a and b^T"""
  n1, m1, n2, m2 = len(a), len(a[0]), len(b), len(b[0])
  if m1 != m2: raise ValueError, 'Invalid dimensions: m of a and m of b must agree'
  c = [[0] * n2 for i in range(n1)]
  for i in range(n1):
    for k in range(n2):
      c[i][k] = sum([a[i][j]*b[k][j] for j in range(m1)])
  return c

class Net:
  """Backpropagation network (batch learning)"""
  def __init__(self, trainData, testData):
    self.maxEpoch = 1000
    self.epsilon = 0.005
    self.numHidden = 5
    self.initWeightSd = 0.01
    self.errorPrintFreq = 10
    self.weightPrintFreq = 500
    self.tiny = math.exp(-200)
    self.trainData = trainData
    self.testData = testData
    self.outputs = []

    print 'numHidden=%5d initW=%5.3f epsilon=%5.3f' % (self.numHidden, self.initWeightSd, self.epsilon)

    self.numInputs = len(trainData.inputs[0])
    self.numOutputs = len(trainData.targets[0])

    # weights from inputs to hidden; w_ij is weight from input_i to hidden_j
    self.inHidW =  [ [random.gauss(0, self.initWeightSd) for j in range(self.numHidden)] for i in range(self.numInputs) ]
    # weights from hidden to outputs
    self.hidOutW = [ [random.gauss(0, self.initWeightSd) for j in range(self.numOutputs)] for i in range(self.numHidden) ]
    self.hidB = [random.gauss(0, self.initWeightSd) for i in range(self.numHidden)]
    self.outB = [random.gauss(0, self.initWeightSd) for i in range(self.numOutputs)]

    self.errors = []
    self.E = []
    self.testErrors = []
    self.testE = []

  def forwardPass(self, inputs, targets):
    """Forward Pass
    Run forward passes for all the cases in parallel, using one case per row
    """
    numCases = len(inputs)
    # Calculate the activities of hidden nodes
    self.hidSum = matrixMult(inputs, self.inHidW)
    self.hidSum = [ [self.hidSum[i][j] + self.hidB[j] for j in range(self.numHidden)] for i in range(numCases) ]
    self.hidAct = [ [1 / (1 + math.exp(-self.hidSum[i][j])) for j in range(self.numHidden)] for i in range(numCases) ]
    # Calculate the activities of output nodes, with softmax
    self.outSum = matrixMult(self.hidAct, self.hidOutW)
    self.outSum = [ [self.outSum[i][j] + self.outB[j] for j in range(self.numOutputs)] for i in range(numCases) ]
    preOutputs = [ [math.exp(self.outSum[i][j]) for j in range(self.numOutputs)] for i in range(numCases) ]
    rowSum = [ sum(preOutputs[i]) for i in range(numCases) ]
    self.outputs = [ [preOutputs[i][j] / rowSum[i] for j in range(self.numOutputs)] for i in range(numCases) ]

  def backwardPass(self, inputs, targets):
    numCases = len(inputs)
    # Error derivatives w.r.t. the summed input to each output node (softmax) (size: numCases, numOutputs)
    self.dE_dOutSum = [ [self.outputs[i][j] - targets[i][j] for j in range(self.numOutputs)] for i in range(numCases) ]
    # Error derivatives w.r.t. the hidden-to-output weights (size: numHidden, numOutputs)
    self.dE_dHidOut = matrixMultT1(self.hidAct, self.dE_dOutSum)
    # Error derivatives w.r.t. the hidden node activities (size: numCases, numHidden)
    self.dE_dHidAct = matrixMultT2(self.dE_dOutSum, self.hidOutW)
    # Error derivatives w.r.t. the summed input to each hidden node (size: numCases, numHidden)
    self.dE_dHidSum = [ [self.dE_dHidAct[i][j]*self.hidAct[i][j]*(1-self.hidAct[i][j]) for j in range(self.numHidden)] for i in range(numCases)]
    # Error derivatives w.r.t. the bias for each output node summed over all cases (size: numOutputs)
    self.dE_dOutB = [0] * self.numOutputs;
    for i in range(numCases):
      for j in range(self.numOutputs):
        self.dE_dOutB[j] += self.dE_dOutSum[i][j]  # since activity from bias node is 1
    # Error derivatives w.r.t. the bias for each hidden node summed over all cases (size: numHidden)
    self.dE_dHidB = [0] * self.numHidden;
    for i in range(numCases):
      for j in range(self.numHidden):
        self.dE_dHidB[j] += self.dE_dHidSum[i][j]  # since activity from bias node is 1
    # Error derivatives w.r.t. the input-to-hidden weights (size: numInputs, numHidden)
    self.dE_dInHid = matrixMultT1(inputs, self.dE_dHidSum)

    # Update weights and biases
    self.inHidW = [ [self.inHidW[i][j] - self.epsilon * self.dE_dInHid[i][j] for j in range(self.numHidden)] for i in range(self.numInputs) ]
    self.hidOutW = [ [self.hidOutW[i][j] - self.epsilon * self.dE_dHidOut[i][j] for j in range(self.numOutputs)] for i in range(self.numHidden) ]
    self.hidB = [self.hidB[j] - self.epsilon * self.dE_dHidB[j] for j in range(self.numHidden)]
    self.outB = [self.outB[j] - self.epsilon * self.dE_dOutB[j] for j in range(self.numOutputs)]

  def getErrors(self, targets):
    numCases = len(targets)
    # Cross entropy
    E = 0
    for i in range(numCases):
      for j in range(self.numOutputs):
        E += targets[i][j] * math.log(self.tiny + self.outputs[i][j])
    # Error residual
    # Local representation of network output
    # Output of the network is represented by the node with the highest activity
    guesses = [ [ (self.outputs[i][j] - max(self.outputs[i])) >= 0 for j in range(self.numOutputs)] for i in range(numCases) ]
    #print self.outputs
    #print guesses
    #print targets
    error = 0
    for i in range(numCases):
      for j in range(self.numOutputs):
        error += targets[i][j] - targets[i][j]*guesses[i][j]
    return error, E

  def backprop(self):
    for epoch in range(self.maxEpoch):
      inputs, targets = self.trainData.inputs, self.trainData.targets
      self.forwardPass(inputs, targets)
      self.backwardPass(inputs, targets)
      if (epoch+1) % self.errorPrintFreq == 0:
        error, E = self.getErrors(targets)
        print 'epoch=%5d errors=%5.3f E=%5.3f' % (epoch+1, error, E),
        self.errors.append(error)
        self.E.append(E)
        inputs, targets = self.testData.inputs, self.testData.targets
        self.forwardPass(inputs, targets)
        tError, tE = self.getErrors(targets)
        print 'tErrors=%5.3f tE=%5.3f' % (tError, tE)
        self.testErrors.append(tError)
        self.testE.append(tE)
    self.printResults()

  def printResults(self):
    numCases = len(self.testData.targets)
    for i in range(numCases):
      print 'Case %d:' % i
      print 'targets:',
      for b in self.testData.targets[i]:
        print '%4d' % b,
      print '\noutputs:',
      for b in self.outputs[i]:
        print '%4.3f' % b,
      print '\nsoftmax:',
      guesses = [ [ (self.outputs[i][j] - max(self.outputs[i])) >= 0 for j in range(self.numOutputs)] for i in range(numCases) ]
      for b in guesses[i]:
        print '%4d' % b,
      print '\n'

trainData = Data()

#trainData.inputs = [[0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 1, 1]]
f = open('train-inputs.txt', 'r')
trainData.inputs = readMatrixFromFile(f)
f.close()

#trainData.targets = [[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]
f = open('train-targets.txt', 'r')
trainData.targets = readMatrixFromFile(f)
f.close()

testData = Data()

#testData.inputs = [[1, 0, 0], [1, 0, 1]]
f = open('test-inputs.txt', 'r')
testData.inputs = readMatrixFromFile(f)
f.close()

#testData.targets = [[0, 0, 1], [0, 1, 0]]
f = open('test-targets.txt', 'r')
testData.targets = readMatrixFromFile(f)
f.close()

net = Net(trainData, testData)
net.backprop()

Leave a Reply