What causes RBMs to reconstruct the average of the training set?

by asfarley

In testing an implementation of a continuous RBM (Restricted Boltzmann Machine), certain data sets cause the RBM to learn the average of each image rather than isolating any features. In the following cases, the RBM was initialized with 5 hidden units, a learning rate of 0.01, and 5000-10000 training iterations. The results are similar throughout a fairly wide range of parameters (4-10 hidden units, 1000-100,000 iterations, learning rates from 0.1 to 0.001).

In the first example, RBM training has failed and led to identical weights/features. Reconstruction shows that the RBM just reproduces an average of the inputs.
failed training set

If the training set is altered by adding cases where each feature is present by itself, this issue disappears:
success_training

The implementation in C# is below for reference. It’s hard to tell whether this is a bug in my implementation, something related to the way the features appear in the training sets, or just inherent behavior of RBMs.

It appears that other people have encountered this issue as well:
http://stats.stackexchange.com/questions/115946/what-enforces-features-diversity-in-rbm

http://www.quora.com/I-am-training-a-stacked-RBM-on-the-Olivetti-dataset-faces-to-reduce-the-dimensionality-When-I-put-different-faces-through-the-network-I-get-roughly-the-same-result-and-reconstructing-always-results-in-the-average-face-of-the-training-What-am-I-doing-wrong

/// <summary>
/// Single-layer Continuous Restricted Boltzmann Machine trained with CD-1
/// (one-step contrastive divergence).
/// NOTE(review): there are no bias terms for the visible or hidden units, and
/// weights are initialized uniformly in [0, 0.1) (all non-negative). Both are
/// plausible contributors to the "reconstructs the average" failure described
/// above -- confirm against a reference implementation.
/// </summary>
public class RBM
{
    // Weights[h][v] connects hidden unit h to visible unit v.
    public double[][] Weights;
    // Learning rate applied to each weight update.
    public double L;
    // Magnitude of the uniform noise added to reconstructions.
    public double N;

    public double[][] TrainingSet;
    public int NumberOfCycles;

    [NonSerialized] private Thread _trainingThread = null;
    // volatile: written by the controlling thread, polled by the training thread.
    [NonSerialized] private volatile bool stopTraining = false;

    private Random r;

    /// <summary>Constructs the machine and randomizes its weights.</summary>
    /// <param name="dataLength">Input (visible) vector size.</param>
    /// <param name="numHiddenUnits">Number of hidden binary units.</param>
    /// <param name="learningRate">Weight update multiplier.</param>
    /// <param name="noiseMagnitude">Noise magnitude in reconstruction.</param>
    public RBM(int dataLength, int numHiddenUnits, double learningRate, double noiseMagnitude)
    {
        r = new Random();
        L = learningRate;
        N = noiseMagnitude;
        InitWeights(dataLength, numHiddenUnits);
    }

    /// <summary>
    /// Synchronous training: performs <paramref name="numberOfCycles"/> CD-1
    /// updates, cycling through the training set in order.
    /// </summary>
    public void Train(double[][] trainingSet, int numberOfCycles)
    {
        for (var i = 0; i < numberOfCycles; i++)
        {
            var input = trainingSet[i % trainingSet.Length];
            UpdateWeights(input);
        }
    }

    /// <summary>
    /// Background training loop; runs until <see cref="StopTrainMultithreaded"/>
    /// raises the stop flag.
    /// Fixed: exits by returning from the loop instead of calling
    /// Thread.Abort(), which is obsolete and throws
    /// PlatformNotSupportedException on .NET Core / .NET 5+.
    /// </summary>
    public void TrainWorker()
    {
        var i = 0;
        while (!stopTraining)
        {
            var input = TrainingSet[(i++) % TrainingSet.Length];
            UpdateWeights(input);
        }
    }

    /// <summary>Starts <see cref="TrainWorker"/> on a background thread.</summary>
    public void TrainMultithreaded(double[][] trainingSet, int numberOfCycles)
    {
        TrainingSet = trainingSet;
        NumberOfCycles = numberOfCycles;

        stopTraining = false;
        _trainingThread = new Thread(new ThreadStart(TrainWorker));
        _trainingThread.Start();
    }

    /// <summary>Signals the training thread to stop after its current update.</summary>
    public void StopTrainMultithreaded()
    {
        stopTraining = true;
    }

    /// <summary>Initializes all weights uniformly in [0, 0.1).</summary>
    public void InitWeights(int dataLength, int numHiddenUnits)
    {
        Weights = new double[numHiddenUnits][];
        for (var i = 0; i < numHiddenUnits; i++)
        {
            Weights[i] = new double[dataLength];
            for (var j = 0; j < dataLength; j++)
            {
                Weights[i][j] = r.NextDouble() / 10;
            }
        }
    }

    /// <summary>Dot product of the input with the given hidden unit's weight row.</summary>
    private double ActivationEnergy(double[] input, int hiddenUnitNumber)
    {
        double sum = 0;
        for (var i = 0; i < Weights[hiddenUnitNumber].Length; i++)
        {
            sum += input[i] * Weights[hiddenUnitNumber][i];
        }

        return sum;
    }

    /// <summary>Logistic sigmoid 1 / (1 + e^-x).</summary>
    private double Sigma(double activationEnergy)
    {
        return 1 / (1 + Math.Exp(-activationEnergy));
    }

    /// <summary>
    /// Stochastic binary hidden activations: unit i fires with probability
    /// sigma(w_i . input).
    /// Fixed: the original (HTML-mangled) line effectively AND-ed the sample
    /// with activations[i], which is still default(bool) false, so the
    /// expression reduces to the standard Bernoulli draw against p.
    /// </summary>
    public bool[] ComputeActivations(double[] input)
    {
        var activations = new bool[Weights.Length];
        for (var i = 0; i < Weights.Length; i++)
        {
            var p = Sigma(ActivationEnergy(input, i));
            activations[i] = r.NextDouble() < p;
        }
        return activations;
    }

    /// <summary>Real-valued hidden activation probabilities (no sampling).</summary>
    public double[] ComputeActivationsExact(double[] input)
    {
        var activations = new double[Weights.Length];
        for (var i = 0; i < Weights.Length; i++)
        {
            var p = Sigma(ActivationEnergy(input, i));
            activations[i] = p;
        }
        return activations;
    }

    /// <summary>
    /// CD statistics: agreement[i][j] is input[j] when hidden unit i fired,
    /// else 0 (an outer product restricted to active hidden units).
    /// </summary>
    private double[][] ComputeAgreement(double[] input, bool[] activations)
    {
        var agreement = new double[activations.Length][];
        for (var i = 0; i < activations.Length; i++)
        {
            var a = new double[input.Length];
            for (var j = 0; j < input.Length; j++)
            {
                a[j] = activations[i] ? input[j] : 0;
            }
            agreement[i] = a;
        }

        return agreement;
    }

    /// <summary>
    /// Noise-free reconstruction of the visible layer from real-valued hidden
    /// activations. (Loop header reconstructed from the HTML-stripped source.)
    /// </summary>
    public double[] ReconstructExact(double[] activations)
    {
        var reconstruction = new double[Weights[0].Length];
        for (var i = 0; i < Weights[0].Length; i++)
        {
            var sum = activations.Select((t, j) => t * Weights[j][i]).Sum();
            var p = Sigma(sum);
            reconstruction[i] = p;
        }
        return reconstruction;
    }

    /// <summary>
    /// Reconstruction of the visible layer from binary hidden activations,
    /// plus uniform noise of magnitude N centered on zero.
    /// (Loop header reconstructed from the HTML-stripped source.)
    /// </summary>
    public double[] Reconstruct(bool[] activations)
    {
        var reconstruction = new double[Weights[0].Length];
        for (var i = 0; i < Weights[0].Length; i++)
        {
            var sum = activations.Select((t, j) => t ? Weights[j][i] : 0.0).Sum();
            var p = Sigma(sum);
            reconstruction[i] = p + N * (r.NextDouble() - 0.5);
        }
        return reconstruction;
    }

    /// <summary>
    /// One CD-1 update: W += L * (positive statistics - negative statistics),
    /// where the negative phase uses a single reconstruction step.
    /// </summary>
    private void UpdateWeights(double[] input)
    {
        var activations = ComputeActivations(input);
        var ePositive = ComputeAgreement(input, activations);
        var reconstruction = Reconstruct(activations);
        var ractivations = ComputeActivations(reconstruction);
        var eNegative = ComputeAgreement(reconstruction, ractivations);

        for (var i = 0; i < Weights.Length; i++)
        {
            for (var j = 0; j < Weights[i].Length; j++)
            {
                Weights[i][j] = Weights[i][j] + L * (ePositive[i][j] - eNegative[i][j]);
            }
        }
    }
}

Advertisements