@@ -1,84 +1,30 @@
 use crate::{
-    algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer, network::NeuraNetwork,
+    algebra::NeuraVectorSpace,
+    derivable::NeuraLoss,
+    layer::NeuraLayer,
+    network::{sequential::NeuraSequential, NeuraTrainableNetwork},
 };
 
-// TODO: move this trait to layer/mod.rs
-pub trait NeuraTrainableLayer: NeuraLayer {
-    type Delta: NeuraVectorSpace;
-
-    /// Computes the backpropagation term and the derivative of the internal weights,
-    /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
-    ///
-    /// Note: we introduce the term `epsilon`, which together with the activation of the current function can be used to compute `delta_l`:
-    /// ```no_rust
-    /// f_l'(a_l) * epsilon_l = delta_l
-    /// ```
-    ///
-    /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
-    /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
-    /// Using this intermediate value for `delta` allows us to isolate it computation to the respective layers.
-    fn backpropagate(
-        &self,
-        input: &Self::Input,
-        epsilon: Self::Output,
-    ) -> (Self::Input, Self::Delta);
-
-    /// Computes the regularization
-    fn regularize(&self) -> Self::Delta;
-
-    /// Applies `δW_l` to the weights of the layer
-    fn apply_gradient(&mut self, gradient: &Self::Delta);
-
-    /// Called before an iteration begins, to allow the layer to set itself up for training.
-    #[inline(always)]
-    fn prepare_epoch(&mut self) {}
-
-    /// Called at the end of training, to allow the layer to clean itself up
-    #[inline(always)]
-    fn cleanup(&mut self) {}
-}
-
-pub trait NeuraTrainable: NeuraLayer {
-    type Delta: NeuraVectorSpace;
-
-    fn apply_gradient(&mut self, gradient: &Self::Delta);
-
-    /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
-    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
-        &self,
-        input: &Self::Input,
-        target: &Loss::Target,
-        loss: Loss,
-    ) -> (Self::Input, Self::Delta);
-
-    /// Should return the regularization gradient
-    fn regularize(&self) -> Self::Delta;
-
-    /// Called before an iteration begins, to allow the network to set itself up for training.
-    fn prepare_epoch(&mut self);
-
-    /// Called at the end of training, to allow the network to clean itself up
-    fn cleanup(&mut self);
-}
-
 pub trait NeuraGradientSolver<Output, Target = Output> {
     fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
         &self,
-        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        trainable: &NeuraSequential<Layer, ChildNetwork>,
         input: &Layer::Input,
         target: &Target,
-    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
+    ) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta
     where
-        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
+        NeuraSequential<Layer, ChildNetwork>:
+            NeuraTrainableNetwork<Input = Layer::Input, Output = Output>;
 
     fn score<Layer: NeuraLayer, ChildNetwork>(
         &self,
-        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        trainable: &NeuraSequential<Layer, ChildNetwork>,
         input: &Layer::Input,
         target: &Target,
     ) -> f64
     where
-        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
+        NeuraSequential<Layer, ChildNetwork>:
+            NeuraTrainableNetwork<Input = Layer::Input, Output = Output>;
 }
 
 #[non_exhaustive]
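For reference, the `(epsilon_{l-1}, δW_l)` contract documented on `NeuraTrainableLayer::backpropagate` in the removed block above can be illustrated with a tiny standalone dense layer. The `DenseLayer` type below is a hypothetical sketch over plain `Vec`s, not the crate's `NeuraDenseLayer`; it assumes a linear activation so that `delta_l == epsilon_l`:

```rust
// Illustration of the backpropagate contract only -- not the crate's code.
struct DenseLayer {
    /// weights[j][i]: connection from input `i` to output `j`
    weights: Vec<Vec<f64>>,
}

impl DenseLayer {
    /// `input` is the previous layer's activation, `epsilon` the next layer's
    /// backpropagation term. With a linear activation, `delta_l == epsilon_l`, so:
    ///   delta_w[j][i]   = delta_l[j] * input[i]
    ///   epsilon_prev[i] = sum_j weights[j][i] * delta_l[j]
    fn backpropagate(&self, input: &[f64], epsilon: &[f64]) -> (Vec<f64>, Vec<Vec<f64>>) {
        let delta = epsilon; // linear activation: f'(a) == 1
        let delta_w: Vec<Vec<f64>> = delta
            .iter()
            .map(|d| input.iter().map(|x| d * x).collect())
            .collect();
        let epsilon_prev: Vec<f64> = (0..input.len())
            .map(|i| {
                self.weights
                    .iter()
                    .zip(delta)
                    .map(|(row, d)| row[i] * d)
                    .sum()
            })
            .collect();
        (epsilon_prev, delta_w)
    }
}

fn main() {
    let layer = DenseLayer {
        weights: vec![vec![0.5, -1.0]],
    };
    let (epsilon_prev, delta_w) = layer.backpropagate(&[1.0, 2.0], &[0.25]);
    assert_eq!(delta_w, vec![vec![0.25, 0.5]]);
    assert_eq!(epsilon_prev, vec![0.125, -0.25]);
}
```

This is the same per-layer computation that the gradient solver drives through the whole `NeuraSequential` chain via `NeuraTrainableNetwork::backpropagate` in the hunks below.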
@@ -97,24 +43,26 @@ impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone>
 {
     fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
         &self,
-        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        trainable: &NeuraSequential<Layer, ChildNetwork>,
         input: &Layer::Input,
         target: &Loss::Target,
-    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
+    ) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta
     where
-        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
+        NeuraSequential<Layer, ChildNetwork>:
+            NeuraTrainableNetwork<Input = Layer::Input, Output = [f64; N]>,
     {
         trainable.backpropagate(input, target, self.loss.clone()).1
     }
 
     fn score<Layer: NeuraLayer, ChildNetwork>(
         &self,
-        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        trainable: &NeuraSequential<Layer, ChildNetwork>,
         input: &Layer::Input,
         target: &Loss::Target,
     ) -> f64
     where
-        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
+        NeuraSequential<Layer, ChildNetwork>:
+            NeuraTrainableNetwork<Input = Layer::Input, Output = [f64; N]>,
     {
         let output = trainable.eval(&input);
         self.loss.eval(target, &output)
@@ -183,11 +131,12 @@ impl NeuraBatchedTrainer {
     >(
         &self,
         gradient_solver: GradientSolver,
-        network: &mut NeuraNetwork<Layer, ChildNetwork>,
+        network: &mut NeuraSequential<Layer, ChildNetwork>,
         inputs: Inputs,
         test_inputs: &[(Layer::Input, Target)],
     ) where
-        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>,
+        NeuraSequential<Layer, ChildNetwork>:
+            NeuraTrainableNetwork<Input = Layer::Input, Output = Output>,
         Layer::Input: Clone,
     {
         let mut iter = inputs.into_iter();
@@ -197,10 +146,10 @@
 
         // Contains `momentum_factor * factor * gradient_sum_previous_iter`
         let mut previous_gradient_sum =
-            <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
+            <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta::zero();
         'd: for iteration in 0..self.iterations {
             let mut gradient_sum =
-                <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
+                <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta::zero();
             network.prepare_epoch();
 
             for _ in 0..self.batch_size {
@@ -249,16 +198,18 @@ mod test {
         assert_approx,
         derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
         layer::NeuraDenseLayer,
-        network::NeuraNetworkTail,
-        neura_network,
+        network::sequential::NeuraSequentialTail,
+        neura_sequential,
     };
 
     #[test]
     fn test_backpropagation_simple() {
         for wa in [0.0, 0.25, 0.5, 1.0] {
             for wb in [0.0, 0.25, 0.5, 1.0] {
-                let network =
-                    NeuraNetwork::new(NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), ());
+                let network = NeuraSequential::new(
+                    NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0),
+                    (),
+                );
 
                 let gradient =
                     NeuraBackprop::new(Euclidean).get_gradient(&network, &[1.0, 1.0], &[0.0]);
@@ -274,7 +225,7 @@ mod test {
     fn test_backpropagation_complex() {
         const EPSILON: f64 = 0.00001;
         // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/
-        let network = neura_network![
+        let network = neura_sequential![
             NeuraDenseLayer::new([[0.11, 0.21], [0.12, 0.08]], [0.0; 2], Linear, NeuraL0),
             NeuraDenseLayer::new([[0.14, 0.15]], [0.0], Linear, NeuraL0)
         ];
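Taken together, the renamed pieces compose the same way the tests above use them. The sketch below is pieced together only from calls that appear in this diff (`neura_sequential!`, `NeuraDenseLayer::new`, `NeuraBackprop::new(Euclidean).get_gradient`, `apply_gradient`); the `use` paths, the omitted trait imports, and the bare gradient application (no learning-rate scaling or batching, which `NeuraBatchedTrainer::train` normally handles) are assumptions for illustration only:

```rust
// Sketch only: one manual gradient-descent step with the renamed API.
// Import paths mirror the ones the test module above uses; trait imports
// (e.g. for `apply_gradient`) are elided for brevity.
use crate::{
    derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
    layer::NeuraDenseLayer,
    neura_sequential,
};

fn manual_training_step() {
    // Two dense layers, as in `test_backpropagation_complex`.
    let mut network = neura_sequential![
        NeuraDenseLayer::new([[0.11, 0.21], [0.12, 0.08]], [0.0; 2], Linear, NeuraL0),
        NeuraDenseLayer::new([[0.14, 0.15]], [0.0], Linear, NeuraL0)
    ];

    // Compute the gradient with the Euclidean-loss backprop solver and apply it.
    // `NeuraBatchedTrainer::train` wraps this step in its batching/momentum loop
    // as shown earlier in this file.
    let gradient = NeuraBackprop::new(Euclidean).get_gradient(&network, &[2.0, 3.0], &[1.0]);
    network.apply_gradient(&gradient);
}
```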