🔥 Refactor of NeuraTrainableLayer, split it into multiple traits

main
Shad Amethyst 2 years ago
parent f3752bd411
commit d40098d2ef

@ -30,7 +30,7 @@ fn main() {
network.layer.weights.clone(), network.layer.weights.clone(),
network.layer.bias.clone(), network.layer.bias.clone(),
network.child_network.layer.weights.clone(), network.child_network.layer.weights.clone(),
network.child_network.layer.bias.clone() network.child_network.layer.bias.clone(),
)]; )];
for iteration in 0..4 { for iteration in 0..4 {
@ -45,7 +45,7 @@ fn main() {
network.layer.weights.clone(), network.layer.weights.clone(),
network.layer.bias.clone(), network.layer.bias.clone(),
network.child_network.layer.weights.clone(), network.child_network.layer.weights.clone(),
network.child_network.layer.bias.clone() network.child_network.layer.bias.clone(),
)); ));
} }

@ -1,6 +1,9 @@
use num::ToPrimitive; use num::ToPrimitive;
use crate::{derivable::NeuraLoss, layer::NeuraTrainableLayer, network::NeuraTrainableNetworkBase}; use crate::{
derivable::NeuraLoss, layer::NeuraTrainableLayerBackprop, layer::NeuraTrainableLayerSelf,
network::NeuraTrainableNetworkBase,
};
use super::*; use super::*;
@ -53,23 +56,26 @@ impl<LayerOutput, Target, Loss: NeuraLoss<LayerOutput, Target = Target>>
} }
} }
impl<LayerOutput, Target, Loss> NeuraGradientSolverTransient<LayerOutput> impl<
for (&NeuraBackprop<Loss>, &Target)
{
fn eval_layer<
Input, Input,
NetworkGradient, Target,
RecGradient, Loss,
Layer: NeuraTrainableLayer<Input, Output = LayerOutput>, Layer: NeuraTrainableLayerBackprop<Input> + NeuraTrainableLayerSelf<Input>,
>( > NeuraGradientSolverTransient<Input, Layer> for (&NeuraBackprop<Loss>, &Target)
{
fn eval_layer<NetworkGradient, RecGradient>(
&self, &self,
layer: &Layer, layer: &Layer,
input: &Input, input: &Input,
rec_opt_output: Self::Output<LayerOutput, RecGradient>, _output: &Layer::Output,
intermediary: &Layer::IntermediaryRepr,
rec_opt_output: Self::Output<Layer::Output, RecGradient>,
combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient, combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
) -> Self::Output<Input, NetworkGradient> { ) -> Self::Output<Input, NetworkGradient> {
let (epsilon_in, rec_gradient) = rec_opt_output; let (epsilon_in, rec_gradient) = rec_opt_output;
let (epsilon_out, layer_gradient) = layer.backprop_layer(input, epsilon_in);
let epsilon_out = layer.backprop_layer(input, intermediary, &epsilon_in);
let layer_gradient = layer.get_gradient(input, intermediary, &epsilon_in);
(epsilon_out, combine_gradients(layer_gradient, rec_gradient)) (epsilon_out, combine_gradients(layer_gradient, rec_gradient))
} }
@ -80,7 +86,11 @@ mod test {
use approx::assert_relative_eq; use approx::assert_relative_eq;
use super::*; use super::*;
use crate::{prelude::*, derivable::{activation::Tanh, loss::Euclidean, NeuraDerivable}, utils::uniform_vector}; use crate::{
derivable::{activation::Tanh, loss::Euclidean, NeuraDerivable},
prelude::*,
utils::uniform_vector,
};
#[test] #[test]
fn test_backprop_epsilon_bias() { fn test_backprop_epsilon_bias() {
@ -91,16 +101,22 @@ mod test {
let network = neura_sequential![ let network = neura_sequential![
neura_layer!("dense", 4, f64).activation(Tanh), neura_layer!("dense", 4, f64).activation(Tanh),
neura_layer!("dense", 2, f64).activation(Tanh) neura_layer!("dense", 2, f64).activation(Tanh)
].construct(NeuraShape::Vector(4)).unwrap(); ]
.construct(NeuraShape::Vector(4))
.unwrap();
let optimizer = NeuraBackprop::new(Euclidean); let optimizer = NeuraBackprop::new(Euclidean);
let input = uniform_vector(4); let input = uniform_vector(4);
let target = uniform_vector(2); let target = uniform_vector(2);
let layer1_intermediary = &network.layer.weights * &input; let layer1_intermediary = &network.layer.weights * &input;
let layer2_intermediary = &network.child_network.layer.weights * layer1_intermediary.map(|x| x.tanh()); let layer2_intermediary =
&network.child_network.layer.weights * layer1_intermediary.map(|x| x.tanh());
assert_relative_eq!(layer1_intermediary.map(|x| x.tanh()), network.clone().trim_tail().eval(&input)); assert_relative_eq!(
layer1_intermediary.map(|x| x.tanh()),
network.clone().trim_tail().eval(&input)
);
let output = network.eval(&input); let output = network.eval(&input);
@ -110,25 +126,27 @@ mod test {
for i in 0..2 { for i in 0..2 {
delta2_expected[i] *= Tanh.derivate(layer2_intermediary[i]); delta2_expected[i] *= Tanh.derivate(layer2_intermediary[i]);
} }
let delta2_actual = gradient.1.0.1; let delta2_actual = gradient.1 .0 .1;
assert_relative_eq!(delta2_actual.as_slice(), delta2_expected.as_slice()); assert_relative_eq!(delta2_actual.as_slice(), delta2_expected.as_slice());
let gradient2_expected = &delta2_expected * layer1_intermediary.map(|x| x.tanh()).transpose(); let gradient2_expected =
let gradient2_actual = gradient.1.0.0; &delta2_expected * layer1_intermediary.map(|x| x.tanh()).transpose();
let gradient2_actual = gradient.1 .0 .0;
assert_relative_eq!(gradient2_actual.as_slice(), gradient2_expected.as_slice()); assert_relative_eq!(gradient2_actual.as_slice(), gradient2_expected.as_slice());
let mut delta1_expected = network.child_network.layer.weights.transpose() * delta2_expected; let mut delta1_expected =
network.child_network.layer.weights.transpose() * delta2_expected;
for i in 0..4 { for i in 0..4 {
delta1_expected[i] *= Tanh.derivate(layer1_intermediary[i]); delta1_expected[i] *= Tanh.derivate(layer1_intermediary[i]);
} }
let delta1_actual = gradient.0.1; let delta1_actual = gradient.0 .1;
assert_relative_eq!(delta1_actual.as_slice(), delta1_expected.as_slice()); assert_relative_eq!(delta1_actual.as_slice(), delta1_expected.as_slice());
let gradient1_expected = &delta1_expected * input.transpose(); let gradient1_expected = &delta1_expected * input.transpose();
let gradient1_actual = gradient.0.0; let gradient1_actual = gradient.0 .0;
assert_relative_eq!(gradient1_actual.as_slice(), gradient1_expected.as_slice()); assert_relative_eq!(gradient1_actual.as_slice(), gradient1_expected.as_slice());
} }

@ -1,7 +1,7 @@
use nalgebra::{DVector, Scalar}; use nalgebra::{DVector, Scalar};
use num::{traits::NumAssignOps, Float, ToPrimitive}; use num::{traits::NumAssignOps, Float, ToPrimitive};
use crate::derivable::NeuraDerivable; use crate::{derivable::NeuraDerivable, prelude::NeuraTrainableLayerSelf};
use super::*; use super::*;
@ -90,22 +90,23 @@ impl<Act, LayerOutput> NeuraGradientSolverFinal<LayerOutput> for NeuraForwardPai
} }
} }
impl<F: Float + Scalar + NumAssignOps, Act: NeuraDerivable<F>> impl<
NeuraGradientSolverTransient<DVector<F>> for NeuraForwardPair<Act> F: Float + Scalar + NumAssignOps,
{ Act: NeuraDerivable<F>,
fn eval_layer<
Input, Input,
NetworkGradient, Layer: NeuraTrainableLayerSelf<Input, Output = DVector<F>>,
RecGradient, > NeuraGradientSolverTransient<Input, Layer> for NeuraForwardPair<Act>
Layer: NeuraTrainableLayer<Input, Output = DVector<F>>, {
>( fn eval_layer<NetworkGradient, RecGradient>(
&self, &self,
layer: &Layer, layer: &Layer,
input: &Input, input: &Input,
output: &Layer::Output,
intermediary: &Layer::IntermediaryRepr,
rec_gradient: RecGradient, rec_gradient: RecGradient,
combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient, combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
) -> Self::Output<Input, NetworkGradient> { ) -> Self::Output<Input, NetworkGradient> {
let output = layer.eval(input); // let output = layer.eval(input);
let goodness = output let goodness = output
.iter() .iter()
.copied() .copied()
@ -129,7 +130,7 @@ impl<F: Float + Scalar + NumAssignOps, Act: NeuraDerivable<F>>
} }
// TODO: split backprop_layer into eval_training, get_gradient and get_backprop // TODO: split backprop_layer into eval_training, get_gradient and get_backprop
let (_, layer_gradient) = layer.backprop_layer(input, goodness_derivative); let layer_gradient = layer.get_gradient(input, intermediary, &goodness_derivative);
combine_gradients(layer_gradient, rec_gradient) combine_gradients(layer_gradient, rec_gradient)
} }

@ -5,7 +5,7 @@ mod forward_forward;
pub use forward_forward::NeuraForwardForward; pub use forward_forward::NeuraForwardForward;
use crate::{ use crate::{
layer::NeuraTrainableLayer, layer::NeuraTrainableLayerBase,
network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase}, network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase},
}; };
@ -17,17 +17,16 @@ pub trait NeuraGradientSolverFinal<LayerOutput>: NeuraGradientSolverBase {
fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()>; fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()>;
} }
pub trait NeuraGradientSolverTransient<LayerOutput>: NeuraGradientSolverBase { pub trait NeuraGradientSolverTransient<Input, Layer: NeuraTrainableLayerBase<Input>>:
fn eval_layer< NeuraGradientSolverBase
Input, {
NetworkGradient, fn eval_layer<NetworkGradient, RecGradient>(
RecGradient,
Layer: NeuraTrainableLayer<Input, Output = LayerOutput>,
>(
&self, &self,
layer: &Layer, layer: &Layer,
input: &Input, input: &Input,
rec_opt_output: Self::Output<LayerOutput, RecGradient>, output: &Layer::Output,
layer_intermediary: &Layer::IntermediaryRepr,
rec_opt_output: Self::Output<Layer::Output, RecGradient>,
combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient, combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
) -> Self::Output<Input, NetworkGradient>; ) -> Self::Output<Input, NetworkGradient>;
} }

@ -161,9 +161,9 @@ impl<
fn eval(&self, input: &DVector<F>) -> Self::Output { fn eval(&self, input: &DVector<F>) -> Self::Output {
assert_eq!(input.shape().0, self.weights.shape().1); assert_eq!(input.shape().0, self.weights.shape().1);
let res = &self.weights * input + &self.bias; let evaluated = &self.weights * input + &self.bias;
res.map(|x| self.activation.eval(x)) evaluated.map(|x| self.activation.eval(x))
} }
} }
@ -171,9 +171,17 @@ impl<
F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign, F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>, Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>, Reg: NeuraDerivable<F>,
> NeuraTrainableLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg> > NeuraTrainableLayerBase<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{ {
type Gradient = (DMatrix<F>, DVector<F>); type Gradient = (DMatrix<F>, DVector<F>);
type IntermediaryRepr = DVector<F>; // pre-activation values
fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
let evaluated = &self.weights * input + &self.bias;
let output = evaluated.map(|x| self.activation.eval(x));
(output, evaluated)
}
fn default_gradient(&self) -> Self::Gradient { fn default_gradient(&self) -> Self::Gradient {
( (
@ -182,41 +190,70 @@ impl<
) )
} }
fn backprop_layer( fn apply_gradient(&mut self, gradient: &Self::Gradient) {
self.weights += &gradient.0;
self.bias += &gradient.1;
}
}
impl<
F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraTrainableLayerSelf<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{
fn regularize_layer(&self) -> Self::Gradient {
(
self.weights.map(|x| self.regularization.derivate(x)),
DVector::zeros(self.bias.shape().0),
)
}
fn get_gradient(
&self, &self,
input: &DVector<F>, input: &DVector<F>,
epsilon: Self::Output, evaluated: &Self::IntermediaryRepr,
) -> (DVector<F>, Self::Gradient) { epsilon: &Self::Output,
let evaluated = &self.weights * input + &self.bias; ) -> Self::Gradient {
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta` // with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone(); let mut delta = epsilon.clone();
for i in 0..delta.len() { for i in 0..delta.len() {
// TODO: remove `- self.bias[i]`
delta[i] *= self.activation.derivate(evaluated[i]); delta[i] *= self.activation.derivate(evaluated[i]);
} }
// Compute the weight gradient
let weights_gradient = &delta * input.transpose(); let weights_gradient = &delta * input.transpose();
let new_epsilon = self.weights.tr_mul(&delta);
// According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
// The gradient of the bias is equal to the delta term of the backpropagation algorithm // The gradient of the bias is equal to the delta term of the backpropagation algorithm
let bias_gradient = delta; let bias_gradient = delta;
(new_epsilon, (weights_gradient, bias_gradient)) (weights_gradient, bias_gradient)
} }
}
fn regularize_layer(&self) -> Self::Gradient { impl<
( F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
self.weights.map(|x| self.regularization.derivate(x)), Act: NeuraDerivable<F>,
DVector::zeros(self.bias.shape().0), Reg: NeuraDerivable<F>,
) > NeuraTrainableLayerBackprop<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
} {
fn backprop_layer(
&self,
input: &DVector<F>,
evaluated: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> DVector<F> {
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone();
fn apply_gradient(&mut self, gradient: &Self::Gradient) { for i in 0..delta.len() {
self.weights += &gradient.0; delta[i] *= self.activation.derivate(evaluated[i]);
self.bias += &gradient.1; }
self.weights.tr_mul(&delta)
} }
} }

@ -61,24 +61,15 @@ impl<R: Rng, F: Float> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
} }
} }
impl<R: Rng, F: Float> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> { impl<R: Rng, F: Float> NeuraTrainableLayerBase<DVector<F>> for NeuraDropoutLayer<R> {
type Gradient = (); type Gradient = ();
type IntermediaryRepr = ();
fn default_gradient(&self) -> Self::Gradient { fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
() (self.eval(input), ())
} }
fn backprop_layer( fn default_gradient(&self) -> Self::Gradient {
&self,
_input: &DVector<F>,
mut epsilon: Self::Output,
) -> (DVector<F>, Self::Gradient) {
self.apply_dropout(&mut epsilon);
(epsilon, ())
}
fn regularize_layer(&self) -> Self::Gradient {
() ()
} }
@ -110,6 +101,36 @@ impl<R: Rng, F: Float> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R>
} }
} }
/// Weight-gradient methods for the dropout layer.
///
/// The layer's `Gradient` type is `()`, so both methods return the unit value
/// and ignore their arguments.
impl<R, F> NeuraTrainableLayerSelf<DVector<F>> for NeuraDropoutLayer<R>
where
    R: Rng,
    F: Float,
{
    /// Returns the (empty) regularization gradient.
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    /// Returns the (empty) layer gradient; none of the arguments are read.
    fn get_gradient(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
}
/// Backpropagation through the dropout layer.
impl<R, F> NeuraTrainableLayerBackprop<DVector<F>> for NeuraDropoutLayer<R>
where
    R: Rng,
    F: Float,
{
    /// Returns a copy of `epsilon` after passing it through `apply_dropout`.
    ///
    /// `_input` and `_intermediary` are not read. The incoming `epsilon` is left
    /// untouched: the dropout is applied to a clone.
    // NOTE(review): presumably `apply_dropout` reuses the mask from the forward
    // pass; its definition is outside this view, so confirm.
    fn backprop_layer(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> DVector<F> {
        let mut masked_epsilon = epsilon.clone();
        self.apply_dropout(&mut masked_epsilon);
        masked_epsilon
    }
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
@ -121,7 +142,7 @@ mod test {
.unwrap(); .unwrap();
for _ in 0..100 { for _ in 0..100 {
<NeuraDropoutLayer<_> as NeuraTrainableLayer<DVector<f64>>>::prepare_layer( <NeuraDropoutLayer<_> as NeuraTrainableLayerBase<DVector<f64>>>::prepare_layer(
&mut layer, true, &mut layer, true,
); );
assert!(layer.multiplier.is_finite()); assert!(layer.multiplier.is_finite());

@ -23,6 +23,7 @@ impl NeuraShape {
} }
pub trait NeuraLayer<Input> { pub trait NeuraLayer<Input> {
/// What type the layer outputs
type Output; type Output;
fn eval(&self, input: &Input) -> Self::Output; fn eval(&self, input: &Input) -> Self::Output;
@ -46,12 +47,64 @@ pub trait NeuraPartialLayer {
fn output_shape(constructed: &Self::Constructed) -> NeuraShape; fn output_shape(constructed: &Self::Constructed) -> NeuraShape;
} }
pub trait NeuraTrainableLayer<Input>: NeuraLayer<Input> { pub trait NeuraTrainableLayerBase<Input>: NeuraLayer<Input> {
/// The representation of the layer gradient as a vector space /// The representation of the layer gradient as a vector space
type Gradient: NeuraVectorSpace; type Gradient: NeuraVectorSpace;
/// An intermediary object type to be passed to the various training methods
type IntermediaryRepr;
fn default_gradient(&self) -> Self::Gradient; fn default_gradient(&self) -> Self::Gradient;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Gradient);
fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr);
/// Arbitrary computation that can be executed at the start of an epoch
#[allow(unused_variables)]
#[inline(always)]
fn prepare_layer(&mut self, is_training: bool) {}
}
/// Contains the methods relative to a layer's ability to compute its own weight gradients,
/// given the derivative of the output variables.
///
/// This trait only covers the layer's own parameters: both methods return a
/// `Self::Gradient`, the gradient type declared by `NeuraTrainableLayerBase`.
pub trait NeuraTrainableLayerSelf<Input>: NeuraTrainableLayerBase<Input> {
/// Computes the regularization term of the layer, expressed as a `Self::Gradient`.
fn regularize_layer(&self) -> Self::Gradient;
/// Computes the layer's gradient from `epsilon`, the derivative with respect to
/// the layer's output.
///
/// `input` is the value that was fed to the layer.
///
/// `intermediary` is guaranteed to have been generated by a previous call to `eval_training`,
/// without mutation of `self` in-between, and with the same `input`.
fn get_gradient(
&self,
input: &Input,
intermediary: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> Self::Gradient;
}
// impl<Input, Layer: NeuraTrainableLayerBase<Input, Gradient = ()>> NeuraTrainableLayerSelf<Input>
// for Layer
// {
// #[inline(always)]
// fn regularize_layer(&self) -> Self::Gradient {
// ()
// }
// #[inline(always)]
// fn get_gradient(
// &self,
// input: &Input,
// intermediary: &Self::IntermediaryRepr,
// epsilon: Self::Output,
// ) -> Self::Gradient {
// ()
// }
// }
pub trait NeuraTrainableLayerBackprop<Input>: NeuraTrainableLayerBase<Input> {
/// Computes the backpropagation term and the derivative of the internal weights, /// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
/// ///
@ -63,42 +116,31 @@ pub trait NeuraTrainableLayer<Input>: NeuraLayer<Input> {
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`, /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers. /// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backprop_layer(&self, input: &Input, epsilon: Self::Output) -> (Input, Self::Gradient); fn backprop_layer(
&self,
/// Computes the regularization input: &Input,
fn regularize_layer(&self) -> Self::Gradient; intermediary: &Self::IntermediaryRepr,
epsilon: &Self::Output,
/// Applies `δW_l` to the weights of the layer ) -> Input;
fn apply_gradient(&mut self, gradient: &Self::Gradient);
/// Arbitrary computation that can be executed at the start of an epoch
#[allow(unused_variables)]
#[inline(always)]
fn prepare_layer(&mut self, is_training: bool) {}
} }
impl<Input: Clone> NeuraTrainableLayer<Input> for () { impl<Input: Clone> NeuraTrainableLayerBase<Input> for () {
type Gradient = (); type Gradient = ();
type IntermediaryRepr = ();
#[inline(always)] #[inline(always)]
fn default_gradient(&self) -> Self::Gradient { fn default_gradient(&self) -> Self::Gradient {
() ()
} }
#[inline(always)]
fn backprop_layer(&self, _input: &Input, epsilon: Self::Output) -> (Input, Self::Gradient) {
(epsilon, ())
}
#[inline(always)]
fn regularize_layer(&self) -> Self::Gradient {
()
}
#[inline(always)] #[inline(always)]
fn apply_gradient(&mut self, _gradient: &Self::Gradient) { fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop // Noop
} }
fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
(self.eval(input), ())
}
} }
/// Temporary implementation of neura_layer /// Temporary implementation of neura_layer

@ -1,4 +1,4 @@
use nalgebra::{DVector, Scalar}; use nalgebra::{DMatrix, DVector, Scalar};
use num::{traits::NumAssignOps, Float}; use num::{traits::NumAssignOps, Float};
use super::*; use super::*;
@ -54,14 +54,19 @@ impl<F: Float + Scalar> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
} }
} }
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraNormalizeLayer { impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBase<DVector<F>> for NeuraNormalizeLayer {
type Gradient = (); type Gradient = ();
type IntermediaryRepr = (DMatrix<F>, F); // Partial jacobian matrix (without the kroenecker term) and stddev
fn backprop_layer( fn default_gradient(&self) -> Self::Gradient {
&self, ()
input: &DVector<F>, }
epsilon: Self::Output,
) -> (DVector<F>, Self::Gradient) { fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
}
fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
let (mean, variance, len) = mean_variance(input); let (mean, variance, len) = mean_variance(input);
let stddev = F::sqrt(variance); let stddev = F::sqrt(variance);
let input_centered = input.clone().map(|x| x - mean); let input_centered = input.clone().map(|x| x - mean);
@ -73,26 +78,42 @@ impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for Neura
*value += F::one() / (stddev * len); *value += F::one() / (stddev * len);
} }
let mut epsilon_out = jacobian_partial * &epsilon; (input_centered / stddev, (jacobian_partial, stddev))
// Apply the δ_{ik}/σ term
for i in 0..epsilon_out.len() {
epsilon_out[i] += epsilon[i] / stddev;
}
(epsilon_out, ())
} }
}
fn default_gradient(&self) -> Self::Gradient { impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerSelf<DVector<F>> for NeuraNormalizeLayer {
fn regularize_layer(&self) -> Self::Gradient {
() ()
} }
fn regularize_layer(&self) -> Self::Gradient { fn get_gradient(
&self,
input: &DVector<F>,
intermediary: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> Self::Gradient {
() ()
} }
}
fn apply_gradient(&mut self, _gradient: &Self::Gradient) { impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBackprop<DVector<F>>
// Noop for NeuraNormalizeLayer
{
fn backprop_layer(
&self,
input: &DVector<F>,
(jacobian_partial, stddev): &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> DVector<F> {
let mut epsilon_out = jacobian_partial * epsilon;
// Apply the δ_{ik}/σ term
for i in 0..epsilon_out.len() {
epsilon_out[i] += epsilon[i] / *stddev;
}
epsilon_out
} }
} }

@ -54,22 +54,53 @@ impl NeuraPartialLayer for NeuraSoftmaxLayer {
} }
} }
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraSoftmaxLayer { impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBase<DVector<F>> for NeuraSoftmaxLayer {
type Gradient = (); type Gradient = ();
type IntermediaryRepr = Self::Output; // Result of self.eval
fn default_gradient(&self) -> Self::Gradient { fn default_gradient(&self) -> Self::Gradient {
() ()
} }
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
}
fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
let res = self.eval(input);
(res.clone(), res)
}
}
/// Weight-gradient methods for the softmax layer.
///
/// The layer's `Gradient` type is `()`, so both methods return the unit value.
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerSelf<DVector<F>> for NeuraSoftmaxLayer {
    /// Returns the (empty) regularization gradient.
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    /// Returns the (empty) layer gradient.
    ///
    /// The parameters are underscore-prefixed because none of them is read;
    /// this avoids `unused_variables` warnings.
    #[inline(always)]
    fn get_gradient(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
}
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBackprop<DVector<F>>
for NeuraSoftmaxLayer
{
fn backprop_layer( fn backprop_layer(
&self, &self,
input: &DVector<F>, input: &DVector<F>,
mut epsilon: Self::Output, evaluated: &Self::IntermediaryRepr,
) -> (DVector<F>, Self::Gradient) { epsilon: &Self::Output,
// Note: a constant value can be added to `input` to bring it to increase precision ) -> DVector<F> {
let evaluated = self.eval(input); let mut epsilon = epsilon.clone();
// Compute $a_{l-1,i} \epsilon_{l,i}$ // Compute $a_{l-1,i} ° \epsilon_{l,i}$
hadamard_product(&mut epsilon, &evaluated); hadamard_product(&mut epsilon, &evaluated);
// Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ // Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$
@ -80,15 +111,7 @@ impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for Neura
epsilon[i] -= evaluated[i] * sum_diagonal_terms; epsilon[i] -= evaluated[i] * sum_diagonal_terms;
} }
(epsilon, ()) epsilon
}
fn regularize_layer(&self) -> Self::Gradient {
()
}
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
} }
} }
@ -132,8 +155,9 @@ mod test {
for epsilon1 in [1.7, 1.9, 2.3] { for epsilon1 in [1.7, 1.9, 2.3] {
for epsilon2 in [2.9, 3.1, 3.7] { for epsilon2 in [2.9, 3.1, 3.7] {
let epsilon = dvector![epsilon1, epsilon2]; let epsilon = dvector![epsilon1, epsilon2];
let evaluated = layer.eval(&input);
let (epsilon, _) = layer.backprop_layer(&input, epsilon); let epsilon = layer.backprop_layer(&input, &evaluated, &epsilon);
let expected = [ let expected = [
output[0] * (1.0 - output[0]) * epsilon1 output[0] * (1.0 - output[0]) * epsilon1
- output[1] * output[0] * epsilon2, - output[1] * output[0] * epsilon2,
@ -165,7 +189,8 @@ mod test {
derivative += DMatrix::from_diagonal(&evaluated); derivative += DMatrix::from_diagonal(&evaluated);
let expected = derivative * &loss; let expected = derivative * &loss;
let (actual, _) = layer.backprop_layer(&input, loss); let evaluated = layer.eval(&input);
let actual = layer.backprop_layer(&input, &evaluated, &loss);
for i in 0..4 { for i in 0..4 {
assert!((expected[i] - actual[i]).abs() < EPSILON); assert!((expected[i] - actual[i]).abs() < EPSILON);

@ -4,6 +4,7 @@ use crate::{
pub mod sequential; pub mod sequential;
// TODO: extract regularize from this, so that we can drop the trait constraints on NeuraSequential's impl
pub trait NeuraTrainableNetworkBase<Input>: NeuraLayer<Input> { pub trait NeuraTrainableNetworkBase<Input>: NeuraLayer<Input> {
type Gradient: NeuraVectorSpace; type Gradient: NeuraVectorSpace;
type LayerOutput; type LayerOutput;

@ -0,0 +1,96 @@
use super::*;
use crate::prelude::NeuraTrainableLayerBackprop;
/// Evaluation of a `NeuraSequential`: the head `layer` runs first, and its
/// output is fed to `child_network`.
impl<Input, Layer, ChildNetwork> NeuraLayer<Input> for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraLayer<Input>,
    ChildNetwork: NeuraLayer<Layer::Output>,
{
    /// The output of the whole chain is the output of the child network.
    type Output = ChildNetwork::Output;

    /// Evaluates `layer` on `input`, then `child_network` on the result.
    fn eval(&self, input: &Input) -> Self::Output {
        let layer_output = self.layer.eval(input);
        self.child_network.eval(&layer_output)
    }
}
/// Lets a `NeuraSequential` be used as a single trainable layer.
///
/// Every method delegates to `layer` and `child_network`; the gradient and the
/// intermediary representation are pairs whose second half is boxed.
impl<Input, Layer, ChildNetwork> NeuraTrainableLayerBase<Input>
    for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraTrainableLayerBase<Input>,
    ChildNetwork: NeuraTrainableLayerBase<Layer::Output>,
{
    /// `(gradient of the head layer, boxed gradient of the child network)`.
    type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
    /// `(intermediary of the head layer, boxed intermediary of the child network)`.
    type IntermediaryRepr = (Layer::IntermediaryRepr, Box<ChildNetwork::IntermediaryRepr>);

    /// Pairs the default gradients of the head layer and of the child network.
    fn default_gradient(&self) -> Self::Gradient {
        let head_default = self.layer.default_gradient();
        let child_default = self.child_network.default_gradient();
        (head_default, Box::new(child_default))
    }

    /// Applies each half of `gradient` to the matching sub-layer.
    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
        self.layer.apply_gradient(&gradient.0);
        self.child_network.apply_gradient(&gradient.1);
    }

    /// Runs `eval_training` on the head layer, then on the child network with the
    /// head layer's output, and returns the final output with both intermediaries.
    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
        let (transient_output, head_repr) = self.layer.eval_training(input);
        let (output, child_repr) = self.child_network.eval_training(&transient_output);
        (output, (head_repr, Box::new(child_repr)))
    }

    /// Forwards `prepare_layer` to the head layer, then to the child network.
    fn prepare_layer(&mut self, is_training: bool) {
        self.layer.prepare_layer(is_training);
        self.child_network.prepare_layer(is_training);
    }
}
/// Weight-gradient methods for a `NeuraSequential` used as a single layer.
///
/// `ChildNetwork` must also implement `NeuraTrainableLayerBackprop`: the gradient
/// of the head `layer` depends on the epsilon propagated back through the child
/// network.
impl<
        Input,
        Layer: NeuraTrainableLayerSelf<Input>,
        ChildNetwork: NeuraTrainableLayerSelf<Layer::Output> + NeuraTrainableLayerBackprop<Layer::Output>,
    > NeuraTrainableLayerSelf<Input> for NeuraSequential<Layer, ChildNetwork>
{
    /// Pairs the regularization gradients of the head layer and of the child network.
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.layer.regularize_layer(),
            Box::new(self.child_network.regularize_layer()),
        )
    }

    /// Computes the gradient of the whole chain.
    ///
    /// `epsilon` is the derivative with respect to the output of the child
    /// network. The child's gradient is computed directly from it; the head
    /// layer's gradient is computed from the epsilon that the child network
    /// propagates back.
    ///
    /// `intermediary` must come from a call to `eval_training` with the same
    /// `input`, as required by the trait.
    fn get_gradient(
        &self,
        input: &Input,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Self::Gradient {
        // `IntermediaryRepr` does not store the head layer's output, so it is
        // recomputed here, exactly as `backprop_layer` does for this type.
        let transient_output = self.layer.eval(input);

        let child_gradient =
            self.child_network
                .get_gradient(&transient_output, &intermediary.1, epsilon);

        // Epsilon with respect to the head layer's output.
        let transient_epsilon =
            self.child_network
                .backprop_layer(&transient_output, &intermediary.1, epsilon);

        let layer_gradient = self
            .layer
            .get_gradient(input, &intermediary.0, &transient_epsilon);

        (layer_gradient, Box::new(child_gradient))
    }
}
/// Backpropagation through a `NeuraSequential` used as a single layer.
impl<Input, Layer, ChildNetwork> NeuraTrainableLayerBackprop<Input>
    for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraTrainableLayerBackprop<Input>,
    ChildNetwork: NeuraTrainableLayerBackprop<Layer::Output>,
{
    /// Propagates `incoming_epsilon` back through the child network, then through
    /// the head layer, and returns the epsilon with respect to `input`.
    fn backprop_layer(
        &self,
        input: &Input,
        intermediary: &Self::IntermediaryRepr,
        incoming_epsilon: &Self::Output,
    ) -> Input {
        // The head layer's output is not kept in `intermediary`; recompute it so
        // the child network can be given its own input.
        let head_output = self.layer.eval(input);

        let child_epsilon =
            self.child_network
                .backprop_layer(&head_output, &intermediary.1, incoming_epsilon);

        self.layer
            .backprop_layer(input, &intermediary.0, &child_epsilon)
    }
}

@ -1,10 +1,12 @@
use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase}; use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
use crate::{ use crate::{
gradient_solver::{NeuraGradientSolverFinal, NeuraGradientSolverTransient}, gradient_solver::{NeuraGradientSolverFinal, NeuraGradientSolverTransient},
layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer}, layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase},
prelude::NeuraTrainableLayerSelf,
}; };
mod construct; mod construct;
mod layer_impl;
mod tail; mod tail;
pub use construct::*; pub use construct::*;
@ -24,7 +26,7 @@ pub use tail::*;
/// ## Notes on implemented traits /// ## Notes on implemented traits
/// ///
/// The different implementations for `NeuraTrainableNetwork`, /// The different implementations for `NeuraTrainableNetwork`,
/// `NeuraLayer` and `NeuraTrainableLayer` each require that `ChildNetwork` implements those respective traits, /// `NeuraLayer` and `NeuraTrainableLayerBase` each require that `ChildNetwork` implements those respective traits,
/// and that the output type of `Layer` matches the input type of `ChildNetwork`. /// and that the output type of `Layer` matches the input type of `ChildNetwork`.
/// ///
/// If a method, like `eval`, is reported as missing, /// If a method, like `eval`, is reported as missing,
@ -74,61 +76,9 @@ impl<Layer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
} }
} }
impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
for NeuraSequential<Layer, ChildNetwork>
{
type Output = ChildNetwork::Output;
fn eval(&self, input: &Input) -> Self::Output {
self.child_network.eval(&self.layer.eval(input))
}
}
impl<
Input,
Layer: NeuraTrainableLayer<Input>,
ChildNetwork: NeuraTrainableLayer<Layer::Output>,
> NeuraTrainableLayer<Input> for NeuraSequential<Layer, ChildNetwork>
{
type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
fn default_gradient(&self) -> Self::Gradient {
(
self.layer.default_gradient(),
Box::new(self.child_network.default_gradient()),
)
}
fn backprop_layer(
&self,
input: &Input,
incoming_epsilon: Self::Output,
) -> (Input, Self::Gradient) {
let output = self.layer.eval(input);
let (transient_epsilon, child_gradient) =
self.child_network.backprop_layer(&output, incoming_epsilon);
let (outgoing_epsilon, layer_gradient) =
self.layer.backprop_layer(input, transient_epsilon);
(outgoing_epsilon, (layer_gradient, Box::new(child_gradient)))
}
fn regularize_layer(&self) -> Self::Gradient {
(
self.layer.regularize_layer(),
Box::new(self.child_network.regularize_layer()),
)
}
fn apply_gradient(&mut self, gradient: &Self::Gradient) {
self.layer.apply_gradient(&gradient.0);
self.child_network.apply_gradient(&gradient.1);
}
}
impl< impl<
Input, Input,
Layer: NeuraTrainableLayer<Input>, Layer: NeuraTrainableLayerBase<Input> + NeuraTrainableLayerSelf<Input>,
ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>, ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
> NeuraTrainableNetworkBase<Input> for NeuraSequential<Layer, ChildNetwork> > NeuraTrainableNetworkBase<Input> for NeuraSequential<Layer, ChildNetwork>
{ {
@ -188,8 +138,8 @@ impl<Input: Clone> NeuraTrainableNetworkBase<Input> for () {
impl< impl<
Input, Input,
Layer: NeuraTrainableLayer<Input>, Layer: NeuraTrainableLayerBase<Input> + NeuraTrainableLayerSelf<Input>,
Optimizer: NeuraGradientSolverTransient<Layer::Output>, Optimizer: NeuraGradientSolverTransient<Input, Layer>,
ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>, ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
> NeuraTrainableNetwork<Input, Optimizer> for NeuraSequential<Layer, ChildNetwork> > NeuraTrainableNetwork<Input, Optimizer> for NeuraSequential<Layer, ChildNetwork>
where where
@ -200,12 +150,14 @@ where
input: &Input, input: &Input,
optimizer: &Optimizer, optimizer: &Optimizer,
) -> Optimizer::Output<Input, Self::Gradient> { ) -> Optimizer::Output<Input, Self::Gradient> {
let next_activation = self.layer.eval(input); let (next_activation, intermediary) = self.layer.eval_training(input);
let child_result = self.child_network.traverse(&next_activation, optimizer); let child_result = self.child_network.traverse(&next_activation, optimizer);
optimizer.eval_layer( optimizer.eval_layer(
&self.layer, &self.layer,
input, input,
&next_activation,
&intermediary,
child_result, child_result,
|layer_gradient, child_gradient| (layer_gradient, Box::new(child_gradient)), |layer_gradient, child_gradient| (layer_gradient, Box::new(child_gradient)),
) )

@ -82,7 +82,10 @@ impl NeuraBatchedTrainer {
network: &mut Network, network: &mut Network,
inputs: Inputs, inputs: Inputs,
test_inputs: &[(Input, Target)], test_inputs: &[(Input, Target)],
) -> Vec<(f64, f64)> { ) -> Vec<(f64, f64)>
where
<Network as NeuraTrainableNetworkBase<Input>>::Gradient: std::fmt::Debug,
{
let mut losses = Vec::new(); let mut losses = Vec::new();
let mut iter = inputs.into_iter(); let mut iter = inputs.into_iter();
let factor = -self.learning_rate / (self.batch_size as f64); let factor = -self.learning_rate / (self.batch_size as f64);

@ -1,12 +1,20 @@
use std::fs::File; use std::fs::File;
use approx::assert_relative_eq; use approx::assert_relative_eq;
use nalgebra::{DMatrix, DVector, dvector}; use nalgebra::{dvector, DMatrix, DVector};
use neuramethyst::{prelude::{*, dense::NeuraDenseLayer}, derivable::{activation::{Relu, Tanh}, regularize::NeuraL0, loss::Euclidean}}; use neuramethyst::{
derivable::{
activation::{Relu, Tanh},
loss::Euclidean,
regularize::NeuraL0,
},
prelude::{dense::NeuraDenseLayer, *},
};
fn load_test_data() -> Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> { fn load_test_data() -> Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> {
let file = File::open("tests/xor.json").unwrap(); let file = File::open("tests/xor.json").unwrap();
let data: Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> = serde_json::from_reader(&file).unwrap(); let data: Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> =
serde_json::from_reader(&file).unwrap();
data data
} }
@ -43,7 +51,7 @@ fn test_xor_training() {
network.layer.weights.clone(), network.layer.weights.clone(),
network.layer.bias.clone(), network.layer.bias.clone(),
network.child_network.layer.weights.clone(), network.child_network.layer.weights.clone(),
network.child_network.layer.bias.clone() network.child_network.layer.bias.clone(),
); );
assert_relative_eq!(expected.0.as_slice(), actual.0.as_slice()); assert_relative_eq!(expected.0.as_slice(), actual.0.as_slice());

Loading…
Cancel
Save