diff --git a/src/gradient_solver/backprop.rs b/src/gradient_solver/backprop.rs
index 1214a60..7c9ede8 100644
--- a/src/gradient_solver/backprop.rs
+++ b/src/gradient_solver/backprop.rs
@@ -1,9 +1,6 @@
 use num::ToPrimitive;
 
-use crate::{
-    derivable::NeuraLoss, layer::*,
-    network::*,
-};
+use crate::{derivable::NeuraLoss, layer::*, network::*};
 
 use super::*;
@@ -83,11 +80,9 @@ impl<
     fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
         &self,
         rec_opt_output: Self::Output<From, Gradient>,
-        callback: Cb
+        callback: Cb,
     ) -> Self::Output<To, Gradient> {
-        (
-            callback(rec_opt_output.0), rec_opt_output.1
-        )
+        (callback(rec_opt_output.0), rec_opt_output.1)
     }
 }
@@ -95,30 +90,38 @@ trait BackpropRecurse<Input, Network, Gradient> {
     fn recurse(&self, network: &Network, input: &Input) -> (Input, Gradient);
 }
 
-impl<Input, Loss: NeuraLoss<Input>> BackpropRecurse<Input, (), ()> for (&NeuraBackprop<Loss>, &Loss::Target) {
+impl<Input, Loss: NeuraLoss<Input>> BackpropRecurse<Input, (), ()>
+    for (&NeuraBackprop<Loss>, &Loss::Target)
+{
     fn recurse(&self, _network: &(), input: &Input) -> (Input, ()) {
         (self.0.loss.nabla(self.1, input), ())
     }
 }
 
 impl<
-    Input: Clone,
-    Network: NeuraNetworkRec + NeuraNetwork<Input> + NeuraTrainableLayerBase<Input>,
-    Loss,
-    Target
-> BackpropRecurse<Input, Network, Network::Gradient> for (&NeuraBackprop<Loss>, &Target)
+        Input: Clone,
+        Network: NeuraNetworkRec + NeuraNetwork<Input> + NeuraTrainableLayerEval<Input>,
+        Loss,
+        Target,
+    > BackpropRecurse<Input, Network, Network::Gradient> for (&NeuraBackprop<Loss>, &Target)
 where
     // Verify that we can traverse recursively
-    for<'a> (&'a NeuraBackprop<Loss>, &'a Target): BackpropRecurse<Network::NodeOutput, Network::NextNode, <Network::NextNode as NeuraTrainableLayerBase<Network::NodeOutput>>::Gradient>,
+    for<'a> (&'a NeuraBackprop<Loss>, &'a Target): BackpropRecurse<
+        Network::NodeOutput,
+        Network::NextNode,
+        <Network::NextNode as NeuraTrainableLayerBase>::Gradient,
+    >,
     // Verify that the current layer implements the right traits
-    Network::Layer: NeuraTrainableLayerSelf<Network::LayerInput> + NeuraTrainableLayerBackprop<Network::LayerInput>,
+    Network::Layer: NeuraTrainableLayerSelf<Network::LayerInput>
+        + NeuraTrainableLayerBackprop<Network::LayerInput>,
    // Verify that the layer output can be cloned
     <Network::Layer as NeuraLayer<Network::LayerInput>>::Output: Clone,
-    Network::NextNode: NeuraTrainableLayerBase<Network::NodeOutput>,
+    Network::NextNode: NeuraTrainableLayerEval<Network::NodeOutput>,
 {
     fn recurse(&self, network: &Network, input: &Input) -> (Input, Network::Gradient) {
         let layer_input = network.map_input(input);
-        let (layer_output, layer_intermediary) = network.get_layer().eval_training(layer_input.as_ref());
+        let (layer_output, layer_intermediary) =
+            network.get_layer().eval_training(layer_input.as_ref());
         let output = network.map_output(input, &layer_output);
 
         let (epsilon_in, gradient_rec) = self.recurse(network.get_next(), output.as_ref());
diff --git a/src/gradient_solver/forward_forward.rs b/src/gradient_solver/forward_forward.rs
index c68f713..1ada58e 100644
--- a/src/gradient_solver/forward_forward.rs
+++ b/src/gradient_solver/forward_forward.rs
@@ -138,7 +138,7 @@ impl<
     fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
         &self,
         rec_opt_output: Self::Output<From, Gradient>,
-        _callback: Cb
+        _callback: Cb,
     ) -> Self::Output<To, Gradient> {
         rec_opt_output
     }
diff --git a/src/gradient_solver/mod.rs b/src/gradient_solver/mod.rs
index 275c964..e6291a2 100644
--- a/src/gradient_solver/mod.rs
+++ b/src/gradient_solver/mod.rs
@@ -5,7 +5,7 @@ mod forward_forward;
 pub use forward_forward::NeuraForwardForward;
 
 use crate::{
-    layer::NeuraTrainableLayerBase,
+    layer::{NeuraTrainableLayerBase, NeuraTrainableLayerEval},
     network::{NeuraOldTrainableNetwork, NeuraOldTrainableNetworkBase},
 };
 
@@ -17,7 +17,7 @@ pub trait NeuraGradientSolverFinal<LayerOutput>: NeuraGradientSolverBase {
     fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()>;
 }
 
-pub trait NeuraGradientSolverTransient<Input, Layer: NeuraTrainableLayerBase<Input>>:
+pub trait NeuraGradientSolverTransient<Input, Layer: NeuraTrainableLayerEval<Input>>:
     NeuraGradientSolverBase
 {
     fn eval_layer(
@@ -33,7 +33,7 @@ pub trait NeuraGradientSolverTransient<Input, Layer: NeuraTrainableLayerBase<In
     fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
         &self,
         rec_opt_output: Self::Output<From, Gradient>,
-        callback: Cb
+        callback: Cb,
     ) -> Self::Output<To, Gradient>;
 }
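[Review note] The `BackpropRecurse` refactor above is easier to follow against a stripped-down model of the same recursion. The sketch below is illustrative only (plain `f64` activations, hypothetical `Layer`/`Recurse` traits, not the crate's API): the network is a cons-list terminated by `()`, the base case applies the loss gradient, and each recursive step backpropagates the child's epsilon through its own layer.

```rust
// Hypothetical, simplified model of the recursion in backprop.rs above.
trait Layer {
    fn forward(&self, x: f64) -> f64;
    // Given dL/d(output), return (dL/d(input), dL/d(weights)).
    fn backward(&self, x: f64, dy: f64) -> (f64, f64);
}

trait Recurse {
    type Gradient;
    // Returns (epsilon for the previous layer, gradient of this sub-network).
    fn recurse<F: Fn(f64) -> f64 + Copy>(&self, input: f64, loss_grad: F) -> (f64, Self::Gradient);
}

// Base case: the empty network applies the loss derivative directly,
// mirroring `BackpropRecurse<Input, (), ()>` above.
impl Recurse for () {
    type Gradient = ();
    fn recurse<F: Fn(f64) -> f64 + Copy>(&self, input: f64, loss_grad: F) -> (f64, ()) {
        (loss_grad(input), ())
    }
}

// Recursive case: evaluate the head layer, recurse into the tail,
// then backpropagate the tail's epsilon through the head.
impl<L: Layer, Rest: Recurse> Recurse for (L, Rest) {
    type Gradient = (f64, Rest::Gradient);
    fn recurse<F: Fn(f64) -> f64 + Copy>(&self, input: f64, loss_grad: F) -> (f64, Self::Gradient) {
        let output = self.0.forward(input);
        let (epsilon_out, grad_rest) = self.1.recurse(output, loss_grad);
        let (epsilon_in, grad_layer) = self.0.backward(input, epsilon_out);
        (epsilon_in, (grad_layer, grad_rest))
    }
}
```

A two-layer network is then just `(layer1, (layer2, ()))`, which is the shape `NeuraSequential` provides below.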
diff --git a/src/layer/dense.rs b/src/layer/dense.rs
index 8fc6dfd..0d95f1d 100644
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@@ -175,17 +175,9 @@ impl<
         F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
         Act: NeuraDerivable<F>,
         Reg: NeuraDerivable<F>,
-    > NeuraTrainableLayerBase<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
+    > NeuraTrainableLayerBase for NeuraDenseLayer<F, Act, Reg>
 {
     type Gradient = (DMatrix<F>, DVector<F>);
-    type IntermediaryRepr = DVector<F>; // pre-activation values
-
-    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
-        let evaluated = &self.weights * input + &self.bias;
-        let output = evaluated.map(|x| self.activation.eval(x));
-
-        (output, evaluated)
-    }
 
     fn default_gradient(&self) -> Self::Gradient {
         (
@@ -200,6 +192,22 @@ impl<
     }
 }
 
+impl<
+        F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
+        Act: NeuraDerivable<F>,
+        Reg: NeuraDerivable<F>,
+    > NeuraTrainableLayerEval<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
+{
+    type IntermediaryRepr = DVector<F>; // pre-activation values
+
+    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
+        let evaluated = &self.weights * input + &self.bias;
+        let output = evaluated.map(|x| self.activation.eval(x));
+
+        (output, evaluated)
+    }
+}
+
 impl<
         F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
         Act: NeuraDerivable<F>,
diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs
index 9f46c0c..be56222 100644
--- a/src/layer/dropout.rs
+++ b/src/layer/dropout.rs
@@ -63,13 +63,8 @@ impl<R: Rng> NeuraLayer<DVector<f64>> for NeuraDropoutLayer<R> {
     }
 }
 
-impl<R: Rng> NeuraTrainableLayerBase<DVector<f64>> for NeuraDropoutLayer<R> {
+impl<R: Rng> NeuraTrainableLayerBase for NeuraDropoutLayer<R> {
     type Gradient = ();
-    type IntermediaryRepr = ();
-
-    fn eval_training(&self, input: &DVector<f64>) -> (Self::Output, Self::IntermediaryRepr) {
-        (self.eval(input), ())
-    }
 
     fn default_gradient(&self) -> Self::Gradient {
         ()
@@ -103,6 +98,14 @@ impl<R: Rng> NeuraTrainableLayerBase<DVector<f64>> for NeuraDropoutLayer<R> {
     }
 }
 
+impl<R: Rng> NeuraTrainableLayerEval<DVector<f64>> for NeuraDropoutLayer<R> {
+    type IntermediaryRepr = ();
+
+    fn eval_training(&self, input: &DVector<f64>) -> (Self::Output, Self::IntermediaryRepr) {
+        (self.eval(input), ())
+    }
+}
+
 impl<R: Rng> NeuraTrainableLayerSelf<DVector<f64>> for NeuraDropoutLayer<R> {
     fn regularize_layer(&self) -> Self::Gradient {
         ()
@@ -144,9 +147,7 @@ mod test {
             .unwrap();
 
         for _ in 0..100 {
-            <NeuraDropoutLayer<_> as NeuraTrainableLayerBase<DVector<f64>>>::prepare_layer(
-                &mut layer, true,
-            );
+            layer.prepare_layer(true);
             assert!(layer.multiplier.is_finite());
             assert!(!layer.multiplier.is_nan());
         }
diff --git a/src/layer/lock.rs b/src/layer/lock.rs
index 5d5026a..d8d8dc0 100644
--- a/src/layer/lock.rs
+++ b/src/layer/lock.rs
@@ -32,11 +32,8 @@ impl<Input, Layer: NeuraLayer<Input>> NeuraLayer<Input> for NeuraLockLayer<Layer
     }
 }
 
-impl<Input, Layer: NeuraTrainableLayerBase<Input>> NeuraTrainableLayerBase<Input>
-    for NeuraLockLayer<Layer>
-{
+impl<Layer: NeuraTrainableLayerBase> NeuraTrainableLayerBase for NeuraLockLayer<Layer> {
     type Gradient = ();
-    type IntermediaryRepr = Layer::IntermediaryRepr;
 
     fn default_gradient(&self) -> Self::Gradient {
         ()
@@ -45,13 +42,19 @@ impl<Input, Layer: NeuraTrainableLayerBase<Input>> NeuraTrainableLayerBase<Inpu
     }
 }
 
+impl<Input, Layer: NeuraTrainableLayerEval<Input>> NeuraTrainableLayerEval<Input>
+    for NeuraLockLayer<Layer>
+{
+    type IntermediaryRepr = Layer::IntermediaryRepr;
+
     fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
         self.layer.eval_training(input)
     }
 }
 
-impl<Input, Layer: NeuraTrainableLayerBase<Input>> NeuraTrainableLayerSelf<Input>
+impl<Input, Layer: NeuraTrainableLayerEval<Input>> NeuraTrainableLayerSelf<Input>
     for NeuraLockLayer<Layer>
 {
     fn regularize_layer(&self) -> Self::Gradient {
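[Review note] The dense layer's `IntermediaryRepr` above is documented as the pre-activation vector. To make the purpose of that cache concrete: during backpropagation the activation derivative is evaluated at the pre-activation values, which would otherwise require recomputing `W * x + b`. A minimal, self-contained sketch (hypothetical free function over `f64`, not the crate's `backprop_layer`; the activation derivative is a plain closure to avoid assuming `NeuraDerivable`'s exact API):

```rust
use nalgebra::{DMatrix, DVector};

// Sketch of the epsilon computation a dense layer performs during backprop,
// using the cached pre-activation vector (the IntermediaryRepr above).
fn dense_epsilon_in(
    weights: &DMatrix<f64>,
    pre_activation: &DVector<f64>, // cached by eval_training
    epsilon_out: &DVector<f64>,    // loss gradient w.r.t. the layer output
    activation_derivative: impl Fn(f64) -> f64,
) -> DVector<f64> {
    // delta = epsilon_out (elementwise *) sigma'(pre_activation)
    let delta = epsilon_out.component_mul(&pre_activation.map(activation_derivative));
    // epsilon_in = W^T * delta
    weights.transpose() * delta
}
```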
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 87fabf7..60f32bf 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -59,30 +59,32 @@ pub trait NeuraPartialLayer {
     fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
 }
 
-pub trait NeuraTrainableLayerBase<Input>: NeuraLayer<Input> {
+pub trait NeuraTrainableLayerBase {
     /// The representation of the layer gradient as a vector space
     type Gradient: NeuraVectorSpace;
 
-    /// An intermediary object type to be passed to the various training methods
-    type IntermediaryRepr;
-
     fn default_gradient(&self) -> Self::Gradient;
 
     /// Applies `δW_l` to the weights of the layer
     fn apply_gradient(&mut self, gradient: &Self::Gradient);
 
-    // TODO: move this into another trait
-    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr);
-
     /// Arbitrary computation that can be executed at the start of an epoch
     #[allow(unused_variables)]
     #[inline(always)]
     fn prepare_layer(&mut self, is_training: bool) {}
 }
 
+pub trait NeuraTrainableLayerEval<Input>: NeuraTrainableLayerBase + NeuraLayer<Input> {
+    /// An intermediary object type to be passed to the various training methods
+    type IntermediaryRepr;
+
+    // TODO: move this into another trait
+    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr);
+}
+
 /// Contains methods relative to a layer's ability to compute its own weights gradients,
 /// given the derivative of the output variables.
-pub trait NeuraTrainableLayerSelf<Input>: NeuraTrainableLayerBase<Input> {
+pub trait NeuraTrainableLayerSelf<Input>: NeuraTrainableLayerEval<Input> {
     /// Computes the regularization
     fn regularize_layer(&self) -> Self::Gradient;
 
@@ -117,7 +119,7 @@ pub trait NeuraTrainableLayerSelf<Input>: NeuraTrainableLayerBase<Input> {
 // }
 // }
 
-pub trait NeuraTrainableLayerBackprop<Input>: NeuraTrainableLayerBase<Input> {
+pub trait NeuraTrainableLayerBackprop<Input>: NeuraTrainableLayerEval<Input> {
     /// Computes the backpropagation term and the derivative of the internal weights,
     /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
     ///
@@ -137,9 +139,8 @@ pub trait NeuraTrainableLayerBackprop<Input>: NeuraTrainableLayerEval<Input> {
     ) -> Input;
 }
 
-impl<Input: Clone> NeuraTrainableLayerBase<Input> for () {
+impl NeuraTrainableLayerBase for () {
     type Gradient = ();
-    type IntermediaryRepr = ();
 
     #[inline(always)]
     fn default_gradient(&self) -> Self::Gradient {
@@ -150,7 +151,12 @@ impl<Input: Clone> NeuraTrainableLayerBase<Input> for () {
     fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
         // Noop
     }
+}
+
+impl<Input: Clone> NeuraTrainableLayerEval<Input> for () {
+    type IntermediaryRepr = ();
+
     #[inline(always)]
     fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
         (self.eval(input), ())
     }
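[Review note] With this split, a custom layer implements the input-independent bookkeeping once and the input-dependent evaluation separately. A minimal sketch, assuming `NeuraLayer` has the `type Output`/`fn eval(&self, &Input)` shape used throughout this diff (`MyIdentityLayer` is hypothetical, not part of the change):

```rust
use crate::layer::{NeuraLayer, NeuraTrainableLayerBase, NeuraTrainableLayerEval};

/// Hypothetical weightless layer, used only to illustrate the trait split.
struct MyIdentityLayer;

impl<Input: Clone> NeuraLayer<Input> for MyIdentityLayer {
    type Output = Input;

    fn eval(&self, input: &Input) -> Self::Output {
        input.clone()
    }
}

// Input-independent part: implemented once, no `Input` parameter anymore.
impl NeuraTrainableLayerBase for MyIdentityLayer {
    type Gradient = (); // no weights, so the unit vector space suffices

    fn default_gradient(&self) -> Self::Gradient {}

    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {}
}

// Input-dependent part: one impl per supported input type.
impl<Input: Clone> NeuraTrainableLayerEval<Input> for MyIdentityLayer {
    type IntermediaryRepr = (); // nothing worth caching for backprop

    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
        (self.eval(input), ())
    }
}
```

The payoff is visible in the dropout test earlier in this diff: `prepare_layer` no longer needs a fully qualified call, since `NeuraTrainableLayerBase` is no longer parameterized over the input type.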
diff --git a/src/layer/normalize.rs b/src/layer/normalize.rs
index ab0c4c9..c12d95b 100644
--- a/src/layer/normalize.rs
+++ b/src/layer/normalize.rs
@@ -56,9 +56,8 @@ impl<F: Float + Scalar> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
     }
 }
 
-impl<F: Float + Scalar> NeuraTrainableLayerBase<DVector<F>> for NeuraNormalizeLayer {
+impl NeuraTrainableLayerBase for NeuraNormalizeLayer {
     type Gradient = ();
-    type IntermediaryRepr = (DMatrix<F>, F); // Partial jacobian matrix (without the kroenecker term) and stddev
 
     fn default_gradient(&self) -> Self::Gradient {
         ()
@@ -67,6 +66,10 @@ impl<F: Float + Scalar> NeuraTrainableLayerBase<DVector<F>> for N
     fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
         // Noop
     }
+}
+
+impl<F: Float + Scalar> NeuraTrainableLayerEval<DVector<F>> for NeuraNormalizeLayer {
+    type IntermediaryRepr = (DMatrix<F>, F); // Partial jacobian matrix (without the kroenecker term) and stddev
 
     fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
         let (mean, variance, len) = mean_variance(input);
diff --git a/src/layer/softmax.rs b/src/layer/softmax.rs
index 82f31f7..8172a88 100644
--- a/src/layer/softmax.rs
+++ b/src/layer/softmax.rs
@@ -56,9 +56,8 @@ impl NeuraPartialLayer for NeuraSoftmaxLayer {
     }
 }
 
-impl<F: Float + Scalar> NeuraTrainableLayerBase<DVector<F>> for NeuraSoftmaxLayer {
+impl NeuraTrainableLayerBase for NeuraSoftmaxLayer {
     type Gradient = ();
-    type IntermediaryRepr = Self::Output; // Result of self.eval
 
     fn default_gradient(&self) -> Self::Gradient {
         ()
@@ -67,6 +66,10 @@ impl<F: Float + Scalar> NeuraTrainableLayerBase<DVector<F>> for N
     fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
         // Noop
     }
+}
+
+impl<F: Float + Scalar> NeuraTrainableLayerEval<DVector<F>> for NeuraSoftmaxLayer {
+    type IntermediaryRepr = Self::Output; // Result of self.eval
 
     fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
         let res = self.eval(input);
diff --git a/src/network/mod.rs b/src/network/mod.rs
index b40d08d..c6a89a2 100644
--- a/src/network/mod.rs
+++ b/src/network/mod.rs
@@ -1,5 +1,7 @@
 use crate::{
-    algebra::NeuraVectorSpace, gradient_solver::{NeuraGradientSolverBase, NeuraGradientSolverFinal}, layer::NeuraLayer,
+    algebra::NeuraVectorSpace,
+    gradient_solver::{NeuraGradientSolverBase, NeuraGradientSolverFinal},
+    layer::NeuraLayer,
 };
 
 // pub mod residual;
diff --git a/src/network/sequential/layer_impl.rs b/src/network/sequential/layer_impl.rs
index 3412bef..aed7ef9 100644
--- a/src/network/sequential/layer_impl.rs
+++ b/src/network/sequential/layer_impl.rs
@@ -1,5 +1,5 @@
 use super::*;
-use crate::layer::NeuraTrainableLayerBackprop;
+use crate::layer::{NeuraTrainableLayerBackprop, NeuraTrainableLayerEval};
 
 impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
     for NeuraSequential<Layer, ChildNetwork>
@@ -11,14 +11,10 @@ impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> N
     }
 }
 
-impl<
-    Input,
-    Layer: NeuraTrainableLayerBase<Input>,
-    ChildNetwork: NeuraTrainableLayerBase<Layer::Output>,
-    > NeuraTrainableLayerBase<Input> for NeuraSequential<Layer, ChildNetwork>
+impl<Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase> NeuraTrainableLayerBase
+    for NeuraSequential<Layer, ChildNetwork>
 {
     type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
-    type IntermediaryRepr = (Layer::IntermediaryRepr, Box<ChildNetwork::IntermediaryRepr>);
 
     fn default_gradient(&self) -> Self::Gradient {
         (
@@ -27,16 +23,6 @@ impl<
         )
     }
 
-    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
-        let (layer_output, layer_intermediary) = self.layer.eval_training(input);
-        let (child_output, child_intermediary) = self.child_network.eval_training(&layer_output);
-
-        (
-            child_output,
-            (layer_intermediary, Box::new(child_intermediary)),
-        )
-    }
-
     fn prepare_layer(&mut self, is_training: bool) {
         self.layer.prepare_layer(is_training);
         self.child_network.prepare_layer(is_training);
@@ -48,6 +34,25 @@ impl<
     }
 }
 
+impl<
+        Input,
+        Layer: NeuraTrainableLayerEval<Input>,
+        ChildNetwork: NeuraTrainableLayerEval<Layer::Output>,
+    > NeuraTrainableLayerEval<Input> for NeuraSequential<Layer, ChildNetwork>
+{
+    type IntermediaryRepr = (Layer::IntermediaryRepr, Box<ChildNetwork::IntermediaryRepr>);
+
+    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
+        let (layer_output, layer_intermediary) = self.layer.eval_training(input);
+        let (child_output, child_intermediary) = self.child_network.eval_training(&layer_output);
+
+        (
+            child_output,
+            (layer_intermediary, Box::new(child_intermediary)),
+        )
+    }
+}
+
 impl<
         Input,
         Layer: NeuraTrainableLayerSelf<Input>,
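[Review note] After this move, both associated types of `NeuraSequential` mirror the cons-list shape of the network itself. A self-contained analog (hypothetical `Eval` trait over `f64`, not the crate's types) of how the intermediaries nest:

```rust
// Analog of NeuraSequential's nested-tuple bookkeeping: the intermediary
// representation has the same cons-list shape as the network.
struct Sequential<Layer, Child>(Layer, Child);

trait Eval {
    type Intermediary;
    fn eval_training(&self, input: f64) -> (f64, Self::Intermediary);
}

impl Eval for () {
    type Intermediary = ();
    fn eval_training(&self, input: f64) -> (f64, ()) {
        (input, ())
    }
}

impl<Layer: Eval, Child: Eval> Eval for Sequential<Layer, Child> {
    // One intermediary per node, boxed to match the Gradient tuple above.
    type Intermediary = (Layer::Intermediary, Box<Child::Intermediary>);

    fn eval_training(&self, input: f64) -> (f64, Self::Intermediary) {
        let (layer_out, layer_repr) = self.0.eval_training(input);
        let (child_out, child_repr) = self.1.eval_training(layer_out);
        (child_out, (layer_repr, Box::new(child_repr)))
    }
}
```

For `Sequential<A, Sequential<B, ()>>` this yields the intermediary type `(A::Intermediary, Box<(B::Intermediary, Box<()>)>)`, matching the boxed `Gradient` tuple in the hunk above.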
diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs
index c2fbc63..0301f65 100644
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@@ -1,8 +1,9 @@
 use super::{NeuraOldTrainableNetwork, NeuraOldTrainableNetworkBase};
 use crate::{
-    gradient_solver::{NeuraGradientSolverTransient},
+    gradient_solver::NeuraGradientSolverTransient,
     layer::{
-        NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
+        NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase,
+        NeuraTrainableLayerEval, NeuraTrainableLayerSelf,
     },
 };
 
@@ -81,7 +82,7 @@ impl<Layer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
 
 impl<
         Input,
-        Layer: NeuraTrainableLayerBase<Input> + NeuraTrainableLayerSelf<Input>,
+        Layer: NeuraTrainableLayerEval<Input> + NeuraTrainableLayerSelf<Input>,
         ChildNetwork: NeuraOldTrainableNetworkBase<Layer::Output>,
     > NeuraOldTrainableNetworkBase<Input> for NeuraSequential<Layer, ChildNetwork>
 {
@@ -141,7 +142,7 @@ impl<Input: Clone> NeuraOldTrainableNetworkBase<Input> for () {
 
 impl<
         Input,
-        Layer: NeuraTrainableLayerBase<Input> + NeuraTrainableLayerSelf<Input>,
+        Layer: NeuraTrainableLayerEval<Input> + NeuraTrainableLayerSelf<Input>,
         Optimizer: NeuraGradientSolverTransient<Input, Layer>,
         ChildNetwork: NeuraOldTrainableNetworkBase<Layer::Output>,
     > NeuraOldTrainableNetwork<Input, Optimizer> for NeuraSequential<Layer, ChildNetwork>
diff --git a/src/network/traits.rs b/src/network/traits.rs
index 93d8e4f..7dd2847 100644
--- a/src/network/traits.rs
+++ b/src/network/traits.rs
@@ -14,7 +14,7 @@ pub trait NeuraNetworkBase {
 pub trait NeuraNetwork<NodeInput: Clone>: NeuraNetworkBase
 where
     Self::Layer: NeuraLayer<Self::LayerInput>,
-    <Self::Layer as NeuraLayer<Self::LayerInput>>::Output: Clone
+    <Self::Layer as NeuraLayer<Self::LayerInput>>::Output: Clone,
 {
     /// The type of the input to `Self::Layer`
     type LayerInput: Clone;
@@ -25,12 +25,25 @@ where
     /// Maps the input of network node to the enclosed layer
     fn map_input<'a>(&'_ self, input: &'a NodeInput) -> Cow<'a, Self::LayerInput>;
     /// Maps the output of the enclosed layer to the output of the network node
-    fn map_output<'a>(&'_ self, input: &'_ NodeInput, layer_output: &'a <Self::Layer as NeuraLayer<Self::LayerInput>>::Output) -> Cow<'a, Self::NodeOutput>;
+    fn map_output<'a>(
+        &'_ self,
+        input: &'_ NodeInput,
+        layer_output: &'a <Self::Layer as NeuraLayer<Self::LayerInput>>::Output,
+    ) -> Cow<'a, Self::NodeOutput>;
     /// Maps a gradient in the format of the node's output into the format of the enclosed layer's output
-    fn map_gradient_in<'a>(&'_ self, input: &'_ NodeInput, gradient_in: &'a Self::NodeOutput) -> Cow<'a, <Self::Layer as NeuraLayer<Self::LayerInput>>::Output>;
+    fn map_gradient_in<'a>(
+        &'_ self,
+        input: &'_ NodeInput,
+        gradient_in: &'a Self::NodeOutput,
+    ) -> Cow<'a, <Self::Layer as NeuraLayer<Self::LayerInput>>::Output>;
     /// Maps a gradient in the format of the enclosed layer's input into the format of the node's input
-    fn map_gradient_out<'a>(&'_ self, input: &'_ NodeInput, gradient_in: &'_ Self::NodeOutput, gradient_out: &'a Self::LayerInput) -> Cow<'a, NodeInput>;
+    fn map_gradient_out<'a>(
+        &'_ self,
+        input: &'_ NodeInput,
+        gradient_in: &'_ Self::NodeOutput,
+        gradient_out: &'a Self::LayerInput,
+    ) -> Cow<'a, NodeInput>;
 }
 
 pub trait NeuraNetworkRec: NeuraNetworkBase {
     /// The type of the children network, it does not need to implement `NeuraNetworkBase`
@@ -39,6 +52,4 @@ pub trait NeuraNetworkRec: NeuraNetworkBase {
     type NextNode;
 
     fn get_next(&self) -> &Self::NextNode;
-
-
 }
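[Review note] A sketch of the simplest possible `NeuraNetwork` node, to show why the reformatted `map_*` methods return `Cow`: a node that performs no conversion can always answer with `Cow::Borrowed` and avoid cloning. `PassthroughNode` is hypothetical, and `NeuraNetworkBase` is assumed to expose `type Layer`/`fn get_layer` as used by `backprop.rs` earlier in this diff:

```rust
use std::borrow::Cow;

/// Hypothetical node whose input and output types coincide with its layer's.
struct PassthroughNode<Layer> {
    layer: Layer,
}

// Assumed shape of NeuraNetworkBase, inferred from `network.get_layer()`
// in backprop.rs above.
impl<Layer> NeuraNetworkBase for PassthroughNode<Layer> {
    type Layer = Layer;

    fn get_layer(&self) -> &Self::Layer {
        &self.layer
    }
}

impl<Input: Clone, Layer: NeuraLayer<Input, Output = Input>> NeuraNetwork<Input>
    for PassthroughNode<Layer>
{
    type LayerInput = Input;
    type NodeOutput = Input;

    fn map_input<'a>(&'_ self, input: &'a Input) -> Cow<'a, Input> {
        Cow::Borrowed(input)
    }

    fn map_output<'a>(&'_ self, _input: &'_ Input, layer_output: &'a Input) -> Cow<'a, Input> {
        Cow::Borrowed(layer_output)
    }

    fn map_gradient_in<'a>(&'_ self, _input: &'_ Input, gradient_in: &'a Input) -> Cow<'a, Input> {
        Cow::Borrowed(gradient_in)
    }

    fn map_gradient_out<'a>(
        &'_ self,
        _input: &'_ Input,
        _gradient_in: &'_ Input,
        gradient_out: &'a Input,
    ) -> Cow<'a, Input> {
        Cow::Borrowed(gradient_out)
    }
}
```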