diff --git a/src/gradient_solver/backprop.rs b/src/gradient_solver/backprop.rs
index 921e2b9..6832e79 100644
--- a/src/gradient_solver/backprop.rs
+++ b/src/gradient_solver/backprop.rs
@@ -24,7 +24,8 @@ where
     >::Output: ToPrimitive,
     // Trainable: NeuraOldTrainableNetworkBase::Gradient>,
     // Trainable: for<'a> NeuraOldTrainableNetwork, &'a Target)>,
-    for<'a> (&'a NeuraBackprop, &'a Target): BackpropRecurse::Gradient>
+    for<'a> (&'a NeuraBackprop, &'a Target):
+        BackpropRecurse::Gradient>,
 {
     fn get_gradient(
         &self,
@@ -133,13 +134,17 @@ where
 
         // Get layer outgoing gradient vector
         let layer_epsilon_in = network.map_gradient_in(input, &epsilon_in);
-        let layer_epsilon_out = layer.backprop_layer(&layer_input, &layer_intermediary, &layer_epsilon_in);
+        let layer_epsilon_out =
+            layer.backprop_layer(&layer_input, &layer_intermediary, &layer_epsilon_in);
         let epsilon_out = network.map_gradient_out(input, &epsilon_in, &layer_epsilon_out);
 
         // Get layer parameter gradient
         let gradient = layer.get_gradient(&layer_input, &layer_intermediary, &layer_epsilon_in);
 
-        (epsilon_out.into_owned(), network.merge_gradient(gradient_rec, gradient))
+        (
+            epsilon_out.into_owned(),
+            network.merge_gradient(gradient_rec, gradient),
+        )
     }
 }
 
@@ -215,11 +220,24 @@ mod test {
         }
     }
 
+    /// Check that there is no recursion error when using `()` in `recurse`
     #[test]
-    fn test_recursive() {
+    fn test_recurse() {
         let backprop = NeuraBackprop::new(Euclidean);
         let target = dvector![0.0];
 
         (&backprop, &target).recurse(&(), &dvector![0.0]);
     }
+
+    #[test]
+    fn test_recurse_sequential() {
+        let backprop = NeuraBackprop::new(Euclidean);
+        let target = dvector![0.0];
+
+        let network = neura_sequential![neura_layer!("dense", 4), neura_layer!("dense", 1),]
+            .construct(NeuraShape::Vector(1))
+            .unwrap();
+
+        (&backprop, &target).recurse(&network, &dvector![0.0]);
+    }
 }
diff --git a/src/gradient_solver/forward_forward.rs b/src/gradient_solver/forward_forward.rs
index 777ac26..0bb6bfd 100644
--- a/src/gradient_solver/forward_forward.rs
+++ b/src/gradient_solver/forward_forward.rs
@@ -1,7 +1,12 @@
 use nalgebra::{DVector, Scalar};
 use num::{traits::NumAssignOps, Float, ToPrimitive};
 
-use crate::{derivable::NeuraDerivable, layer::NeuraTrainableLayerSelf};
+use crate::{
+    derivable::NeuraDerivable,
+    layer::NeuraTrainableLayerSelf,
+    network::{NeuraNetwork, NeuraNetworkRec},
+    prelude::NeuraLayer,
+};
 
 use super::*;
 
@@ -20,21 +25,58 @@ impl> NeuraForwardForward {
     }
 }
 
+trait ForwardForwardDerivate {
+    fn derivate_goodness(&self, data: &Data) -> Data;
+}
+
+impl> ForwardForwardDerivate>
+    for NeuraForwardPair
+{
+    fn derivate_goodness(&self, data: &DVector) -> DVector {
+        let goodness = data
+            .iter()
+            .copied()
+            .reduce(|acc, x| acc + x * x)
+            .unwrap_or(F::zero());
+        let goodness = if self.maximize {
+            goodness - F::from(self.threshold).unwrap()
+        } else {
+            F::from(self.threshold).unwrap() - goodness
+        };
+        // We skip self.activation.eval(goodness)
+
+        let two = F::from(2.0).unwrap();
+
+        // The original formula does not have a 1/2 term,
+        // so we must multiply by 2
+        let mut goodness_derivative = data * (two * self.activation.derivate(goodness));
+
+        if self.maximize {
+            goodness_derivative = -goodness_derivative;
+        }
+
+        goodness_derivative
+    }
+}
+
 struct NeuraForwardPair {
     threshold: f64,
     maximize: bool,
     activation: Act,
 }
 
-impl<
-        F,
-        Act: Clone + NeuraDerivable,
-        Input,
-        Trainable: NeuraTrainableLayerBase,
-    > NeuraGradientSolver for NeuraForwardForward
+impl, Input: Clone, Trainable: NeuraTrainableLayerBase>
+    NeuraGradientSolver for NeuraForwardForward
 where
     F: ToPrimitive,
-    Trainable: NeuraOldTrainableNetwork, Output = DVector, Gradient = ::Gradient>
+    Trainable: NeuraOldTrainableNetwork<
+        Input,
+        NeuraForwardPair,
+        Output = DVector,
+        Gradient = ::Gradient,
+    >,
+    NeuraForwardPair:
+        ForwardForwardRecurse::Gradient>,
 {
     fn get_gradient(
         &self,
@@ -43,15 +85,18 @@ where
         target: &bool,
     ) -> ::Gradient {
         let target = *target;
+        let pair = NeuraForwardPair {
+            threshold: self.threshold,
+            maximize: target,
+            activation: self.activation.clone(),
+        };
+
+        // trainable.traverse(
+        //     input,
+        //     &pair,
+        // )
 
-        trainable.traverse(
-            input,
-            &NeuraForwardPair {
-                threshold: self.threshold,
-                maximize: target,
-                activation: self.activation.clone(),
-            },
-        )
+        pair.recurse(trainable, input)
     }
 
     fn score(&self, trainable: &Trainable, input: &Input, target: &bool) -> f64 {
@@ -145,6 +190,43 @@ impl<
     }
 }
 
+trait ForwardForwardRecurse {
+    fn recurse(&self, network: &Network, input: &Input) -> Gradient;
+}
+
+impl ForwardForwardRecurse for NeuraForwardPair {
+    #[inline(always)]
+    fn recurse(&self, _network: &(), _input: &Input) -> () {
+        ()
+    }
+}
+
+impl + NeuraNetworkRec>
+    ForwardForwardRecurse for NeuraForwardPair
+where
+    Network::Layer: NeuraTrainableLayerSelf,
+    >::Output: Clone,
+    Self: ForwardForwardDerivate<>::Output>,
+    Self: ForwardForwardRecurse<
+        Network::NodeOutput,
+        Network::NextNode,
+        ::Gradient,
+    >,
+{
+    fn recurse(&self, network: &Network, input: &Input) -> Network::Gradient {
+        let layer = network.get_layer();
+        let layer_input = network.map_input(input);
+        let (layer_output, layer_intermediary) = layer.eval_training(&layer_input);
+        let output = network.map_output(input, &layer_output);
+
+        let derivative = self.derivate_goodness(&layer_output);
+
+        let layer_gradient = layer.get_gradient(&layer_input, &layer_intermediary, &derivative);
+
+        network.merge_gradient(self.recurse(network.get_next(), &output), layer_gradient)
+    }
+}
+
 #[cfg(test)]
 mod test {
     use rand::Rng;
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 60f32bf..43f38d7 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -162,6 +162,33 @@ impl NeuraTrainableLayerEval for () {
     }
 }
 
+impl NeuraTrainableLayerSelf for () {
+    #[inline(always)]
+    fn regularize_layer(&self) -> Self::Gradient {
+        ()
+    }
+
+    #[inline(always)]
+    fn get_gradient(
+        &self,
+        _input: &Input,
+        _intermediary: &Self::IntermediaryRepr,
+        _epsilon: &Self::Output,
+    ) -> Self::Gradient {
+        ()
+    }
+}
+
+impl NeuraTrainableLayerBackprop for () {
+    fn backprop_layer(
+        &self,
+        _input: &Input,
+        _intermediary: &Self::IntermediaryRepr,
+        epsilon: &Self::Output,
+    ) -> Input {
+        epsilon.clone()
+    }
+}
 /// Temporary implementation of neura_layer
 #[macro_export]
 macro_rules! neura_layer {
diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs
index 8dd5013..b34966c 100644
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@@ -1,3 +1,5 @@
+use std::borrow::Cow;
+
 use super::*;
 use crate::{
     gradient_solver::NeuraGradientSolverTransient,
@@ -185,7 +187,9 @@ impl NeuraNetworkBase for NeuraSequential
     }
 }
 
-impl NeuraNetworkRec for NeuraSequential {
+impl NeuraNetworkRec
+    for NeuraSequential
+{
     type NextNode = ChildNetwork;
 
     fn get_next(&self) -> &Self::NextNode {
@@ -195,9 +199,47 @@ impl Neur
     fn merge_gradient(
         &self,
         rec_gradient: ::Gradient,
-        layer_gradient: ::Gradient
+        layer_gradient: ::Gradient,
     ) -> Self::Gradient {
-        (rec_gradient, Box::new(layer_gradient))
+        (layer_gradient, Box::new(rec_gradient))
+    }
+}
+
+impl, ChildNetwork> NeuraNetwork for NeuraSequential
+where
+    Layer::Output: Clone,
+{
+    type LayerInput = Input;
+    type NodeOutput = Layer::Output;
+
+    fn map_input<'a>(&'_ self, input: &'a Input) -> Cow<'a, Input> {
+        Cow::Borrowed(input)
+    }
+
+    fn map_output<'a>(
+        &'_ self,
+        _input: &'_ Input,
+        layer_output: &'a >::Output,
+    ) -> Cow<'a, Self::NodeOutput> {
+        Cow::Borrowed(layer_output)
+    }
+
+    fn map_gradient_in<'a>(
+        &'_ self,
+        _input: &'_ Input,
+        gradient_in: &'a Self::NodeOutput,
+    ) -> Cow<'a, >::Output> {
+        Cow::Borrowed(gradient_in)
+    }
+
+    fn map_gradient_out<'a>(
+        &'_ self,
+        _input: &'_ Input,
+        _gradient_in: &'_ Self::NodeOutput,
+        gradient_out: &'a Self::LayerInput,
+    ) -> Cow<'a, Input> {
+        Cow::Borrowed(gradient_out)
+    }
 }
diff --git a/src/network/traits.rs b/src/network/traits.rs
index a16af93..f7afd1a 100644
--- a/src/network/traits.rs
+++ b/src/network/traits.rs
@@ -58,7 +58,8 @@ pub trait NeuraNetworkRec: NeuraNetworkBase + NeuraTrainableLayerBase {
     fn merge_gradient(
         &self,
         rec_gradient: ::Gradient,
-        layer_gradient: ::Gradient
+        layer_gradient: ::Gradient,
     ) -> Self::Gradient
-    where Self::Layer: NeuraTrainableLayerBase;
+    where
+        Self::Layer: NeuraTrainableLayerBase;
 }
diff --git a/src/train.rs b/src/train.rs
index 648d693..351de66 100644
--- a/src/train.rs
+++ b/src/train.rs
@@ -1,5 +1,5 @@
 use crate::{
-    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolver,
+    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolver, layer::*,
     network::NeuraOldTrainableNetworkBase,
 };
 
@@ -73,7 +73,7 @@ impl NeuraBatchedTrainer {
     pub fn train<
         Input: Clone,
         Target: Clone,
-        Network: NeuraOldTrainableNetworkBase,
+        Network: NeuraTrainableLayerBase + NeuraTrainableLayerSelf,
         GradientSolver: NeuraGradientSolver,
         Inputs: IntoIterator,
     >(
@@ -84,7 +84,7 @@ impl NeuraBatchedTrainer {
         test_inputs: &[(Input, Target)],
     ) -> Vec<(f64, f64)>
     where
-        >::Gradient: std::fmt::Debug,
+        Network::Gradient: std::fmt::Debug,
     {
         let mut losses = Vec::new();
         let mut iter = inputs.into_iter();
@@ -97,7 +97,7 @@ impl NeuraBatchedTrainer {
         let mut train_loss = 0.0;
         'd: for iteration in 0..self.iterations {
             let mut gradient_sum = network.default_gradient();
-            network.prepare(true);
+            network.prepare_layer(true);
 
             for _ in 0..self.batch_size {
                 if let Some((input, target)) = iter.next() {
@@ -113,7 +113,7 @@ impl NeuraBatchedTrainer {
             gradient_sum.mul_assign(factor);
 
             // Add regularization gradient
-            let mut reg_gradient = Box::new(network.regularize());
+            let mut reg_gradient = network.regularize_layer();
             reg_gradient.mul_assign(reg_factor);
             gradient_sum.add_assign(&reg_gradient);
 
@@ -126,7 +126,7 @@ impl NeuraBatchedTrainer {
             }
 
             if self.log_iterations > 0 && (iteration + 1) % self.log_iterations == 0 {
-                network.prepare(false);
+                network.prepare_layer(false);
                 let mut val_loss = 0.0;
                 for (input, target) in test_inputs {
                     val_loss += gradient_solver.score(&network, input, target);
@@ -145,7 +145,7 @@ impl NeuraBatchedTrainer {
             }
         }
 
-        network.prepare(false);
+        network.prepare_layer(false);
 
         losses
     }
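
Usage note (not part of the patch): the sketch below shows how the new recursion-based gradient path in src/gradient_solver/backprop.rs is driven, mirroring the `test_recurse_sequential` test introduced above. It assumes the same in-crate scope as that test module (`NeuraBackprop`, `Euclidean`, `neura_sequential!`, `neura_layer!`, and `NeuraShape` in scope, `dvector!` from nalgebra); the layer sizes and the test name are illustrative only.

    use nalgebra::dvector;

    #[test]
    fn sketch_backprop_recurse() {
        // Build a small sequential network, as in `test_recurse_sequential`.
        let network = neura_sequential![neura_layer!("dense", 4), neura_layer!("dense", 1)]
            .construct(NeuraShape::Vector(1))
            .unwrap();

        // A backprop solver paired with a target drives the layer-by-layer recursion.
        let backprop = NeuraBackprop::new(Euclidean);
        let target = dvector![0.0];

        // `recurse` walks the network node by node; with the flipped `merge_gradient`
        // above, each node's gradient now nests as (layer_gradient, Box<child_gradient>).
        (&backprop, &target).recurse(&network, &dvector![0.0]);
    }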