diff --git a/src/gradient_solver/backprop.rs b/src/gradient_solver/backprop.rs
index d6635ec..9f5f0ed 100644
--- a/src/gradient_solver/backprop.rs
+++ b/src/gradient_solver/backprop.rs
@@ -79,6 +79,16 @@ impl<
         (epsilon_out, combine_gradients(layer_gradient, rec_gradient))
     }
+
+    fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
+        &self,
+        rec_opt_output: Self::Output<From, Gradient>,
+        callback: Cb
+    ) -> Self::Output<To, Gradient> {
+        (
+            callback(rec_opt_output.0), rec_opt_output.1
+        )
+    }
 }
 
 #[cfg(test)]
diff --git a/src/gradient_solver/forward_forward.rs b/src/gradient_solver/forward_forward.rs
index b07bf41..01ce34d 100644
--- a/src/gradient_solver/forward_forward.rs
+++ b/src/gradient_solver/forward_forward.rs
@@ -134,6 +134,14 @@ impl<
         combine_gradients(layer_gradient, rec_gradient)
     }
+
+    fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
+        &self,
+        rec_opt_output: Self::Output<From, Gradient>,
+        callback: Cb
+    ) -> Self::Output<To, Gradient> {
+        rec_opt_output
+    }
 }
 
 #[cfg(test)]
diff --git a/src/gradient_solver/mod.rs b/src/gradient_solver/mod.rs
index 732ca2c..d5497d2 100644
--- a/src/gradient_solver/mod.rs
+++ b/src/gradient_solver/mod.rs
@@ -29,6 +29,12 @@ pub trait NeuraGradientSolverTransient<Input, Layer: NeuraTrainableLayerBase<Input>>:
         combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
     ) -> Self::Output<Input, NetworkGradient>;
+
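+    /// Maps the "epsilon" component of the solver's traversal state from `From`
+    /// to `To`, leaving any gradient component untouched: backprop maps the
+    /// first element of its `(epsilon, gradient)` pair, while forward-forward
+    /// carries no epsilon and returns its state unchanged.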
+    fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
+        &self,
+        rec_opt_output: Self::Output<From, Gradient>,
+        callback: Cb
+    ) -> Self::Output<To, Gradient>;
 }
 
 pub trait NeuraGradientSolver<Input, Network: NeuraTrainableNetworkBase<Input>> {
diff --git a/src/network/mod.rs b/src/network/mod.rs
index a625bd5..e13de8f 100644
--- a/src/network/mod.rs
+++ b/src/network/mod.rs
@@ -1,5 +1,5 @@
 use crate::{
-    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolverBase, layer::NeuraLayer,
+    algebra::NeuraVectorSpace, gradient_solver::{NeuraGradientSolverBase, NeuraGradientSolverFinal}, layer::NeuraLayer,
 };
 
 pub mod residual;
@@ -31,3 +31,15 @@ where
         optimizer: &Optimizer,
     ) -> Optimizer::Output<Input, Self::Gradient>;
 }
+
+impl<Input: Clone, Optimizer: NeuraGradientSolverFinal<Input>>
+    NeuraTrainableNetwork<Input, Optimizer> for ()
+{
+    fn traverse(
+        &self,
+        input: &Input,
+        optimizer: &Optimizer,
+    ) -> Optimizer::Output<Input, Self::Gradient> {
+        optimizer.eval_final(input.clone())
+    }
+}
diff --git a/src/network/residual/axis.rs b/src/network/residual/axis.rs
index 1f30fff..a024e53 100644
--- a/src/network/residual/axis.rs
+++ b/src/network/residual/axis.rs
@@ -1,4 +1,4 @@
-use std::borrow::Borrow;
+use std::{borrow::Borrow, rc::Rc};
 
 use nalgebra::{Const, DVector, Dyn, VecStorage};
 
@@ -17,9 +17,10 @@ pub trait NeuraCombineInputs<T> {
     type Combined;
 
     fn combine(&self, inputs: Vec<impl Borrow<T>>) -> Self::Combined;
+}
 
-    // TODO:
-    // fn shape(&self, input_shapes: Vec<NeuraShape>) -> NeuraShape;
+pub trait NeuraSplitInputs<T>: NeuraCombineInputs<T> {
+    fn split(&self, combined: Self::Combined, input_shapes: &[NeuraShape]) -> Vec<Rc<T>>;
 }
 
 impl<F: Clone> NeuraCombineInputs<DVector<F>> for NeuraAxisAppend {
diff --git a/src/network/residual/layer_impl.rs b/src/network/residual/layer_impl.rs
new file mode 100644
index 0000000..4e14d1b
--- /dev/null
+++ b/src/network/residual/layer_impl.rs
@@ -0,0 +1,189 @@
+//! Implementations for NeuraLayer*
+
+use std::rc::Rc;
+
+use nalgebra::DVector;
+
+use crate::{
+    gradient_solver::NeuraGradientSolverTransient,
+    network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase},
+};
+
+use super::*;
+
+impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
+    fn process_input<Data>(
+        &self,
+        input: &NeuraResidualInput<Data>,
+    ) -> (Axis::Combined, NeuraResidualInput<Data>)
+    where
+        Axis: NeuraCombineInputs<Data>,
+        Layer: NeuraLayer<Axis::Combined>,
+    {
+        let (inputs, rest) = input.shift();
+
+        let layer_input = self.axis.combine(inputs);
+
+        (layer_input, rest)
+    }
+
+    fn combine_outputs<Data>(
+        &self,
+        layer_output: Data,
+        output: &mut NeuraResidualInput<Data>,
+    ) -> Rc<Data> {
+        let layer_output = Rc::new(layer_output);
+
+        for &offset in &self.offsets {
+            output.push(offset, Rc::clone(&layer_output));
+        }
+
+        layer_output
+    }
+}
+
+impl<F, Layer, ChildNetwork, Axis> NeuraLayer<NeuraResidualInput<DVector<F>>>
+    for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    Axis: NeuraCombineInputs<DVector<F>>,
+    Layer: NeuraLayer<Axis::Combined, Output = DVector<F>>,
+    ChildNetwork: NeuraLayer<NeuraResidualInput<DVector<F>>>,
+{
+    type Output = <ChildNetwork as NeuraLayer<NeuraResidualInput<DVector<F>>>>::Output;
+
+    fn eval(&self, input: &NeuraResidualInput<DVector<F>>) -> Self::Output {
+        let (layer_input, mut rest) = self.process_input(input);
+
+        self.combine_outputs(self.layer.eval(&layer_input), &mut rest);
+
+        self.child_network.eval(&rest)
+    }
+}
+
+impl<F: Clone, Output, Layers> NeuraLayer<DVector<F>> for NeuraResidual<Layers>
+where
+    Layers: NeuraLayer<NeuraResidualInput<DVector<F>>, Output = NeuraResidualInput<Output>>,
+{
+    type Output = Output;
+
+    fn eval(&self, input: &DVector<F>) -> Self::Output {
+        let input: Rc<DVector<F>> = Rc::new((*input).clone());
+        let mut inputs = NeuraResidualInput::new();
+
+        for &offset in &self.initial_offsets {
+            inputs.push(offset, Rc::clone(&input));
+        }
+
+        drop(input);
+
+        let output = self.layers.eval(&inputs);
+
+        let result = output.get_first()
+            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
+            .into();
+
+        Rc::unwrap_or_clone(result)
+    }
+}
+
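+/// Forward-pass bookkeeping for a single residual node: the wrapped layer's
+/// intermediary value, the layer output that was pushed to the downstream
+/// nodes, and the child network's own intermediary.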
+pub struct NeuraResidualIntermediary<LayerIntermediary, LayerOutput, ChildIntermediary> {
+    layer_intermediary: LayerIntermediary,
+    layer_output: Rc<LayerOutput>,
+    child_intermediary: Box<ChildIntermediary>,
+}
+
+impl<
+    Data,
+    Axis: NeuraCombineInputs<Data>,
+    Layer: NeuraTrainableLayerBase<Axis::Combined, Output = Data>,
+    ChildNetwork: NeuraTrainableLayerBase<NeuraResidualInput<Data>>
+> NeuraTrainableLayerBase<NeuraResidualInput<Data>> for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    NeuraResidualNode<Layer, ChildNetwork, Axis>: NeuraLayer<NeuraResidualInput<Data>, Output=ChildNetwork::Output>
+{
+    type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
+    type IntermediaryRepr = NeuraResidualIntermediary<Layer::IntermediaryRepr, Layer::Output, ChildNetwork::IntermediaryRepr>;
+
+    fn default_gradient(&self) -> Self::Gradient {
+        (self.layer.default_gradient(), Box::new(self.child_network.default_gradient()))
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
+        self.layer.apply_gradient(&gradient.0);
+        self.child_network.apply_gradient(&gradient.1);
+    }
+
+    fn eval_training(&self, input: &NeuraResidualInput<Data>) -> (Self::Output, Self::IntermediaryRepr) {
+        let (layer_input, mut rest) = self.process_input(input);
+
+        let (layer_output, layer_intermediary) = self.layer.eval_training(&layer_input);
+        let layer_output = self.combine_outputs(layer_output, &mut rest);
+
+        let (output, child_intermediary) = self.child_network.eval_training(&rest);
+
+        let intermediary = NeuraResidualIntermediary {
+            layer_intermediary,
+            layer_output,
+            child_intermediary: Box::new(child_intermediary)
+        };
+
+        (output, intermediary)
+    }
+
+    fn prepare_layer(&mut self, is_training: bool) {
+        self.layer.prepare_layer(is_training);
+        self.child_network.prepare_layer(is_training);
+    }
+}
+
+impl<
+    Data,
+    Axis: NeuraCombineInputs<Data>,
+    Layer: NeuraTrainableLayerSelf<Axis::Combined, Output = Data>,
+    ChildNetwork: NeuraTrainableNetworkBase<NeuraResidualInput<Data>>,
+> NeuraTrainableNetworkBase<NeuraResidualInput<Data>> for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    Self: NeuraTrainableLayerBase<NeuraResidualInput<Data>, Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>)>,
+{
+    type Gradient = <Self as NeuraTrainableLayerBase<NeuraResidualInput<Data>>>::Gradient;
+    type LayerOutput = Layer::Output;
+
+    fn default_gradient(&self) -> Self::Gradient {
+        <Self as NeuraTrainableLayerBase<NeuraResidualInput<Data>>>::default_gradient(self)
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
+        <Self as NeuraTrainableLayerBase<NeuraResidualInput<Data>>>::apply_gradient(self, gradient)
+    }
+
+    fn regularize(&self) -> Self::Gradient {
+        (self.layer.regularize_layer(), Box::new(self.child_network.regularize()))
+    }
+
+    fn prepare(&mut self, train_iteration: bool) {
+        self.layer.prepare_layer(train_iteration);
+        self.child_network.prepare(train_iteration);
+    }
+}
+
+impl<
+    Data,
+    Axis: NeuraSplitInputs<Data>,
+    Layer: NeuraTrainableLayerSelf<Axis::Combined, Output = Data>,
+    Optimizer: NeuraGradientSolverTransient<Axis::Combined, Layer>,
+    ChildNetwork: NeuraTrainableNetwork<NeuraResidualInput<Data>, Optimizer>,
+> NeuraTrainableNetwork<NeuraResidualInput<Data>, Optimizer> for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    Self: NeuraTrainableLayerBase<NeuraResidualInput<Data>, Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>)>,
+{
+    fn traverse(
+        &self,
+        input: &NeuraResidualInput<Data>,
+        optimizer: &Optimizer,
+    ) -> Optimizer::Output<NeuraResidualInput<Data>, Self::Gradient> {
+        let (layer_input, mut rest) = self.process_input(input);
+        let (layer_output, layer_intermediary) = self.layer.eval_training(&layer_input);
+        let layer_output = self.combine_outputs(layer_output, &mut rest);
+
+        let child_result = self.child_network.traverse(&rest, optimizer);
+        // TODO: maybe move this to a custom impl of NeuraGradientSolverTransient for NeuraResidualInput?
+        // Or have a different set of traits for NeuraTrainableNetwork specific to NeuraResidualNodes
+        let child_result = optimizer.map_epsilon(child_result, |epsilon| {
+            // Pop the first value from `epsilon`, then:
+            // - compute its sum
+            // - use it to compute the outgoing epsilon of the current layer
+            // - split the outgoing epsilon into its original components, and push those back onto the rest
+            // At this point, the value for `epsilon` in the gradient solver's state should be ready for another iteration,
+            // with the first value containing the unsummed incoming epsilon values from the downstream layers
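+            //
+            // One possible shape for this body (a sketch only; it assumes the
+            // `shift`/`push` API used in `process_input`/`combine_outputs` above
+            // and the new `NeuraSplitInputs::split` from axis.rs, and leaves the
+            // summing and the layer's backward step abstract):
+            //
+            //     let (incoming, mut rest) = epsilon.shift();
+            //     let summed = /* sum of the values in `incoming` */;
+            //     let epsilon_out = /* this layer's epsilon, computed from
+            //                          `summed` and `layer_intermediary` */;
+            //     for part in self.axis.split(epsilon_out, /* input shapes */) {
+            //         rest.push(/* offset of the original component */, part);
+            //     }
+            //     rest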
neura_residual { [ "__combine_layers", ] => { diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs index f2da4ca..a917a29 100644 --- a/src/network/sequential/mod.rs +++ b/src/network/sequential/mod.rs @@ -167,18 +167,6 @@ where } } -impl> - NeuraTrainableNetwork for () -{ - fn traverse( - &self, - input: &Input, - optimizer: &Optimizer, - ) -> Optimizer::Output { - optimizer.eval_final(input.clone()) - } -} - impl From for NeuraSequential { fn from(layer: Layer) -> Self { Self {