From 872cb3a6cefc1e1e93e2fb298806768b26428f0e Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Sat, 29 Apr 2023 16:27:43 +0200
Subject: [PATCH] :sparkles: Allow using NeuraResidual with NeuraBackprop

---
 src/derivable/loss.rs             |   5 ++
 src/layer/dense.rs                |   4 +
 src/network/residual/axis.rs      |  30 +++++++-
 src/network/residual/construct.rs | 119 ++++++++++++++++++------------
 src/network/residual/last.rs      |  37 +++++++---
 src/network/residual/mod.rs       |  34 ++++++++-
 src/network/residual/node.rs      |  55 ++++++++----
 7 files changed, 207 insertions(+), 77 deletions(-)

diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs
index ec26552..a2e882d 100644
--- a/src/derivable/loss.rs
+++ b/src/derivable/loss.rs
@@ -25,6 +25,11 @@ impl<F: Float + Scalar> NeuraLoss<DVector<F>> for Euclidean {
     #[inline]
     fn nabla(&self, target: &DVector<F>, actual: &DVector<F>) -> DVector<F> {
         let mut res = DVector::zeros(target.len());
+        assert_eq!(
+            target.shape(),
+            actual.shape(),
+            "target value differs in shape from network output"
+        );
 
         // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
         for i in 0..target.len() {
diff --git a/src/layer/dense.rs b/src/layer/dense.rs
index 0d95f1d..f000802 100644
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@@ -95,6 +95,10 @@ impl<F: Float + Scalar, Act: NeuraDerivable<F>, Reg: NeuraDer
             phantom: PhantomData,
         }
     }
+
+    pub fn input_len(&self) -> usize {
+        self.weights.shape().1
+    }
 }
 
 impl<F, Act, Reg> NeuraDenseLayerPartial<F, Act, Reg> {
diff --git a/src/network/residual/axis.rs b/src/network/residual/axis.rs
index a024e53..b9aa234 100644
--- a/src/network/residual/axis.rs
+++ b/src/network/residual/axis.rs
@@ -1,6 +1,6 @@
-use std::{borrow::Borrow, rc::Rc};
+use std::borrow::Borrow;
 
-use nalgebra::{Const, DVector, Dyn, VecStorage};
+use nalgebra::{Const, DVector, Dyn, Scalar, VecStorage};
 
 use crate::prelude::NeuraShape;
 
@@ -20,7 +20,7 @@ pub trait NeuraCombineInputs<T> {
 }
 
 pub trait NeuraSplitInputs<T>: NeuraCombineInputs<T> {
-    fn split(&self, combined: Self::Combined, input_shapes: &[NeuraShape]) -> Vec<Rc<T>>;
+    fn split(&self, combined: &Self::Combined, input_shapes: &[NeuraShape]) -> Vec<T>;
 }
 
 impl<F: Clone + Scalar> NeuraCombineInputs<DVector<F>> for NeuraAxisAppend {
@@ -62,3 +62,27 @@ impl NeuraCombineInputs<NeuraShape> for NeuraAxisAppend {
         }
     }
 }
+
+impl<F: Clone + Default + Scalar> NeuraSplitInputs<DVector<F>> for NeuraAxisAppend {
+    fn split(&self, combined: &Self::Combined, input_shapes: &[NeuraShape]) -> Vec<DVector<F>> {
+        let mut result = Vec::with_capacity(input_shapes.len());
+        let mut offset = 0;
+
+        for &input_shape in input_shapes.iter() {
+            let NeuraShape::Vector(input_shape) = input_shape else {
+                panic!("Expected {:?} to be NeuraShape::Vector", input_shape);
+            };
+
+            let mut subvector = DVector::from_element(input_shape, F::default());
+
+            for i in 0..input_shape {
+                subvector[i] = combined[offset + i].clone();
+            }
+            result.push(subvector);
+
+            offset += input_shape;
+        }
+
+        result
+    }
+}
diff --git a/src/network/residual/construct.rs b/src/network/residual/construct.rs
index 388b241..cebe5e8 100644
--- a/src/network/residual/construct.rs
+++ b/src/network/residual/construct.rs
@@ -6,7 +6,9 @@
 pub trait NeuraResidualConstruct {
     fn construct_residual(
         self,
-        input: NeuraResidualInput<NeuraShape>,
+        inputs: NeuraResidualInput<NeuraShape>,
+        indices: NeuraResidualInput<usize>,
+        current_index: usize,
     ) -> Result<Self::Constructed, Self::Err>;
 }
 
@@ -14,49 +16,52 @@
 pub enum NeuraResidualConstructErr<LayerErr, ChildErr> {
     LayerErr(LayerErr),
     ChildErr(ChildErr),
-    OOBConnection(usize),
+    WrongConnection(isize),
     AxisErr(NeuraAxisErr),
+    NoOutput,
 }
 
 use NeuraResidualConstructErr::*;
 
-impl<Layer: NeuraPartialLayer, Axis> NeuraResidualConstruct for NeuraResidualNode<Layer, (), Axis>
-where
-    Axis: NeuraCombineInputs<NeuraShape, Combined = Result<NeuraShape, NeuraAxisErr>>,
-{
-    type Constructed = NeuraResidualNode<Layer::Constructed, (), Axis>;
-    type Err = NeuraResidualConstructErr<Layer::Err, ()>;
-
-    fn construct_residual(
-        self,
-        input: NeuraResidualInput<NeuraShape>,
-    ) -> Result<Self::Constructed, Self::Err> {
-        let (layer_input_shape, _rest) = input.shift();
-        let layer_input_shape = self
-            .axis
-            .combine(layer_input_shape)
-            .map_err(|e| AxisErr(e))?;
-
-        let layer = self
-            .layer
-            .construct(layer_input_shape)
-            .map_err(|e| LayerErr(e))?;
-        let layer_shape = layer.output_shape();
-
-        if let Some(oob_offset) = self.offsets.iter().copied().find(|o| *o > 0) {
-            return Err(OOBConnection(oob_offset));
-        }
-        // TODO: check rest for non-zero columns
-
-        Ok(NeuraResidualNode {
-            layer,
-            child_network: (),
-            offsets: self.offsets,
-            axis: self.axis,
-            output_shape: Some(layer_shape),
-        })
-    }
-}
+// impl<Layer: NeuraPartialLayer, Axis> NeuraResidualConstruct for NeuraResidualNode<Layer, (), Axis>
+// where
+//     Axis: NeuraCombineInputs<NeuraShape, Combined = Result<NeuraShape, NeuraAxisErr>>,
+// {
+//     type Constructed = NeuraResidualNode<Layer::Constructed, (), Axis>;
+//     type Err = NeuraResidualConstructErr<Layer::Err, <() as NeuraResidualConstruct>::Err>;
+
+//     fn construct_residual(
+//         self,
+//         inputs: NeuraResidualInput<NeuraShape>,
+//         indices: NeuraResidualInput<usize>,
+//         current_index: usize,
+//     ) -> Result<Self::Constructed, Self::Err> {
+//         let (layer_input_shape, _rest) = inputs.shift();
+//         let layer_input_shape = self
+//             .axis
+//             .combine(layer_input_shape)
+//             .map_err(|e| AxisErr(e))?;
+
+//         let layer = self
+//             .layer
+//             .construct(layer_input_shape)
+//             .map_err(|e| LayerErr(e))?;
+//         let layer_shape = layer.output_shape();
+
+//         if let Some(oob_offset) = self.offsets.iter().copied().find(|o| *o > 0) {
+//             return Err(WrongConnection(oob_offset));
+//         }
+//         // TODO: check rest for non-zero columns
+
+//         Ok(NeuraResidualNode {
+//             layer,
+//             child_network: (),
+//             offsets: self.offsets,
+//             axis: self.axis,
+//             output_shape: Some(layer_shape),
+//         })
+//     }
+// }
 
 impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraResidualConstruct, Axis>
     NeuraResidualConstruct for NeuraResidualNode<Layer, ChildNetwork, Axis>
@@ -68,13 +73,16 @@ where
 
     fn construct_residual(
         self,
-        input: NeuraResidualInput<NeuraShape>,
+        inputs: NeuraResidualInput<NeuraShape>,
+        indices: NeuraResidualInput<usize>,
+        current_index: usize,
     ) -> Result<Self::Constructed, Self::Err> {
-        let (layer_input_shape, mut rest) = input.shift();
-        let layer_input_shape = self
-            .axis
-            .combine(layer_input_shape)
-            .map_err(|e| AxisErr(e))?;
+        let (input_shapes, mut rest_inputs) = inputs.shift();
+        let (this_indices, mut rest_indices) = indices.shift();
+
+        let self_input_shapes = input_shapes.iter().map(|x| **x).collect::<Vec<_>>();
+
+        let layer_input_shape = self.axis.combine(input_shapes).map_err(|e| AxisErr(e))?;
 
         let layer = self
             .layer
@@ -82,14 +90,25 @@ where
             .map_err(|e| LayerErr(e))?;
         let layer_shape = Rc::new(layer.output_shape());
 
+        if self.offsets.len() == 0 {
+            return Err(NoOutput);
+        }
+
         for &offset in &self.offsets {
-            rest.push(offset, Rc::clone(&layer_shape));
+            rest_inputs.push(offset, Rc::clone(&layer_shape));
+            rest_indices.push(offset, Rc::new(current_index));
         }
         let layer_shape = *layer_shape;
 
+        debug_assert!(this_indices.iter().all(|x| **x < current_index));
+        let input_offsets: Vec<usize> = this_indices
+            .into_iter()
+            .map(|x| current_index - *x - 1)
+            .collect();
+
         let child_network = self
             .child_network
-            .construct_residual(rest)
+            .construct_residual(rest_inputs, rest_indices, current_index + 1)
             .map_err(|e| ChildErr(e))?;
 
         Ok(NeuraResidualNode {
@@ -98,6 +117,8 @@ where
             offsets: self.offsets,
             axis: self.axis,
             output_shape: Some(layer_shape),
+            input_shapes: self_input_shapes,
+            input_offsets,
         })
     }
 }
@@ -136,14 +157,16 @@ where
     fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
         let input_shape = Rc::new(input_shape);
         let mut inputs = NeuraResidualInput::new();
+        let mut indices = NeuraResidualInput::new();
 
         for &offset in &self.initial_offsets {
             inputs.push(offset, Rc::clone(&input_shape));
+            indices.push(offset, Rc::new(0usize));
         }
 
         drop(input_shape);
 
-        let layers = self.layers.construct_residual(inputs)?;
+        let layers = self.layers.construct_residual(inputs, indices, 1)?;
 
         Ok(NeuraResidual {
             layers,
diff --git a/src/network/residual/last.rs b/src/network/residual/last.rs
index f3f37eb..4d82220 100644
--- a/src/network/residual/last.rs
+++ b/src/network/residual/last.rs
@@ -33,11 +33,27 @@ impl NeuraResidualConstruct for NeuraResidualLast {
     fn construct_residual(
         self,
         input: NeuraResidualInput<NeuraShape>,
+        indices: NeuraResidualInput<usize>,
+        current_index: usize,
     ) -> Result<Self::Constructed, Self::Err> {
-        let input = *input
+        let (this_input, _rest) = input.shift();
+        let index = indices
             .get_first()
             .ok_or(Self::Err::AxisErr(NeuraAxisErr::NoInput))?;
 
+        if *index != current_index - 1 {
+            return Err(Self::Err::WrongConnection(
+                current_index as isize - *index as isize - 1,
+            ));
+        }
+        if this_input.len() != 1 {
+            return Err(Self::Err::AxisErr(NeuraAxisErr::NoInput));
+        }
+
+        // TODO: check that rest contains nothing else
+
+        let input = unwrap_or_clone(this_input.into_iter().next().unwrap());
+
         Ok(Self {
             output_shape: Some(input),
         })
@@ -68,15 +84,12 @@ impl NeuraNetworkRec for NeuraResidualLast {
         &()
     }
 
-    fn merge_gradient(
-        &self,
-        rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
-        layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
-    ) -> Self::Gradient
+    #[inline(always)]
+    fn merge_gradient(&self, _rec_gradient: (), _layer_gradient: ()) -> Self::Gradient
     where
         Self::Layer: NeuraTrainableLayerBase,
     {
-        todo!()
+        ()
     }
 }
 
@@ -107,11 +120,15 @@ impl<Data: Clone> NeuraNetwork<NeuraResidualInput<Data>> for NeuraResidualLast
 
     fn map_gradient_out<'a>(
         &'_ self,
-        input: &'_ NeuraResidualInput<Data>,
-        gradient_in: &'_ Self::NodeOutput,
+        _input: &'_ NeuraResidualInput<Data>,
+        _gradient_in: &'_ Self::NodeOutput,
         gradient_out: &'a Self::LayerInput,
     ) -> Cow<'a, NeuraResidualInput<Data>> {
-        unimplemented!()
+        let mut result = NeuraResidualInput::new();
+
+        result.push(0, Rc::new(gradient_out.clone()));
+
+        Cow::Owned(result)
     }
 }
diff --git a/src/network/residual/mod.rs b/src/network/residual/mod.rs
index 26bc45d..9b7835c 100644
--- a/src/network/residual/mod.rs
+++ b/src/network/residual/mod.rs
@@ -137,6 +137,36 @@ mod test {
         .unwrap();
 
         assert_eq!(network.output_shape(), NeuraShape::Vector(8));
+        assert_eq!(network.layers.layer.input_len(), 1);
+        assert_eq!(network.layers.child_network.layer.input_len(), 3); // input (1) + first layer (2)
+        assert_eq!(
+            network.layers.child_network.child_network.layer.input_len(),
+            6
+        ); // first layer (2) + second layer (4)
+
+        assert_eq!(network.layers.input_offsets, vec![0]);
+        assert_eq!(network.layers.child_network.input_offsets, vec![1, 0]); // input, first layer
+        assert_eq!(
+            network.layers.child_network.child_network.input_offsets,
+            vec![1, 0]
+        ); // first layer, second layer
+
+        let map_shape = |shapes: &[NeuraShape]| {
+            shapes
+                .into_iter()
+                .map(|shape| shape.size())
+                .collect::<Vec<_>>()
+        };
+
+        assert_eq!(map_shape(&network.layers.input_shapes), vec![1]);
+        assert_eq!(
+            map_shape(&network.layers.child_network.input_shapes),
+            vec![1, 2]
+        ); // input, first layer
+        assert_eq!(
+            map_shape(&network.layers.child_network.child_network.input_shapes),
+            vec![2, 4]
+        ); // first layer, second layer
 
         network.eval(&dvector![0.0]);
     }
@@ -147,13 +177,13 @@ mod test {
 
             <= 0, 1;
             neura_layer!("dense", 2) => 0, 1;
             neura_layer!("dense", 4);
-            neura_layer!("dense", 8)
+            neura_layer!("dense", 4)
         ]
         .construct(NeuraShape::Vector(1))
         .unwrap();
 
         let backprop = NeuraBackprop::new(Euclidean);
-        backprop.get_gradient(&network, &dvector![0.0], &dvector![0.0]);
+        backprop.get_gradient(&network, &dvector![0.0], &dvector![0.0, 0.0, 0.0, 0.0]);
     }
 }
diff --git a/src/network/residual/node.rs b/src/network/residual/node.rs
index c283047..36ec981 100644
--- a/src/network/residual/node.rs
+++ b/src/network/residual/node.rs
@@ -18,6 +18,8 @@ pub struct NeuraResidualNode<Layer, ChildNetwork, Axis> {
     pub axis: Axis,
 
     pub(crate) output_shape: Option<NeuraShape>,
+    pub(crate) input_shapes: Vec<NeuraShape>,
+    pub(crate) input_offsets: Vec<usize>,
 }
 
 impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend> {
@@ -28,6 +30,8 @@ impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend
             offsets: vec![0],
             axis: NeuraAxisAppend,
             output_shape: None,
+            input_shapes: vec![],
+            input_offsets: vec![],
         }
     }
 }
@@ -40,10 +44,17 @@ impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
     pub fn axis<Axis2>(self, axis: Axis2) -> NeuraResidualNode<Layer, ChildNetwork, Axis2> {
+        if self.output_shape.is_some() {
+            unimplemented!(
+                "NeuraResidualNode::axis cannot yet be called after NeuraResidualNode::construct"
+            );
+        }
+
         NeuraResidualNode {
             layer: self.layer,
             child_network: self.child_network,
             offsets: self.offsets,
             axis,
-            // Drop the knowledge of output_shape
             output_shape: None,
+            input_shapes: self.input_shapes,
+            input_offsets: self.input_offsets,
         }
     }
@@ -195,13 +206,14 @@ where
         )
     }
 
+    #[allow(unused)]
     fn get_gradient(
         &self,
         input: &NeuraResidualInput<Data>,
         intermediary: &Self::IntermediaryRepr,
         epsilon: &Self::Output,
     ) -> Self::Gradient {
-        unimplemented!();
+        unimplemented!("NeuraResidualNode::get_gradient is not yet implemented, sorry");
     }
 }
@@ -232,14 +244,15 @@ impl<
-        Data: Clone,
-        Axis: NeuraSplitInputs<Data>,
-        Layer: NeuraLayer<Axis::Combined>,
+        Data: Clone + std::fmt::Debug,
+        Axis: NeuraCombineInputs<Data> + NeuraSplitInputs<Data>,
+        Layer: NeuraLayer<Axis::Combined> + std::fmt::Debug,
         ChildNetwork,
     > NeuraNetwork<NeuraResidualInput<Data>> for NeuraResidualNode<Layer, ChildNetwork, Axis>
 where
     Layer::Output: Clone,
     Axis::Combined: Clone,
+    for<'a> Data: std::iter::Sum<&'a Data>,
 {
     type LayerInput = Axis::Combined;
@@ -260,20 +273,26 @@ where
         Cow::Owned(remaining_inputs)
     }
 
+    // To convert from gradient_in to gradient_out:
+    // - pop the first value from `gradient_in` (map_gradient_in)
+    // - compute its sum (map_gradient_in)
+    // - use it to compute the outcoming epsilon of the current layer (backprop)
+    // - split the outcoming epsilon into its original components (map_gradient_out)
+    // - push those back onto the rest (map_gradient_out)
+    // At this point, the value for `epsilon` in the gradient solver's state should be ready for another iteration,
+    // with the first value containing the unsummed incoming epsilon values from the downstream layers
+    #[allow(unused_variables)]
     fn map_gradient_in<'a>(
         &'_ self,
         input: &'_ NeuraResidualInput<Data>,
         gradient_in: &'a Self::NodeOutput,
     ) -> Cow<'a, <Layer as NeuraLayer<Axis::Combined>>::Output> {
-        // To convert from gradient_in to layer's gradient_in:
-        // Pop the first value from `epsilon`, then:
-        // - compute its sum
-        // - use it to compute the outcoming epsilon of the current layer
-        // - split the oucoming epsilon into its original components, and push those back onto the rest
-        // At this point, the value for `epsilon` in the gradient solver's state should be ready for another iteration,
-        // with the first value containing the unsummed incoming epsilon values from the downstream layers
-        unimplemented!()
+        let (first_gradient, _) = gradient_in.shift();
+
+        let sum = first_gradient.iter().map(|x| x.as_ref()).sum();
+
+        Cow::Owned(sum)
    }
 
     #[allow(unused_variables)]
     fn map_gradient_out<'a>(
@@ -283,6 +302,14 @@ where
         &'_ self,
         input: &'_ NeuraResidualInput<Data>,
         gradient_in: &'_ Self::NodeOutput,
         gradient_out: &'a Self::LayerInput,
     ) -> Cow<'a, NeuraResidualInput<Data>> {
-        unimplemented!()
+        let (_, mut rest) = gradient_in.shift();
+
+        let split = self.axis.split(gradient_out, &self.input_shapes);
+
+        for (offset, gradient) in self.input_offsets.iter().copied().zip(split.into_iter()) {
+            rest.push(offset, Rc::new(gradient));
+        }
+
+        Cow::Owned(rest)
     }
 }
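
Usage sketch, adapted from the tests in src/network/residual/mod.rs above; it assumes the crate prelude exports neura_residual!, neura_layer!, NeuraShape, NeuraBackprop and Euclidean, and that nalgebra's dvector! macro is in scope:

    use nalgebra::dvector;

    // Build a residual network: `<= 0, 1` sends the input to the first
    // layer (offset 0) and, over a skip connection, to the second layer
    // (offset 1); `=> 0, 1` does the same for the first layer's output.
    let network = neura_residual![
        <= 0, 1;
        neura_layer!("dense", 2) => 0, 1;
        neura_layer!("dense", 4);
        neura_layer!("dense", 4)
    ]
    .construct(NeuraShape::Vector(1))
    .unwrap();

    // With this patch, the backpropagation solver can traverse the residual
    // connections; the target must match the 4-neuron output layer.
    let backprop = NeuraBackprop::new(Euclidean);
    let gradient = backprop.get_gradient(&network, &dvector![0.0], &dvector![0.0, 0.0, 0.0, 0.0]);

Offsets count forward from the layer that produces the value: offset 0 is the next layer, offset 1 the one after it. That is why the middle layer above receives 1 + 2 = 3 inputs (the raw input plus the 2-neuron first layer), which is exactly what the new input_len() assertions in the construction test check.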