From 83dc7637465ab25576d972b32e1955f2e52221f6 Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Mon, 24 Apr 2023 15:58:36 +0200 Subject: [PATCH] :sparkles: Initial implementation of residual neural networks --- src/layer/dense.rs | 12 +- src/layer/dropout.rs | 10 +- src/layer/mod.rs | 8 +- src/layer/normalize.rs | 10 +- src/layer/softmax.rs | 10 +- src/lib.rs | 1 + src/network/mod.rs | 1 + src/network/residual/axis.rs | 63 +++++++ src/network/residual/construct.rs | 153 ++++++++++++++++ src/network/residual/input.rs | 38 ++++ src/network/residual/mod.rs | 259 ++++++++++++++++++++++++++++ src/network/sequential/construct.rs | 20 ++- 12 files changed, 565 insertions(+), 20 deletions(-) create mode 100644 src/network/residual/axis.rs create mode 100644 src/network/residual/construct.rs create mode 100644 src/network/residual/input.rs create mode 100644 src/network/residual/mod.rs diff --git a/src/layer/dense.rs b/src/layer/dense.rs index 3b81686..8fc6dfd 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -122,6 +122,14 @@ impl NeuraDenseLayerPartial { } } +impl, Reg: NeuraDerivable> NeuraShapedLayer + for NeuraDenseLayer +{ + fn output_shape(&self) -> NeuraShape { + NeuraShape::Vector(self.weights.shape().0) + } +} + impl< F: Float + std::fmt::Debug + 'static, Act: NeuraDerivable, @@ -144,10 +152,6 @@ where self.regularization, )) } - - fn output_shape(constructed: &Self::Constructed) -> NeuraShape { - NeuraShape::Vector(constructed.weights.shape().0) - } } impl< diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs index b44fb32..9f46c0c 100644 --- a/src/layer/dropout.rs +++ b/src/layer/dropout.rs @@ -35,6 +35,12 @@ impl NeuraDropoutLayer { } } +impl NeuraShapedLayer for NeuraDropoutLayer { + fn output_shape(&self) -> NeuraShape { + self.shape + } +} + impl NeuraPartialLayer for NeuraDropoutLayer { type Constructed = NeuraDropoutLayer; @@ -45,10 +51,6 @@ impl NeuraPartialLayer for NeuraDropoutLayer { self.mask = DVector::from_element(input_shape.size(), 
false); Ok(self) } - - fn output_shape(constructed: &Self::Constructed) -> NeuraShape { - constructed.shape - } } impl NeuraLayer> for NeuraDropoutLayer { diff --git a/src/layer/mod.rs b/src/layer/mod.rs index 1c30e62..914bd7b 100644 --- a/src/layer/mod.rs +++ b/src/layer/mod.rs @@ -48,13 +48,15 @@ impl NeuraLayer for () { } } +pub trait NeuraShapedLayer { + fn output_shape(&self) -> NeuraShape; +} + pub trait NeuraPartialLayer { - type Constructed; + type Constructed: NeuraShapedLayer; type Err; fn construct(self, input_shape: NeuraShape) -> Result; - - fn output_shape(constructed: &Self::Constructed) -> NeuraShape; } pub trait NeuraTrainableLayerBase: NeuraLayer { diff --git a/src/layer/normalize.rs b/src/layer/normalize.rs index 427dec6..ab0c4c9 100644 --- a/src/layer/normalize.rs +++ b/src/layer/normalize.rs @@ -23,6 +23,12 @@ impl NeuraNormalizeLayer { } } +impl NeuraShapedLayer for NeuraNormalizeLayer { + fn output_shape(&self) -> NeuraShape { + self.shape + } +} + impl NeuraPartialLayer for NeuraNormalizeLayer { type Constructed = NeuraNormalizeLayer; @@ -31,10 +37,6 @@ impl NeuraPartialLayer for NeuraNormalizeLayer { fn construct(self, input_shape: NeuraShape) -> Result { Ok(Self { shape: input_shape }) } - - fn output_shape(constructed: &Self::Constructed) -> NeuraShape { - constructed.shape - } } impl NeuraLayer> for NeuraNormalizeLayer { diff --git a/src/layer/softmax.rs b/src/layer/softmax.rs index c9956f5..82f31f7 100644 --- a/src/layer/softmax.rs +++ b/src/layer/softmax.rs @@ -41,6 +41,12 @@ impl NeuraLayer> for NeuraSoftmaxLa } } +impl NeuraShapedLayer for NeuraSoftmaxLayer { + fn output_shape(&self) -> NeuraShape { + self.shape + } +} + impl NeuraPartialLayer for NeuraSoftmaxLayer { type Constructed = Self; type Err = (); @@ -48,10 +54,6 @@ impl NeuraPartialLayer for NeuraSoftmaxLayer { fn construct(self, input_shape: NeuraShape) -> Result { Ok(Self { shape: input_shape }) } - - fn output_shape(constructed: &Self::Constructed) -> NeuraShape { - 
constructed.shape - } } impl NeuraTrainableLayerBase> for NeuraSoftmaxLayer { diff --git a/src/lib.rs b/src/lib.rs index f3ee7bb..f455cac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![feature(generic_arg_infer)] // #![feature(generic_const_exprs)] #![feature(associated_type_defaults)] +#![feature(arc_unwrap_or_clone)] pub mod algebra; pub mod derivable; diff --git a/src/network/mod.rs b/src/network/mod.rs index 335889e..a625bd5 100644 --- a/src/network/mod.rs +++ b/src/network/mod.rs @@ -2,6 +2,7 @@ use crate::{ algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolverBase, layer::NeuraLayer, }; +pub mod residual; pub mod sequential; // TODO: extract regularize from this, so that we can drop the trait constraints on NeuraSequential's impl diff --git a/src/network/residual/axis.rs b/src/network/residual/axis.rs new file mode 100644 index 0000000..1f30fff --- /dev/null +++ b/src/network/residual/axis.rs @@ -0,0 +1,63 @@ +use std::borrow::Borrow; + +use nalgebra::{Const, DVector, Dyn, VecStorage}; + +use crate::prelude::NeuraShape; + +#[derive(Clone, Copy, Debug)] +pub struct NeuraAxisAppend; + +#[derive(Clone, Copy, Debug)] +pub enum NeuraAxisErr { + NoInput, + ConflictingShape(NeuraShape, NeuraShape), +} + +pub trait NeuraCombineInputs { + type Combined; + + fn combine(&self, inputs: Vec>) -> Self::Combined; + + // TODO: + // fn shape(&self, input_shapes: Vec) -> NeuraShape; +} + +impl NeuraCombineInputs> for NeuraAxisAppend { + type Combined = DVector; + + fn combine(&self, inputs: Vec>>) -> Self::Combined { + assert!(inputs.len() > 0); + let mut res = Vec::with_capacity(inputs.iter().map(|vec| vec.borrow().len()).sum()); + + for input in inputs { + for x in input.borrow().iter() { + res.push(x.clone()); + } + } + + DVector::from_data(VecStorage::new(Dyn(res.len()), Const as Const<1>, res)) + } +} + +impl NeuraCombineInputs for NeuraAxisAppend { + type Combined = Result; + + fn combine(&self, inputs: Vec>) -> Self::Combined { + let mut inputs = 
inputs.into_iter().map(|x| *x.borrow()); + if let Some(mut res) = inputs.next() { + for operand in inputs { + match (res, operand) { + (NeuraShape::Vector(x), NeuraShape::Vector(y)) => { + res = NeuraShape::Vector(x + y); + } + (x, y) => { + return Err(NeuraAxisErr::ConflictingShape(x, y)); + } + } + } + Ok(res) + } else { + Err(NeuraAxisErr::NoInput) + } + } +} diff --git a/src/network/residual/construct.rs b/src/network/residual/construct.rs new file mode 100644 index 0000000..388b241 --- /dev/null +++ b/src/network/residual/construct.rs @@ -0,0 +1,153 @@ +use super::*; + +pub trait NeuraResidualConstruct { + type Constructed; + type Err; + + fn construct_residual( + self, + input: NeuraResidualInput, + ) -> Result; +} + +#[derive(Clone, Debug)] +pub enum NeuraResidualConstructErr { + LayerErr(LayerErr), + ChildErr(ChildErr), + OOBConnection(usize), + AxisErr(NeuraAxisErr), +} + +use NeuraResidualConstructErr::*; + +impl NeuraResidualConstruct for NeuraResidualNode +where + Axis: NeuraCombineInputs>, +{ + type Constructed = NeuraResidualNode; + type Err = NeuraResidualConstructErr; + + fn construct_residual( + self, + input: NeuraResidualInput, + ) -> Result { + let (layer_input_shape, _rest) = input.shift(); + let layer_input_shape = self + .axis + .combine(layer_input_shape) + .map_err(|e| AxisErr(e))?; + + let layer = self + .layer + .construct(layer_input_shape) + .map_err(|e| LayerErr(e))?; + let layer_shape = layer.output_shape(); + + if let Some(oob_offset) = self.offsets.iter().copied().find(|o| *o > 0) { + return Err(OOBConnection(oob_offset)); + } + // TODO: check rest for non-zero columns + + Ok(NeuraResidualNode { + layer, + child_network: (), + offsets: self.offsets, + axis: self.axis, + output_shape: Some(layer_shape), + }) + } +} + +impl NeuraResidualConstruct + for NeuraResidualNode +where + Axis: NeuraCombineInputs>, +{ + type Constructed = NeuraResidualNode; + type Err = NeuraResidualConstructErr; + + fn construct_residual( + self, + input: 
NeuraResidualInput, + ) -> Result { + let (layer_input_shape, mut rest) = input.shift(); + let layer_input_shape = self + .axis + .combine(layer_input_shape) + .map_err(|e| AxisErr(e))?; + + let layer = self + .layer + .construct(layer_input_shape) + .map_err(|e| LayerErr(e))?; + let layer_shape = Rc::new(layer.output_shape()); + + for &offset in &self.offsets { + rest.push(offset, Rc::clone(&layer_shape)); + } + let layer_shape = *layer_shape; + + let child_network = self + .child_network + .construct_residual(rest) + .map_err(|e| ChildErr(e))?; + + Ok(NeuraResidualNode { + layer, + child_network, + offsets: self.offsets, + axis: self.axis, + output_shape: Some(layer_shape), + }) + } +} + +impl NeuraShapedLayer for NeuraResidualNode { + #[inline(always)] + fn output_shape(&self) -> NeuraShape { + self.output_shape.unwrap() + } +} + +impl NeuraShapedLayer + for NeuraResidualNode +{ + #[inline(always)] + fn output_shape(&self) -> NeuraShape { + self.child_network.output_shape() + } +} + +impl NeuraShapedLayer for NeuraResidual { + #[inline(always)] + fn output_shape(&self) -> NeuraShape { + self.layers.output_shape() + } +} + +impl NeuraPartialLayer for NeuraResidual +where + // Should always be satisfied: + Layers::Constructed: NeuraShapedLayer, +{ + type Constructed = NeuraResidual; + type Err = Layers::Err; + + fn construct(self, input_shape: NeuraShape) -> Result { + let input_shape = Rc::new(input_shape); + let mut inputs = NeuraResidualInput::new(); + + for &offset in &self.initial_offsets { + inputs.push(offset, Rc::clone(&input_shape)); + } + + drop(input_shape); + + let layers = self.layers.construct_residual(inputs)?; + + Ok(NeuraResidual { + layers, + initial_offsets: self.initial_offsets, + }) + } +} diff --git a/src/network/residual/input.rs b/src/network/residual/input.rs new file mode 100644 index 0000000..d316a67 --- /dev/null +++ b/src/network/residual/input.rs @@ -0,0 +1,38 @@ +use std::rc::Rc; + +#[derive(Clone, Debug)] +pub struct 
NeuraResidualInput { + // TODO: try to remove this Rc + slots: Vec>>, +} + +impl NeuraResidualInput { + pub fn new() -> Self { + Self { slots: Vec::new() } + } + + pub fn push(&mut self, offset: usize, data: Rc) { + while self.slots.len() <= offset { + self.slots.push(Vec::new()); + } + self.slots[offset].push(data); + } + + pub fn shift(&self) -> (Vec>, NeuraResidualInput) { + let res = self.slots.get(0).cloned().unwrap_or(vec![]); + let new_input = Self { + slots: self.slots.iter().skip(1).cloned().collect(), + }; + + (res, new_input) + } + + /// Returns the first input item of the first slot + pub fn get_first(self) -> Option> { + // TODO: return None if the first slot is bigger than 1 or if there are multiple non-empty slots + self.slots + .into_iter() + .next() + .and_then(|first_slot| first_slot.into_iter().next()) + } +} diff --git a/src/network/residual/mod.rs b/src/network/residual/mod.rs new file mode 100644 index 0000000..3555a90 --- /dev/null +++ b/src/network/residual/mod.rs @@ -0,0 +1,259 @@ +use std::rc::Rc; + +use nalgebra::{DVector, Scalar}; +use num::Float; + +use crate::layer::*; + +mod input; +pub use input::*; + +mod axis; +pub use axis::*; + +mod construct; +pub use construct::NeuraResidualConstructErr; + +#[derive(Clone, Debug, PartialEq)] +pub struct NeuraResidual { + /// Instance of NeuraResidualNode + layers: Layers, + + /// Array of which layers to send the input to, defaults to `vec![0]` + initial_offsets: Vec, +} + +impl NeuraResidual { + pub fn new(layers: Layers) -> Self { + Self { + layers, + initial_offsets: vec![0], + } + } + + pub fn offset(mut self, offset: usize) -> Self { + self.initial_offsets.push(offset); + self + } + + pub fn offsets(mut self, offsets: Vec) -> Self { + self.initial_offsets = offsets; + self + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct NeuraResidualNode { + pub layer: Layer, + pub child_network: ChildNetwork, + + /// Array of relative layer indices to send the output of this layer to, + /// 
defaults to `vec![0]`. + offsets: Vec, + + pub axis: Axis, + + output_shape: Option, +} + +impl NeuraResidualNode { + pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { + Self { + layer, + child_network, + offsets: vec![0], + axis: NeuraAxisAppend, + output_shape: None, + } + } +} + +impl NeuraResidualNode { + pub fn offsets(mut self, offsets: Vec) -> Self { + self.offsets = offsets; + self + } + + pub fn offset(mut self, offset: usize) -> Self { + self.offsets.push(offset); + self + } + + pub fn axis(self, axis: Axis2) -> NeuraResidualNode { + NeuraResidualNode { + layer: self.layer, + child_network: self.child_network, + offsets: self.offsets, + axis, + // Drop the knowledge of output_shape + output_shape: None, + } + } +} + +impl NeuraLayer>> + for NeuraResidualNode +where + Axis: NeuraCombineInputs>, + Layer: NeuraLayer>, + ChildNetwork: NeuraLayer>>, +{ + type Output = >>>::Output; + + fn eval(&self, input: &NeuraResidualInput>) -> Self::Output { + let (inputs, mut rest) = input.shift(); + + let layer_input = self.axis.combine(inputs); + let layer_output = Rc::new(self.layer.eval(&layer_input)); + + for &offset in &self.offsets { + rest.push(offset, Rc::clone(&layer_output)); + } + + self.child_network.eval(&rest) + } +} + +impl NeuraLayer> for NeuraResidual +where + Layers: NeuraLayer>, Output = NeuraResidualInput>, +{ + type Output = Output; + + fn eval(&self, input: &DVector) -> Self::Output { + let input: Rc> = Rc::new((*input).clone()); + let mut inputs = NeuraResidualInput::new(); + + for &offset in &self.initial_offsets { + inputs.push(offset, Rc::clone(&input)); + } + + drop(input); + + let output = self.layers.eval(&inputs); + + let result = output.get_first() + .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?") + .into(); + + Rc::unwrap_or_clone(result) + } +} + +#[macro_export] +macro_rules! 
neura_residual { + [ "__combine_layers", ] => { + () + }; + + [ "__combine_layers", + $layer:expr $(, $axis:expr)? $( => $( $offset:expr ),* )? + $(; $( $rest_layer:expr $(, $rest_axis:expr)? $( => $( $rest_offset:expr ),* )? );*)? + ] => {{ + let layer = $crate::network::residual::NeuraResidualNode::new($layer, + neura_residual![ + "__combine_layers", + $($( $rest_layer $(, $rest_axis)? $( => $( $rest_offset ),* )? );*)? + ] + ); + + $( + let layer = layer.axis($axis); + )? + + $( + let layer = layer.offsets(vec![$($offset),*]); + )? + + layer + }}; + + [ + $( <= $( $initial_offset:expr ),* ;)? + $( $layer:expr $(, $axis:expr)? $( => $( $offset:expr ),* $(,)? )? );* + $(;)? + ] => {{ + let res = $crate::network::residual::NeuraResidual::new( + neura_residual![ "__combine_layers", $( $layer $(, $axis)? $( => $( $offset ),* )? );* ] + ); + + $( + let res = res.offsets(vec![$($initial_offset),*]); + )? + + res + }}; +} + +#[cfg(test)] +mod test { + use nalgebra::dvector; + + use crate::neura_layer; + + use super::*; + + #[test] + fn test_resnet_eval() { + let network = NeuraResidual::new( + NeuraResidualNode::new( + neura_layer!("dense", 4) + .construct(NeuraShape::Vector(2)) + .unwrap(), + NeuraResidualNode::new( + neura_layer!("dense", 3) + .construct(NeuraShape::Vector(4)) + .unwrap(), + NeuraResidualNode::new( + neura_layer!("dense", 6) + .construct(NeuraShape::Vector(7)) + .unwrap(), + (), + ), + ), + ) + .offset(1), + ); + + network.eval(&dvector![0.2, 0.4]); + } + + #[test] + fn test_resnet_macro() { + let network = neura_residual![ + <= 0, 2; + neura_layer!("dense", 5) => 0, 1; + neura_layer!("dense", 5); + neura_layer!("dense", 3) + ]; + + println!("{:#?}", network); + + assert_eq!(network.initial_offsets, vec![0, 2]); + assert_eq!(network.layers.offsets, vec![0, 1]); + assert_eq!(network.layers.child_network.offsets, vec![0]); + assert_eq!(network.layers.child_network.child_network.child_network, ()); + + let network = neura_residual![ + 
neura_layer!("dense", 4) => 0; + ]; + + assert_eq!(network.initial_offsets, vec![0]); + } + + #[test] + fn test_resnet_partial() { + let network = neura_residual![ + <= 0, 1; + neura_layer!("dense", 2) => 0, 1; + neura_layer!("dense", 4); + neura_layer!("dense", 8) + ] + .construct(NeuraShape::Vector(1)) + .unwrap(); + + assert_eq!(network.output_shape(), NeuraShape::Vector(8)); + + network.eval(&dvector![0.0]); + } +} diff --git a/src/network/sequential/construct.rs b/src/network/sequential/construct.rs index 1e257e9..4c837e3 100644 --- a/src/network/sequential/construct.rs +++ b/src/network/sequential/construct.rs @@ -1,3 +1,5 @@ +use crate::layer::NeuraShapedLayer; + use super::*; pub trait NeuraSequentialConstruct { @@ -40,7 +42,7 @@ impl NeuraSequ // TODO: ensure that this operation (and all recursive operations) are directly allocated on the heap let child_network = self .child_network - .construct(Layer::output_shape(&layer)) + .construct(layer.output_shape()) .map_err(|e| NeuraSequentialConstructErr::Child(e))?; let child_network = Box::new(child_network); @@ -50,3 +52,19 @@ impl NeuraSequ }) } } + +impl NeuraShapedLayer for NeuraSequential { + #[inline(always)] + fn output_shape(&self) -> NeuraShape { + self.layer.output_shape() + } +} + +impl NeuraShapedLayer + for NeuraSequential +{ + #[inline(always)] + fn output_shape(&self) -> NeuraShape { + self.child_network.output_shape() + } +}