From 2edbff860c09af468afca473fa226ba6a1be0cae Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Wed, 19 Apr 2023 00:54:30 +0200 Subject: [PATCH] :fire: :truck: :recycle: Refactoring the previous layer system It was becoming almost impossible to manage the dimensions of the layers, especially with convolution layers. Generic consts are nice, but they are a bit too early to have right now for this use-case. We'll probably be expanding the implementations to accept const or dynamically-sized layers at some point, for performance-critical applications. --- Cargo.toml | 1 + examples/xor.rs | 24 +-- src/algebra/matrix.rs | 8 +- src/algebra/mod.rs | 69 ++++--- src/algebra/vector.rs | 8 +- src/derivable/loss.rs | 25 +-- src/layer/dense.rs | 238 +++++++++++++----------- src/layer/mod.rs | 183 +++++------------- src/lib.rs | 9 +- src/network/mod.rs | 15 +- src/network/sequential.rs | 189 ++++++++++++------- src/{layer => old_layer}/convolution.rs | 0 src/old_layer/dense.rs | 180 ++++++++++++++++++ src/{layer => old_layer}/dropout.rs | 0 src/{layer => old_layer}/lock.rs | 0 src/old_layer/mod.rs | 170 +++++++++++++++++ src/{layer => old_layer}/one_hot.rs | 0 src/{layer => old_layer}/pool.rs | 0 src/{layer => old_layer}/reshape.rs | 0 src/{layer => old_layer}/softmax.rs | 0 src/train.rs | 135 ++++++-------- 21 files changed, 796 insertions(+), 458 deletions(-) rename src/{layer => old_layer}/convolution.rs (100%) create mode 100644 src/old_layer/dense.rs rename src/{layer => old_layer}/dropout.rs (100%) rename src/{layer => old_layer}/lock.rs (100%) create mode 100644 src/old_layer/mod.rs rename src/{layer => old_layer}/one_hot.rs (100%) rename src/{layer => old_layer}/pool.rs (100%) rename src/{layer => old_layer}/reshape.rs (100%) rename src/{layer => old_layer}/softmax.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 48cafe8..fe4ca1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] boxed-array = "0.1.0" +nalgebra = { version = "^0.32", features = ["std", "macros", "rand"] } ndarray = "^0.15" num = "^0.4" # num-traits = "0.2.15" diff --git a/examples/xor.rs b/examples/xor.rs index 759dd9d..4b7c277 100644 --- a/examples/xor.rs +++ b/examples/xor.rs @@ -1,22 +1,24 @@ #![feature(generic_arg_infer)] -use neuramethyst::algebra::NeuraVector; +use nalgebra::dvector; + use neuramethyst::derivable::activation::Relu; use neuramethyst::derivable::loss::Euclidean; -use neuramethyst::{cycle_shuffling, prelude::*}; +use neuramethyst::prelude::*; +use neuramethyst::cycle_shuffling; fn main() { let mut network = neura_sequential![ - neura_layer!("dense", 2, 4; Relu), - neura_layer!("dense", 3; Relu), - neura_layer!("dense", 1; Relu) - ]; + neura_layer!("dense", 4, Relu), + neura_layer!("dense", 3, Relu), + neura_layer!("dense", 1, Relu) + ].construct(NeuraShape::Vector(2)).unwrap(); - let inputs: [(NeuraVector<2, f64>, NeuraVector<1, f64>); 4] = [ - ([0.0, 0.0].into(), [0.0].into()), - ([0.0, 1.0].into(), [1.0].into()), - ([1.0, 0.0].into(), [1.0].into()), - ([1.0, 1.0].into(), [0.0].into()), + let inputs = [ + (dvector![0.0, 0.0], dvector![0.0]), + (dvector![0.0, 1.0], dvector![1.0]), + (dvector![1.0, 0.0], dvector![1.0]), + (dvector![1.0, 1.0], dvector![0.0]), ]; for (input, target) in &inputs { diff --git a/src/algebra/matrix.rs b/src/algebra/matrix.rs index 484b0a1..9b15213 100644 --- a/src/algebra/matrix.rs +++ b/src/algebra/matrix.rs @@ -167,10 +167,10 @@ impl Neura } } - #[inline(always)] - fn zero() -> Self { - Self::from_value(F::zero()) - } + // 
#[inline(always)] + // fn zero() -> Self { + // Self::from_value(F::zero()) + // } fn norm_squared(&self) -> f64 { let mut sum = 0.0; diff --git a/src/algebra/mod.rs b/src/algebra/mod.rs index ec9e3ad..8e26d80 100644 --- a/src/algebra/mod.rs +++ b/src/algebra/mod.rs @@ -2,6 +2,8 @@ mod matrix; pub use matrix::NeuraMatrix; mod vector; +use nalgebra::Matrix; +use num::Float; pub use vector::NeuraVector; /// An extension of `std::ops::AddAssign` and `std::ops::Default` @@ -10,7 +12,7 @@ pub trait NeuraVectorSpace { fn mul_assign(&mut self, by: f64); - fn zero() -> Self; + // fn zero() -> Self; fn norm_squared(&self) -> f64; } @@ -26,10 +28,10 @@ impl NeuraVectorSpace for () { // Noop } - #[inline(always)] - fn zero() -> Self { - () - } + // #[inline(always)] + // fn zero() -> Self { + // () + // } fn norm_squared(&self) -> f64 { 0.0 @@ -45,9 +47,9 @@ impl NeuraVectorSpace for Box { self.as_mut().mul_assign(by); } - fn zero() -> Self { - Box::new(T::zero()) - } + // fn zero() -> Self { + // Box::new(T::zero()) + // } fn norm_squared(&self) -> f64 { self.as_ref().norm_squared() @@ -65,9 +67,9 @@ impl NeuraVectorSpace for (Left NeuraVectorSpace::mul_assign(&mut self.1, by); } - fn zero() -> Self { - (Left::zero(), Right::zero()) - } + // fn zero() -> Self { + // (Left::zero(), Right::zero()) + // } fn norm_squared(&self) -> f64 { self.0.norm_squared() + self.1.norm_squared() @@ -87,24 +89,43 @@ impl NeuraVectorSpace for [T; N] { } } - fn zero() -> Self { - let mut res: Vec = Vec::with_capacity(N); + // fn zero() -> Self { + // let mut res: Vec = Vec::with_capacity(N); - for _ in 0..N { - res.push(T::zero()); - } + // for _ in 0..N { + // res.push(T::zero()); + // } - res.try_into().unwrap_or_else(|_| { - // TODO: check that this panic is optimized away - unreachable!() - }) - } + // res.try_into().unwrap_or_else(|_| { + // // TODO: check that this panic is optimized away + // unreachable!() + // }) + // } fn norm_squared(&self) -> f64 { self.iter().map(T::norm_squared).sum() } } +impl> NeuraVectorSpace for Matrix +where + Matrix: std::ops::MulAssign, + for<'c> Matrix: std::ops::AddAssign<&'c Matrix>, + F: From + Into +{ + fn add_assign(&mut self, other: &Self) { + *self += other; + } + + fn mul_assign(&mut self, by: f64) { + *self *= >::from(by); + } + + fn norm_squared(&self) -> f64 { + self.iter().map(|x| *x * *x).reduce(|sum, curr| sum + curr).unwrap_or(F::zero()).into() + } +} + macro_rules! base { ( $type:ty ) => { impl NeuraVectorSpace for $type { @@ -116,9 +137,9 @@ macro_rules! 
base { std::ops::MulAssign::mul_assign(self, other as $type); } - fn zero() -> Self { - ::default() - } + // fn zero() -> Self { + // ::default() + // } fn norm_squared(&self) -> f64 { (self * self) as f64 diff --git a/src/algebra/vector.rs b/src/algebra/vector.rs index 7042431..52f4a66 100644 --- a/src/algebra/vector.rs +++ b/src/algebra/vector.rs @@ -95,10 +95,10 @@ impl + Into> NeuraVectorSpace } } - #[inline(always)] - fn zero() -> Self { - Self::from_value(F::zero()) - } + // #[inline(always)] + // fn zero() -> Self { + // Self::from_value(F::zero()) + // } fn norm_squared(&self) -> f64 { let mut sum = F::zero(); diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs index 7d23d8b..9bb79da 100644 --- a/src/derivable/loss.rs +++ b/src/derivable/loss.rs @@ -1,19 +1,22 @@ +use nalgebra::DVector; + use crate::algebra::NeuraVector; use super::NeuraLoss; #[derive(Clone, Copy, Debug, PartialEq)] -pub struct Euclidean; +pub struct Euclidean; -impl NeuraLoss for Euclidean { - type Input = NeuraVector; - type Target = NeuraVector; +impl NeuraLoss for Euclidean { + type Input = DVector; + type Target = DVector; #[inline] - fn eval(&self, target: &NeuraVector, actual: &NeuraVector) -> f64 { + fn eval(&self, target: &DVector, actual: &DVector) -> f64 { + assert_eq!(target.shape(), actual.shape()); let mut sum_squared = 0.0; - for i in 0..N { + for i in 0..target.len() { sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]); } @@ -23,13 +26,13 @@ impl NeuraLoss for Euclidean { #[inline] fn nabla( &self, - target: &NeuraVector, - actual: &NeuraVector, - ) -> NeuraVector { - let mut res = NeuraVector::default(); + target: &DVector, + actual: &DVector, + ) -> DVector { + let mut res = DVector::zeros(target.len()); // ∂E(y)/∂yᵢ = yᵢ - yᵢ' - for i in 0..N { + for i in 0..target.len() { res[i] = actual[i] - target[i]; } diff --git a/src/layer/dense.rs b/src/layer/dense.rs index ff921a8..dd56367 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -1,38 +1,49 @@ -use super::{NeuraLayer, NeuraTrainableLayer}; -use crate::{ - algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace}, - derivable::NeuraDerivable, -}; +use std::marker::PhantomData; +use nalgebra::{DMatrix, DVector}; +use num::Float; use rand::Rng; -use rand_distr::Distribution; + +use crate::derivable::NeuraDerivable; + +use super::*; #[derive(Clone, Debug)] -pub struct NeuraDenseLayer< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, +pub struct NeuraDenseLayer, Reg: NeuraDerivable> { + weights: DMatrix, + bias: DVector, + activation: Act, + regularization: Reg, +} + +#[derive(Clone, Debug)] +pub struct NeuraDenseLayerPartial< + F: Float, + Act: NeuraDerivable, + Reg: NeuraDerivable, + R: Rng, > { - weights: NeuraMatrix, - bias: NeuraVector, activation: Act, regularization: Reg, + output_size: usize, + rng: R, + phantom: PhantomData, } impl< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, - > NeuraDenseLayer + F: Float + From + std::fmt::Debug + 'static, + Act: NeuraDerivable, + Reg: NeuraDerivable, + > NeuraDenseLayer { pub fn new( - weights: NeuraMatrix, - bias: NeuraVector, + weights: DMatrix, + bias: DVector, activation: Act, regularization: Reg, ) -> Self { + assert_eq!(bias.shape().0, weights.shape().0); + Self { weights, bias, @@ -41,85 +52,129 @@ impl< } } - pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { - let mut weights: NeuraMatrix = NeuraMatrix::from_value(0.0f64); - - // Use 
Xavier (or He) initialisation, using the harmonic mean - // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html + pub fn from_rng( + input_size: usize, + output_size: usize, + rng: &mut impl Rng, + activation: Act, + regularization: Reg, + ) -> Self + where + rand_distr::StandardNormal: rand_distr::Distribution, + { let distribution = rand_distr::Normal::new( - 0.0, - activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64), + F::zero(), + >::from( + activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64), + ), ) .unwrap(); - // let distribution = rand_distr::Uniform::new(-0.5, 0.5); - for i in 0..OUTPUT_LEN { - for j in 0..INPUT_LEN { - weights[i][j] = distribution.sample(rng); - } + Self { + weights: DMatrix::from_distribution(output_size, input_size, &distribution, rng), + bias: DVector::from_element( + output_size, + >::from(activation.bias_hint()), + ), + activation, + regularization, } + } - Self { - weights, - // Biases are initialized based on the activation's hint - bias: NeuraVector::from_value(activation.bias_hint()), + pub fn new_partial( + output_size: usize, + rng: R, + activation: Act, + regularization: Reg, + ) -> NeuraDenseLayerPartial { + NeuraDenseLayerPartial { activation, regularization, + output_size, + rng, + phantom: PhantomData, } } } impl< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, - > NeuraLayer for NeuraDenseLayer + F: Float + From + std::fmt::Debug + 'static, + Act: NeuraDerivable, + Reg: NeuraDerivable, + R: Rng, + > NeuraPartialLayer for NeuraDenseLayerPartial +where + rand_distr::StandardNormal: rand_distr::Distribution, { - type Input = NeuraVector; + type Constructed = NeuraDenseLayer; + type Err = (); + + fn construct(self, input_shape: NeuraShape) -> Result { + let mut rng = self.rng; + Ok(NeuraDenseLayer::from_rng( + input_shape.size(), + self.output_size, + &mut rng, + self.activation, + self.regularization, + )) + } + + fn output_shape(constructed: &Self::Constructed) -> NeuraShape { + NeuraShape::Vector(constructed.weights.shape().0) + } +} - type Output = NeuraVector; +impl< + F: Float + From + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign, + Act: NeuraDerivable, + Reg: NeuraDerivable, + > NeuraLayer> for NeuraDenseLayer +{ + type Output = DVector; - fn eval(&self, input: &Self::Input) -> Self::Output { - let mut result = self.weights.multiply_vector(input); + fn eval(&self, input: &DVector) -> Self::Output { + assert_eq!(input.shape().0, self.weights.shape().1); - for i in 0..OUTPUT_LEN { - result[i] = self.activation.eval(result[i] + self.bias[i]); - } + let res = &self.weights * input + &self.bias; - result + res.map(|x| self.activation.eval(x)) } } impl< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, - > NeuraTrainableLayer for NeuraDenseLayer + F: Float + From + Into + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign, + Act: NeuraDerivable, + Reg: NeuraDerivable, + > NeuraTrainableLayer> for NeuraDenseLayer { - type Delta = ( - NeuraMatrix, - NeuraVector, - ); + type Gradient = (DMatrix, DVector); - fn backpropagate( + fn default_gradient(&self) -> Self::Gradient { + ( + DMatrix::zeros(self.weights.shape().0, self.weights.shape().1), + DVector::zeros(self.bias.shape().0), + ) + } + + fn backprop_layer( &self, - input: &Self::Input, + input: &DVector, epsilon: Self::Output, - ) -> (Self::Input, Self::Delta) { - let evaluated = 
self.weights.multiply_vector(input); + ) -> (DVector, Self::Gradient) { + let evaluated = &self.weights * input; // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), // with `self.activation'(input) ° epsilon = delta` - let mut delta: NeuraVector = epsilon.clone(); - for i in 0..OUTPUT_LEN { + let mut delta = epsilon.clone(); + + for i in 0..delta.len() { delta[i] *= self.activation.derivate(evaluated[i]); } // Compute the weight gradient - let weights_gradient = delta.reverse_dot(input); + let weights_gradient = &delta * input.transpose(); - let new_epsilon = self.weights.transpose_multiply_vector(&delta); + let new_epsilon = self.weights.tr_mul(&delta); // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // The gradient of the bias is equal to the delta term of the backpropagation algorithm @@ -128,53 +183,12 @@ impl< (new_epsilon, (weights_gradient, bias_gradient)) } - fn apply_gradient(&mut self, gradient: &Self::Delta) { - NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0); - NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1); + fn regularize_layer(&self) -> Self::Gradient { + (self.weights.map(|x| self.regularization.derivate(x)), DVector::zeros(self.bias.shape().0)) } - fn regularize(&self) -> Self::Delta { - let mut res = Self::Delta::default(); - - for i in 0..OUTPUT_LEN { - for j in 0..INPUT_LEN { - res.0[i][j] = self.regularization.derivate(self.weights[i][j]); - } - } - - // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network - - res - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::{ - derivable::{activation::Relu, regularize::NeuraL0}, - utils::uniform_vector, - }; - - #[test] - fn test_from_rng() { - let mut rng = rand::thread_rng(); - let layer: NeuraDenseLayer<_, _, 64, 32> = - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0); - let mut input = [0.0; 64]; - for x in 0..64 { - input[x] = rng.gen(); - } - assert!(layer.eval(&input.into()).len() == 32); - } - - #[test] - fn test_stack_overflow_big_layer() { - let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0) - as NeuraDenseLayer; - - layer.backpropagate(&uniform_vector(), uniform_vector()); - - as NeuraTrainableLayer>::Delta::zero(); + fn apply_gradient(&mut self, gradient: &Self::Gradient) { + self.weights += &gradient.0; + self.bias += &gradient.1; } } diff --git a/src/layer/mod.rs b/src/layer/mod.rs index c89d835..560d738 100644 --- a/src/layer/mod.rs +++ b/src/layer/mod.rs @@ -1,39 +1,55 @@ -mod dense; -pub use dense::NeuraDenseLayer; +use num::Float; -mod convolution; -pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer}; +use crate::algebra::NeuraVectorSpace; -mod dropout; -pub use dropout::NeuraDropoutLayer; +pub mod dense; +pub use dense::NeuraDenseLayer; -mod softmax; -pub use softmax::NeuraSoftmaxLayer; +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum NeuraShape { + Vector(usize), // entries + Matrix(usize, usize), // rows, columns + Tensor(usize, usize, usize), // rows, columns, channels +} -mod one_hot; -pub use one_hot::NeuraOneHotLayer; +impl NeuraShape { + pub fn size(&self) -> usize { + match self { + NeuraShape::Vector(entries) => *entries, + NeuraShape::Matrix(rows, columns) => rows * columns, + NeuraShape::Tensor(rows, columns, channels) => rows * columns * channels + } + } +} + +pub trait NeuraLayer 
{ + type Output; -mod lock; -pub use lock::NeuraLockLayer; + fn eval(&self, input: &Input) -> Self::Output; +} -mod pool; -pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer}; +impl NeuraLayer for () { + type Output = Input; -mod reshape; -pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer}; + fn eval(&self, input: &Input) -> Self::Output { + input.clone() + } +} -use crate::algebra::NeuraVectorSpace; +pub trait NeuraPartialLayer { + type Constructed; + type Err; -pub trait NeuraLayer { - type Input; - type Output; + fn construct(self, input_shape: NeuraShape) -> Result; - fn eval(&self, input: &Self::Input) -> Self::Output; + fn output_shape(constructed: &Self::Constructed) -> NeuraShape; } -pub trait NeuraTrainableLayer: NeuraLayer { +pub trait NeuraTrainableLayer: NeuraLayer { /// The representation of the layer gradient as a vector space - type Delta: NeuraVectorSpace; + type Gradient: NeuraVectorSpace; + + fn default_gradient(&self) -> Self::Gradient; /// Computes the backpropagation term and the derivative of the internal weights, /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. @@ -46,125 +62,28 @@ pub trait NeuraTrainableLayer: NeuraLayer { /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. /// Using this intermediate value for `delta` allows us to isolate it computation to the respective layers. - fn backpropagate( + fn backprop_layer( &self, - input: &Self::Input, + input: &Input, epsilon: Self::Output, - ) -> (Self::Input, Self::Delta); + ) -> (Input, Self::Gradient); /// Computes the regularization - fn regularize(&self) -> Self::Delta; + fn regularize_layer(&self) -> Self::Gradient; /// Applies `δW_l` to the weights of the layer - fn apply_gradient(&mut self, gradient: &Self::Delta); - - /// Called before an iteration begins, to allow the layer to set itself up for training. - #[inline(always)] - fn prepare_epoch(&mut self) {} + fn apply_gradient(&mut self, gradient: &Self::Gradient); - /// Called at the end of training, to allow the layer to clean itself up + /// Arbitrary computation that can be executed at the start of an epoch + #[allow(unused_variables)] #[inline(always)] - fn cleanup(&mut self) {} + fn prepare_layer(&mut self, is_training: bool) {} } +/// Temporary implementation of neura_layer #[macro_export] macro_rules! 
neura_layer { - ( "dense", $( $shape:expr ),*; $activation:expr ) => { - $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0) - as neura_layer!("_dense_shape", $($shape),*) - }; - - ( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => { - $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization) - as neura_layer!("_dense_shape", $($shape),*) - }; - - ( "_dense_shape", $output:expr ) => { - $crate::layer::NeuraDenseLayer<_, _, _, $output> - }; - - ( "_dense_shape", $input:expr, $output:expr ) => { - $crate::layer::NeuraDenseLayer<_, _, $input, $output> - }; - - ( "dropout", $probability:expr ) => { - $crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng()) - as $crate::layer::NeuraDropoutLayer<_, _> - }; - - ( "softmax" ) => { - $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_> - }; - - ( "softmax", $length:expr ) => { - $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length> - }; - - ( "one_hot" ) => { - $crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _> - }; - - ( "lock", $layer:expr ) => { - $crate::layer::NeuraLockLayer($layer) - }; - - ( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _> - }; - - ( "conv1d_pad"; $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _> - }; - - ( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _> - }; - - ( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _> - }; - - ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _> - }; - - ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _> - }; - - ( "pool_global"; $reduce:expr ) => { - $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _> - }; - - ( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => { - $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _> - }; - - ( "pool1d", $blocklength:expr; $reduce:expr ) => { - $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _> - }; - - ( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => { - $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _> - }; - - ( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => { - $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _> - }; - - ( "unstable_flatten" ) => { - $crate::layer::NeuraFlattenLayer::new() as 
$crate::layer::NeuraFlattenLayer<_, _, f64> - }; - - ( "unstable_flatten", $width:expr, $height:expr ) => { - $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64> - }; - - ( "unstable_reshape", $height:expr ) => { - $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64> - }; - - ( "unstable_reshape", $width:expr, $height:expr ) => { - $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64> - }; + ( "dense", $output:expr, $activation:expr ) => { + $crate::layer::dense::NeuraDenseLayer::new_partial($output, rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0) + } } diff --git a/src/lib.rs b/src/lib.rs index 0ccb167..4b2d837 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,15 @@ #![feature(generic_arg_infer)] #![feature(generic_const_exprs)] +#![feature(negative_impls)] pub mod algebra; pub mod derivable; -pub mod layer; +// pub mod layer; pub mod network; pub mod train; +pub mod layer; + mod utils; // TODO: move to a different file @@ -17,7 +20,7 @@ pub mod prelude { pub use crate::{neura_layer, neura_sequential}; // Structs and traits - pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; - pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail}; + pub use crate::layer::*; + pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail, NeuraSequentialBuild}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; } diff --git a/src/network/mod.rs b/src/network/mod.rs index 68b953f..68bdcfa 100644 --- a/src/network/mod.rs +++ b/src/network/mod.rs @@ -2,25 +2,24 @@ use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer}; pub mod sequential; -pub trait NeuraTrainableNetwork: NeuraLayer { +pub trait NeuraTrainableNetwork: NeuraLayer { type Delta: NeuraVectorSpace; + fn default_gradient(&self) -> Self::Delta; + fn apply_gradient(&mut self, gradient: &Self::Delta); /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information. fn backpropagate>( &self, - input: &Self::Input, + input: &Input, target: &Loss::Target, loss: Loss, - ) -> (Self::Input, Self::Delta); + ) -> (Input, Self::Delta); /// Should return the regularization gradient fn regularize(&self) -> Self::Delta; - /// Called before an iteration begins, to allow the network to set itself up for training. - fn prepare_epoch(&mut self); - - /// Called at the end of training, to allow the network to clean itself up - fn cleanup(&mut self); + /// Called before an iteration begins, to allow the network to set itself up for training or not. 
+ fn prepare(&mut self, train_iteration: bool); } diff --git a/src/network/sequential.rs b/src/network/sequential.rs index 602e50d..0fcd876 100644 --- a/src/network/sequential.rs +++ b/src/network/sequential.rs @@ -1,12 +1,14 @@ +use num::Float; + use crate::{ derivable::NeuraLoss, - layer::{NeuraLayer, NeuraTrainableLayer}, + layer::{NeuraLayer, NeuraTrainableLayer, NeuraShape, NeuraPartialLayer}, }; use super::NeuraTrainableNetwork; #[derive(Clone, Debug)] -pub struct NeuraSequential { +pub struct NeuraSequential { pub layer: Layer, pub child_network: Box, } @@ -14,13 +16,13 @@ pub struct NeuraSequential { /// Operations on the tail end of a sequential network pub trait NeuraSequentialTail { type TailTrimmed; - type TailPushed; + type TailPushed; fn trim_tail(self) -> Self::TailTrimmed; - fn push_tail(self, layer: T) -> Self::TailPushed; + fn push_tail(self, layer: T) -> Self::TailPushed; } -impl NeuraSequential { +impl NeuraSequential { pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { Self { layer, @@ -28,9 +30,10 @@ impl NeuraSequential { } } - pub fn new_match_output(layer: Layer, child_network: ChildNetwork) -> Self + pub fn new_match_output(layer: Layer, child_network: ChildNetwork) -> Self where - ChildNetwork: NeuraLayer, + Layer: NeuraLayer, + ChildNetwork: NeuraLayer, { Self::new(layer, child_network) } @@ -39,7 +42,10 @@ impl NeuraSequential { *self.child_network } - pub fn push_front(self, layer: T) -> NeuraSequential { + pub fn push_front>(self, layer: T) -> NeuraSequential + where + Layer: NeuraLayer + { NeuraSequential { layer: layer, child_network: Box::new(self), @@ -48,15 +54,15 @@ impl NeuraSequential { } // Trimming the last layer returns an empty network -impl NeuraSequentialTail for NeuraSequential { +impl NeuraSequentialTail for NeuraSequential { type TailTrimmed = (); - type TailPushed = NeuraSequential>; + type TailPushed = NeuraSequential>; fn trim_tail(self) -> Self::TailTrimmed { () } - fn push_tail(self, layer: T) -> Self::TailPushed { + fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, child_network: Box::new(NeuraSequential { @@ -68,11 +74,11 @@ impl NeuraSequentialTail for NeuraSequential { } // Trimming another layer returns a network which calls trim recursively -impl NeuraSequentialTail +impl NeuraSequentialTail for NeuraSequential { type TailTrimmed = NeuraSequential::TailTrimmed>; - type TailPushed = + type TailPushed = NeuraSequential::TailPushed>; fn trim_tail(self) -> Self::TailTrimmed { @@ -82,7 +88,7 @@ impl NeuraSequentialTail } } - fn push_tail(self, layer: T) -> Self::TailPushed { + fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, child_network: Box::new(self.child_network.push_tail(layer)), @@ -90,62 +96,55 @@ impl NeuraSequentialTail } } -impl NeuraLayer for NeuraSequential { - type Input = Layer::Input; - type Output = Layer::Output; - - fn eval(&self, input: &Self::Input) -> Self::Output { - self.layer.eval(input) - } -} - -impl> NeuraLayer +impl, ChildNetwork: NeuraLayer> NeuraLayer for NeuraSequential { - type Input = Layer::Input; - type Output = ChildNetwork::Output; - fn eval(&self, input: &Self::Input) -> Self::Output { + fn eval(&self, input: &Input) -> Self::Output { self.child_network.eval(&self.layer.eval(input)) } } -impl NeuraTrainableNetwork for NeuraSequential { - type Delta = Layer::Delta; +impl NeuraTrainableNetwork for () { + type Delta = (); - fn apply_gradient(&mut self, gradient: &Self::Delta) { - 
self.layer.apply_gradient(gradient); + fn default_gradient(&self) -> () { + () + } + + fn apply_gradient(&mut self, _gradient: &()) { + // Noop } fn backpropagate>( &self, - input: &Self::Input, + final_activation: &Input, target: &Loss::Target, loss: Loss, - ) -> (Self::Input, Self::Delta) { - let final_activation = self.layer.eval(input); + ) -> (Input, Self::Delta) { let backprop_epsilon = loss.nabla(target, &final_activation); - self.layer.backpropagate(&input, backprop_epsilon) - } - fn regularize(&self) -> Self::Delta { - self.layer.regularize() + (backprop_epsilon, ()) } - fn prepare_epoch(&mut self) { - self.layer.prepare_epoch(); + fn regularize(&self) -> () { + () } - fn cleanup(&mut self) { - self.layer.cleanup(); + fn prepare(&mut self, _is_training: bool) { + // Noop } } -impl> - NeuraTrainableNetwork for NeuraSequential +impl, ChildNetwork: NeuraTrainableNetwork> + NeuraTrainableNetwork for NeuraSequential { - type Delta = (Layer::Delta, Box); + type Delta = (Layer::Gradient, Box); + + fn default_gradient(&self) -> Self::Delta { + (self.layer.default_gradient(), Box::new(self.child_network.default_gradient())) + } fn apply_gradient(&mut self, gradient: &Self::Delta) { self.layer.apply_gradient(&gradient.0); @@ -154,16 +153,16 @@ impl>( &self, - input: &Self::Input, + input: &Input, target: &Loss::Target, loss: Loss, - ) -> (Self::Input, Self::Delta) { + ) -> (Input, Self::Delta) { let next_activation = self.layer.eval(input); let (backprop_gradient, weights_gradient) = self.child_network .backpropagate(&next_activation, target, loss); let (backprop_gradient, layer_gradient) = - self.layer.backpropagate(input, backprop_gradient); + self.layer.backprop_layer(input, backprop_gradient); ( backprop_gradient, @@ -173,23 +172,18 @@ impl Self::Delta { ( - self.layer.regularize(), + self.layer.regularize_layer(), Box::new(self.child_network.regularize()), ) } - fn prepare_epoch(&mut self) { - self.layer.prepare_epoch(); - self.child_network.prepare_epoch(); - } - - fn cleanup(&mut self) { - self.layer.cleanup(); - self.child_network.cleanup(); + fn prepare(&mut self, is_training: bool) { + self.layer.prepare_layer(is_training); + self.child_network.prepare(is_training); } } -impl From for NeuraSequential { +impl From for NeuraSequential { fn from(layer: Layer) -> Self { Self { layer, @@ -198,6 +192,53 @@ impl From for NeuraSequential { } } +pub trait NeuraSequentialBuild { + type Constructed; + type Err; + + fn construct(self, input_shape: NeuraShape) -> Result; +} + +#[derive(Debug, Clone)] +pub enum NeuraSequentialBuildErr { + Current(Err), + Child(ChildErr), +} + +impl NeuraSequentialBuild for NeuraSequential { + type Constructed = NeuraSequential; + type Err = Layer::Err; + + fn construct(self, input_shape: NeuraShape) -> Result { + Ok(NeuraSequential { + layer: self.layer.construct(input_shape)?, + child_network: Box::new(()) + }) + } +} + +impl NeuraSequentialBuild for NeuraSequential { + type Constructed = NeuraSequential; + type Err = NeuraSequentialBuildErr; + + fn construct(self, input_shape: NeuraShape) -> Result { + let layer = self.layer.construct(input_shape).map_err(|e| NeuraSequentialBuildErr::Current(e))?; + + // TODO: ensure that this operation (and all recursive operations) are directly allocated on the heap + let child_network = self.child_network + .construct(Layer::output_shape(&layer)) + .map_err(|e| NeuraSequentialBuildErr::Child(e))?; + let child_network = Box::new(child_network); + + Ok(NeuraSequential { + layer, + child_network, + }) + } + + +} + /// An 
utility to recursively create a NeuraSequential network, while writing it in a declarative and linear fashion. /// Note that this can quickly create big and unwieldly types. #[macro_export] @@ -211,41 +252,47 @@ macro_rules! neura_sequential { }; [ $first:expr, $($rest:expr),+ $(,)? ] => { - $crate::network::sequential::NeuraSequential::new_match_output($first, neura_sequential![$($rest),+]) + $crate::network::sequential::NeuraSequential::new($first, neura_sequential![$($rest),+]) }; } #[cfg(test)] mod test { + use nalgebra::dvector; + use crate::{ derivable::{activation::Relu, regularize::NeuraL0}, - layer::NeuraDenseLayer, + layer::{NeuraDenseLayer, NeuraShape, NeuraLayer}, neura_layer, }; + use super::NeuraSequentialBuild; + #[test] fn test_neura_network_macro() { let mut rng = rand::thread_rng(); let _ = neura_sequential![ - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 2> + NeuraDenseLayer::from_rng(8, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, + NeuraDenseLayer::from_rng(12, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, + NeuraDenseLayer::from_rng(16, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer ]; let _ = neura_sequential![ - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, + NeuraDenseLayer::from_rng(2, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, ]; let _ = neura_sequential![ - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, + NeuraDenseLayer::from_rng(8, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, + NeuraDenseLayer::from_rng(16, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, ]; - let _ = neura_sequential![ - neura_layer!("dense", 8, 16; Relu), - neura_layer!("dense", 12; Relu), - neura_layer!("dense", 2; Relu) - ]; + let network = neura_sequential![ + neura_layer!("dense", 16, Relu), + neura_layer!("dense", 12, Relu), + neura_layer!("dense", 2, Relu) + ].construct(NeuraShape::Vector(2)).unwrap(); + + network.eval(&dvector![0.0f64, 0.0]); } } diff --git a/src/layer/convolution.rs b/src/old_layer/convolution.rs similarity index 100% rename from src/layer/convolution.rs rename to src/old_layer/convolution.rs diff --git a/src/old_layer/dense.rs b/src/old_layer/dense.rs new file mode 100644 index 0000000..ff921a8 --- /dev/null +++ b/src/old_layer/dense.rs @@ -0,0 +1,180 @@ +use super::{NeuraLayer, NeuraTrainableLayer}; +use crate::{ + algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace}, + derivable::NeuraDerivable, +}; + +use rand::Rng; +use rand_distr::Distribution; + +#[derive(Clone, Debug)] +pub struct NeuraDenseLayer< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, +> { + weights: NeuraMatrix, + bias: NeuraVector, + activation: Act, + regularization: Reg, +} + +impl< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, + > NeuraDenseLayer +{ + pub fn new( + weights: NeuraMatrix, + bias: NeuraVector, + activation: Act, + regularization: Reg, + ) -> Self { + Self { + weights, + bias, + activation, + regularization, + } + } + + pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { + let mut weights: NeuraMatrix = NeuraMatrix::from_value(0.0f64); + + // Use Xavier (or He) 
initialisation, using the harmonic mean + // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html + let distribution = rand_distr::Normal::new( + 0.0, + activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64), + ) + .unwrap(); + // let distribution = rand_distr::Uniform::new(-0.5, 0.5); + + for i in 0..OUTPUT_LEN { + for j in 0..INPUT_LEN { + weights[i][j] = distribution.sample(rng); + } + } + + Self { + weights, + // Biases are initialized based on the activation's hint + bias: NeuraVector::from_value(activation.bias_hint()), + activation, + regularization, + } + } +} + +impl< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, + > NeuraLayer for NeuraDenseLayer +{ + type Input = NeuraVector; + + type Output = NeuraVector; + + fn eval(&self, input: &Self::Input) -> Self::Output { + let mut result = self.weights.multiply_vector(input); + + for i in 0..OUTPUT_LEN { + result[i] = self.activation.eval(result[i] + self.bias[i]); + } + + result + } +} + +impl< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, + > NeuraTrainableLayer for NeuraDenseLayer +{ + type Delta = ( + NeuraMatrix, + NeuraVector, + ); + + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + let evaluated = self.weights.multiply_vector(input); + // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), + // with `self.activation'(input) ° epsilon = delta` + let mut delta: NeuraVector = epsilon.clone(); + for i in 0..OUTPUT_LEN { + delta[i] *= self.activation.derivate(evaluated[i]); + } + + // Compute the weight gradient + let weights_gradient = delta.reverse_dot(input); + + let new_epsilon = self.weights.transpose_multiply_vector(&delta); + + // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation + // The gradient of the bias is equal to the delta term of the backpropagation algorithm + let bias_gradient = delta; + + (new_epsilon, (weights_gradient, bias_gradient)) + } + + fn apply_gradient(&mut self, gradient: &Self::Delta) { + NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0); + NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1); + } + + fn regularize(&self) -> Self::Delta { + let mut res = Self::Delta::default(); + + for i in 0..OUTPUT_LEN { + for j in 0..INPUT_LEN { + res.0[i][j] = self.regularization.derivate(self.weights[i][j]); + } + } + + // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network + + res + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + derivable::{activation::Relu, regularize::NeuraL0}, + utils::uniform_vector, + }; + + #[test] + fn test_from_rng() { + let mut rng = rand::thread_rng(); + let layer: NeuraDenseLayer<_, _, 64, 32> = + NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0); + let mut input = [0.0; 64]; + for x in 0..64 { + input[x] = rng.gen(); + } + assert!(layer.eval(&input.into()).len() == 32); + } + + #[test] + fn test_stack_overflow_big_layer() { + let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0) + as NeuraDenseLayer; + + layer.backpropagate(&uniform_vector(), uniform_vector()); + + as NeuraTrainableLayer>::Delta::zero(); + } +} diff --git a/src/layer/dropout.rs b/src/old_layer/dropout.rs similarity index 100% rename from 
src/layer/dropout.rs rename to src/old_layer/dropout.rs diff --git a/src/layer/lock.rs b/src/old_layer/lock.rs similarity index 100% rename from src/layer/lock.rs rename to src/old_layer/lock.rs diff --git a/src/old_layer/mod.rs b/src/old_layer/mod.rs new file mode 100644 index 0000000..c89d835 --- /dev/null +++ b/src/old_layer/mod.rs @@ -0,0 +1,170 @@ +mod dense; +pub use dense::NeuraDenseLayer; + +mod convolution; +pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer}; + +mod dropout; +pub use dropout::NeuraDropoutLayer; + +mod softmax; +pub use softmax::NeuraSoftmaxLayer; + +mod one_hot; +pub use one_hot::NeuraOneHotLayer; + +mod lock; +pub use lock::NeuraLockLayer; + +mod pool; +pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer}; + +mod reshape; +pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer}; + +use crate::algebra::NeuraVectorSpace; + +pub trait NeuraLayer { + type Input; + type Output; + + fn eval(&self, input: &Self::Input) -> Self::Output; +} + +pub trait NeuraTrainableLayer: NeuraLayer { + /// The representation of the layer gradient as a vector space + type Delta: NeuraVectorSpace; + + /// Computes the backpropagation term and the derivative of the internal weights, + /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. + /// + /// Note: we introduce the term `epsilon`, which together with the activation of the current function can be used to compute `delta_l`: + /// ```no_rust + /// f_l'(a_l) * epsilon_l = delta_l + /// ``` + /// + /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, + /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. + /// Using this intermediate value for `delta` allows us to isolate it computation to the respective layers. + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta); + + /// Computes the regularization + fn regularize(&self) -> Self::Delta; + + /// Applies `δW_l` to the weights of the layer + fn apply_gradient(&mut self, gradient: &Self::Delta); + + /// Called before an iteration begins, to allow the layer to set itself up for training. + #[inline(always)] + fn prepare_epoch(&mut self) {} + + /// Called at the end of training, to allow the layer to clean itself up + #[inline(always)] + fn cleanup(&mut self) {} +} + +#[macro_export] +macro_rules! 
neura_layer { + ( "dense", $( $shape:expr ),*; $activation:expr ) => { + $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0) + as neura_layer!("_dense_shape", $($shape),*) + }; + + ( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => { + $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization) + as neura_layer!("_dense_shape", $($shape),*) + }; + + ( "_dense_shape", $output:expr ) => { + $crate::layer::NeuraDenseLayer<_, _, _, $output> + }; + + ( "_dense_shape", $input:expr, $output:expr ) => { + $crate::layer::NeuraDenseLayer<_, _, $input, $output> + }; + + ( "dropout", $probability:expr ) => { + $crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng()) + as $crate::layer::NeuraDropoutLayer<_, _> + }; + + ( "softmax" ) => { + $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_> + }; + + ( "softmax", $length:expr ) => { + $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length> + }; + + ( "one_hot" ) => { + $crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _> + }; + + ( "lock", $layer:expr ) => { + $crate::layer::NeuraLockLayer($layer) + }; + + ( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _> + }; + + ( "conv1d_pad"; $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _> + }; + + ( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _> + }; + + ( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _> + }; + + ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _> + }; + + ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _> + }; + + ( "pool_global"; $reduce:expr ) => { + $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _> + }; + + ( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => { + $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _> + }; + + ( "pool1d", $blocklength:expr; $reduce:expr ) => { + $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _> + }; + + ( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => { + $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _> + }; + + ( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => { + $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _> + }; + + ( "unstable_flatten" ) => { + $crate::layer::NeuraFlattenLayer::new() as 
$crate::layer::NeuraFlattenLayer<_, _, f64> + }; + + ( "unstable_flatten", $width:expr, $height:expr ) => { + $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64> + }; + + ( "unstable_reshape", $height:expr ) => { + $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64> + }; + + ( "unstable_reshape", $width:expr, $height:expr ) => { + $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64> + }; +} diff --git a/src/layer/one_hot.rs b/src/old_layer/one_hot.rs similarity index 100% rename from src/layer/one_hot.rs rename to src/old_layer/one_hot.rs diff --git a/src/layer/pool.rs b/src/old_layer/pool.rs similarity index 100% rename from src/layer/pool.rs rename to src/old_layer/pool.rs diff --git a/src/layer/reshape.rs b/src/old_layer/reshape.rs similarity index 100% rename from src/layer/reshape.rs rename to src/old_layer/reshape.rs diff --git a/src/layer/softmax.rs b/src/old_layer/softmax.rs similarity index 100% rename from src/layer/softmax.rs rename to src/old_layer/softmax.rs diff --git a/src/train.rs b/src/train.rs index 9d26ac7..78d5e63 100644 --- a/src/train.rs +++ b/src/train.rs @@ -5,26 +5,20 @@ use crate::{ network::{sequential::NeuraSequential, NeuraTrainableNetwork}, }; -pub trait NeuraGradientSolver { - fn get_gradient( +pub trait NeuraGradientSolver> { + fn get_gradient( &self, - trainable: &NeuraSequential, - input: &Layer::Input, + trainable: &Trainable, + input: &Input, target: &Target, - ) -> as NeuraTrainableNetwork>::Delta - where - NeuraSequential: - NeuraTrainableNetwork; + ) -> Trainable::Delta; - fn score( + fn score( &self, - trainable: &NeuraSequential, - input: &Layer::Input, + trainable: &Trainable, + input: &Input, target: &Target, - ) -> f64 - where - NeuraSequential: - NeuraTrainableNetwork; + ) -> f64; } #[non_exhaustive] @@ -38,32 +32,24 @@ impl NeuraBackprop { } } -impl> + Clone> - NeuraGradientSolver, Loss::Target> for NeuraBackprop +impl, Loss: NeuraLoss + Clone> + NeuraGradientSolver for NeuraBackprop { - fn get_gradient( + fn get_gradient( &self, - trainable: &NeuraSequential, - input: &Layer::Input, - target: &Loss::Target, - ) -> as NeuraTrainableNetwork>::Delta - where - NeuraSequential: - NeuraTrainableNetwork>, - { + trainable: &Trainable, + input: &Input, + target: &Target, + ) -> Trainable::Delta { trainable.backpropagate(input, target, self.loss.clone()).1 } - fn score( + fn score( &self, - trainable: &NeuraSequential, - input: &Layer::Input, - target: &Loss::Target, - ) -> f64 - where - NeuraSequential: - NeuraTrainableNetwork>, - { + trainable: &Trainable, + input: &Input, + target: &Target, + ) -> f64 { let output = trainable.eval(&input); self.loss.eval(target, &output) } @@ -137,41 +123,32 @@ impl NeuraBatchedTrainer { } pub fn train< - Output, + Input: Clone, Target: Clone, - GradientSolver: NeuraGradientSolver, - Layer: NeuraLayer, - ChildNetwork, - Inputs: IntoIterator, + Network: NeuraTrainableNetwork, + GradientSolver: NeuraGradientSolver, + Inputs: IntoIterator, >( &self, gradient_solver: GradientSolver, - network: &mut NeuraSequential, + network: &mut Network, inputs: Inputs, - test_inputs: &[(Layer::Input, Target)], - ) where - NeuraSequential: - NeuraTrainableNetwork, - Layer::Input: Clone, - { + test_inputs: &[(Input, Target)], + ) { let mut iter = inputs.into_iter(); let factor = -self.learning_rate / (self.batch_size as f64); let momentum_factor = self.learning_momentum / self.learning_rate; let reg_factor 
= -self.learning_rate; // Contains `momentum_factor * factor * gradient_sum_previous_iter` - let mut previous_gradient_sum = - Box::< as NeuraTrainableNetwork>::Delta>::zero(); + let mut previous_gradient_sum = network.default_gradient(); 'd: for iteration in 0..self.iterations { - let mut gradient_sum = Box::< - as NeuraTrainableNetwork>::Delta, - >::zero(); - network.prepare_epoch(); + let mut gradient_sum = network.default_gradient(); + network.prepare(true); for _ in 0..self.batch_size { if let Some((input, target)) = iter.next() { - let gradient = - Box::new(gradient_solver.get_gradient(&network, &input, &target)); + let gradient = gradient_solver.get_gradient(&network, &input, &target); gradient_sum.add_assign(&gradient); } else { break 'd; @@ -194,7 +171,7 @@ impl NeuraBatchedTrainer { } if self.log_iterations > 0 && (iteration + 1) % self.log_iterations == 0 { - network.cleanup(); + network.prepare(false); let mut loss_sum = 0.0; for (input, target) in test_inputs { loss_sum += gradient_solver.score(&network, input, target); @@ -204,12 +181,14 @@ impl NeuraBatchedTrainer { } } - network.cleanup(); + network.prepare(false); } } #[cfg(test)] mod test { + use nalgebra::{DMatrix, dmatrix, dvector}; + use super::*; use crate::{ assert_approx, @@ -224,19 +203,19 @@ mod test { for wa in [0.0, 0.25, 0.5, 1.0] { for wb in [0.0, 0.25, 0.5, 1.0] { let network = NeuraSequential::new( - NeuraDenseLayer::new([[wa, wb]].into(), [0.0].into(), Linear, NeuraL0), + NeuraDenseLayer::new(dmatrix![wa, wb], dvector![0.0], Linear, NeuraL0), (), ); - let gradient = NeuraBackprop::new(Euclidean).get_gradient( + let (gradient, _) = NeuraBackprop::new(Euclidean).get_gradient( &network, - &[1.0, 1.0].into(), - &[0.0].into(), + &dvector![1.0, 1.0], + &dvector![0.0], ); let expected = wa + wb; - assert!((gradient.0[0][0] - expected) < 0.001); - assert!((gradient.0[0][1] - expected) < 0.001); + assert!((gradient.0[(0, 0)] - expected) < 0.001); + assert!((gradient.0[(0, 1)] - expected) < 0.001); } } } @@ -247,42 +226,42 @@ mod test { // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/ let network = neura_sequential![ NeuraDenseLayer::new( - [[0.11, 0.21], [0.12, 0.08]].into(), - [0.0; 2].into(), + dmatrix![0.11, 0.21; 0.12, 0.08], + dvector![0.0, 0.0], Linear, NeuraL0 ), - NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0) + NeuraDenseLayer::new(dmatrix![0.14, 0.15], dvector![0.0], Linear, NeuraL0) ]; - let input = [2.0, 3.0]; - let target = [1.0]; + let input = dvector![2.0, 3.0]; + let target = dvector![1.0]; - let intermediary = network.clone().trim_tail().eval(&input.into()); + let intermediary = network.clone().trim_tail().eval(&input); assert_approx!(0.85, intermediary[0], EPSILON); assert_approx!(0.48, intermediary[1], EPSILON); - assert_approx!(0.191, network.eval(&input.into())[0], EPSILON); + assert_approx!(0.191, network.eval(&input)[0], EPSILON); assert_approx!( 0.327, - Euclidean.eval(&target.into(), &network.eval(&input.into())), + Euclidean.eval(&target, &network.eval(&input)), 0.001 ); - let delta = network.eval(&input.into())[0] - target[0]; + let delta = network.eval(&input)[0] - target[0]; let (gradient_first, gradient_second) = - NeuraBackprop::new(Euclidean).get_gradient(&network, &input.into(), &target.into()); + NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target); let gradient_first = gradient_first.0; - let gradient_second = gradient_second.0[0]; + let gradient_second = gradient_second.0.0; 
assert_approx!(gradient_second[0], intermediary[0] * delta, EPSILON); assert_approx!(gradient_second[1], intermediary[1] * delta, EPSILON); - assert_approx!(gradient_first[0][0], input[0] * delta * 0.14, EPSILON); - assert_approx!(gradient_first[0][1], input[1] * delta * 0.14, EPSILON); + assert_approx!(gradient_first[(0, 0)], input[0] * delta * 0.14, EPSILON); + assert_approx!(gradient_first[(0, 1)], input[1] * delta * 0.14, EPSILON); - assert_approx!(gradient_first[1][0], input[0] * delta * 0.15, EPSILON); - assert_approx!(gradient_first[1][1], input[1] * delta * 0.15, EPSILON); + assert_approx!(gradient_first[(1, 0)], input[0] * delta * 0.15, EPSILON); + assert_approx!(gradient_first[(1, 1)], input[1] * delta * 0.15, EPSILON); } }
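
For reference, a minimal end-to-end sketch of the API after this refactor, pieced together from the updated examples/xor.rs and the tests in src/network/sequential.rs and src/train.rs above. The layer sizes, the input/target vectors, the printed inspection at the end, and the extra `use neuramethyst::train::NeuraGradientSolver;` import (needed to bring `get_gradient` into scope outside the crate) are illustrative assumptions, not part of the patch itself:

    #![feature(generic_arg_infer)]

    use nalgebra::dvector;

    use neuramethyst::derivable::activation::Relu;
    use neuramethyst::derivable::loss::Euclidean;
    use neuramethyst::prelude::*;
    use neuramethyst::train::NeuraGradientSolver;

    fn main() {
        // Layers no longer carry const-generic dimensions: each layer only declares
        // its output size, and the input shape is supplied once via `construct`,
        // which propagates it recursively through the sequential network.
        let network = neura_sequential![
            neura_layer!("dense", 4, Relu),
            neura_layer!("dense", 1, Relu)
        ]
        .construct(NeuraShape::Vector(2))
        .unwrap();

        // Forward pass on a dynamically-sized nalgebra vector.
        let output = network.eval(&dvector![0.0, 1.0]);
        println!("output: {}", output);

        // One backpropagation step through the new NeuraGradientSolver interface;
        // `gradient.0` is the (DMatrix, DVector) weight/bias gradient of the first
        // dense layer, mirroring how the train.rs tests above destructure it.
        let gradient = NeuraBackprop::new(Euclidean).get_gradient(
            &network,
            &dvector![0.0, 1.0],
            &dvector![1.0],
        );
        println!("first-layer weight gradient: {}", gradient.0.0);
    }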