From 920bca4a48a45ae10ccfca513e69a7f039cfa047 Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Sun, 16 Apr 2023 00:57:01 +0200 Subject: [PATCH] :truck: Move files and traits around, extract stuff out of train.rs --- examples/bivariate.rs | 2 +- examples/xor.rs | 2 +- src/layer/dense.rs | 3 +- src/layer/dropout.rs | 4 +- src/layer/lock.rs | 39 ++++++++ src/layer/mod.rs | 45 +++++++++ src/layer/one_hot.rs | 4 +- src/layer/softmax.rs | 4 +- src/lib.rs | 4 +- src/network/mod.rs | 26 ++++++ src/{network.rs => network/sequential.rs} | 79 ++++++++-------- src/train.rs | 109 ++++++---------------- 12 files changed, 190 insertions(+), 131 deletions(-) create mode 100644 src/layer/lock.rs create mode 100644 src/network/mod.rs rename src/{network.rs => network/sequential.rs} (72%) diff --git a/examples/bivariate.rs b/examples/bivariate.rs index 905ce37..4b6e0bf 100644 --- a/examples/bivariate.rs +++ b/examples/bivariate.rs @@ -11,7 +11,7 @@ use neuramethyst::prelude::*; use rand::Rng; fn main() { - let mut network = neura_network![ + let mut network = neura_sequential![ neura_layer!("dense", 2, 8; Relu, NeuraL1(0.001)), neura_layer!("dropout", 0.25), neura_layer!("dense", 2; Linear, NeuraL1(0.001)), diff --git a/examples/xor.rs b/examples/xor.rs index a0a4572..b1e35d6 100644 --- a/examples/xor.rs +++ b/examples/xor.rs @@ -5,7 +5,7 @@ use neuramethyst::derivable::loss::Euclidean; use neuramethyst::prelude::*; fn main() { - let mut network = neura_network![ + let mut network = neura_sequential![ neura_layer!("dense", 2, 4; Relu), neura_layer!("dense", 3; Relu), neura_layer!("dense", 1; Relu) diff --git a/src/layer/dense.rs b/src/layer/dense.rs index 18e4f26..e1be4f0 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -1,8 +1,7 @@ -use super::NeuraLayer; +use super::{NeuraLayer, NeuraTrainableLayer}; use crate::{ algebra::NeuraVectorSpace, derivable::NeuraDerivable, - train::NeuraTrainableLayer, utils::{multiply_matrix_transpose_vector, multiply_matrix_vector, reverse_dot_product}, }; diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs index 5ce6479..442afa7 100644 --- a/src/layer/dropout.rs +++ b/src/layer/dropout.rs @@ -1,8 +1,6 @@ use rand::Rng; -use crate::train::NeuraTrainableLayer; - -use super::NeuraLayer; +use super::{NeuraLayer, NeuraTrainableLayer}; #[derive(Clone, Debug)] pub struct NeuraDropoutLayer { diff --git a/src/layer/lock.rs b/src/layer/lock.rs new file mode 100644 index 0000000..2b7dbe0 --- /dev/null +++ b/src/layer/lock.rs @@ -0,0 +1,39 @@ +use super::{NeuraLayer, NeuraTrainableLayer}; + +/// Represents a layer that has been locked: +/// it won't be modified during training and its weight won't be stored +#[derive(Clone, Debug, PartialEq)] +pub struct NeuraLockLayer(pub L); + +impl NeuraLayer for NeuraLockLayer { + type Input = L::Input; + + type Output = L::Output; + + fn eval(&self, input: &Self::Input) -> Self::Output { + self.0.eval(input) + } +} + +impl NeuraTrainableLayer for NeuraLockLayer { + type Delta = (); + + #[inline(always)] + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + (self.0.backpropagate(input, epsilon).0, ()) + } + + #[inline(always)] + fn regularize(&self) -> Self::Delta { + () + } + + #[inline(always)] + fn apply_gradient(&mut self, _gradient: &Self::Delta) { + // Noop + } +} diff --git a/src/layer/mod.rs b/src/layer/mod.rs index d4153ed..e89857b 100644 --- a/src/layer/mod.rs +++ b/src/layer/mod.rs @@ -10,6 +10,11 @@ pub use softmax::NeuraSoftmaxLayer; mod one_hot; pub use 
one_hot::NeuraOneHotLayer; +mod lock; +pub use lock::NeuraLockLayer; + +use crate::algebra::NeuraVectorSpace; + pub trait NeuraLayer { type Input; type Output; @@ -17,6 +22,42 @@ pub trait NeuraLayer { fn eval(&self, input: &Self::Input) -> Self::Output; } +pub trait NeuraTrainableLayer: NeuraLayer { + /// The representation of the layer gradient as a vector space + type Delta: NeuraVectorSpace; + + /// Computes the backpropagation term and the derivative of the internal weights, + /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. + /// + /// Note: we introduce the term `epsilon`, which, together with the derivative of the current layer's activation function, can be used to compute `delta_l`: + /// ```no_rust + /// f_l'(a_l) * epsilon_l = delta_l + /// ``` + /// + /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, + /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. + /// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers. + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta); + + /// Computes the regularization gradient + fn regularize(&self) -> Self::Delta; + + /// Applies `δW_l` to the weights of the layer + fn apply_gradient(&mut self, gradient: &Self::Delta); + + /// Called before an iteration begins, to allow the layer to set itself up for training. + #[inline(always)] + fn prepare_epoch(&mut self) {} + + /// Called at the end of training, to allow the layer to clean itself up + #[inline(always)] + fn cleanup(&mut self) {} +} + #[macro_export] macro_rules! neura_layer { ( "dense", $( $shape:expr ),*; $activation:expr ) => { @@ -53,4 +94,8 @@ macro_rules!
neura_layer { ( "one_hot" ) => { $crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _> }; + + ( "lock", $layer:expr ) => { + $crate::layer::NeuraLockLayer($layer) + }; } diff --git a/src/layer/one_hot.rs b/src/layer/one_hot.rs index 39335b6..029f9a9 100644 --- a/src/layer/one_hot.rs +++ b/src/layer/one_hot.rs @@ -1,6 +1,4 @@ -use crate::train::NeuraTrainableLayer; - -use super::NeuraLayer; +use super::{NeuraLayer, NeuraTrainableLayer}; /// A special layer that allows you to split a vector into one-hot vectors #[derive(Debug, Clone, PartialEq)] diff --git a/src/layer/softmax.rs b/src/layer/softmax.rs index 8160e50..ebbc22b 100644 --- a/src/layer/softmax.rs +++ b/src/layer/softmax.rs @@ -1,6 +1,6 @@ -use crate::{train::NeuraTrainableLayer, utils::multiply_vectors_pointwise}; +use crate::utils::multiply_vectors_pointwise; -use super::NeuraLayer; +use super::{NeuraLayer, NeuraTrainableLayer}; #[non_exhaustive] #[derive(Clone, Debug)] diff --git a/src/lib.rs b/src/lib.rs index 012bb19..61ee52a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,11 +15,11 @@ pub use utils::{argmax, one_hot}; pub mod prelude { // Macros - pub use crate::{neura_layer, neura_network}; + pub use crate::{neura_layer, neura_sequential}; // Structs and traits pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; - pub use crate::network::{NeuraNetwork, NeuraNetworkTail}; + pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; pub use crate::utils::cycle_shuffling; } diff --git a/src/network/mod.rs b/src/network/mod.rs new file mode 100644 index 0000000..68b953f --- /dev/null +++ b/src/network/mod.rs @@ -0,0 +1,26 @@ +use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer}; + +pub mod sequential; + +pub trait NeuraTrainableNetwork: NeuraLayer { + type Delta: NeuraVectorSpace; + + fn apply_gradient(&mut self, gradient: &Self::Delta); + + /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information. + fn backpropagate>( + &self, + input: &Self::Input, + target: &Loss::Target, + loss: Loss, + ) -> (Self::Input, Self::Delta); + + /// Should return the regularization gradient + fn regularize(&self) -> Self::Delta; + + /// Called before an iteration begins, to allow the network to set itself up for training. 
+ fn prepare_epoch(&mut self); + + /// Called at the end of training, to allow the network to clean itself up + fn cleanup(&mut self); +} diff --git a/src/network.rs b/src/network/sequential.rs similarity index 72% rename from src/network.rs rename to src/network/sequential.rs index de8b4f6..370a75c 100644 --- a/src/network.rs +++ b/src/network/sequential.rs @@ -1,16 +1,26 @@ use crate::{ derivable::NeuraLoss, - layer::NeuraLayer, - train::{NeuraTrainable, NeuraTrainableLayer}, + layer::{NeuraLayer, NeuraTrainableLayer}, }; +use super::NeuraTrainableNetwork; + #[derive(Clone, Debug)] -pub struct NeuraNetwork { +pub struct NeuraSequential { pub layer: Layer, pub child_network: ChildNetwork, } -impl NeuraNetwork { +/// Operations on the tail end of a sequential network +pub trait NeuraSequentialTail { + type TailTrimmed; + type TailPushed; + + fn trim_tail(self) -> Self::TailTrimmed; + fn push_tail(self, layer: T) -> Self::TailPushed; +} + +impl NeuraSequential { pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { Self { layer, @@ -29,36 +39,27 @@ impl NeuraNetwork { self.child_network } - pub fn push_front(self, layer: T) -> NeuraNetwork { - NeuraNetwork { + pub fn push_front(self, layer: T) -> NeuraSequential { + NeuraSequential { layer: layer, child_network: self, } } } -/// Operations on the tail end of the network -pub trait NeuraNetworkTail { - type TailTrimmed; - type TailPushed; - - fn trim_tail(self) -> Self::TailTrimmed; - fn push_tail(self, layer: T) -> Self::TailPushed; -} - // Trimming the last layer returns an empty network -impl NeuraNetworkTail for NeuraNetwork { +impl NeuraSequentialTail for NeuraSequential { type TailTrimmed = (); - type TailPushed = NeuraNetwork>; + type TailPushed = NeuraSequential>; fn trim_tail(self) -> Self::TailTrimmed { () } fn push_tail(self, layer: T) -> Self::TailPushed { - NeuraNetwork { + NeuraSequential { layer: self.layer, - child_network: NeuraNetwork { + child_network: NeuraSequential { layer, child_network: (), }, @@ -67,29 +68,29 @@ impl NeuraNetworkTail for NeuraNetwork { } // Trimming another layer returns a network which calls trim recursively -impl NeuraNetworkTail - for NeuraNetwork +impl NeuraSequentialTail + for NeuraSequential { - type TailTrimmed = NeuraNetwork::TailTrimmed>; + type TailTrimmed = NeuraSequential::TailTrimmed>; type TailPushed = - NeuraNetwork::TailPushed>; + NeuraSequential::TailPushed>; fn trim_tail(self) -> Self::TailTrimmed { - NeuraNetwork { + NeuraSequential { layer: self.layer, child_network: self.child_network.trim_tail(), } } fn push_tail(self, layer: T) -> Self::TailPushed { - NeuraNetwork { + NeuraSequential { layer: self.layer, child_network: self.child_network.push_tail(layer), } } } -impl NeuraLayer for NeuraNetwork { +impl NeuraLayer for NeuraSequential { type Input = Layer::Input; type Output = Layer::Output; @@ -99,7 +100,7 @@ impl NeuraLayer for NeuraNetwork { } impl> NeuraLayer - for NeuraNetwork + for NeuraSequential { type Input = Layer::Input; @@ -110,7 +111,7 @@ impl> NeuraLa } } -impl NeuraTrainable for NeuraNetwork { +impl NeuraTrainableNetwork for NeuraSequential { type Delta = Layer::Delta; fn apply_gradient(&mut self, gradient: &Self::Delta) { @@ -141,8 +142,8 @@ impl NeuraTrainable for NeuraNetwork { } } -impl> NeuraTrainable - for NeuraNetwork +impl> + NeuraTrainableNetwork for NeuraSequential { type Delta = (Layer::Delta, ChildNetwork::Delta); @@ -182,7 +183,7 @@ impl From for NeuraNetwork { +impl From for NeuraSequential { fn from(layer: Layer) -> Self { Self { layer, @@ 
-191,18 +192,20 @@ impl From for NeuraNetwork { } } +/// A utility to recursively create a NeuraSequential network, while writing it in a declarative and linear fashion. +/// Note that this can quickly create big and unwieldy types. #[macro_export] -macro_rules! neura_network { +macro_rules! neura_sequential { [] => { () }; [ $layer:expr $(,)? ] => { - $crate::network::NeuraNetwork::from($layer) + $crate::network::sequential::NeuraSequential::from($layer) }; [ $first:expr, $($rest:expr),+ $(,)? ] => { - $crate::network::NeuraNetwork::new_match_output($first, neura_network![$($rest),+]) + $crate::network::sequential::NeuraSequential::new_match_output($first, neura_sequential![$($rest),+]) }; } @@ -218,22 +221,22 @@ mod test { fn test_neura_network_macro() { let mut rng = rand::thread_rng(); - let _ = neura_network![ + let _ = neura_sequential![ NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 2> ]; - let _ = neura_network![ + let _ = neura_sequential![ NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, ]; - let _ = neura_network![ + let _ = neura_sequential![ NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, ]; - let _ = neura_network![ + let _ = neura_sequential![ neura_layer!("dense", 8, 16; Relu), neura_layer!("dense", 12; Relu), neura_layer!("dense", 2; Relu) diff --git a/src/train.rs b/src/train.rs index 1652452..875ac0a 100644 --- a/src/train.rs +++ b/src/train.rs @@ -1,84 +1,30 @@ use crate::{ - algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer, network::NeuraNetwork, + algebra::NeuraVectorSpace, + derivable::NeuraLoss, + layer::NeuraLayer, + network::{sequential::NeuraSequential, NeuraTrainableNetwork}, }; -// TODO: move this trait to layer/mod.rs -pub trait NeuraTrainableLayer: NeuraLayer { - type Delta: NeuraVectorSpace; - - /// Computes the backpropagation term and the derivative of the internal weights, - /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. - /// - /// Note: we introduce the term `epsilon`, which together with the activation of the current function can be used to compute `delta_l`: - /// ```no_rust - /// f_l'(a_l) * epsilon_l = delta_l - /// ``` - /// - /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, - /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. - /// Using this intermediate value for `delta` allows us to isolate it computation to the respective layers. - fn backpropagate( - &self, - input: &Self::Input, - epsilon: Self::Output, - ) -> (Self::Input, Self::Delta); - - /// Computes the regularization - fn regularize(&self) -> Self::Delta; - - /// Applies `δW_l` to the weights of the layer - fn apply_gradient(&mut self, gradient: &Self::Delta); - - /// Called before an iteration begins, to allow the layer to set itself up for training.
- #[inline(always)] - fn prepare_epoch(&mut self) {} - - /// Called at the end of training, to allow the layer to clean itself up - #[inline(always)] - fn cleanup(&mut self) {} -} - -pub trait NeuraTrainable: NeuraLayer { - type Delta: NeuraVectorSpace; - - fn apply_gradient(&mut self, gradient: &Self::Delta); - - /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information. - fn backpropagate>( - &self, - input: &Self::Input, - target: &Loss::Target, - loss: Loss, - ) -> (Self::Input, Self::Delta); - - /// Should return the regularization gradient - fn regularize(&self) -> Self::Delta; - - /// Called before an iteration begins, to allow the network to set itself up for training. - fn prepare_epoch(&mut self); - - /// Called at the end of training, to allow the network to clean itself up - fn cleanup(&mut self); -} - pub trait NeuraGradientSolver { fn get_gradient( &self, - trainable: &NeuraNetwork, + trainable: &NeuraSequential, input: &Layer::Input, target: &Target, - ) -> as NeuraTrainable>::Delta + ) -> as NeuraTrainableNetwork>::Delta where - NeuraNetwork: NeuraTrainable; + NeuraSequential: + NeuraTrainableNetwork; fn score( &self, - trainable: &NeuraNetwork, + trainable: &NeuraSequential, input: &Layer::Input, target: &Target, ) -> f64 where - NeuraNetwork: NeuraTrainable; + NeuraSequential: + NeuraTrainableNetwork; } #[non_exhaustive] @@ -97,24 +43,26 @@ impl + Clone> { fn get_gradient( &self, - trainable: &NeuraNetwork, + trainable: &NeuraSequential, input: &Layer::Input, target: &Loss::Target, - ) -> as NeuraTrainable>::Delta + ) -> as NeuraTrainableNetwork>::Delta where - NeuraNetwork: NeuraTrainable, + NeuraSequential: + NeuraTrainableNetwork, { trainable.backpropagate(input, target, self.loss.clone()).1 } fn score( &self, - trainable: &NeuraNetwork, + trainable: &NeuraSequential, input: &Layer::Input, target: &Loss::Target, ) -> f64 where - NeuraNetwork: NeuraTrainable, + NeuraSequential: + NeuraTrainableNetwork, { let output = trainable.eval(&input); self.loss.eval(target, &output) @@ -183,11 +131,12 @@ impl NeuraBatchedTrainer { >( &self, gradient_solver: GradientSolver, - network: &mut NeuraNetwork, + network: &mut NeuraSequential, inputs: Inputs, test_inputs: &[(Layer::Input, Target)], ) where - NeuraNetwork: NeuraTrainable, + NeuraSequential: + NeuraTrainableNetwork, Layer::Input: Clone, { let mut iter = inputs.into_iter(); @@ -197,10 +146,10 @@ impl NeuraBatchedTrainer { // Contains `momentum_factor * factor * gradient_sum_previous_iter` let mut previous_gradient_sum = - as NeuraTrainable>::Delta::zero(); + as NeuraTrainableNetwork>::Delta::zero(); 'd: for iteration in 0..self.iterations { let mut gradient_sum = - as NeuraTrainable>::Delta::zero(); + as NeuraTrainableNetwork>::Delta::zero(); network.prepare_epoch(); for _ in 0..self.batch_size { @@ -249,16 +198,18 @@ mod test { assert_approx, derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0}, layer::NeuraDenseLayer, - network::NeuraNetworkTail, - neura_network, + network::sequential::NeuraSequentialTail, + neura_sequential, }; #[test] fn test_backpropagation_simple() { for wa in [0.0, 0.25, 0.5, 1.0] { for wb in [0.0, 0.25, 0.5, 1.0] { - let network = - NeuraNetwork::new(NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), ()); + let network = NeuraSequential::new( + NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), + (), + ); let gradient = NeuraBackprop::new(Euclidean).get_gradient(&network, &[1.0, 1.0], &[0.0]); @@ -274,7 +225,7 @@ 
mod test { fn test_backpropagation_complex() { const EPSILON: f64 = 0.00001; // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/ - let network = neura_network![ + let network = neura_sequential![ NeuraDenseLayer::new([[0.11, 0.21], [0.12, 0.08]], [0.0; 2], Linear, NeuraL0), NeuraDenseLayer::new([[0.14, 0.15]], [0.0], Linear, NeuraL0) ];
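
Usage sketch for the renamed macro and the new "lock" rule, based only on the macro definitions in this patch. The layer shapes and the Relu activation mirror examples/xor.rs above; the fixed-size f64 array input and the assumption that shape inference still flows through the lock wrapper are illustrative, not something this patch demonstrates.

use neuramethyst::derivable::activation::Relu;
use neuramethyst::prelude::*;

fn main() {
    // Same topology as examples/xor.rs, except the hidden layer is wrapped by
    // the new `neura_layer!("lock", ...)` rule, i.e. a NeuraLockLayer: its
    // backpropagate() still forwards epsilon to the previous layer, but its
    // Delta is () and apply_gradient() is a no-op, so its weights stay frozen.
    let network = neura_sequential![
        neura_layer!("dense", 2, 4; Relu),
        neura_layer!("lock", neura_layer!("dense", 3; Relu)),
        neura_layer!("dense", 1; Relu)
    ];

    // eval() comes from NeuraLayer; the input type matches the first dense
    // layer, assumed here to be a [f64; 2] as in the backpropagation tests.
    let output = network.eval(&[0.0, 1.0]);
    println!("{:?}", output);
}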