From 7a6921a1c18272b01ba5b6e5d24c8caa1fbcd9cc Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Wed, 12 Apr 2023 11:46:55 +0200 Subject: [PATCH] :sparkles: :fire: Semi-working training, although it seems to be only want to converge to zero --- examples/xor.rs | 42 ++++++++++++ src/algebra.rs | 45 ++++++++---- src/derivable/activation.rs | 29 ++++++++ src/derivable/loss.rs | 22 ++++-- src/derivable/mod.rs | 8 +-- src/layer/dense.rs | 10 ++- src/lib.rs | 12 +++- src/network.rs | 43 ++++++++++-- src/train.rs | 133 ++++++++++++++++++++++++++++++------ src/utils.rs | 2 +- 10 files changed, 289 insertions(+), 57 deletions(-) create mode 100644 examples/xor.rs diff --git a/examples/xor.rs b/examples/xor.rs new file mode 100644 index 0000000..9d19aa0 --- /dev/null +++ b/examples/xor.rs @@ -0,0 +1,42 @@ +#![feature(generic_arg_infer)] + +use neuramethyst::prelude::*; +use neuramethyst::derivable::activation::{Relu, Tanh}; +use neuramethyst::derivable::loss::Euclidean; + +fn main() { + let mut network = neura_network![ + neura_layer!("dense", Tanh, 2, 2), + neura_layer!("dense", Tanh, 3), + neura_layer!("dense", Relu, 1) + ]; + + let inputs = [ + ([0.0, 0.0], [0.0]), + ([0.0, 1.0], [1.0]), + ([1.0, 0.0], [1.0]), + ([1.0, 1.0], [0.0]) + ]; + + // println!("{:#?}", network); + + for (input, target) in inputs { + println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]); + } + + train_batched( + &mut network, + inputs.clone(), + &inputs, + NeuraBackprop::new(Euclidean), + 0.01, + 1, + 25 + ); + + // println!("{:#?}", network); + + for (input, target) in inputs { + println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]); + } +} diff --git a/src/algebra.rs b/src/algebra.rs index 9f7df6d..8ed39ab 100644 --- a/src/algebra.rs +++ b/src/algebra.rs @@ -1,33 +1,46 @@ /// An extension of `std::ops::AddAssign` and `std::ops::Default` -pub trait NeuraAddAssign { +pub trait NeuraVectorSpace { fn add_assign(&mut self, other: &Self); - fn default() -> Self; + fn mul_assign(&mut self, by: f64); + + fn zero() -> Self; } -impl NeuraAddAssign for (Left, Right) { +impl NeuraVectorSpace for (Left, Right) { fn add_assign(&mut self, other: &Self) { - NeuraAddAssign::add_assign(&mut self.0, &other.0); - NeuraAddAssign::add_assign(&mut self.1, &other.1); + NeuraVectorSpace::add_assign(&mut self.0, &other.0); + NeuraVectorSpace::add_assign(&mut self.1, &other.1); + } + + fn mul_assign(&mut self, by: f64) { + NeuraVectorSpace::mul_assign(&mut self.0, by); + NeuraVectorSpace::mul_assign(&mut self.1, by); } - fn default() -> Self { - (Left::default(), Right::default()) + fn zero() -> Self { + (Left::zero(), Right::zero()) } } -impl NeuraAddAssign for [T; N] { +impl NeuraVectorSpace for [T; N] { fn add_assign(&mut self, other: &[T; N]) { for i in 0..N { - NeuraAddAssign::add_assign(&mut self[i], &other[i]); + NeuraVectorSpace::add_assign(&mut self[i], &other[i]); + } + } + + fn mul_assign(&mut self, by: f64) { + for i in 0..N { + NeuraVectorSpace::mul_assign(&mut self[i], by); } } - fn default() -> Self { + fn zero() -> Self { let mut res: Vec = Vec::with_capacity(N); for _ in 0..N { - res.push(T::default()); + res.push(T::zero()); } res.try_into().unwrap_or_else(|_| { @@ -39,16 +52,20 @@ impl NeuraAddAssign for [T; N] { macro_rules! 
base { ( $type:ty ) => { - impl NeuraAddAssign for $type { + impl NeuraVectorSpace for $type { fn add_assign(&mut self, other: &Self) { std::ops::AddAssign::add_assign(self, other); } - fn default() -> Self { + fn mul_assign(&mut self, other: f64) { + std::ops::MulAssign::mul_assign(self, other as $type); + } + + fn zero() -> Self { ::default() } } - } + }; } base!(f32); diff --git a/src/derivable/activation.rs b/src/derivable/activation.rs index 0a3cd1c..0bac5ee 100644 --- a/src/derivable/activation.rs +++ b/src/derivable/activation.rs @@ -34,3 +34,32 @@ impl NeuraDerivable for Relu { } } } + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Tanh; + +impl NeuraDerivable for Tanh { + #[inline(always)] + fn eval(&self, input: f64) -> f64 { + 0.5 * input.tanh() + 0.5 + } + + #[inline(always)] + fn derivate(&self, at: f64) -> f64 { + let tanh = at.tanh(); + 0.5 * (1.0 - tanh * tanh) + } +} + +impl NeuraDerivable for Tanh { + #[inline(always)] + fn eval(&self, input: f32) -> f32 { + 0.5 * input.tanh() + 0.5 + } + + #[inline(always)] + fn derivate(&self, at: f32) -> f32 { + let tanh = at.tanh(); + 0.5 * (1.0 - tanh * tanh) + } +} diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs index 3e35dcd..a15e4d3 100644 --- a/src/derivable/loss.rs +++ b/src/derivable/loss.rs @@ -1,12 +1,14 @@ use super::NeuraLoss; #[derive(Clone, Copy, Debug, PartialEq)] -pub struct Euclidean; -impl NeuraLoss<[f64; N]> for Euclidean { - type Out = f64; +pub struct Euclidean; + +impl NeuraLoss for Euclidean { + type Input = [f64; N]; type Target = [f64; N]; - fn eval(&self, target: [f64; N], actual: [f64; N]) -> f64 { + #[inline] + fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 { let mut sum_squared = 0.0; for i in 0..N { @@ -16,7 +18,15 @@ impl NeuraLoss<[f64; N]> for Euclidean { sum_squared * 0.5 } - fn nabla(&self, target: [f64; N], actual: [f64; N]) -> [f64; N] { - todo!() + #[inline] + fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] { + let mut res = [0.0; N]; + + // ∂E(y)/∂yᵢ = yᵢ - yᵢ' + for i in 0..N { + res[i] = actual[i] - target[i]; + } + + res } } diff --git a/src/derivable/mod.rs b/src/derivable/mod.rs index 5c3db62..9888423 100644 --- a/src/derivable/mod.rs +++ b/src/derivable/mod.rs @@ -8,13 +8,13 @@ pub trait NeuraDerivable { fn derivate(&self, at: F) -> F; } -pub trait NeuraLoss { - type Out; +pub trait NeuraLoss { + type Input; type Target; - fn eval(&self, target: Self::Target, actual: F) -> Self::Out; + fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64; /// Should return the gradient of the loss function according to `actual` /// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$). 
- fn nabla(&self, target: Self::Target, actual: F) -> F; + fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input; } diff --git a/src/layer/dense.rs b/src/layer/dense.rs index 337bdc1..2929f22 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -1,7 +1,8 @@ use super::NeuraLayer; -use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer}; +use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace}; use rand::Rng; +#[derive(Clone, Debug)] pub struct NeuraDenseLayer< Act: NeuraDerivable, const INPUT_LEN: usize, @@ -34,7 +35,7 @@ impl, const INPUT_LEN: usize, const OUTPUT_LEN: usize> for i in 0..OUTPUT_LEN { for j in 0..INPUT_LEN { - weights[i][j] = rng.gen::() * multiplier; + weights[i][j] = rng.gen_range(-multiplier..multiplier); } } @@ -88,6 +89,11 @@ impl, const INPUT_LEN: usize, const OUTPUT_LEN: usize> (new_epsilon, (weights_gradient, bias_gradient)) } + + fn apply_gradient(&mut self, gradient: &Self::Delta) { + NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0); + NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1); + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 0a3fc5d..d17f734 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,19 @@ #![feature(generic_arg_infer)] +pub mod algebra; pub mod derivable; pub mod layer; pub mod network; pub mod train; -pub mod algebra; mod utils; + +pub mod prelude { + // Macros + pub use crate::{neura_network, neura_layer}; + + // Structs and traits + pub use super::network::{NeuraNetwork}; + pub use super::layer::{NeuraLayer, NeuraDenseLayer}; + pub use super::train::{NeuraBackprop, train_batched}; +} diff --git a/src/network.rs b/src/network.rs index 0fddb33..29ac603 100644 --- a/src/network.rs +++ b/src/network.rs @@ -1,5 +1,10 @@ -use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss}; +use crate::{ + derivable::NeuraLoss, + layer::NeuraLayer, + train::{NeuraTrainable, NeuraTrainableLayer}, +}; +#[derive(Clone, Debug)] pub struct NeuraNetwork { layer: Layer, child_network: ChildNetwork, @@ -62,20 +67,44 @@ impl> NeuraLa impl NeuraTrainable for NeuraNetwork { type Delta = Layer::Delta; - fn backpropagate>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) { + fn apply_gradient(&mut self, gradient: &Self::Delta) { + self.layer.apply_gradient(gradient); + } + + fn backpropagate>( + &self, + input: &Self::Input, + target: &Loss::Target, + loss: Loss, + ) -> (Self::Input, Self::Delta) { let final_activation = self.layer.eval(input); - let backprop_epsilon = loss.nabla(target, final_activation); + let backprop_epsilon = loss.nabla(target, &final_activation); self.layer.backpropagate(&input, backprop_epsilon) } } -impl> NeuraTrainable for NeuraNetwork { +impl> NeuraTrainable + for NeuraNetwork +{ type Delta = (Layer::Delta, ChildNetwork::Delta); - fn backpropagate>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) { + fn apply_gradient(&mut self, gradient: &Self::Delta) { + self.layer.apply_gradient(&gradient.0); + self.child_network.apply_gradient(&gradient.1); + } + + fn backpropagate>( + &self, + input: &Self::Input, + target: &Loss::Target, + loss: Loss, + ) -> (Self::Input, Self::Delta) { let next_activation = self.layer.eval(input); - let 
(backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss); - let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient); + let (backprop_gradient, weights_gradient) = + self.child_network + .backpropagate(&next_activation, target, loss); + let (backprop_gradient, layer_gradient) = + self.layer.backpropagate(input, backprop_gradient); (backprop_gradient, (layer_gradient, weights_gradient)) } diff --git a/src/train.rs b/src/train.rs index f0c126a..adc23fb 100644 --- a/src/train.rs +++ b/src/train.rs @@ -1,14 +1,13 @@ use crate::{ + // utils::{assign_add_vector, chunked}, + algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer, network::NeuraNetwork, - // utils::{assign_add_vector, chunked}, - algebra::NeuraAddAssign, }; - pub trait NeuraTrainableLayer: NeuraLayer { - type Delta: NeuraAddAssign; + type Delta: NeuraVectorSpace; /// Computes the backpropagation term and the derivative of the internal weights, /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. @@ -19,44 +18,134 @@ pub trait NeuraTrainableLayer: NeuraLayer { /// ``` /// /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, - /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)`. - fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta); + /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. + /// Using this intermediate value for `delta` allows us to isolate it computation to the respective layers. + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta); + + /// Applies `δW_l` to the weights of the layer + fn apply_gradient(&mut self, gradient: &Self::Delta); } pub trait NeuraTrainable: NeuraLayer { - type Delta: NeuraAddAssign; + type Delta: NeuraVectorSpace; + + fn apply_gradient(&mut self, gradient: &Self::Delta); - fn backpropagate>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta); + /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information. 
+ fn backpropagate>( + &self, + input: &Self::Input, + target: &Loss::Target, + loss: Loss, + ) -> (Self::Input, Self::Delta); } -pub trait NeuraTrainer> { +pub trait NeuraTrainer { fn get_gradient( &self, trainable: &NeuraNetwork, input: &Layer::Input, - target: Loss::Target, - loss: Loss, - ) -> as NeuraTrainable>::Delta where - NeuraNetwork: NeuraTrainable - ; + target: &Target, + ) -> as NeuraTrainable>::Delta + where + NeuraNetwork: NeuraTrainable; + + fn score( + &self, + trainable: &NeuraNetwork, + input: &Layer::Input, + target: &Target, + ) -> f64 + where + NeuraNetwork: NeuraTrainable; } #[non_exhaustive] -pub struct NeuraBackprop { - pub epsilon: f64, - pub batch_size: usize, +pub struct NeuraBackprop { + loss: Loss, +} + +impl NeuraBackprop { + pub fn new(loss: Loss) -> Self { + Self { loss } + } } -impl> NeuraTrainer<[f64; N], Loss> for NeuraBackprop { +impl + Clone> NeuraTrainer<[f64; N], Loss::Target> + for NeuraBackprop +{ fn get_gradient( &self, trainable: &NeuraNetwork, input: &Layer::Input, - target: Loss::Target, - loss: Loss, - ) -> as NeuraTrainable>::Delta where + target: &Loss::Target, + ) -> as NeuraTrainable>::Delta + where + NeuraNetwork: NeuraTrainable, + { + trainable.backpropagate(input, target, self.loss.clone()).1 + } + + fn score( + &self, + trainable: &NeuraNetwork, + input: &Layer::Input, + target: &Loss::Target, + ) -> f64 + where NeuraNetwork: NeuraTrainable, { - trainable.backpropagate(input, target, loss).1 + self.loss.eval(target, &trainable.eval(&input)) + } +} + +pub fn train_batched< + Output, + Target, + Trainer: NeuraTrainer, + Layer: NeuraLayer, + ChildNetwork, + Inputs: IntoIterator, +>( + network: &mut NeuraNetwork, + inputs: Inputs, + test_inputs: &[(Layer::Input, Target)], + trainer: Trainer, + learning_rate: f64, + batch_size: usize, + epochs: usize, +) where + NeuraNetwork: NeuraTrainable, + Inputs::IntoIter: Clone, +{ + // TODO: apply shuffling? + let mut iter = inputs.into_iter().cycle(); + let factor = -learning_rate / (batch_size as f64); + + 'd: for epoch in 0..epochs { + let mut gradient_sum = as NeuraTrainable>::Delta::zero(); + + for _ in 0..batch_size { + if let Some((input, target)) = iter.next() { + let gradient = trainer.get_gradient(&network, &input, &target); + gradient_sum.add_assign(&gradient); + } else { + break 'd; + } + } + + gradient_sum.mul_assign(factor); + network.apply_gradient(&gradient_sum); + + let mut loss_sum = 0.0; + for (input, target) in test_inputs { + loss_sum += trainer.score(&network, input, target); + } + loss_sum /= test_inputs.len() as f64; + println!("Epoch {epoch}, Loss: {:.3}", loss_sum); } } diff --git a/src/utils.rs b/src/utils.rs index 94d2bf2..7b63642 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -35,7 +35,7 @@ pub(crate) fn multiply_matrix_transpose_vector( left: &[f64; HEIGHT], - right: &[f64; WIDTH] + right: &[f64; WIDTH], ) -> [[f64; WIDTH]; HEIGHT] { let mut result = [[0.0; WIDTH]; HEIGHT];
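
Notes on the patch (not part of the diff above).

Loss gradient. `Euclidean::eval` computes the half squared Euclidean distance and `Euclidean::nabla` returns its gradient with respect to the network output; writing $\hat{y}$ for `actual` and $y$ for `target`:

$E(\hat{y}, y) = \frac{1}{2} \sum_{i=1}^{N} (\hat{y}_i - y_i)^2, \qquad \frac{\partial E}{\partial \hat{y}_i} = \hat{y}_i - y_i$

which is exactly the `actual[i] - target[i]` filled into `res` in `nabla`.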
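Backpropagation convention. The `(epsilon, delta)` terms described in the `NeuraTrainableLayer::backpropagate` doc comment follow the usual dense-layer equations. As a hedged reading of what `NeuraDenseLayer::backpropagate` computes with the `reverse_dot_product` and `multiply_matrix_transpose_vector` helpers (the body itself lies outside the hunks shown), with $a_l = W_l x_{l-1} + b_l$ the pre-activation of layer $l$:

$\delta_l = \varepsilon_l \odot f_l'(a_l), \qquad \frac{\partial E}{\partial W_l} = \delta_l \, x_{l-1}^\top, \qquad \frac{\partial E}{\partial b_l} = \delta_l, \qquad \varepsilon_{l-1} = W_l^\top \delta_l$

As the doc comment states, $\varepsilon_{l-1}$ is returned without the $f_{l-1}'$ factor; the previous layer multiplies it in when computing its own $\delta_{l-1}$.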
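Batched update rule. `train_batched` sums per-sample gradients over a mini-batch with `add_assign`, scales the sum by `factor = -learning_rate / batch_size` with `mul_assign`, applies it through `apply_gradient`, and then prints the mean loss over `test_inputs`. Below is a minimal, self-contained sketch of that same update rule on a two-parameter linear model; the model, dataset and hyperparameters are invented for illustration and are not part of the crate.

// Standalone illustration (not from the patch): the batched update rule used by
// `train_batched`, applied to y = w * x + b so the arithmetic is easy to follow.
fn main() {
    let mut w = 0.0_f64;
    let mut b = 0.0_f64;

    // Hypothetical training data sampled from y = 2x + 1.
    let data = [(0.0, 1.0), (1.0, 3.0), (2.0, 5.0), (3.0, 7.0)];
    let learning_rate = 0.05;
    let batch_size = 2;
    let epochs = 2000;

    // `train_batched` also cycles through its inputs.
    let mut iter = data.iter().cycle();

    for _epoch in 0..epochs {
        // Sum the gradient of the half squared error over one batch
        // (the `gradient_sum.add_assign(&gradient)` loop).
        let (mut grad_w, mut grad_b) = (0.0, 0.0);
        for _ in 0..batch_size {
            let &(x, target) = iter.next().unwrap();
            let actual = w * x + b;
            let epsilon = actual - target; // d(0.5 * (actual - target)^2) / d(actual)
            grad_w += epsilon * x;
            grad_b += epsilon;
        }

        // Scale by -learning_rate / batch_size (`factor` in `train_batched`),
        // then apply, mirroring `mul_assign` + `apply_gradient`.
        let factor = -learning_rate / batch_size as f64;
        w += factor * grad_w;
        b += factor * grad_b;
    }

    // Should end up close to w = 2, b = 1.
    println!("w = {w:.3}, b = {b:.3}");
}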