diff --git a/Cargo.toml b/Cargo.toml
index ec6baa3..8bbb664 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,4 +7,5 @@ edition = "2021"
 
 [dependencies]
 ndarray = "^0.15"
+# num-traits = "0.2.15"
 rand = "^0.8"
diff --git a/src/algebra.rs b/src/algebra.rs
index dc0d71d..9f7df6d 100644
--- a/src/algebra.rs
+++ b/src/algebra.rs
@@ -1,6 +1,8 @@
-/// An extension of `std::ops::AddAssign`
+/// An extension of `std::ops::AddAssign` and `std::default::Default`
 pub trait NeuraAddAssign {
     fn add_assign(&mut self, other: &Self);
+
+    fn default() -> Self;
 }
 
 impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
@@ -8,14 +10,31 @@ impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
         NeuraAddAssign::add_assign(&mut self.0, &other.0);
         NeuraAddAssign::add_assign(&mut self.1, &other.1);
     }
+
+    fn default() -> Self {
+        (Left::default(), Right::default())
+    }
 }
 
-impl<T: NeuraAddAssign + Default, const N: usize> NeuraAddAssign for [T; N] {
+impl<T: NeuraAddAssign, const N: usize> NeuraAddAssign for [T; N] {
     fn add_assign(&mut self, other: &[T; N]) {
         for i in 0..N {
             NeuraAddAssign::add_assign(&mut self[i], &other[i]);
         }
     }
+
+    fn default() -> Self {
+        let mut res: Vec<T> = Vec::with_capacity(N);
+
+        for _ in 0..N {
+            res.push(T::default());
+        }
+
+        res.try_into().unwrap_or_else(|_| {
+            // TODO: check that this panic is optimized away
+            unreachable!()
+        })
+    }
 }
 
 macro_rules! base {
@@ -24,6 +43,10 @@ macro_rules! base {
             fn add_assign(&mut self, other: &Self) {
                 std::ops::AddAssign::add_assign(self, other);
             }
+
+            fn default() -> Self {
+                <Self as Default>::default()
+            }
         }
     }
 }
diff --git a/src/layer/dense.rs b/src/layer/dense.rs
index c7762b2..337bdc1 100644
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@@ -1,5 +1,5 @@
 use super::NeuraLayer;
-use crate::{derivable::NeuraDerivable, utils::multiply_matrix_vector};
+use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer};
 use rand::Rng;
 
 pub struct NeuraDenseLayer<
@@ -65,6 +65,31 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
     }
 }
 
+impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraTrainableLayer
+    for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
+{
+    type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]);
+
+    // TODO: double-check the math in this
+    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) {
+        let evaluated = multiply_matrix_vector(&self.weights, input);
+        // Compute delta from epsilon, with `self.activation'(z) * epsilon = delta`
+        let mut delta = epsilon.clone();
+        for i in 0..OUTPUT_LEN {
+            delta[i] *= self.activation.derivate(evaluated[i]);
+        }
+
+        let weights_gradient = reverse_dot_product(&delta, input);
+        // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
+        // the gradient of the bias is equal to the delta term of the backpropagation algorithm
+        let bias_gradient = delta;
+
+        let new_epsilon = multiply_matrix_transpose_vector(&self.weights, &delta);
+
+        (new_epsilon, (weights_gradient, bias_gradient))
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/src/lib.rs b/src/lib.rs
index dcdf856..0a3fc5d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,6 +3,7 @@
 pub mod derivable;
 pub mod layer;
 pub mod network;
+pub mod train;
 
 pub mod algebra;
 mod utils;
diff --git a/src/network.rs b/src/network.rs
index 6a1cbde..0fddb33 100644
--- a/src/network.rs
+++ b/src/network.rs
@@ -1,4 +1,4 @@
-use crate::{layer::NeuraLayer, train::NeuraTrainable};
+use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss};
 
 pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
     layer: Layer,
@@ -23,6 +23,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraNetwork<Layer, ChildNetwork> {
     pub fn child_network(&self) -> &ChildNetwork {
         &self.child_network
     }
+
+    pub fn layer(&self) -> &Layer {
+        &self.layer
+    }
 }
 
 impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> {
@@ -55,6 +59,28 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLayer
     }
 }
 
+impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
+    type Delta = Layer::Delta;
+
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
+        let final_activation = self.layer.eval(input);
+        let backprop_epsilon = loss.nabla(target, final_activation);
+        self.layer.backpropagate(&input, backprop_epsilon)
+    }
+}
+
+impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable + NeuraLayer<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> {
+    type Delta = (Layer::Delta, ChildNetwork::Delta);
+
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
+        let next_activation = self.layer.eval(input);
+        let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss);
+        let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient);
+
+        (backprop_gradient, (layer_gradient, weights_gradient))
+    }
+}
+
 #[macro_export]
 macro_rules! neura_network {
     [] => {
diff --git a/src/train.rs b/src/train.rs
new file mode 100644
index 0000000..f0c126a
--- /dev/null
+++ b/src/train.rs
@@ -0,0 +1,62 @@
+use crate::{
+    derivable::NeuraLoss,
+    layer::NeuraLayer,
+    network::NeuraNetwork,
+    // utils::{assign_add_vector, chunked},
+    algebra::NeuraAddAssign,
+};
+
+
+pub trait NeuraTrainableLayer: NeuraLayer {
+    type Delta: NeuraAddAssign;
+
+    /// Computes the backpropagation term and the derivative of the internal weights,
+    /// using the `input` vector output by the previous layer and the backpropagation term `epsilon` of the next layer.
+    ///
+    /// Note: we introduce the term `epsilon`, which, together with the derivative of the current layer's activation, can be used to compute `delta_l`:
+    /// ```no_rust
+    /// f_l'(a_l) * epsilon_l = delta_l
+    /// ```
+    ///
+    /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
+    /// where `epsilon_{l-1}` has not yet been multiplied by `f_{l-1}'(activation)`; the previous layer does that when computing its own `delta`.
+    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta);
+}
+
+pub trait NeuraTrainable: NeuraLayer {
+    type Delta: NeuraAddAssign;
+
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta);
+}
+
+pub trait NeuraTrainer<Output, Loss: NeuraLoss<Output>> {
+    fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
+        &self,
+        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        input: &Layer::Input,
+        target: Loss::Target,
+        loss: Loss,
+    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
+        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable
+    ;
+}
+
+#[non_exhaustive]
+pub struct NeuraBackprop {
+    pub epsilon: f64,
+    pub batch_size: usize,
+}
+
+impl<const N: usize, Loss: NeuraLoss<[f64; N]>> NeuraTrainer<[f64; N], Loss> for NeuraBackprop {
+    fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
+        &self,
+        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        input: &Layer::Input,
+        target: Loss::Target,
+        loss: Loss,
+    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
+        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable,
+    {
+        trainable.backpropagate(input, target, loss).1
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
index aeffdc7..94d2bf2 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -15,6 +15,39 @@ pub(crate) fn multiply_matrix_vector<const WIDTH: usize, const HEIGHT: usize>(
     result
 }
 
+/// Equivalent to `multiply_matrix_vector(transpose(matrix), vector)`.
+pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT: usize>(
+    matrix: &[[f64; WIDTH]; HEIGHT],
+    vector: &[f64; HEIGHT],
+) -> [f64; WIDTH] {
+    let mut result = [0.0; WIDTH];
+
+    for i in 0..WIDTH {
+        let mut sum = 0.0;
+        for k in 0..HEIGHT {
+            sum += matrix[k][i] * vector[k];
+        }
+        result[i] = sum;
+    }
+
+    result
+}
+
+pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
+    left: &[f64; HEIGHT],
+    right: &[f64; WIDTH]
+) -> [[f64; WIDTH]; HEIGHT] {
+    let mut result = [[0.0; WIDTH]; HEIGHT];
+
+    for i in 0..HEIGHT {
+        for j in 0..WIDTH {
+            result[i][j] = left[i] * right[j];
+        }
+    }
+
+    result
+}
+
 pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
     for i in 0..N {
         sum[i] += operand[i];
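
For reference, the per-layer rule that the new `NeuraDenseLayer::backpropagate` implements is: `z = W·x`, `delta = f'(z) * epsilon` (element-wise), `δW = delta · xᵀ` (what `reverse_dot_product` computes), `δb = delta`, and `epsilon_{l-1} = Wᵀ · delta` (what `multiply_matrix_transpose_vector` computes). Below is a minimal standalone sketch of that rule over plain arrays, with a hand-written sigmoid derivative and made-up values standing in for the crate's `NeuraDerivable` activation; it illustrates the math, not the crate's API.

    fn main() {
        const IN: usize = 3;
        const OUT: usize = 2;

        let weights = [[0.1, -0.2, 0.3], [0.4, 0.5, -0.6]]; // W, shape [OUT][IN]
        let input = [1.0, 2.0, -1.0]; // x, the output of the previous layer
        let epsilon = [0.05, -0.02]; // backpropagation term coming from the next layer

        // z = W · x (what multiply_matrix_vector computes)
        let mut z = [0.0; OUT];
        for i in 0..OUT {
            for j in 0..IN {
                z[i] += weights[i][j] * input[j];
            }
        }

        // delta = f'(z) * epsilon, element-wise (here f is a sigmoid)
        let sigmoid_prime = |x: f64| {
            let s = 1.0 / (1.0 + (-x).exp());
            s * (1.0 - s)
        };
        let mut delta = [0.0; OUT];
        for i in 0..OUT {
            delta[i] = sigmoid_prime(z[i]) * epsilon[i];
        }

        // δW = delta · xᵀ, the outer product computed by reverse_dot_product
        let mut weights_gradient = [[0.0; IN]; OUT];
        for i in 0..OUT {
            for j in 0..IN {
                weights_gradient[i][j] = delta[i] * input[j];
            }
        }

        // δb = delta, and epsilon_{l-1} = Wᵀ · delta, which is returned
        // *without* the previous layer's f' applied.
        let bias_gradient = delta;
        let mut new_epsilon = [0.0; IN];
        for j in 0..IN {
            for i in 0..OUT {
                new_epsilon[j] += weights[i][j] * delta[i];
            }
        }

        println!("dW = {:?}", weights_gradient);
        println!("db = {:?}", bias_gradient);
        println!("epsilon_prev = {:?}", new_epsilon);
    }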
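
The `default()` method added to `NeuraAddAssign`, together with `add_assign()`, is what allows a gradient with the same shape as a layer's `Delta` to be zero-initialized and then summed over a batch (NeuraBackprop's `batch_size` field hints at that use). Here is a self-contained sketch of that accumulation, using a cut-down copy of the trait from `src/algebra.rs`; the `f64` impl stands in for the `base!` macro, and the batch values are made up.

    pub trait NeuraAddAssign {
        fn add_assign(&mut self, other: &Self);
        fn default() -> Self;
    }

    // What the `base!` macro expands to for one scalar type
    impl NeuraAddAssign for f64 {
        fn add_assign(&mut self, other: &Self) {
            *self += *other;
        }

        fn default() -> Self {
            0.0
        }
    }

    // Same array impl as in the patch
    impl<T: NeuraAddAssign, const N: usize> NeuraAddAssign for [T; N] {
        fn add_assign(&mut self, other: &Self) {
            for i in 0..N {
                NeuraAddAssign::add_assign(&mut self[i], &other[i]);
            }
        }

        fn default() -> Self {
            let mut res: Vec<T> = Vec::with_capacity(N);
            for _ in 0..N {
                res.push(T::default());
            }
            res.try_into().unwrap_or_else(|_| unreachable!())
        }
    }

    fn main() {
        // Two per-sample bias gradients for a 3-neuron layer (made-up values)
        let batch: Vec<[f64; 3]> = vec![[1.0, -2.0, 3.0], [0.5, 0.25, -1.0]];

        // Start from the zero element provided by `default()`...
        let mut total = <[f64; 3] as NeuraAddAssign>::default();
        // ...and fold the per-sample gradients into it with `add_assign()`.
        for gradient in &batch {
            total.add_assign(gradient);
        }

        println!("summed gradient = {:?}", total); // [1.5, -1.75, 2.0]
    }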