🔥 Attempt at backpropagation

My head is tired :/
main
Shad Amethyst 2 years ago
parent 5a20acf595
commit d3d5f57a2b

@@ -7,4 +7,5 @@ edition = "2021"

[dependencies]
ndarray = "^0.15"
# num-traits = "0.2.15"
rand = "^0.8"

@@ -1,6 +1,8 @@
/// An extension of `std::ops::AddAssign` and `std::ops::Default`
pub trait NeuraAddAssign {
    fn add_assign(&mut self, other: &Self);

    fn default() -> Self;
}

impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
@@ -8,14 +10,31 @@ impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Righ
        NeuraAddAssign::add_assign(&mut self.0, &other.0);
        NeuraAddAssign::add_assign(&mut self.1, &other.1);
    }

    fn default() -> Self {
        (Left::default(), Right::default())
    }
}

impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] {
    fn add_assign(&mut self, other: &[T; N]) {
        for i in 0..N {
            NeuraAddAssign::add_assign(&mut self[i], &other[i]);
        }
    }

    fn default() -> Self {
        let mut res: Vec<T> = Vec::with_capacity(N);

        for _ in 0..N {
            res.push(T::default());
        }

        res.try_into().unwrap_or_else(|_| {
            // TODO: check that this panic is optimized away
            unreachable!()
        })
    }
}

macro_rules! base {
@@ -24,6 +43,10 @@ macro_rules! base {
            fn add_assign(&mut self, other: &Self) {
                std::ops::AddAssign::add_assign(self, other);
            }

            fn default() -> Self {
                <Self as Default>::default()
            }
        }
    }
}
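
For reference, a standalone sketch (not part of the crate) of the pattern these impls enable: a zero-valued gradient of any nested shape can be created with `default()`, and per-sample gradients summed into it with `add_assign`. The trait copy and the numbers below are made up purely for illustration.

// Minimal standalone sketch: same pattern as `NeuraAddAssign`, under a different name.
trait AddAssignDefault {
    fn add_assign(&mut self, other: &Self);
    fn default() -> Self;
}

impl AddAssignDefault for f64 {
    fn add_assign(&mut self, other: &Self) { *self += *other; }
    fn default() -> Self { 0.0 }
}

impl<A: AddAssignDefault, B: AddAssignDefault> AddAssignDefault for (A, B) {
    fn add_assign(&mut self, other: &Self) {
        self.0.add_assign(&other.0);
        self.1.add_assign(&other.1);
    }
    fn default() -> Self { (A::default(), B::default()) }
}

fn main() {
    // A "(weight, bias)"-shaped gradient for a 1-in/1-out layer, summed over two samples.
    let mut total: (f64, f64) = AddAssignDefault::default();
    for sample_gradient in [(0.5, -0.125), (0.25, 0.375)] {
        total.add_assign(&sample_gradient);
    }
    assert_eq!(total, (0.75, 0.25));
}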

@@ -1,5 +1,5 @@
use super::NeuraLayer;
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer};
use rand::Rng;

pub struct NeuraDenseLayer<
@@ -65,6 +65,31 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
    }
}

impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraTrainableLayer
    for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
{
    type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]);

    // TODO: double-check the math in this
    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) {
        let evaluated = multiply_matrix_vector(&self.weights, input);

        // Compute delta from epsilon, with `self.activation'(z) * epsilon = delta`
        let mut delta = epsilon.clone();
        for i in 0..OUTPUT_LEN {
            delta[i] *= self.activation.derivate(evaluated[i]);
        }

        let weights_gradient = reverse_dot_product(&delta, input);
        // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation,
        // the gradient of the bias is equal to the delta term of the backpropagation algorithm.
        let bias_gradient = delta;

        let new_epsilon = multiply_matrix_transpose_vector(&self.weights, &delta);

        (new_epsilon, (weights_gradient, bias_gradient))
    }
}
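
A quick scalar sanity check of the formulas used in `backpropagate` above. This is standalone, illustrative code, not crate code; the quadratic activation and the numbers are assumptions chosen only to make the arithmetic visible.

// Scalar (1-in / 1-out) version of the dense-layer backpropagation step.
fn main() {
    let weight = 0.5_f64;
    let input = 2.0_f64;
    let epsilon = 0.25_f64; // backpropagation term handed down by the next layer

    // Hypothetical activation f(z) = z^2, so f'(z) = 2 * z.
    let z = weight * input;
    let delta = 2.0 * z * epsilon;        // delta = f'(z) * epsilon
    let weights_gradient = delta * input; // reverse_dot_product(&delta, input) in 1D
    let bias_gradient = delta;            // gradient of the bias is delta itself
    let new_epsilon = weight * delta;     // multiply_matrix_transpose_vector in 1D

    println!("{weights_gradient} {bias_gradient} {new_epsilon}");
}
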
#[cfg(test)]
mod test {
    use super::*;

@@ -3,6 +3,7 @@
pub mod derivable;
pub mod layer;
pub mod network;
pub mod train;
pub mod algebra;

mod utils;

@ -1,4 +1,4 @@
use crate::{layer::NeuraLayer, train::NeuraTrainable}; use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss};
pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> { pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
layer: Layer, layer: Layer,
@ -23,6 +23,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraNetwork<Layer, ChildNetwork> {
pub fn child_network(&self) -> &ChildNetwork { pub fn child_network(&self) -> &ChildNetwork {
&self.child_network &self.child_network
} }
pub fn layer(&self) -> &Layer {
&self.layer
}
} }
impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> { impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> {
@ -55,6 +59,28 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLa
} }
} }
impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
type Delta = Layer::Delta;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
let final_activation = self.layer.eval(input);
let backprop_epsilon = loss.nabla(target, final_activation);
self.layer.backpropagate(&input, backprop_epsilon)
}
}
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> {
type Delta = (Layer::Delta, ChildNetwork::Delta);
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient);
(backprop_gradient, (layer_gradient, weights_gradient))
}
}
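
With these two impls, a network's gradient is a tuple nested the same way as the network itself. As an illustration (not part of this commit; `Act` stands for any activation implementing `NeuraDerivable<f64>` from `crate::derivable`):

// Hypothetical 4 -> 3 -> 2 stack of dense layers.
type Net<Act> = NeuraNetwork<
    NeuraDenseLayer<Act, 4, 3>,
    NeuraNetwork<NeuraDenseLayer<Act, 3, 2>, ()>,
>;
// The two impls above then make the full gradient
//   <Net<Act> as NeuraTrainable>::Delta
//     = (([[f64; 4]; 3], [f64; 3]), ([[f64; 3]; 2], [f64; 2]))
// i.e. one (weights, bias) pair per layer, nested like the network.
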
#[macro_export]
macro_rules! neura_network {
    [] => {

@@ -0,0 +1,62 @@
use crate::{
    derivable::NeuraLoss,
    layer::NeuraLayer,
    network::NeuraNetwork,
    // utils::{assign_add_vector, chunked},
    algebra::NeuraAddAssign,
};

pub trait NeuraTrainableLayer: NeuraLayer {
    type Delta: NeuraAddAssign;

    /// Computes the backpropagation term and the derivative of the internal weights,
    /// using the `input` vector output by the previous layer and the backpropagation term `epsilon` of the next layer.
    ///
    /// Note: we introduce the term `epsilon`, which together with the activation of the current layer can be used to compute `delta_l`:
    /// ```no_rust
    /// f_l'(a_l) * epsilon_l = delta_l
    /// ```
    ///
    /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
    /// with `epsilon_{l-1}` still to be multiplied by `f_{l-1}'(activation)` by the previous layer.
    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta);
}

pub trait NeuraTrainable: NeuraLayer {
    type Delta: NeuraAddAssign;

    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta);
}

pub trait NeuraTrainer<F, Loss: NeuraLoss<F>> {
    fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
        &self,
        trainable: &NeuraNetwork<Layer, ChildNetwork>,
        input: &Layer::Input,
        target: Loss::Target,
        loss: Loss,
    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
    where
        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = F>;
}

#[non_exhaustive]
pub struct NeuraBackprop {
    pub epsilon: f64,
    pub batch_size: usize,
}

impl<const N: usize, Loss: NeuraLoss<[f64; N]>> NeuraTrainer<[f64; N], Loss> for NeuraBackprop {
    fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
        &self,
        trainable: &NeuraNetwork<Layer, ChildNetwork>,
        input: &Layer::Input,
        target: Loss::Target,
        loss: Loss,
    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
    where
        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
    {
        trainable.backpropagate(input, target, loss).1
    }
}
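
A sketch of where this seems to be headed (not part of this commit): because `Delta` is bounded by `NeuraAddAssign`, the per-sample gradients returned by `get_gradient` can be summed into a batch gradient. The function name and the `Copy` bounds below are assumptions made for the example so that the loss and target can be reused across the batch.

// Hypothetical helper that could live in this module alongside NeuraBackprop.
pub fn batch_gradient<const N: usize, Layer, ChildNetwork, Loss>(
    trainer: &NeuraBackprop,
    network: &NeuraNetwork<Layer, ChildNetwork>,
    batch: &[(Layer::Input, Loss::Target)],
    loss: Loss,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
where
    Layer: NeuraLayer,
    Loss: NeuraLoss<[f64; N]> + Copy,
    Loss::Target: Copy,
    NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{
    // Start from the zero gradient provided by `NeuraAddAssign::default`...
    let mut total: <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta =
        NeuraAddAssign::default();

    // ...and sum the per-sample gradients into it.
    for (input, target) in batch {
        let gradient = <NeuraBackprop as NeuraTrainer<[f64; N], Loss>>::get_gradient(
            trainer, network, input, *target, loss,
        );
        NeuraAddAssign::add_assign(&mut total, &gradient);
    }

    total
}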

@@ -15,6 +15,39 @@ pub(crate) fn multiply_matrix_vector<const WIDTH: usize, const HEIGHT: usize>(
    result
}

/// Equivalent to `multiply_matrix_vector(transpose(matrix), vector)`.
pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT: usize>(
    matrix: &[[f64; WIDTH]; HEIGHT],
    vector: &[f64; HEIGHT],
) -> [f64; WIDTH] {
    let mut result = [0.0; WIDTH];

    for i in 0..WIDTH {
        let mut sum = 0.0;
        for k in 0..HEIGHT {
            sum += matrix[k][i] * vector[k];
        }
        result[i] = sum;
    }

    result
}

pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
    left: &[f64; HEIGHT],
    right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] {
    let mut result = [[0.0; WIDTH]; HEIGHT];

    for i in 0..HEIGHT {
        for j in 0..WIDTH {
            result[i][j] = left[i] * right[j];
        }
    }

    result
}

pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
    for i in 0..N {
        sum[i] += operand[i];
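
A hypothetical test (not in this commit) pinning down what the two new helpers compute: `reverse_dot_product` is the outer product `left * right^T`, and `multiply_matrix_transpose_vector` computes `M^T * v`.

#[cfg(test)]
mod transpose_helpers_test {
    use super::*;

    #[test]
    fn outer_product_and_transpose_multiply() {
        let left = [1.0, 2.0];       // HEIGHT = 2
        let right = [3.0, 4.0, 5.0]; // WIDTH = 3

        // Outer product: result[i][j] = left[i] * right[j]
        assert_eq!(
            reverse_dot_product(&left, &right),
            [[3.0, 4.0, 5.0], [6.0, 8.0, 10.0]]
        );

        // M^T * v, where M is HEIGHT x WIDTH and v has HEIGHT entries
        let matrix = [[1.0, 0.0, 2.0], [0.0, 1.0, 3.0]];
        let vector = [10.0, 100.0];
        assert_eq!(
            multiply_matrix_transpose_vector(&matrix, &vector),
            [10.0, 100.0, 320.0]
        );
    }
}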
