From d3d5f57a2bb2f64f8bfa7f2426cf667b1b0bb6cf Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Tue, 11 Apr 2023 23:37:24 +0200
Subject: [PATCH] :sparkles: :fire: Attempt at backpropagation

My head is tired :/
---
 Cargo.toml         |  1 +
 src/algebra.rs     | 27 ++++++++++++++++++--
 src/layer/dense.rs | 27 +++++++++++++++++++-
 src/lib.rs         |  1 +
 src/network.rs     | 28 ++++++++++++++++++++-
 src/train.rs       | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 src/utils.rs       | 33 ++++++++++++++++++++++++
 7 files changed, 175 insertions(+), 4 deletions(-)
 create mode 100644 src/train.rs

diff --git a/Cargo.toml b/Cargo.toml
index ec6baa3..8bbb664 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,4 +7,5 @@ edition = "2021"
 
 [dependencies]
 ndarray = "^0.15"
+# num-traits = "0.2.15"
 rand = "^0.8"
diff --git a/src/algebra.rs b/src/algebra.rs
index dc0d71d..9f7df6d 100644
--- a/src/algebra.rs
+++ b/src/algebra.rs
@@ -1,6 +1,8 @@
-/// An extension of `std::ops::AddAssign`
+/// An extension of `std::ops::AddAssign` and `std::ops::Default`
 pub trait NeuraAddAssign {
     fn add_assign(&mut self, other: &Self);
+
+    fn default() -> Self;
 }
 
 impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
@@ -8,14 +10,31 @@ impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Righ
         NeuraAddAssign::add_assign(&mut self.0, &other.0);
         NeuraAddAssign::add_assign(&mut self.1, &other.1);
     }
+
+    fn default() -> Self {
+        (Left::default(), Right::default())
+    }
 }
 
-impl<T: NeuraAddAssign, const N: usize> NeuraAddAssign for [T; N] {
     fn add_assign(&mut self, other: &[T; N]) {
         for i in 0..N {
             NeuraAddAssign::add_assign(&mut self[i], &other[i]);
         }
     }
+
+    fn default() -> Self {
+        let mut res: Vec<T> = Vec::with_capacity(N);
+
+        for _ in 0..N {
+            res.push(T::default());
+        }
+
+        res.try_into().unwrap_or_else(|_| {
+            // TODO: check that this panic is optimized away
+            unreachable!()
+        })
+    }
 }
 
@@ -24,6 +43,10 @@ macro_rules! base {
             fn add_assign(&mut self, other: &Self) {
                 std::ops::AddAssign::add_assign(self, other);
             }
+
+            fn default() -> Self {
+                <Self as Default>::default()
+            }
         }
     }
 }
diff --git a/src/layer/dense.rs b/src/layer/dense.rs
index c7762b2..337bdc1 100644
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@@ -1,5 +1,5 @@
 use super::NeuraLayer;
-use crate::{derivable::NeuraDerivable, utils::multiply_matrix_vector};
+use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer};
 use rand::Rng;
 
 pub struct NeuraDenseLayer<
@@ -65,6 +65,31 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
     }
 }
 
+impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraTrainableLayer
+    for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
+{
+    type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]);
+
+    // TODO: double-check the math in this
+    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) {
+        let evaluated = multiply_matrix_vector(&self.weights, input);
+        // Compute delta from epsilon, with `self.activation'(z) * epsilon = delta`
+        let mut delta = epsilon.clone();
+        for i in 0..OUTPUT_LEN {
+            delta[i] = self.activation.derivate(evaluated[i]);
+        }
+
+        let weights_gradient = reverse_dot_product(&delta, input);
+        // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
+        // The gradient of the bias is equal to the delta term of the backpropagation algorithm
+        let bias_gradient = delta;
+
+        let new_epsilon = multiply_matrix_transpose_vector(&self.weights, &delta);
+
+        (new_epsilon, (weights_gradient, bias_gradient))
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/src/lib.rs b/src/lib.rs
index dcdf856..0a3fc5d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,6 +3,7 @@
 pub mod derivable;
 pub mod layer;
 pub mod network;
+pub mod train;
 
 pub mod algebra;
 mod utils;
diff --git a/src/network.rs b/src/network.rs
index 6a1cbde..0fddb33 100644
--- a/src/network.rs
+++ b/src/network.rs
@@ -1,4 +1,4 @@
-use crate::{layer::NeuraLayer, train::NeuraTrainable};
+use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss};
 
 pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
     layer: Layer,
@@ -23,6 +23,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraNetwork<Layer, ChildNetwork> {
     pub fn child_network(&self) -> &ChildNetwork {
         &self.child_network
     }
+
+    pub fn layer(&self) -> &Layer {
+        &self.layer
+    }
 }
 
 impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> {
@@ -55,6 +59,28 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLa
     }
 }
 
+impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
+    type Delta = Layer::Delta;
+
+    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
+        let final_activation = self.layer.eval(input);
+        let backprop_epsilon = loss.nabla(target, final_activation);
+        self.layer.backpropagate(&input, backprop_epsilon)
+    }
+}
+
+impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> {
+    type Delta = (Layer::Delta, ChildNetwork::Delta);
+
+    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
+        let next_activation = self.layer.eval(input);
+        let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss);
+        let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient);
+
+        (backprop_gradient, (layer_gradient, weights_gradient))
+    }
+}
+
 #[macro_export]
neura_network { [] => { diff --git a/src/train.rs b/src/train.rs new file mode 100644 index 0000000..f0c126a --- /dev/null +++ b/src/train.rs @@ -0,0 +1,62 @@ +use crate::{ + derivable::NeuraLoss, + layer::NeuraLayer, + network::NeuraNetwork, + // utils::{assign_add_vector, chunked}, + algebra::NeuraAddAssign, +}; + + +pub trait NeuraTrainableLayer: NeuraLayer { + type Delta: NeuraAddAssign; + + /// Computes the backpropagation term and the derivative of the internal weights, + /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. + /// + /// Note: we introduce the term `epsilon`, which together with the activation of the current function can be used to compute `delta_l`: + /// ```no_rust + /// f_l'(a_l) * epsilon_l = delta_l + /// ``` + /// + /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, + /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)`. + fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta); +} + +pub trait NeuraTrainable: NeuraLayer { + type Delta: NeuraAddAssign; + + fn backpropagate>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta); +} + +pub trait NeuraTrainer> { + fn get_gradient( + &self, + trainable: &NeuraNetwork, + input: &Layer::Input, + target: Loss::Target, + loss: Loss, + ) -> as NeuraTrainable>::Delta where + NeuraNetwork: NeuraTrainable + ; +} + +#[non_exhaustive] +pub struct NeuraBackprop { + pub epsilon: f64, + pub batch_size: usize, +} + +impl> NeuraTrainer<[f64; N], Loss> for NeuraBackprop { + fn get_gradient( + &self, + trainable: &NeuraNetwork, + input: &Layer::Input, + target: Loss::Target, + loss: Loss, + ) -> as NeuraTrainable>::Delta where + NeuraNetwork: NeuraTrainable, + { + trainable.backpropagate(input, target, loss).1 + } +} diff --git a/src/utils.rs b/src/utils.rs index aeffdc7..94d2bf2 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -15,6 +15,39 @@ pub(crate) fn multiply_matrix_vector( result } +/// Equivalent to `multiply_matrix_vector(transpose(matrix), vector)`. +pub(crate) fn multiply_matrix_transpose_vector( + matrix: &[[f64; WIDTH]; HEIGHT], + vector: &[f64; HEIGHT], +) -> [f64; WIDTH] { + let mut result = [0.0; WIDTH]; + + for i in 0..WIDTH { + let mut sum = 0.0; + for k in 0..HEIGHT { + sum += matrix[k][i] * vector[k]; + } + result[i] = sum; + } + + result +} + +pub(crate) fn reverse_dot_product( + left: &[f64; HEIGHT], + right: &[f64; WIDTH] +) -> [[f64; WIDTH]; HEIGHT] { + let mut result = [[0.0; WIDTH]; HEIGHT]; + + for i in 0..HEIGHT { + for j in 0..WIDTH { + result[i][j] = left[i] * right[j]; + } + } + + result +} + pub(crate) fn assign_add_vector(sum: &mut [f64; N], operand: &[f64; N]) { for i in 0..N { sum[i] += operand[i];