🔥 Semi-working training, although it seems to only want to converge to zero

main
Shad Amethyst 2 years ago
parent d3d5f57a2b
commit 7a6921a1c1

@@ -0,0 +1,42 @@
#![feature(generic_arg_infer)]
use neuramethyst::prelude::*;
use neuramethyst::derivable::activation::{Relu, Tanh};
use neuramethyst::derivable::loss::Euclidean;
fn main() {
let mut network = neura_network![
neura_layer!("dense", Tanh, 2, 2),
neura_layer!("dense", Tanh, 3),
neura_layer!("dense", Relu, 1)
];
let inputs = [
([0.0, 0.0], [0.0]),
([0.0, 1.0], [1.0]),
([1.0, 0.0], [1.0]),
([1.0, 1.0], [0.0])
];
// println!("{:#?}", network);
for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]);
}
train_batched(
&mut network,
inputs.clone(),
&inputs,
NeuraBackprop::new(Euclidean),
0.01, // learning rate
1, // batch size
25 // epochs
);
// println!("{:#?}", network);
for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]);
}
}

@@ -1,33 +1,46 @@
/// An extension of `std::ops::AddAssign` and `std::ops::Default`
pub trait NeuraAddAssign {
pub trait NeuraVectorSpace {
fn add_assign(&mut self, other: &Self);
fn default() -> Self;
fn mul_assign(&mut self, by: f64);
fn zero() -> Self;
}
impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
fn add_assign(&mut self, other: &Self) {
NeuraAddAssign::add_assign(&mut self.0, &other.0);
NeuraAddAssign::add_assign(&mut self.1, &other.1);
NeuraVectorSpace::add_assign(&mut self.0, &other.0);
NeuraVectorSpace::add_assign(&mut self.1, &other.1);
}
fn mul_assign(&mut self, by: f64) {
NeuraVectorSpace::mul_assign(&mut self.0, by);
NeuraVectorSpace::mul_assign(&mut self.1, by);
}
fn default() -> Self {
(Left::default(), Right::default())
fn zero() -> Self {
(Left::zero(), Right::zero())
}
}
impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] {
impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
fn add_assign(&mut self, other: &[T; N]) {
for i in 0..N {
NeuraAddAssign::add_assign(&mut self[i], &other[i]);
NeuraVectorSpace::add_assign(&mut self[i], &other[i]);
}
}
fn mul_assign(&mut self, by: f64) {
for i in 0..N {
NeuraVectorSpace::mul_assign(&mut self[i], by);
}
}
fn default() -> Self {
fn zero() -> Self {
let mut res: Vec<T> = Vec::with_capacity(N);
for _ in 0..N {
res.push(T::default());
res.push(T::zero());
}
res.try_into().unwrap_or_else(|_| {
@@ -39,16 +52,20 @@ impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] {
macro_rules! base {
( $type:ty ) => {
impl NeuraAddAssign for $type {
impl NeuraVectorSpace for $type {
fn add_assign(&mut self, other: &Self) {
std::ops::AddAssign::add_assign(self, other);
}
fn default() -> Self {
<Self as Default>::default()
fn mul_assign(&mut self, other: f64) {
std::ops::MulAssign::mul_assign(self, other as $type);
}
fn zero() -> Self {
<Self as Default>::default()
}
}
};
}
base!(f32);
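The renamed `NeuraVectorSpace` trait gives gradients a small vector-space API (`zero`, `add_assign`, `mul_assign`), and the tuple/array impls let nested gradient types compose. A hedged usage sketch, assuming the crate path `neuramethyst::algebra::NeuraVectorSpace` and that `base!` is also instantiated for `f64` (the hunk is cut off right after `base!(f32);`):

use neuramethyst::algebra::NeuraVectorSpace; // path assumed from lib.rs below

fn main() {
    // A gradient of type ([f64; 2], f64) is itself a vector space, thanks to
    // the tuple, array and scalar impls above (assumes `base!(f64);` exists).
    let mut sum = <([f64; 2], f64)>::zero();
    sum.add_assign(&([1.0, 2.0], 0.5));
    sum.add_assign(&([3.0, -1.0], 1.5));
    sum.mul_assign(-0.1 / 2.0); // -learning_rate / batch_size, as in train_batched
    println!("{:?}", sum);      // ([-0.2, -0.05], -0.1)
}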

@@ -34,3 +34,32 @@ impl NeuraDerivable<f32> for Relu {
}
}
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Tanh;
impl NeuraDerivable<f64> for Tanh {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
0.5 * input.tanh() + 0.5
}
#[inline(always)]
fn derivate(&self, at: f64) -> f64 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}
impl NeuraDerivable<f32> for Tanh {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
0.5 * input.tanh() + 0.5
}
#[inline(always)]
fn derivate(&self, at: f32) -> f32 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}
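Note that this `Tanh` is a rescaled tanh mapping into (0, 1), which is why its derivative is `0.5 * (1 - tanh²)` rather than `1 - tanh²`. A quick standalone finite-difference check of that relation (plain functions, no crate types):

fn eval(x: f64) -> f64 { 0.5 * x.tanh() + 0.5 }

fn derivate(x: f64) -> f64 {
    let tanh = x.tanh();
    0.5 * (1.0 - tanh * tanh)
}

fn main() {
    let h = 1e-6;
    for x in [-2.0, -0.5, 0.0, 0.5, 2.0] {
        // central difference approximation of d(eval)/dx
        let numeric = (eval(x + h) - eval(x - h)) / (2.0 * h);
        assert!((numeric - derivate(x)).abs() < 1e-6);
    }
    println!("0.5 * tanh(x) + 0.5 has derivative 0.5 * (1 - tanh(x)^2)");
}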

@@ -1,12 +1,14 @@
use super::NeuraLoss;
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Euclidean;
impl<const N: usize> NeuraLoss<[f64; N]> for Euclidean {
type Out = f64;
pub struct Euclidean<const N: usize>;
impl<const N: usize> NeuraLoss for Euclidean<N> {
type Input = [f64; N];
type Target = [f64; N];
fn eval(&self, target: [f64; N], actual: [f64; N]) -> f64 {
#[inline]
fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 {
let mut sum_squared = 0.0;
for i in 0..N {
@@ -16,7 +18,15 @@ impl<const N: usize> NeuraLoss<[f64; N]> for Euclidean {
sum_squared * 0.5
}
fn nabla(&self, target: [f64; N], actual: [f64; N]) -> [f64; N] {
todo!()
#[inline]
fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] {
let mut res = [0.0; N];
// ∂E(y)/∂yᵢ = yᵢ - yᵢ'
for i in 0..N {
res[i] = actual[i] - target[i];
}
res
}
}
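The new `nabla` is the gradient of `E = 0.5 * Σ (actualᵢ - targetᵢ)²` with respect to `actual`, which is simply `actual - target`. A standalone re-derivation with a finite-difference check (the same formulas, not the crate's code):

fn eval(target: &[f64; 3], actual: &[f64; 3]) -> f64 {
    let mut sum_squared = 0.0;
    for i in 0..3 {
        sum_squared += (actual[i] - target[i]) * (actual[i] - target[i]);
    }
    sum_squared * 0.5
}

fn nabla(target: &[f64; 3], actual: &[f64; 3]) -> [f64; 3] {
    let mut res = [0.0; 3];
    for i in 0..3 {
        res[i] = actual[i] - target[i];
    }
    res
}

fn main() {
    let (target, actual) = ([1.0, 0.0, -2.0], [0.5, 0.25, 1.0]);
    let grad = nabla(&target, &actual);
    let h = 1e-6;
    for i in 0..3 {
        let (mut plus, mut minus) = (actual, actual);
        plus[i] += h;
        minus[i] -= h;
        // ∂E/∂actualᵢ estimated by a central difference
        let numeric = (eval(&target, &plus) - eval(&target, &minus)) / (2.0 * h);
        assert!((numeric - grad[i]).abs() < 1e-6);
    }
    println!("nabla matches the gradient of 0.5 * Σ (actualᵢ - targetᵢ)²");
}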

@@ -8,13 +8,13 @@ pub trait NeuraDerivable<F> {
fn derivate(&self, at: F) -> F;
}
pub trait NeuraLoss<F> {
type Out;
pub trait NeuraLoss {
type Input;
type Target;
fn eval(&self, target: Self::Target, actual: F) -> Self::Out;
fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64;
/// Should return the gradient of the loss function according to `actual`
/// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
fn nabla(&self, target: Self::Target, actual: F) -> F;
fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
}
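With the reworked trait, a loss now declares its `Input` (the network output) and `Target` types, and returns an `f64` score plus an `Input`-shaped gradient. A hypothetical implementor, purely for illustration; the `AbsoluteError` type below is not part of the crate and the import path is assumed from the modules above:

use neuramethyst::derivable::NeuraLoss; // assumed path

// Hypothetical absolute-error loss, not part of this commit.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct AbsoluteError<const N: usize>;

impl<const N: usize> NeuraLoss for AbsoluteError<N> {
    type Input = [f64; N];
    type Target = [f64; N];

    fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 {
        let mut sum = 0.0;
        for i in 0..N {
            sum += (actual[i] - target[i]).abs();
        }
        sum
    }

    fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] {
        let mut res = [0.0; N];
        for i in 0..N {
            let diff = actual[i] - target[i];
            // subgradient of |diff|; 0 is chosen at the kink
            res[i] = if diff == 0.0 { 0.0 } else { diff.signum() };
        }
        res
    }
}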

@@ -1,7 +1,8 @@
use super::NeuraLayer;
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer};
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
use rand::Rng;
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>,
const INPUT_LEN: usize,
@@ -34,7 +35,7 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
weights[i][j] = rng.gen::<f64>() * multiplier;
weights[i][j] = rng.gen_range(-multiplier..multiplier);
}
}
@@ -88,6 +89,11 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
(new_epsilon, (weights_gradient, bias_gradient))
}
fn apply_gradient(&mut self, gradient: &Self::Delta) {
NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
}
}
#[cfg(test)]

@@ -1,9 +1,19 @@
#![feature(generic_arg_infer)]
pub mod algebra;
pub mod derivable;
pub mod layer;
pub mod network;
pub mod train;
pub mod algebra;
mod utils;
pub mod prelude {
// Macros
pub use crate::{neura_network, neura_layer};
// Structs and traits
pub use super::network::{NeuraNetwork};
pub use super::layer::{NeuraLayer, NeuraDenseLayer};
pub use super::train::{NeuraBackprop, train_batched};
}

@@ -1,5 +1,10 @@
use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss};
use crate::{
derivable::NeuraLoss,
layer::NeuraLayer,
train::{NeuraTrainable, NeuraTrainableLayer},
};
#[derive(Clone, Debug)]
pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
layer: Layer,
child_network: ChildNetwork,
@@ -62,20 +67,44 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLa
impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
type Delta = Layer::Delta;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(gradient);
}
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
let final_activation = self.layer.eval(input);
let backprop_epsilon = loss.nabla(target, final_activation);
let backprop_epsilon = loss.nabla(target, &final_activation);
self.layer.backpropagate(&input, backprop_epsilon)
}
}
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> {
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable
for NeuraNetwork<Layer, ChildNetwork>
{
type Delta = (Layer::Delta, ChildNetwork::Delta);
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(&gradient.0);
self.child_network.apply_gradient(&gradient.1);
}
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient);
let (backprop_gradient, weights_gradient) =
self.child_network
.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) =
self.layer.backpropagate(input, backprop_gradient);
(backprop_gradient, (layer_gradient, weights_gradient))
}
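The `Delta` of a network is a nested tuple mirroring its cons-list of layers: each level pairs the current layer's gradient with the child network's. A self-contained toy version of that recursion, with scalar "layers" that are not the crate's types, just to show how the `(epsilon, delta)` pair flows back up:

trait ToyLayer {
    type Delta;
    fn forward(&self, x: f64) -> f64;
    // Returns (epsilon for the previous layer, gradient of this layer's weight).
    fn backward(&self, x: f64, epsilon: f64) -> (f64, Self::Delta);
    fn apply(&mut self, delta: &Self::Delta, factor: f64);
}

struct Scale {
    w: f64,
}

impl ToyLayer for Scale {
    type Delta = f64; // d(loss)/dw
    fn forward(&self, x: f64) -> f64 {
        self.w * x
    }
    fn backward(&self, x: f64, epsilon: f64) -> (f64, f64) {
        (self.w * epsilon, x * epsilon)
    }
    fn apply(&mut self, delta: &f64, factor: f64) {
        self.w += factor * delta;
    }
}

// Two levels, so the gradient is (layer delta, child delta), analogous to
// `(Layer::Delta, ChildNetwork::Delta)` above.
struct ToyNetwork {
    layer: Scale,
    child: Scale,
}

impl ToyNetwork {
    fn backpropagate(&self, input: f64, target: f64) -> (f64, f64) {
        let next_activation = self.layer.forward(input);
        let output = self.child.forward(next_activation);
        let epsilon = output - target; // Euclidean loss gradient
        let (epsilon, child_delta) = self.child.backward(next_activation, epsilon);
        let (_epsilon, layer_delta) = self.layer.backward(input, epsilon);
        (layer_delta, child_delta)
    }
}

fn main() {
    let mut net = ToyNetwork {
        layer: Scale { w: 0.5 },
        child: Scale { w: 2.0 },
    };
    for _ in 0..200 {
        let (d0, d1) = net.backpropagate(1.0, 3.0);
        net.layer.apply(&d0, -0.1); // factor = -learning_rate
        net.child.apply(&d1, -0.1);
    }
    // The composed map w_child * w_layer * x should now be close to the target 3.
    println!("{}", net.child.w * net.layer.w);
}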

@@ -1,14 +1,13 @@
use crate::{
// utils::{assign_add_vector, chunked},
algebra::NeuraVectorSpace,
derivable::NeuraLoss,
layer::NeuraLayer,
network::NeuraNetwork,
// utils::{assign_add_vector, chunked},
algebra::NeuraAddAssign,
};
pub trait NeuraTrainableLayer: NeuraLayer {
type Delta: NeuraAddAssign;
type Delta: NeuraVectorSpace;
/// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
@@ -19,44 +18,134 @@ pub trait NeuraTrainableLayer: NeuraLayer {
/// ```
///
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)`.
fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta);
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
}
pub trait NeuraTrainable: NeuraLayer {
type Delta: NeuraAddAssign;
type Delta: NeuraVectorSpace;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta);
fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Should implement the backpropagation algorithm; see `NeuraTrainableLayer::backpropagate` for more information.
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta);
}
pub trait NeuraTrainer<F, Loss: NeuraLoss<F>> {
pub trait NeuraTrainer<Output, Target = Output> {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: Loss::Target,
loss: Loss,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = F>
;
target: &Target,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
fn score<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: &Target,
) -> f64
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
}
#[non_exhaustive]
pub struct NeuraBackprop {
pub epsilon: f64,
pub batch_size: usize,
pub struct NeuraBackprop<Loss: NeuraLoss + Clone> {
loss: Loss,
}
impl<const N: usize, Loss: NeuraLoss<[f64; N]>> NeuraTrainer<[f64; N], Loss> for NeuraBackprop {
impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
pub fn new(loss: Loss) -> Self {
Self { loss }
}
}
impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraTrainer<[f64; N], Loss::Target>
for NeuraBackprop<Loss>
{
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: Loss::Target,
loss: Loss,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
target: &Loss::Target,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{
trainable.backpropagate(input, target, loss).1
trainable.backpropagate(input, target, self.loss.clone()).1
}
fn score<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: &Loss::Target,
) -> f64
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{
self.loss.eval(target, &trainable.eval(&input))
}
}
pub fn train_batched<
Output,
Target,
Trainer: NeuraTrainer<Output, Target>,
Layer: NeuraLayer,
ChildNetwork,
Inputs: IntoIterator<Item = (Layer::Input, Target)>,
>(
network: &mut NeuraNetwork<Layer, ChildNetwork>,
inputs: Inputs,
test_inputs: &[(Layer::Input, Target)],
trainer: Trainer,
learning_rate: f64,
batch_size: usize,
epochs: usize,
) where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>,
Inputs::IntoIter: Clone,
{
// TODO: apply shuffling?
let mut iter = inputs.into_iter().cycle();
let factor = -learning_rate / (batch_size as f64);
'd: for epoch in 0..epochs {
let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
for _ in 0..batch_size {
if let Some((input, target)) = iter.next() {
let gradient = trainer.get_gradient(&network, &input, &target);
gradient_sum.add_assign(&gradient);
} else {
break 'd;
}
}
gradient_sum.mul_assign(factor);
network.apply_gradient(&gradient_sum);
let mut loss_sum = 0.0;
for (input, target) in test_inputs {
loss_sum += trainer.score(&network, input, target);
}
loss_sum /= test_inputs.len() as f64;
println!("Epoch {epoch}, Loss: {:.3}", loss_sum);
}
}
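Each `epoch` here consumes one batch: `batch_size` gradients are summed, scaled by `-learning_rate / batch_size`, applied in a single step, and then the loss is averaged over `test_inputs`. A minimal standalone sketch of that accumulate-scale-apply pattern on a 1-D quadratic (hypothetical names, not crate code):

fn main() {
    // Minimize f(w) = 0.5 * (w - 4)^2 with the same update rule as train_batched:
    // sum the per-sample gradients, scale by -learning_rate / batch_size, apply.
    let mut w = 0.0_f64;
    let learning_rate = 0.5;
    let batch_size = 4;
    for epoch in 0..10 {
        let mut gradient_sum = 0.0; // NeuraVectorSpace::zero() in the real code
        for _ in 0..batch_size {
            gradient_sum += w - 4.0; // df/dw for one (identical) sample
        }
        gradient_sum *= -learning_rate / batch_size as f64;
        w += gradient_sum; // apply_gradient
        println!("Epoch {epoch}, w = {w:.3}");
    }
}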

@@ -35,7 +35,7 @@ pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT:
pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
left: &[f64; HEIGHT],
right: &[f64; WIDTH]
right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] {
let mut result = [[0.0; WIDTH]; HEIGHT];
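For reference, `reverse_dot_product` appears to be an outer product, `result[i][j] = left[i] * right[j]`, which is the shape needed for the dense layer's weight gradient (`δ ⊗ input`). The hunk above only shows the signature, so the following is a standalone sketch under that assumption:

// Hedged sketch: what an outer product ("reverse dot product") computes.
// This mirrors the assumed behaviour of the truncated function above.
fn outer_product<const WIDTH: usize, const HEIGHT: usize>(
    left: &[f64; HEIGHT],
    right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] {
    let mut result = [[0.0; WIDTH]; HEIGHT];
    for i in 0..HEIGHT {
        for j in 0..WIDTH {
            result[i][j] = left[i] * right[j];
        }
    }
    result
}

fn main() {
    let delta = [1.0, 2.0];      // HEIGHT = 2 (output size)
    let input = [3.0, 4.0, 5.0]; // WIDTH = 3 (input size)
    let grad = outer_product(&delta, &input);
    println!("{:?}", grad); // [[3.0, 4.0, 5.0], [6.0, 8.0, 10.0]]
}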
