Nailed it on the first try :3c (or not, and I'll regret writing this in a few years)
parent 6c1d6874d7
commit d7eb6de34e
@@ -0,0 +1,138 @@
use crate::algebra::{NeuraMatrix, NeuraVector};

use super::*;

/// A 1-dimensional convolutional layer, which applies `inner_layer` to a sliding window over its input.
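/// Window positions that fall outside the input are filled with `pad_with`, so the output has the same `LENGTH` as the input.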
#[derive(Clone, Debug)]
pub struct NeuraConv1DPad<
    const LENGTH: usize,
    const IN_FEATS: usize,
    const WINDOW: usize,
    Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>>,
> {
    inner_layer: Layer,
    pad_with: NeuraVector<IN_FEATS, f64>,
}

impl<
        const LENGTH: usize,
        const IN_FEATS: usize,
        const WINDOW: usize,
        Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>>,
    > NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
where
    [u8; IN_FEATS * WINDOW]: Sized,
{
    pub fn new(inner_layer: Layer, pad_with: NeuraVector<IN_FEATS, f64>) -> Self {
        Self {
            inner_layer,
            pad_with,
        }
    }

    fn iterate_windows<'a>(
        &'a self,
        input: &'a NeuraMatrix<IN_FEATS, LENGTH, f64>,
    ) -> impl Iterator<Item = (usize, Layer::Input)> + 'a {
        (0..LENGTH).map(move |window_center| {
            let mut virtual_input: NeuraVector<{ IN_FEATS * WINDOW }, f64> = NeuraVector::default();
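            // Build the flattened window centered on `window_center`: each of the WINDOW
            // positions contributes IN_FEATS consecutive values, and positions that fall
            // outside the input are filled from `pad_with`.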
            for i in 0..WINDOW {
                let input_index = i as isize + window_center as isize - (WINDOW - 1) as isize / 2;

                if input_index < 0 || input_index >= LENGTH as isize {
                    for j in 0..IN_FEATS {
                        virtual_input[i * IN_FEATS + j] = self.pad_with[j];
                    }
                } else {
                    for j in 0..IN_FEATS {
                        virtual_input[i * IN_FEATS + j] = input[input_index as usize][j];
                    }
                }
            }

            (window_center, virtual_input)
        })
    }
}

impl<
        const LENGTH: usize,
        const IN_FEATS: usize,
        const OUT_FEATS: usize,
        const WINDOW: usize,
        Layer: NeuraLayer<
            Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>,
            Output = NeuraVector<OUT_FEATS, f64>,
        >,
    > NeuraLayer for NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
{
    type Input = NeuraMatrix<IN_FEATS, LENGTH, f64>;
    type Output = NeuraMatrix<OUT_FEATS, LENGTH, f64>;

    fn eval(&self, input: &Self::Input) -> Self::Output {
        let mut res = NeuraMatrix::default();
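        // Each window position yields one row of the output: the inner layer's
        // OUT_FEATS values for the window centered on that position.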
        for (window_center, virtual_input) in self.iterate_windows(input) {
            res.set_row(window_center, self.inner_layer.eval(&virtual_input));
        }

        res
    }
}

impl<
        const LENGTH: usize,
        const IN_FEATS: usize,
        const OUT_FEATS: usize,
        const WINDOW: usize,
        Layer: NeuraLayer<
            Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>,
            Output = NeuraVector<OUT_FEATS, f64>,
        >,
    > NeuraTrainableLayer for NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
where
    Layer: NeuraTrainableLayer,
{
    type Delta = <Layer as NeuraTrainableLayer>::Delta;

    fn backpropagate(
        &self,
        input: &Self::Input,
        epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta) {
        let mut next_epsilon = Self::Input::default();
        let mut weights_gradient_sum = Self::Delta::zero();

        // TODO: consume epsilon efficiently
        for (window_center, virtual_input) in self.iterate_windows(input) {
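            // Gradient flowing back into this window's output row.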
            let epsilon = NeuraVector::from(&epsilon[window_center]);

            let (layer_next_epsilon, weights_gradient) =
                self.inner_layer.backpropagate(&virtual_input, epsilon);
            weights_gradient_sum.add_assign(&weights_gradient);

            for i in 0..WINDOW {
                // Re-compute the positions in `input` matching the positions in `layer_next_epsilon` and `virtual_input`
                let input_index = window_center as isize + i as isize - (WINDOW - 1) as isize / 2;
                if input_index < 0 || input_index >= LENGTH as isize {
                    continue;
                }
                let input_index = input_index as usize;
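                // Overlapping windows touch the same input positions, so their
                // contributions to `next_epsilon` are accumulated.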
                for j in 0..IN_FEATS {
                    next_epsilon[input_index][j] += layer_next_epsilon[i * IN_FEATS + j];
                }
            }
        }

        (next_epsilon, weights_gradient_sum)
    }

    fn regularize(&self) -> Self::Delta {
        self.inner_layer.regularize()
    }

    fn apply_gradient(&mut self, gradient: &Self::Delta) {
        self.inner_layer.apply_gradient(gradient);
    }
}
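//
// Usage sketch (illustrative only: `inner_layer`, `input` and the dimensions are
// assumptions, not part of this commit). A window of WINDOW = 5 positions over a
// LENGTH = 10 sequence with IN_FEATS = 3 features, padded with zeroes (assuming
// `NeuraVector::default()` yields an all-zero vector):
//
//     let conv = NeuraConv1DPad::<10, 3, 5, _>::new(inner_layer, NeuraVector::default());
//     let output = conv.eval(&input); // input: NeuraMatrix<3, 10, f64>, output: NeuraMatrix<OUT_FEATS, 10, f64>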