Block convolution, max pooling

Shad Amethyst, 2 years ago
branch main · parent a6da11b125 · commit cc7686569a

@@ -2,7 +2,7 @@
 #![feature(generic_const_exprs)]
 
 use neuramethyst::algebra::NeuraVector;
-use neuramethyst::derivable::reduce::Average;
+use neuramethyst::derivable::reduce::{Average, Max};
 use rust_mnist::Mnist;
 
 use neuramethyst::derivable::activation::{Linear, Relu};
@@ -55,17 +55,22 @@ fn main() {
     let mut network = neura_sequential![
         neura_layer!("unstable_reshape", 1, { 28 * 28 }),
-        neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 10; Relu)),
-        // neura_layer!("conv2d_pad", 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
-        // neura_layer!("pool_global", 10, {28 * 28}; Average),
-        // neura_layer!("pool1d", 10, 28, 28; Average),
-        // neura_layer!("unstable_flatten", 10, 28),
-        neura_layer!("unstable_flatten", 10, { 28 * 28 }),
-        // neura_layer!("dense", 100; Relu),
-        // neura_layer!("dropout", 0.5),
-        // neura_layer!("dense", 30; Relu),
-        // neura_layer!("dropout", 0.5),
+        neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 3; Relu)),
+        // neura_layer!("conv2d_block", 7, 7; 4; neura_layer!("dense", {3 * 4 * 4}, 8; Relu)),
+        // neura_layer!("conv2d_pad"; 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
+        neura_layer!("unstable_flatten"),
+        neura_layer!("dropout", 0.33),
+        neura_layer!("unstable_reshape", 3, { 28 * 28 }),
+        neura_layer!("conv2d_block", 14, 14; 2; neura_layer!("dense", {3 * 2 * 2}, 2; Relu)),
+        // neura_layer!("unstable_flatten"),
+        // neura_layer!("dropout", 0.33),
+        // neura_layer!("unstable_reshape", 2, { 14 * 14 }),
+        // neura_layer!("conv2d_pad"; 14, 5; neura_layer!("dense", {2 * 5 * 5}, 20; Relu)),
+        // neura_layer!("pool_global"; Max),
+        // neura_layer!("pool1d", {14 * 2}, 7; Max),
+        neura_layer!("unstable_flatten"),
+        neura_layer!("dropout", 0.2),
         neura_layer!("dense", 10; Linear),
         neura_layer!("softmax")
     ];
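Aside (not part of the commit): a rough shape walk-through of the new example network, assuming `conv2d_pad` keeps one output row per pixel, `conv2d_block` keeps one output row per block, and `unstable_flatten` concatenates the feature rows.

fn main() {
    // unstable_reshape 1, {28 * 28}: 1 feature x 784 pixels
    // conv2d_pad (width 28, 3x3 window, inner dense {1 * 3 * 3} -> 3): 3 features x 784 pixels
    let after_conv_pad = 3 * 28 * 28; // flattened by unstable_flatten before dropout(0.33)
    // unstable_reshape 3, {28 * 28}, then conv2d_block (14 x 14 blocks of 2 x 2,
    // inner dense {3 * 2 * 2} -> 2): 2 features x 196 blocks
    let after_block = 2 * 14 * 14; // flattened again before dropout(0.2)
    // dense -> 10, softmax: 10 class scores
    assert_eq!(after_conv_pad, 2352);
    assert_eq!(after_block, 392);
}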

@ -1,3 +1,5 @@
use crate::utils::{argmax, one_hot};
use super::*; use super::*;
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
@@ -18,3 +20,25 @@ impl NeuraReducer<f64> for Average {
         NeuraVector::from_value(1.0 / inputs.len() as f64)
     }
 }
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Max;
+
+impl NeuraReducer<f64> for Max {
+    #[inline(always)]
+    fn eval<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, f64>) -> f64 {
+        let mut max = 0.0;
+        for &i in inputs.iter() {
+            max = i.max(max);
+        }
+        max
+    }
+
+    #[inline(always)]
+    fn nabla<const LENGTH: usize>(
+        &self,
+        inputs: NeuraVector<LENGTH, f64>,
+    ) -> NeuraVector<LENGTH, f64> {
+        one_hot(argmax(inputs.as_ref()))
+    }
+}
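Aside (not part of the commit): a standalone sketch of the same reduction over a plain slice, with the gradient as a one-hot vector at the argmax. Unlike the committed eval, this version starts from f64::NEG_INFINITY, so it also handles all-negative inputs.

// Max-reduction and its subgradient, written against plain slices.
fn max_reduce(inputs: &[f64]) -> f64 {
    inputs.iter().copied().fold(f64::NEG_INFINITY, f64::max)
}

fn max_nabla(inputs: &[f64]) -> Vec<f64> {
    // Index of the largest element; ties resolve to the last maximum here.
    let argmax = inputs
        .iter()
        .enumerate()
        .max_by(|a, b| a.1.total_cmp(b.1))
        .map(|(i, _)| i)
        .unwrap_or(0);
    let mut grad = vec![0.0; inputs.len()];
    grad[argmax] = 1.0;
    grad
}

fn main() {
    let inputs = [0.2, 0.9, 0.5];
    assert_eq!(max_reduce(&inputs), 0.9);
    assert_eq!(max_nabla(&inputs), vec![0.0, 1.0, 0.0]);
}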

@@ -167,7 +167,7 @@ pub struct NeuraConv2DPadLayer<
     /// The width of the image, in grid units.
     ///
     /// **Class invariant:** `LAYER % width == 0`, `width > 0`
-    pub width: NonZeroUsize,
+    width: NonZeroUsize,
 }
 
 impl<
@@ -192,6 +192,10 @@ impl<
         }
     }
 
+    pub fn width(&self) -> NonZeroUsize {
+        self.width
+    }
+
     /// Iterates within the `(WINDOW, WINDOW)` window centered around `x, y`;
     /// Returns a 4-tuple `(x' = x + δx, y' = y + δy, δy * WINDOW + δx, y' * width + x')`, with the last element
     /// being set to `None` if `x'` or `y'` are out of bounds.
@@ -338,3 +342,148 @@ where
         self.inner_layer.cleanup();
     }
 }
+
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub struct NeuraConv2DBlockLayer<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>>,
+> {
+    pub inner_layer: Layer,
+}
+
+impl<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>>,
+> NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+{
+    pub fn new(layer: Layer) -> Self {
+        Self { inner_layer: layer }
+    }
+
+    fn iterate_blocks<'a>(
+        &'a self,
+        input: &'a NeuraMatrix<IN_FEATS, { WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+    ) -> impl Iterator<
+        Item = (
+            usize,
+            usize,
+            usize,
+            NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+        ),
+    > + 'a {
+        (0..HEIGHT).flat_map(move |y| {
+            (0..WIDTH).map(move |x| {
+                let output_index = y * WIDTH + x;
+                let mut virtual_input = NeuraVector::default();
+                for dy in 0..BLOCK_SIZE {
+                    let y = y * BLOCK_SIZE + dy;
+                    for dx in 0..BLOCK_SIZE {
+                        let x = x * BLOCK_SIZE + dx;
+                        let virtual_index = dy * BLOCK_SIZE + dx;
+                        for k in 0..IN_FEATS {
+                            virtual_input[virtual_index * IN_FEATS + k] = input[y * WIDTH + x][k];
+                        }
+                    }
+                }
+
+                (x, y, output_index, virtual_input)
+            })
+        })
+    }
+}
+
+impl<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const OUT_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<
+        Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+        Output = NeuraVector<OUT_FEATS, f64>,
+    >,
+> NeuraLayer for NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+where
+    [f64; WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE]: Sized,
+{
+    type Input = NeuraMatrix<IN_FEATS, { WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE }, f64>;
+    type Output = NeuraMatrix<OUT_FEATS, { WIDTH * HEIGHT }, f64>;
+
+    fn eval(&self, input: &Self::Input) -> Self::Output {
+        let mut res = Self::Output::default();
+
+        for (_, _, output_index, virtual_input) in self.iterate_blocks(input) {
+            res.set_row(output_index, self.inner_layer.eval(&virtual_input));
+        }
+
+        res
+    }
+}
+
+impl<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const OUT_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<
+        Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+        Output = NeuraVector<OUT_FEATS, f64>,
+    >,
+> NeuraTrainableLayer for NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+where
+    [f64; WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE]: Sized,
+    Layer: NeuraTrainableLayer,
+{
+    type Delta = Layer::Delta;
+
+    fn backpropagate(
+        &self,
+        input: &Self::Input,
+        epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta) {
+        let mut gradient_sum = Layer::Delta::zero();
+        let mut next_epsilon = Self::Input::default();
+
+        for (x, y, output_index, virtual_input) in self.iterate_blocks(input) {
+            let (layer_next_epsilon, gradient) = self
+                .inner_layer
+                .backpropagate(&virtual_input, epsilon.get_row(output_index));
+
+            gradient_sum.add_assign(&gradient);
+
+            for dy in 0..BLOCK_SIZE {
+                let y = y * BLOCK_SIZE + dy;
+                for dx in 0..BLOCK_SIZE {
+                    let x = x * BLOCK_SIZE + dx;
+                    let input_index = y * WIDTH + x;
+                    for k in 0..IN_FEATS {
+                        next_epsilon[input_index][k] =
+                            layer_next_epsilon[(dy * BLOCK_SIZE + dx) * IN_FEATS + k];
+                    }
+                }
+            }
+        }
+
+        (next_epsilon, gradient_sum)
+    }
+
+    fn regularize(&self) -> Self::Delta {
+        self.inner_layer.regularize()
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Delta) {
+        self.inner_layer.apply_gradient(gradient);
+    }
+}
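Aside (not part of the commit): the output layout used above. Each of the WIDTH x HEIGHT blocks feeds IN_FEATS * BLOCK_SIZE * BLOCK_SIZE values to the inner layer and writes one OUT_FEATS row at output_index = y * WIDTH + x, i.e. row-major block order, as in iterate_blocks.

fn output_index(x: usize, y: usize, width: usize) -> usize {
    y * width + x
}

fn main() {
    // With WIDTH = HEIGHT = 14, as in the conv2d_block layer of the example network:
    assert_eq!(output_index(0, 0, 14), 0); // top-left block
    assert_eq!(output_index(3, 5, 14), 73); // block in column 3, row 5
    assert_eq!(output_index(13, 13, 14), 195); // bottom-right block, 14 * 14 - 1
}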

@@ -2,7 +2,7 @@ mod dense;
 pub use dense::NeuraDenseLayer;
 
 mod convolution;
-pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DPadLayer};
+pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};
 
 mod dropout;
 pub use dropout::NeuraDropoutLayer;
@@ -124,6 +124,14 @@ macro_rules! neura_layer {
         $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
     };
 
+    ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
+    };
+
+    ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
+    };
+
     ( "pool_global"; $reduce:expr ) => {
         $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
     };
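For orientation (not part of the commit): the second new arm is the one exercised by the example network above. `neura_layer!("conv2d_block", 14, 14; 2; neura_layer!("dense", {3 * 2 * 2}, 2; Relu))` expands to roughly `NeuraConv2DBlockLayer::new(<the dense layer>) as NeuraConv2DBlockLayer<14, 14, _, 2, _>`, leaving IN_FEATS and the inner layer type to be inferred, while the first arm lets the caller pin IN_FEATS explicitly.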

@@ -150,7 +150,7 @@ where
                 block_input[k] = input[block * BLOCK_LENGTH + k];
             }
 
-            let mut gradient = self.reducer.nabla(block_input);
+            let gradient = self.reducer.nabla(block_input);
 
             for k in 0..BLOCK_LENGTH {
                 column_gradient[block * BLOCK_LENGTH + k] = gradient[k] * epsilon[block][j];
