diff --git a/examples/convolution.rs b/examples/convolution.rs
index a19e739..e59d38f 100644
--- a/examples/convolution.rs
+++ b/examples/convolution.rs
@@ -2,7 +2,7 @@
 #![feature(generic_const_exprs)]
 
 use neuramethyst::algebra::NeuraVector;
-use neuramethyst::derivable::reduce::Average;
+use neuramethyst::derivable::reduce::{Average, Max};
 use rust_mnist::Mnist;
 
 use neuramethyst::derivable::activation::{Linear, Relu};
@@ -55,17 +55,22 @@ fn main() {
 
     let mut network = neura_sequential![
         neura_layer!("unstable_reshape", 1, { 28 * 28 }),
-        neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 10; Relu)),
-        // neura_layer!("conv2d_pad", 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
+        neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 3; Relu)),
+        // neura_layer!("conv2d_block", 7, 7; 4; neura_layer!("dense", {3 * 4 * 4}, 8; Relu)),
+        // neura_layer!("conv2d_pad"; 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
+        neura_layer!("unstable_flatten"),
+        neura_layer!("dropout", 0.33),
+        neura_layer!("unstable_reshape", 3, { 28 * 28 }),
+        neura_layer!("conv2d_block", 14, 14; 2; neura_layer!("dense", {3 * 2 * 2}, 2; Relu)),
+        // neura_layer!("unstable_flatten"),
+        // neura_layer!("dropout", 0.33),
+        // neura_layer!("unstable_reshape", 2, { 14 * 14 }),
+        // neura_layer!("conv2d_pad"; 14, 5; neura_layer!("dense", {2 * 5 * 5}, 20; Relu)),
+        // neura_layer!("pool_global"; Max),
-        // neura_layer!("pool_global", 10, {28 * 28}; Average),
-        // neura_layer!("pool1d", 10, 28, 28; Average),
-        // neura_layer!("unstable_flatten", 10, 28),
-        neura_layer!("unstable_flatten", 10, { 28 * 28 }),
-        // neura_layer!("dense", 100; Relu),
-        // neura_layer!("dropout", 0.5),
-        // neura_layer!("dense", 30; Relu),
-        // neura_layer!("dropout", 0.5),
+        // neura_layer!("pool1d", {14 * 2}, 7; Max),
+        neura_layer!("unstable_flatten"),
+        neura_layer!("dropout", 0.2),
         neura_layer!("dense", 10; Linear),
         neura_layer!("softmax")
     ];
diff --git a/src/derivable/reduce.rs b/src/derivable/reduce.rs
index c692147..1070806 100644
--- a/src/derivable/reduce.rs
+++ b/src/derivable/reduce.rs
@@ -1,3 +1,5 @@
+use crate::utils::{argmax, one_hot};
+
 use super::*;
 
 #[derive(Clone, Copy, Debug, PartialEq)]
@@ -18,3 +20,25 @@ impl<const LENGTH: usize> NeuraReducer<LENGTH> for Average {
         NeuraVector::from_value(1.0 / inputs.len() as f64)
     }
 }
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Max;
+
+impl<const LENGTH: usize> NeuraReducer<LENGTH> for Max {
+    #[inline(always)]
+    fn eval(&self, inputs: NeuraVector<LENGTH, f64>) -> f64 {
+        let mut max = 0.0;
+        for &i in inputs.iter() {
+            max = i.max(max);
+        }
+        max
+    }
+
+    #[inline(always)]
+    fn nabla(
+        &self,
+        inputs: NeuraVector<LENGTH, f64>,
+    ) -> NeuraVector<LENGTH, f64> {
+        one_hot(argmax(inputs.as_ref()))
+    }
+}
diff --git a/src/layer/convolution.rs b/src/layer/convolution.rs
index 1e14905..c8fde6b 100644
--- a/src/layer/convolution.rs
+++ b/src/layer/convolution.rs
@@ -167,7 +167,7 @@ pub struct NeuraConv2DPadLayer<
     /// The width of the image, in grid units.
    ///
     /// **Class invariant:** `LAYER % width == 0`, `width > 0`
-    pub width: NonZeroUsize,
+    width: NonZeroUsize,
 }
 
 impl<
@@ -192,6 +192,10 @@ impl<
         }
     }
 
+    pub fn width(&self) -> NonZeroUsize {
+        self.width
+    }
+
     /// Iterates within the `(WINDOW, WINDOW)` window centered around `x, y`;
     /// Returns a 4-uple `(x' = x + δx, y' = y + δy, δy * WINDOW + δx, y' * width + x')`, with the last element
     /// being set to `None` if `x'` or `y'` are out of bound.
@@ -338,3 +342,148 @@ where
         self.inner_layer.cleanup();
     }
 }
+
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub struct NeuraConv2DBlockLayer<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>>,
+> {
+    pub inner_layer: Layer,
+}
+
+impl<
+        const WIDTH: usize,
+        const HEIGHT: usize,
+        const IN_FEATS: usize,
+        const BLOCK_SIZE: usize,
+        Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>>,
+    > NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+{
+    pub fn new(layer: Layer) -> Self {
+        Self { inner_layer: layer }
+    }
+
+    fn iterate_blocks<'a>(
+        &'a self,
+        input: &'a NeuraMatrix<IN_FEATS, { WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+    ) -> impl Iterator<
+        Item = (
+            usize,
+            usize,
+            usize,
+            NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+        ),
+    > + 'a {
+        (0..HEIGHT).flat_map(move |y| {
+            (0..WIDTH).map(move |x| {
+                let output_index = y * WIDTH + x;
+                let mut virtual_input = NeuraVector::default();
+
+                for dy in 0..BLOCK_SIZE {
+                    let y = y * BLOCK_SIZE + dy;
+                    for dx in 0..BLOCK_SIZE {
+                        let x = x * BLOCK_SIZE + dx;
+                        let virtual_index = dy * BLOCK_SIZE + dx;
+
+                        for k in 0..IN_FEATS {
+                            virtual_input[virtual_index * IN_FEATS + k] = input[y * WIDTH + x][k];
+                        }
+                    }
+                }
+
+                (x, y, output_index, virtual_input)
+            })
+        })
+    }
+}
+
+impl<
+        const WIDTH: usize,
+        const HEIGHT: usize,
+        const IN_FEATS: usize,
+        const OUT_FEATS: usize,
+        const BLOCK_SIZE: usize,
+        Layer: NeuraLayer<
+            Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+            Output = NeuraVector<OUT_FEATS, f64>,
+        >,
+    > NeuraLayer for NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+where
+    [f64; WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE]: Sized,
+{
+    type Input = NeuraMatrix<IN_FEATS, { WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE }, f64>;
+
+    type Output = NeuraMatrix<OUT_FEATS, { WIDTH * HEIGHT }, f64>;
+
+    fn eval(&self, input: &Self::Input) -> Self::Output {
+        let mut res = Self::Output::default();
+
+        for (_, _, output_index, virtual_input) in self.iterate_blocks(input) {
+            res.set_row(output_index, self.inner_layer.eval(&virtual_input));
+        }
+
+        res
+    }
+}
+
+impl<
+        const WIDTH: usize,
+        const HEIGHT: usize,
+        const IN_FEATS: usize,
+        const OUT_FEATS: usize,
+        const BLOCK_SIZE: usize,
+        Layer: NeuraLayer<
+            Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+            Output = NeuraVector<OUT_FEATS, f64>,
+        >,
+    > NeuraTrainableLayer for NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+where
+    [f64; WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE]: Sized,
+    Layer: NeuraTrainableLayer,
+{
+    type Delta = Layer::Delta;
+
+    fn backpropagate(
+        &self,
+        input: &Self::Input,
+        epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta) {
+        let mut gradient_sum = Layer::Delta::zero();
+        let mut next_epsilon = Self::Input::default();
+
+        for (x, y, output_index, virtual_input) in self.iterate_blocks(input) {
+            let (layer_next_epsilon, gradient) = self
+                .inner_layer
+                .backpropagate(&virtual_input, epsilon.get_row(output_index));
+
+            gradient_sum.add_assign(&gradient);
+
+            for dy in 0..BLOCK_SIZE {
+                let y = y * BLOCK_SIZE + dy;
+                for dx in 0..BLOCK_SIZE {
+                    let x = x * BLOCK_SIZE + dx;
+                    let input_index = y * WIDTH + x;
+
+                    for k in 0..IN_FEATS {
+                        next_epsilon[input_index][k] =
+                            layer_next_epsilon[(dy * BLOCK_SIZE + dx) * IN_FEATS + k];
+                    }
+                }
+            }
+        }
+
+        (next_epsilon, gradient_sum)
+    }
+
+    fn regularize(&self) -> Self::Delta {
+        self.inner_layer.regularize()
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Delta) {
+        self.inner_layer.apply_gradient(gradient);
+    }
+}
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 2b08a33..c89d835 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -2,7 +2,7 @@ mod dense;
 pub use dense::NeuraDenseLayer;
 
 mod convolution;
-pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DPadLayer};
+pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};
 
 mod dropout;
 pub use dropout::NeuraDropoutLayer;
@@ -124,6 +124,14 @@ macro_rules! neura_layer {
         $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
     };
 
+    ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
+    };
+
+    ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
+    };
+
     ( "pool_global"; $reduce:expr ) => {
         $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
     };
diff --git a/src/layer/pool.rs b/src/layer/pool.rs
index 22ad4ae..9c1f056 100644
--- a/src/layer/pool.rs
+++ b/src/layer/pool.rs
@@ -150,7 +150,7 @@ where
                     block_input[k] = input[block * BLOCK_LENGTH + k];
                 }
 
-                let mut gradient = self.reducer.nabla(block_input);
+                let gradient = self.reducer.nabla(block_input);
 
                 for k in 0..BLOCK_LENGTH {
                     column_gradient[block * BLOCK_LENGTH + k] = gradient[k] * epsilon[block][j];
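For reference, a minimal sketch of how the new `conv2d_block` arm composes once this patch lands, with the shape bookkeeping spelled out. This is illustrative only and not part of the patch: the `neuramethyst::prelude` import is an assumption (the example's full `use` list is not visible in these hunks), and the 14×14 grid of 2×2 blocks is just one choice consistent with the `unstable_reshape` output shown above.

```rust
#![feature(generic_const_exprs)]

use neuramethyst::derivable::activation::{Linear, Relu};
// Assumed import path: how `neura_layer!` / `neura_sequential!` come into
// scope in examples/convolution.rs is not shown in this patch.
use neuramethyst::prelude::*;

fn main() {
    let _network = neura_sequential![
        // 1 input feature over a 28x28 grid, as in the MNIST example above.
        neura_layer!("unstable_reshape", 1, { 28 * 28 }),
        // Cut the grid into a 14x14 arrangement of non-overlapping 2x2 blocks;
        // each block (1 * 2 * 2 values) runs through the inner dense layer,
        // yielding 4 features per block, i.e. a 4-feature 14x14 output grid.
        neura_layer!("conv2d_block", 1, 14, 14; 2; neura_layer!("dense", {1 * 2 * 2}, 4; Relu)),
        // The new Max reducer (use neuramethyst::derivable::reduce::Max) could
        // slot in here for global pooling, mirroring the commented-out line in
        // the example:
        // neura_layer!("pool_global"; Max),
        neura_layer!("unstable_flatten"),
        neura_layer!("dense", 10; Linear),
        neura_layer!("softmax")
    ];
}
```

Unlike `conv2d_pad`, the block layer never overlaps its windows, so each input cell belongs to exactly one block; backpropagation can therefore just scatter every block's epsilon back to disjoint cells, which is essentially what `NeuraConv2DBlockLayer::backpropagate` does above.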