From a6da11b1255a2365a1a916a5aa4469f14525d242 Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Mon, 17 Apr 2023 18:46:16 +0200
Subject: [PATCH] :sparkles: Pooling layers

---
 examples/convolution.rs  |  16 ++-
 src/algebra/matrix.rs    |  60 +++++++++--
 src/algebra/vector.rs    |   4 +-
 src/derivable/mod.rs     |  10 ++
 src/derivable/reduce.rs  |  20 ++++
 src/layer/convolution.rs | 218 +++++++++++++++++++++++++++++++++++++--
 src/layer/mod.rs         |  41 +++++++-
 src/layer/pool.rs        | 173 +++++++++++++++++++++++++++++++
 8 files changed, 514 insertions(+), 28 deletions(-)
 create mode 100644 src/derivable/reduce.rs
 create mode 100644 src/layer/pool.rs

diff --git a/examples/convolution.rs b/examples/convolution.rs
index f0a4cd3..a19e739 100644
--- a/examples/convolution.rs
+++ b/examples/convolution.rs
@@ -2,6 +2,7 @@
 #![feature(generic_const_exprs)]
 
 use neuramethyst::algebra::NeuraVector;
+use neuramethyst::derivable::reduce::Average;
 use rust_mnist::Mnist;
 
 use neuramethyst::derivable::activation::{Linear, Relu};
@@ -53,13 +54,18 @@ fn main() {
     let test_inputs: Vec<_> = test_images.zip(test_labels.into_iter()).collect();
 
     let mut network = neura_sequential![
-        neura_layer!("unstable_reshape", 28, 28),
-        neura_layer!("conv1d_pad", 3; neura_layer!("dense", {28 * 3}, 10; Relu)),
-        neura_layer!("unstable_flatten"),
+        neura_layer!("unstable_reshape", 1, { 28 * 28 }),
+        neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 10; Relu)),
+        // neura_layer!("conv2d_pad", 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
+
+        // neura_layer!("pool_global", 10, {28 * 28}; Average),
+        // neura_layer!("pool1d", 10, 28, 28; Average),
+        // neura_layer!("unstable_flatten", 10, 28),
+        neura_layer!("unstable_flatten", 10, { 28 * 28 }),
         // neura_layer!("dense", 100; Relu),
         // neura_layer!("dropout", 0.5),
-        neura_layer!("dense", 30; Relu),
-        neura_layer!("dropout", 0.5),
+        // neura_layer!("dense", 30; Relu),
+        // neura_layer!("dropout", 0.5),
         neura_layer!("dense", 10; Linear),
         neura_layer!("softmax")
     ];
diff --git a/src/algebra/matrix.rs b/src/algebra/matrix.rs
index a393e1b..484b0a1 100644
--- a/src/algebra/matrix.rs
+++ b/src/algebra/matrix.rs
@@ -49,6 +49,52 @@ impl<const WIDTH: usize, const HEIGHT: usize, F> NeuraMatrix<WIDTH, HEIGHT, F>
             self.data[y][j] = row[j].clone();
         }
     }
+
+    #[inline]
+    pub fn set_column(&mut self, x: usize, column: impl Borrow<[F; HEIGHT]>)
+    where
+        F: Clone,
+    {
+        if x >= WIDTH {
+            panic!(
+                "Cannot set column {} of NeuraMatrix<{}, {}, _>: column index out of bound",
+                x, WIDTH, HEIGHT
+            );
+        }
+
+        let column = column.borrow();
+        for i in 0..HEIGHT {
+            self.data[i][x] = column[i].clone();
+        }
+    }
+
+    #[inline]
+    pub fn get_row(&self, y: usize) -> NeuraVector<WIDTH, F>
+    where
+        F: Default + Clone,
+    {
+        let mut row = NeuraVector::default();
+
+        for j in 0..WIDTH {
+            row[j] = self.data[y][j].clone();
+        }
+
+        row
+    }
+
+    #[inline]
+    pub fn get_column(&self, x: usize) -> NeuraVector<HEIGHT, F>
+    where
+        F: Default + Clone,
+    {
+        let mut row = NeuraVector::default();
+
+        for i in 0..HEIGHT {
+            row[i] = self.data[i][x].clone();
+        }
+
+        row
+    }
 }
 
 impl<const WIDTH: usize, const HEIGHT: usize, F> NeuraMatrix<WIDTH, HEIGHT, F> {
@@ -102,22 +148,21 @@ impl<const WIDTH: usize, const HEIGHT: usize, F> NeuraMatrix<WIDTH, HEIGHT, F>
     }
 }
 
-impl<const WIDTH: usize, const HEIGHT: usize, F: From<f64> + Into<f64>> NeuraVectorSpace
+impl<const WIDTH: usize, const HEIGHT: usize, F: NeuraVectorSpace> NeuraVectorSpace
     for NeuraMatrix<WIDTH, HEIGHT, F>
 {
     fn add_assign(&mut self, other: &Self) {
         for i in 0..HEIGHT {
             for j in 0..WIDTH {
-                self.data[i][j] = self.data[i][j] + other.data[i][j];
+                self.data[i][j].add_assign(&other.data[i][j]);
             }
         }
     }
 
     fn mul_assign(&mut self, by: f64) {
-        let by: F = by.into();
         for i in 0..HEIGHT {
             for j in 0..WIDTH {
-                self.data[i][j] = self.data[i][j] * by;
+                self.data[i][j].mul_assign(by);
             }
         }
     }
@@ -128,16 +173,15 @@ impl<const WIDTH: usize, const HEIGHT: usize, F: From<f64> + Into<f64>> NeuraVe
     }
 
     fn norm_squared(&self) -> f64 {
-        let mut sum = F::zero();
+        let mut sum = 0.0;
 
         for i in 0..HEIGHT {
             for j in 0..WIDTH {
-                let x = self.data[i][j];
-                sum = sum + x * x;
+                sum += self.data[i][j].norm_squared();
             }
         }
 
-        sum.into()
+        sum
     }
 }
diff --git a/src/algebra/vector.rs b/src/algebra/vector.rs
index f5e637e..7042431 100644
--- a/src/algebra/vector.rs
+++ b/src/algebra/vector.rs
@@ -68,9 +68,9 @@ impl<const LENGTH: usize, F> NeuraVector<LENGTH, F> {
         result
     }
 
-    pub fn hadamard_product(&self, other: impl AsRef<[F; LENGTH]>) -> NeuraVector<LENGTH, F> {
+    pub fn hadamard_product(&self, other: impl Borrow<[F; LENGTH]>) -> NeuraVector<LENGTH, F> {
         let mut result: NeuraVector<LENGTH, F> = NeuraVector::from_value(F::zero());
-        let other = other.as_ref();
+        let other = other.borrow();
 
         for i in 0..LENGTH {
             result[i] = self.data[i] * other[i];
diff --git a/src/derivable/mod.rs b/src/derivable/mod.rs
index 57587e7..94a7a84 100644
--- a/src/derivable/mod.rs
+++ b/src/derivable/mod.rs
@@ -1,5 +1,8 @@
+use crate::algebra::NeuraVector;
+
 pub mod activation;
 pub mod loss;
+pub mod reduce;
 pub mod regularize;
 
 pub trait NeuraDerivable<F> {
@@ -31,3 +34,10 @@
     /// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
     fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
 }
+
+pub trait NeuraReducer<F> {
+    fn eval<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, F>) -> F;
+
+    /// Should return the gradient of the reducer at `inputs`, i.e. `[∂eval(inputs)/∂inputsᵢ]ᵢ`
+    fn nabla<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, F>) -> NeuraVector<LENGTH, F>;
+}
diff --git a/src/derivable/reduce.rs b/src/derivable/reduce.rs
new file mode 100644
index 0000000..c692147
--- /dev/null
+++ b/src/derivable/reduce.rs
@@ -0,0 +1,20 @@
+use super::*;
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Average;
+
+impl NeuraReducer<f64> for Average {
+    #[inline(always)]
+    fn eval<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, f64>) -> f64 {
+        let sum: f64 = inputs.iter().sum();
+        sum / inputs.len() as f64
+    }
+
+    #[inline(always)]
+    fn nabla<const LENGTH: usize>(
+        &self,
+        inputs: NeuraVector<LENGTH, f64>,
+    ) -> NeuraVector<LENGTH, f64> {
+        NeuraVector::from_value(1.0 / inputs.len() as f64)
+    }
+}
diff --git a/src/layer/convolution.rs b/src/layer/convolution.rs
index 3158e86..1e14905 100644
--- a/src/layer/convolution.rs
+++ b/src/layer/convolution.rs
@@ -1,17 +1,21 @@
+use std::num::NonZeroUsize;
+
 use crate::algebra::{NeuraMatrix, NeuraVector};
 
 use super::*;
 
-/// A 1-dimensional convolutional
+/// A 1-dimensional convolutional layer, which operates on rows of the input,
+/// and pads them with `self.pad_with`
+#[non_exhaustive]
 #[derive(Clone, Debug)]
-pub struct NeuraConv1DPad<
+pub struct NeuraConv1DPadLayer<
     const LENGTH: usize,
     const IN_FEATS: usize,
     const WINDOW: usize,
     Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>>,
 > {
-    inner_layer: Layer,
-    pad_with: NeuraVector<IN_FEATS, f64>,
+    pub inner_layer: Layer,
+    pub pad_with: NeuraVector<IN_FEATS, f64>,
 }
 
 impl<
@@ -19,7 +23,7 @@ impl<
         const IN_FEATS: usize,
         const WINDOW: usize,
         Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>>,
-    > NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
+    > NeuraConv1DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
 where
     [u8; IN_FEATS * WINDOW]: Sized,
 {
@@ -64,7 +68,7 @@ impl<
            Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>,
            Output = NeuraVector<OUT_FEATS, f64>,
        >,
-    > NeuraLayer for NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
+    > NeuraLayer for NeuraConv1DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
 {
     type Input = NeuraMatrix<IN_FEATS, LENGTH, f64>;
     type Output = NeuraMatrix<OUT_FEATS, LENGTH, f64>;
@@ -89,7 +93,7 @@ impl<
            Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>,
            Output = NeuraVector<OUT_FEATS, f64>,
        >,
-    > NeuraTrainableLayer for NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
+    > NeuraTrainableLayer for NeuraConv1DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
 where
     Layer: NeuraTrainableLayer,
 {
@@ -120,7 +124,7 @@ where
                 let input_index = input_index as usize;
 
                 for j in 0..IN_FEATS {
-                    next_epsilon[input_index][j] += layer_next_epsilon[i * WINDOW + j];
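+                    // The inner layer's input packs IN_FEATS values per window position,
+                    // so the stride for the window index is IN_FEATS, not WINDOW.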
+                    next_epsilon[input_index][j] += layer_next_epsilon[i * IN_FEATS + j];
                 }
             }
         }
@@ -135,4 +139,202 @@ where
     fn apply_gradient(&mut self, gradient: &Self::Delta) {
         self.inner_layer.apply_gradient(gradient);
     }
+
+    fn prepare_epoch(&mut self) {
+        self.inner_layer.prepare_epoch();
+    }
+
+    fn cleanup(&mut self) {
+        self.inner_layer.cleanup();
+    }
+}
+
+/// Applies and trains a 2d convolution on an image, which has been laid out flat
+/// (instead of being a `(width, height, features)` tensor, it should be a `(width * height, features)` matrix).
+///
+/// The `width` value must be given to the layer, to allow it to interpret the input as a 2D image.
+/// `Self::new` asserts that `width` divides `LENGTH`.
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub struct NeuraConv2DPadLayer<
+    const LENGTH: usize,
+    const IN_FEATS: usize,
+    const WINDOW: usize,
+    Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>>,
+> {
+    pub inner_layer: Layer,
+    pub pad_with: NeuraVector<IN_FEATS, f64>,
+    /// The width of the image, in grid units.
+    ///
+    /// **Class invariant:** `LENGTH % width == 0`, `width > 0`
+    pub width: NonZeroUsize,
+}
+
+impl<
+        const LENGTH: usize,
+        const IN_FEATS: usize,
+        const WINDOW: usize,
+        Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>>,
+    > NeuraConv2DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
+{
+    pub fn new(inner_layer: Layer, pad_with: NeuraVector<IN_FEATS, f64>, width: usize) -> Self {
+        assert!(
+            LENGTH % width == 0,
+            "width ({}) does not divide LENGTH ({})",
+            width,
+            LENGTH
+        );
+
+        Self {
+            inner_layer,
+            pad_with,
+            width: width.try_into().expect("width cannot be zero!"),
+        }
+    }
+
+    /// Iterates within the `(WINDOW, WINDOW)` window centered around `x, y`;
+    /// Returns a 4-tuple `(x' = x + δx, y' = y + δy, δy * WINDOW + δx, y' * width + x')`, with the last element
+    /// being set to `None` if `x'` or `y'` are out of bounds.
+    fn iterate_within_window<'a>(
+        &'a self,
+        x: usize,
+        y: usize,
+    ) -> impl Iterator<Item = (isize, isize, usize, Option<usize>)> + 'a {
+        let window_offset = (WINDOW as isize - 1) / 2;
+        let width = self.width.get() as isize;
+        let height = LENGTH as isize / width;
+
+        (0..WINDOW).flat_map(move |dy| {
+            let window_index = dy * WINDOW;
+            let dy = dy as isize - window_offset;
+            (0..WINDOW).map(move |dx| {
+                let window_index = window_index + dx;
+                let dx = dx as isize - window_offset;
+
+                let x = x as isize + dx;
+                let y = y as isize + dy;
+
+                if x < 0 || y < 0 || x >= width || y >= height {
+                    (x, y, window_index, None)
+                } else {
+                    (x, y, window_index, Some((y * width + x) as usize))
+                }
+            })
+        })
+    }
+
+    fn iterate_windows<'a>(
+        &'a self,
+        input: &'a NeuraMatrix<IN_FEATS, LENGTH, f64>,
+    ) -> impl Iterator<Item = (usize, usize, Layer::Input)> + 'a {
+        (0..self.width.into()).flat_map(move |x| {
+            (0..(LENGTH / self.width)).map(move |y| {
+                // TODO: hint the compiler that x * y < LENGTH and LENGTH % width == 0?
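+                // Gather the IN_FEATS values of each of the WINDOW * WINDOW cells around
+                // (x, y) into a single flat vector, substituting `pad_with` for cells that
+                // fall outside of the image.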
+                let mut virtual_input: Layer::Input = NeuraVector::default();
+
+                for (_, _, window_index, input_index) in self.iterate_within_window(x, y) {
+                    if let Some(input_index) = input_index {
+                        let input_row: &[f64; IN_FEATS] = &input[input_index];
+                        for j in 0..IN_FEATS {
+                            virtual_input[window_index * IN_FEATS + j] = input_row[j];
+                        }
+                    } else {
+                        for j in 0..IN_FEATS {
+                            virtual_input[window_index * IN_FEATS + j] = self.pad_with[j];
+                        }
+                    }
+                }
+
+                (x, y, virtual_input)
+            })
+        })
+    }
+}
+
+impl<
+        const LENGTH: usize,
+        const IN_FEATS: usize,
+        const OUT_FEATS: usize,
+        const WINDOW: usize,
+        Layer: NeuraLayer<
+            Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>,
+            Output = NeuraVector<OUT_FEATS, f64>,
+        >,
+    > NeuraLayer for NeuraConv2DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
+{
+    type Input = NeuraMatrix<IN_FEATS, LENGTH, f64>;
+    type Output = NeuraMatrix<OUT_FEATS, LENGTH, f64>;
+
+    fn eval(&self, input: &Self::Input) -> Self::Output {
+        let mut res = NeuraMatrix::default();
+
+        for (cx, cy, virtual_input) in self.iterate_windows(input) {
+            res.set_row(
+                cy * self.width.get() + cx,
+                self.inner_layer.eval(&virtual_input),
+            );
+        }
+
+        res
+    }
+}
+
+impl<
+        const LENGTH: usize,
+        const IN_FEATS: usize,
+        const OUT_FEATS: usize,
+        const WINDOW: usize,
+        Layer: NeuraLayer<
+            Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>,
+            Output = NeuraVector<OUT_FEATS, f64>,
+        >,
+    > NeuraTrainableLayer for NeuraConv2DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
+where
+    Layer: NeuraTrainableLayer,
+{
+    type Delta = <Layer as NeuraTrainableLayer>::Delta;
+
+    fn backpropagate(
+        &self,
+        input: &Self::Input,
+        epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta) {
+        let mut next_epsilon = Self::Input::default();
+        let mut weights_gradient_sum = Self::Delta::zero();
+
+        for (cx, cy, virtual_input) in self.iterate_windows(input) {
+            let epsilon = NeuraVector::from(&epsilon[cy * self.width.get() + cx]);
+
+            let (layer_next_epsilon, weights_gradient) =
+                self.inner_layer.backpropagate(&virtual_input, epsilon);
+            weights_gradient_sum.add_assign(&weights_gradient);
+
+            for (_, _, window_index, input_index) in self.iterate_within_window(cx, cy) {
+                if let Some(input_index) = input_index {
+                    for j in 0..IN_FEATS {
+                        next_epsilon[input_index][j] +=
+                            layer_next_epsilon[window_index * IN_FEATS + j];
+                    }
+                }
+            }
+        }
+
+        (next_epsilon, weights_gradient_sum)
+    }
+
+    fn regularize(&self) -> Self::Delta {
+        self.inner_layer.regularize()
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Delta) {
+        self.inner_layer.apply_gradient(gradient);
+    }
+
+    fn prepare_epoch(&mut self) {
+        self.inner_layer.prepare_epoch();
+    }
+
+    fn cleanup(&mut self) {
+        self.inner_layer.cleanup();
+    }
 }
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 6f10007..2b08a33 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -2,7 +2,7 @@ mod dense;
 pub use dense::NeuraDenseLayer;
 
 mod convolution;
-pub use convolution::NeuraConv1DPad;
+pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DPadLayer};
 
 mod dropout;
 pub use dropout::NeuraDropoutLayer;
@@ -16,6 +16,9 @@ pub use one_hot::NeuraOneHotLayer;
 mod lock;
 pub use lock::NeuraLockLayer;
 
+mod pool;
+pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer};
+
 mod reshape;
 pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer};
 
@@ -105,12 +108,40 @@ macro_rules! neura_layer {
         $crate::layer::NeuraLockLayer($layer)
     };
 
-    ( "conv1d_pad", $length:expr, $feats:expr, $window:expr; $layer:expr ) => {
-        $crate::layer::NeuraConv1DPad::new($layer, Default::default()) as $crate::layer::NeuraConv1DPad<$length, $feats, $window, _>
+    ( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
+    };
+
+    ( "conv1d_pad"; $window:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
+    };
+
+    ( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
+    };
+
+    ( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
+    };
+
+    ( "pool_global"; $reduce:expr ) => {
+        $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
+    };
+
+    ( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
+        $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
+    };
+
+    ( "pool1d", $blocklength:expr; $reduce:expr ) => {
+        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
+    };
+
+    ( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
+        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
     };
 
-    ( "conv1d_pad", $window:expr; $layer:expr ) => {
-        $crate::layer::NeuraConv1DPad::new($layer, Default::default()) as $crate::layer::NeuraConv1DPad<_, _, $window, _>
+    ( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
+        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
     };
 
     ( "unstable_flatten" ) => {
diff --git a/src/layer/pool.rs b/src/layer/pool.rs
new file mode 100644
index 0000000..22ad4ae
--- /dev/null
+++ b/src/layer/pool.rs
@@ -0,0 +1,173 @@
+use crate::{
+    algebra::{NeuraMatrix, NeuraVector},
+    derivable::NeuraReducer,
+};
+
+use super::*;
+
+pub struct NeuraGlobalPoolLayer<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>>
+{
+    reducer: Reducer,
+}
+
+impl<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>>
+    NeuraGlobalPoolLayer<LENGTH, FEATS, Reducer>
+{
+    pub fn new(reducer: Reducer) -> Self {
+        Self { reducer }
+    }
+}
+
+impl<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>> NeuraLayer
+    for NeuraGlobalPoolLayer<LENGTH, FEATS, Reducer>
+{
+    type Input = NeuraMatrix<FEATS, LENGTH, f64>;
+
+    type Output = NeuraVector<FEATS, f64>;
+
+    fn eval(&self, input: &Self::Input) -> Self::Output {
+        let mut res = Self::Output::default();
+
+        for j in 0..FEATS {
+            let input = input.get_column(j);
+            res[j] = self.reducer.eval(input);
+        }
+
+        res
+    }
+}
+
+impl<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>> NeuraTrainableLayer
+    for NeuraGlobalPoolLayer<LENGTH, FEATS, Reducer>
+{
+    type Delta = ();
+
+    #[inline]
+    fn backpropagate(
+        &self,
+        input: &Self::Input,
+        epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta) {
+        let mut next_epsilon = Self::Input::default();
+
+        for j in 0..FEATS {
+            let input = input.get_column(j);
+            let mut gradient = self.reducer.nabla(input);
+            gradient.mul_assign(epsilon[j]);
+            next_epsilon.set_column(j, gradient);
+        }
+
+        (next_epsilon, ())
+    }
+
+    #[inline(always)]
+    fn regularize(&self) -> Self::Delta {
+        ()
+    }
+
+    #[inline(always)]
+    fn apply_gradient(&mut self, _gradient: &Self::Delta) {
+        // Noop
+    }
+}
+
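+/// A 1-dimensional pooling layer: each of the `FEATS` input columns is split into `BLOCKS`
+/// consecutive blocks of `BLOCK_LENGTH` values, and `Reducer` collapses every block into a
+/// single value.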
+ } +} + +pub struct NeuraPool1DLayer< + const BLOCKS: usize, + const BLOCK_LENGTH: usize, + const FEATS: usize, + Reducer: NeuraReducer, +> { + reducer: Reducer, +} + +impl< + const BLOCKS: usize, + const BLOCK_LENGTH: usize, + const FEATS: usize, + Reducer: NeuraReducer, + > NeuraPool1DLayer +{ + pub fn new(reducer: Reducer) -> Self { + Self { reducer } + } +} + +impl< + const BLOCKS: usize, + const BLOCK_LENGTH: usize, + const FEATS: usize, + Reducer: NeuraReducer, + > NeuraLayer for NeuraPool1DLayer +where + [f64; BLOCKS * BLOCK_LENGTH]: Sized, +{ + type Input = NeuraMatrix; + type Output = NeuraMatrix; + + fn eval(&self, input: &Self::Input) -> Self::Output { + let mut result = NeuraMatrix::default(); + + for j in 0..FEATS { + let input = input.get_column(j); + for block in 0..BLOCKS { + let mut block_input: NeuraVector = NeuraVector::default(); + for k in 0..BLOCK_LENGTH { + block_input[k] = input[block * BLOCK_LENGTH + k]; + } + result[block][j] = self.reducer.eval(block_input); + } + } + + result + } +} + +impl< + const BLOCKS: usize, + const BLOCK_LENGTH: usize, + const FEATS: usize, + Reducer: NeuraReducer, + > NeuraTrainableLayer for NeuraPool1DLayer +where + [f64; BLOCKS * BLOCK_LENGTH]: Sized, +{ + type Delta = (); + + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + let mut next_epsilon = Self::Input::default(); + + for j in 0..FEATS { + let input = input.get_column(j); + let mut column_gradient = NeuraVector::default(); + + for block in 0..BLOCKS { + let mut block_input: NeuraVector = NeuraVector::default(); + for k in 0..BLOCK_LENGTH { + block_input[k] = input[block * BLOCK_LENGTH + k]; + } + + let mut gradient = self.reducer.nabla(block_input); + + for k in 0..BLOCK_LENGTH { + column_gradient[block * BLOCK_LENGTH + k] = gradient[k] * epsilon[block][j]; + } + } + + next_epsilon.set_column(j, column_gradient); + } + + (next_epsilon, ()) + } + + fn regularize(&self) -> Self::Delta { + () + } + + fn apply_gradient(&mut self, _gradient: &Self::Delta) { + // Noop + } +}