Block convolution, max pooling

Shad Amethyst, 2 years ago
branch main · parent a6da11b125 · commit cc7686569a

@@ -2,7 +2,7 @@
 #![feature(generic_const_exprs)]
 
 use neuramethyst::algebra::NeuraVector;
-use neuramethyst::derivable::reduce::Average;
+use neuramethyst::derivable::reduce::{Average, Max};
 use rust_mnist::Mnist;
 
 use neuramethyst::derivable::activation::{Linear, Relu};
@@ -55,17 +55,22 @@ fn main() {
     let mut network = neura_sequential![
         neura_layer!("unstable_reshape", 1, { 28 * 28 }),
-        neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 10; Relu)),
-        // neura_layer!("conv2d_pad", 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
-        // neura_layer!("pool_global", 10, {28 * 28}; Average),
-        // neura_layer!("pool1d", 10, 28, 28; Average),
-        // neura_layer!("unstable_flatten", 10, 28),
-        neura_layer!("unstable_flatten", 10, { 28 * 28 }),
-        // neura_layer!("dense", 100; Relu),
-        // neura_layer!("dropout", 0.5),
-        // neura_layer!("dense", 30; Relu),
-        // neura_layer!("dropout", 0.5),
+        neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 3; Relu)),
+        // neura_layer!("conv2d_block", 7, 7; 4; neura_layer!("dense", {3 * 4 * 4}, 8; Relu)),
+        // neura_layer!("conv2d_pad"; 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
+        neura_layer!("unstable_flatten"),
+        neura_layer!("dropout", 0.33),
+        neura_layer!("unstable_reshape", 3, { 28 * 28 }),
+        neura_layer!("conv2d_block", 14, 14; 2; neura_layer!("dense", {3 * 2 * 2}, 2; Relu)),
+        // neura_layer!("unstable_flatten"),
+        // neura_layer!("dropout", 0.33),
+        // neura_layer!("unstable_reshape", 2, { 14 * 14 }),
+        // neura_layer!("conv2d_pad"; 14, 5; neura_layer!("dense", {2 * 5 * 5}, 20; Relu)),
+        // neura_layer!("pool_global"; Max),
+        // neura_layer!("pool1d", {14 * 2}, 7; Max),
+        neura_layer!("unstable_flatten"),
+        neura_layer!("dropout", 0.2),
         neura_layer!("dense", 10; Linear),
         neura_layer!("softmax")
     ];
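Aside (not part of the commit): a rough shape walk-through of the new example network, assuming `conv2d_pad` keeps one output row per pixel, `conv2d_block` keeps one output row per block, and `unstable_flatten` concatenates the feature rows.

fn main() {
    // unstable_reshape 1, {28 * 28}: 1 feature x 784 pixels
    // conv2d_pad (width 28, 3x3 window, inner dense {1 * 3 * 3} -> 3): 3 features x 784 pixels
    let after_conv_pad = 3 * 28 * 28; // flattened by unstable_flatten before dropout(0.33)
    // unstable_reshape 3, {28 * 28}, then conv2d_block (14 x 14 blocks of 2 x 2,
    // inner dense {3 * 2 * 2} -> 2): 2 features x 196 blocks
    let after_block = 2 * 14 * 14; // flattened again before dropout(0.2)
    // dense -> 10, softmax: 10 class scores
    assert_eq!(after_conv_pad, 2352);
    assert_eq!(after_block, 392);
}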

@ -1,3 +1,5 @@
use crate::utils::{argmax, one_hot};
use super::*; use super::*;
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
@@ -18,3 +20,25 @@ impl NeuraReducer<f64> for Average {
         NeuraVector::from_value(1.0 / inputs.len() as f64)
     }
 }
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Max;
+
+impl NeuraReducer<f64> for Max {
+    #[inline(always)]
+    fn eval<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, f64>) -> f64 {
+        let mut max = 0.0;
+        for &i in inputs.iter() {
+            max = i.max(max);
+        }
+        max
+    }
+
+    #[inline(always)]
+    fn nabla<const LENGTH: usize>(
+        &self,
+        inputs: NeuraVector<LENGTH, f64>,
+    ) -> NeuraVector<LENGTH, f64> {
+        one_hot(argmax(inputs.as_ref()))
+    }
+}
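Aside (not part of the commit): a standalone sketch of the same reduction over a plain slice, with the gradient as a one-hot vector at the argmax. Unlike the committed eval, this version starts from f64::NEG_INFINITY, so it also handles all-negative inputs.

// Max-reduction and its subgradient, written against plain slices.
fn max_reduce(inputs: &[f64]) -> f64 {
    inputs.iter().copied().fold(f64::NEG_INFINITY, f64::max)
}

fn max_nabla(inputs: &[f64]) -> Vec<f64> {
    // Index of the largest element; ties resolve to the last maximum here.
    let argmax = inputs
        .iter()
        .enumerate()
        .max_by(|a, b| a.1.total_cmp(b.1))
        .map(|(i, _)| i)
        .unwrap_or(0);
    let mut grad = vec![0.0; inputs.len()];
    grad[argmax] = 1.0;
    grad
}

fn main() {
    let inputs = [0.2, 0.9, 0.5];
    assert_eq!(max_reduce(&inputs), 0.9);
    assert_eq!(max_nabla(&inputs), vec![0.0, 1.0, 0.0]);
}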

@@ -167,7 +167,7 @@ pub struct NeuraConv2DPadLayer<
     /// The width of the image, in grid units.
     ///
     /// **Class invariant:** `LAYER % width == 0`, `width > 0`
-    pub width: NonZeroUsize,
+    width: NonZeroUsize,
 }
 
 impl<
@@ -192,6 +192,10 @@ impl<
         }
     }
 
+    pub fn width(&self) -> NonZeroUsize {
+        self.width
+    }
+
     /// Iterates within the `(WINDOW, WINDOW)` window centered around `x, y`;
     /// Returns a 4-tuple `(x' = x + δx, y' = y + δy, δy * WINDOW + δx, y' * width + x')`, with the last element
     /// being set to `None` if `x'` or `y'` are out of bounds.
@@ -338,3 +342,148 @@ where
         self.inner_layer.cleanup();
     }
 }
+
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub struct NeuraConv2DBlockLayer<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>>,
+> {
+    pub inner_layer: Layer,
+}
+
+impl<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>>,
+> NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+{
+    pub fn new(layer: Layer) -> Self {
+        Self { inner_layer: layer }
+    }
+
+    fn iterate_blocks<'a>(
+        &'a self,
+        input: &'a NeuraMatrix<IN_FEATS, { WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+    ) -> impl Iterator<
+        Item = (
+            usize,
+            usize,
+            usize,
+            NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+        ),
+    > + 'a {
+        (0..HEIGHT).flat_map(move |y| {
+            (0..WIDTH).map(move |x| {
+                let output_index = y * WIDTH + x;
+                let mut virtual_input = NeuraVector::default();
+                for dy in 0..BLOCK_SIZE {
+                    let y = y * BLOCK_SIZE + dy;
+                    for dx in 0..BLOCK_SIZE {
+                        let x = x * BLOCK_SIZE + dx;
+                        let virtual_index = dy * BLOCK_SIZE + dx;
+                        for k in 0..IN_FEATS {
+                            virtual_input[virtual_index * IN_FEATS + k] = input[y * WIDTH + x][k];
+                        }
+                    }
+                }
+
+                (x, y, output_index, virtual_input)
+            })
+        })
+    }
+}
+
+impl<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const OUT_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<
+        Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+        Output = NeuraVector<OUT_FEATS, f64>,
+    >,
+> NeuraLayer for NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+where
+    [f64; WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE]: Sized,
+{
+    type Input = NeuraMatrix<IN_FEATS, { WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE }, f64>;
+    type Output = NeuraMatrix<OUT_FEATS, { WIDTH * HEIGHT }, f64>;
+
+    fn eval(&self, input: &Self::Input) -> Self::Output {
+        let mut res = Self::Output::default();
+
+        for (_, _, output_index, virtual_input) in self.iterate_blocks(input) {
+            res.set_row(output_index, self.inner_layer.eval(&virtual_input));
+        }
+
+        res
+    }
+}
+
+impl<
+    const WIDTH: usize,
+    const HEIGHT: usize,
+    const IN_FEATS: usize,
+    const OUT_FEATS: usize,
+    const BLOCK_SIZE: usize,
+    Layer: NeuraLayer<
+        Input = NeuraVector<{ IN_FEATS * BLOCK_SIZE * BLOCK_SIZE }, f64>,
+        Output = NeuraVector<OUT_FEATS, f64>,
+    >,
+> NeuraTrainableLayer for NeuraConv2DBlockLayer<WIDTH, HEIGHT, IN_FEATS, BLOCK_SIZE, Layer>
+where
+    [f64; WIDTH * HEIGHT * BLOCK_SIZE * BLOCK_SIZE]: Sized,
+    Layer: NeuraTrainableLayer,
+{
+    type Delta = Layer::Delta;
+
+    fn backpropagate(
+        &self,
+        input: &Self::Input,
+        epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta) {
+        let mut gradient_sum = Layer::Delta::zero();
+        let mut next_epsilon = Self::Input::default();
+
+        for (x, y, output_index, virtual_input) in self.iterate_blocks(input) {
+            let (layer_next_epsilon, gradient) = self
+                .inner_layer
+                .backpropagate(&virtual_input, epsilon.get_row(output_index));
+
+            gradient_sum.add_assign(&gradient);
+
+            for dy in 0..BLOCK_SIZE {
+                let y = y * BLOCK_SIZE + dy;
+                for dx in 0..BLOCK_SIZE {
+                    let x = x * BLOCK_SIZE + dx;
+                    let input_index = y * WIDTH + x;
+                    for k in 0..IN_FEATS {
+                        next_epsilon[input_index][k] =
+                            layer_next_epsilon[(dy * BLOCK_SIZE + dx) * IN_FEATS + k];
+                    }
+                }
+            }
+        }
+
+        (next_epsilon, gradient_sum)
+    }
+
+    fn regularize(&self) -> Self::Delta {
+        self.inner_layer.regularize()
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Delta) {
+        self.inner_layer.apply_gradient(gradient);
+    }
+}
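Aside (not part of the commit): the output layout used above. Each of the WIDTH x HEIGHT blocks feeds IN_FEATS * BLOCK_SIZE * BLOCK_SIZE values to the inner layer and writes one OUT_FEATS row at output_index = y * WIDTH + x, i.e. row-major block order, as in iterate_blocks.

fn output_index(x: usize, y: usize, width: usize) -> usize {
    y * width + x
}

fn main() {
    // With WIDTH = HEIGHT = 14, as in the conv2d_block layer of the example network:
    assert_eq!(output_index(0, 0, 14), 0); // top-left block
    assert_eq!(output_index(3, 5, 14), 73); // block in column 3, row 5
    assert_eq!(output_index(13, 13, 14), 195); // bottom-right block, 14 * 14 - 1
}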

@@ -2,7 +2,7 @@ mod dense;
 pub use dense::NeuraDenseLayer;
 
 mod convolution;
-pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DPadLayer};
+pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};
 
 mod dropout;
 pub use dropout::NeuraDropoutLayer;
@@ -124,6 +124,14 @@ macro_rules! neura_layer {
         $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
     };
 
+    ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
+    };
+
+    ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
+        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
+    };
+
     ( "pool_global"; $reduce:expr ) => {
         $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
     };
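For orientation (not part of the commit): the second new arm is the one exercised by the example network above. `neura_layer!("conv2d_block", 14, 14; 2; neura_layer!("dense", {3 * 2 * 2}, 2; Relu))` expands to roughly `NeuraConv2DBlockLayer::new(<the dense layer>) as NeuraConv2DBlockLayer<14, 14, _, 2, _>`, leaving IN_FEATS and the inner layer type to be inferred, while the first arm lets the caller pin IN_FEATS explicitly.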

@@ -150,7 +150,7 @@ where
                 block_input[k] = input[block * BLOCK_LENGTH + k];
             }
 
-            let mut gradient = self.reducer.nabla(block_input);
+            let gradient = self.reducer.nabla(block_input);
 
             for k in 0..BLOCK_LENGTH {
                 column_gradient[block * BLOCK_LENGTH + k] = gradient[k] * epsilon[block][j];
