Pooling layers

Branch: main
Author: Shad Amethyst, 2 years ago
Parent: d7eb6de34e
Commit: a6da11b125

@ -2,6 +2,7 @@
#![feature(generic_const_exprs)]
use neuramethyst::algebra::NeuraVector;
use neuramethyst::derivable::reduce::Average;
use rust_mnist::Mnist;
use neuramethyst::derivable::activation::{Linear, Relu};
@ -53,13 +54,18 @@ fn main() {
let test_inputs: Vec<_> = test_images.zip(test_labels.into_iter()).collect();
let mut network = neura_sequential![
neura_layer!("unstable_reshape", 28, 28),
neura_layer!("conv1d_pad", 3; neura_layer!("dense", {28 * 3}, 10; Relu)),
neura_layer!("unstable_flatten"),
neura_layer!("unstable_reshape", 1, { 28 * 28 }),
neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 10; Relu)),
// neura_layer!("conv2d_pad", 28, 1; neura_layer!("dense", {30 * 1 * 1}, 10; Relu)),
// neura_layer!("pool_global", 10, {28 * 28}; Average),
// neura_layer!("pool1d", 10, 28, 28; Average),
// neura_layer!("unstable_flatten", 10, 28),
neura_layer!("unstable_flatten", 10, { 28 * 28 }),
// neura_layer!("dense", 100; Relu),
// neura_layer!("dropout", 0.5),
neura_layer!("dense", 30; Relu),
neura_layer!("dropout", 0.5),
// neura_layer!("dense", 30; Relu),
// neura_layer!("dropout", 0.5),
neura_layer!("dense", 10; Linear),
neura_layer!("softmax")
];
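
For orientation, here is a rough trace of the value types flowing through the active (non-commented) layers of this network. It assumes that `unstable_reshape` and `unstable_flatten` convert between `NeuraVector<{ FEATS * LENGTH }, f64>` and `NeuraMatrix<FEATS, LENGTH, f64>`, matching the input and output types of the convolution layer introduced below; the dropout layer leaves shapes unchanged.

// input image:                         NeuraVector<{ 28 * 28 }, f64>
// unstable_reshape 1, { 28 * 28 }:     NeuraMatrix<1, { 28 * 28 }, f64>
// conv2d_pad (inner dense -> 10):      NeuraMatrix<10, { 28 * 28 }, f64>
// unstable_flatten 10, { 28 * 28 }:    NeuraVector<{ 10 * 28 * 28 }, f64>
// dense 30; Relu (then dropout 0.5):   NeuraVector<30, f64>
// dense 10; Linear, then softmax:      NeuraVector<10, f64>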

@ -49,6 +49,52 @@ impl<const WIDTH: usize, const HEIGHT: usize, F> NeuraMatrix<WIDTH, HEIGHT, F> {
self.data[y][j] = row[j].clone();
}
}
#[inline]
pub fn set_column(&mut self, x: usize, column: impl Borrow<[F; HEIGHT]>)
where
F: Clone,
{
if x >= WIDTH {
panic!(
"Cannot set column {} of NeuraMatrix<{}, {}, _>: column index out of bound",
x, WIDTH, HEIGHT
);
}
let column = column.borrow();
for i in 0..HEIGHT {
self.data[i][x] = column[i].clone();
}
}
#[inline]
pub fn get_row(&self, y: usize) -> NeuraVector<WIDTH, F>
where
F: Default + Clone,
{
let mut row = NeuraVector::default();
for j in 0..WIDTH {
row[j] = self.data[y][j].clone();
}
row
}
#[inline]
pub fn get_column(&self, x: usize) -> NeuraVector<HEIGHT, F>
where
F: Default + Clone,
{
let mut row = NeuraVector::default();
for i in 0..HEIGHT {
row[i] = self.data[i][x].clone();
}
row
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F: Float> NeuraMatrix<WIDTH, HEIGHT, F> {
@ -102,22 +148,21 @@ impl<const LENGTH: usize, F: Default + Clone> NeuraMatrix<LENGTH, LENGTH, F> {
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F: Float + From<f64> + Into<f64>> NeuraVectorSpace
impl<const WIDTH: usize, const HEIGHT: usize, F: NeuraVectorSpace + Clone> NeuraVectorSpace
for NeuraMatrix<WIDTH, HEIGHT, F>
{
fn add_assign(&mut self, other: &Self) {
for i in 0..HEIGHT {
for j in 0..WIDTH {
self.data[i][j] = self.data[i][j] + other.data[i][j];
self.data[i][j].add_assign(&other.data[i][j]);
}
}
}
fn mul_assign(&mut self, by: f64) {
let by: F = by.into();
for i in 0..HEIGHT {
for j in 0..WIDTH {
self.data[i][j] = self.data[i][j] * by;
self.data[i][j].mul_assign(by);
}
}
}
@ -128,16 +173,15 @@ impl<const WIDTH: usize, const HEIGHT: usize, F: Float + From<f64> + Into<f64>>
}
fn norm_squared(&self) -> f64 {
let mut sum = F::zero();
let mut sum = 0.0;
for i in 0..HEIGHT {
for j in 0..WIDTH {
let x = self.data[i][j];
sum = sum + x * x;
sum += self.data[i][j].norm_squared();
}
}
sum.into()
sum
}
}

@ -68,9 +68,9 @@ impl<const LENGTH: usize, F: Float> NeuraVector<LENGTH, F> {
result
}
pub fn hadamard_product(&self, other: impl AsRef<[F; LENGTH]>) -> NeuraVector<LENGTH, F> {
pub fn hadamard_product(&self, other: impl Borrow<[F; LENGTH]>) -> NeuraVector<LENGTH, F> {
let mut result: NeuraVector<LENGTH, F> = NeuraVector::from_value(F::zero());
let other = other.as_ref();
let other = other.borrow();
for i in 0..LENGTH {
result[i] = self.data[i] * other[i];

@ -1,5 +1,8 @@
use crate::algebra::NeuraVector;
pub mod activation;
pub mod loss;
pub mod reduce;
pub mod regularize;
pub trait NeuraDerivable<F> {
@ -31,3 +34,10 @@ pub trait NeuraLoss {
/// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
}
pub trait NeuraReducer<F> {
fn eval<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, F>) -> F;
/// Should return the gradient of the reducer at `inputs`, i.e. `[∂eval(inputs)/∂inputsᵢ]ᵢ`
fn nabla<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, F>) -> NeuraVector<LENGTH, F>;
}

@ -0,0 +1,20 @@
use super::*;
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Average;
impl NeuraReducer<f64> for Average {
#[inline(always)]
fn eval<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, f64>) -> f64 {
let sum: f64 = inputs.iter().sum();
sum / inputs.len() as f64
}
#[inline(always)]
fn nabla<const LENGTH: usize>(
&self,
inputs: NeuraVector<LENGTH, f64>,
) -> NeuraVector<LENGTH, f64> {
NeuraVector::from_value(1.0 / inputs.len() as f64)
}
}
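
To illustrate the `NeuraReducer` contract from `derivable/mod.rs`, here is a hypothetical max reducer written in the same style as `Average`; it is a sketch for this page only and is not part of the commit. It only relies on `NeuraVector` operations already used above (`len()`, indexing, `from_value`).

#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Max;

impl NeuraReducer<f64> for Max {
    #[inline(always)]
    fn eval<const LENGTH: usize>(&self, inputs: NeuraVector<LENGTH, f64>) -> f64 {
        let mut max = f64::NEG_INFINITY;
        for i in 0..inputs.len() {
            max = max.max(inputs[i]);
        }
        max
    }

    #[inline(always)]
    fn nabla<const LENGTH: usize>(
        &self,
        inputs: NeuraVector<LENGTH, f64>,
    ) -> NeuraVector<LENGTH, f64> {
        // Sub-gradient of the maximum: 1 at the (first) argmax, 0 everywhere else.
        let mut gradient = NeuraVector::from_value(0.0);
        let mut argmax = 0;
        for i in 1..inputs.len() {
            if inputs[i] > inputs[argmax] {
                argmax = i;
            }
        }
        gradient[argmax] = 1.0;
        gradient
    }
}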

@ -1,17 +1,21 @@
use std::num::NonZeroUsize;
use crate::algebra::{NeuraMatrix, NeuraVector};
use super::*;
/// A 1-dimensional convolutional
/// A 1-dimensional convolutional layer, which operates on rows of the input,
/// and pads them with `self.pad_with`
#[non_exhaustive]
#[derive(Clone, Debug)]
pub struct NeuraConv1DPad<
pub struct NeuraConv1DPadLayer<
const LENGTH: usize,
const IN_FEATS: usize,
const WINDOW: usize,
Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>>,
> {
inner_layer: Layer,
pad_with: NeuraVector<IN_FEATS, f64>,
pub inner_layer: Layer,
pub pad_with: NeuraVector<IN_FEATS, f64>,
}
impl<
@ -19,7 +23,7 @@ impl<
const IN_FEATS: usize,
const WINDOW: usize,
Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>>,
> NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
> NeuraConv1DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
where
[u8; IN_FEATS * WINDOW]: Sized,
{
@ -64,7 +68,7 @@ impl<
Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>,
Output = NeuraVector<OUT_FEATS, f64>,
>,
> NeuraLayer for NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
> NeuraLayer for NeuraConv1DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
{
type Input = NeuraMatrix<IN_FEATS, LENGTH, f64>;
type Output = NeuraMatrix<OUT_FEATS, LENGTH, f64>;
@ -89,7 +93,7 @@ impl<
Input = NeuraVector<{ IN_FEATS * WINDOW }, f64>,
Output = NeuraVector<OUT_FEATS, f64>,
>,
> NeuraTrainableLayer for NeuraConv1DPad<LENGTH, IN_FEATS, WINDOW, Layer>
> NeuraTrainableLayer for NeuraConv1DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
where
Layer: NeuraTrainableLayer,
{
@ -120,7 +124,197 @@ where
let input_index = input_index as usize;
for j in 0..IN_FEATS {
next_epsilon[input_index][j] += layer_next_epsilon[i * WINDOW + j];
next_epsilon[input_index][j] += layer_next_epsilon[i * IN_FEATS + j];
}
}
}
(next_epsilon, weights_gradient_sum)
}
fn regularize(&self) -> Self::Delta {
self.inner_layer.regularize()
}
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.inner_layer.apply_gradient(gradient);
}
fn prepare_epoch(&mut self) {
self.inner_layer.prepare_epoch();
}
fn cleanup(&mut self) {
self.inner_layer.cleanup();
}
}
/// Applies and trains a 2D convolution on an image, which has been laid out flat
/// (instead of being a `(width, height, features)` tensor, it should be a `(width * height, features)` matrix).
///
/// The `width` value must be given to the layer, to allow it to interpret the input as a 2D image.
/// The constructor (`new`) asserts that `width` divides `LENGTH`.
#[non_exhaustive]
#[derive(Clone, Debug)]
pub struct NeuraConv2DPadLayer<
const LENGTH: usize,
const IN_FEATS: usize,
const WINDOW: usize,
Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>>,
> {
pub inner_layer: Layer,
pub pad_with: NeuraVector<IN_FEATS, f64>,
/// The width of the image, in grid units.
///
/// **Class invariant:** `LENGTH % width == 0`, `width > 0`
pub width: NonZeroUsize,
}
impl<
const LENGTH: usize,
const IN_FEATS: usize,
const WINDOW: usize,
Layer: NeuraLayer<Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>>,
> NeuraConv2DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
{
pub fn new(inner_layer: Layer, pad_with: NeuraVector<IN_FEATS, f64>, width: usize) -> Self {
assert!(
LENGTH % width == 0,
"width ({}) does not divide LENGTH ({})",
width,
LENGTH
);
Self {
inner_layer,
pad_with,
width: width.try_into().expect("width cannot be zero!"),
}
}
/// Iterates within the `(WINDOW, WINDOW)` window centered around `(x, y)`.
/// Returns a 4-tuple `(x' = x + δx, y' = y + δy, δy * WINDOW + δx, y' * width + x')`, with the last element
/// set to `None` if `x'` or `y'` is out of bounds.
fn iterate_within_window<'a>(
&'a self,
x: usize,
y: usize,
) -> impl Iterator<Item = (isize, isize, usize, Option<usize>)> + 'a {
let window_offset = (WINDOW as isize - 1) / 2;
let width = self.width.get() as isize;
let height = LENGTH as isize / width;
(0..WINDOW).flat_map(move |dy| {
let window_index = dy * WINDOW;
let dy = dy as isize - window_offset;
(0..WINDOW).map(move |dx| {
let window_index = window_index + dx;
let dx = dx as isize - window_offset;
let x = x as isize + dx;
let y = y as isize + dy;
if x < 0 || y < 0 || x >= width || y >= height {
(x, y, window_index, None)
} else {
(x, y, window_index, Some((y * width + x) as usize))
}
})
})
}
fn iterate_windows<'a>(
&'a self,
input: &'a NeuraMatrix<IN_FEATS, LENGTH, f64>,
) -> impl Iterator<Item = (usize, usize, Layer::Input)> + 'a {
(0..self.width.into()).flat_map(move |x| {
(0..(LENGTH / self.width)).map(move |y| {
// TODO: hint the compiler that x * y < LENGTH and LENGTH % width == 0?
let mut virtual_input: Layer::Input = NeuraVector::default();
for (_, _, window_index, input_index) in self.iterate_within_window(x, y) {
if let Some(input_index) = input_index {
let input_row: &[f64; IN_FEATS] = &input[input_index];
for j in 0..IN_FEATS {
virtual_input[window_index * IN_FEATS + j] = input_row[j];
}
} else {
for j in 0..IN_FEATS {
virtual_input[window_index * IN_FEATS + j] = self.pad_with[j];
}
}
}
(x, y, virtual_input)
})
})
}
}
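
As a standalone illustration of the index arithmetic above (hypothetical values, not part of the commit): for a 4-wide, 2-tall flattened image and a 3×3 window centred on the top-left cell, the out-of-bounds slots are the ones the layer fills with `pad_with`, and every in-bounds slot maps to matrix row `y' * width + x'`.

fn main() {
    const WINDOW: isize = 3;
    let (width, height) = (4isize, 2isize);
    let (x, y) = (0isize, 0isize); // centre of the window
    let window_offset = (WINDOW - 1) / 2;

    for dy in 0..WINDOW {
        for dx in 0..WINDOW {
            let window_index = dy * WINDOW + dx;
            let (x2, y2) = (x + dx - window_offset, y + dy - window_offset);
            if x2 < 0 || y2 < 0 || x2 >= width || y2 >= height {
                // Out of bounds: the layer substitutes `pad_with` here.
                println!("window slot {window_index}: padded");
            } else {
                // In bounds: read row `y' * width + x'` of the flattened input matrix.
                println!("window slot {window_index}: input row {}", y2 * width + x2);
            }
        }
    }
}
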
impl<
const LENGTH: usize,
const IN_FEATS: usize,
const OUT_FEATS: usize,
const WINDOW: usize,
Layer: NeuraLayer<
Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>,
Output = NeuraVector<OUT_FEATS, f64>,
>,
> NeuraLayer for NeuraConv2DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
{
type Input = NeuraMatrix<IN_FEATS, LENGTH, f64>;
type Output = NeuraMatrix<OUT_FEATS, LENGTH, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output {
let mut res = NeuraMatrix::default();
for (cx, cy, virtual_input) in self.iterate_windows(input) {
res.set_row(
cy * self.width.get() + cx,
self.inner_layer.eval(&virtual_input),
);
}
res
}
}
impl<
const LENGTH: usize,
const IN_FEATS: usize,
const OUT_FEATS: usize,
const WINDOW: usize,
Layer: NeuraLayer<
Input = NeuraVector<{ IN_FEATS * WINDOW * WINDOW }, f64>,
Output = NeuraVector<OUT_FEATS, f64>,
>,
> NeuraTrainableLayer for NeuraConv2DPadLayer<LENGTH, IN_FEATS, WINDOW, Layer>
where
Layer: NeuraTrainableLayer,
{
type Delta = <Layer as NeuraTrainableLayer>::Delta;
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
let mut next_epsilon = Self::Input::default();
let mut weights_gradient_sum = Self::Delta::zero();
for (cx, cy, virtual_input) in self.iterate_windows(input) {
let epsilon = NeuraVector::from(&epsilon[cy * self.width.get() + cx]);
let (layer_next_epsilon, weights_gradient) =
self.inner_layer.backpropagate(&virtual_input, epsilon);
weights_gradient_sum.add_assign(&weights_gradient);
for (_, _, window_index, input_index) in self.iterate_within_window(cx, cy) {
if let Some(input_index) = input_index {
for j in 0..IN_FEATS {
next_epsilon[input_index][j] +=
layer_next_epsilon[window_index * IN_FEATS + j];
}
}
}
}
@ -135,4 +329,12 @@ where
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.inner_layer.apply_gradient(gradient);
}
fn prepare_epoch(&mut self) {
self.inner_layer.prepare_epoch();
}
fn cleanup(&mut self) {
self.inner_layer.cleanup();
}
}

@ -2,7 +2,7 @@ mod dense;
pub use dense::NeuraDenseLayer;
mod convolution;
pub use convolution::NeuraConv1DPad;
pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DPadLayer};
mod dropout;
pub use dropout::NeuraDropoutLayer;
@ -16,6 +16,9 @@ pub use one_hot::NeuraOneHotLayer;
mod lock;
pub use lock::NeuraLockLayer;
mod pool;
pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer};
mod reshape;
pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer};
@ -105,12 +108,40 @@ macro_rules! neura_layer {
$crate::layer::NeuraLockLayer($layer)
};
( "conv1d_pad", $length:expr, $feats:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPad::new($layer, Default::default()) as $crate::layer::NeuraConv1DPad<$length, $feats, $window, _>
( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
};
( "conv1d_pad"; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
};
( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
};
( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
};
( "pool_global"; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
};
( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
};
( "pool1d", $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
};
( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
};
( "conv1d_pad", $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPad::new($layer, Default::default()) as $crate::layer::NeuraConv1DPad<_, _, $window, _>
( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
};
( "unstable_flatten" ) => {

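Putting the new macros together, a minimal MNIST-style pipeline using the global pooling layer could look roughly like the commented-out lines in the example at the top of this commit. This is a sketch only; whether every const-generic parameter is inferred as written has not been verified here.

let network = neura_sequential![
    neura_layer!("unstable_reshape", 1, { 28 * 28 }),
    neura_layer!("conv2d_pad", 1, {28 * 28}; 28, 3; neura_layer!("dense", {1 * 3 * 3}, 10; Relu)),
    neura_layer!("pool_global", 10, {28 * 28}; Average),
    neura_layer!("dense", 10; Linear),
    neura_layer!("softmax")
];
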
@ -0,0 +1,173 @@
use crate::{
algebra::{NeuraMatrix, NeuraVector},
derivable::NeuraReducer,
};
use super::*;
pub struct NeuraGlobalPoolLayer<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>>
{
reducer: Reducer,
}
impl<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>>
NeuraGlobalPoolLayer<LENGTH, FEATS, Reducer>
{
pub fn new(reducer: Reducer) -> Self {
Self { reducer }
}
}
impl<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>> NeuraLayer
for NeuraGlobalPoolLayer<LENGTH, FEATS, Reducer>
{
type Input = NeuraMatrix<FEATS, LENGTH, f64>;
type Output = NeuraVector<FEATS, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output {
let mut res = Self::Output::default();
for j in 0..FEATS {
let input = input.get_column(j);
res[j] = self.reducer.eval(input);
}
res
}
}
impl<const LENGTH: usize, const FEATS: usize, Reducer: NeuraReducer<f64>> NeuraTrainableLayer
for NeuraGlobalPoolLayer<LENGTH, FEATS, Reducer>
{
type Delta = ();
#[inline]
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
let mut next_epsilon = Self::Input::default();
for j in 0..FEATS {
let input = input.get_column(j);
let mut gradient = self.reducer.nabla(input);
gradient.mul_assign(epsilon[j]);
next_epsilon.set_column(j, gradient);
}
(next_epsilon, ())
}
#[inline(always)]
fn regularize(&self) -> Self::Delta {
()
}
#[inline(always)]
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop
}
}
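
A hypothetical usage sketch for the global pooling layer (not part of the commit). It assumes that `set_row` accepts a plain `[f64; WIDTH]` array, mirroring `set_column` above, and that the `NeuraLayer` trait is in scope for `eval`.

// 3 positions (LENGTH = 3), 2 features (FEATS = 2), averaged per feature column.
let layer: NeuraGlobalPoolLayer<3, 2, Average> = NeuraGlobalPoolLayer::new(Average);

let mut input: NeuraMatrix<2, 3, f64> = NeuraMatrix::default();
input.set_row(0, [1.0, 10.0]);
input.set_row(1, [2.0, 20.0]);
input.set_row(2, [3.0, 30.0]);

// output[0] = (1 + 2 + 3) / 3 = 2.0, output[1] = (10 + 20 + 30) / 3 = 20.0
let output = layer.eval(&input);
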
pub struct NeuraPool1DLayer<
const BLOCKS: usize,
const BLOCK_LENGTH: usize,
const FEATS: usize,
Reducer: NeuraReducer<f64>,
> {
reducer: Reducer,
}
impl<
const BLOCKS: usize,
const BLOCK_LENGTH: usize,
const FEATS: usize,
Reducer: NeuraReducer<f64>,
> NeuraPool1DLayer<BLOCKS, BLOCK_LENGTH, FEATS, Reducer>
{
pub fn new(reducer: Reducer) -> Self {
Self { reducer }
}
}
impl<
const BLOCKS: usize,
const BLOCK_LENGTH: usize,
const FEATS: usize,
Reducer: NeuraReducer<f64>,
> NeuraLayer for NeuraPool1DLayer<BLOCKS, BLOCK_LENGTH, FEATS, Reducer>
where
[f64; BLOCKS * BLOCK_LENGTH]: Sized,
{
type Input = NeuraMatrix<FEATS, { BLOCKS * BLOCK_LENGTH }, f64>;
type Output = NeuraMatrix<FEATS, BLOCKS, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output {
let mut result = NeuraMatrix::default();
for j in 0..FEATS {
let input = input.get_column(j);
for block in 0..BLOCKS {
let mut block_input: NeuraVector<BLOCK_LENGTH, f64> = NeuraVector::default();
for k in 0..BLOCK_LENGTH {
block_input[k] = input[block * BLOCK_LENGTH + k];
}
result[block][j] = self.reducer.eval(block_input);
}
}
result
}
}
impl<
const BLOCKS: usize,
const BLOCK_LENGTH: usize,
const FEATS: usize,
Reducer: NeuraReducer<f64>,
> NeuraTrainableLayer for NeuraPool1DLayer<BLOCKS, BLOCK_LENGTH, FEATS, Reducer>
where
[f64; BLOCKS * BLOCK_LENGTH]: Sized,
{
type Delta = ();
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
let mut next_epsilon = Self::Input::default();
for j in 0..FEATS {
let input = input.get_column(j);
let mut column_gradient = NeuraVector::default();
for block in 0..BLOCKS {
let mut block_input: NeuraVector<BLOCK_LENGTH, f64> = NeuraVector::default();
for k in 0..BLOCK_LENGTH {
block_input[k] = input[block * BLOCK_LENGTH + k];
}
let mut gradient = self.reducer.nabla(block_input);
for k in 0..BLOCK_LENGTH {
column_gradient[block * BLOCK_LENGTH + k] = gradient[k] * epsilon[block][j];
}
}
next_epsilon.set_column(j, column_gradient);
}
(next_epsilon, ())
}
fn regularize(&self) -> Self::Delta {
()
}
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop
}
}
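
Finally, a small standalone illustration (hypothetical values, not part of the commit) of how `NeuraPool1DLayer` with the `Average` reducer treats one feature column: the column is cut into `BLOCKS` blocks of `BLOCK_LENGTH` entries, each block is reduced to a single value, and during backpropagation each entry of a block receives `epsilon[block] / BLOCK_LENGTH`.

fn main() {
    // One feature column with BLOCKS = 2, BLOCK_LENGTH = 2.
    let column = [1.0, 3.0, 5.0, 7.0];
    let epsilon = [10.0, 100.0]; // one incoming gradient per block

    for block in 0..2 {
        let block_input = &column[block * 2..block * 2 + 2];
        let pooled: f64 = block_input.iter().sum::<f64>() / 2.0;
        // Average's nabla is 1 / BLOCK_LENGTH everywhere, so the gradient is spread evenly.
        let back: Vec<f64> = block_input.iter().map(|_| epsilon[block] / 2.0).collect();
        println!("block {block}: pooled = {pooled}, backpropagated = {back:?}");
    }
}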