🔥 🚚 ♻️ Refactoring the previous layer system

It was becoming almost impossible to manage the dimensions of the layers,
especially with convolution layers. Const generics are nice, but they are still too immature
to rely on for this use-case. We'll probably extend the implementations to accept
const- or dynamically-sized layers at some point, for performance-critical applications.
main
Shad Amethyst 2 years ago
parent cc7686569a
commit 2edbff860c

@ -7,6 +7,7 @@ edition = "2021"
[dependencies]
boxed-array = "0.1.0"
nalgebra = { version = "^0.32", features = ["std", "macros", "rand"] }
ndarray = "^0.15"
num = "^0.4"
# num-traits = "0.2.15"

@ -1,22 +1,24 @@
#![feature(generic_arg_infer)]
use neuramethyst::algebra::NeuraVector;
use nalgebra::dvector;
use neuramethyst::derivable::activation::Relu;
use neuramethyst::derivable::loss::Euclidean;
use neuramethyst::{cycle_shuffling, prelude::*};
use neuramethyst::prelude::*;
use neuramethyst::cycle_shuffling;
fn main() {
let mut network = neura_sequential![
neura_layer!("dense", 2, 4; Relu),
neura_layer!("dense", 3; Relu),
neura_layer!("dense", 1; Relu)
];
neura_layer!("dense", 4, Relu),
neura_layer!("dense", 3, Relu),
neura_layer!("dense", 1, Relu)
].construct(NeuraShape::Vector(2)).unwrap();
let inputs: [(NeuraVector<2, f64>, NeuraVector<1, f64>); 4] = [
([0.0, 0.0].into(), [0.0].into()),
([0.0, 1.0].into(), [1.0].into()),
([1.0, 0.0].into(), [1.0].into()),
([1.0, 1.0].into(), [0.0].into()),
let inputs = [
(dvector![0.0, 0.0], dvector![0.0]),
(dvector![0.0, 1.0], dvector![1.0]),
(dvector![1.0, 0.0], dvector![1.0]),
(dvector![1.0, 1.0], dvector![0.0]),
];
for (input, target) in &inputs {

@ -167,10 +167,10 @@ impl<const WIDTH: usize, const HEIGHT: usize, F: NeuraVectorSpace + Clone> Neura
}
}
#[inline(always)]
fn zero() -> Self {
Self::from_value(F::zero())
}
// #[inline(always)]
// fn zero() -> Self {
// Self::from_value(F::zero())
// }
fn norm_squared(&self) -> f64 {
let mut sum = 0.0;

@ -2,6 +2,8 @@ mod matrix;
pub use matrix::NeuraMatrix;
mod vector;
use nalgebra::Matrix;
use num::Float;
pub use vector::NeuraVector;
/// An extension of `std::ops::AddAssign` and `std::default::Default`
@ -10,7 +12,7 @@ pub trait NeuraVectorSpace {
fn mul_assign(&mut self, by: f64);
fn zero() -> Self;
// fn zero() -> Self;
fn norm_squared(&self) -> f64;
}
@ -26,10 +28,10 @@ impl NeuraVectorSpace for () {
// Noop
}
#[inline(always)]
fn zero() -> Self {
()
}
// #[inline(always)]
// fn zero() -> Self {
// ()
// }
fn norm_squared(&self) -> f64 {
0.0
@ -45,9 +47,9 @@ impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
self.as_mut().mul_assign(by);
}
fn zero() -> Self {
Box::new(T::zero())
}
// fn zero() -> Self {
// Box::new(T::zero())
// }
fn norm_squared(&self) -> f64 {
self.as_ref().norm_squared()
@ -65,9 +67,9 @@ impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left
NeuraVectorSpace::mul_assign(&mut self.1, by);
}
fn zero() -> Self {
(Left::zero(), Right::zero())
}
// fn zero() -> Self {
// (Left::zero(), Right::zero())
// }
fn norm_squared(&self) -> f64 {
self.0.norm_squared() + self.1.norm_squared()
@ -87,21 +89,40 @@ impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
}
}
fn zero() -> Self {
let mut res: Vec<T> = Vec::with_capacity(N);
// fn zero() -> Self {
// let mut res: Vec<T> = Vec::with_capacity(N);
for _ in 0..N {
res.push(T::zero());
// for _ in 0..N {
// res.push(T::zero());
// }
// res.try_into().unwrap_or_else(|_| {
// // TODO: check that this panic is optimized away
// unreachable!()
// })
// }
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
}
}
res.try_into().unwrap_or_else(|_| {
// TODO: check that this panic is optimized away
unreachable!()
})
impl<F: Float, R: nalgebra::Dim, C: nalgebra::Dim, S: nalgebra::RawStorage<F, R, C>> NeuraVectorSpace for Matrix<F, R, C, S>
where
Matrix<F, R, C, S>: std::ops::MulAssign<F>,
for<'c> Matrix<F, R, C, S>: std::ops::AddAssign<&'c Matrix<F, R, C, S>>,
F: From<f64> + Into<f64>
{
fn add_assign(&mut self, other: &Self) {
*self += other;
}
fn mul_assign(&mut self, by: f64) {
*self *= <F as From<f64>>::from(by);
}
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
self.iter().map(|x| *x * *x).reduce(|sum, curr| sum + curr).unwrap_or(F::zero()).into()
}
}
@ -116,9 +137,9 @@ macro_rules! base {
std::ops::MulAssign::mul_assign(self, other as $type);
}
fn zero() -> Self {
<Self as Default>::default()
}
// fn zero() -> Self {
// <Self as Default>::default()
// }
fn norm_squared(&self) -> f64 {
(self * self) as f64

@ -95,10 +95,10 @@ impl<const LENGTH: usize, F: Float + From<f64> + Into<f64>> NeuraVectorSpace
}
}
#[inline(always)]
fn zero() -> Self {
Self::from_value(F::zero())
}
// #[inline(always)]
// fn zero() -> Self {
// Self::from_value(F::zero())
// }
fn norm_squared(&self) -> f64 {
let mut sum = F::zero();

@ -1,19 +1,22 @@
use nalgebra::DVector;
use crate::algebra::NeuraVector;
use super::NeuraLoss;
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Euclidean<const N: usize>;
pub struct Euclidean;
impl<const N: usize> NeuraLoss for Euclidean<N> {
type Input = NeuraVector<N, f64>;
type Target = NeuraVector<N, f64>;
impl NeuraLoss for Euclidean {
type Input = DVector<f64>;
type Target = DVector<f64>;
#[inline]
fn eval(&self, target: &NeuraVector<N, f64>, actual: &NeuraVector<N, f64>) -> f64 {
fn eval(&self, target: &DVector<f64>, actual: &DVector<f64>) -> f64 {
assert_eq!(target.shape(), actual.shape());
let mut sum_squared = 0.0;
for i in 0..N {
for i in 0..target.len() {
sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]);
}
@ -23,13 +26,13 @@ impl<const N: usize> NeuraLoss for Euclidean<N> {
#[inline]
fn nabla(
&self,
target: &NeuraVector<N, f64>,
actual: &NeuraVector<N, f64>,
) -> NeuraVector<N, f64> {
let mut res = NeuraVector::default();
target: &DVector<f64>,
actual: &DVector<f64>,
) -> DVector<f64> {
let mut res = DVector::zeros(target.len());
// ∂E(y)/∂yᵢ = yᵢ - yᵢ'
for i in 0..N {
for i in 0..target.len() {
res[i] = actual[i] - target[i];
}

@ -1,38 +1,49 @@
use super::{NeuraLayer, NeuraTrainableLayer};
use crate::{
algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace},
derivable::NeuraDerivable,
};
use std::marker::PhantomData;
use nalgebra::{DMatrix, DVector};
use num::Float;
use rand::Rng;
use rand_distr::Distribution;
use crate::derivable::NeuraDerivable;
use super::*;
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
pub struct NeuraDenseLayer<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>> {
weights: DMatrix<F>,
bias: DVector<F>,
activation: Act,
regularization: Reg,
}
#[derive(Clone, Debug)]
pub struct NeuraDenseLayerPartial<
F: Float,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
R: Rng,
> {
weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
bias: NeuraVector<OUTPUT_LEN, f64>,
activation: Act,
regularization: Reg,
output_size: usize,
rng: R,
phantom: PhantomData<F>,
}
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
F: Float + From<f64> + std::fmt::Debug + 'static,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraDenseLayer<F, Act, Reg>
{
pub fn new(
weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
bias: NeuraVector<OUTPUT_LEN, f64>,
weights: DMatrix<F>,
bias: DVector<F>,
activation: Act,
regularization: Reg,
) -> Self {
assert_eq!(bias.shape().0, weights.shape().0);
Self {
weights,
bias,
@ -41,85 +52,129 @@ impl<
}
}
pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
let mut weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64> = NeuraMatrix::from_value(0.0f64);
// Use Xavier (or He) initialisation, using the harmonic mean
// Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html
pub fn from_rng(
input_size: usize,
output_size: usize,
rng: &mut impl Rng,
activation: Act,
regularization: Reg,
) -> Self
where
rand_distr::StandardNormal: rand_distr::Distribution<F>,
{
let distribution = rand_distr::Normal::new(
0.0,
activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64),
F::zero(),
<F as From<f64>>::from(
activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64),
),
)
.unwrap();
// let distribution = rand_distr::Uniform::new(-0.5, 0.5);
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
weights[i][j] = distribution.sample(rng);
Self {
weights: DMatrix::from_distribution(output_size, input_size, &distribution, rng),
bias: DVector::from_element(
output_size,
<F as From<f64>>::from(activation.bias_hint()),
),
activation,
regularization,
}
}
Self {
weights,
// Biases are initialized based on the activation's hint
bias: NeuraVector::from_value(activation.bias_hint()),
/// Creates a [`NeuraDenseLayerPartial`], i.e. a dense layer whose output size is known
/// but whose input size has not been fixed yet.
///
/// The arguments are stored as-is in the returned value; no weights are allocated here.
pub fn new_partial<R: Rng>(
    output_size: usize,
    rng: R,
    activation: Act,
    regularization: Reg,
) -> NeuraDenseLayerPartial<F, Act, Reg, R> {
    NeuraDenseLayerPartial {
        output_size,
        rng,
        activation,
        regularization,
        // Only ties the scalar type `F` to the partial layer; carries no data.
        phantom: PhantomData::<F>,
    }
}
}
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
F: Float + From<f64> + std::fmt::Debug + 'static,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
R: Rng,
> NeuraPartialLayer for NeuraDenseLayerPartial<F, Act, Reg, R>
where
rand_distr::StandardNormal: rand_distr::Distribution<F>,
{
type Input = NeuraVector<INPUT_LEN, f64>;
type Output = NeuraVector<OUTPUT_LEN, f64>;
type Constructed = NeuraDenseLayer<F, Act, Reg>;
type Err = ();
fn eval(&self, input: &Self::Input) -> Self::Output {
let mut result = self.weights.multiply_vector(input);
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
let mut rng = self.rng;
Ok(NeuraDenseLayer::from_rng(
input_shape.size(),
self.output_size,
&mut rng,
self.activation,
self.regularization,
))
}
for i in 0..OUTPUT_LEN {
result[i] = self.activation.eval(result[i] + self.bias[i]);
/// Reports the output shape of a constructed dense layer: a vector with one entry
/// per row of its weight matrix.
fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
    let (output_len, _input_len) = constructed.weights.shape();
    NeuraShape::Vector(output_len)
}
}
result
impl<
F: Float + From<f64> + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{
type Output = DVector<F>;
/// Evaluates the layer by applying the activation to every entry of
/// `weights * input + bias`.
///
/// # Panics
///
/// Panics if the number of rows of `input` differs from the number of columns
/// of the weight matrix.
fn eval(&self, input: &DVector<F>) -> Self::Output {
    assert_eq!(input.shape().0, self.weights.shape().1);

    let pre_activation = &self.weights * input + &self.bias;
    pre_activation.map(|value| self.activation.eval(value))
}
}
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
F: Float + From<f64> + Into<f64> + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraTrainableLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{
type Delta = (
NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
NeuraVector<OUTPUT_LEN, f64>,
);
type Gradient = (DMatrix<F>, DVector<F>);
fn backpropagate(
/// Returns an all-zero gradient whose weight and bias parts have the same
/// dimensions as the layer's own weights and bias.
fn default_gradient(&self) -> Self::Gradient {
    let (rows, columns) = self.weights.shape();
    let bias_len = self.bias.shape().0;

    (DMatrix::zeros(rows, columns), DVector::zeros(bias_len))
}
fn backprop_layer(
&self,
input: &Self::Input,
input: &DVector<F>,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
let evaluated = self.weights.multiply_vector(input);
) -> (DVector<F>, Self::Gradient) {
let evaluated = &self.weights * input;
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta`
let mut delta: NeuraVector<OUTPUT_LEN, f64> = epsilon.clone();
for i in 0..OUTPUT_LEN {
let mut delta = epsilon.clone();
for i in 0..delta.len() {
delta[i] *= self.activation.derivate(evaluated[i]);
}
// Compute the weight gradient
let weights_gradient = delta.reverse_dot(input);
let weights_gradient = &delta * input.transpose();
let new_epsilon = self.weights.transpose_multiply_vector(&delta);
let new_epsilon = self.weights.tr_mul(&delta);
// According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
// The gradient of the bias is equal to the delta term of the backpropagation algorithm
@ -128,53 +183,12 @@ impl<
(new_epsilon, (weights_gradient, bias_gradient))
}
fn apply_gradient(&mut self, gradient: &Self::Delta) {
NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
}
fn regularize(&self) -> Self::Delta {
let mut res = Self::Delta::default();
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
res.0[i][j] = self.regularization.derivate(self.weights[i][j]);
/// Computes the regularization gradient of the layer.
///
/// The weight part is the derivative of the regularization function applied to each
/// weight; the bias part is all zeros, so biases are left out of the regularization.
fn regularize_layer(&self) -> Self::Gradient {
    let weights_term = self.weights.map(|weight| self.regularization.derivate(weight));
    let bias_term = DVector::zeros(self.bias.shape().0);

    (weights_term, bias_term)
}
}
// Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network
res
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::{
derivable::{activation::Relu, regularize::NeuraL0},
utils::uniform_vector,
};
#[test]
fn test_from_rng() {
let mut rng = rand::thread_rng();
let layer: NeuraDenseLayer<_, _, 64, 32> =
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);
let mut input = [0.0; 64];
for x in 0..64 {
input[x] = rng.gen();
}
assert!(layer.eval(&input.into()).len() == 32);
}
#[test]
fn test_stack_overflow_big_layer() {
let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0)
as NeuraDenseLayer<Relu, NeuraL0, 1000, 1000>;
layer.backpropagate(&uniform_vector(), uniform_vector());
<NeuraDenseLayer<Relu, NeuraL0, 1000, 1000> as NeuraTrainableLayer>::Delta::zero();
/// Adds the weight and bias components of `gradient` to the layer's parameters, in place.
fn apply_gradient(&mut self, gradient: &Self::Gradient) {
    let (weights_gradient, bias_gradient) = gradient;

    self.weights += weights_gradient;
    self.bias += bias_gradient;
}
}

@ -1,39 +1,55 @@
mod dense;
pub use dense::NeuraDenseLayer;
use num::Float;
mod convolution;
pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};
use crate::algebra::NeuraVectorSpace;
mod dropout;
pub use dropout::NeuraDropoutLayer;
pub mod dense;
pub use dense::NeuraDenseLayer;
mod softmax;
pub use softmax::NeuraSoftmaxLayer;
/// The shape of a layer's input or output, as passed to `NeuraPartialLayer::construct`
/// and returned by `NeuraPartialLayer::output_shape`.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum NeuraShape {
/// A one-dimensional shape, described by its number of entries.
Vector(usize), // entries
/// A two-dimensional shape, described by its number of rows and columns.
Matrix(usize, usize), // rows, columns
/// A three-dimensional shape, described by its number of rows, columns and channels.
Tensor(usize, usize, usize), // rows, columns, channels
}
mod one_hot;
pub use one_hot::NeuraOneHotLayer;
impl NeuraShape {
pub fn size(&self) -> usize {
match self {
NeuraShape::Vector(entries) => *entries,
NeuraShape::Matrix(rows, columns) => rows * columns,
NeuraShape::Tensor(rows, columns, channels) => rows * columns * channels
}
}
}
pub trait NeuraLayer<Input> {
type Output;
mod lock;
pub use lock::NeuraLockLayer;
fn eval(&self, input: &Input) -> Self::Output;
}
mod pool;
pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer};
impl<Input: Clone> NeuraLayer<Input> for () {
type Output = Input;
mod reshape;
pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer};
fn eval(&self, input: &Input) -> Self::Output {
input.clone()
}
}
use crate::algebra::NeuraVectorSpace;
pub trait NeuraPartialLayer {
type Constructed;
type Err;
pub trait NeuraLayer {
type Input;
type Output;
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
fn eval(&self, input: &Self::Input) -> Self::Output;
fn output_shape(constructed: &Self::Constructed) -> NeuraShape;
}
pub trait NeuraTrainableLayer: NeuraLayer {
pub trait NeuraTrainableLayer<Input>: NeuraLayer<Input> {
/// The representation of the layer gradient as a vector space
type Delta: NeuraVectorSpace;
type Gradient: NeuraVectorSpace;
fn default_gradient(&self) -> Self::Gradient;
/// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
@ -46,125 +62,28 @@ pub trait NeuraTrainableLayer: NeuraLayer {
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backpropagate(
fn backprop_layer(
&self,
input: &Self::Input,
input: &Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
) -> (Input, Self::Gradient);
/// Computes the regularization
fn regularize(&self) -> Self::Delta;
fn regularize_layer(&self) -> Self::Gradient;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Called before an iteration begins, to allow the layer to set itself up for training.
#[inline(always)]
fn prepare_epoch(&mut self) {}
fn apply_gradient(&mut self, gradient: &Self::Gradient);
/// Called at the end of training, to allow the layer to clean itself up
/// Arbitrary computation that can be executed at the start of an epoch
#[allow(unused_variables)]
#[inline(always)]
fn cleanup(&mut self) {}
fn prepare_layer(&mut self, is_training: bool) {}
}
/// Temporary implementation of neura_layer
#[macro_export]
macro_rules! neura_layer {
( "dense", $( $shape:expr ),*; $activation:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
as neura_layer!("_dense_shape", $($shape),*)
};
( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
as neura_layer!("_dense_shape", $($shape),*)
};
( "_dense_shape", $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, _, $output>
};
( "_dense_shape", $input:expr, $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, $input, $output>
};
( "dropout", $probability:expr ) => {
$crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
as $crate::layer::NeuraDropoutLayer<_, _>
};
( "softmax" ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
};
( "softmax", $length:expr ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
};
( "one_hot" ) => {
$crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _>
};
( "lock", $layer:expr ) => {
$crate::layer::NeuraLockLayer($layer)
};
( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
};
( "conv1d_pad"; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
};
( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
};
( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
};
( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
};
( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
};
( "pool_global"; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
};
( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
};
( "pool1d", $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
};
( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
};
( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
};
( "unstable_flatten" ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64>
};
( "unstable_flatten", $width:expr, $height:expr ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64>
};
( "unstable_reshape", $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64>
};
( "unstable_reshape", $width:expr, $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64>
};
( "dense", $output:expr, $activation:expr ) => {
$crate::layer::dense::NeuraDenseLayer::new_partial($output, rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
}
}

@ -1,12 +1,15 @@
#![feature(generic_arg_infer)]
#![feature(generic_const_exprs)]
#![feature(negative_impls)]
pub mod algebra;
pub mod derivable;
pub mod layer;
// pub mod layer;
pub mod network;
pub mod train;
pub mod layer;
mod utils;
// TODO: move to a different file
@ -17,7 +20,7 @@ pub mod prelude {
pub use crate::{neura_layer, neura_sequential};
// Structs and traits
pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer};
pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail};
pub use crate::layer::*;
pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail, NeuraSequentialBuild};
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
}

@ -2,25 +2,24 @@ use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer};
pub mod sequential;
pub trait NeuraTrainableNetwork: NeuraLayer {
pub trait NeuraTrainableNetwork<Input>: NeuraLayer<Input> {
type Delta: NeuraVectorSpace;
fn default_gradient(&self) -> Self::Delta;
fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backprop_layer` for more information.
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
input: &Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta);
) -> (Input, Self::Delta);
/// Should return the regularization gradient
fn regularize(&self) -> Self::Delta;
/// Called before an iteration begins, to allow the network to set itself up for training.
fn prepare_epoch(&mut self);
/// Called at the end of training, to allow the network to clean itself up
fn cleanup(&mut self);
/// Called before an iteration begins, to allow the network to set itself up for training or not.
fn prepare(&mut self, train_iteration: bool);
}

@ -1,12 +1,14 @@
use num::Float;
use crate::{
derivable::NeuraLoss,
layer::{NeuraLayer, NeuraTrainableLayer},
layer::{NeuraLayer, NeuraTrainableLayer, NeuraShape, NeuraPartialLayer},
};
use super::NeuraTrainableNetwork;
#[derive(Clone, Debug)]
pub struct NeuraSequential<Layer: NeuraLayer, ChildNetwork> {
pub struct NeuraSequential<Layer, ChildNetwork> {
pub layer: Layer,
pub child_network: Box<ChildNetwork>,
}
@ -14,13 +16,13 @@ pub struct NeuraSequential<Layer: NeuraLayer, ChildNetwork> {
/// Operations on the tail end of a sequential network
pub trait NeuraSequentialTail {
type TailTrimmed;
type TailPushed<T: NeuraLayer>;
type TailPushed<T>;
fn trim_tail(self) -> Self::TailTrimmed;
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T>;
fn push_tail<T>(self, layer: T) -> Self::TailPushed<T>;
}
impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
impl<Layer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
pub fn new(layer: Layer, child_network: ChildNetwork) -> Self {
Self {
layer,
@ -28,9 +30,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
}
}
pub fn new_match_output(layer: Layer, child_network: ChildNetwork) -> Self
pub fn new_match_output<Input>(layer: Layer, child_network: ChildNetwork) -> Self
where
ChildNetwork: NeuraLayer<Input = Layer::Output>,
Layer: NeuraLayer<Input>,
ChildNetwork: NeuraLayer<Layer::Output>,
{
Self::new(layer, child_network)
}
@ -39,7 +42,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
*self.child_network
}
pub fn push_front<T: NeuraLayer>(self, layer: T) -> NeuraSequential<T, Self> {
pub fn push_front<Input, Input2, T: NeuraLayer<Input2, Output=Input>>(self, layer: T) -> NeuraSequential<T, Self>
where
Layer: NeuraLayer<Input>
{
NeuraSequential {
layer: layer,
child_network: Box::new(self),
@ -48,15 +54,15 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
}
// Trimming the last layer returns an empty network
impl<Layer: NeuraLayer> NeuraSequentialTail for NeuraSequential<Layer, ()> {
impl<Layer> NeuraSequentialTail for NeuraSequential<Layer, ()> {
type TailTrimmed = ();
type TailPushed<T: NeuraLayer> = NeuraSequential<Layer, NeuraSequential<T, ()>>;
type TailPushed<T> = NeuraSequential<Layer, NeuraSequential<T, ()>>;
fn trim_tail(self) -> Self::TailTrimmed {
()
}
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> {
fn push_tail<T>(self, layer: T) -> Self::TailPushed<T> {
NeuraSequential {
layer: self.layer,
child_network: Box::new(NeuraSequential {
@ -68,11 +74,11 @@ impl<Layer: NeuraLayer> NeuraSequentialTail for NeuraSequential<Layer, ()> {
}
// Trimming another layer returns a network which calls trim recursively
impl<Layer: NeuraLayer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
impl<Layer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
for NeuraSequential<Layer, ChildNetwork>
{
type TailTrimmed = NeuraSequential<Layer, <ChildNetwork as NeuraSequentialTail>::TailTrimmed>;
type TailPushed<T: NeuraLayer> =
type TailPushed<T> =
NeuraSequential<Layer, <ChildNetwork as NeuraSequentialTail>::TailPushed<T>>;
fn trim_tail(self) -> Self::TailTrimmed {
@ -82,7 +88,7 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
}
}
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> {
fn push_tail<T>(self, layer: T) -> Self::TailPushed<T> {
NeuraSequential {
layer: self.layer,
child_network: Box::new(self.child_network.push_tail(layer)),
@ -90,62 +96,55 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
}
}
impl<Layer: NeuraLayer> NeuraLayer for NeuraSequential<Layer, ()> {
type Input = Layer::Input;
type Output = Layer::Output;
fn eval(&self, input: &Self::Input) -> Self::Output {
self.layer.eval(input)
}
}
impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLayer
impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
for NeuraSequential<Layer, ChildNetwork>
{
type Input = Layer::Input;
type Output = ChildNetwork::Output;
fn eval(&self, input: &Self::Input) -> Self::Output {
fn eval(&self, input: &Input) -> Self::Output {
self.child_network.eval(&self.layer.eval(input))
}
}
impl<Layer: NeuraTrainableLayer> NeuraTrainableNetwork for NeuraSequential<Layer, ()> {
type Delta = Layer::Delta;
impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
type Delta = ();
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(gradient);
fn default_gradient(&self) -> () {
()
}
fn apply_gradient(&mut self, _gradient: &()) {
// Noop
}
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
final_activation: &Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
let final_activation = self.layer.eval(input);
) -> (Input, Self::Delta) {
let backprop_epsilon = loss.nabla(target, &final_activation);
self.layer.backpropagate(&input, backprop_epsilon)
}
fn regularize(&self) -> Self::Delta {
self.layer.regularize()
(backprop_epsilon, ())
}
fn prepare_epoch(&mut self) {
self.layer.prepare_epoch();
fn regularize(&self) -> () {
()
}
fn cleanup(&mut self) {
self.layer.cleanup();
fn prepare(&mut self, _is_training: bool) {
// Noop
}
}
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Layer::Output>>
NeuraTrainableNetwork for NeuraSequential<Layer, ChildNetwork>
impl<Input, Layer: NeuraTrainableLayer<Input>, ChildNetwork: NeuraTrainableNetwork<Layer::Output>>
NeuraTrainableNetwork<Input> for NeuraSequential<Layer, ChildNetwork>
{
type Delta = (Layer::Delta, Box<ChildNetwork::Delta>);
type Delta = (Layer::Gradient, Box<ChildNetwork::Delta>);
/// Pairs this layer's default gradient with the boxed default gradient of the child network.
fn default_gradient(&self) -> Self::Delta {
    let layer_gradient = self.layer.default_gradient();
    let child_gradient = self.child_network.default_gradient();

    (layer_gradient, Box::new(child_gradient))
}
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(&gradient.0);
@ -154,16 +153,16 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Lay
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
input: &Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
) -> (Input, Self::Delta) {
let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) =
self.child_network
.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) =
self.layer.backpropagate(input, backprop_gradient);
self.layer.backprop_layer(input, backprop_gradient);
(
backprop_gradient,
@ -173,23 +172,18 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Lay
fn regularize(&self) -> Self::Delta {
(
self.layer.regularize(),
self.layer.regularize_layer(),
Box::new(self.child_network.regularize()),
)
}
fn prepare_epoch(&mut self) {
self.layer.prepare_epoch();
self.child_network.prepare_epoch();
}
fn cleanup(&mut self) {
self.layer.cleanup();
self.child_network.cleanup();
/// Forwards `is_training` down the chain: first to this layer (through `prepare_layer`),
/// then to the child network (through `prepare`).
fn prepare(&mut self, is_training: bool) {
self.layer.prepare_layer(is_training);
self.child_network.prepare(is_training);
}
}
impl<Layer: NeuraLayer> From<Layer> for NeuraSequential<Layer, ()> {
impl<Layer> From<Layer> for NeuraSequential<Layer, ()> {
fn from(layer: Layer) -> Self {
Self {
layer,
@ -198,6 +192,53 @@ impl<Layer: NeuraLayer> From<Layer> for NeuraSequential<Layer, ()> {
}
}
/// Turns a sequential network of not-yet-constructed layers into a constructed network,
/// given the shape of the network's input.
pub trait NeuraSequentialBuild {
/// The network type produced by a successful call to `construct`.
type Constructed;
/// The error type returned when construction fails.
type Err;
/// Consumes `self` and attempts to build the network for inputs of shape `input_shape`.
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
}
/// Error produced while constructing a `NeuraSequential` that has a child network,
/// recording which part of the chain failed.
#[derive(Debug, Clone)]
pub enum NeuraSequentialBuildErr<Err, ChildErr> {
/// The head layer of the network failed to construct.
Current(Err),
/// The child network failed to construct.
Child(ChildErr),
}
impl<Layer: NeuraPartialLayer> NeuraSequentialBuild for NeuraSequential<Layer, ()> {
    type Constructed = NeuraSequential<Layer::Constructed, ()>;
    type Err = Layer::Err;

    /// Base case of the recursive construction: builds the only layer of the network
    /// for `input_shape` and wraps it with an empty (`()`) child network.
    ///
    /// Any error returned by the layer's own `construct` is passed through unchanged.
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        self.layer
            .construct(input_shape)
            .map(|layer| NeuraSequential {
                layer,
                child_network: Box::new(()),
            })
    }
}
impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraSequentialBuild> NeuraSequentialBuild
    for NeuraSequential<Layer, ChildNetwork>
{
    type Constructed = NeuraSequential<Layer::Constructed, ChildNetwork::Constructed>;
    type Err = NeuraSequentialBuildErr<Layer::Err, ChildNetwork::Err>;

    /// Recursive case of the construction: builds the head layer for `input_shape`, then
    /// builds the child network using the head layer's output shape as its input shape.
    ///
    /// # Errors
    ///
    /// Returns `NeuraSequentialBuildErr::Current` if the head layer fails to construct, and
    /// `NeuraSequentialBuildErr::Child` if the child network fails to construct.
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        let layer = self
            .layer
            .construct(input_shape)
            .map_err(NeuraSequentialBuildErr::Current)?;

        // TODO: ensure that this operation (and all recursive operations) are directly allocated on the heap
        let child_network = self
            .child_network
            .construct(Layer::output_shape(&layer))
            .map_err(NeuraSequentialBuildErr::Child)?;

        Ok(NeuraSequential {
            layer,
            child_network: Box::new(child_network),
        })
    }
}
/// A utility to recursively create a NeuraSequential network, while writing it in a declarative and linear fashion.
/// Note that this can quickly create big and unwieldy types.
#[macro_export]
@ -211,41 +252,47 @@ macro_rules! neura_sequential {
};
[ $first:expr, $($rest:expr),+ $(,)? ] => {
$crate::network::sequential::NeuraSequential::new_match_output($first, neura_sequential![$($rest),+])
$crate::network::sequential::NeuraSequential::new($first, neura_sequential![$($rest),+])
};
}
#[cfg(test)]
mod test {
use nalgebra::dvector;
use crate::{
derivable::{activation::Relu, regularize::NeuraL0},
layer::NeuraDenseLayer,
layer::{NeuraDenseLayer, NeuraShape, NeuraLayer},
neura_layer,
};
use super::NeuraSequentialBuild;
#[test]
fn test_neura_network_macro() {
let mut rng = rand::thread_rng();
let _ = neura_sequential![
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 2>
NeuraDenseLayer::from_rng(8, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
NeuraDenseLayer::from_rng(12, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
NeuraDenseLayer::from_rng(16, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>
];
let _ = neura_sequential![
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
NeuraDenseLayer::from_rng(2, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
];
let _ = neura_sequential![
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>,
NeuraDenseLayer::from_rng(8, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
NeuraDenseLayer::from_rng(16, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
];
let _ = neura_sequential![
neura_layer!("dense", 8, 16; Relu),
neura_layer!("dense", 12; Relu),
neura_layer!("dense", 2; Relu)
];
let network = neura_sequential![
neura_layer!("dense", 16, Relu),
neura_layer!("dense", 12, Relu),
neura_layer!("dense", 2, Relu)
].construct(NeuraShape::Vector(2)).unwrap();
network.eval(&dvector![0.0f64, 0.0]);
}
}

@ -0,0 +1,180 @@
use super::{NeuraLayer, NeuraTrainableLayer};
use crate::{
algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace},
derivable::NeuraDerivable,
};
use rand::Rng;
use rand_distr::Distribution;
/// A dense layer taking a vector of `INPUT_LEN` values and producing a vector of `OUTPUT_LEN` values,
/// parametrized by an activation function `Act` and a regularization function `Reg`.
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> {
// Weight matrix, multiplied with the input vector when the layer is evaluated
weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
// Bias, added to each component of the product before the activation is applied
bias: NeuraVector<OUTPUT_LEN, f64>,
// Activation function, applied component-wise to produce the output
activation: Act,
// Function whose derivative, evaluated on each weight, gives the regularization term
regularization: Reg,
}
impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    /// Creates a dense layer from explicitly provided weights, bias,
    /// activation function and regularization function.
    pub fn new(
        weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
        bias: NeuraVector<OUTPUT_LEN, f64>,
        activation: Act,
        regularization: Reg,
    ) -> Self {
        Self {
            weights,
            bias,
            activation,
            regularization,
        }
    }

    /// Creates a dense layer whose weights are randomly drawn from a normal distribution
    /// centered on zero, and whose biases are all set to `activation.bias_hint()`.
    ///
    /// The variance of the distribution is
    /// `activation.variance_hint() * 2 / (INPUT_LEN + OUTPUT_LEN)`.
    ///
    /// # Panics
    ///
    /// Panics if `rand_distr::Normal::new` rejects the computed standard deviation.
    pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
        let mut weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64> = NeuraMatrix::from_value(0.0f64);

        // Use Xavier (or He) initialisation, using the harmonic mean
        // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html
        let variance: f64 =
            activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64);

        // `Normal::new` expects the standard deviation, not the variance,
        // so the square root of the target variance has to be passed.
        let distribution = rand_distr::Normal::new(0.0, variance.sqrt()).unwrap();
        // let distribution = rand_distr::Uniform::new(-0.5, 0.5);

        for i in 0..OUTPUT_LEN {
            for j in 0..INPUT_LEN {
                weights[i][j] = distribution.sample(rng);
            }
        }

        Self {
            weights,
            // Biases are initialized based on the activation's hint
            bias: NeuraVector::from_value(activation.bias_hint()),
            activation,
            regularization,
        }
    }
}
impl<Act, Reg, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraLayer
    for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
where
    Act: NeuraDerivable<f64>,
    Reg: NeuraDerivable<f64>,
{
    type Input = NeuraVector<INPUT_LEN, f64>;
    type Output = NeuraVector<OUTPUT_LEN, f64>;

    /// Evaluates the layer: multiplies the weights with `input`, adds the bias
    /// and applies the activation function to each component.
    fn eval(&self, input: &Self::Input) -> Self::Output {
        let mut output = self.weights.multiply_vector(input);

        for index in 0..OUTPUT_LEN {
            let pre_activation = output[index] + self.bias[index];
            output[index] = self.activation.eval(pre_activation);
        }

        output
    }
}
impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    /// Gradient of the layer: the gradient of the weights, followed by the gradient of the bias.
    type Delta = (
        NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
        NeuraVector<OUTPUT_LEN, f64>,
    );

    /// Computes the backpropagation term to hand over to the previous layer,
    /// together with the gradient of the weights and of the bias of this layer.
    ///
    /// `input` is the input that was given to this layer, and `epsilon` is the
    /// gradient of the loss with respect to the output of this layer.
    fn backpropagate(
        &self,
        input: &Self::Input,
        epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta) {
        let evaluated = self.weights.multiply_vector(input);

        // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
        // with `self.activation'(input) ° epsilon = delta`
        // `epsilon` is owned, so it is reused as the storage for `delta` instead of being cloned.
        let mut delta: NeuraVector<OUTPUT_LEN, f64> = epsilon;
        for i in 0..OUTPUT_LEN {
            // The derivative must be taken at the same point at which `eval` applies the activation,
            // that is, at `weights * input + bias` (and not at `weights * input` alone).
            delta[i] *= self.activation.derivate(evaluated[i] + self.bias[i]);
        }

        // Compute the weight gradient
        let weights_gradient = delta.reverse_dot(input);

        let new_epsilon = self.weights.transpose_multiply_vector(&delta);

        // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
        // The gradient of the bias is equal to the delta term of the backpropagation algorithm
        let bias_gradient = delta;

        (new_epsilon, (weights_gradient, bias_gradient))
    }

    /// Adds `gradient` to the weights and to the bias of the layer.
    fn apply_gradient(&mut self, gradient: &Self::Delta) {
        NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
        NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
    }

    /// Computes the regularization term: the derivative of the regularization function,
    /// evaluated on each weight. The bias part is left at its default value.
    fn regularize(&self) -> Self::Delta {
        let mut res = Self::Delta::default();

        for i in 0..OUTPUT_LEN {
            for j in 0..INPUT_LEN {
                res.0[i][j] = self.regularization.derivate(self.weights[i][j]);
            }
        }

        // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network

        res
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::{
        derivable::{activation::Relu, regularize::NeuraL0},
        utils::uniform_vector,
    };

    /// A randomly-initialized layer with 64 inputs and 32 outputs
    /// must produce an output of length 32.
    #[test]
    fn test_from_rng() {
        let mut rng = rand::thread_rng();
        let layer: NeuraDenseLayer<_, _, 64, 32> =
            NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);

        let mut input = [0.0; 64];
        for value in input.iter_mut() {
            *value = rng.gen();
        }

        let output = layer.eval(&input.into());
        assert!(output.len() == 32);
    }

    /// Creating a 1000x1000 layer, backpropagating through it and creating
    /// a zero gradient for it must all complete without crashing.
    #[test]
    fn test_stack_overflow_big_layer() {
        type BigLayer = NeuraDenseLayer<Relu, NeuraL0, 1000, 1000>;

        let mut rng = rand::thread_rng();
        let layer = NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as BigLayer;

        layer.backpropagate(&uniform_vector(), uniform_vector());

        <BigLayer as NeuraTrainableLayer>::Delta::zero();
    }
}

@ -0,0 +1,170 @@
mod dense;
pub use dense::NeuraDenseLayer;
mod convolution;
pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};
mod dropout;
pub use dropout::NeuraDropoutLayer;
mod softmax;
pub use softmax::NeuraSoftmaxLayer;
mod one_hot;
pub use one_hot::NeuraOneHotLayer;
mod lock;
pub use lock::NeuraLockLayer;
mod pool;
pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer};
mod reshape;
pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer};
use crate::algebra::NeuraVectorSpace;
/// A layer that can be evaluated: it turns a value of type `Input` into a value of type `Output`.
pub trait NeuraLayer {
/// The type of the values accepted by the layer.
type Input;
/// The type of the values produced by the layer.
type Output;
/// Evaluates the layer on `input`, without modifying the layer.
fn eval(&self, input: &Self::Input) -> Self::Output;
}
/// A layer whose internal weights can be trained using backpropagation.
pub trait NeuraTrainableLayer: NeuraLayer {
/// The representation of the layer gradient as a vector space
type Delta: NeuraVectorSpace;
/// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
///
/// Note: we introduce the term `epsilon`, which together with the derivative of the activation of the current layer
/// can be used to compute `delta_l`:
/// ```no_rust
/// f_l'(a_l) * epsilon_l = delta_l
/// ```
///
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to be processed
/// (layer `l-1`) to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
/// Computes the regularization term of the layer, expressed in the same vector space as its gradient
fn regularize(&self) -> Self::Delta;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Called before an iteration begins, to allow the layer to set itself up for training.
/// Does nothing by default.
#[inline(always)]
fn prepare_epoch(&mut self) {}
/// Called at the end of training, to allow the layer to clean itself up.
/// Does nothing by default.
#[inline(always)]
fn cleanup(&mut self) {}
}
/// Creates a layer from a string tag (`"dense"`, `"dropout"`, `"softmax"`, ...) followed by its parameters.
///
/// Each arm expands to a call to the constructor of the corresponding layer type,
/// cast with `as` to that layer type so that its generic parameters are either
/// set to the values passed to the macro or left to be inferred (`_`).
#[macro_export]
macro_rules! neura_layer {
// Dense layer with the given activation; `NeuraL0` is used as the regularization.
// The shape is either `output` or `input, output`.
( "dense", $( $shape:expr ),*; $activation:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
as neura_layer!("_dense_shape", $($shape),*)
};
// Dense layer with the given activation and regularization.
( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
as neura_layer!("_dense_shape", $($shape),*)
};
// Internal helper: type of a dense layer whose output length is given and whose input length is inferred.
( "_dense_shape", $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, _, $output>
};
// Internal helper: type of a dense layer whose input and output lengths are both given.
( "_dense_shape", $input:expr, $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, $input, $output>
};
// Dropout layer, created from a probability and `rand::thread_rng()`.
( "dropout", $probability:expr ) => {
$crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
as $crate::layer::NeuraDropoutLayer<_, _>
};
// Softmax layer, with an inferred or an explicit length.
( "softmax" ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
};
( "softmax", $length:expr ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
};
// One-hot layer; its first generic parameter is fixed to 2 and its second one is inferred.
( "one_hot" ) => {
$crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _>
};
// Wraps an existing layer in a `NeuraLockLayer`.
( "lock", $layer:expr ) => {
$crate::layer::NeuraLockLayer($layer)
};
// 1D padded convolution around an inner layer, with explicit or inferred length and features.
( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
};
( "conv1d_pad"; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
};
// 2D padded convolution around an inner layer; `$width` is passed to the constructor at runtime.
( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
};
( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
};
// 2D block convolution around an inner layer, with explicit or inferred features.
( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
};
( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
};
// Global pooling layer using the given reduction, with inferred or explicit dimensions.
( "pool_global"; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
};
( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
};
// 1D pooling layer using the given reduction; the number of blocks and of features may be left inferred.
( "pool1d", $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
};
( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
};
( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
};
// Flatten layer over `f64` values, with inferred or explicit width and height.
( "unstable_flatten" ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64>
};
( "unstable_flatten", $width:expr, $height:expr ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64>
};
// Reshape layer over `f64` values, with an inferred or an explicit width.
( "unstable_reshape", $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64>
};
( "unstable_reshape", $width:expr, $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64>
};
}

@ -5,26 +5,20 @@ use crate::{
network::{sequential::NeuraSequential, NeuraTrainableNetwork},
};
pub trait NeuraGradientSolver<Output, Target = Output> {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<Input>> {
fn get_gradient(
&self,
trainable: &NeuraSequential<Layer, ChildNetwork>,
input: &Layer::Input,
trainable: &Trainable,
input: &Input,
target: &Target,
) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = Output>;
) -> Trainable::Delta;
fn score<Layer: NeuraLayer, ChildNetwork>(
fn score(
&self,
trainable: &NeuraSequential<Layer, ChildNetwork>,
input: &Layer::Input,
trainable: &Trainable,
input: &Input,
target: &Target,
) -> f64
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = Output>;
) -> f64;
}
#[non_exhaustive]
@ -38,32 +32,24 @@ impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
}
}
impl<const N: usize, Loss: NeuraLoss<Input = NeuraVector<N, f64>> + Clone>
NeuraGradientSolver<NeuraVector<N, f64>, Loss::Target> for NeuraBackprop<Loss>
impl<Input, Target, Trainable: NeuraTrainableNetwork<Input>, Loss: NeuraLoss<Input = Trainable::Output, Target = Target> + Clone>
NeuraGradientSolver<Input, Target, Trainable> for NeuraBackprop<Loss>
{
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
fn get_gradient(
&self,
trainable: &NeuraSequential<Layer, ChildNetwork>,
input: &Layer::Input,
target: &Loss::Target,
) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = NeuraVector<N, f64>>,
{
trainable: &Trainable,
input: &Input,
target: &Target,
) -> Trainable::Delta {
trainable.backpropagate(input, target, self.loss.clone()).1
}
fn score<Layer: NeuraLayer, ChildNetwork>(
fn score(
&self,
trainable: &NeuraSequential<Layer, ChildNetwork>,
input: &Layer::Input,
target: &Loss::Target,
) -> f64
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = NeuraVector<N, f64>>,
{
trainable: &Trainable,
input: &Input,
target: &Target,
) -> f64 {
let output = trainable.eval(&input);
self.loss.eval(target, &output)
}
@ -137,41 +123,32 @@ impl NeuraBatchedTrainer {
}
pub fn train<
Output,
Input: Clone,
Target: Clone,
GradientSolver: NeuraGradientSolver<Output, Target>,
Layer: NeuraLayer,
ChildNetwork,
Inputs: IntoIterator<Item = (Layer::Input, Target)>,
Network: NeuraTrainableNetwork<Input>,
GradientSolver: NeuraGradientSolver<Input, Target, Network>,
Inputs: IntoIterator<Item = (Input, Target)>,
>(
&self,
gradient_solver: GradientSolver,
network: &mut NeuraSequential<Layer, ChildNetwork>,
network: &mut Network,
inputs: Inputs,
test_inputs: &[(Layer::Input, Target)],
) where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = Output>,
Layer::Input: Clone,
{
test_inputs: &[(Input, Target)],
) {
let mut iter = inputs.into_iter();
let factor = -self.learning_rate / (self.batch_size as f64);
let momentum_factor = self.learning_momentum / self.learning_rate;
let reg_factor = -self.learning_rate;
// Contains `momentum_factor * factor * gradient_sum_previous_iter`
let mut previous_gradient_sum =
Box::<<NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta>::zero();
let mut previous_gradient_sum = network.default_gradient();
'd: for iteration in 0..self.iterations {
let mut gradient_sum = Box::<
<NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta,
>::zero();
network.prepare_epoch();
let mut gradient_sum = network.default_gradient();
network.prepare(true);
for _ in 0..self.batch_size {
if let Some((input, target)) = iter.next() {
let gradient =
Box::new(gradient_solver.get_gradient(&network, &input, &target));
let gradient = gradient_solver.get_gradient(&network, &input, &target);
gradient_sum.add_assign(&gradient);
} else {
break 'd;
@ -194,7 +171,7 @@ impl NeuraBatchedTrainer {
}
if self.log_iterations > 0 && (iteration + 1) % self.log_iterations == 0 {
network.cleanup();
network.prepare(false);
let mut loss_sum = 0.0;
for (input, target) in test_inputs {
loss_sum += gradient_solver.score(&network, input, target);
@ -204,12 +181,14 @@ impl NeuraBatchedTrainer {
}
}
network.cleanup();
network.prepare(false);
}
}
#[cfg(test)]
mod test {
use nalgebra::{DMatrix, dmatrix, dvector};
use super::*;
use crate::{
assert_approx,
@ -224,19 +203,19 @@ mod test {
for wa in [0.0, 0.25, 0.5, 1.0] {
for wb in [0.0, 0.25, 0.5, 1.0] {
let network = NeuraSequential::new(
NeuraDenseLayer::new([[wa, wb]].into(), [0.0].into(), Linear, NeuraL0),
NeuraDenseLayer::new(dmatrix![wa, wb], dvector![0.0], Linear, NeuraL0),
(),
);
let gradient = NeuraBackprop::new(Euclidean).get_gradient(
let (gradient, _) = NeuraBackprop::new(Euclidean).get_gradient(
&network,
&[1.0, 1.0].into(),
&[0.0].into(),
&dvector![1.0, 1.0],
&dvector![0.0],
);
let expected = wa + wb;
assert!((gradient.0[0][0] - expected) < 0.001);
assert!((gradient.0[0][1] - expected) < 0.001);
assert!((gradient.0[(0, 0)] - expected) < 0.001);
assert!((gradient.0[(0, 1)] - expected) < 0.001);
}
}
}
@ -247,42 +226,42 @@ mod test {
// Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/
let network = neura_sequential![
NeuraDenseLayer::new(
[[0.11, 0.21], [0.12, 0.08]].into(),
[0.0; 2].into(),
dmatrix![0.11, 0.21; 0.12, 0.08],
dvector![0.0, 0.0],
Linear,
NeuraL0
),
NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0)
NeuraDenseLayer::new(dmatrix![0.14, 0.15], dvector![0.0], Linear, NeuraL0)
];
let input = [2.0, 3.0];
let target = [1.0];
let input = dvector![2.0, 3.0];
let target = dvector![1.0];
let intermediary = network.clone().trim_tail().eval(&input.into());
let intermediary = network.clone().trim_tail().eval(&input);
assert_approx!(0.85, intermediary[0], EPSILON);
assert_approx!(0.48, intermediary[1], EPSILON);
assert_approx!(0.191, network.eval(&input.into())[0], EPSILON);
assert_approx!(0.191, network.eval(&input)[0], EPSILON);
assert_approx!(
0.327,
Euclidean.eval(&target.into(), &network.eval(&input.into())),
Euclidean.eval(&target, &network.eval(&input)),
0.001
);
let delta = network.eval(&input.into())[0] - target[0];
let delta = network.eval(&input)[0] - target[0];
let (gradient_first, gradient_second) =
NeuraBackprop::new(Euclidean).get_gradient(&network, &input.into(), &target.into());
NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target);
let gradient_first = gradient_first.0;
let gradient_second = gradient_second.0[0];
let gradient_second = gradient_second.0.0;
assert_approx!(gradient_second[0], intermediary[0] * delta, EPSILON);
assert_approx!(gradient_second[1], intermediary[1] * delta, EPSILON);
assert_approx!(gradient_first[0][0], input[0] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[0][1], input[1] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[(0, 0)], input[0] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[(0, 1)], input[1] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[1][0], input[0] * delta * 0.15, EPSILON);
assert_approx!(gradient_first[1][1], input[1] * delta * 0.15, EPSILON);
assert_approx!(gradient_first[(1, 0)], input[0] * delta * 0.15, EPSILON);
assert_approx!(gradient_first[(1, 1)], input[1] * delta * 0.15, EPSILON);
}
}

Loading…
Cancel
Save