🔥 🚚 ♻️ Refactoring the previous layer system

It was becoming almost impossible to manage the dimensions of the layers,
especially with convolution layers. Const generics are nice, but they are still too
immature for this use case. We'll probably expand the implementations to accept either
const-sized or dynamically-sized layers at some point, for performance-critical applications.
Branch: main
Author: Shad Amethyst, 2 years ago
Parent: cc7686569a
Commit: 2edbff860c
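
For quick reference, here is the new construction flow in one place, mirroring the updated examples/xor.rs below (a minimal sketch; it assumes the crate layout as of this commit):

use nalgebra::dvector;
use neuramethyst::derivable::activation::Relu;
use neuramethyst::prelude::*;

fn main() {
    // Layers no longer carry const-generic dimensions: each one only states its
    // output size, and the input size (here 2) is supplied once, at construction.
    let network = neura_sequential![
        neura_layer!("dense", 4, Relu),
        neura_layer!("dense", 3, Relu),
        neura_layer!("dense", 1, Relu)
    ]
    .construct(NeuraShape::Vector(2))
    .unwrap();

    // Shapes are now checked when the network is built and evaluated, not by the type system.
    let output = network.eval(&dvector![0.0f64, 1.0]);
    println!("{}", output);
}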

@ -7,6 +7,7 @@ edition = "2021"
[dependencies] [dependencies]
boxed-array = "0.1.0" boxed-array = "0.1.0"
nalgebra = { version = "^0.32", features = ["std", "macros", "rand"] }
ndarray = "^0.15" ndarray = "^0.15"
num = "^0.4" num = "^0.4"
# num-traits = "0.2.15" # num-traits = "0.2.15"

@ -1,22 +1,24 @@
#![feature(generic_arg_infer)] #![feature(generic_arg_infer)]
use neuramethyst::algebra::NeuraVector; use nalgebra::dvector;
use neuramethyst::derivable::activation::Relu; use neuramethyst::derivable::activation::Relu;
use neuramethyst::derivable::loss::Euclidean; use neuramethyst::derivable::loss::Euclidean;
use neuramethyst::{cycle_shuffling, prelude::*}; use neuramethyst::prelude::*;
use neuramethyst::cycle_shuffling;
fn main() { fn main() {
let mut network = neura_sequential![ let mut network = neura_sequential![
neura_layer!("dense", 2, 4; Relu), neura_layer!("dense", 4, Relu),
neura_layer!("dense", 3; Relu), neura_layer!("dense", 3, Relu),
neura_layer!("dense", 1; Relu) neura_layer!("dense", 1, Relu)
]; ].construct(NeuraShape::Vector(2)).unwrap();
let inputs: [(NeuraVector<2, f64>, NeuraVector<1, f64>); 4] = [ let inputs = [
([0.0, 0.0].into(), [0.0].into()), (dvector![0.0, 0.0], dvector![0.0]),
([0.0, 1.0].into(), [1.0].into()), (dvector![0.0, 1.0], dvector![1.0]),
([1.0, 0.0].into(), [1.0].into()), (dvector![1.0, 0.0], dvector![1.0]),
([1.0, 1.0].into(), [0.0].into()), (dvector![1.0, 1.0], dvector![0.0]),
]; ];
for (input, target) in &inputs { for (input, target) in &inputs {

@ -167,10 +167,10 @@ impl<const WIDTH: usize, const HEIGHT: usize, F: NeuraVectorSpace + Clone> Neura
} }
} }
#[inline(always)] // #[inline(always)]
fn zero() -> Self { // fn zero() -> Self {
Self::from_value(F::zero()) // Self::from_value(F::zero())
} // }
fn norm_squared(&self) -> f64 { fn norm_squared(&self) -> f64 {
let mut sum = 0.0; let mut sum = 0.0;

@ -2,6 +2,8 @@ mod matrix;
pub use matrix::NeuraMatrix; pub use matrix::NeuraMatrix;
mod vector; mod vector;
use nalgebra::Matrix;
use num::Float;
pub use vector::NeuraVector; pub use vector::NeuraVector;
/// An extension of `std::ops::AddAssign` and `std::ops::Default` /// An extension of `std::ops::AddAssign` and `std::ops::Default`
@ -10,7 +12,7 @@ pub trait NeuraVectorSpace {
fn mul_assign(&mut self, by: f64); fn mul_assign(&mut self, by: f64);
fn zero() -> Self; // fn zero() -> Self;
fn norm_squared(&self) -> f64; fn norm_squared(&self) -> f64;
} }
@ -26,10 +28,10 @@ impl NeuraVectorSpace for () {
// Noop // Noop
} }
#[inline(always)] // #[inline(always)]
fn zero() -> Self { // fn zero() -> Self {
() // ()
} // }
fn norm_squared(&self) -> f64 { fn norm_squared(&self) -> f64 {
0.0 0.0
@ -45,9 +47,9 @@ impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
self.as_mut().mul_assign(by); self.as_mut().mul_assign(by);
} }
fn zero() -> Self { // fn zero() -> Self {
Box::new(T::zero()) // Box::new(T::zero())
} // }
fn norm_squared(&self) -> f64 { fn norm_squared(&self) -> f64 {
self.as_ref().norm_squared() self.as_ref().norm_squared()
@ -65,9 +67,9 @@ impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left
NeuraVectorSpace::mul_assign(&mut self.1, by); NeuraVectorSpace::mul_assign(&mut self.1, by);
} }
fn zero() -> Self { // fn zero() -> Self {
(Left::zero(), Right::zero()) // (Left::zero(), Right::zero())
} // }
fn norm_squared(&self) -> f64 { fn norm_squared(&self) -> f64 {
self.0.norm_squared() + self.1.norm_squared() self.0.norm_squared() + self.1.norm_squared()
@ -87,21 +89,40 @@ impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
} }
} }
fn zero() -> Self { // fn zero() -> Self {
let mut res: Vec<T> = Vec::with_capacity(N); // let mut res: Vec<T> = Vec::with_capacity(N);
for _ in 0..N { // for _ in 0..N {
res.push(T::zero()); // res.push(T::zero());
// }
// res.try_into().unwrap_or_else(|_| {
// // TODO: check that this panic is optimized away
// unreachable!()
// })
// }
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
}
} }
res.try_into().unwrap_or_else(|_| { impl<F: Float, R: nalgebra::Dim, C: nalgebra::Dim, S: nalgebra::RawStorage<F, R, C>> NeuraVectorSpace for Matrix<F, R, C, S>
// TODO: check that this panic is optimized away where
unreachable!() Matrix<F, R, C, S>: std::ops::MulAssign<F>,
}) for<'c> Matrix<F, R, C, S>: std::ops::AddAssign<&'c Matrix<F, R, C, S>>,
F: From<f64> + Into<f64>
{
fn add_assign(&mut self, other: &Self) {
*self += other;
}
fn mul_assign(&mut self, by: f64) {
*self *= <F as From<f64>>::from(by);
} }
fn norm_squared(&self) -> f64 { fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum() self.iter().map(|x| *x * *x).reduce(|sum, curr| sum + curr).unwrap_or(F::zero()).into()
} }
} }
@ -116,9 +137,9 @@ macro_rules! base {
std::ops::MulAssign::mul_assign(self, other as $type); std::ops::MulAssign::mul_assign(self, other as $type);
} }
fn zero() -> Self { // fn zero() -> Self {
<Self as Default>::default() // <Self as Default>::default()
} // }
fn norm_squared(&self) -> f64 { fn norm_squared(&self) -> f64 {
(self * self) as f64 (self * self) as f64

@ -95,10 +95,10 @@ impl<const LENGTH: usize, F: Float + From<f64> + Into<f64>> NeuraVectorSpace
} }
} }
#[inline(always)] // #[inline(always)]
fn zero() -> Self { // fn zero() -> Self {
Self::from_value(F::zero()) // Self::from_value(F::zero())
} // }
fn norm_squared(&self) -> f64 { fn norm_squared(&self) -> f64 {
let mut sum = F::zero(); let mut sum = F::zero();

@ -1,19 +1,22 @@
use nalgebra::DVector;
use crate::algebra::NeuraVector; use crate::algebra::NeuraVector;
use super::NeuraLoss; use super::NeuraLoss;
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub struct Euclidean<const N: usize>; pub struct Euclidean;
impl<const N: usize> NeuraLoss for Euclidean<N> { impl NeuraLoss for Euclidean {
type Input = NeuraVector<N, f64>; type Input = DVector<f64>;
type Target = NeuraVector<N, f64>; type Target = DVector<f64>;
#[inline] #[inline]
fn eval(&self, target: &NeuraVector<N, f64>, actual: &NeuraVector<N, f64>) -> f64 { fn eval(&self, target: &DVector<f64>, actual: &DVector<f64>) -> f64 {
assert_eq!(target.shape(), actual.shape());
let mut sum_squared = 0.0; let mut sum_squared = 0.0;
for i in 0..N { for i in 0..target.len() {
sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]); sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]);
} }
@ -23,13 +26,13 @@ impl<const N: usize> NeuraLoss for Euclidean<N> {
#[inline] #[inline]
fn nabla( fn nabla(
&self, &self,
target: &NeuraVector<N, f64>, target: &DVector<f64>,
actual: &NeuraVector<N, f64>, actual: &DVector<f64>,
) -> NeuraVector<N, f64> { ) -> DVector<f64> {
let mut res = NeuraVector::default(); let mut res = DVector::zeros(target.len());
// ∂E(y)/∂yᵢ = yᵢ - yᵢ' // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
for i in 0..N { for i in 0..target.len() {
res[i] = actual[i] - target[i]; res[i] = actual[i] - target[i];
} }
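
With the const parameter gone from Euclidean, the dimension check moves from the type system to the assert_eq! above. A minimal sketch of calling the reworked loss (assuming the module paths shown in this diff):

use nalgebra::dvector;
use neuramethyst::derivable::loss::Euclidean;
use neuramethyst::derivable::NeuraLoss;

fn main() {
    // Both vectors must have the same length; `eval` now asserts this at runtime.
    let loss = Euclidean.eval(&dvector![1.0, 0.0], &dvector![0.8, 0.1]);
    println!("loss = {}", loss);
}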

@ -1,38 +1,49 @@
use super::{NeuraLayer, NeuraTrainableLayer}; use std::marker::PhantomData;
use crate::{
algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace},
derivable::NeuraDerivable,
};
use nalgebra::{DMatrix, DVector};
use num::Float;
use rand::Rng; use rand::Rng;
use rand_distr::Distribution;
use crate::derivable::NeuraDerivable;
use super::*;
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>> {
weights: DMatrix<F>,
bias: DVector<F>,
activation: Act,
regularization: Reg,
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct NeuraDenseLayer< pub struct NeuraDenseLayerPartial<
Act: NeuraDerivable<f64>, F: Float,
Reg: NeuraDerivable<f64>, Act: NeuraDerivable<F>,
const INPUT_LEN: usize, Reg: NeuraDerivable<F>,
const OUTPUT_LEN: usize, R: Rng,
> { > {
weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
bias: NeuraVector<OUTPUT_LEN, f64>,
activation: Act, activation: Act,
regularization: Reg, regularization: Reg,
output_size: usize,
rng: R,
phantom: PhantomData<F>,
} }
impl< impl<
Act: NeuraDerivable<f64>, F: Float + From<f64> + std::fmt::Debug + 'static,
Reg: NeuraDerivable<f64>, Act: NeuraDerivable<F>,
const INPUT_LEN: usize, Reg: NeuraDerivable<F>,
const OUTPUT_LEN: usize, > NeuraDenseLayer<F, Act, Reg>
> NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{ {
pub fn new( pub fn new(
weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>, weights: DMatrix<F>,
bias: NeuraVector<OUTPUT_LEN, f64>, bias: DVector<F>,
activation: Act, activation: Act,
regularization: Reg, regularization: Reg,
) -> Self { ) -> Self {
assert_eq!(bias.shape().0, weights.shape().0);
Self { Self {
weights, weights,
bias, bias,
@ -41,85 +52,129 @@ impl<
} }
} }
pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { pub fn from_rng(
let mut weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64> = NeuraMatrix::from_value(0.0f64); input_size: usize,
output_size: usize,
// Use Xavier (or He) initialisation, using the harmonic mean rng: &mut impl Rng,
// Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html activation: Act,
regularization: Reg,
) -> Self
where
rand_distr::StandardNormal: rand_distr::Distribution<F>,
{
let distribution = rand_distr::Normal::new( let distribution = rand_distr::Normal::new(
0.0, F::zero(),
activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64), <F as From<f64>>::from(
activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64),
),
) )
.unwrap(); .unwrap();
// let distribution = rand_distr::Uniform::new(-0.5, 0.5);
for i in 0..OUTPUT_LEN { Self {
for j in 0..INPUT_LEN { weights: DMatrix::from_distribution(output_size, input_size, &distribution, rng),
weights[i][j] = distribution.sample(rng); bias: DVector::from_element(
output_size,
<F as From<f64>>::from(activation.bias_hint()),
),
activation,
regularization,
} }
} }
Self { pub fn new_partial<R: Rng>(
weights, output_size: usize,
// Biases are initialized based on the activation's hint rng: R,
bias: NeuraVector::from_value(activation.bias_hint()), activation: Act,
regularization: Reg,
) -> NeuraDenseLayerPartial<F, Act, Reg, R> {
NeuraDenseLayerPartial {
activation, activation,
regularization, regularization,
output_size,
rng,
phantom: PhantomData,
} }
} }
} }
impl< impl<
Act: NeuraDerivable<f64>, F: Float + From<f64> + std::fmt::Debug + 'static,
Reg: NeuraDerivable<f64>, Act: NeuraDerivable<F>,
const INPUT_LEN: usize, Reg: NeuraDerivable<F>,
const OUTPUT_LEN: usize, R: Rng,
> NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN> > NeuraPartialLayer for NeuraDenseLayerPartial<F, Act, Reg, R>
where
rand_distr::StandardNormal: rand_distr::Distribution<F>,
{ {
type Input = NeuraVector<INPUT_LEN, f64>; type Constructed = NeuraDenseLayer<F, Act, Reg>;
type Err = ();
type Output = NeuraVector<OUTPUT_LEN, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output { fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
let mut result = self.weights.multiply_vector(input); let mut rng = self.rng;
Ok(NeuraDenseLayer::from_rng(
input_shape.size(),
self.output_size,
&mut rng,
self.activation,
self.regularization,
))
}
for i in 0..OUTPUT_LEN { fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
result[i] = self.activation.eval(result[i] + self.bias[i]); NeuraShape::Vector(constructed.weights.shape().0)
}
} }
result impl<
F: Float + From<f64> + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{
type Output = DVector<F>;
fn eval(&self, input: &DVector<F>) -> Self::Output {
assert_eq!(input.shape().0, self.weights.shape().1);
let res = &self.weights * input + &self.bias;
res.map(|x| self.activation.eval(x))
} }
} }
impl< impl<
Act: NeuraDerivable<f64>, F: Float + From<f64> + Into<f64> + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Reg: NeuraDerivable<f64>, Act: NeuraDerivable<F>,
const INPUT_LEN: usize, Reg: NeuraDerivable<F>,
const OUTPUT_LEN: usize, > NeuraTrainableLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
> NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{ {
type Delta = ( type Gradient = (DMatrix<F>, DVector<F>);
NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
NeuraVector<OUTPUT_LEN, f64>, fn default_gradient(&self) -> Self::Gradient {
); (
DMatrix::zeros(self.weights.shape().0, self.weights.shape().1),
DVector::zeros(self.bias.shape().0),
)
}
fn backpropagate( fn backprop_layer(
&self, &self,
input: &Self::Input, input: &DVector<F>,
epsilon: Self::Output, epsilon: Self::Output,
) -> (Self::Input, Self::Delta) { ) -> (DVector<F>, Self::Gradient) {
let evaluated = self.weights.multiply_vector(input); let evaluated = &self.weights * input;
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta` // with `self.activation'(input) ° epsilon = delta`
let mut delta: NeuraVector<OUTPUT_LEN, f64> = epsilon.clone(); let mut delta = epsilon.clone();
for i in 0..OUTPUT_LEN {
for i in 0..delta.len() {
delta[i] *= self.activation.derivate(evaluated[i]); delta[i] *= self.activation.derivate(evaluated[i]);
} }
// Compute the weight gradient // Compute the weight gradient
let weights_gradient = delta.reverse_dot(input); let weights_gradient = &delta * input.transpose();
let new_epsilon = self.weights.transpose_multiply_vector(&delta); let new_epsilon = self.weights.tr_mul(&delta);
// According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
// The gradient of the bias is equal to the delta term of the backpropagation algorithm // The gradient of the bias is equal to the delta term of the backpropagation algorithm
@ -128,53 +183,12 @@ impl<
(new_epsilon, (weights_gradient, bias_gradient)) (new_epsilon, (weights_gradient, bias_gradient))
} }
fn apply_gradient(&mut self, gradient: &Self::Delta) { fn regularize_layer(&self) -> Self::Gradient {
NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0); (self.weights.map(|x| self.regularization.derivate(x)), DVector::zeros(self.bias.shape().0))
NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
}
fn regularize(&self) -> Self::Delta {
let mut res = Self::Delta::default();
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
res.0[i][j] = self.regularization.derivate(self.weights[i][j]);
}
} }
// Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network fn apply_gradient(&mut self, gradient: &Self::Gradient) {
self.weights += &gradient.0;
res self.bias += &gradient.1;
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::{
derivable::{activation::Relu, regularize::NeuraL0},
utils::uniform_vector,
};
#[test]
fn test_from_rng() {
let mut rng = rand::thread_rng();
let layer: NeuraDenseLayer<_, _, 64, 32> =
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);
let mut input = [0.0; 64];
for x in 0..64 {
input[x] = rng.gen();
}
assert!(layer.eval(&input.into()).len() == 32);
}
#[test]
fn test_stack_overflow_big_layer() {
let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0)
as NeuraDenseLayer<Relu, NeuraL0, 1000, 1000>;
layer.backpropagate(&uniform_vector(), uniform_vector());
<NeuraDenseLayer<Relu, NeuraL0, 1000, 1000> as NeuraTrainableLayer>::Delta::zero();
} }
} }
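
For reference, the quantities returned by `backprop_layer` above are the standard dense-layer backpropagation identities, in the notation of the `NeuraTrainableLayer` docs: `delta_l = f_l'(evaluated) ∘ epsilon_l` (element-wise, with `evaluated = W_l * input`), the weight gradient `δW_l = delta_l * inputᵀ` (the `&delta * input.transpose()` term), the bias gradient `δb_l = delta_l`, and the term handed back to the previous layer `epsilon_{l-1} = W_lᵀ * delta_l` (the `self.weights.tr_mul(&delta)` term).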

@ -1,39 +1,55 @@
mod dense; use num::Float;
pub use dense::NeuraDenseLayer;
mod convolution; use crate::algebra::NeuraVectorSpace;
pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};
mod dropout; pub mod dense;
pub use dropout::NeuraDropoutLayer; pub use dense::NeuraDenseLayer;
mod softmax; #[derive(Clone, Copy, PartialEq, Debug)]
pub use softmax::NeuraSoftmaxLayer; pub enum NeuraShape {
Vector(usize), // entries
Matrix(usize, usize), // rows, columns
Tensor(usize, usize, usize), // rows, columns, channels
}
mod one_hot; impl NeuraShape {
pub use one_hot::NeuraOneHotLayer; pub fn size(&self) -> usize {
match self {
NeuraShape::Vector(entries) => *entries,
NeuraShape::Matrix(rows, columns) => rows * columns,
NeuraShape::Tensor(rows, columns, channels) => rows * columns * channels
}
}
}
pub trait NeuraLayer<Input> {
type Output;
mod lock; fn eval(&self, input: &Input) -> Self::Output;
pub use lock::NeuraLockLayer; }
mod pool; impl<Input: Clone> NeuraLayer<Input> for () {
pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer}; type Output = Input;
mod reshape; fn eval(&self, input: &Input) -> Self::Output {
pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer}; input.clone()
}
}
use crate::algebra::NeuraVectorSpace; pub trait NeuraPartialLayer {
type Constructed;
type Err;
pub trait NeuraLayer { fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
type Input;
type Output;
fn eval(&self, input: &Self::Input) -> Self::Output; fn output_shape(constructed: &Self::Constructed) -> NeuraShape;
} }
pub trait NeuraTrainableLayer: NeuraLayer { pub trait NeuraTrainableLayer<Input>: NeuraLayer<Input> {
/// The representation of the layer gradient as a vector space /// The representation of the layer gradient as a vector space
type Delta: NeuraVectorSpace; type Gradient: NeuraVectorSpace;
fn default_gradient(&self) -> Self::Gradient;
/// Computes the backpropagation term and the derivative of the internal weights, /// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
@ -46,125 +62,28 @@ pub trait NeuraTrainableLayer: NeuraLayer {
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`, /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers. /// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backpropagate( fn backprop_layer(
&self, &self,
input: &Self::Input, input: &Input,
epsilon: Self::Output, epsilon: Self::Output,
) -> (Self::Input, Self::Delta); ) -> (Input, Self::Gradient);
/// Computes the regularization /// Computes the regularization
fn regularize(&self) -> Self::Delta; fn regularize_layer(&self) -> Self::Gradient;
/// Applies `δW_l` to the weights of the layer /// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta); fn apply_gradient(&mut self, gradient: &Self::Gradient);
/// Called before an iteration begins, to allow the layer to set itself up for training.
#[inline(always)]
fn prepare_epoch(&mut self) {}
/// Called at the end of training, to allow the layer to clean itself up /// Arbitrary computation that can be executed at the start of an epoch
#[allow(unused_variables)]
#[inline(always)] #[inline(always)]
fn cleanup(&mut self) {} fn prepare_layer(&mut self, is_training: bool) {}
} }
/// Temporary implementation of neura_layer
#[macro_export] #[macro_export]
macro_rules! neura_layer { macro_rules! neura_layer {
( "dense", $( $shape:expr ),*; $activation:expr ) => { ( "dense", $output:expr, $activation:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0) $crate::layer::dense::NeuraDenseLayer::new_partial($output, rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
as neura_layer!("_dense_shape", $($shape),*) }
};
( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
as neura_layer!("_dense_shape", $($shape),*)
};
( "_dense_shape", $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, _, $output>
};
( "_dense_shape", $input:expr, $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, $input, $output>
};
( "dropout", $probability:expr ) => {
$crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
as $crate::layer::NeuraDropoutLayer<_, _>
};
( "softmax" ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
};
( "softmax", $length:expr ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
};
( "one_hot" ) => {
$crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _>
};
( "lock", $layer:expr ) => {
$crate::layer::NeuraLockLayer($layer)
};
( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
};
( "conv1d_pad"; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
};
( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
};
( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
};
( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
};
( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
};
( "pool_global"; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
};
( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
};
( "pool1d", $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
};
( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
};
( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
};
( "unstable_flatten" ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64>
};
( "unstable_flatten", $width:expr, $height:expr ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64>
};
( "unstable_reshape", $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64>
};
( "unstable_reshape", $width:expr, $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64>
};
} }
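
To illustrate the builder pattern introduced above: a layer now comes with a lightweight partial form that only learns its input shape in `construct`. The following is a hypothetical example (not part of this commit; the names are invented for illustration), assuming the `NeuraLayer`, `NeuraPartialLayer` and `NeuraShape` definitions exactly as declared in this file:

use nalgebra::DVector;
use neuramethyst::layer::{NeuraLayer, NeuraPartialLayer, NeuraShape};

// Hypothetical layer that scales its input by a constant factor.
#[derive(Clone, Debug)]
struct ScaleLayer {
    factor: f64,
    size: usize,
}

// The "partial" form: everything except the parts that depend on the input shape.
#[derive(Clone, Debug)]
struct ScaleLayerPartial {
    factor: f64,
}

impl NeuraLayer<DVector<f64>> for ScaleLayer {
    type Output = DVector<f64>;

    fn eval(&self, input: &DVector<f64>) -> Self::Output {
        input * self.factor
    }
}

impl NeuraPartialLayer for ScaleLayerPartial {
    type Constructed = ScaleLayer;
    type Err = ();

    // The input shape is only known once the whole network is assembled,
    // so it is injected here instead of being a const-generic parameter.
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        Ok(ScaleLayer {
            factor: self.factor,
            size: input_shape.size(),
        })
    }

    fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
        NeuraShape::Vector(constructed.size)
    }
}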

@ -1,12 +1,15 @@
#![feature(generic_arg_infer)] #![feature(generic_arg_infer)]
#![feature(generic_const_exprs)] #![feature(generic_const_exprs)]
#![feature(negative_impls)]
pub mod algebra; pub mod algebra;
pub mod derivable; pub mod derivable;
pub mod layer; // pub mod layer;
pub mod network; pub mod network;
pub mod train; pub mod train;
pub mod layer;
mod utils; mod utils;
// TODO: move to a different file // TODO: move to a different file
@ -17,7 +20,7 @@ pub mod prelude {
pub use crate::{neura_layer, neura_sequential}; pub use crate::{neura_layer, neura_sequential};
// Structs and traits // Structs and traits
pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; pub use crate::layer::*;
pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail}; pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail, NeuraSequentialBuild};
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
} }

@ -2,25 +2,24 @@ use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer};
pub mod sequential; pub mod sequential;
pub trait NeuraTrainableNetwork: NeuraLayer { pub trait NeuraTrainableNetwork<Input>: NeuraLayer<Input> {
type Delta: NeuraVectorSpace; type Delta: NeuraVectorSpace;
fn default_gradient(&self) -> Self::Delta;
fn apply_gradient(&mut self, gradient: &Self::Delta); fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information. /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>( fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self, &self,
input: &Self::Input, input: &Input,
target: &Loss::Target, target: &Loss::Target,
loss: Loss, loss: Loss,
) -> (Self::Input, Self::Delta); ) -> (Input, Self::Delta);
/// Should return the regularization gradient /// Should return the regularization gradient
fn regularize(&self) -> Self::Delta; fn regularize(&self) -> Self::Delta;
/// Called before an iteration begins, to allow the network to set itself up for training. /// Called before an iteration begins, to allow the network to set itself up for training or not.
fn prepare_epoch(&mut self); fn prepare(&mut self, train_iteration: bool);
/// Called at the end of training, to allow the network to clean itself up
fn cleanup(&mut self);
} }

@ -1,12 +1,14 @@
use num::Float;
use crate::{ use crate::{
derivable::NeuraLoss, derivable::NeuraLoss,
layer::{NeuraLayer, NeuraTrainableLayer}, layer::{NeuraLayer, NeuraTrainableLayer, NeuraShape, NeuraPartialLayer},
}; };
use super::NeuraTrainableNetwork; use super::NeuraTrainableNetwork;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct NeuraSequential<Layer: NeuraLayer, ChildNetwork> { pub struct NeuraSequential<Layer, ChildNetwork> {
pub layer: Layer, pub layer: Layer,
pub child_network: Box<ChildNetwork>, pub child_network: Box<ChildNetwork>,
} }
@ -14,13 +16,13 @@ pub struct NeuraSequential<Layer: NeuraLayer, ChildNetwork> {
/// Operations on the tail end of a sequential network /// Operations on the tail end of a sequential network
pub trait NeuraSequentialTail { pub trait NeuraSequentialTail {
type TailTrimmed; type TailTrimmed;
type TailPushed<T: NeuraLayer>; type TailPushed<T>;
fn trim_tail(self) -> Self::TailTrimmed; fn trim_tail(self) -> Self::TailTrimmed;
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T>; fn push_tail<T>(self, layer: T) -> Self::TailPushed<T>;
} }
impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> { impl<Layer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { pub fn new(layer: Layer, child_network: ChildNetwork) -> Self {
Self { Self {
layer, layer,
@ -28,9 +30,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
} }
} }
pub fn new_match_output(layer: Layer, child_network: ChildNetwork) -> Self pub fn new_match_output<Input>(layer: Layer, child_network: ChildNetwork) -> Self
where where
ChildNetwork: NeuraLayer<Input = Layer::Output>, Layer: NeuraLayer<Input>,
ChildNetwork: NeuraLayer<Layer::Output>,
{ {
Self::new(layer, child_network) Self::new(layer, child_network)
} }
@ -39,7 +42,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
*self.child_network *self.child_network
} }
pub fn push_front<T: NeuraLayer>(self, layer: T) -> NeuraSequential<T, Self> { pub fn push_front<Input, Input2, T: NeuraLayer<Input2, Output=Input>>(self, layer: T) -> NeuraSequential<T, Self>
where
Layer: NeuraLayer<Input>
{
NeuraSequential { NeuraSequential {
layer: layer, layer: layer,
child_network: Box::new(self), child_network: Box::new(self),
@ -48,15 +54,15 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
} }
// Trimming the last layer returns an empty network // Trimming the last layer returns an empty network
impl<Layer: NeuraLayer> NeuraSequentialTail for NeuraSequential<Layer, ()> { impl<Layer> NeuraSequentialTail for NeuraSequential<Layer, ()> {
type TailTrimmed = (); type TailTrimmed = ();
type TailPushed<T: NeuraLayer> = NeuraSequential<Layer, NeuraSequential<T, ()>>; type TailPushed<T> = NeuraSequential<Layer, NeuraSequential<T, ()>>;
fn trim_tail(self) -> Self::TailTrimmed { fn trim_tail(self) -> Self::TailTrimmed {
() ()
} }
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> { fn push_tail<T>(self, layer: T) -> Self::TailPushed<T> {
NeuraSequential { NeuraSequential {
layer: self.layer, layer: self.layer,
child_network: Box::new(NeuraSequential { child_network: Box::new(NeuraSequential {
@ -68,11 +74,11 @@ impl<Layer: NeuraLayer> NeuraSequentialTail for NeuraSequential<Layer, ()> {
} }
// Trimming another layer returns a network which calls trim recursively // Trimming another layer returns a network which calls trim recursively
impl<Layer: NeuraLayer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail impl<Layer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
for NeuraSequential<Layer, ChildNetwork> for NeuraSequential<Layer, ChildNetwork>
{ {
type TailTrimmed = NeuraSequential<Layer, <ChildNetwork as NeuraSequentialTail>::TailTrimmed>; type TailTrimmed = NeuraSequential<Layer, <ChildNetwork as NeuraSequentialTail>::TailTrimmed>;
type TailPushed<T: NeuraLayer> = type TailPushed<T> =
NeuraSequential<Layer, <ChildNetwork as NeuraSequentialTail>::TailPushed<T>>; NeuraSequential<Layer, <ChildNetwork as NeuraSequentialTail>::TailPushed<T>>;
fn trim_tail(self) -> Self::TailTrimmed { fn trim_tail(self) -> Self::TailTrimmed {
@ -82,7 +88,7 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
} }
} }
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> { fn push_tail<T>(self, layer: T) -> Self::TailPushed<T> {
NeuraSequential { NeuraSequential {
layer: self.layer, layer: self.layer,
child_network: Box::new(self.child_network.push_tail(layer)), child_network: Box::new(self.child_network.push_tail(layer)),
@ -90,62 +96,55 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
} }
} }
impl<Layer: NeuraLayer> NeuraLayer for NeuraSequential<Layer, ()> { impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
type Input = Layer::Input;
type Output = Layer::Output;
fn eval(&self, input: &Self::Input) -> Self::Output {
self.layer.eval(input)
}
}
impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLayer
for NeuraSequential<Layer, ChildNetwork> for NeuraSequential<Layer, ChildNetwork>
{ {
type Input = Layer::Input;
type Output = ChildNetwork::Output; type Output = ChildNetwork::Output;
fn eval(&self, input: &Self::Input) -> Self::Output { fn eval(&self, input: &Input) -> Self::Output {
self.child_network.eval(&self.layer.eval(input)) self.child_network.eval(&self.layer.eval(input))
} }
} }
impl<Layer: NeuraTrainableLayer> NeuraTrainableNetwork for NeuraSequential<Layer, ()> { impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
type Delta = Layer::Delta; type Delta = ();
fn apply_gradient(&mut self, gradient: &Self::Delta) { fn default_gradient(&self) -> () {
self.layer.apply_gradient(gradient); ()
}
fn apply_gradient(&mut self, _gradient: &()) {
// Noop
} }
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>( fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self, &self,
input: &Self::Input, final_activation: &Input,
target: &Loss::Target, target: &Loss::Target,
loss: Loss, loss: Loss,
) -> (Self::Input, Self::Delta) { ) -> (Input, Self::Delta) {
let final_activation = self.layer.eval(input);
let backprop_epsilon = loss.nabla(target, &final_activation); let backprop_epsilon = loss.nabla(target, &final_activation);
self.layer.backpropagate(&input, backprop_epsilon)
}
fn regularize(&self) -> Self::Delta { (backprop_epsilon, ())
self.layer.regularize()
} }
fn prepare_epoch(&mut self) { fn regularize(&self) -> () {
self.layer.prepare_epoch(); ()
} }
fn cleanup(&mut self) { fn prepare(&mut self, _is_training: bool) {
self.layer.cleanup(); // Noop
} }
} }
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Layer::Output>> impl<Input, Layer: NeuraTrainableLayer<Input>, ChildNetwork: NeuraTrainableNetwork<Layer::Output>>
NeuraTrainableNetwork for NeuraSequential<Layer, ChildNetwork> NeuraTrainableNetwork<Input> for NeuraSequential<Layer, ChildNetwork>
{ {
type Delta = (Layer::Delta, Box<ChildNetwork::Delta>); type Delta = (Layer::Gradient, Box<ChildNetwork::Delta>);
fn default_gradient(&self) -> Self::Delta {
(self.layer.default_gradient(), Box::new(self.child_network.default_gradient()))
}
fn apply_gradient(&mut self, gradient: &Self::Delta) { fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(&gradient.0); self.layer.apply_gradient(&gradient.0);
@ -154,16 +153,16 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Lay
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>( fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self, &self,
input: &Self::Input, input: &Input,
target: &Loss::Target, target: &Loss::Target,
loss: Loss, loss: Loss,
) -> (Self::Input, Self::Delta) { ) -> (Input, Self::Delta) {
let next_activation = self.layer.eval(input); let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) = let (backprop_gradient, weights_gradient) =
self.child_network self.child_network
.backpropagate(&next_activation, target, loss); .backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) = let (backprop_gradient, layer_gradient) =
self.layer.backpropagate(input, backprop_gradient); self.layer.backprop_layer(input, backprop_gradient);
( (
backprop_gradient, backprop_gradient,
@ -173,23 +172,18 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Lay
fn regularize(&self) -> Self::Delta { fn regularize(&self) -> Self::Delta {
( (
self.layer.regularize(), self.layer.regularize_layer(),
Box::new(self.child_network.regularize()), Box::new(self.child_network.regularize()),
) )
} }
fn prepare_epoch(&mut self) { fn prepare(&mut self, is_training: bool) {
self.layer.prepare_epoch(); self.layer.prepare_layer(is_training);
self.child_network.prepare_epoch(); self.child_network.prepare(is_training);
}
fn cleanup(&mut self) {
self.layer.cleanup();
self.child_network.cleanup();
} }
} }
impl<Layer: NeuraLayer> From<Layer> for NeuraSequential<Layer, ()> { impl<Layer> From<Layer> for NeuraSequential<Layer, ()> {
fn from(layer: Layer) -> Self { fn from(layer: Layer) -> Self {
Self { Self {
layer, layer,
@ -198,6 +192,53 @@ impl<Layer: NeuraLayer> From<Layer> for NeuraSequential<Layer, ()> {
} }
} }
pub trait NeuraSequentialBuild {
type Constructed;
type Err;
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
}
#[derive(Debug, Clone)]
pub enum NeuraSequentialBuildErr<Err, ChildErr> {
Current(Err),
Child(ChildErr),
}
impl<Layer: NeuraPartialLayer> NeuraSequentialBuild for NeuraSequential<Layer, ()> {
type Constructed = NeuraSequential<Layer::Constructed, ()>;
type Err = Layer::Err;
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
Ok(NeuraSequential {
layer: self.layer.construct(input_shape)?,
child_network: Box::new(())
})
}
}
impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraSequentialBuild> NeuraSequentialBuild for NeuraSequential<Layer, ChildNetwork> {
type Constructed = NeuraSequential<Layer::Constructed, ChildNetwork::Constructed>;
type Err = NeuraSequentialBuildErr<Layer::Err, ChildNetwork::Err>;
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
let layer = self.layer.construct(input_shape).map_err(|e| NeuraSequentialBuildErr::Current(e))?;
// TODO: ensure that this operation (and all recursive operations) are directly allocated on the heap
let child_network = self.child_network
.construct(Layer::output_shape(&layer))
.map_err(|e| NeuraSequentialBuildErr::Child(e))?;
let child_network = Box::new(child_network);
Ok(NeuraSequential {
layer,
child_network,
})
}
}
/// A utility to recursively create a NeuraSequential network, while writing it in a declarative and linear fashion. /// A utility to recursively create a NeuraSequential network, while writing it in a declarative and linear fashion.
/// Note that this can quickly create big and unwieldy types. /// Note that this can quickly create big and unwieldy types.
#[macro_export] #[macro_export]
@ -211,41 +252,47 @@ macro_rules! neura_sequential {
}; };
[ $first:expr, $($rest:expr),+ $(,)? ] => { [ $first:expr, $($rest:expr),+ $(,)? ] => {
$crate::network::sequential::NeuraSequential::new_match_output($first, neura_sequential![$($rest),+]) $crate::network::sequential::NeuraSequential::new($first, neura_sequential![$($rest),+])
}; };
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use nalgebra::dvector;
use crate::{ use crate::{
derivable::{activation::Relu, regularize::NeuraL0}, derivable::{activation::Relu, regularize::NeuraL0},
layer::NeuraDenseLayer, layer::{NeuraDenseLayer, NeuraShape, NeuraLayer},
neura_layer, neura_layer,
}; };
use super::NeuraSequentialBuild;
#[test] #[test]
fn test_neura_network_macro() { fn test_neura_network_macro() {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
let _ = neura_sequential![ let _ = neura_sequential![
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, NeuraDenseLayer::from_rng(8, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, NeuraDenseLayer::from_rng(12, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 2> NeuraDenseLayer::from_rng(16, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>
]; ];
let _ = neura_sequential![ let _ = neura_sequential![
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, NeuraDenseLayer::from_rng(2, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
]; ];
let _ = neura_sequential![ let _ = neura_sequential![
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, NeuraDenseLayer::from_rng(8, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, NeuraDenseLayer::from_rng(16, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer<f64, _, _>,
]; ];
let _ = neura_sequential![ let network = neura_sequential![
neura_layer!("dense", 8, 16; Relu), neura_layer!("dense", 16, Relu),
neura_layer!("dense", 12; Relu), neura_layer!("dense", 12, Relu),
neura_layer!("dense", 2; Relu) neura_layer!("dense", 2, Relu)
]; ].construct(NeuraShape::Vector(2)).unwrap();
network.eval(&dvector![0.0f64, 0.0]);
} }
} }

@ -0,0 +1,180 @@
use super::{NeuraLayer, NeuraTrainableLayer};
use crate::{
algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace},
derivable::NeuraDerivable,
};
use rand::Rng;
use rand_distr::Distribution;
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> {
weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
bias: NeuraVector<OUTPUT_LEN, f64>,
activation: Act,
regularization: Reg,
}
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
pub fn new(
weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
bias: NeuraVector<OUTPUT_LEN, f64>,
activation: Act,
regularization: Reg,
) -> Self {
Self {
weights,
bias,
activation,
regularization,
}
}
pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
let mut weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64> = NeuraMatrix::from_value(0.0f64);
// Use Xavier (or He) initialisation, using the harmonic mean
// Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html
let distribution = rand_distr::Normal::new(
0.0,
activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64),
)
.unwrap();
// let distribution = rand_distr::Uniform::new(-0.5, 0.5);
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
weights[i][j] = distribution.sample(rng);
}
}
Self {
weights,
// Biases are initialized based on the activation's hint
bias: NeuraVector::from_value(activation.bias_hint()),
activation,
regularization,
}
}
}
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
type Input = NeuraVector<INPUT_LEN, f64>;
type Output = NeuraVector<OUTPUT_LEN, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output {
let mut result = self.weights.multiply_vector(input);
for i in 0..OUTPUT_LEN {
result[i] = self.activation.eval(result[i] + self.bias[i]);
}
result
}
}
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
type Delta = (
NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
NeuraVector<OUTPUT_LEN, f64>,
);
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
let evaluated = self.weights.multiply_vector(input);
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta`
let mut delta: NeuraVector<OUTPUT_LEN, f64> = epsilon.clone();
for i in 0..OUTPUT_LEN {
delta[i] *= self.activation.derivate(evaluated[i]);
}
// Compute the weight gradient
let weights_gradient = delta.reverse_dot(input);
let new_epsilon = self.weights.transpose_multiply_vector(&delta);
// According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
// The gradient of the bias is equal to the delta term of the backpropagation algorithm
let bias_gradient = delta;
(new_epsilon, (weights_gradient, bias_gradient))
}
fn apply_gradient(&mut self, gradient: &Self::Delta) {
NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
}
fn regularize(&self) -> Self::Delta {
let mut res = Self::Delta::default();
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
res.0[i][j] = self.regularization.derivate(self.weights[i][j]);
}
}
// Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network
res
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::{
derivable::{activation::Relu, regularize::NeuraL0},
utils::uniform_vector,
};
#[test]
fn test_from_rng() {
let mut rng = rand::thread_rng();
let layer: NeuraDenseLayer<_, _, 64, 32> =
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);
let mut input = [0.0; 64];
for x in 0..64 {
input[x] = rng.gen();
}
assert!(layer.eval(&input.into()).len() == 32);
}
#[test]
fn test_stack_overflow_big_layer() {
let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0)
as NeuraDenseLayer<Relu, NeuraL0, 1000, 1000>;
layer.backpropagate(&uniform_vector(), uniform_vector());
<NeuraDenseLayer<Relu, NeuraL0, 1000, 1000> as NeuraTrainableLayer>::Delta::zero();
}
}

@ -0,0 +1,170 @@
mod dense;
pub use dense::NeuraDenseLayer;
mod convolution;
pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};
mod dropout;
pub use dropout::NeuraDropoutLayer;
mod softmax;
pub use softmax::NeuraSoftmaxLayer;
mod one_hot;
pub use one_hot::NeuraOneHotLayer;
mod lock;
pub use lock::NeuraLockLayer;
mod pool;
pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer};
mod reshape;
pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer};
use crate::algebra::NeuraVectorSpace;
pub trait NeuraLayer {
type Input;
type Output;
fn eval(&self, input: &Self::Input) -> Self::Output;
}
pub trait NeuraTrainableLayer: NeuraLayer {
/// The representation of the layer gradient as a vector space
type Delta: NeuraVectorSpace;
/// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
///
/// Note: we introduce the term `epsilon`, which together with the activation of the current function can be used to compute `delta_l`:
/// ```no_rust
/// f_l'(a_l) * epsilon_l = delta_l
/// ```
///
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
/// Computes the regularization
fn regularize(&self) -> Self::Delta;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Called before an iteration begins, to allow the layer to set itself up for training.
#[inline(always)]
fn prepare_epoch(&mut self) {}
/// Called at the end of training, to allow the layer to clean itself up
#[inline(always)]
fn cleanup(&mut self) {}
}
#[macro_export]
macro_rules! neura_layer {
( "dense", $( $shape:expr ),*; $activation:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
as neura_layer!("_dense_shape", $($shape),*)
};
( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
as neura_layer!("_dense_shape", $($shape),*)
};
( "_dense_shape", $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, _, $output>
};
( "_dense_shape", $input:expr, $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, $input, $output>
};
( "dropout", $probability:expr ) => {
$crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
as $crate::layer::NeuraDropoutLayer<_, _>
};
( "softmax" ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
};
( "softmax", $length:expr ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
};
( "one_hot" ) => {
$crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _>
};
( "lock", $layer:expr ) => {
$crate::layer::NeuraLockLayer($layer)
};
( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
};
( "conv1d_pad"; $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
};
( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
};
( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
};
( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
};
( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
$crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
};
( "pool_global"; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
};
( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
$crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
};
( "pool1d", $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
};
( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
};
( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
$crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
};
( "unstable_flatten" ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64>
};
( "unstable_flatten", $width:expr, $height:expr ) => {
$crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64>
};
( "unstable_reshape", $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64>
};
( "unstable_reshape", $width:expr, $height:expr ) => {
$crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64>
};
}

@ -5,26 +5,20 @@ use crate::{
network::{sequential::NeuraSequential, NeuraTrainableNetwork}, network::{sequential::NeuraSequential, NeuraTrainableNetwork},
}; };
pub trait NeuraGradientSolver<Output, Target = Output> { pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<Input>> {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>( fn get_gradient(
&self, &self,
trainable: &NeuraSequential<Layer, ChildNetwork>, trainable: &Trainable,
input: &Layer::Input, input: &Input,
target: &Target, target: &Target,
) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta ) -> Trainable::Delta;
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = Output>;
fn score<Layer: NeuraLayer, ChildNetwork>( fn score(
&self, &self,
trainable: &NeuraSequential<Layer, ChildNetwork>, trainable: &Trainable,
input: &Layer::Input, input: &Input,
target: &Target, target: &Target,
) -> f64 ) -> f64;
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = Output>;
} }
#[non_exhaustive] #[non_exhaustive]
@ -38,32 +32,24 @@ impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
} }
} }
impl<const N: usize, Loss: NeuraLoss<Input = NeuraVector<N, f64>> + Clone> impl<Input, Target, Trainable: NeuraTrainableNetwork<Input>, Loss: NeuraLoss<Input = Trainable::Output, Target = Target> + Clone>
NeuraGradientSolver<NeuraVector<N, f64>, Loss::Target> for NeuraBackprop<Loss> NeuraGradientSolver<Input, Target, Trainable> for NeuraBackprop<Loss>
{ {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>( fn get_gradient(
&self, &self,
trainable: &NeuraSequential<Layer, ChildNetwork>, trainable: &Trainable,
input: &Layer::Input, input: &Input,
target: &Loss::Target, target: &Target,
) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta ) -> Trainable::Delta {
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = NeuraVector<N, f64>>,
{
trainable.backpropagate(input, target, self.loss.clone()).1 trainable.backpropagate(input, target, self.loss.clone()).1
} }
fn score<Layer: NeuraLayer, ChildNetwork>( fn score(
&self, &self,
trainable: &NeuraSequential<Layer, ChildNetwork>, trainable: &Trainable,
input: &Layer::Input, input: &Input,
target: &Loss::Target, target: &Target,
) -> f64 ) -> f64 {
where
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = NeuraVector<N, f64>>,
{
let output = trainable.eval(&input); let output = trainable.eval(&input);
self.loss.eval(target, &output) self.loss.eval(target, &output)
} }
@ -137,41 +123,32 @@ impl NeuraBatchedTrainer {
} }
pub fn train< pub fn train<
Output, Input: Clone,
Target: Clone, Target: Clone,
GradientSolver: NeuraGradientSolver<Output, Target>, Network: NeuraTrainableNetwork<Input>,
Layer: NeuraLayer, GradientSolver: NeuraGradientSolver<Input, Target, Network>,
ChildNetwork, Inputs: IntoIterator<Item = (Input, Target)>,
Inputs: IntoIterator<Item = (Layer::Input, Target)>,
>( >(
&self, &self,
gradient_solver: GradientSolver, gradient_solver: GradientSolver,
network: &mut NeuraSequential<Layer, ChildNetwork>, network: &mut Network,
inputs: Inputs, inputs: Inputs,
test_inputs: &[(Layer::Input, Target)], test_inputs: &[(Input, Target)],
) where ) {
NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = Output>,
Layer::Input: Clone,
{
let mut iter = inputs.into_iter(); let mut iter = inputs.into_iter();
let factor = -self.learning_rate / (self.batch_size as f64); let factor = -self.learning_rate / (self.batch_size as f64);
let momentum_factor = self.learning_momentum / self.learning_rate; let momentum_factor = self.learning_momentum / self.learning_rate;
let reg_factor = -self.learning_rate; let reg_factor = -self.learning_rate;
// Contains `momentum_factor * factor * gradient_sum_previous_iter` // Contains `momentum_factor * factor * gradient_sum_previous_iter`
let mut previous_gradient_sum = let mut previous_gradient_sum = network.default_gradient();
Box::<<NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta>::zero();
'd: for iteration in 0..self.iterations { 'd: for iteration in 0..self.iterations {
let mut gradient_sum = Box::< let mut gradient_sum = network.default_gradient();
<NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta, network.prepare(true);
>::zero();
network.prepare_epoch();
for _ in 0..self.batch_size { for _ in 0..self.batch_size {
if let Some((input, target)) = iter.next() { if let Some((input, target)) = iter.next() {
let gradient = let gradient = gradient_solver.get_gradient(&network, &input, &target);
Box::new(gradient_solver.get_gradient(&network, &input, &target));
gradient_sum.add_assign(&gradient); gradient_sum.add_assign(&gradient);
} else { } else {
break 'd; break 'd;
@ -194,7 +171,7 @@ impl NeuraBatchedTrainer {
} }
if self.log_iterations > 0 && (iteration + 1) % self.log_iterations == 0 { if self.log_iterations > 0 && (iteration + 1) % self.log_iterations == 0 {
network.cleanup(); network.prepare(false);
let mut loss_sum = 0.0; let mut loss_sum = 0.0;
for (input, target) in test_inputs { for (input, target) in test_inputs {
loss_sum += gradient_solver.score(&network, input, target); loss_sum += gradient_solver.score(&network, input, target);
@ -204,12 +181,14 @@ impl NeuraBatchedTrainer {
} }
} }
network.cleanup(); network.prepare(false);
} }
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use nalgebra::{DMatrix, dmatrix, dvector};
use super::*; use super::*;
use crate::{ use crate::{
assert_approx, assert_approx,
@ -224,19 +203,19 @@ mod test {
for wa in [0.0, 0.25, 0.5, 1.0] { for wa in [0.0, 0.25, 0.5, 1.0] {
for wb in [0.0, 0.25, 0.5, 1.0] { for wb in [0.0, 0.25, 0.5, 1.0] {
let network = NeuraSequential::new( let network = NeuraSequential::new(
NeuraDenseLayer::new([[wa, wb]].into(), [0.0].into(), Linear, NeuraL0), NeuraDenseLayer::new(dmatrix![wa, wb], dvector![0.0], Linear, NeuraL0),
(), (),
); );
let gradient = NeuraBackprop::new(Euclidean).get_gradient( let (gradient, _) = NeuraBackprop::new(Euclidean).get_gradient(
&network, &network,
&[1.0, 1.0].into(), &dvector![1.0, 1.0],
&[0.0].into(), &dvector![0.0],
); );
let expected = wa + wb; let expected = wa + wb;
assert!((gradient.0[0][0] - expected) < 0.001); assert!((gradient.0[(0, 0)] - expected) < 0.001);
assert!((gradient.0[0][1] - expected) < 0.001); assert!((gradient.0[(0, 1)] - expected) < 0.001);
} }
} }
} }
@ -247,42 +226,42 @@ mod test {
// Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/ // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/
let network = neura_sequential![ let network = neura_sequential![
NeuraDenseLayer::new( NeuraDenseLayer::new(
[[0.11, 0.21], [0.12, 0.08]].into(), dmatrix![0.11, 0.21; 0.12, 0.08],
[0.0; 2].into(), dvector![0.0, 0.0],
Linear, Linear,
NeuraL0 NeuraL0
), ),
NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0) NeuraDenseLayer::new(dmatrix![0.14, 0.15], dvector![0.0], Linear, NeuraL0)
]; ];
let input = [2.0, 3.0]; let input = dvector![2.0, 3.0];
let target = [1.0]; let target = dvector![1.0];
let intermediary = network.clone().trim_tail().eval(&input.into()); let intermediary = network.clone().trim_tail().eval(&input);
assert_approx!(0.85, intermediary[0], EPSILON); assert_approx!(0.85, intermediary[0], EPSILON);
assert_approx!(0.48, intermediary[1], EPSILON); assert_approx!(0.48, intermediary[1], EPSILON);
assert_approx!(0.191, network.eval(&input.into())[0], EPSILON); assert_approx!(0.191, network.eval(&input)[0], EPSILON);
assert_approx!( assert_approx!(
0.327, 0.327,
Euclidean.eval(&target.into(), &network.eval(&input.into())), Euclidean.eval(&target, &network.eval(&input)),
0.001 0.001
); );
let delta = network.eval(&input.into())[0] - target[0]; let delta = network.eval(&input)[0] - target[0];
let (gradient_first, gradient_second) = let (gradient_first, gradient_second) =
NeuraBackprop::new(Euclidean).get_gradient(&network, &input.into(), &target.into()); NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target);
let gradient_first = gradient_first.0; let gradient_first = gradient_first.0;
let gradient_second = gradient_second.0[0]; let gradient_second = gradient_second.0.0;
assert_approx!(gradient_second[0], intermediary[0] * delta, EPSILON); assert_approx!(gradient_second[0], intermediary[0] * delta, EPSILON);
assert_approx!(gradient_second[1], intermediary[1] * delta, EPSILON); assert_approx!(gradient_second[1], intermediary[1] * delta, EPSILON);
assert_approx!(gradient_first[0][0], input[0] * delta * 0.14, EPSILON); assert_approx!(gradient_first[(0, 0)], input[0] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[0][1], input[1] * delta * 0.14, EPSILON); assert_approx!(gradient_first[(0, 1)], input[1] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[1][0], input[0] * delta * 0.15, EPSILON); assert_approx!(gradient_first[(1, 0)], input[0] * delta * 0.15, EPSILON);
assert_approx!(gradient_first[1][1], input[1] * delta * 0.15, EPSILON); assert_approx!(gradient_first[(1, 1)], input[1] * delta * 0.15, EPSILON);
} }
} }
