From 2edbff860c09af468afca473fa226ba6a1be0cae Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Wed, 19 Apr 2023 00:54:30 +0200 Subject: [PATCH] :fire: :truck: :recycle: Refactoring the previous layer system It was becoming almost impossible to manage the dimensions of the layers, especially with convolution layers. Generic consts are nice, but they are a bit too early to have right now for this use-case. We'll probably be expanding the implementations to accept const or dynamically-sized layers at some point, for performance-critical applications. --- Cargo.toml | 1 + examples/xor.rs | 24 +-- src/algebra/matrix.rs | 8 +- src/algebra/mod.rs | 69 ++++--- src/algebra/vector.rs | 8 +- src/derivable/loss.rs | 25 +-- src/layer/dense.rs | 238 +++++++++++++----------- src/layer/mod.rs | 183 +++++------------- src/lib.rs | 9 +- src/network/mod.rs | 15 +- src/network/sequential.rs | 189 ++++++++++++------- src/{layer => old_layer}/convolution.rs | 0 src/old_layer/dense.rs | 180 ++++++++++++++++++ src/{layer => old_layer}/dropout.rs | 0 src/{layer => old_layer}/lock.rs | 0 src/old_layer/mod.rs | 170 +++++++++++++++++ src/{layer => old_layer}/one_hot.rs | 0 src/{layer => old_layer}/pool.rs | 0 src/{layer => old_layer}/reshape.rs | 0 src/{layer => old_layer}/softmax.rs | 0 src/train.rs | 135 ++++++-------- 21 files changed, 796 insertions(+), 458 deletions(-) rename src/{layer => old_layer}/convolution.rs (100%) create mode 100644 src/old_layer/dense.rs rename src/{layer => old_layer}/dropout.rs (100%) rename src/{layer => old_layer}/lock.rs (100%) create mode 100644 src/old_layer/mod.rs rename src/{layer => old_layer}/one_hot.rs (100%) rename src/{layer => old_layer}/pool.rs (100%) rename src/{layer => old_layer}/reshape.rs (100%) rename src/{layer => old_layer}/softmax.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 48cafe8..fe4ca1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] boxed-array = "0.1.0" +nalgebra = { version = "^0.32", features = ["std", "macros", "rand"] } ndarray = "^0.15" num = "^0.4" # num-traits = "0.2.15" diff --git a/examples/xor.rs b/examples/xor.rs index 759dd9d..4b7c277 100644 --- a/examples/xor.rs +++ b/examples/xor.rs @@ -1,22 +1,24 @@ #![feature(generic_arg_infer)] -use neuramethyst::algebra::NeuraVector; +use nalgebra::dvector; + use neuramethyst::derivable::activation::Relu; use neuramethyst::derivable::loss::Euclidean; -use neuramethyst::{cycle_shuffling, prelude::*}; +use neuramethyst::prelude::*; +use neuramethyst::cycle_shuffling; fn main() { let mut network = neura_sequential![ - neura_layer!("dense", 2, 4; Relu), - neura_layer!("dense", 3; Relu), - neura_layer!("dense", 1; Relu) - ]; + neura_layer!("dense", 4, Relu), + neura_layer!("dense", 3, Relu), + neura_layer!("dense", 1, Relu) + ].construct(NeuraShape::Vector(2)).unwrap(); - let inputs: [(NeuraVector<2, f64>, NeuraVector<1, f64>); 4] = [ - ([0.0, 0.0].into(), [0.0].into()), - ([0.0, 1.0].into(), [1.0].into()), - ([1.0, 0.0].into(), [1.0].into()), - ([1.0, 1.0].into(), [0.0].into()), + let inputs = [ + (dvector![0.0, 0.0], dvector![0.0]), + (dvector![0.0, 1.0], dvector![1.0]), + (dvector![1.0, 0.0], dvector![1.0]), + (dvector![1.0, 1.0], dvector![0.0]), ]; for (input, target) in &inputs { diff --git a/src/algebra/matrix.rs b/src/algebra/matrix.rs index 484b0a1..9b15213 100644 --- a/src/algebra/matrix.rs +++ b/src/algebra/matrix.rs @@ -167,10 +167,10 @@ impl Neura } } - #[inline(always)] - fn zero() -> Self { - Self::from_value(F::zero()) - } + // 
#[inline(always)] + // fn zero() -> Self { + // Self::from_value(F::zero()) + // } fn norm_squared(&self) -> f64 { let mut sum = 0.0; diff --git a/src/algebra/mod.rs b/src/algebra/mod.rs index ec9e3ad..8e26d80 100644 --- a/src/algebra/mod.rs +++ b/src/algebra/mod.rs @@ -2,6 +2,8 @@ mod matrix; pub use matrix::NeuraMatrix; mod vector; +use nalgebra::Matrix; +use num::Float; pub use vector::NeuraVector; /// An extension of `std::ops::AddAssign` and `std::ops::Default` @@ -10,7 +12,7 @@ pub trait NeuraVectorSpace { fn mul_assign(&mut self, by: f64); - fn zero() -> Self; + // fn zero() -> Self; fn norm_squared(&self) -> f64; } @@ -26,10 +28,10 @@ impl NeuraVectorSpace for () { // Noop } - #[inline(always)] - fn zero() -> Self { - () - } + // #[inline(always)] + // fn zero() -> Self { + // () + // } fn norm_squared(&self) -> f64 { 0.0 @@ -45,9 +47,9 @@ impl NeuraVectorSpace for Box { self.as_mut().mul_assign(by); } - fn zero() -> Self { - Box::new(T::zero()) - } + // fn zero() -> Self { + // Box::new(T::zero()) + // } fn norm_squared(&self) -> f64 { self.as_ref().norm_squared() @@ -65,9 +67,9 @@ impl NeuraVectorSpace for (Left NeuraVectorSpace::mul_assign(&mut self.1, by); } - fn zero() -> Self { - (Left::zero(), Right::zero()) - } + // fn zero() -> Self { + // (Left::zero(), Right::zero()) + // } fn norm_squared(&self) -> f64 { self.0.norm_squared() + self.1.norm_squared() @@ -87,24 +89,43 @@ impl NeuraVectorSpace for [T; N] { } } - fn zero() -> Self { - let mut res: Vec = Vec::with_capacity(N); + // fn zero() -> Self { + // let mut res: Vec = Vec::with_capacity(N); - for _ in 0..N { - res.push(T::zero()); - } + // for _ in 0..N { + // res.push(T::zero()); + // } - res.try_into().unwrap_or_else(|_| { - // TODO: check that this panic is optimized away - unreachable!() - }) - } + // res.try_into().unwrap_or_else(|_| { + // // TODO: check that this panic is optimized away + // unreachable!() + // }) + // } fn norm_squared(&self) -> f64 { self.iter().map(T::norm_squared).sum() } } +impl> NeuraVectorSpace for Matrix +where + Matrix: std::ops::MulAssign, + for<'c> Matrix: std::ops::AddAssign<&'c Matrix>, + F: From + Into +{ + fn add_assign(&mut self, other: &Self) { + *self += other; + } + + fn mul_assign(&mut self, by: f64) { + *self *= >::from(by); + } + + fn norm_squared(&self) -> f64 { + self.iter().map(|x| *x * *x).reduce(|sum, curr| sum + curr).unwrap_or(F::zero()).into() + } +} + macro_rules! base { ( $type:ty ) => { impl NeuraVectorSpace for $type { @@ -116,9 +137,9 @@ macro_rules! 
base { std::ops::MulAssign::mul_assign(self, other as $type); } - fn zero() -> Self { - ::default() - } + // fn zero() -> Self { + // ::default() + // } fn norm_squared(&self) -> f64 { (self * self) as f64 diff --git a/src/algebra/vector.rs b/src/algebra/vector.rs index 7042431..52f4a66 100644 --- a/src/algebra/vector.rs +++ b/src/algebra/vector.rs @@ -95,10 +95,10 @@ impl + Into> NeuraVectorSpace } } - #[inline(always)] - fn zero() -> Self { - Self::from_value(F::zero()) - } + // #[inline(always)] + // fn zero() -> Self { + // Self::from_value(F::zero()) + // } fn norm_squared(&self) -> f64 { let mut sum = F::zero(); diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs index 7d23d8b..9bb79da 100644 --- a/src/derivable/loss.rs +++ b/src/derivable/loss.rs @@ -1,19 +1,22 @@ +use nalgebra::DVector; + use crate::algebra::NeuraVector; use super::NeuraLoss; #[derive(Clone, Copy, Debug, PartialEq)] -pub struct Euclidean; +pub struct Euclidean; -impl NeuraLoss for Euclidean { - type Input = NeuraVector; - type Target = NeuraVector; +impl NeuraLoss for Euclidean { + type Input = DVector; + type Target = DVector; #[inline] - fn eval(&self, target: &NeuraVector, actual: &NeuraVector) -> f64 { + fn eval(&self, target: &DVector, actual: &DVector) -> f64 { + assert_eq!(target.shape(), actual.shape()); let mut sum_squared = 0.0; - for i in 0..N { + for i in 0..target.len() { sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]); } @@ -23,13 +26,13 @@ impl NeuraLoss for Euclidean { #[inline] fn nabla( &self, - target: &NeuraVector, - actual: &NeuraVector, - ) -> NeuraVector { - let mut res = NeuraVector::default(); + target: &DVector, + actual: &DVector, + ) -> DVector { + let mut res = DVector::zeros(target.len()); // ∂E(y)/∂yᵢ = yᵢ - yᵢ' - for i in 0..N { + for i in 0..target.len() { res[i] = actual[i] - target[i]; } diff --git a/src/layer/dense.rs b/src/layer/dense.rs index ff921a8..dd56367 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -1,38 +1,49 @@ -use super::{NeuraLayer, NeuraTrainableLayer}; -use crate::{ - algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace}, - derivable::NeuraDerivable, -}; +use std::marker::PhantomData; +use nalgebra::{DMatrix, DVector}; +use num::Float; use rand::Rng; -use rand_distr::Distribution; + +use crate::derivable::NeuraDerivable; + +use super::*; #[derive(Clone, Debug)] -pub struct NeuraDenseLayer< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, +pub struct NeuraDenseLayer, Reg: NeuraDerivable> { + weights: DMatrix, + bias: DVector, + activation: Act, + regularization: Reg, +} + +#[derive(Clone, Debug)] +pub struct NeuraDenseLayerPartial< + F: Float, + Act: NeuraDerivable, + Reg: NeuraDerivable, + R: Rng, > { - weights: NeuraMatrix, - bias: NeuraVector, activation: Act, regularization: Reg, + output_size: usize, + rng: R, + phantom: PhantomData, } impl< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, - > NeuraDenseLayer + F: Float + From + std::fmt::Debug + 'static, + Act: NeuraDerivable, + Reg: NeuraDerivable, + > NeuraDenseLayer { pub fn new( - weights: NeuraMatrix, - bias: NeuraVector, + weights: DMatrix, + bias: DVector, activation: Act, regularization: Reg, ) -> Self { + assert_eq!(bias.shape().0, weights.shape().0); + Self { weights, bias, @@ -41,85 +52,129 @@ impl< } } - pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { - let mut weights: NeuraMatrix = NeuraMatrix::from_value(0.0f64); - - // Use 
Xavier (or He) initialisation, using the harmonic mean - // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html + pub fn from_rng( + input_size: usize, + output_size: usize, + rng: &mut impl Rng, + activation: Act, + regularization: Reg, + ) -> Self + where + rand_distr::StandardNormal: rand_distr::Distribution, + { let distribution = rand_distr::Normal::new( - 0.0, - activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64), + F::zero(), + >::from( + activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64), + ), ) .unwrap(); - // let distribution = rand_distr::Uniform::new(-0.5, 0.5); - for i in 0..OUTPUT_LEN { - for j in 0..INPUT_LEN { - weights[i][j] = distribution.sample(rng); - } + Self { + weights: DMatrix::from_distribution(output_size, input_size, &distribution, rng), + bias: DVector::from_element( + output_size, + >::from(activation.bias_hint()), + ), + activation, + regularization, } + } - Self { - weights, - // Biases are initialized based on the activation's hint - bias: NeuraVector::from_value(activation.bias_hint()), + pub fn new_partial( + output_size: usize, + rng: R, + activation: Act, + regularization: Reg, + ) -> NeuraDenseLayerPartial { + NeuraDenseLayerPartial { activation, regularization, + output_size, + rng, + phantom: PhantomData, } } } impl< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, - > NeuraLayer for NeuraDenseLayer + F: Float + From + std::fmt::Debug + 'static, + Act: NeuraDerivable, + Reg: NeuraDerivable, + R: Rng, + > NeuraPartialLayer for NeuraDenseLayerPartial +where + rand_distr::StandardNormal: rand_distr::Distribution, { - type Input = NeuraVector; + type Constructed = NeuraDenseLayer; + type Err = (); + + fn construct(self, input_shape: NeuraShape) -> Result { + let mut rng = self.rng; + Ok(NeuraDenseLayer::from_rng( + input_shape.size(), + self.output_size, + &mut rng, + self.activation, + self.regularization, + )) + } + + fn output_shape(constructed: &Self::Constructed) -> NeuraShape { + NeuraShape::Vector(constructed.weights.shape().0) + } +} - type Output = NeuraVector; +impl< + F: Float + From + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign, + Act: NeuraDerivable, + Reg: NeuraDerivable, + > NeuraLayer> for NeuraDenseLayer +{ + type Output = DVector; - fn eval(&self, input: &Self::Input) -> Self::Output { - let mut result = self.weights.multiply_vector(input); + fn eval(&self, input: &DVector) -> Self::Output { + assert_eq!(input.shape().0, self.weights.shape().1); - for i in 0..OUTPUT_LEN { - result[i] = self.activation.eval(result[i] + self.bias[i]); - } + let res = &self.weights * input + &self.bias; - result + res.map(|x| self.activation.eval(x)) } } impl< - Act: NeuraDerivable, - Reg: NeuraDerivable, - const INPUT_LEN: usize, - const OUTPUT_LEN: usize, - > NeuraTrainableLayer for NeuraDenseLayer + F: Float + From + Into + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign, + Act: NeuraDerivable, + Reg: NeuraDerivable, + > NeuraTrainableLayer> for NeuraDenseLayer { - type Delta = ( - NeuraMatrix, - NeuraVector, - ); + type Gradient = (DMatrix, DVector); - fn backpropagate( + fn default_gradient(&self) -> Self::Gradient { + ( + DMatrix::zeros(self.weights.shape().0, self.weights.shape().1), + DVector::zeros(self.bias.shape().0), + ) + } + + fn backprop_layer( &self, - input: &Self::Input, + input: &DVector, epsilon: Self::Output, - ) -> (Self::Input, Self::Delta) { - let evaluated = 
self.weights.multiply_vector(input); + ) -> (DVector, Self::Gradient) { + let evaluated = &self.weights * input; // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), // with `self.activation'(input) ° epsilon = delta` - let mut delta: NeuraVector = epsilon.clone(); - for i in 0..OUTPUT_LEN { + let mut delta = epsilon.clone(); + + for i in 0..delta.len() { delta[i] *= self.activation.derivate(evaluated[i]); } // Compute the weight gradient - let weights_gradient = delta.reverse_dot(input); + let weights_gradient = &delta * input.transpose(); - let new_epsilon = self.weights.transpose_multiply_vector(&delta); + let new_epsilon = self.weights.tr_mul(&delta); // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // The gradient of the bias is equal to the delta term of the backpropagation algorithm @@ -128,53 +183,12 @@ impl< (new_epsilon, (weights_gradient, bias_gradient)) } - fn apply_gradient(&mut self, gradient: &Self::Delta) { - NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0); - NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1); + fn regularize_layer(&self) -> Self::Gradient { + (self.weights.map(|x| self.regularization.derivate(x)), DVector::zeros(self.bias.shape().0)) } - fn regularize(&self) -> Self::Delta { - let mut res = Self::Delta::default(); - - for i in 0..OUTPUT_LEN { - for j in 0..INPUT_LEN { - res.0[i][j] = self.regularization.derivate(self.weights[i][j]); - } - } - - // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network - - res - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::{ - derivable::{activation::Relu, regularize::NeuraL0}, - utils::uniform_vector, - }; - - #[test] - fn test_from_rng() { - let mut rng = rand::thread_rng(); - let layer: NeuraDenseLayer<_, _, 64, 32> = - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0); - let mut input = [0.0; 64]; - for x in 0..64 { - input[x] = rng.gen(); - } - assert!(layer.eval(&input.into()).len() == 32); - } - - #[test] - fn test_stack_overflow_big_layer() { - let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0) - as NeuraDenseLayer; - - layer.backpropagate(&uniform_vector(), uniform_vector()); - - as NeuraTrainableLayer>::Delta::zero(); + fn apply_gradient(&mut self, gradient: &Self::Gradient) { + self.weights += &gradient.0; + self.bias += &gradient.1; } } diff --git a/src/layer/mod.rs b/src/layer/mod.rs index c89d835..560d738 100644 --- a/src/layer/mod.rs +++ b/src/layer/mod.rs @@ -1,39 +1,55 @@ -mod dense; -pub use dense::NeuraDenseLayer; +use num::Float; -mod convolution; -pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer}; +use crate::algebra::NeuraVectorSpace; -mod dropout; -pub use dropout::NeuraDropoutLayer; +pub mod dense; +pub use dense::NeuraDenseLayer; -mod softmax; -pub use softmax::NeuraSoftmaxLayer; +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum NeuraShape { + Vector(usize), // entries + Matrix(usize, usize), // rows, columns + Tensor(usize, usize, usize), // rows, columns, channels +} -mod one_hot; -pub use one_hot::NeuraOneHotLayer; +impl NeuraShape { + pub fn size(&self) -> usize { + match self { + NeuraShape::Vector(entries) => *entries, + NeuraShape::Matrix(rows, columns) => rows * columns, + NeuraShape::Tensor(rows, columns, channels) => rows * columns * channels + } + } +} + +pub trait NeuraLayer 
{ + type Output; -mod lock; -pub use lock::NeuraLockLayer; + fn eval(&self, input: &Input) -> Self::Output; +} -mod pool; -pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer}; +impl NeuraLayer for () { + type Output = Input; -mod reshape; -pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer}; + fn eval(&self, input: &Input) -> Self::Output { + input.clone() + } +} -use crate::algebra::NeuraVectorSpace; +pub trait NeuraPartialLayer { + type Constructed; + type Err; -pub trait NeuraLayer { - type Input; - type Output; + fn construct(self, input_shape: NeuraShape) -> Result; - fn eval(&self, input: &Self::Input) -> Self::Output; + fn output_shape(constructed: &Self::Constructed) -> NeuraShape; } -pub trait NeuraTrainableLayer: NeuraLayer { +pub trait NeuraTrainableLayer: NeuraLayer { /// The representation of the layer gradient as a vector space - type Delta: NeuraVectorSpace; + type Gradient: NeuraVectorSpace; + + fn default_gradient(&self) -> Self::Gradient; /// Computes the backpropagation term and the derivative of the internal weights, /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. @@ -46,125 +62,28 @@ pub trait NeuraTrainableLayer: NeuraLayer { /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. /// Using this intermediate value for `delta` allows us to isolate it computation to the respective layers. - fn backpropagate( + fn backprop_layer( &self, - input: &Self::Input, + input: &Input, epsilon: Self::Output, - ) -> (Self::Input, Self::Delta); + ) -> (Input, Self::Gradient); /// Computes the regularization - fn regularize(&self) -> Self::Delta; + fn regularize_layer(&self) -> Self::Gradient; /// Applies `δW_l` to the weights of the layer - fn apply_gradient(&mut self, gradient: &Self::Delta); - - /// Called before an iteration begins, to allow the layer to set itself up for training. - #[inline(always)] - fn prepare_epoch(&mut self) {} + fn apply_gradient(&mut self, gradient: &Self::Gradient); - /// Called at the end of training, to allow the layer to clean itself up + /// Arbitrary computation that can be executed at the start of an epoch + #[allow(unused_variables)] #[inline(always)] - fn cleanup(&mut self) {} + fn prepare_layer(&mut self, is_training: bool) {} } +/// Temporary implementation of neura_layer #[macro_export] macro_rules! 
neura_layer { - ( "dense", $( $shape:expr ),*; $activation:expr ) => { - $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0) - as neura_layer!("_dense_shape", $($shape),*) - }; - - ( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => { - $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization) - as neura_layer!("_dense_shape", $($shape),*) - }; - - ( "_dense_shape", $output:expr ) => { - $crate::layer::NeuraDenseLayer<_, _, _, $output> - }; - - ( "_dense_shape", $input:expr, $output:expr ) => { - $crate::layer::NeuraDenseLayer<_, _, $input, $output> - }; - - ( "dropout", $probability:expr ) => { - $crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng()) - as $crate::layer::NeuraDropoutLayer<_, _> - }; - - ( "softmax" ) => { - $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_> - }; - - ( "softmax", $length:expr ) => { - $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length> - }; - - ( "one_hot" ) => { - $crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _> - }; - - ( "lock", $layer:expr ) => { - $crate::layer::NeuraLockLayer($layer) - }; - - ( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _> - }; - - ( "conv1d_pad"; $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _> - }; - - ( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _> - }; - - ( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _> - }; - - ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _> - }; - - ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { - $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _> - }; - - ( "pool_global"; $reduce:expr ) => { - $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _> - }; - - ( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => { - $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _> - }; - - ( "pool1d", $blocklength:expr; $reduce:expr ) => { - $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _> - }; - - ( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => { - $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _> - }; - - ( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => { - $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _> - }; - - ( "unstable_flatten" ) => { - $crate::layer::NeuraFlattenLayer::new() as 
$crate::layer::NeuraFlattenLayer<_, _, f64> - }; - - ( "unstable_flatten", $width:expr, $height:expr ) => { - $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64> - }; - - ( "unstable_reshape", $height:expr ) => { - $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64> - }; - - ( "unstable_reshape", $width:expr, $height:expr ) => { - $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64> - }; + ( "dense", $output:expr, $activation:expr ) => { + $crate::layer::dense::NeuraDenseLayer::new_partial($output, rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0) + } } diff --git a/src/lib.rs b/src/lib.rs index 0ccb167..4b2d837 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,15 @@ #![feature(generic_arg_infer)] #![feature(generic_const_exprs)] +#![feature(negative_impls)] pub mod algebra; pub mod derivable; -pub mod layer; +// pub mod layer; pub mod network; pub mod train; +pub mod layer; + mod utils; // TODO: move to a different file @@ -17,7 +20,7 @@ pub mod prelude { pub use crate::{neura_layer, neura_sequential}; // Structs and traits - pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; - pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail}; + pub use crate::layer::*; + pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail, NeuraSequentialBuild}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; } diff --git a/src/network/mod.rs b/src/network/mod.rs index 68b953f..68bdcfa 100644 --- a/src/network/mod.rs +++ b/src/network/mod.rs @@ -2,25 +2,24 @@ use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer}; pub mod sequential; -pub trait NeuraTrainableNetwork: NeuraLayer { +pub trait NeuraTrainableNetwork: NeuraLayer { type Delta: NeuraVectorSpace; + fn default_gradient(&self) -> Self::Delta; + fn apply_gradient(&mut self, gradient: &Self::Delta); /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information. fn backpropagate>( &self, - input: &Self::Input, + input: &Input, target: &Loss::Target, loss: Loss, - ) -> (Self::Input, Self::Delta); + ) -> (Input, Self::Delta); /// Should return the regularization gradient fn regularize(&self) -> Self::Delta; - /// Called before an iteration begins, to allow the network to set itself up for training. - fn prepare_epoch(&mut self); - - /// Called at the end of training, to allow the network to clean itself up - fn cleanup(&mut self); + /// Called before an iteration begins, to allow the network to set itself up for training or not. 
+ fn prepare(&mut self, train_iteration: bool); } diff --git a/src/network/sequential.rs b/src/network/sequential.rs index 602e50d..0fcd876 100644 --- a/src/network/sequential.rs +++ b/src/network/sequential.rs @@ -1,12 +1,14 @@ +use num::Float; + use crate::{ derivable::NeuraLoss, - layer::{NeuraLayer, NeuraTrainableLayer}, + layer::{NeuraLayer, NeuraTrainableLayer, NeuraShape, NeuraPartialLayer}, }; use super::NeuraTrainableNetwork; #[derive(Clone, Debug)] -pub struct NeuraSequential { +pub struct NeuraSequential { pub layer: Layer, pub child_network: Box, } @@ -14,13 +16,13 @@ pub struct NeuraSequential { /// Operations on the tail end of a sequential network pub trait NeuraSequentialTail { type TailTrimmed; - type TailPushed; + type TailPushed; fn trim_tail(self) -> Self::TailTrimmed; - fn push_tail(self, layer: T) -> Self::TailPushed; + fn push_tail(self, layer: T) -> Self::TailPushed; } -impl NeuraSequential { +impl NeuraSequential { pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { Self { layer, @@ -28,9 +30,10 @@ impl NeuraSequential { } } - pub fn new_match_output(layer: Layer, child_network: ChildNetwork) -> Self + pub fn new_match_output(layer: Layer, child_network: ChildNetwork) -> Self where - ChildNetwork: NeuraLayer, + Layer: NeuraLayer, + ChildNetwork: NeuraLayer, { Self::new(layer, child_network) } @@ -39,7 +42,10 @@ impl NeuraSequential { *self.child_network } - pub fn push_front(self, layer: T) -> NeuraSequential { + pub fn push_front>(self, layer: T) -> NeuraSequential + where + Layer: NeuraLayer + { NeuraSequential { layer: layer, child_network: Box::new(self), @@ -48,15 +54,15 @@ impl NeuraSequential { } // Trimming the last layer returns an empty network -impl NeuraSequentialTail for NeuraSequential { +impl NeuraSequentialTail for NeuraSequential { type TailTrimmed = (); - type TailPushed = NeuraSequential>; + type TailPushed = NeuraSequential>; fn trim_tail(self) -> Self::TailTrimmed { () } - fn push_tail(self, layer: T) -> Self::TailPushed { + fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, child_network: Box::new(NeuraSequential { @@ -68,11 +74,11 @@ impl NeuraSequentialTail for NeuraSequential { } // Trimming another layer returns a network which calls trim recursively -impl NeuraSequentialTail +impl NeuraSequentialTail for NeuraSequential { type TailTrimmed = NeuraSequential::TailTrimmed>; - type TailPushed = + type TailPushed = NeuraSequential::TailPushed>; fn trim_tail(self) -> Self::TailTrimmed { @@ -82,7 +88,7 @@ impl NeuraSequentialTail } } - fn push_tail(self, layer: T) -> Self::TailPushed { + fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, child_network: Box::new(self.child_network.push_tail(layer)), @@ -90,62 +96,55 @@ impl NeuraSequentialTail } } -impl NeuraLayer for NeuraSequential { - type Input = Layer::Input; - type Output = Layer::Output; - - fn eval(&self, input: &Self::Input) -> Self::Output { - self.layer.eval(input) - } -} - -impl> NeuraLayer +impl, ChildNetwork: NeuraLayer> NeuraLayer for NeuraSequential { - type Input = Layer::Input; - type Output = ChildNetwork::Output; - fn eval(&self, input: &Self::Input) -> Self::Output { + fn eval(&self, input: &Input) -> Self::Output { self.child_network.eval(&self.layer.eval(input)) } } -impl NeuraTrainableNetwork for NeuraSequential { - type Delta = Layer::Delta; +impl NeuraTrainableNetwork for () { + type Delta = (); - fn apply_gradient(&mut self, gradient: &Self::Delta) { - 
self.layer.apply_gradient(gradient); + fn default_gradient(&self) -> () { + () + } + + fn apply_gradient(&mut self, _gradient: &()) { + // Noop } fn backpropagate>( &self, - input: &Self::Input, + final_activation: &Input, target: &Loss::Target, loss: Loss, - ) -> (Self::Input, Self::Delta) { - let final_activation = self.layer.eval(input); + ) -> (Input, Self::Delta) { let backprop_epsilon = loss.nabla(target, &final_activation); - self.layer.backpropagate(&input, backprop_epsilon) - } - fn regularize(&self) -> Self::Delta { - self.layer.regularize() + (backprop_epsilon, ()) } - fn prepare_epoch(&mut self) { - self.layer.prepare_epoch(); + fn regularize(&self) -> () { + () } - fn cleanup(&mut self) { - self.layer.cleanup(); + fn prepare(&mut self, _is_training: bool) { + // Noop } } -impl> - NeuraTrainableNetwork for NeuraSequential +impl, ChildNetwork: NeuraTrainableNetwork> + NeuraTrainableNetwork for NeuraSequential { - type Delta = (Layer::Delta, Box); + type Delta = (Layer::Gradient, Box); + + fn default_gradient(&self) -> Self::Delta { + (self.layer.default_gradient(), Box::new(self.child_network.default_gradient())) + } fn apply_gradient(&mut self, gradient: &Self::Delta) { self.layer.apply_gradient(&gradient.0); @@ -154,16 +153,16 @@ impl>( &self, - input: &Self::Input, + input: &Input, target: &Loss::Target, loss: Loss, - ) -> (Self::Input, Self::Delta) { + ) -> (Input, Self::Delta) { let next_activation = self.layer.eval(input); let (backprop_gradient, weights_gradient) = self.child_network .backpropagate(&next_activation, target, loss); let (backprop_gradient, layer_gradient) = - self.layer.backpropagate(input, backprop_gradient); + self.layer.backprop_layer(input, backprop_gradient); ( backprop_gradient, @@ -173,23 +172,18 @@ impl Self::Delta { ( - self.layer.regularize(), + self.layer.regularize_layer(), Box::new(self.child_network.regularize()), ) } - fn prepare_epoch(&mut self) { - self.layer.prepare_epoch(); - self.child_network.prepare_epoch(); - } - - fn cleanup(&mut self) { - self.layer.cleanup(); - self.child_network.cleanup(); + fn prepare(&mut self, is_training: bool) { + self.layer.prepare_layer(is_training); + self.child_network.prepare(is_training); } } -impl From for NeuraSequential { +impl From for NeuraSequential { fn from(layer: Layer) -> Self { Self { layer, @@ -198,6 +192,53 @@ impl From for NeuraSequential { } } +pub trait NeuraSequentialBuild { + type Constructed; + type Err; + + fn construct(self, input_shape: NeuraShape) -> Result; +} + +#[derive(Debug, Clone)] +pub enum NeuraSequentialBuildErr { + Current(Err), + Child(ChildErr), +} + +impl NeuraSequentialBuild for NeuraSequential { + type Constructed = NeuraSequential; + type Err = Layer::Err; + + fn construct(self, input_shape: NeuraShape) -> Result { + Ok(NeuraSequential { + layer: self.layer.construct(input_shape)?, + child_network: Box::new(()) + }) + } +} + +impl NeuraSequentialBuild for NeuraSequential { + type Constructed = NeuraSequential; + type Err = NeuraSequentialBuildErr; + + fn construct(self, input_shape: NeuraShape) -> Result { + let layer = self.layer.construct(input_shape).map_err(|e| NeuraSequentialBuildErr::Current(e))?; + + // TODO: ensure that this operation (and all recursive operations) are directly allocated on the heap + let child_network = self.child_network + .construct(Layer::output_shape(&layer)) + .map_err(|e| NeuraSequentialBuildErr::Child(e))?; + let child_network = Box::new(child_network); + + Ok(NeuraSequential { + layer, + child_network, + }) + } + + +} + /// An 
utility to recursively create a NeuraSequential network, while writing it in a declarative and linear fashion. /// Note that this can quickly create big and unwieldly types. #[macro_export] @@ -211,41 +252,47 @@ macro_rules! neura_sequential { }; [ $first:expr, $($rest:expr),+ $(,)? ] => { - $crate::network::sequential::NeuraSequential::new_match_output($first, neura_sequential![$($rest),+]) + $crate::network::sequential::NeuraSequential::new($first, neura_sequential![$($rest),+]) }; } #[cfg(test)] mod test { + use nalgebra::dvector; + use crate::{ derivable::{activation::Relu, regularize::NeuraL0}, - layer::NeuraDenseLayer, + layer::{NeuraDenseLayer, NeuraShape, NeuraLayer}, neura_layer, }; + use super::NeuraSequentialBuild; + #[test] fn test_neura_network_macro() { let mut rng = rand::thread_rng(); let _ = neura_sequential![ - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 2> + NeuraDenseLayer::from_rng(8, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, + NeuraDenseLayer::from_rng(12, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, + NeuraDenseLayer::from_rng(16, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer ]; let _ = neura_sequential![ - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, + NeuraDenseLayer::from_rng(2, 2, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, ]; let _ = neura_sequential![ - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>, - NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>, + NeuraDenseLayer::from_rng(8, 16, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, + NeuraDenseLayer::from_rng(16, 12, &mut rng, Relu, NeuraL0) as NeuraDenseLayer, ]; - let _ = neura_sequential![ - neura_layer!("dense", 8, 16; Relu), - neura_layer!("dense", 12; Relu), - neura_layer!("dense", 2; Relu) - ]; + let network = neura_sequential![ + neura_layer!("dense", 16, Relu), + neura_layer!("dense", 12, Relu), + neura_layer!("dense", 2, Relu) + ].construct(NeuraShape::Vector(2)).unwrap(); + + network.eval(&dvector![0.0f64, 0.0]); } } diff --git a/src/layer/convolution.rs b/src/old_layer/convolution.rs similarity index 100% rename from src/layer/convolution.rs rename to src/old_layer/convolution.rs diff --git a/src/old_layer/dense.rs b/src/old_layer/dense.rs new file mode 100644 index 0000000..ff921a8 --- /dev/null +++ b/src/old_layer/dense.rs @@ -0,0 +1,180 @@ +use super::{NeuraLayer, NeuraTrainableLayer}; +use crate::{ + algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace}, + derivable::NeuraDerivable, +}; + +use rand::Rng; +use rand_distr::Distribution; + +#[derive(Clone, Debug)] +pub struct NeuraDenseLayer< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, +> { + weights: NeuraMatrix, + bias: NeuraVector, + activation: Act, + regularization: Reg, +} + +impl< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, + > NeuraDenseLayer +{ + pub fn new( + weights: NeuraMatrix, + bias: NeuraVector, + activation: Act, + regularization: Reg, + ) -> Self { + Self { + weights, + bias, + activation, + regularization, + } + } + + pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { + let mut weights: NeuraMatrix = NeuraMatrix::from_value(0.0f64); + + // Use Xavier (or He) 
initialisation, using the harmonic mean + // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html + let distribution = rand_distr::Normal::new( + 0.0, + activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64), + ) + .unwrap(); + // let distribution = rand_distr::Uniform::new(-0.5, 0.5); + + for i in 0..OUTPUT_LEN { + for j in 0..INPUT_LEN { + weights[i][j] = distribution.sample(rng); + } + } + + Self { + weights, + // Biases are initialized based on the activation's hint + bias: NeuraVector::from_value(activation.bias_hint()), + activation, + regularization, + } + } +} + +impl< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, + > NeuraLayer for NeuraDenseLayer +{ + type Input = NeuraVector; + + type Output = NeuraVector; + + fn eval(&self, input: &Self::Input) -> Self::Output { + let mut result = self.weights.multiply_vector(input); + + for i in 0..OUTPUT_LEN { + result[i] = self.activation.eval(result[i] + self.bias[i]); + } + + result + } +} + +impl< + Act: NeuraDerivable, + Reg: NeuraDerivable, + const INPUT_LEN: usize, + const OUTPUT_LEN: usize, + > NeuraTrainableLayer for NeuraDenseLayer +{ + type Delta = ( + NeuraMatrix, + NeuraVector, + ); + + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + let evaluated = self.weights.multiply_vector(input); + // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), + // with `self.activation'(input) ° epsilon = delta` + let mut delta: NeuraVector = epsilon.clone(); + for i in 0..OUTPUT_LEN { + delta[i] *= self.activation.derivate(evaluated[i]); + } + + // Compute the weight gradient + let weights_gradient = delta.reverse_dot(input); + + let new_epsilon = self.weights.transpose_multiply_vector(&delta); + + // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation + // The gradient of the bias is equal to the delta term of the backpropagation algorithm + let bias_gradient = delta; + + (new_epsilon, (weights_gradient, bias_gradient)) + } + + fn apply_gradient(&mut self, gradient: &Self::Delta) { + NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0); + NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1); + } + + fn regularize(&self) -> Self::Delta { + let mut res = Self::Delta::default(); + + for i in 0..OUTPUT_LEN { + for j in 0..INPUT_LEN { + res.0[i][j] = self.regularization.derivate(self.weights[i][j]); + } + } + + // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network + + res + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + derivable::{activation::Relu, regularize::NeuraL0}, + utils::uniform_vector, + }; + + #[test] + fn test_from_rng() { + let mut rng = rand::thread_rng(); + let layer: NeuraDenseLayer<_, _, 64, 32> = + NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0); + let mut input = [0.0; 64]; + for x in 0..64 { + input[x] = rng.gen(); + } + assert!(layer.eval(&input.into()).len() == 32); + } + + #[test] + fn test_stack_overflow_big_layer() { + let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0) + as NeuraDenseLayer; + + layer.backpropagate(&uniform_vector(), uniform_vector()); + + as NeuraTrainableLayer>::Delta::zero(); + } +} diff --git a/src/layer/dropout.rs b/src/old_layer/dropout.rs similarity index 100% rename from 
src/layer/dropout.rs rename to src/old_layer/dropout.rs diff --git a/src/layer/lock.rs b/src/old_layer/lock.rs similarity index 100% rename from src/layer/lock.rs rename to src/old_layer/lock.rs diff --git a/src/old_layer/mod.rs b/src/old_layer/mod.rs new file mode 100644 index 0000000..c89d835 --- /dev/null +++ b/src/old_layer/mod.rs @@ -0,0 +1,170 @@ +mod dense; +pub use dense::NeuraDenseLayer; + +mod convolution; +pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer}; + +mod dropout; +pub use dropout::NeuraDropoutLayer; + +mod softmax; +pub use softmax::NeuraSoftmaxLayer; + +mod one_hot; +pub use one_hot::NeuraOneHotLayer; + +mod lock; +pub use lock::NeuraLockLayer; + +mod pool; +pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer}; + +mod reshape; +pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer}; + +use crate::algebra::NeuraVectorSpace; + +pub trait NeuraLayer { + type Input; + type Output; + + fn eval(&self, input: &Self::Input) -> Self::Output; +} + +pub trait NeuraTrainableLayer: NeuraLayer { + /// The representation of the layer gradient as a vector space + type Delta: NeuraVectorSpace; + + /// Computes the backpropagation term and the derivative of the internal weights, + /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. + /// + /// Note: we introduce the term `epsilon`, which together with the activation of the current function can be used to compute `delta_l`: + /// ```no_rust + /// f_l'(a_l) * epsilon_l = delta_l + /// ``` + /// + /// The function should then return a pair `(epsilon_{l-1}, δW_l)`, + /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`. + /// Using this intermediate value for `delta` allows us to isolate it computation to the respective layers. + fn backpropagate( + &self, + input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta); + + /// Computes the regularization + fn regularize(&self) -> Self::Delta; + + /// Applies `δW_l` to the weights of the layer + fn apply_gradient(&mut self, gradient: &Self::Delta); + + /// Called before an iteration begins, to allow the layer to set itself up for training. + #[inline(always)] + fn prepare_epoch(&mut self) {} + + /// Called at the end of training, to allow the layer to clean itself up + #[inline(always)] + fn cleanup(&mut self) {} +} + +#[macro_export] +macro_rules! 
neura_layer { + ( "dense", $( $shape:expr ),*; $activation:expr ) => { + $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0) + as neura_layer!("_dense_shape", $($shape),*) + }; + + ( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => { + $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization) + as neura_layer!("_dense_shape", $($shape),*) + }; + + ( "_dense_shape", $output:expr ) => { + $crate::layer::NeuraDenseLayer<_, _, _, $output> + }; + + ( "_dense_shape", $input:expr, $output:expr ) => { + $crate::layer::NeuraDenseLayer<_, _, $input, $output> + }; + + ( "dropout", $probability:expr ) => { + $crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng()) + as $crate::layer::NeuraDropoutLayer<_, _> + }; + + ( "softmax" ) => { + $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_> + }; + + ( "softmax", $length:expr ) => { + $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length> + }; + + ( "one_hot" ) => { + $crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _> + }; + + ( "lock", $layer:expr ) => { + $crate::layer::NeuraLockLayer($layer) + }; + + ( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _> + }; + + ( "conv1d_pad"; $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _> + }; + + ( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _> + }; + + ( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _> + }; + + ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _> + }; + + ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => { + $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _> + }; + + ( "pool_global"; $reduce:expr ) => { + $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _> + }; + + ( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => { + $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _> + }; + + ( "pool1d", $blocklength:expr; $reduce:expr ) => { + $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _> + }; + + ( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => { + $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _> + }; + + ( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => { + $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _> + }; + + ( "unstable_flatten" ) => { + $crate::layer::NeuraFlattenLayer::new() as 
$crate::layer::NeuraFlattenLayer<_, _, f64> + }; + + ( "unstable_flatten", $width:expr, $height:expr ) => { + $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64> + }; + + ( "unstable_reshape", $height:expr ) => { + $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64> + }; + + ( "unstable_reshape", $width:expr, $height:expr ) => { + $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64> + }; +} diff --git a/src/layer/one_hot.rs b/src/old_layer/one_hot.rs similarity index 100% rename from src/layer/one_hot.rs rename to src/old_layer/one_hot.rs diff --git a/src/layer/pool.rs b/src/old_layer/pool.rs similarity index 100% rename from src/layer/pool.rs rename to src/old_layer/pool.rs diff --git a/src/layer/reshape.rs b/src/old_layer/reshape.rs similarity index 100% rename from src/layer/reshape.rs rename to src/old_layer/reshape.rs diff --git a/src/layer/softmax.rs b/src/old_layer/softmax.rs similarity index 100% rename from src/layer/softmax.rs rename to src/old_layer/softmax.rs diff --git a/src/train.rs b/src/train.rs index 9d26ac7..78d5e63 100644 --- a/src/train.rs +++ b/src/train.rs @@ -5,26 +5,20 @@ use crate::{ network::{sequential::NeuraSequential, NeuraTrainableNetwork}, }; -pub trait NeuraGradientSolver { - fn get_gradient( +pub trait NeuraGradientSolver> { + fn get_gradient( &self, - trainable: &NeuraSequential, - input: &Layer::Input, + trainable: &Trainable, + input: &Input, target: &Target, - ) -> as NeuraTrainableNetwork>::Delta - where - NeuraSequential: - NeuraTrainableNetwork; + ) -> Trainable::Delta; - fn score( + fn score( &self, - trainable: &NeuraSequential, - input: &Layer::Input, + trainable: &Trainable, + input: &Input, target: &Target, - ) -> f64 - where - NeuraSequential: - NeuraTrainableNetwork; + ) -> f64; } #[non_exhaustive] @@ -38,32 +32,24 @@ impl NeuraBackprop { } } -impl> + Clone> - NeuraGradientSolver, Loss::Target> for NeuraBackprop +impl, Loss: NeuraLoss + Clone> + NeuraGradientSolver for NeuraBackprop { - fn get_gradient( + fn get_gradient( &self, - trainable: &NeuraSequential, - input: &Layer::Input, - target: &Loss::Target, - ) -> as NeuraTrainableNetwork>::Delta - where - NeuraSequential: - NeuraTrainableNetwork>, - { + trainable: &Trainable, + input: &Input, + target: &Target, + ) -> Trainable::Delta { trainable.backpropagate(input, target, self.loss.clone()).1 } - fn score( + fn score( &self, - trainable: &NeuraSequential, - input: &Layer::Input, - target: &Loss::Target, - ) -> f64 - where - NeuraSequential: - NeuraTrainableNetwork>, - { + trainable: &Trainable, + input: &Input, + target: &Target, + ) -> f64 { let output = trainable.eval(&input); self.loss.eval(target, &output) } @@ -137,41 +123,32 @@ impl NeuraBatchedTrainer { } pub fn train< - Output, + Input: Clone, Target: Clone, - GradientSolver: NeuraGradientSolver, - Layer: NeuraLayer, - ChildNetwork, - Inputs: IntoIterator, + Network: NeuraTrainableNetwork, + GradientSolver: NeuraGradientSolver, + Inputs: IntoIterator, >( &self, gradient_solver: GradientSolver, - network: &mut NeuraSequential, + network: &mut Network, inputs: Inputs, - test_inputs: &[(Layer::Input, Target)], - ) where - NeuraSequential: - NeuraTrainableNetwork, - Layer::Input: Clone, - { + test_inputs: &[(Input, Target)], + ) { let mut iter = inputs.into_iter(); let factor = -self.learning_rate / (self.batch_size as f64); let momentum_factor = self.learning_momentum / self.learning_rate; let reg_factor 
= -self.learning_rate; // Contains `momentum_factor * factor * gradient_sum_previous_iter` - let mut previous_gradient_sum = - Box::< as NeuraTrainableNetwork>::Delta>::zero(); + let mut previous_gradient_sum = network.default_gradient(); 'd: for iteration in 0..self.iterations { - let mut gradient_sum = Box::< - as NeuraTrainableNetwork>::Delta, - >::zero(); - network.prepare_epoch(); + let mut gradient_sum = network.default_gradient(); + network.prepare(true); for _ in 0..self.batch_size { if let Some((input, target)) = iter.next() { - let gradient = - Box::new(gradient_solver.get_gradient(&network, &input, &target)); + let gradient = gradient_solver.get_gradient(&network, &input, &target); gradient_sum.add_assign(&gradient); } else { break 'd; @@ -194,7 +171,7 @@ impl NeuraBatchedTrainer { } if self.log_iterations > 0 && (iteration + 1) % self.log_iterations == 0 { - network.cleanup(); + network.prepare(false); let mut loss_sum = 0.0; for (input, target) in test_inputs { loss_sum += gradient_solver.score(&network, input, target); @@ -204,12 +181,14 @@ impl NeuraBatchedTrainer { } } - network.cleanup(); + network.prepare(false); } } #[cfg(test)] mod test { + use nalgebra::{DMatrix, dmatrix, dvector}; + use super::*; use crate::{ assert_approx, @@ -224,19 +203,19 @@ mod test { for wa in [0.0, 0.25, 0.5, 1.0] { for wb in [0.0, 0.25, 0.5, 1.0] { let network = NeuraSequential::new( - NeuraDenseLayer::new([[wa, wb]].into(), [0.0].into(), Linear, NeuraL0), + NeuraDenseLayer::new(dmatrix![wa, wb], dvector![0.0], Linear, NeuraL0), (), ); - let gradient = NeuraBackprop::new(Euclidean).get_gradient( + let (gradient, _) = NeuraBackprop::new(Euclidean).get_gradient( &network, - &[1.0, 1.0].into(), - &[0.0].into(), + &dvector![1.0, 1.0], + &dvector![0.0], ); let expected = wa + wb; - assert!((gradient.0[0][0] - expected) < 0.001); - assert!((gradient.0[0][1] - expected) < 0.001); + assert!((gradient.0[(0, 0)] - expected) < 0.001); + assert!((gradient.0[(0, 1)] - expected) < 0.001); } } } @@ -247,42 +226,42 @@ mod test { // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/ let network = neura_sequential![ NeuraDenseLayer::new( - [[0.11, 0.21], [0.12, 0.08]].into(), - [0.0; 2].into(), + dmatrix![0.11, 0.21; 0.12, 0.08], + dvector![0.0, 0.0], Linear, NeuraL0 ), - NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0) + NeuraDenseLayer::new(dmatrix![0.14, 0.15], dvector![0.0], Linear, NeuraL0) ]; - let input = [2.0, 3.0]; - let target = [1.0]; + let input = dvector![2.0, 3.0]; + let target = dvector![1.0]; - let intermediary = network.clone().trim_tail().eval(&input.into()); + let intermediary = network.clone().trim_tail().eval(&input); assert_approx!(0.85, intermediary[0], EPSILON); assert_approx!(0.48, intermediary[1], EPSILON); - assert_approx!(0.191, network.eval(&input.into())[0], EPSILON); + assert_approx!(0.191, network.eval(&input)[0], EPSILON); assert_approx!( 0.327, - Euclidean.eval(&target.into(), &network.eval(&input.into())), + Euclidean.eval(&target, &network.eval(&input)), 0.001 ); - let delta = network.eval(&input.into())[0] - target[0]; + let delta = network.eval(&input)[0] - target[0]; let (gradient_first, gradient_second) = - NeuraBackprop::new(Euclidean).get_gradient(&network, &input.into(), &target.into()); + NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target); let gradient_first = gradient_first.0; - let gradient_second = gradient_second.0[0]; + let gradient_second = gradient_second.0.0; 
assert_approx!(gradient_second[0], intermediary[0] * delta, EPSILON); assert_approx!(gradient_second[1], intermediary[1] * delta, EPSILON); - assert_approx!(gradient_first[0][0], input[0] * delta * 0.14, EPSILON); - assert_approx!(gradient_first[0][1], input[1] * delta * 0.14, EPSILON); + assert_approx!(gradient_first[(0, 0)], input[0] * delta * 0.14, EPSILON); + assert_approx!(gradient_first[(0, 1)], input[1] * delta * 0.14, EPSILON); - assert_approx!(gradient_first[1][0], input[0] * delta * 0.15, EPSILON); - assert_approx!(gradient_first[1][1], input[1] * delta * 0.15, EPSILON); + assert_approx!(gradient_first[(1, 0)], input[0] * delta * 0.15, EPSILON); + assert_approx!(gradient_first[(1, 1)], input[1] * delta * 0.15, EPSILON); } }
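
For reference, a minimal end-to-end sketch of the API after this refactor, pieced together from the updated examples/xor.rs and the tests in src/network/sequential.rs and src/train.rs above. The layer sizes, the input/target vectors, the printed inspection at the end, and the extra `use neuramethyst::train::NeuraGradientSolver;` import (needed to bring `get_gradient` into scope outside the crate) are illustrative assumptions, not part of the patch itself:

    #![feature(generic_arg_infer)]

    use nalgebra::dvector;

    use neuramethyst::derivable::activation::Relu;
    use neuramethyst::derivable::loss::Euclidean;
    use neuramethyst::prelude::*;
    use neuramethyst::train::NeuraGradientSolver;

    fn main() {
        // Layers no longer carry const-generic dimensions: each layer only declares
        // its output size, and the input shape is supplied once via `construct`,
        // which propagates it recursively through the sequential network.
        let network = neura_sequential![
            neura_layer!("dense", 4, Relu),
            neura_layer!("dense", 1, Relu)
        ]
        .construct(NeuraShape::Vector(2))
        .unwrap();

        // Forward pass on a dynamically-sized nalgebra vector.
        let output = network.eval(&dvector![0.0, 1.0]);
        println!("output: {}", output);

        // One backpropagation step through the new NeuraGradientSolver interface;
        // `gradient.0` is the (DMatrix, DVector) weight/bias gradient of the first
        // dense layer, mirroring how the train.rs tests above destructure it.
        let gradient = NeuraBackprop::new(Euclidean).get_gradient(
            &network,
            &dvector![0.0, 1.0],
            &dvector![1.0],
        );
        println!("first-layer weight gradient: {}", gradient.0.0);
    }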