🔥 ♻️ Combine all NeuraTrainable* traits into NeuraLayer

2 years ago · 72ffce457a
parent 41711d4668
commit 72ffce457a
24 changed files with 387 additions and 652 deletions
--- a/src/gradient_solver/backprop.rs
+++ b/src/gradient_solver/backprop.rs
@ -17,22 +17,22 @@ impl<Loss> NeuraBackprop<Loss> {
 impl<
        Input,
        Target,
-        Trainable: NeuraTrainableLayerBase + NeuraLayer<Input> + NeuraNetworkRec,
+        Trainable: NeuraLayer<Input> + NeuraNetworkRec,
        Loss: NeuraLoss<Trainable::Output, Target = Target> + Clone,
    > NeuraGradientSolver<Input, Target, Trainable> for NeuraBackprop<Loss>
 where
    <Loss as NeuraLoss<Trainable::Output>>::Output: ToPrimitive,
-    // Trainable: NeuraOldTrainableNetworkBase<Input, Gradient = <Trainable as NeuraTrainableLayerBase>::Gradient>,
+    // Trainable: NeuraOldTrainableNetworkBase<Input, Gradient = <Trainable as NeuraLayerBase>::Gradient>,
    // Trainable: for<'a> NeuraOldTrainableNetwork<Input, (&'a NeuraBackprop<Loss>, &'a Target)>,
    for<'a> (&'a NeuraBackprop<Loss>, &'a Target):
-        BackpropRecurse<Input, Trainable, <Trainable as NeuraTrainableLayerBase>::Gradient>,
+        BackpropRecurse<Input, Trainable, <Trainable as NeuraLayerBase>::Gradient>,
 {
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
-    ) -> <Trainable as NeuraTrainableLayerBase>::Gradient {
+    ) -> <Trainable as NeuraLayerBase>::Gradient {
        let (_, gradient) = (self, target).recurse(trainable, input);
        // let (_, gradient) = trainable.traverse(input, &(self, target));
@ -59,7 +59,7 @@ impl<Input, Loss: NeuraLoss<Input>> BackpropRecurse<Input, (), ()>
 impl<
        Input: Clone,
-        Network: NeuraNetworkRec + NeuraNetwork<Input> + NeuraTrainableLayerEval<Input>,
+        Network: NeuraNetworkRec + NeuraNetwork<Input> + NeuraLayer<Input>,
        Loss,
        Target,
    > BackpropRecurse<Input, Network, Network::Gradient> for (&NeuraBackprop<Loss>, &Target)
@ -68,14 +68,13 @@ where
    for<'a> (&'a NeuraBackprop<Loss>, &'a Target): BackpropRecurse<
        Network::NodeOutput,
        Network::NextNode,
-        <Network::NextNode as NeuraTrainableLayerBase>::Gradient,
+        <Network::NextNode as NeuraLayerBase>::Gradient,
    >,
    // Verify that the current layer implements the right traits
-    Network::Layer: NeuraTrainableLayerSelf<Network::LayerInput>
+    Network::Layer: NeuraLayer<Network::LayerInput>,
        + NeuraTrainableLayerBackprop<Network::LayerInput>,
    // Verify that the layer output can be cloned
    <Network::Layer as NeuraLayer<Network::LayerInput>>::Output: Clone,
-    Network::NextNode: NeuraTrainableLayerEval<Network::NodeOutput>,
+    Network::NextNode: NeuraLayer<Network::NodeOutput>,
 {
    fn recurse(&self, network: &Network, input: &Input) -> (Input, Network::Gradient) {
        let layer = network.get_layer();
--- a/src/gradient_solver/forward_forward.rs
+++ b/src/gradient_solver/forward_forward.rs
@ -3,7 +3,7 @@ use num::{traits::NumAssignOps, Float};
 use crate::{
    derivable::NeuraDerivable,
-    layer::NeuraTrainableLayerSelf,
+    layer::*,
    network::{NeuraNetwork, NeuraNetworkRec},
    prelude::NeuraLayer,
 };
@ -69,18 +69,18 @@ impl<
        F: Float,
        Act: Clone + NeuraDerivable<f64>,
        Input: Clone,
-        Trainable: NeuraTrainableLayerBase + NeuraLayer<Input, Output = DVector<F>>,
+        Trainable: NeuraLayer<Input, Output = DVector<F>>,
    > NeuraGradientSolver<Input, bool, Trainable> for NeuraForwardForward<Act>
 where
    NeuraForwardPair<Act>:
-        ForwardForwardRecurse<Input, Trainable, <Trainable as NeuraTrainableLayerBase>::Gradient>,
+        ForwardForwardRecurse<Input, Trainable, <Trainable as NeuraLayerBase>::Gradient>,
 {
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &bool,
-    ) -> <Trainable as NeuraTrainableLayerBase>::Gradient {
+    ) -> <Trainable as NeuraLayerBase>::Gradient {
        let target = *target;
        let pair = NeuraForwardPair {
            threshold: self.threshold,
@ -137,13 +137,13 @@ impl<Act, Input> ForwardForwardRecurse<Input, (), ()> for NeuraForwardPair<Act>
 impl<Act, Input: Clone, Network: NeuraNetwork<Input> + NeuraNetworkRec>
    ForwardForwardRecurse<Input, Network, Network::Gradient> for NeuraForwardPair<Act>
 where
-    Network::Layer: NeuraTrainableLayerSelf<Network::LayerInput>,
+    Network::Layer: NeuraLayer<Network::LayerInput>,
    <Network::Layer as NeuraLayer<Network::LayerInput>>::Output: Clone,
    Self: ForwardForwardDerivate<<Network::Layer as NeuraLayer<Network::LayerInput>>::Output>,
    Self: ForwardForwardRecurse<
        Network::NodeOutput,
        Network::NextNode,
-        <Network::NextNode as NeuraTrainableLayerBase>::Gradient,
+        <Network::NextNode as NeuraLayerBase>::Gradient,
    >,
 {
    fn recurse(&self, network: &Network, input: &Input) -> Network::Gradient {
--- a/src/gradient_solver/mod.rs
+++ b/src/gradient_solver/mod.rs
@ -4,9 +4,9 @@ pub use backprop::NeuraBackprop;
 mod forward_forward;
 pub use forward_forward::NeuraForwardForward;
-use crate::layer::{NeuraTrainableLayerBase, NeuraTrainableLayerEval};
+use crate::layer::NeuraLayerBase;
-pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableLayerBase> {
+pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraLayerBase> {
    fn get_gradient(
        &self,
        trainable: &Trainable,
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@ -1,7 +1,7 @@
 use std::marker::PhantomData;
-use nalgebra::{DMatrix, DVector};
+use nalgebra::{DMatrix, DVector, Scalar};
-use num::Float;
+use num::{traits::NumAssignOps, Float};
 use rand::Rng;
 use crate::{derivable::NeuraDerivable, err::NeuraDimensionsMismatch};
@ -126,18 +126,10 @@ impl<F, Act, Reg, R: Rng> NeuraDenseLayerPartial<F, Act, Reg, R> {
    }
 }
 impl<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>> NeuraShapedLayer
    for NeuraDenseLayer<F, Act, Reg>
 {
    fn output_shape(&self) -> NeuraShape {
        NeuraShape::Vector(self.weights.shape().0)
    }
 }
 impl<
-        F: Float + std::fmt::Debug + 'static,
+        F: Float + Scalar + Send + NumAssignOps,
-        Act: NeuraDerivable<F>,
+        Act: NeuraDerivable<F> + Clone + std::fmt::Debug + 'static,
-        Reg: NeuraDerivable<F>,
+        Reg: NeuraDerivable<F> + Clone + std::fmt::Debug + 'static,
        R: Rng,
    > NeuraPartialLayer for NeuraDenseLayerPartial<F, Act, Reg, R>
 where
@ -158,8 +150,10 @@ where
    }
 }
-impl<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>> NeuraPartialLayer
+impl<F: Float + Scalar + Send + NumAssignOps, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>>
-    for NeuraDenseLayer<F, Act, Reg>
+    NeuraPartialLayer for NeuraDenseLayer<F, Act, Reg>
 where
    Self: Clone + std::fmt::Debug + 'static,
 {
    type Constructed = Self;
    type Err = NeuraDimensionsMismatch;
@ -176,28 +170,10 @@ impl<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>> NeuraPartialLayer
    }
 }
-impl<
+impl<F: Float + NumAssignOps + Scalar + Send, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>>
-        F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
+    NeuraLayerBase for NeuraDenseLayer<F, Act, Reg>
-        Act: NeuraDerivable<F>,
+where
-        Reg: NeuraDerivable<F>,
+    Self: Clone + std::fmt::Debug + 'static,
    > NeuraLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
 {
    type Output = DVector<F>;
    fn eval(&self, input: &DVector<F>) -> Self::Output {
        assert_eq!(input.shape().0, self.weights.shape().1);
        let evaluated = &self.weights * input + &self.bias;
        evaluated.map(|x| self.activation.eval(x))
    }
 }
 impl<
        F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
        Act: NeuraDerivable<F>,
        Reg: NeuraDerivable<F>,
    > NeuraTrainableLayerBase for NeuraDenseLayer<F, Act, Reg>
 {
    type Gradient = (DMatrix<F>, DVector<F>);
@ -212,14 +188,25 @@ impl<
        self.weights += &gradient.0;
        self.bias += &gradient.1;
    }
    fn output_shape(&self) -> NeuraShape {
        NeuraShape::Vector(self.weights.shape().0)
    }
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.weights.map(|x| self.regularization.derivate(x)),
            DVector::zeros(self.bias.shape().0),
        )
    }
 }
-impl<
+impl<F: Float + NumAssignOps + Scalar + Send, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>>
-        F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
+    NeuraLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
-        Act: NeuraDerivable<F>,
+where
-        Reg: NeuraDerivable<F>,
+    Self: Clone + std::fmt::Debug + 'static,
    > NeuraTrainableLayerEval<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
 {
    type Output = DVector<F>;
    type IntermediaryRepr = DVector<F>; // pre-activation values
    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
@ -228,20 +215,6 @@ impl<
        (output, evaluated)
    }
 }
 impl<
        F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
        Act: NeuraDerivable<F>,
        Reg: NeuraDerivable<F>,
    > NeuraTrainableLayerSelf<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
 {
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.weights.map(|x| self.regularization.derivate(x)),
            DVector::zeros(self.bias.shape().0),
        )
    }
    fn get_gradient(
        &self,
@ -266,14 +239,7 @@ impl<
        (weights_gradient, bias_gradient)
    }
 }
 impl<
        F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
        Act: NeuraDerivable<F>,
        Reg: NeuraDerivable<F>,
    > NeuraTrainableLayerBackprop<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
 {
    fn backprop_layer(
        &self,
        _input: &DVector<F>,
--- a/src/layer/dropout.rs
+++ b/src/layer/dropout.rs
@ -35,13 +35,7 @@ impl<R: Rng> NeuraDropoutLayer<R> {
    }
 }
-impl<R: Rng> NeuraShapedLayer for NeuraDropoutLayer<R> {
+impl<R: Rng + Clone + std::fmt::Debug + 'static> NeuraPartialLayer for NeuraDropoutLayer<R> {
    fn output_shape(&self) -> NeuraShape {
        self.shape
    }
 }
 impl<R: Rng> NeuraPartialLayer for NeuraDropoutLayer<R> {
    type Constructed = NeuraDropoutLayer<R>;
    type Err = ();
@ -53,25 +47,15 @@ impl<R: Rng> NeuraPartialLayer for NeuraDropoutLayer<R> {
    }
 }
-impl<R: Rng, F: Float> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
+impl<R: Rng + Clone + std::fmt::Debug + 'static> NeuraLayerBase for NeuraDropoutLayer<R> {
    type Output = DVector<F>;
    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let mut output = input.clone();
        self.apply_dropout(&mut output);
        output
    }
 }
 impl<R: Rng> NeuraTrainableLayerBase for NeuraDropoutLayer<R> {
    type Gradient = ();
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
-    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
+    fn output_shape(&self) -> NeuraShape {
-        // Noop
+        self.shape
    }
    fn prepare_layer(&mut self, is_training: bool) {
@ -98,30 +82,19 @@ impl<R: Rng> NeuraTrainableLayerBase for NeuraDropoutLayer<R> {
    }
 }
-impl<R: Rng, F: Float> NeuraTrainableLayerEval<DVector<F>> for NeuraDropoutLayer<R> {
+impl<R: Rng + Clone + std::fmt::Debug + 'static, F: Float> NeuraLayer<DVector<F>>
    for NeuraDropoutLayer<R>
 {
    type Output = DVector<F>;
    type IntermediaryRepr = ();
    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
-        (self.eval(input), ())
+        let mut output = input.clone();
-    }
+        self.apply_dropout(&mut output);
-}
+        (output, ())
 impl<R: Rng, F: Float> NeuraTrainableLayerSelf<DVector<F>> for NeuraDropoutLayer<R> {
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }
    fn get_gradient(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
 }
 impl<R: Rng, F: Float> NeuraTrainableLayerBackprop<DVector<F>> for NeuraDropoutLayer<R> {
    fn backprop_layer(
        &self,
        _input: &DVector<F>,
--- a/src/layer/isolate.rs
+++ b/src/layer/isolate.rs
@ -24,12 +24,6 @@ impl NeuraIsolateLayer {
    }
 }
 impl NeuraShapedLayer for NeuraIsolateLayer {
    fn output_shape(&self) -> NeuraShape {
        self.end.sub(self.start).unwrap_or_else(|| unreachable!())
    }
 }
 impl NeuraPartialLayer for NeuraIsolateLayer {
    type Constructed = NeuraIsolateLayer;
    type Err = NeuraIsolateLayerErr;
@ -70,19 +64,7 @@ impl NeuraPartialLayer for NeuraIsolateLayer {
    }
 }
-impl<F: Clone + Scalar> NeuraLayer<DVector<F>> for NeuraIsolateLayer {
+impl NeuraLayerBase for NeuraIsolateLayer {
    type Output = DVector<F>;
    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let (NeuraShape::Vector(start), NeuraShape::Vector(end)) = (self.start, self.end) else {
            panic!("NeuraIsolateLayer expected a value of dimension {}, got a vector", self.start.dims());
        };
        DVector::from_iterator(end - start, input.iter().cloned().skip(start).take(end))
    }
 }
 impl NeuraTrainableLayerBase for NeuraIsolateLayer {
    type Gradient = ();
    #[inline(always)]
@ -90,41 +72,26 @@ impl NeuraTrainableLayerBase for NeuraIsolateLayer {
        ()
    }
-    #[inline(always)]
+    fn output_shape(&self) -> NeuraShape {
-    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
+        self.end.sub(self.start).unwrap_or_else(|| unreachable!())
        // Noop
    }
 }
-impl<F: Clone + Scalar> NeuraTrainableLayerEval<DVector<F>> for NeuraIsolateLayer {
+impl<F: Clone + Scalar + Default> NeuraLayer<DVector<F>> for NeuraIsolateLayer {
    type Output = DVector<F>;
    type IntermediaryRepr = ();
    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
-        (self.eval(input), ())
+        let (NeuraShape::Vector(start), NeuraShape::Vector(end)) = (self.start, self.end) else {
-    }
+            panic!("NeuraIsolateLayer expected a value of dimension {}, got a vector", self.start.dims());
-}
+        };
-impl<Input> NeuraTrainableLayerSelf<Input> for NeuraIsolateLayer
+        let res = DVector::from_iterator(end - start, input.iter().cloned().skip(start).take(end));
 where
    Self: NeuraTrainableLayerEval<Input>,
 {
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }
-    #[inline(always)]
+        (res, ())
    fn get_gradient(
        &self,
        _input: &Input,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
 }
 impl<F: Clone + Scalar + Default> NeuraTrainableLayerBackprop<DVector<F>> for NeuraIsolateLayer {
    fn backprop_layer(
        &self,
        input: &DVector<F>,
--- a/src/layer/lock.rs
+++ b/src/layer/lock.rs
@ -24,56 +24,35 @@ impl<Layer> NeuraLockLayer<Layer> {
    }
 }
-impl<Input, Layer: NeuraLayer<Input>> NeuraLayer<Input> for NeuraLockLayer<Layer> {
+impl<Layer: NeuraLayerBase> NeuraLayerBase for NeuraLockLayer<Layer> {
-    type Output = Layer::Output;
+    type Gradient = ();
-    fn eval(&self, input: &Input) -> Self::Output {
+    fn output_shape(&self) -> NeuraShape {
-        self.layer.eval(input)
+        self.layer.output_shape()
    }
 }
 impl<Layer: NeuraTrainableLayerBase> NeuraTrainableLayerBase for NeuraLockLayer<Layer> {
    type Gradient = ();
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
-    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
+    fn prepare_layer(&mut self, is_training: bool) {
-        // Noop
+        self.layer.prepare_layer(is_training);
    }
 }
-impl<Input, Layer: NeuraTrainableLayerEval<Input>> NeuraTrainableLayerEval<Input>
+impl<Input, Layer: NeuraLayer<Input>> NeuraLayer<Input> for NeuraLockLayer<Layer> {
-    for NeuraLockLayer<Layer>
+    type Output = Layer::Output;
 {
    type IntermediaryRepr = Layer::IntermediaryRepr;
-    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
+    fn eval(&self, input: &Input) -> Self::Output {
-        self.layer.eval_training(input)
+        self.layer.eval(input)
    }
 }
-impl<Input, Layer: NeuraTrainableLayerEval<Input>> NeuraTrainableLayerSelf<Input>
+    type IntermediaryRepr = Layer::IntermediaryRepr;
    for NeuraLockLayer<Layer>
 {
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }
-    fn get_gradient(
+    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
-        &self,
+        self.layer.eval_training(input)
        _input: &Input,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
 }
 impl<Input, Layer: NeuraTrainableLayerBackprop<Input>> NeuraTrainableLayerBackprop<Input>
    for NeuraLockLayer<Layer>
 {
    fn backprop_layer(
        &self,
        input: &Input,
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@ -75,11 +75,35 @@ impl From<(usize, usize, usize)> for NeuraShape {
    }
 }
-pub trait NeuraLayer<Input> {
+pub trait NeuraLayerBase: std::fmt::Debug + Clone + 'static {
-    /// What type the layer outputs
+    /// What type the gradient of the layer is
-    type Output;
+    type Gradient: NeuraVectorSpace + Send + 'static;
    /// What the desired output shape of the layer is
    fn output_shape(&self) -> NeuraShape;
    /// Constructor for the gradient, should return the zero vector
    fn default_gradient(&self) -> Self::Gradient;
    /// Applies `δW_l` to the weights of the layer, the default implementation is a noop
    #[allow(unused_variables)]
    #[inline(always)]
    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
        // Noop
    }
    /// Arbitrary computation that can be executed at the start of an epoch
    #[allow(unused_variables)]
    #[inline(always)]
    fn prepare_layer(&mut self, is_training: bool) {
        // Noop
    }
-    fn eval(&self, input: &Input) -> Self::Output;
+    /// Computes the regularization terms of the layer's gradient, called once per batch
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        self.default_gradient()
    }
    fn lock_layer(self) -> NeuraLockLayer<Self>
    where
@ -89,87 +113,34 @@ pub trait NeuraLayer<Input> {
    }
 }
-impl<Input: Clone> NeuraLayer<Input> for () {
+pub trait NeuraLayer<Input>: NeuraLayerBase {
-    type Output = Input;
+    /// What type the layer outputs, may depend on `Input`.
    type Output;
    /// A type that can hold data between calls to `eval_training`, `backprop_layer` and `get_gradient`
    type IntermediaryRepr: 'static;
    #[inline(always)]
    fn eval(&self, input: &Input) -> Self::Output {
-        input.clone()
+        self.eval_training(input).0
    }
 }
 pub trait NeuraShapedLayer {
    fn output_shape(&self) -> NeuraShape;
 }
 pub trait NeuraPartialLayer {
    type Constructed: NeuraShapedLayer;
    type Err;
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
 }
 pub trait NeuraTrainableLayerBase {
    /// The representation of the layer gradient as a vector space
    type Gradient: NeuraVectorSpace;
    fn default_gradient(&self) -> Self::Gradient;
    /// Applies `δW_l` to the weights of the layer
    fn apply_gradient(&mut self, gradient: &Self::Gradient);
    /// Arbitrary computation that can be executed at the start of an epoch
    #[allow(unused_variables)]
    #[inline(always)]
    fn prepare_layer(&mut self, is_training: bool) {}
 }
 pub trait NeuraTrainableLayerEval<Input>: NeuraTrainableLayerBase + NeuraLayer<Input> {
    /// An intermediary object type to be passed to the various training methods
    type IntermediaryRepr;
    // TODO: move this into another trait
    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr);
 }
 /// Contains methods relative to a layer's ability to compute its own weights gradients,
 /// given the derivative of the output variables.
 pub trait NeuraTrainableLayerSelf<Input>: NeuraTrainableLayerEval<Input> {
    /// Computes the regularization
    fn regularize_layer(&self) -> Self::Gradient;
    /// Computes the layer's gradient,
    ///
    /// `intermediary` is guaranteed to have been generated by a previous call to `eval_training`,
    /// without mutation of `self` in-between, and with the same `input`.
    #[allow(unused_variables)]
    #[inline(always)]
    fn get_gradient(
        &self,
        input: &Input,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
-    ) -> Self::Gradient;
+    ) -> Self::Gradient {
-}
+        self.default_gradient()
    }
 // impl<Input, Layer: NeuraTrainableLayerBase<Input, Gradient = ()>> NeuraTrainableLayerSelf<Input>
 //     for Layer
 // {
 //     #[inline(always)]
 //     fn regularize_layer(&self) -> Self::Gradient {
 //         ()
 //     }
 //     #[inline(always)]
 //     fn get_gradient(
 //         &self,
 //         input: &Input,
 //         intermediary: &Self::IntermediaryRepr,
 //         epsilon: Self::Output,
 //     ) -> Self::Gradient {
 //         ()
 //     }
 // }
 pub trait NeuraTrainableLayerBackprop<Input>: NeuraTrainableLayerEval<Input> {
    /// Computes the backpropagation term and the derivative of the internal weights,
    /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
    ///
@ -189,7 +160,31 @@ pub trait NeuraTrainableLayerBackprop<Input>: NeuraTrainableLayerEval<Input> {
    ) -> Input;
 }
-impl NeuraTrainableLayerBase for () {
+#[deprecated]
 pub trait NeuraShapedLayer: NeuraLayerBase {}
 pub trait NeuraPartialLayer {
    type Constructed: NeuraLayerBase + 'static;
    type Err;
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
 }
 #[deprecated]
 pub trait NeuraTrainableLayerBase: NeuraLayerBase {}
 #[deprecated]
 pub trait NeuraTrainableLayerEval<Input>: NeuraLayer<Input> {}
 /// Contains methods relative to a layer's ability to compute its own weights gradients,
 /// given the derivative of the output variables.
 #[deprecated]
 pub trait NeuraTrainableLayerSelf<Input>: NeuraLayer<Input> {}
 #[deprecated]
 pub trait NeuraTrainableLayerBackprop<Input>: NeuraLayer<Input> {}
 impl NeuraLayerBase for () {
    type Gradient = ();
    #[inline(always)]
@ -201,35 +196,22 @@ impl NeuraTrainableLayerBase for () {
    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }
    fn output_shape(&self) -> NeuraShape {
        panic!("() has no shape!");
    }
 }
-impl<Input: Clone> NeuraTrainableLayerEval<Input> for () {
+impl<Input: Clone> NeuraLayer<Input> for () {
    type Output = Input;
    type IntermediaryRepr = ();
    #[inline(always)]
    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
-        (self.eval(input), ())
+        (input.clone(), ())
    }
 }
 impl<Input: Clone> NeuraTrainableLayerSelf<Input> for () {
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }
    #[inline(always)]
    fn get_gradient(
        &self,
        _input: &Input,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
 }
 impl<Input: Clone> NeuraTrainableLayerBackprop<Input> for () {
    fn backprop_layer(
        &self,
        _input: &Input,
--- a/src/layer/normalize.rs
+++ b/src/layer/normalize.rs
@ -23,12 +23,6 @@ impl NeuraNormalizeLayer {
    }
 }
 impl NeuraShapedLayer for NeuraNormalizeLayer {
    fn output_shape(&self) -> NeuraShape {
        self.shape
    }
 }
 impl NeuraPartialLayer for NeuraNormalizeLayer {
    type Constructed = NeuraNormalizeLayer;
@ -39,7 +33,22 @@ impl NeuraPartialLayer for NeuraNormalizeLayer {
    }
 }
-impl<F: Float + Scalar> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
+impl NeuraLayerBase for NeuraNormalizeLayer {
    fn output_shape(&self) -> NeuraShape {
        self.shape
    }
    type Gradient = ();
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
    // TODO: store the Kronecker term in the Jacobian matrix (might as well)
    type IntermediaryRepr = (DMatrix<F>, F); // Partial Jacobian matrix (without the Kronecker term) and stddev
    type Output = DVector<F>;
    fn eval(&self, input: &DVector<F>) -> Self::Output {
@ -54,22 +63,6 @@ impl<F: Float + Scalar> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
        output
    }
 }
 impl NeuraTrainableLayerBase for NeuraNormalizeLayer {
    type Gradient = ();
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerEval<DVector<F>> for NeuraNormalizeLayer {
    type IntermediaryRepr = (DMatrix<F>, F); // Partial Jacobian matrix (without the Kronecker term) and stddev
    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
        let (mean, variance, len) = mean_variance(input);
@ -85,26 +78,7 @@ impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerEval<DVector<F>> for N
        (input_centered / stddev, (jacobian_partial, stddev))
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerSelf<DVector<F>> for NeuraNormalizeLayer {
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }
    fn get_gradient(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBackprop<DVector<F>>
    for NeuraNormalizeLayer
 {
    fn backprop_layer(
        &self,
        _input: &DVector<F>,
--- a/src/layer/softmax.rs
+++ b/src/layer/softmax.rs
@ -16,8 +16,30 @@ impl NeuraSoftmaxLayer {
    }
 }
 impl NeuraPartialLayer for NeuraSoftmaxLayer {
    type Constructed = Self;
    type Err = ();
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        Ok(Self { shape: input_shape })
    }
 }
 impl NeuraLayerBase for NeuraSoftmaxLayer {
    type Gradient = ();
    fn output_shape(&self) -> NeuraShape {
        self.shape
    }
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraLayer<DVector<F>> for NeuraSoftmaxLayer {
    type Output = DVector<F>;
    type IntermediaryRepr = Self::Output; // Result of self.eval
    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let mut res = input.clone();
@ -39,64 +61,12 @@ impl<F: Float + Scalar + NumAssignOps> NeuraLayer<DVector<F>> for NeuraSoftmaxLa
        res
    }
 }
 impl NeuraShapedLayer for NeuraSoftmaxLayer {
    fn output_shape(&self) -> NeuraShape {
        self.shape
    }
 }
 impl NeuraPartialLayer for NeuraSoftmaxLayer {
    type Constructed = Self;
    type Err = ();
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        Ok(Self { shape: input_shape })
    }
 }
 impl NeuraTrainableLayerBase for NeuraSoftmaxLayer {
    type Gradient = ();
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerEval<DVector<F>> for NeuraSoftmaxLayer {
    type IntermediaryRepr = Self::Output; // Result of self.eval
    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
        let res = self.eval(input);
        (res.clone(), res)
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerSelf<DVector<F>> for NeuraSoftmaxLayer {
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }
    #[inline(always)]
    fn get_gradient(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
 }
 impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBackprop<DVector<F>>
    for NeuraSoftmaxLayer
 {
    fn backprop_layer(
        &self,
        input: &DVector<F>,
--- a/src/lib.rs
+++ b/src/lib.rs
@ -23,10 +23,7 @@ pub mod prelude {
    // Structs and traits
    pub use crate::gradient_solver::NeuraBackprop;
-    pub use crate::layer::{
+    pub use crate::layer::{NeuraLayer, NeuraLayerBase, NeuraPartialLayer, NeuraShape};
        NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBackprop,
        NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
    };
    pub use crate::network::sequential::{
        NeuraSequential, NeuraSequentialLock, NeuraSequentialTail,
    };
--- a/src/network/graph/from.rs
+++ b/src/network/graph/from.rs
@ -25,7 +25,7 @@ impl<Data> FromSequential<(), Data> for NeuraGraph<Data> {
    }
 }
-impl<Data: Clone + 'static, Layer: NeuraTrainableLayerFull<Data, Output = Data>, ChildNetwork>
+impl<Data: Clone + 'static, Layer: NeuraLayer<Data, Output = Data>, ChildNetwork>
    FromSequential<NeuraSequential<Layer, ChildNetwork>, Data> for NeuraGraph<Data>
 where
    NeuraGraph<Data>: FromSequential<ChildNetwork, Data>,
@ -57,7 +57,7 @@ impl<Data> NeuraGraph<Data> {
    ) -> Self
    where
        NeuraGraph<Data>: FromSequential<NeuraSequential<Layer, ChildNetwork>, Data>,
-        NeuraSequential<Layer, ChildNetwork>: NeuraShapedLayer,
+        NeuraSequential<Layer, ChildNetwork>: NeuraLayerBase,
    {
        Self::from_sequential_rec(&network, vec![], input_shape)
    }
--- a/src/network/graph/mod.rs
+++ b/src/network/graph/mod.rs
@ -1,10 +1,7 @@
 use std::any::Any;
 use crate::{
-    algebra::NeuraDynVectorSpace,
+    algebra::NeuraDynVectorSpace, derivable::NeuraLoss, layer::NeuraLayerBase, prelude::*,
    derivable::NeuraLoss,
    layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
    prelude::*,
 };
 mod node;
@ -16,31 +13,10 @@ pub use partial::NeuraGraphPartial;
 mod from;
 pub use from::FromSequential;
-pub trait NeuraTrainableLayerFull<Input>:
+#[deprecated]
-    NeuraTrainableLayerEval<Input>
+pub trait NeuraTrainableLayerFull<Input>: NeuraLayer<Input> {}
    + NeuraTrainableLayerBackprop<Input>
    + NeuraTrainableLayerSelf<Input>
    + NeuraShapedLayer
    + Clone
    + std::fmt::Debug
    + 'static
 where
    Self::IntermediaryRepr: 'static,
 {
 }
-impl<Input, T> NeuraTrainableLayerFull<Input> for T
+impl<Input, T> NeuraTrainableLayerFull<Input> for T where T: NeuraLayer<Input> {}
 where
    T: NeuraTrainableLayerEval<Input>
        + NeuraTrainableLayerBackprop<Input>
        + NeuraTrainableLayerSelf<Input>
        + NeuraShapedLayer
        + Clone
        + std::fmt::Debug
        + 'static,
    T::IntermediaryRepr: 'static,
 {
 }
 #[derive(Debug)]
 pub struct NeuraGraphNodeConstructed<Data> {
@ -49,7 +25,17 @@ pub struct NeuraGraphNodeConstructed<Data> {
    output: usize,
 }
-#[derive(Debug)]
+impl<Data> Clone for NeuraGraphNodeConstructed<Data> {
    fn clone(&self) -> Self {
        Self {
            node: dyn_clone::clone_box(&*self.node),
            inputs: self.inputs.clone(),
            output: self.output.clone(),
        }
    }
 }
 #[derive(Clone, Debug)]
 pub struct NeuraGraph<Data> {
    /// ## Class invariants
    ///
@ -65,10 +51,16 @@ pub struct NeuraGraph<Data> {
    buffer_size: usize,
 }
-impl<Data> NeuraShapedLayer for NeuraGraph<Data> {
+impl<Data: Clone + std::fmt::Debug + 'static> NeuraLayerBase for NeuraGraph<Data> {
    type Gradient = ();
    fn output_shape(&self) -> NeuraShape {
        self.output_shape
    }
    fn default_gradient(&self) -> Self::Gradient {
        unimplemented!("NeuraGraph cannot be used as a layer yet")
    }
 }
 impl<Data> NeuraGraph<Data> {
@ -178,8 +170,9 @@ impl<Data> NeuraGraph<Data> {
    }
 }
-impl<Data: Clone> NeuraLayer<Data> for NeuraGraph<Data> {
+impl<Data: Clone + std::fmt::Debug + 'static> NeuraLayer<Data> for NeuraGraph<Data> {
    type Output = Data;
    type IntermediaryRepr = ();
    fn eval(&self, input: &Data) -> Self::Output {
        let mut buffer = self.create_buffer();
@ -190,6 +183,21 @@ impl<Data: Clone> NeuraLayer<Data> for NeuraGraph<Data> {
            .take()
            .expect("Unreachable: output was not set")
    }
    #[allow(unused)]
    fn eval_training(&self, input: &Data) -> (Self::Output, Self::IntermediaryRepr) {
        unimplemented!("NeuraGraph cannot be used as a trainable layer yet");
    }
    #[allow(unused)]
    fn backprop_layer(
        &self,
        input: &Data,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Data {
        unimplemented!("NeuraGraph cannot be used as a trainable layer yet");
    }
 }
 #[cfg(test)]
--- a/src/network/graph/node.rs
+++ b/src/network/graph/node.rs
@ -4,7 +4,6 @@ use std::{any::Any, fmt::Debug};
 use crate::{
    algebra::NeuraDynVectorSpace,
    err::NeuraAxisErr,
    layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
    network::residual::{NeuraAxisDefault, NeuraCombineInputs, NeuraSplitInputs},
    prelude::{NeuraPartialLayer, NeuraShape},
 };
@ -81,9 +80,9 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
            + 'static,
        Layer: NeuraPartialLayer + Clone + Debug + 'static,
        Layer::Constructed:
-            NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
+            NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
        Layer::Err: Debug,
-        <Layer::Constructed as NeuraTrainableLayerEval<
+        <Layer::Constructed as NeuraLayer<
            <Axis as NeuraCombineInputs<Data>>::Combined,
        >>::IntermediaryRepr: 'static,
        <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
@ -97,7 +96,7 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
    ) -> &'a Intermediary<Axis::Combined, Layer>
    where
        Axis: NeuraCombineInputs<Data>,
-        Layer: NeuraTrainableLayerFull<Axis::Combined>,
+        Layer: NeuraLayer<Axis::Combined>,
        Axis::Combined: 'static,
    {
        intermediary
@ -106,7 +105,7 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
    }
 }
-struct Intermediary<Combined, Layer: NeuraTrainableLayerFull<Combined>>
+struct Intermediary<Combined, Layer: NeuraLayer<Combined>>
 where
    Layer::IntermediaryRepr: 'static,
 {
@ -117,7 +116,7 @@ where
 impl<
        Data: Clone,
        Axis: NeuraSplitInputs<Data> + Clone + Debug,
-        Layer: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
+        Layer: NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
    > NeuraGraphNodeEval<Data> for NeuraGraphNode<Axis, Layer>
 where
    Layer::IntermediaryRepr: 'static,
@ -188,9 +187,9 @@ impl<
        Layer: NeuraPartialLayer + Clone + Debug,
    > NeuraGraphNodePartial<Data> for NeuraGraphNode<Axis, Layer>
 where
-    Layer::Constructed: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
+    Layer::Constructed: NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
    Layer::Err: Debug,
-    <Layer::Constructed as NeuraTrainableLayerEval<<Axis as NeuraCombineInputs<Data>>::Combined>>::IntermediaryRepr: 'static,
+    <Layer::Constructed as NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined>>::IntermediaryRepr: 'static,
    <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
 {
    fn inputs<'a>(&'a self) -> &'a [String] {
--- a/src/network/graph/partial.rs
+++ b/src/network/graph/partial.rs
@ -122,7 +122,7 @@ impl<Data> NeuraGraphPartial<Data> {
    }
 }
-impl<Data> NeuraPartialLayer for NeuraGraphPartial<Data> {
+impl<Data: Clone + std::fmt::Debug + 'static> NeuraPartialLayer for NeuraGraphPartial<Data> {
    type Constructed = NeuraGraph<Data>;
    type Err = NeuraGraphErr;
--- a/src/network/residual/construct.rs
+++ b/src/network/residual/construct.rs
@ -78,33 +78,10 @@ where
    }
 }
 impl<Layer, Axis> NeuraShapedLayer for NeuraResidualNode<Layer, (), Axis> {
    #[inline(always)]
    fn output_shape(&self) -> NeuraShape {
        self.output_shape.unwrap()
    }
 }
 impl<Layer, ChildNetwork: NeuraShapedLayer, Axis> NeuraShapedLayer
    for NeuraResidualNode<Layer, ChildNetwork, Axis>
 {
    #[inline(always)]
    fn output_shape(&self) -> NeuraShape {
        self.child_network.output_shape()
    }
 }
 impl<Layers: NeuraShapedLayer> NeuraShapedLayer for NeuraResidual<Layers> {
    #[inline(always)]
    fn output_shape(&self) -> NeuraShape {
        self.layers.output_shape()
    }
 }
 impl<Layers: NeuraResidualConstruct> NeuraPartialLayer for NeuraResidual<Layers>
 where
    // Should always be satisfied:
-    Layers::Constructed: NeuraShapedLayer,
+    Layers::Constructed: NeuraLayerBase,
 {
    type Constructed = NeuraResidual<Layers::Constructed>;
    type Err = Layers::Err;
--- a/src/network/residual/last.rs
+++ b/src/network/residual/last.rs
@ -63,7 +63,13 @@ impl NeuraResidualConstruct for NeuraResidualLast {
    }
 }
-impl NeuraShapedLayer for NeuraResidualLast {
+impl NeuraLayerBase for NeuraResidualLast {
    type Gradient = ();
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
    fn output_shape(&self) -> NeuraShape {
        self.output_shape
            .expect("Called NeuraResidualLast::output_shape before constructing it")
@ -90,7 +96,7 @@ impl NeuraNetworkRec for NeuraResidualLast {
    #[inline(always)]
    fn merge_gradient(&self, _rec_gradient: (), _layer_gradient: ()) -> Self::Gradient
    where
-        Self::Layer: NeuraTrainableLayerBase,
+        Self::Layer: NeuraLayerBase,
    {
        ()
    }
@ -135,62 +141,24 @@ impl<Data: Clone> NeuraNetwork<NeuraResidualInput<Data>> for NeuraResidualLast {
    }
 }
 impl NeuraTrainableLayerBase for NeuraResidualLast {
    type Gradient = ();
    #[inline(always)]
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }
    #[inline(always)]
    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }
 }
 impl<Data: Clone> NeuraLayer<NeuraResidualInput<Data>> for NeuraResidualLast {
    type Output = Data;
    type IntermediaryRepr = ();
-    fn eval(&self, input: &NeuraResidualInput<Data>) -> Self::Output {
+    fn eval_training(&self, input: &NeuraResidualInput<Data>) -> (Self::Output, ()) {
        let result: Rc<Self::Output> = input.clone().get_first()
            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
            .into();
-        unwrap_or_clone(result)
+        (unwrap_or_clone(result), ())
    }
 }
-impl<Data: Clone> NeuraTrainableLayerEval<NeuraResidualInput<Data>> for NeuraResidualLast {
+    fn backprop_layer(
    type IntermediaryRepr = ();
    #[inline(always)]
    fn eval_training(
        &self,
        input: &NeuraResidualInput<Data>,
    ) -> (Self::Output, Self::IntermediaryRepr) {
        (self.eval(input), ())
    }
 }
 impl<Data: Clone> NeuraTrainableLayerSelf<NeuraResidualInput<Data>> for NeuraResidualLast {
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }
    #[inline(always)]
    fn get_gradient(
        &self,
        _input: &NeuraResidualInput<Data>,
        _intermediary: &Self::IntermediaryRepr,
-        _epsilon: &Self::Output,
+        epsilon: &Self::Output,
-    ) -> Self::Gradient {
+    ) -> NeuraResidualInput<Data> {
-        ()
+        Cow::into_owned(self.map_gradient_out(input, epsilon, epsilon))
    }
 }
 // let epsilon = Rc::new(epsilon.clone());
 // let mut epsilon_residual = NeuraResidualInput::new();
 // epsilon_residual.push(0, epsilon);
--- a/src/network/residual/node.rs
+++ b/src/network/residual/node.rs
@ -1,5 +1,3 @@
 use nalgebra::{DVector, Scalar};
 use num::Float;
 use std::borrow::Cow;
 use crate::network::*;
@ -102,24 +100,6 @@ impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
    }
 }
 impl<F: Float + Scalar, Layer, ChildNetwork, Axis> NeuraLayer<NeuraResidualInput<DVector<F>>>
    for NeuraResidualNode<Layer, ChildNetwork, Axis>
 where
    Axis: NeuraCombineInputs<DVector<F>>,
    Layer: NeuraLayer<Axis::Combined, Output = DVector<F>>,
    ChildNetwork: NeuraLayer<NeuraResidualInput<DVector<F>>>,
 {
    type Output = <ChildNetwork as NeuraLayer<NeuraResidualInput<DVector<F>>>>::Output;
    fn eval(&self, input: &NeuraResidualInput<DVector<F>>) -> Self::Output {
        let (layer_input, mut rest) = self.process_input(input);
        self.combine_outputs(self.layer.eval(&layer_input), &mut rest);
        self.child_network.eval(&rest)
    }
 }
 #[allow(dead_code)]
 pub struct NeuraResidualIntermediary<LayerIntermediary, LayerOutput, ChildIntermediary> {
    layer_intermediary: LayerIntermediary,
@ -127,9 +107,18 @@ pub struct NeuraResidualIntermediary<LayerIntermediary, LayerOutput, ChildInterm
    child_intermediary: Box<ChildIntermediary>,
 }
-impl<Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase, Axis>
+impl<
-    NeuraTrainableLayerBase for NeuraResidualNode<Layer, ChildNetwork, Axis>
+        Layer: NeuraLayerBase,
        ChildNetwork: NeuraLayerBase,
        Axis: Clone + std::fmt::Debug + 'static,
    > NeuraLayerBase for NeuraResidualNode<Layer, ChildNetwork, Axis>
 {
    #[inline(always)]
    fn output_shape(&self) -> NeuraShape {
        todo!("output_shape for NeuraResidualNode is not yet ready");
        self.child_network.output_shape()
    }
    type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
    fn default_gradient(&self) -> Self::Gradient {
@ -148,25 +137,37 @@ impl<Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase, Axis
        self.layer.prepare_layer(is_training);
        self.child_network.prepare_layer(is_training);
    }
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.layer.regularize_layer(),
            Box::new(self.child_network.regularize_layer()),
        )
    }
 }
-impl<
+impl<Data: Clone + 'static, Layer, ChildNetwork, Axis: Clone + std::fmt::Debug + 'static>
-        Data,
+    NeuraLayer<NeuraResidualInput<Data>> for NeuraResidualNode<Layer, ChildNetwork, Axis>
        Axis: NeuraCombineInputs<Data>,
        Layer: NeuraTrainableLayerEval<Axis::Combined, Output = Data>,
        ChildNetwork: NeuraTrainableLayerEval<NeuraResidualInput<Data>>,
    > NeuraTrainableLayerEval<NeuraResidualInput<Data>>
    for NeuraResidualNode<Layer, ChildNetwork, Axis>
 where
-    NeuraResidualNode<Layer, ChildNetwork, Axis>:
+    Axis: NeuraCombineInputs<Data>,
-        NeuraLayer<NeuraResidualInput<Data>, Output = ChildNetwork::Output>,
+    Layer: NeuraLayer<Axis::Combined, Output = Data>,
    ChildNetwork: NeuraLayer<NeuraResidualInput<Data>>,
 {
    type Output = <ChildNetwork as NeuraLayer<NeuraResidualInput<Data>>>::Output;
    type IntermediaryRepr = NeuraResidualIntermediary<
        Layer::IntermediaryRepr,
        Layer::Output,
        ChildNetwork::IntermediaryRepr,
    >;
    fn eval(&self, input: &NeuraResidualInput<Data>) -> Self::Output {
        let (layer_input, mut rest) = self.process_input(input);
        self.combine_outputs(self.layer.eval(&layer_input), &mut rest);
        self.child_network.eval(&rest)
    }
    fn eval_training(
        &self,
        input: &NeuraResidualInput<Data>,
@ -186,25 +187,6 @@ where
        (output, intermediary)
    }
 }
 impl<
        Data,
        Axis: NeuraCombineInputs<Data>,
        Layer: NeuraTrainableLayerSelf<Axis::Combined, Output = Data>,
        ChildNetwork: NeuraTrainableLayerSelf<NeuraResidualInput<Data>>,
    > NeuraTrainableLayerSelf<NeuraResidualInput<Data>>
    for NeuraResidualNode<Layer, ChildNetwork, Axis>
 where
    NeuraResidualNode<Layer, ChildNetwork, Axis>:
        NeuraLayer<NeuraResidualInput<Data>, Output = ChildNetwork::Output>,
 {
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.layer.regularize_layer(),
            Box::new(self.child_network.regularize_layer()),
        )
    }
    #[allow(unused)]
    fn get_gradient(
@ -213,7 +195,17 @@ where
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Self::Gradient {
-        unimplemented!("NeuraResidualNode::get_gradient is not yet implemented, sorry");
+        unimplemented!("NeuraResidualNode::get_gradient is not yet implemented");
    }
    #[allow(unused)]
    fn backprop_layer(
        &self,
        input: &NeuraResidualInput<Data>,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> NeuraResidualInput<Data> {
        unimplemented!("NeuraResidualNode::backprop_layer is not yet implemented");
    }
 }
@ -225,8 +217,11 @@ impl<Axis, Layer, ChildNetwork> NeuraNetworkBase for NeuraResidualNode<Layer, Ch
    }
 }
-impl<Axis, Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase> NeuraNetworkRec
+impl<
-    for NeuraResidualNode<Layer, ChildNetwork, Axis>
+        Axis: Clone + std::fmt::Debug + 'static,
        Layer: NeuraLayerBase,
        ChildNetwork: NeuraLayerBase,
    > NeuraNetworkRec for NeuraResidualNode<Layer, ChildNetwork, Axis>
 {
    type NextNode = ChildNetwork;
@ -236,8 +231,8 @@ impl<Axis, Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase
    fn merge_gradient(
        &self,
-        rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
+        rec_gradient: <Self::NextNode as NeuraLayerBase>::Gradient,
-        layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
+        layer_gradient: <Self::Layer as NeuraLayerBase>::Gradient,
    ) -> Self::Gradient {
        (layer_gradient, Box::new(rec_gradient))
    }
--- a/src/network/residual/wrapper.rs
+++ b/src/network/residual/wrapper.rs
@ -1,6 +1,6 @@
 use std::borrow::Cow;
-use crate::network::*;
+use crate::{network::*, utils::unwrap_or_clone};
 use super::*;
@ -45,18 +45,48 @@ impl<Layers> NeuraResidual<Layers> {
    }
 }
-impl<Input: Clone, Layers> NeuraLayer<Input> for NeuraResidual<Layers>
+impl<Data: Clone, Layers> NeuraLayer<Data> for NeuraResidual<Layers>
 where
-    Layers: NeuraLayer<NeuraResidualInput<Input>>,
+    Layers: NeuraLayer<NeuraResidualInput<Data>>,
 {
    type Output = Layers::Output;
    type IntermediaryRepr = Layers::IntermediaryRepr;
-    fn eval(&self, input: &Input) -> Self::Output {
+    fn eval(&self, input: &Data) -> Self::Output {
        self.layers.eval(&self.input_to_residual_input(input))
    }
    fn eval_training(&self, input: &Data) -> (Self::Output, Self::IntermediaryRepr) {
        self.layers
            .eval_training(&self.input_to_residual_input(input))
    }
    fn get_gradient(
        &self,
        input: &Data,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Self::Gradient {
        self.layers
            .get_gradient(&self.input_to_residual_input(input), intermediary, &epsilon)
    }
    fn backprop_layer(
        &self,
        input: &Data,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Data {
        unwrap_or_clone(
            self.layers
                .backprop_layer(&self.input_to_residual_input(input), intermediary, &epsilon)
                .get_first()
                .unwrap(),
        )
    }
 }
-impl<Layers: NeuraTrainableLayerBase> NeuraTrainableLayerBase for NeuraResidual<Layers> {
+impl<Layers: NeuraLayerBase> NeuraLayerBase for NeuraResidual<Layers> {
    type Gradient = Layers::Gradient;
    #[inline(always)]
@ -68,34 +98,13 @@ impl<Layers: NeuraTrainableLayerBase> NeuraTrainableLayerBase for NeuraResidual<
    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
        self.layers.apply_gradient(gradient);
    }
 }
 impl<Data: Clone, Layers: NeuraTrainableLayerEval<NeuraResidualInput<Data>>>
    NeuraTrainableLayerEval<Data> for NeuraResidual<Layers>
 {
    type IntermediaryRepr = Layers::IntermediaryRepr;
    fn eval_training(&self, input: &Data) -> (Self::Output, Self::IntermediaryRepr) {
        self.layers
            .eval_training(&self.input_to_residual_input(input))
    }
 }
 impl<Data: Clone, Layers: NeuraTrainableLayerSelf<NeuraResidualInput<Data>>>
    NeuraTrainableLayerSelf<Data> for NeuraResidual<Layers>
 {
    fn regularize_layer(&self) -> Self::Gradient {
        self.layers.regularize_layer()
    }
-    fn get_gradient(
+    fn output_shape(&self) -> NeuraShape {
-        &self,
+        self.layers.output_shape()
        input: &Data,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Self::Gradient {
        self.layers
            .get_gradient(&self.input_to_residual_input(input), intermediary, &epsilon)
    }
 }
@ -108,7 +117,7 @@ impl<Layers> NeuraNetworkBase for NeuraResidual<Layers> {
    }
 }
-impl<Layers: NeuraTrainableLayerBase> NeuraNetworkRec for NeuraResidual<Layers> {
+impl<Layers: NeuraLayerBase> NeuraNetworkRec for NeuraResidual<Layers> {
    type NextNode = Layers;
    #[inline(always)]
@ -119,8 +128,8 @@ impl<Layers: NeuraTrainableLayerBase> NeuraNetworkRec for NeuraResidual<Layers>
    #[inline(always)]
    fn merge_gradient(
        &self,
-        rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
+        rec_gradient: <Self::NextNode as NeuraLayerBase>::Gradient,
-        _layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
+        _layer_gradient: <Self::Layer as NeuraLayerBase>::Gradient,
    ) -> Self::Gradient {
        rec_gradient
    }
--- a/src/network/sequential/construct.rs
+++ b/src/network/sequential/construct.rs
@ -1,4 +1,4 @@
-use crate::{err::NeuraRecursiveErr, layer::NeuraShapedLayer};
+use crate::err::NeuraRecursiveErr;
 use super::*;
@ -39,19 +39,3 @@ impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraPartialLayer> NeuraPartialLaye
        })
    }
 }
 impl<Layer: NeuraShapedLayer> NeuraShapedLayer for NeuraSequential<Layer, ()> {
    #[inline(always)]
    fn output_shape(&self) -> NeuraShape {
        self.layer.output_shape()
    }
 }
 impl<Layer, ChildNetwork: NeuraShapedLayer> NeuraShapedLayer
    for NeuraSequential<Layer, ChildNetwork>
 {
    #[inline(always)]
    fn output_shape(&self) -> NeuraShape {
        self.child_network.output_shape()
    }
 }
--- a/src/network/sequential/layer_impl.rs
+++ b/src/network/sequential/layer_impl.rs
@ -1,19 +1,22 @@
 use super::*;
-use crate::layer::{NeuraTrainableLayerBackprop, NeuraTrainableLayerEval};
+use crate::layer::{NeuraLayer, NeuraLayerBase};
-impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
+// impl<Layer: NeuraLayerBase> NeuraLayerBase for NeuraSequential<Layer, ()> {
 //     #[inline(always)]
 //     fn output_shape(&self) -> NeuraShape {
 //         self.layer.output_shape()
 //     }
 // }
 impl<Layer: NeuraLayerBase, ChildNetwork: NeuraLayerBase> NeuraLayerBase
    for NeuraSequential<Layer, ChildNetwork>
 {
-    type Output = ChildNetwork::Output;
+    #[inline(always)]
-
+    fn output_shape(&self) -> NeuraShape {
-    fn eval(&self, input: &Input) -> Self::Output {
+        todo!("Have output_shape return Option");
-        self.child_network.eval(&self.layer.eval(input))
+        self.child_network.output_shape()
    }
 }
 impl<Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase> NeuraTrainableLayerBase
    for NeuraSequential<Layer, ChildNetwork>
 {
    type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
    fn default_gradient(&self) -> Self::Gradient {
@ -32,16 +35,25 @@ impl<Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase> Neur
        self.layer.apply_gradient(&gradient.0);
        self.child_network.apply_gradient(&gradient.1);
    }
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.layer.regularize_layer(),
            Box::new(self.child_network.regularize_layer()),
        )
    }
 }
-impl<
+impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
-        Input,
+    for NeuraSequential<Layer, ChildNetwork>
        Layer: NeuraTrainableLayerEval<Input>,
        ChildNetwork: NeuraTrainableLayerEval<Layer::Output>,
    > NeuraTrainableLayerEval<Input> for NeuraSequential<Layer, ChildNetwork>
 {
    type Output = ChildNetwork::Output;
    type IntermediaryRepr = (Layer::IntermediaryRepr, Box<ChildNetwork::IntermediaryRepr>);
    fn eval(&self, input: &Input) -> Self::Output {
        self.child_network.eval(&self.layer.eval(input))
    }
    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
        let (layer_output, layer_intermediary) = self.layer.eval_training(input);
        let (child_output, child_intermediary) = self.child_network.eval_training(&layer_output);
@ -51,20 +63,6 @@ impl<
            (layer_intermediary, Box::new(child_intermediary)),
        )
    }
 }
 impl<
        Input,
        Layer: NeuraTrainableLayerSelf<Input>,
        ChildNetwork: NeuraTrainableLayerSelf<Layer::Output> + NeuraTrainableLayerBackprop<Layer::Output>,
    > NeuraTrainableLayerSelf<Input> for NeuraSequential<Layer, ChildNetwork>
 {
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.layer.regularize_layer(),
            Box::new(self.child_network.regularize_layer()),
        )
    }
    fn get_gradient(
        &self,
@ -74,14 +72,7 @@ impl<
    ) -> Self::Gradient {
        unimplemented!("NeuraSequential::get_gradient is not yet implemented, sorry");
    }
 }
 impl<
        Input,
        Layer: NeuraTrainableLayerBackprop<Input>,
        ChildNetwork: NeuraTrainableLayerBackprop<Layer::Output>,
    > NeuraTrainableLayerBackprop<Input> for NeuraSequential<Layer, ChildNetwork>
 {
    fn backprop_layer(
        &self,
        input: &Input,
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@ -1,10 +1,7 @@
 use std::borrow::Cow;
 use super::*;
-use crate::layer::{
+use crate::layer::{NeuraLayer, NeuraLayerBase, NeuraPartialLayer, NeuraShape};
    NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase, NeuraTrainableLayerEval,
    NeuraTrainableLayerSelf,
 };
 mod construct;
 mod layer_impl;
@ -29,7 +26,7 @@ pub use tail::*;
 /// ## Notes on implemented traits
 ///
 /// The different implementations for `NeuraTrainableNetwork`,
-/// `NeuraLayer` and `NeuraTrainableLayerBase` each require that `ChildNetwork` implements those respective traits,
+/// `NeuraLayer` and `NeuraLayerBase` each require that `ChildNetwork` implements those respective traits,
 /// and that the output type of `Layer` matches the input type of `ChildNetwork`.
 ///
 /// If a method, like `eval`, is reported as missing,
@ -96,7 +93,7 @@ impl<Layer, ChildNetwork> NeuraNetworkBase for NeuraSequential<Layer, ChildNetwo
    }
 }
-impl<Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase> NeuraNetworkRec
+impl<Layer: NeuraLayerBase, ChildNetwork: NeuraLayerBase> NeuraNetworkRec
    for NeuraSequential<Layer, ChildNetwork>
 {
    type NextNode = ChildNetwork;
@ -107,14 +104,14 @@ impl<Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase> Neur
    fn merge_gradient(
        &self,
-        rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
+        rec_gradient: <Self::NextNode as NeuraLayerBase>::Gradient,
-        layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
+        layer_gradient: <Self::Layer as NeuraLayerBase>::Gradient,
    ) -> Self::Gradient {
        (layer_gradient, Box::new(rec_gradient))
    }
 }
-impl<Input: Clone, Layer: NeuraTrainableLayerEval<Input>, ChildNetwork> NeuraNetwork<Input>
+impl<Input: Clone, Layer: NeuraLayer<Input>, ChildNetwork> NeuraNetwork<Input>
    for NeuraSequential<Layer, ChildNetwork>
 where
    Layer::Output: Clone,
--- a/src/network/traits.rs
+++ b/src/network/traits.rs
@ -46,18 +46,18 @@ where
    ) -> Cow<'a, NodeInput>;
 }
-pub trait NeuraNetworkRec: NeuraNetworkBase + NeuraTrainableLayerBase {
+pub trait NeuraNetworkRec: NeuraNetworkBase + NeuraLayerBase {
    /// The type of the children network, it does not need to implement `NeuraNetworkBase`,
    /// although many functions will expect it to be either `()` or an implementation of `NeuraNetworkRec`.
-    type NextNode: NeuraTrainableLayerBase;
+    type NextNode: NeuraLayerBase;
    fn get_next(&self) -> &Self::NextNode;
    fn merge_gradient(
        &self,
-        rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
+        rec_gradient: <Self::NextNode as NeuraLayerBase>::Gradient,
-        layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
+        layer_gradient: <Self::Layer as NeuraLayerBase>::Gradient,
    ) -> Self::Gradient
    where
-        Self::Layer: NeuraTrainableLayerBase;
+        Self::Layer: NeuraLayerBase;
 }
--- a/src/train.rs
+++ b/src/train.rs
@ -70,7 +70,7 @@ impl NeuraBatchedTrainer {
    pub fn train<
        Input: Clone,
        Target: Clone,
-        Network: NeuraTrainableLayerBase + NeuraTrainableLayerSelf<Input>,
+        Network: NeuraLayer<Input>,
        GradientSolver: NeuraGradientSolver<Input, Target, Network>,
        Inputs: IntoIterator<Item = (Input, Target)>,
    >(