From a5237a8ef15527891073c6424aa01aef28c4f13a Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Fri, 21 Apr 2023 16:08:36 +0200
Subject: [PATCH] :sparkles: Inefficient but working forward-forward implementation

---
 examples/bivariate-forward.rs          | 137 ++++++++++++++++++
 examples/bivariate.rs                  |  21 +--
 examples/xor.rs                        |   2 +-
 src/gradient_solver/backprop.rs        |   3 +-
 src/gradient_solver/forward_forward.rs | 191 +++++++++++++++++++++++++
 src/gradient_solver/mod.rs             |   3 +
 src/layer/mod.rs                       |   5 +
 src/layer/normalize.rs                 | 116 +++++++++++++++
 src/lib.rs                             |   4 +-
 src/network/mod.rs                     |   4 +-
 src/network/sequential/mod.rs          |   6 +-
 src/train.rs                           |   7 +-
 12 files changed, 479 insertions(+), 20 deletions(-)
 create mode 100644 examples/bivariate-forward.rs
 create mode 100644 src/gradient_solver/forward_forward.rs
 create mode 100644 src/layer/normalize.rs
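
Note: the per-layer signal that `NeuraForwardPair::eval_layer` (added below in
src/gradient_solver/forward_forward.rs) hands to `backprop_layer` is the gradient, with
respect to the layer's output, of `-activation(goodness - threshold)` for positive samples
and of `-activation(threshold - goodness)` for negative ones, where the goodness is the sum
of squared activations. The snippet below is a minimal, dependency-free sketch of that
computation for a `Tanh` activation; the function and variable names are illustrative only
and are not part of neuramethyst's API.

```rust
/// Derivative of tanh at `x`.
fn tanh_derivative(x: f64) -> f64 {
    let t = x.tanh();
    1.0 - t * t
}

/// Gradient, w.r.t. the layer output `y`, of `-tanh(±(goodness - threshold))`,
/// where `goodness = Σ yᵢ²` and the sign is `+` for positive samples and `-` for
/// negative ones. Positive samples push the goodness above the threshold,
/// negative samples push it below.
fn goodness_gradient(output: &[f64], threshold: f64, positive: bool) -> Vec<f64> {
    let goodness: f64 = output.iter().map(|y| y * y).sum();
    let signed = if positive {
        goodness - threshold
    } else {
        threshold - goodness
    };
    // The goodness has no 1/2 term, hence the explicit factor of 2.
    let scale = 2.0 * tanh_derivative(signed);
    output
        .iter()
        .map(|y| if positive { -scale * y } else { scale * y })
        .collect()
}

fn main() {
    let layer_output = [0.5, -0.25, 1.0];
    // This is the epsilon that would be handed to the layer's `backprop_layer`.
    println!("{:?}", goodness_gradient(&layer_output, 0.25, true));
}
```

The explicit factor of 2 and the sign flip for positive samples mirror the comments in
`eval_layer`: maximizing the goodness is rewritten as minimizing its negative, so the
batched trainer can treat both cases uniformly.
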
diff --git a/examples/bivariate-forward.rs b/examples/bivariate-forward.rs
new file mode 100644
index 0000000..9fde389
--- /dev/null
+++ b/examples/bivariate-forward.rs
@@ -0,0 +1,137 @@
+#![feature(generic_arg_infer)]
+
+use nalgebra::{dvector, DVector};
+#[allow(unused_imports)]
+use neuramethyst::derivable::activation::{LeakyRelu, Linear, Relu, Tanh};
+use neuramethyst::derivable::regularize::NeuraL1;
+use neuramethyst::gradient_solver::NeuraForwardForward;
+use neuramethyst::prelude::*;
+
+use rand::Rng;
+
+fn main() {
+    let mut network = neura_sequential![
+        neura_layer!("dense", 10).regularization(NeuraL1(0.001)),
+        neura_layer!("dropout", 0.25),
+        neura_layer!("normalize"),
+        neura_layer!("dense", 6).regularization(NeuraL1(0.001)),
+    ]
+    .construct(NeuraShape::Vector(3))
+    .unwrap();
+
+    let inputs = (0..1).cycle().map(move |_| {
+        let mut rng = rand::thread_rng();
+        let category = rng.gen_bool(0.5);
+        let good = rng.gen_bool(0.5);
+        let (x, y) = if category {
+            let radius: f32 = rng.gen_range(0.0..2.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
+            (angle.cos() * radius, angle.sin() * radius)
+        } else {
+            let radius: f32 = rng.gen_range(3.0..5.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
+            (angle.cos() * radius, angle.sin() * radius)
+        };
+
+        if good {
+            (dvector![x, y, category as u8 as f32], true)
+        } else {
+            (dvector![x, y, 1.0 - category as u8 as f32], false)
+        }
+    });
+
+    let test_inputs: Vec<_> = inputs.clone().filter(|(_, good)| *good).take(10).collect();
+    let threshold = 0.25f32;
+
+    if std::env::args().any(|arg| arg == "draw") {
+        for epoch in 0..200 {
+            let mut trainer = NeuraBatchedTrainer::new(0.03, 10);
+            trainer.batch_size = 10;
+
+            trainer.train(
+                &NeuraForwardForward::new(Tanh, threshold as f64),
+                &mut network,
+                inputs.clone(),
+                &test_inputs,
+            );
+
+            // let network = network.clone().trim_tail().trim_tail();
+            draw_neuron_activation(
+                |input| {
+                    let cat0 = network.eval(&dvector![input[0] as f32, input[1] as f32, 0.0]);
+                    let cat1 = network.eval(&dvector![input[0] as f32, input[1] as f32, 1.0]);
+
+                    let cat0_good = cat0.map(|x| x * x).sum();
+                    let cat1_good = cat1.map(|x| x * x).sum();
+                    let estimation = cat1_good / (cat0_good + cat1_good);
+
+                    let cat0_norm = cat0 / cat0_good.sqrt();
+                    let mut cat0_rgb = DVector::from_element(3, 0.0);
+
+                    for i in 0..cat0_norm.len() {
+                        cat0_rgb[i % 3] += cat0_norm[i].abs();
+                    }
+
+                    (cat0_rgb * estimation)
+                        .into_iter()
+                        .map(|x| *x as f64)
+                        .collect()
+                },
+                6.0,
+            );
+            println!("{}", epoch);
+
+            std::thread::sleep(std::time::Duration::new(0, 50_000_000));
+        }
+    } else {
+        let mut trainer = NeuraBatchedTrainer::new(0.03, 20 * 50);
+        trainer.batch_size = 10;
+        trainer.log_iterations = 20;
+
+        trainer.train(
+            &NeuraForwardForward::new(Tanh, threshold as f64),
+            &mut network,
+            inputs.clone(),
+            &test_inputs,
+        );
+
+        // println!("{}", String::from("\n").repeat(64));
+        // draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0);
+    }
+}
+
+// TODO: move this to the library?
+fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
+    use viuer::Config;
+
+    const WIDTH: u32 = 64;
+    const HEIGHT: u32 = 64;
+
+    let mut image = image::RgbImage::new(WIDTH, HEIGHT);
+
+    fn sigmoid(x: f64) -> f64 {
+        0.1 + 0.9 * x.abs().powf(0.8)
+    }
+
+    for y in 0..HEIGHT {
+        let y2 = 2.0 * y as f64 / HEIGHT as f64 - 1.0;
+        for x in 0..WIDTH {
+            let x2 = 2.0 * x as f64 / WIDTH as f64 - 1.0;
+            let activation = callback([x2 * scale, y2 * scale]);
+            let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
+            let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
+            let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
+
+            *image.get_pixel_mut(x, y) = image::Rgb([r, g, b]);
+        }
+    }
+
+    let config = Config {
+        use_kitty: false,
+        truecolor: true,
+        // absolute_offset: false,
+        ..Default::default()
+    };
+
+    viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
+}
diff --git a/examples/bivariate.rs b/examples/bivariate.rs
index 10da1af..82fd87b 100644
--- a/examples/bivariate.rs
+++ b/examples/bivariate.rs
@@ -47,20 +47,24 @@ fn main() {
         trainer.batch_size = 10;
 
         trainer.train(
-            NeuraBackprop::new(CrossEntropy),
+            &NeuraBackprop::new(CrossEntropy),
             &mut network,
             inputs.clone(),
             &test_inputs,
         );
 
-        let network = network.clone();
+        let network_trimmed = network.clone().trim_tail().trim_tail();
         draw_neuron_activation(
             |input| {
-                network
-                    .eval(&dvector![input[0] as f32, input[1] as f32])
+                let output = network.eval(&dvector![input[0] as f32, input[1] as f32]);
+                let estimation = output[0] / (output[0] + output[1]);
+
+                let color = network_trimmed.eval(&dvector![input[0] as f32, input[1] as f32]);
+
+                (&color / color.map(|x| x * x).sum() * estimation)
                     .into_iter()
-                    .map(|x| *x as f64)
-                    .collect()
+                    .map(|x| x.abs() as f64)
+                    .collect::<Vec<_>>()
             },
             6.0,
         );
@@ -74,7 +78,7 @@ fn main() {
         trainer.log_iterations = 20;
 
         trainer.train(
-            NeuraBackprop::new(CrossEntropy),
+            &NeuraBackprop::new(CrossEntropy),
             &mut network,
             inputs.clone(),
             &test_inputs,
@@ -101,7 +105,7 @@ fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64)
     let mut image = image::RgbImage::new(WIDTH, HEIGHT);
 
     fn sigmoid(x: f64) -> f64 {
-        1.0 / (1.0 + (-x * 3.0).exp())
+        1.9 / (1.0 + (-x * 3.0).exp()) - 0.9
     }
 
     for y in 0..HEIGHT {
@@ -119,6 +123,7 @@ fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64)
 
     let config = Config {
         use_kitty: false,
+        truecolor: true,
         // absolute_offset: false,
         ..Default::default()
     };
diff --git a/examples/xor.rs b/examples/xor.rs
index 6657de1..8d4d7c3 100644
--- a/examples/xor.rs
+++ b/examples/xor.rs
@@ -38,7 +38,7 @@ fn main() {
     trainer.learning_momentum = 0.01;
 
     trainer.train(
-        NeuraBackprop::new(Euclidean),
+        &NeuraBackprop::new(Euclidean),
         &mut network,
         cycle_shuffling(inputs.iter().cloned(), rand::thread_rng()),
         &inputs,
diff --git a/src/gradient_solver/backprop.rs b/src/gradient_solver/backprop.rs
index f924f1f..d53ebf2 100644
--- a/src/gradient_solver/backprop.rs
+++ b/src/gradient_solver/backprop.rs
@@ -1,10 +1,9 @@
 use num::ToPrimitive;
 
-use crate::{network::NeuraTrainableNetworkBase, derivable::NeuraLoss, layer::NeuraTrainableLayer};
+use crate::{derivable::NeuraLoss, layer::NeuraTrainableLayer, network::NeuraTrainableNetworkBase};
 
 use super::*;
 
-
 pub struct NeuraBackprop<Loss> {
     loss: Loss,
 }
diff --git a/src/gradient_solver/forward_forward.rs b/src/gradient_solver/forward_forward.rs
new file mode 100644
index 0000000..76c8b70
--- /dev/null
+++ b/src/gradient_solver/forward_forward.rs
@@ -0,0 +1,191 @@
+use nalgebra::{DVector, Scalar};
+use num::{traits::NumAssignOps, Float, ToPrimitive};
+
+use crate::derivable::NeuraDerivable;
+
+use super::*;
+
+// TODO: add `max_depth: usize`
+pub struct NeuraForwardForward<Act> {
+    threshold: f64,
+    activation: Act,
+}
+
+impl<Act: NeuraDerivable<f64>> NeuraForwardForward<Act> {
+    pub fn new(activation: Act, threshold: f64) -> Self {
+        Self {
+            threshold,
+            activation,
+        }
+    }
+}
+
+struct NeuraForwardPair<Act> {
+    threshold: f64,
+    maximize: bool,
+    activation: Act,
+}
+
+impl<
+        F,
+        Act: Clone + NeuraDerivable<f64>,
+        Input,
+        Trainable: NeuraTrainableNetwork<Input, Output = DVector<F>>,
+    > NeuraGradientSolver<Input, bool, Trainable> for NeuraForwardForward<Act>
+where
+    F: ToPrimitive,
+{
+    fn get_gradient(
+        &self,
+        trainable: &Trainable,
+        input: &Input,
+        target: &bool,
+    ) -> Trainable::Gradient {
+        let target = *target;
+
+        trainable.traverse(
+            input,
+            &NeuraForwardPair {
+                threshold: self.threshold,
+                maximize: target,
+                activation: self.activation.clone(),
+            },
+        )
+    }
+
+    fn score(&self, trainable: &Trainable, input: &Input, target: &bool) -> f64 {
+        let output = trainable.eval(input);
+        let goodness = output
+            .iter()
+            .map(|x| x.to_f64().unwrap())
+            .reduce(|acc, x| acc + x * x)
+            .unwrap_or(0.0);
+        let goodness = goodness - self.threshold;
+        let goodness = self.activation.eval(goodness);
+
+        // Try to normalize the goodness so that if `Act([-threshold; +inf]) = [α; 1]`, `goodness ∈ [0; 1]`
+        let activation_zero = self.activation.eval(-self.threshold);
+        const EPSILON: f64 = 0.01;
+        let goodness = if activation_zero < 1.0 - EPSILON {
+            (goodness - activation_zero) / (1.0 - activation_zero)
+        } else {
+            goodness
+        };
+
+        if *target {
+            1.0 - goodness
+        } else {
+            goodness
+        }
+    }
+}
+
+impl<Act, LayerOutput, NetworkGradient> NeuraGradientSolverBase<LayerOutput, NetworkGradient>
+    for NeuraForwardPair<Act>
+{
+    type Output = NetworkGradient;
+}
+
+impl<Act, LayerOutput> NeuraGradientSolverFinal<LayerOutput> for NeuraForwardPair<Act> {
+    fn eval_final(&self, _output: LayerOutput) -> Self::Output {
+        ()
+    }
+}
+
+impl<F: Float + Scalar + NumAssignOps, Act: NeuraDerivable<F>>
+    NeuraGradientSolverTransient<DVector<F>> for NeuraForwardPair<Act>
+{
+    fn eval_layer<
+        Input,
+        NetworkGradient,
+        RecGradient,
+        Layer: NeuraTrainableLayer<Input, Output = DVector<F>>,
+    >(
+        &self,
+        layer: &Layer,
+        input: &Input,
+        rec_gradient: RecGradient,
+        combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
+    ) -> Self::Output {
+        let output = layer.eval(input);
+        let goodness = output
+            .iter()
+            .copied()
+            .reduce(|acc, x| acc + x * x)
+            .unwrap_or(F::zero());
+        let goodness = if self.maximize {
+            goodness - F::from(self.threshold).unwrap()
+        } else {
+            F::from(self.threshold).unwrap() - goodness
+        };
+        // We skip self.activation.eval(goodness)
+
+        let two = F::from(2.0).unwrap();
+
+        // The original formula does not have a 1/2 term,
+        // so we must multiply by 2
+        let mut goodness_derivative = output * (two * self.activation.derivate(goodness));
+
+        if self.maximize {
+            goodness_derivative = -goodness_derivative;
+        }
+
+        // TODO: split backprop_layer into eval_training, get_gradient and get_backprop
+        let (_, layer_gradient) = layer.backprop_layer(input, goodness_derivative);
+
+        combine_gradients(layer_gradient, rec_gradient)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use rand::Rng;
+
+    use super::*;
+    use crate::{
+        derivable::activation::{Relu, Tanh},
+        prelude::*,
+        utils::uniform_vector,
+    };
+
+    #[test]
+    fn test_forward_forward() {
+        let network = neura_sequential![
+            neura_layer!("dense", 10).activation(Relu),
+            neura_layer!("normalize"),
+            neura_layer!("dense", 4).activation(Relu),
+            neura_layer!("normalize"),
+            neura_layer!("dense", 1),
+        ]
+        .construct(NeuraShape::Vector(10))
+        .unwrap();
+
+        let fwd_solver = NeuraForwardForward::new(Tanh, 0.25);
+
+        fwd_solver.get_gradient(&network, &uniform_vector(10).map(|x| x as f32), &true);
+    }
+
+    #[test]
+    fn test_forward_forward_train() {
+        let mut network = neura_sequential![
+            neura_layer!("dense", 5).activation(Relu),
+            neura_layer!("normalize"),
+            neura_layer!("dense", 3).activation(Relu),
+            neura_layer!("normalize"),
+            neura_layer!("dense", 1)
+        ]
+        .construct(NeuraShape::Vector(4))
+        .unwrap();
+
+        let solver = NeuraForwardForward::new(Tanh, 0.25);
+
+        let trainer = NeuraBatchedTrainer::new(0.01, 20);
+
+        let inputs = (0..1).cycle().map(|_| {
+            let mut rng = rand::thread_rng();
+            (uniform_vector(4).map(|x| x as f32), rng.gen_bool(0.5))
+        });
+
+        let test_inputs: Vec<_> = inputs.clone().take(10).collect();
+
+        trainer.train(&solver, &mut network, inputs, test_inputs.as_slice());
+    }
+}
diff --git a/src/gradient_solver/mod.rs b/src/gradient_solver/mod.rs
index 83bc595..3f68076 100644
--- a/src/gradient_solver/mod.rs
+++ b/src/gradient_solver/mod.rs
@@ -1,6 +1,9 @@
 mod backprop;
 pub use backprop::NeuraBackprop;
 
+mod forward_forward;
+pub use forward_forward::NeuraForwardForward;
+
 use crate::{
     layer::NeuraTrainableLayer,
     network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase},
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 672ee00..10cc623 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -2,6 +2,7 @@ use crate::algebra::NeuraVectorSpace;
 
 pub mod dense;
 pub mod dropout;
+pub mod normalize;
 pub mod softmax;
 
 #[derive(Clone, Copy, PartialEq, Debug)]
@@ -124,4 +125,8 @@ macro_rules! neura_layer {
     ( "softmax" ) => {
         $crate::layer::softmax::NeuraSoftmaxLayer::new()
     };
+
+    ( "normalize" ) => {
+        $crate::layer::normalize::NeuraNormalizeLayer::new()
+    };
 }
diff --git a/src/layer/normalize.rs b/src/layer/normalize.rs
new file mode 100644
index 0000000..291657e
--- /dev/null
+++ b/src/layer/normalize.rs
@@ -0,0 +1,116 @@
+use nalgebra::{DVector, Scalar};
+use num::{traits::NumAssignOps, Float};
+
+use super::*;
+
+/// A layer that normalizes and centers its input, as follows:
+///
+/// ```no_rust
+/// μ = sum_i(x_i) / n
+/// σ² = sum_i((x_i - μ)^2) / n
+/// y_i = (x_i - μ) / σ
+/// ```
+#[derive(Debug, Clone, Copy)]
+pub struct NeuraNormalizeLayer {
+    shape: NeuraShape,
+}
+
+impl NeuraNormalizeLayer {
+    pub fn new() -> Self {
+        Self {
+            shape: NeuraShape::Vector(0),
+        }
+    }
+}
+
+impl NeuraPartialLayer for NeuraNormalizeLayer {
+    type Constructed = NeuraNormalizeLayer;
+
+    type Err = ();
+
+    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
+        Ok(Self { shape: input_shape })
+    }
+
+    fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
+        constructed.shape
+    }
+}
+
+impl<F: Float + Scalar> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
+    type Output = DVector<F>;
+
+    fn eval(&self, input: &DVector<F>) -> Self::Output {
+        let (mean, variance, _) = mean_variance(input);
+        let stddev = F::sqrt(variance);
+
+        let mut output = input.clone();
+
+        for item in &mut output {
+            *item = (*item - mean) / stddev;
+        }
+
+        output
+    }
+}
+
+impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraNormalizeLayer {
+    type Gradient = ();
+
+    fn backprop_layer(
+        &self,
+        input: &DVector<F>,
+        epsilon: Self::Output,
+    ) -> (DVector<F>, Self::Gradient) {
+        let (mean, variance, len) = mean_variance(input);
+        let stddev = F::sqrt(variance);
+        let input_centered = input.clone().map(|x| x - mean);
+
+        let mut jacobian_partial = &input_centered * input_centered.transpose();
+        jacobian_partial /= -variance * (stddev * len);
+        // Apply the 1/σ * dμ/dx_i term
+        for value in jacobian_partial.iter_mut() {
+            *value += F::one() / (stddev * len);
+        }
+
+        let mut epsilon_out = jacobian_partial * &epsilon;
+
+        // Apply the δ_{ik}/σ term
+        for i in 0..epsilon_out.len() {
+            epsilon_out[i] += epsilon[i] / stddev;
+        }
+
+        (epsilon_out, ())
+    }
+
+    fn default_gradient(&self) -> Self::Gradient {
+        ()
+    }
+
+    fn regularize_layer(&self) -> Self::Gradient {
+        ()
+    }
+
+    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
+        // Noop
+    }
+}
+
+fn mean_variance<'a, F: Float + Scalar>(input: impl IntoIterator<Item = &'a F>) -> (F, F, F) {
+    // Quickly compute mean and variance in one pass
+    let mut count = 0;
+    let mut sum = F::zero();
+    let mut sum_squared = F::zero();
+
+    for &value in input.into_iter() {
+        count += 1;
+        sum = sum + value;
+        sum_squared = sum_squared + value * value;
+    }
+
+    let len = F::from(count).unwrap();
+    let mean = sum / len;
+    let variance = sum_squared / len - mean * mean;
+
+    (mean, variance, len)
+}
diff --git a/src/lib.rs b/src/lib.rs
index d8f1896..e5abe7d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,9 +3,9 @@
 pub mod algebra;
 pub mod derivable;
+pub mod gradient_solver;
 pub mod layer;
 pub mod network;
-pub mod gradient_solver;
 pub mod train;
 
 mod utils;
 
@@ -18,10 +18,10 @@ pub mod prelude {
     pub use crate::{neura_layer, neura_sequential};
 
     // Structs and traits
+    pub use crate::gradient_solver::NeuraBackprop;
     pub use crate::layer::*;
     pub use crate::network::sequential::{
         NeuraSequential, NeuraSequentialConstruct, NeuraSequentialTail,
     };
-    pub use crate::gradient_solver::NeuraBackprop;
     pub use crate::train::NeuraBatchedTrainer;
 }
diff --git a/src/network/mod.rs b/src/network/mod.rs
index 0e2556b..823ee61 100644
--- a/src/network/mod.rs
+++ b/src/network/mod.rs
@@ -1,4 +1,6 @@
-use crate::{algebra::NeuraVectorSpace, layer::NeuraLayer, gradient_solver::NeuraGradientSolverBase};
+use crate::{
+    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolverBase, layer::NeuraLayer,
+};
 
 pub mod sequential;
 
diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs
index 637a42f..a969d33 100644
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@@ -1,7 +1,7 @@
 use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
 use crate::{
-    layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer},
     gradient_solver::{NeuraGradientSolverFinal, NeuraGradientSolverTransient},
+    layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer},
 };
 
 mod construct;
@@ -212,8 +212,8 @@ where
     }
 }
 
-impl> NeuraTrainableNetwork
-    for ()
+impl>
+    NeuraTrainableNetwork for ()
 {
     fn traverse(
         &self,
diff --git a/src/train.rs b/src/train.rs
index 1288578..b782b5c 100644
--- a/src/train.rs
+++ b/src/train.rs
@@ -1,5 +1,6 @@
 use crate::{
-    algebra::NeuraVectorSpace, network::NeuraTrainableNetworkBase, gradient_solver::NeuraGradientSolver,
+    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolver,
+    network::NeuraTrainableNetworkBase,
 };
 
 #[non_exhaustive]
@@ -77,7 +78,7 @@ impl NeuraBatchedTrainer {
         Inputs: IntoIterator<Item = (Input, Target)>,
     >(
         &self,
-        gradient_solver: GradientSolver,
+        gradient_solver: &GradientSolver,
         network: &mut Network,
         inputs: Inputs,
         test_inputs: &[(Input, Target)],
@@ -140,10 +141,10 @@ mod test {
     use crate::{
         assert_approx,
         derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0, NeuraLoss},
+        gradient_solver::NeuraBackprop,
         layer::{dense::NeuraDenseLayer, NeuraLayer},
         network::sequential::{NeuraSequential, NeuraSequentialTail},
         neura_sequential,
-        gradient_solver::NeuraBackprop,
     };
 
     #[test]