From 6c1d6874d7a7e1d40694141ad207b4923b34c73a Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Sun, 16 Apr 2023 12:38:18 +0200 Subject: [PATCH] :recycle: Implement and transition to NeuraMatrix and NeuraVector, to prevent stack overflows --- .gitignore | 1 + Cargo.toml | 3 + examples/bivariate.rs | 9 +- examples/convolution.rs | 77 ++++++++ examples/xor.rs | 15 +- src/algebra/matrix.rs | 292 +++++++++++++++++++++++++++++ src/{algebra.rs => algebra/mod.rs} | 24 +++ src/algebra/vector.rs | 278 +++++++++++++++++++++++++++ src/derivable/loss.rs | 22 ++- src/layer/dense.rs | 55 ++++-- src/layer/dropout.rs | 14 +- src/layer/mod.rs | 22 +++ src/layer/one_hot.rs | 26 ++- src/layer/reshape.rs | 147 +++++++++++++++ src/layer/softmax.rs | 38 ++-- src/lib.rs | 5 +- src/network/sequential.rs | 32 ++-- src/train.rs | 54 ++++-- src/utils.rs | 105 +---------- 19 files changed, 1006 insertions(+), 213 deletions(-) create mode 100644 examples/convolution.rs create mode 100644 src/algebra/matrix.rs rename src/{algebra.rs => algebra/mod.rs} (84%) create mode 100644 src/algebra/vector.rs create mode 100644 src/layer/reshape.rs diff --git a/.gitignore b/.gitignore index 4fffb2f..d461ead 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target /Cargo.lock +/data diff --git a/Cargo.toml b/Cargo.toml index d245660..48cafe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +boxed-array = "0.1.0" ndarray = "^0.15" +num = "^0.4" # num-traits = "0.2.15" rand = "^0.8" rand_distr = "0.4.3" @@ -14,3 +16,4 @@ rand_distr = "0.4.3" [dev-dependencies] image = "0.24.6" viuer = "0.6.2" +rust-mnist = "0.2.0" diff --git a/examples/bivariate.rs b/examples/bivariate.rs index 4b6e0bf..e03744f 100644 --- a/examples/bivariate.rs +++ b/examples/bivariate.rs @@ -31,7 +31,7 @@ fn main() { (angle.cos() * radius, angle.sin() * radius) }; - ([x, y], neuramethyst::one_hot::<2>(category)) + ([x, y].into(), neuramethyst::one_hot::<2>(category)) }); let test_inputs: Vec<_> = inputs.clone().take(10).collect(); @@ -49,7 +49,10 @@ fn main() { ); let network = network.clone(); - draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0); + draw_neuron_activation( + |input| network.eval(&input.into()).into_iter().collect(), + 6.0, + ); println!("{}", epoch); std::thread::sleep(std::time::Duration::new(0, 50_000_000)); @@ -72,7 +75,7 @@ fn main() { let mut file = std::fs::File::create("target/bivariate.csv").unwrap(); for (input, _target) in test_inputs { - let guess = neuramethyst::argmax(&network.eval(&input)); + let guess = neuramethyst::argmax(network.eval(&input).as_ref()); writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap(); } } diff --git a/examples/convolution.rs b/examples/convolution.rs new file mode 100644 index 0000000..8cbeeaa --- /dev/null +++ b/examples/convolution.rs @@ -0,0 +1,77 @@ +#![feature(generic_arg_infer)] +// #![feature(generic_const_exprs)] + +use neuramethyst::algebra::NeuraVector; +use rust_mnist::Mnist; + +use neuramethyst::derivable::activation::{Linear, Relu}; +use neuramethyst::derivable::loss::CrossEntropy; +use neuramethyst::{cycle_shuffling, one_hot, prelude::*}; + +fn main() { + const TRAIN_SIZE: usize = 100; + + let Mnist { + train_data: train_images, + train_labels, + test_data: test_images, + test_labels, + .. 
+ } = Mnist::new("data/"); + + let train_images = train_images + .into_iter() + .map(|raw| { + raw.into_iter() + .map(|x| x as f64 / 255.0) + .collect::>() + }) + .take(TRAIN_SIZE); + let train_labels = train_labels + .into_iter() + .map(|x| one_hot::<10>(x as usize)) + .take(TRAIN_SIZE); + + let test_images = test_images + .into_iter() + .map(|raw| { + raw.into_iter() + .map(|x| x as f64 / 255.0) + .collect::>() + }) + .take(TRAIN_SIZE / 6); + let test_labels = test_labels + .into_iter() + .map(|x| one_hot::<10>(x as usize)) + .take(TRAIN_SIZE / 6); + + let train_iter = cycle_shuffling( + train_images.zip(train_labels.into_iter()), + rand::thread_rng(), + ); + + let test_inputs: Vec<_> = test_images.zip(test_labels.into_iter()).collect(); + + let mut network = neura_sequential![ + neura_layer!("dense", { 28 * 28 }, 200; Relu), + neura_layer!("dropout", 0.5), + neura_layer!("dense", 100; Relu), + neura_layer!("dropout", 0.5), + neura_layer!("dense", 30; Relu), + neura_layer!("dropout", 0.5), + neura_layer!("dense", 10; Linear), + neura_layer!("softmax") + ]; + + let mut trainer = NeuraBatchedTrainer::new(0.03, TRAIN_SIZE * 10); + trainer.log_iterations = (TRAIN_SIZE / 128).max(1); + trainer.batch_size = 128; + trainer.learning_momentum = 0.001; + + trainer.train( + NeuraBackprop::new(CrossEntropy), + &mut network, + train_iter, + &test_inputs, + ); +} diff --git a/examples/xor.rs b/examples/xor.rs index b1e35d6..759dd9d 100644 --- a/examples/xor.rs +++ b/examples/xor.rs @@ -1,8 +1,9 @@ #![feature(generic_arg_infer)] +use neuramethyst::algebra::NeuraVector; use neuramethyst::derivable::activation::Relu; use neuramethyst::derivable::loss::Euclidean; -use neuramethyst::prelude::*; +use neuramethyst::{cycle_shuffling, prelude::*}; fn main() { let mut network = neura_sequential![ @@ -11,14 +12,14 @@ fn main() { neura_layer!("dense", 1; Relu) ]; - let inputs = [ - ([0.0, 0.0], [0.0]), - ([0.0, 1.0], [1.0]), - ([1.0, 0.0], [1.0]), - ([1.0, 1.0], [0.0]), + let inputs: [(NeuraVector<2, f64>, NeuraVector<1, f64>); 4] = [ + ([0.0, 0.0].into(), [0.0].into()), + ([0.0, 1.0].into(), [1.0].into()), + ([1.0, 0.0].into(), [1.0].into()), + ([1.0, 1.0].into(), [0.0].into()), ]; - for (input, target) in inputs { + for (input, target) in &inputs { println!( "Input: {:?}, target: {}, actual: {:.3}", &input, diff --git a/src/algebra/matrix.rs b/src/algebra/matrix.rs new file mode 100644 index 0000000..fbe7a3c --- /dev/null +++ b/src/algebra/matrix.rs @@ -0,0 +1,292 @@ +use std::borrow::Borrow; + +use super::*; +use boxed_array::from_cloned; +use num::Float; + +/// A simple abstraction around `[[F; WIDTH]; HEIGHT]`, +/// which ensures that all allocations that depend on `WIDTH` or `HEIGHT` are done on the heap, +/// without losing the length information. 
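+///
+/// A minimal usage sketch (it mirrors the indexing test at the bottom of this file and
+/// assumes the `neuramethyst::algebra` re-export added in `src/algebra/mod.rs` below):
+///
+/// ```
+/// use neuramethyst::algebra::NeuraMatrix;
+///
+/// // The 1000-by-1000 backing array lives on the heap, so this does not overflow the stack.
+/// let mut matrix: NeuraMatrix<1000, 1000, f64> = NeuraMatrix::from_value(0.0);
+/// matrix[100][200] = 0.3;              // index by [row][column]...
+/// assert_eq!(matrix[(200, 100)], 0.3); // ...or by an `(x, y)` tuple
+/// ```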
+#[derive(Clone, Debug, PartialEq)] +pub struct NeuraMatrix { + pub data: Box<[[F; WIDTH]; HEIGHT]>, +} + +impl NeuraMatrix { + #[inline(always)] + pub fn from_value(value: F) -> Self + where + F: Clone, + { + Self { + data: from_cloned(&value), + } + } + + #[inline(always)] + pub fn get(&self, x: usize, y: usize) -> Option<&F> { + if x >= WIDTH || y >= HEIGHT { + return None; + } + + Some(&self.data[y][x]) + } +} + +impl NeuraMatrix { + /// Returns `self * vector` + pub fn multiply_vector(&self, vector: impl Borrow<[F; WIDTH]>) -> NeuraVector { + let mut result: NeuraVector = NeuraVector::from_value(F::zero()); + let vector = vector.borrow(); + + for i in 0..HEIGHT { + let mut sum = F::zero(); + for k in 0..WIDTH { + sum = sum + self.data[i][k] * vector[k]; + } + result[i] = sum; + } + + result + } + + /// Returns `transpose(self) * vector`, + /// without actually performing the transpose operation + pub fn transpose_multiply_vector( + &self, + vector: impl AsRef<[F; HEIGHT]>, + ) -> NeuraVector { + let mut result: NeuraVector = NeuraVector::from_value(F::zero()); + let vector = vector.as_ref(); + + for j in 0..WIDTH { + let mut sum = F::zero(); + for k in 0..HEIGHT { + sum = sum + self.data[k][j] * vector[k]; + } + result[j] = sum; + } + + result + } +} + +impl NeuraMatrix { + pub fn from_diagonal(vector: impl AsRef<[F; LENGTH]>) -> Self { + let mut result: NeuraMatrix = NeuraMatrix::default(); + let vector = vector.as_ref(); + + for i in 0..LENGTH { + result[i][i] = vector[i].clone(); + } + + result + } +} + +impl + Into> NeuraVectorSpace + for NeuraMatrix +{ + fn add_assign(&mut self, other: &Self) { + for i in 0..HEIGHT { + for j in 0..WIDTH { + self.data[i][j] = self.data[i][j] + other.data[i][j]; + } + } + } + + fn mul_assign(&mut self, by: f64) { + let by: F = by.into(); + for i in 0..HEIGHT { + for j in 0..WIDTH { + self.data[i][j] = self.data[i][j] * by; + } + } + } + + #[inline(always)] + fn zero() -> Self { + Self::from_value(F::zero()) + } + + fn norm_squared(&self) -> f64 { + let mut sum = F::zero(); + + for i in 0..HEIGHT { + for j in 0..WIDTH { + let x = self.data[i][j]; + sum = sum + x * x; + } + } + + sum.into() + } +} + +impl From> + for NeuraMatrix +{ + #[inline] + fn from(data: Box<[[F; WIDTH]; HEIGHT]>) -> Self { + Self { data } + } +} + +impl From> + for Box<[[F; WIDTH]; HEIGHT]> +{ + #[inline] + fn from(matrix: NeuraMatrix) -> Self { + matrix.data + } +} + +impl From<&[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + /// **Warning:** when using this function, make sure that the array is not allocated on the stack + /// or that `WIDTH` and `HEIGHT` are bounded. + #[inline] + fn from(data: &[[F; WIDTH]; HEIGHT]) -> Self { + let mut res = Self::default(); + + for i in 0..HEIGHT { + for j in 0..WIDTH { + res[i][j] = data[i][j].clone(); + } + } + + res + } +} + +impl From<[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + /// **Warning:** when using this function, make sure that `WIDTH` and `HEIGHT` are bounded. 
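+    /// (The `[[F; WIDTH]; HEIGHT]` argument is passed by value, i.e. through the stack, before
+    /// being boxed here, so unbounded dimensions can trigger the very stack overflows this type
+    /// is meant to prevent.)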
+ fn from(data: [[F; WIDTH]; HEIGHT]) -> Self { + Self { + data: Box::new(data), + } + } +} + +impl std::ops::Index<(usize, usize)> + for NeuraMatrix +{ + type Output = F; + + #[inline] + fn index(&self, index: (usize, usize)) -> &Self::Output { + if index.0 >= WIDTH || index.1 >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix element ({}, {}), which is outside of NeuraMatrix<{}, {}, _>", + index.0, index.1, WIDTH, HEIGHT + ); + } + + &self.data[index.1][index.0] + } +} + +impl std::ops::IndexMut<(usize, usize)> + for NeuraMatrix +{ + #[inline] + fn index_mut(&mut self, index: (usize, usize)) -> &mut Self::Output { + if index.0 >= WIDTH || index.1 >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix element ({}, {}), which is outside of NeuraMatrix<{}, {}, _>", + index.0, index.1, WIDTH, HEIGHT + ); + } + + &mut self.data[index.1][index.0] + } +} + +impl std::ops::Index + for NeuraMatrix +{ + type Output = [F; WIDTH]; + + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + if index >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix row {}, which is outside of NeuraMatrix<{}, {}, _>", + index, WIDTH, HEIGHT + ); + } + + &self.data[index] + } +} + +impl std::ops::IndexMut + for NeuraMatrix +{ + #[inline(always)] + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + if index >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix row {}, which is outside of NeuraMatrix<{}, {}, _>", + index, WIDTH, HEIGHT + ); + } + + &mut self.data[index] + } +} + +impl AsRef<[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + #[inline(always)] + fn as_ref(&self) -> &[[F; WIDTH]; HEIGHT] { + &self.data + } +} + +impl Borrow<[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + #[inline(always)] + fn borrow(&self) -> &[[F; WIDTH]; HEIGHT] { + &self.data + } +} + +impl Default + for NeuraMatrix +{ + #[inline(always)] + fn default() -> Self { + Self::from_value(F::default()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_index() { + let mut matrix: NeuraMatrix<1000, 1000, f64> = NeuraMatrix::from_value(0.0); + + matrix[100][200] = 0.3; + assert_eq!(matrix[(200, 100)], 0.3); + matrix[(999, 999)] = 0.5; + assert_eq!(matrix[999][999], 0.5); + } + + #[test] + #[should_panic( + expected = "Index out of bound: tried indexing matrix row 100, which is outside of NeuraMatrix<100, 100, _>" + )] + fn test_index_oob() { + let matrix: NeuraMatrix<100, 100, f64> = NeuraMatrix::from_value(0.0); + + let _ = matrix[100]; + } +} diff --git a/src/algebra.rs b/src/algebra/mod.rs similarity index 84% rename from src/algebra.rs rename to src/algebra/mod.rs index fffafe5..ec9e3ad 100644 --- a/src/algebra.rs +++ b/src/algebra/mod.rs @@ -1,3 +1,9 @@ +mod matrix; +pub use matrix::NeuraMatrix; + +mod vector; +pub use vector::NeuraVector; + /// An extension of `std::ops::AddAssign` and `std::ops::Default` pub trait NeuraVectorSpace { fn add_assign(&mut self, other: &Self); @@ -30,6 +36,24 @@ impl NeuraVectorSpace for () { } } +impl NeuraVectorSpace for Box { + fn add_assign(&mut self, other: &Self) { + self.as_mut().add_assign(other.as_ref()); + } + + fn mul_assign(&mut self, by: f64) { + self.as_mut().mul_assign(by); + } + + fn zero() -> Self { + Box::new(T::zero()) + } + + fn norm_squared(&self) -> f64 { + self.as_ref().norm_squared() + } +} + impl NeuraVectorSpace for (Left, Right) { fn add_assign(&mut self, other: &Self) { NeuraVectorSpace::add_assign(&mut self.0, &other.0); diff --git a/src/algebra/vector.rs b/src/algebra/vector.rs new 
file mode 100644 index 0000000..f5e637e --- /dev/null +++ b/src/algebra/vector.rs @@ -0,0 +1,278 @@ +use std::borrow::Borrow; + +use super::*; +use boxed_array::from_cloned; +use num::Float; + +#[derive(Clone, Debug, PartialEq)] +pub struct NeuraVector { + pub data: Box<[F; LENGTH]>, +} + +impl NeuraVector { + #[inline(always)] + pub fn from_value(value: F) -> Self + where + F: Clone, + { + Self { + data: from_cloned(&value), + } + } + + #[inline(always)] + pub fn get(&self, index: usize) -> Option<&F> { + if index >= LENGTH { + None + } else { + Some(&self.data[index]) + } + } + + #[inline(always)] + pub fn len(&self) -> usize { + LENGTH + } + + pub fn iter<'a>(&'a self) -> std::slice::Iter<'a, F> { + self.data.iter() + } +} + +impl NeuraVector { + pub fn dot(&self, other: impl AsRef<[F; LENGTH]>) -> F { + let mut sum = F::zero(); + let other = other.as_ref(); + + for i in 0..LENGTH { + sum = sum + self.data[i] * other[i]; + } + + sum + } + + /// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$ + pub fn reverse_dot( + &self, + other: impl Borrow<[F; WIDTH]>, + ) -> NeuraMatrix { + let mut result: NeuraMatrix = NeuraMatrix::from_value(F::zero()); + let other = other.borrow(); + + for i in 0..LENGTH { + for j in 0..WIDTH { + result[i][j] = self.data[i] * other[j]; + } + } + + result + } + + pub fn hadamard_product(&self, other: impl AsRef<[F; LENGTH]>) -> NeuraVector { + let mut result: NeuraVector = NeuraVector::from_value(F::zero()); + let other = other.as_ref(); + + for i in 0..LENGTH { + result[i] = self.data[i] * other[i]; + } + + result + } +} + +impl + Into> NeuraVectorSpace + for NeuraVector +{ + fn add_assign(&mut self, other: &Self) { + for i in 0..LENGTH { + self.data[i] = self.data[i] + other.data[i]; + } + } + + fn mul_assign(&mut self, by: f64) { + for i in 0..LENGTH { + self.data[i] = self.data[i] * by.into(); + } + } + + #[inline(always)] + fn zero() -> Self { + Self::from_value(F::zero()) + } + + fn norm_squared(&self) -> f64 { + let mut sum = F::zero(); + + for i in 0..LENGTH { + sum = sum + self.data[i] * self.data[i]; + } + + sum.into() + } +} + +impl std::ops::Index for NeuraVector { + type Output = F; + + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + if index >= LENGTH { + panic!( + "Tried indexing element {} of NeuraVector<{}, _>", + index, LENGTH + ); + } + + &self.data[index] + } +} + +impl std::ops::IndexMut for NeuraVector { + #[inline(always)] + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + if index >= LENGTH { + panic!( + "Tried indexing element {} of NeuraVector<{}, _>", + index, LENGTH + ); + } + + &mut self.data[index] + } +} + +impl AsRef<[F; LENGTH]> for NeuraVector { + #[inline(always)] + fn as_ref(&self) -> &[F; LENGTH] { + &self.data + } +} + +impl AsRef<[F]> for NeuraVector { + #[inline(always)] + fn as_ref(&self) -> &[F] { + self.data.as_ref() + } +} + +impl Borrow<[F; LENGTH]> for NeuraVector { + #[inline(always)] + fn borrow(&self) -> &[F; LENGTH] { + &self.data + } +} + +impl Borrow<[F; LENGTH]> for &NeuraVector { + #[inline(always)] + fn borrow(&self) -> &[F; LENGTH] { + &self.data + } +} + +impl From> for NeuraVector { + fn from(data: Box<[F; LENGTH]>) -> Self { + Self { data } + } +} + +impl From> for Box<[F; LENGTH]> { + fn from(vector: NeuraVector) -> Self { + vector.data + } +} + +impl From<&[F; LENGTH]> for NeuraVector { + /// **Warning:** when using this function, make sure that the array is not allocated on the stack, + /// or that `LENGTH` is bounded. 
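+    /// (The referenced array is copied element by element into the heap-allocated `NeuraVector`,
+    /// but the source array itself still has to fit wherever it was originally allocated.)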
+ fn from(data: &[F; LENGTH]) -> Self { + let mut res = Self::default(); + + for i in 0..LENGTH { + res.data[i] = data[i].clone(); + } + + res + } +} + +impl From<[F; LENGTH]> for NeuraVector { + /// **Warning:** when using this function, make sure that `LENGTH` is bounded. + fn from(data: [F; LENGTH]) -> Self { + Self { + data: Box::new(data), + } + } +} + +impl Default for NeuraVector { + #[inline(always)] + fn default() -> Self { + Self::from_value(F::default()) + } +} + +impl IntoIterator for NeuraVector { + type Item = F; + type IntoIter = std::array::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.data.into_iter() + } +} + +impl<'a, const LENGTH: usize, F> IntoIterator for &'a NeuraVector { + type Item = &'a F; + type IntoIter = std::slice::Iter<'a, F>; + + fn into_iter(self) -> Self::IntoIter { + self.data.iter() + } +} + +impl<'a, const LENGTH: usize, F> IntoIterator for &'a mut NeuraVector { + type Item = &'a mut F; + type IntoIter = std::slice::IterMut<'a, F>; + + fn into_iter(self) -> Self::IntoIter { + self.data.iter_mut() + } +} + +impl<'a, const LENGTH: usize, F: Default + Clone> FromIterator for NeuraVector { + fn from_iter>(iter: T) -> Self { + let mut res = Self::default(); + let mut iter = iter.into_iter(); + + for i in 0..LENGTH { + if let Some(next) = iter.next() { + res[i] = next; + } else { + break; + } + } + + res + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_reverse_dot() { + let left: NeuraVector<_, f64> = [2.0, 3.0, 5.0].into(); + let right: NeuraVector<_, f64> = [7.0, 11.0, 13.0, 17.0].into(); + + let expected: NeuraMatrix<_, _, f64> = [ + [14.0, 22.0, 26.0, 34.0], + [21.0, 33.0, 39.0, 51.0], + [35.0, 55.0, 65.0, 85.0], + ] + .into(); + + let actual = left.reverse_dot(right); + + assert_eq!(expected, actual); + } +} diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs index 301ea08..7d23d8b 100644 --- a/src/derivable/loss.rs +++ b/src/derivable/loss.rs @@ -1,14 +1,16 @@ +use crate::algebra::NeuraVector; + use super::NeuraLoss; #[derive(Clone, Copy, Debug, PartialEq)] pub struct Euclidean; impl NeuraLoss for Euclidean { - type Input = [f64; N]; - type Target = [f64; N]; + type Input = NeuraVector; + type Target = NeuraVector; #[inline] - fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 { + fn eval(&self, target: &NeuraVector, actual: &NeuraVector) -> f64 { let mut sum_squared = 0.0; for i in 0..N { @@ -19,8 +21,12 @@ impl NeuraLoss for Euclidean { } #[inline] - fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] { - let mut res = [0.0; N]; + fn nabla( + &self, + target: &NeuraVector, + actual: &NeuraVector, + ) -> NeuraVector { + let mut res = NeuraVector::default(); // ∂E(y)/∂yᵢ = yᵢ - yᵢ' for i in 0..N { @@ -57,8 +63,8 @@ impl CrossEntropy { } impl NeuraLoss for CrossEntropy { - type Input = [f64; N]; - type Target = [f64; N]; + type Input = NeuraVector; + type Target = NeuraVector; fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 { let mut result = 0.0; @@ -71,7 +77,7 @@ impl NeuraLoss for CrossEntropy { } fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input { - let mut result = [0.0; N]; + let mut result = NeuraVector::default(); for i in 0..N { result[i] = self.derivate_single(target[i], actual[i]); diff --git a/src/layer/dense.rs b/src/layer/dense.rs index e1be4f0..ff921a8 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -1,8 +1,7 @@ use super::{NeuraLayer, NeuraTrainableLayer}; use crate::{ - algebra::NeuraVectorSpace, + 
algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace}, derivable::NeuraDerivable, - utils::{multiply_matrix_transpose_vector, multiply_matrix_vector, reverse_dot_product}, }; use rand::Rng; @@ -15,8 +14,8 @@ pub struct NeuraDenseLayer< const INPUT_LEN: usize, const OUTPUT_LEN: usize, > { - weights: [[f64; INPUT_LEN]; OUTPUT_LEN], - bias: [f64; OUTPUT_LEN], + weights: NeuraMatrix, + bias: NeuraVector, activation: Act, regularization: Reg, } @@ -29,8 +28,8 @@ impl< > NeuraDenseLayer { pub fn new( - weights: [[f64; INPUT_LEN]; OUTPUT_LEN], - bias: [f64; OUTPUT_LEN], + weights: NeuraMatrix, + bias: NeuraVector, activation: Act, regularization: Reg, ) -> Self { @@ -43,7 +42,7 @@ impl< } pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { - let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN]; + let mut weights: NeuraMatrix = NeuraMatrix::from_value(0.0f64); // Use Xavier (or He) initialisation, using the harmonic mean // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html @@ -63,7 +62,7 @@ impl< Self { weights, // Biases are initialized based on the activation's hint - bias: [activation.bias_hint(); OUTPUT_LEN], + bias: NeuraVector::from_value(activation.bias_hint()), activation, regularization, } @@ -77,12 +76,12 @@ impl< const OUTPUT_LEN: usize, > NeuraLayer for NeuraDenseLayer { - type Input = [f64; INPUT_LEN]; + type Input = NeuraVector; - type Output = [f64; OUTPUT_LEN]; + type Output = NeuraVector; fn eval(&self, input: &Self::Input) -> Self::Output { - let mut result = multiply_matrix_vector(&self.weights, input); + let mut result = self.weights.multiply_vector(input); for i in 0..OUTPUT_LEN { result[i] = self.activation.eval(result[i] + self.bias[i]); @@ -99,30 +98,33 @@ impl< const OUTPUT_LEN: usize, > NeuraTrainableLayer for NeuraDenseLayer { - type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]); + type Delta = ( + NeuraMatrix, + NeuraVector, + ); fn backpropagate( &self, input: &Self::Input, epsilon: Self::Output, ) -> (Self::Input, Self::Delta) { - let evaluated = multiply_matrix_vector(&self.weights, input); + let evaluated = self.weights.multiply_vector(input); // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), // with `self.activation'(input) ° epsilon = delta` - let mut delta = epsilon.clone(); + let mut delta: NeuraVector = epsilon.clone(); for i in 0..OUTPUT_LEN { delta[i] *= self.activation.derivate(evaluated[i]); } // Compute the weight gradient - let weights_gradient = reverse_dot_product(&delta, input); + let weights_gradient = delta.reverse_dot(input); + + let new_epsilon = self.weights.transpose_multiply_vector(&delta); // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // The gradient of the bias is equal to the delta term of the backpropagation algorithm let bias_gradient = delta; - let new_epsilon = multiply_matrix_transpose_vector(&self.weights, &delta); - (new_epsilon, (weights_gradient, bias_gradient)) } @@ -132,7 +134,7 @@ impl< } fn regularize(&self) -> Self::Delta { - let mut res = ([[0.0; INPUT_LEN]; OUTPUT_LEN], [0.0; OUTPUT_LEN]); + let mut res = Self::Delta::default(); for i in 0..OUTPUT_LEN { for j in 0..INPUT_LEN { @@ -149,7 +151,10 @@ impl< #[cfg(test)] mod test { use super::*; - use crate::derivable::{activation::Relu, regularize::NeuraL0}; + use crate::{ + derivable::{activation::Relu, regularize::NeuraL0}, + utils::uniform_vector, + }; #[test] fn test_from_rng() { @@ -160,6 +165,16 @@ 
mod test { for x in 0..64 { input[x] = rng.gen(); } - assert!(layer.eval(&input).len() == 32); + assert!(layer.eval(&input.into()).len() == 32); + } + + #[test] + fn test_stack_overflow_big_layer() { + let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0) + as NeuraDenseLayer; + + layer.backpropagate(&uniform_vector(), uniform_vector()); + + as NeuraTrainableLayer>::Delta::zero(); } } diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs index 442afa7..1111e66 100644 --- a/src/layer/dropout.rs +++ b/src/layer/dropout.rs @@ -1,12 +1,14 @@ use rand::Rng; +use crate::algebra::NeuraVector; + use super::{NeuraLayer, NeuraTrainableLayer}; #[derive(Clone, Debug)] pub struct NeuraDropoutLayer { pub dropout_probability: f64, multiplier: f64, - mask: [bool; LENGTH], + mask: NeuraVector, rng: R, } @@ -15,12 +17,12 @@ impl NeuraDropoutLayer { Self { dropout_probability, multiplier: 1.0, - mask: [false; LENGTH], + mask: NeuraVector::from_value(false), rng, } } - fn apply_dropout(&self, vector: &mut [f64; LENGTH]) { + fn apply_dropout(&self, vector: &mut NeuraVector) { for (index, &dropout) in self.mask.iter().enumerate() { if dropout { vector[index] = 0.0; @@ -32,8 +34,8 @@ impl NeuraDropoutLayer { } impl NeuraLayer for NeuraDropoutLayer { - type Input = [f64; LENGTH]; - type Output = [f64; LENGTH]; + type Input = NeuraVector; + type Output = NeuraVector; fn eval(&self, input: &Self::Input) -> Self::Output { let mut result = input.clone(); @@ -83,7 +85,7 @@ impl NeuraTrainableLayer for NeuraDropoutLayer { $crate::layer::NeuraLockLayer($layer) }; + + // ( "flatten" ) => { + // $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64> + // }; + + // ( "flatten", $width:expr, $height:expr ) => { + // $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64> + // }; + + // ( "reshape", $height:expr ) => { + // $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64> + // }; + + // ( "reshape", $width:expr, $height:expr ) => { + // $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64> + // }; } diff --git a/src/layer/one_hot.rs b/src/layer/one_hot.rs index 029f9a9..22c094f 100644 --- a/src/layer/one_hot.rs +++ b/src/layer/one_hot.rs @@ -1,34 +1,32 @@ +use crate::algebra::{NeuraMatrix, NeuraVector}; + use super::{NeuraLayer, NeuraTrainableLayer}; /// A special layer that allows you to split a vector into one-hot vectors #[derive(Debug, Clone, PartialEq)] pub struct NeuraOneHotLayer; -impl NeuraLayer for NeuraOneHotLayer -where - [(); LENGTH * CATS]: Sized, -{ - type Input = [f64; LENGTH]; - type Output = [f64; LENGTH * CATS]; +impl NeuraLayer for NeuraOneHotLayer { + type Input = NeuraVector; + type Output = NeuraMatrix; fn eval(&self, input: &Self::Input) -> Self::Output { - let mut res = [0.0; LENGTH * CATS]; + let mut res = NeuraMatrix::default(); for i in 0..LENGTH { let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0); let amount = (input[i] - cat_low).max(0.0).min(1.0); let cat_low = cat_low as usize; - res[i * LENGTH + cat_low] = 1.0 - amount; - res[i * LENGTH + cat_low + 1] = amount; + res[i][cat_low] = 1.0 - amount; + res[i][cat_low + 1] = amount; } res } } -impl NeuraTrainableLayer for NeuraOneHotLayer -where - [(); LENGTH * CATS]: Sized, +impl NeuraTrainableLayer + for NeuraOneHotLayer { type Delta = (); @@ -37,11 +35,11 @@ where input: &Self::Input, epsilon: Self::Output, ) -> (Self::Input, Self::Delta) { - let 
mut res = [0.0; LENGTH]; + let mut res = NeuraVector::default(); for i in 0..LENGTH { let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0) as usize; - let epsilon = -epsilon[i * LENGTH + cat_low] + epsilon[i * LENGTH + cat_low + 1]; + let epsilon = -epsilon[i][cat_low] + epsilon[i][cat_low + 1]; // Scale epsilon by how many entries were ignored res[i] = epsilon * CATS as f64 / 2.0; } diff --git a/src/layer/reshape.rs b/src/layer/reshape.rs new file mode 100644 index 0000000..96979ab --- /dev/null +++ b/src/layer/reshape.rs @@ -0,0 +1,147 @@ +//! This module is currently disabled, as it relies on `generic_const_exprs`, which is too unstable to use as of now + +use super::{NeuraLayer, NeuraTrainableLayer}; + +/// Converts a `[[T; WIDTH]; HEIGHT]` into a `[T; WIDTH * HEIGHT]`. +/// Requires the `#![feature(generic_const_exprs)]` feature to be enabled. +pub struct NeuraFlattenLayer { + phantom: std::marker::PhantomData, +} + +/// Converts a `[T; WIDTH * HEIGHT]` into a `[[T; WIDTH]; HEIGHT]`. +/// Requires the `#![feature(generic_const_exprs)]` feature to be enabled. +pub struct NeuraReshapeLayer { + phantom: std::marker::PhantomData, +} + +#[inline(always)] +fn flatten( + input: &[[T; WIDTH]; HEIGHT], +) -> [T; WIDTH * HEIGHT] +where + [T; WIDTH * HEIGHT]: Sized, +{ + let mut res = [T::default(); WIDTH * HEIGHT]; + + // Hopefully the optimizer realizes this can be all optimized away + for i in 0..HEIGHT { + for j in 0..WIDTH { + res[i * WIDTH + j] = input[i][j]; + } + } + + res +} + +#[inline(always)] +fn reshape( + input: &[T; WIDTH * HEIGHT], +) -> [[T; WIDTH]; HEIGHT] +where + [T; WIDTH * HEIGHT]: Sized, +{ + let mut res = [[T::default(); WIDTH]; HEIGHT]; + + // Hopefully the optimizer realizes this can be all optimized away + for i in 0..HEIGHT { + for j in 0..WIDTH { + res[i][j] = input[i * WIDTH + j]; + } + } + + res +} + +impl NeuraFlattenLayer { + pub fn new() -> Self { + Self { + phantom: std::marker::PhantomData, + } + } +} + +impl NeuraReshapeLayer { + pub fn new() -> Self { + Self { + phantom: std::marker::PhantomData, + } + } +} + +impl NeuraLayer + for NeuraFlattenLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Input = [[T; WIDTH]; HEIGHT]; + + type Output = [T; WIDTH * HEIGHT]; + + #[inline(always)] + fn eval(&self, input: &Self::Input) -> Self::Output { + flatten(input) + } +} + +impl NeuraLayer + for NeuraReshapeLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Input = [T; WIDTH * HEIGHT]; + + type Output = [[T; WIDTH]; HEIGHT]; + + #[inline(always)] + fn eval(&self, input: &Self::Input) -> Self::Output { + reshape(input) + } +} + +impl NeuraTrainableLayer + for NeuraFlattenLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Delta = (); + + fn backpropagate( + &self, + _input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + (reshape(&epsilon), ()) + } + + fn regularize(&self) -> Self::Delta { + todo!() + } + + fn apply_gradient(&mut self, _gradient: &Self::Delta) { + // Noop + } +} + +impl NeuraTrainableLayer + for NeuraReshapeLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Delta = (); + + fn backpropagate( + &self, + _input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + (flatten(&epsilon), ()) + } + + fn regularize(&self) -> Self::Delta { + todo!() + } + + fn apply_gradient(&mut self, _gradient: &Self::Delta) { + // Noop + } +} diff --git a/src/layer/softmax.rs b/src/layer/softmax.rs index ebbc22b..ab27a69 100644 --- a/src/layer/softmax.rs +++ b/src/layer/softmax.rs @@ -1,4 +1,4 @@ -use 
crate::utils::multiply_vectors_pointwise; +use crate::algebra::NeuraVector; use super::{NeuraLayer, NeuraTrainableLayer}; @@ -13,16 +13,16 @@ impl NeuraSoftmaxLayer { } impl NeuraLayer for NeuraSoftmaxLayer { - type Input = [f64; LENGTH]; - type Output = [f64; LENGTH]; + type Input = NeuraVector; + type Output = NeuraVector; fn eval(&self, input: &Self::Input) -> Self::Output { - let mut res = input.clone(); + let mut res: Self::Input = input.clone(); let mut max = 0.0; - for item in &res { - if *item > max { - max = *item; + for &item in &res { + if item > max { + max = item; } } @@ -55,10 +55,10 @@ impl NeuraTrainableLayer for NeuraSoftmaxLayer { let evaluated = self.eval(input); // Compute $a_{l-1,i} \epsilon_{l,i}$ - epsilon = multiply_vectors_pointwise(&epsilon, &evaluated); + epsilon = epsilon.hadamard_product(&evaluated); // Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ - let sum_diagonal_terms: f64 = epsilon.iter().copied().sum(); + let sum_diagonal_terms: f64 = epsilon.iter().sum(); for i in 0..LENGTH { // Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and add it to $a_{l-1,i} \epsilon_{l,i}$ @@ -79,10 +79,8 @@ impl NeuraTrainableLayer for NeuraSoftmaxLayer { #[cfg(test)] mod test { - use crate::algebra::NeuraVectorSpace; - use crate::utils::{ - matrix_from_diagonal, multiply_matrix_vector, reverse_dot_product, uniform_vector, - }; + use crate::algebra::{NeuraMatrix, NeuraVectorSpace}; + use crate::utils::uniform_vector; use super::*; @@ -91,7 +89,7 @@ mod test { const EPSILON: f64 = 0.000002; let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<3>; - let result = layer.eval(&[1.0, 2.0, 8.0]); + let result = layer.eval(&[1.0, 2.0, 8.0].into()); assert!((result[0] - 0.0009088).abs() < EPSILON); assert!((result[1] - 0.0024704).abs() < EPSILON); @@ -113,7 +111,7 @@ mod test { for epsilon2 in [2.9, 3.1, 3.7] { let epsilon = [epsilon1, epsilon2]; - let (epsilon, _) = layer.backpropagate(&input, epsilon); + let (epsilon, _) = layer.backpropagate(&input.into(), epsilon.into()); let expected = [ output[0] * (1.0 - output[0]) * epsilon1 - output[1] * output[0] * epsilon2, @@ -136,15 +134,15 @@ mod test { let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<4>; for _ in 0..100 { - let input: [f64; 4] = uniform_vector(); + let input = uniform_vector::<4>(); let evaluated = layer.eval(&input); - let loss: [f64; 4] = uniform_vector(); + let loss = uniform_vector::<4>(); - let mut derivative = reverse_dot_product(&evaluated, &evaluated); + let mut derivative = evaluated.reverse_dot(&evaluated); derivative.mul_assign(-1.0); - derivative.add_assign(&matrix_from_diagonal(&evaluated)); + derivative.add_assign(&NeuraMatrix::from_diagonal(&evaluated)); - let expected = multiply_matrix_vector(&derivative, &loss); + let expected = derivative.multiply_vector(&loss); let (actual, _) = layer.backpropagate(&input, loss); for i in 0..4 { diff --git a/src/lib.rs b/src/lib.rs index 61ee52a..493e989 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ #![feature(generic_arg_infer)] #![feature(generic_associated_types)] -#![feature(generic_const_exprs)] +// #![feature(generic_const_exprs)] pub mod algebra; pub mod derivable; @@ -11,7 +11,7 @@ pub mod train; mod utils; // TODO: move to a different file -pub use utils::{argmax, one_hot}; +pub use utils::{argmax, cycle_shuffling, one_hot}; pub mod prelude { // Macros @@ -21,5 +21,4 @@ pub mod prelude { pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; pub use crate::network::sequential::{NeuraSequential, 
NeuraSequentialTail}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; - pub use crate::utils::cycle_shuffling; } diff --git a/src/network/sequential.rs b/src/network/sequential.rs index 370a75c..602e50d 100644 --- a/src/network/sequential.rs +++ b/src/network/sequential.rs @@ -8,7 +8,7 @@ use super::NeuraTrainableNetwork; #[derive(Clone, Debug)] pub struct NeuraSequential { pub layer: Layer, - pub child_network: ChildNetwork, + pub child_network: Box, } /// Operations on the tail end of a sequential network @@ -24,7 +24,7 @@ impl NeuraSequential { pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { Self { layer, - child_network, + child_network: Box::new(child_network), } } @@ -36,13 +36,13 @@ impl NeuraSequential { } pub fn trim_front(self) -> ChildNetwork { - self.child_network + *self.child_network } pub fn push_front(self, layer: T) -> NeuraSequential { NeuraSequential { layer: layer, - child_network: self, + child_network: Box::new(self), } } } @@ -59,10 +59,10 @@ impl NeuraSequentialTail for NeuraSequential { fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, - child_network: NeuraSequential { + child_network: Box::new(NeuraSequential { layer, - child_network: (), - }, + child_network: Box::new(()), + }), } } } @@ -78,14 +78,14 @@ impl NeuraSequentialTail fn trim_tail(self) -> Self::TailTrimmed { NeuraSequential { layer: self.layer, - child_network: self.child_network.trim_tail(), + child_network: Box::new(self.child_network.trim_tail()), } } fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, - child_network: self.child_network.push_tail(layer), + child_network: Box::new(self.child_network.push_tail(layer)), } } } @@ -145,7 +145,7 @@ impl NeuraTrainableNetwork for NeuraSequential> NeuraTrainableNetwork for NeuraSequential { - type Delta = (Layer::Delta, ChildNetwork::Delta); + type Delta = (Layer::Delta, Box); fn apply_gradient(&mut self, gradient: &Self::Delta) { self.layer.apply_gradient(&gradient.0); @@ -165,11 +165,17 @@ impl Self::Delta { - (self.layer.regularize(), self.child_network.regularize()) + ( + self.layer.regularize(), + Box::new(self.child_network.regularize()), + ) } fn prepare_epoch(&mut self) { @@ -187,7 +193,7 @@ impl From for NeuraSequential { fn from(layer: Layer) -> Self { Self { layer, - child_network: (), + child_network: Box::new(()), } } } diff --git a/src/train.rs b/src/train.rs index 875ac0a..714dd4f 100644 --- a/src/train.rs +++ b/src/train.rs @@ -1,5 +1,5 @@ use crate::{ - algebra::NeuraVectorSpace, + algebra::{NeuraVector, NeuraVectorSpace}, derivable::NeuraLoss, layer::NeuraLayer, network::{sequential::NeuraSequential, NeuraTrainableNetwork}, @@ -38,8 +38,8 @@ impl NeuraBackprop { } } -impl + Clone> - NeuraGradientSolver<[f64; N], Loss::Target> for NeuraBackprop +impl> + Clone> + NeuraGradientSolver, Loss::Target> for NeuraBackprop { fn get_gradient( &self, @@ -49,7 +49,7 @@ impl + Clone> ) -> as NeuraTrainableNetwork>::Delta where NeuraSequential: - NeuraTrainableNetwork, + NeuraTrainableNetwork>, { trainable.backpropagate(input, target, self.loss.clone()).1 } @@ -62,7 +62,7 @@ impl + Clone> ) -> f64 where NeuraSequential: - NeuraTrainableNetwork, + NeuraTrainableNetwork>, { let output = trainable.eval(&input); self.loss.eval(target, &output) @@ -146,15 +146,17 @@ impl NeuraBatchedTrainer { // Contains `momentum_factor * factor * gradient_sum_previous_iter` let mut previous_gradient_sum = - as NeuraTrainableNetwork>::Delta::zero(); + Box::< as 
NeuraTrainableNetwork>::Delta>::zero(); 'd: for iteration in 0..self.iterations { - let mut gradient_sum = - as NeuraTrainableNetwork>::Delta::zero(); + let mut gradient_sum = Box::< + as NeuraTrainableNetwork>::Delta, + >::zero(); network.prepare_epoch(); for _ in 0..self.batch_size { if let Some((input, target)) = iter.next() { - let gradient = gradient_solver.get_gradient(&network, &input, &target); + let gradient = + Box::new(gradient_solver.get_gradient(&network, &input, &target)); gradient_sum.add_assign(&gradient); } else { break 'd; @@ -164,7 +166,7 @@ impl NeuraBatchedTrainer { gradient_sum.mul_assign(factor); // Add regularization gradient - let mut reg_gradient = network.regularize(); + let mut reg_gradient = Box::new(network.regularize()); reg_gradient.mul_assign(reg_factor); gradient_sum.add_assign(®_gradient); @@ -207,12 +209,15 @@ mod test { for wa in [0.0, 0.25, 0.5, 1.0] { for wb in [0.0, 0.25, 0.5, 1.0] { let network = NeuraSequential::new( - NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), + NeuraDenseLayer::new([[wa, wb]].into(), [0.0].into(), Linear, NeuraL0), (), ); - let gradient = - NeuraBackprop::new(Euclidean).get_gradient(&network, &[1.0, 1.0], &[0.0]); + let gradient = NeuraBackprop::new(Euclidean).get_gradient( + &network, + &[1.0, 1.0].into(), + &[0.0].into(), + ); let expected = wa + wb; assert!((gradient.0[0][0] - expected) < 0.001); @@ -226,24 +231,33 @@ mod test { const EPSILON: f64 = 0.00001; // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/ let network = neura_sequential![ - NeuraDenseLayer::new([[0.11, 0.21], [0.12, 0.08]], [0.0; 2], Linear, NeuraL0), - NeuraDenseLayer::new([[0.14, 0.15]], [0.0], Linear, NeuraL0) + NeuraDenseLayer::new( + [[0.11, 0.21], [0.12, 0.08]].into(), + [0.0; 2].into(), + Linear, + NeuraL0 + ), + NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0) ]; let input = [2.0, 3.0]; let target = [1.0]; - let intermediary = network.clone().trim_tail().eval(&input); + let intermediary = network.clone().trim_tail().eval(&input.into()); assert_approx!(0.85, intermediary[0], EPSILON); assert_approx!(0.48, intermediary[1], EPSILON); - assert_approx!(0.191, network.eval(&input)[0], EPSILON); + assert_approx!(0.191, network.eval(&input.into())[0], EPSILON); - assert_approx!(0.327, Euclidean.eval(&target, &network.eval(&input)), 0.001); + assert_approx!( + 0.327, + Euclidean.eval(&target.into(), &network.eval(&input.into())), + 0.001 + ); - let delta = network.eval(&input)[0] - target[0]; + let delta = network.eval(&input.into())[0] - target[0]; let (gradient_first, gradient_second) = - NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target); + NeuraBackprop::new(Euclidean).get_gradient(&network, &input.into(), &target.into()); let gradient_first = gradient_first.0; let gradient_second = gradient_second.0[0]; diff --git a/src/utils.rs b/src/utils.rs index 7367324..8fc3122 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,79 +1,4 @@ -pub(crate) fn multiply_matrix_vector( - matrix: &[[f64; WIDTH]; HEIGHT], - vector: &[f64; WIDTH], -) -> [f64; HEIGHT] { - let mut result = [0.0; HEIGHT]; - - for i in 0..HEIGHT { - let mut sum = 0.0; - for k in 0..WIDTH { - sum += matrix[i][k] * vector[k]; - } - result[i] = sum; - } - - result -} - -/// Equivalent to `multiply_matrix_vector(transpose(matrix), vector)`. 
-pub(crate) fn multiply_matrix_transpose_vector( - matrix: &[[f64; WIDTH]; HEIGHT], - vector: &[f64; HEIGHT], -) -> [f64; WIDTH] { - let mut result = [0.0; WIDTH]; - - for i in 0..WIDTH { - let mut sum = 0.0; - for k in 0..HEIGHT { - sum += matrix[k][i] * vector[k]; - } - result[i] = sum; - } - - result -} - -// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$ -pub(crate) fn reverse_dot_product( - left: &[f64; HEIGHT], - right: &[f64; WIDTH], -) -> [[f64; WIDTH]; HEIGHT] { - let mut result = [[0.0; WIDTH]; HEIGHT]; - - for i in 0..HEIGHT { - for j in 0..WIDTH { - result[i][j] = left[i] * right[j]; - } - } - - result -} - -pub(crate) fn multiply_vectors_pointwise( - left: &[f64; LENGTH], - right: &[f64; LENGTH], -) -> [f64; LENGTH] { - let mut result = [0.0; LENGTH]; - - for i in 0..LENGTH { - result[i] = left[i] * right[i]; - } - - result -} - -#[cfg(test)] -pub(crate) fn matrix_from_diagonal( - vector: &[f64; LENGTH], -) -> [[f64; LENGTH]; LENGTH] { - let mut result = [[0.0; LENGTH]; LENGTH]; - - for i in 0..LENGTH { - result[i][i] = vector[i]; - } - - result -} +use crate::algebra::NeuraVector; #[allow(dead_code)] pub(crate) fn assign_add_vector(sum: &mut [f64; N], operand: &[f64; N]) { @@ -164,9 +89,10 @@ where } #[cfg(test)] -pub(crate) fn uniform_vector() -> [f64; LENGTH] { +pub(crate) fn uniform_vector() -> NeuraVector { use rand::Rng; - let mut res = [0.0; LENGTH]; + + let mut res: NeuraVector = NeuraVector::default(); let mut rng = rand::thread_rng(); for i in 0..LENGTH { @@ -176,8 +102,8 @@ pub(crate) fn uniform_vector() -> [f64; LENGTH] { res } -pub fn one_hot(value: usize) -> [f64; N] { - let mut res = [0.0; N]; +pub fn one_hot(value: usize) -> NeuraVector { + let mut res = NeuraVector::default(); if value < N { res[value] = 1.0; } @@ -196,25 +122,6 @@ pub fn argmax(array: &[f64]) -> usize { res } -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_reverse_dot_product() { - let left = [2.0, 3.0, 5.0]; - let right = [7.0, 11.0, 13.0, 17.0]; - - let expected = [ - [14.0, 22.0, 26.0, 34.0], - [21.0, 33.0, 39.0, 51.0], - [35.0, 55.0, 65.0, 85.0], - ]; - - assert_eq!(expected, reverse_dot_product(&left, &right)); - } -} - #[cfg(test)] #[macro_export] macro_rules! assert_approx {