diff --git a/examples/xor.rs b/examples/xor.rs
new file mode 100644
index 0000000..9d19aa0
--- /dev/null
+++ b/examples/xor.rs
@@ -0,0 +1,42 @@
+#![feature(generic_arg_infer)]
+
+use neuramethyst::prelude::*;
+use neuramethyst::derivable::activation::{Relu, Tanh};
+use neuramethyst::derivable::loss::Euclidean;
+
+fn main() {
+    let mut network = neura_network![
+        neura_layer!("dense", Tanh, 2, 2),
+        neura_layer!("dense", Tanh, 3),
+        neura_layer!("dense", Relu, 1)
+    ];
+
+    let inputs = [
+        ([0.0, 0.0], [0.0]),
+        ([0.0, 1.0], [1.0]),
+        ([1.0, 0.0], [1.0]),
+        ([1.0, 1.0], [0.0])
+    ];
+
+    // println!("{:#?}", network);
+
+    for (input, target) in inputs {
+        println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]);
+    }
+
+    train_batched(
+        &mut network,
+        inputs.clone(),
+        &inputs,
+        NeuraBackprop::new(Euclidean),
+        0.01,
+        1,
+        25
+    );
+
+    // println!("{:#?}", network);
+
+    for (input, target) in inputs {
+        println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]);
+    }
+}
diff --git a/src/algebra.rs b/src/algebra.rs
index 9f7df6d..8ed39ab 100644
--- a/src/algebra.rs
+++ b/src/algebra.rs
@@ -1,33 +1,46 @@
 /// An extension of `std::ops::AddAssign` and `std::ops::Default`
-pub trait NeuraAddAssign {
+pub trait NeuraVectorSpace {
     fn add_assign(&mut self, other: &Self);
 
-    fn default() -> Self;
+    fn mul_assign(&mut self, by: f64);
+
+    fn zero() -> Self;
 }
 
-impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
+impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
     fn add_assign(&mut self, other: &Self) {
-        NeuraAddAssign::add_assign(&mut self.0, &other.0);
-        NeuraAddAssign::add_assign(&mut self.1, &other.1);
+        NeuraVectorSpace::add_assign(&mut self.0, &other.0);
+        NeuraVectorSpace::add_assign(&mut self.1, &other.1);
+    }
+
+    fn mul_assign(&mut self, by: f64) {
+        NeuraVectorSpace::mul_assign(&mut self.0, by);
+        NeuraVectorSpace::mul_assign(&mut self.1, by);
     }
 
-    fn default() -> Self {
-        (Left::default(), Right::default())
+    fn zero() -> Self {
+        (Left::zero(), Right::zero())
     }
 }
 
-impl<T: NeuraAddAssign, const N: usize> NeuraAddAssign for [T; N] {
+impl<T: NeuraVectorSpace, const N: usize> NeuraVectorSpace for [T; N] {
     fn add_assign(&mut self, other: &[T; N]) {
         for i in 0..N {
-            NeuraAddAssign::add_assign(&mut self[i], &other[i]);
+            NeuraVectorSpace::add_assign(&mut self[i], &other[i]);
+        }
+    }
+
+    fn mul_assign(&mut self, by: f64) {
+        for i in 0..N {
+            NeuraVectorSpace::mul_assign(&mut self[i], by);
         }
     }
 
-    fn default() -> Self {
+    fn zero() -> Self {
         let mut res: Vec<T> = Vec::with_capacity(N);
 
         for _ in 0..N {
-            res.push(T::default());
+            res.push(T::zero());
         }
 
         res.try_into().unwrap_or_else(|_| {
@@ -39,16 +52,20 @@ impl<T: NeuraAddAssign, const N: usize> NeuraAddAssign for [T; N] {
 
 macro_rules! base {
     ( $type:ty ) => {
-        impl NeuraAddAssign for $type {
+        impl NeuraVectorSpace for $type {
             fn add_assign(&mut self, other: &Self) {
                 std::ops::AddAssign::add_assign(self, other);
             }
 
-            fn default() -> Self {
+            fn mul_assign(&mut self, other: f64) {
+                std::ops::MulAssign::mul_assign(self, other as $type);
+            }
+
+            fn zero() -> Self {
                 <Self as Default>::default()
            }
        }
-    }
+    };
 }
 
 base!(f32);
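Note on the new `NeuraVectorSpace` trait: the blanket impls above make tuples and fixed-size arrays of vector spaces vector spaces themselves, so a `(weights, bias)` gradient pair can be summed and rescaled as one value. A minimal sketch of that usage (illustrative only; the sizes and the `average_gradients` helper are made up for the example, not part of the patch):

    use neuramethyst::algebra::NeuraVectorSpace;

    // Average two (weights, bias) gradients for a 2-input, 3-output dense layer.
    fn average_gradients(
        a: &([[f64; 2]; 3], [f64; 3]),
        b: &([[f64; 2]; 3], [f64; 3]),
    ) -> ([[f64; 2]; 3], [f64; 3]) {
        // zero() is the additive identity of the whole (weights, bias) pair
        let mut sum = <([[f64; 2]; 3], [f64; 3])>::zero();
        sum.add_assign(a);
        sum.add_assign(b);
        // rescale, the same way train_batched scales by -learning_rate / batch_size
        sum.mul_assign(0.5);
        sum
    }
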
diff --git a/src/derivable/activation.rs b/src/derivable/activation.rs
index 0a3cd1c..0bac5ee 100644
--- a/src/derivable/activation.rs
+++ b/src/derivable/activation.rs
@@ -34,3 +34,32 @@ impl NeuraDerivable<f32> for Relu {
         }
     }
 }
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Tanh;
+
+impl NeuraDerivable<f64> for Tanh {
+    #[inline(always)]
+    fn eval(&self, input: f64) -> f64 {
+        0.5 * input.tanh() + 0.5
+    }
+
+    #[inline(always)]
+    fn derivate(&self, at: f64) -> f64 {
+        let tanh = at.tanh();
+        0.5 * (1.0 - tanh * tanh)
+    }
+}
+
+impl NeuraDerivable<f32> for Tanh {
+    #[inline(always)]
+    fn eval(&self, input: f32) -> f32 {
+        0.5 * input.tanh() + 0.5
+    }
+
+    #[inline(always)]
+    fn derivate(&self, at: f32) -> f32 {
+        let tanh = at.tanh();
+        0.5 * (1.0 - tanh * tanh)
+    }
+}
diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs
index 3e35dcd..a15e4d3 100644
--- a/src/derivable/loss.rs
+++ b/src/derivable/loss.rs
@@ -1,12 +1,14 @@
 use super::NeuraLoss;
 
 #[derive(Clone, Copy, Debug, PartialEq)]
-pub struct Euclidean;
-impl<const N: usize> NeuraLoss<[f64; N]> for Euclidean {
-    type Out = f64;
+pub struct Euclidean<const N: usize>;
+
+impl<const N: usize> NeuraLoss for Euclidean<N> {
+    type Input = [f64; N];
     type Target = [f64; N];
 
-    fn eval(&self, target: [f64; N], actual: [f64; N]) -> f64 {
+    #[inline]
+    fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 {
         let mut sum_squared = 0.0;
 
         for i in 0..N {
@@ -16,7 +18,15 @@ impl<const N: usize> NeuraLoss for Euclidean<N> {
         sum_squared * 0.5
     }
 
-    fn nabla(&self, target: [f64; N], actual: [f64; N]) -> [f64; N] {
-        todo!()
+    #[inline]
+    fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] {
+        let mut res = [0.0; N];
+
+        // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
+        for i in 0..N {
+            res[i] = actual[i] - target[i];
+        }
+
+        res
     }
 }
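Since `Euclidean::eval` computes 0.5 · Σ (actualᵢ - targetᵢ)², the gradient now filled in by `nabla` is exactly actualᵢ - targetᵢ. A finite-difference probe can confirm the two stay consistent; this is an illustrative sketch only (a standalone check, not part of the patch):

    use neuramethyst::derivable::NeuraLoss;
    use neuramethyst::derivable::loss::Euclidean;

    fn main() {
        let loss = Euclidean; // N = 3 is inferred from the arrays below
        let target = [0.0, 1.0, 0.5];
        let actual = [0.25, 0.75, 0.5];
        let h = 1e-6;

        let nabla = loss.nabla(&target, &actual);
        for i in 0..3 {
            let mut bumped = actual;
            bumped[i] += h;
            // (E(y + h*e_i) - E(y)) / h ~= dE/dy_i = y_i - target_i
            let estimate = (loss.eval(&target, &bumped) - loss.eval(&target, &actual)) / h;
            assert!((estimate - nabla[i]).abs() < 1e-3);
        }
    }
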
diff --git a/src/derivable/mod.rs b/src/derivable/mod.rs
index 5c3db62..9888423 100644
--- a/src/derivable/mod.rs
+++ b/src/derivable/mod.rs
@@ -8,13 +8,13 @@ pub trait NeuraDerivable<F> {
     fn derivate(&self, at: F) -> F;
 }
 
-pub trait NeuraLoss<F> {
-    type Out;
+pub trait NeuraLoss {
+    type Input;
     type Target;
 
-    fn eval(&self, target: Self::Target, actual: F) -> Self::Out;
+    fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64;
 
     /// Should return the gradient of the loss function according to `actual`
     /// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
-    fn nabla(&self, target: Self::Target, actual: F) -> F;
+    fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
 }
diff --git a/src/layer/dense.rs b/src/layer/dense.rs
index 337bdc1..2929f22 100644
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@@ -1,7 +1,8 @@
 use super::NeuraLayer;
-use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer};
+use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
 use rand::Rng;
+#[derive(Clone, Debug)]
 pub struct NeuraDenseLayer<
     Act: NeuraDerivable<f64>,
     const INPUT_LEN: usize,
     const OUTPUT_LEN: usize,
@@ -34,7 +35,7 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
 
         for i in 0..OUTPUT_LEN {
             for j in 0..INPUT_LEN {
-                weights[i][j] = rng.gen::<f64>() * multiplier;
+                weights[i][j] = rng.gen_range(-multiplier..multiplier);
             }
         }
 
@@ -88,6 +89,11 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
 
         (new_epsilon, (weights_gradient, bias_gradient))
     }
+
+    fn apply_gradient(&mut self, gradient: &Self::Delta) {
+        NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
+        NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
+    }
 }
 
 #[cfg(test)]
diff --git a/src/lib.rs b/src/lib.rs
index 0a3fc5d..d17f734 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,9 +1,19 @@
 #![feature(generic_arg_infer)]
 
+pub mod algebra;
 pub mod derivable;
 pub mod layer;
 pub mod network;
 pub mod train;
-pub mod algebra;
 
 mod utils;
+
+pub mod prelude {
+    // Macros
+    pub use crate::{neura_network, neura_layer};
+
+    // Structs and traits
+    pub use super::network::{NeuraNetwork};
+    pub use super::layer::{NeuraLayer, NeuraDenseLayer};
+    pub use super::train::{NeuraBackprop, train_batched};
+}
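With `apply_gradient` now available on the dense layer (and on `NeuraTrainableLayer` in general, see src/train.rs below), a gradient-descent step on a single layer is just backpropagate, scale, apply. An illustrative sketch (the `sgd_step` helper and its calling convention are assumptions, not part of the patch):

    use neuramethyst::algebra::NeuraVectorSpace;
    use neuramethyst::train::NeuraTrainableLayer;

    // `epsilon` is the backpropagation term handed down by the next layer.
    fn sgd_step<L: NeuraTrainableLayer>(
        layer: &mut L,
        input: &L::Input,
        epsilon: L::Output,
        learning_rate: f64,
    ) {
        let (_epsilon_prev, mut delta) = layer.backpropagate(input, epsilon);
        // descend: scale δW_l by -η before adding it to the weights
        delta.mul_assign(-learning_rate);
        layer.apply_gradient(&delta);
    }
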
diff --git a/src/network.rs b/src/network.rs
index 0fddb33..29ac603 100644
--- a/src/network.rs
+++ b/src/network.rs
@@ -1,5 +1,10 @@
-use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss};
+use crate::{
+    derivable::NeuraLoss,
+    layer::NeuraLayer,
+    train::{NeuraTrainable, NeuraTrainableLayer},
+};
+#[derive(Clone, Debug)]
 pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
     layer: Layer,
     child_network: ChildNetwork,
 }
@@ -62,20 +67,44 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLa
 impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
     type Delta = Layer::Delta;
 
-    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
+    fn apply_gradient(&mut self, gradient: &Self::Delta) {
+        self.layer.apply_gradient(gradient);
+    }
+
+    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+        &self,
+        input: &Self::Input,
+        target: &Loss::Target,
+        loss: Loss,
+    ) -> (Self::Input, Self::Delta) {
         let final_activation = self.layer.eval(input);
-        let backprop_epsilon = loss.nabla(target, final_activation);
+        let backprop_epsilon = loss.nabla(target, &final_activation);
         self.layer.backpropagate(&input, backprop_epsilon)
     }
 }
 
-impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> {
+impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable
+    for NeuraNetwork<Layer, ChildNetwork>
+{
     type Delta = (Layer::Delta, ChildNetwork::Delta);
 
-    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
+    fn apply_gradient(&mut self, gradient: &Self::Delta) {
+        self.layer.apply_gradient(&gradient.0);
+        self.child_network.apply_gradient(&gradient.1);
+    }
+
+    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+        &self,
+        input: &Self::Input,
+        target: &Loss::Target,
+        loss: Loss,
+    ) -> (Self::Input, Self::Delta) {
         let next_activation = self.layer.eval(input);
-        let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss);
-        let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient);
+        let (backprop_gradient, weights_gradient) =
+            self.child_network
+                .backpropagate(&next_activation, target, loss);
+        let (backprop_gradient, layer_gradient) =
+            self.layer.backpropagate(input, backprop_gradient);
 
         (backprop_gradient, (layer_gradient, weights_gradient))
     }
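Because these recursive impls build the network's `Delta` out of nested pairs of per-layer deltas (for a three-layer chain the gradient ends up shaped like (δW₁, (δW₂, δW₃)), assuming the nesting produced by neura_network!), and nested pairs are themselves a `NeuraVectorSpace`, a whole-network update stays a two-liner. Illustrative sketch only:

    use neuramethyst::algebra::NeuraVectorSpace;
    use neuramethyst::train::NeuraTrainable;

    fn apply_scaled_gradient<N: NeuraTrainable>(network: &mut N, mut gradient: N::Delta, learning_rate: f64) {
        gradient.mul_assign(-learning_rate); // gradient-descent direction
        network.apply_gradient(&gradient);
    }
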
diff --git a/src/train.rs b/src/train.rs
index f0c126a..adc23fb 100644
--- a/src/train.rs
+++ b/src/train.rs
@@ -1,14 +1,13 @@
 use crate::{
+    // utils::{assign_add_vector, chunked},
+    algebra::NeuraVectorSpace,
     derivable::NeuraLoss,
     layer::NeuraLayer,
     network::NeuraNetwork,
-    // utils::{assign_add_vector, chunked},
-    algebra::NeuraAddAssign,
 };
 
-
 pub trait NeuraTrainableLayer: NeuraLayer {
-    type Delta: NeuraAddAssign;
+    type Delta: NeuraVectorSpace;
 
     /// Computes the backpropagation term and the derivative of the internal weights,
     /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
@@ -19,44 +18,134 @@ pub trait NeuraTrainableLayer: NeuraLayer {
     /// ```
     ///
     /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
-    /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)`.
-    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta);
+    /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
+    /// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
+    fn backpropagate(
+        &self,
+        input: &Self::Input,
+        epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta);
+
+    /// Applies `δW_l` to the weights of the layer
+    fn apply_gradient(&mut self, gradient: &Self::Delta);
 }
 
 pub trait NeuraTrainable: NeuraLayer {
-    type Delta: NeuraAddAssign;
+    type Delta: NeuraVectorSpace;
+
+    fn apply_gradient(&mut self, gradient: &Self::Delta);
 
-    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta);
+    /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
+    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+        &self,
+        input: &Self::Input,
+        target: &Loss::Target,
+        loss: Loss,
+    ) -> (Self::Input, Self::Delta);
 }
 
-pub trait NeuraTrainer<Output, Loss: NeuraLoss<Output>> {
+pub trait NeuraTrainer<Output, Target> {
     fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
         &self,
         trainable: &NeuraNetwork<Layer, ChildNetwork>,
         input: &Layer::Input,
-        target: Loss::Target,
-        loss: Loss,
-    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
-        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable
-    ;
+        target: &Target,
+    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
+    where
+        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable;
+
+    fn score<Layer: NeuraLayer, ChildNetwork>(
+        &self,
+        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        input: &Layer::Input,
+        target: &Target,
+    ) -> f64
+    where
+        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable;
 }
 
 #[non_exhaustive]
-pub struct NeuraBackprop {
-    pub epsilon: f64,
-    pub batch_size: usize,
+pub struct NeuraBackprop<Loss> {
+    loss: Loss,
+}
+
+impl<Loss> NeuraBackprop<Loss> {
+    pub fn new(loss: Loss) -> Self {
+        Self { loss }
+    }
 }
 
-impl<const N: usize, Loss: NeuraLoss<[f64; N]>> NeuraTrainer<[f64; N], Loss> for NeuraBackprop {
+impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraTrainer<[f64; N], Loss::Target>
+    for NeuraBackprop<Loss>
+{
     fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
         &self,
         trainable: &NeuraNetwork<Layer, ChildNetwork>,
         input: &Layer::Input,
-        target: Loss::Target,
-        loss: Loss,
-    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
+        target: &Loss::Target,
+    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
+    where
+        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable,
+    {
+        trainable.backpropagate(input, target, self.loss.clone()).1
+    }
+
+    fn score<Layer: NeuraLayer, ChildNetwork>(
+        &self,
+        trainable: &NeuraNetwork<Layer, ChildNetwork>,
+        input: &Layer::Input,
+        target: &Loss::Target,
+    ) -> f64
+    where
         NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable,
     {
-        trainable.backpropagate(input, target, loss).1
+        self.loss.eval(target, &trainable.eval(&input))
+    }
+}
+
+pub fn train_batched<
+    Output,
+    Target,
+    Trainer: NeuraTrainer<Output, Target>,
+    Layer: NeuraLayer,
+    ChildNetwork,
+    Inputs: IntoIterator<Item = (Layer::Input, Target)>,
+>(
+    network: &mut NeuraNetwork<Layer, ChildNetwork>,
+    inputs: Inputs,
+    test_inputs: &[(Layer::Input, Target)],
+    trainer: Trainer,
+    learning_rate: f64,
+    batch_size: usize,
+    epochs: usize,
+) where
+    NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable,
+    Inputs::IntoIter: Clone,
+{
+    // TODO: apply shuffling?
+    let mut iter = inputs.into_iter().cycle();
+    let factor = -learning_rate / (batch_size as f64);
+
+    'd: for epoch in 0..epochs {
+        let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
+
+        for _ in 0..batch_size {
+            if let Some((input, target)) = iter.next() {
+                let gradient = trainer.get_gradient(&network, &input, &target);
+                gradient_sum.add_assign(&gradient);
+            } else {
+                break 'd;
+            }
+        }
+
+        gradient_sum.mul_assign(factor);
+        network.apply_gradient(&gradient_sum);
+
+        let mut loss_sum = 0.0;
+        for (input, target) in test_inputs {
+            loss_sum += trainer.score(&network, input, target);
+        }
+        loss_sum /= test_inputs.len() as f64;
+        println!("Epoch {epoch}, Loss: {:.3}", loss_sum);
     }
 }
diff --git a/src/utils.rs b/src/utils.rs
index 94d2bf2..7b63642 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -35,7 +35,7 @@ pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT: usize>(
     left: &[f64; HEIGHT],
-    right: &[f64; WIDTH]
+    right: &[f64; WIDTH],
 ) -> [[f64; WIDTH]; HEIGHT] {
     let mut result = [[0.0; WIDTH]; HEIGHT];
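For reference, the per-iteration update that train_batched performs reduces to w ← w - (learning_rate / batch_size) · Σᵢ gᵢ, i.e. one descent step along the averaged batch gradient. A toy scalar version of the same arithmetic (illustrative only, not part of the crate):

    fn batched_update(weight: f64, gradients: &[f64], learning_rate: f64) -> f64 {
        // mirrors `factor = -learning_rate / (batch_size as f64)` in train_batched
        let factor = -learning_rate / (gradients.len() as f64);
        let sum: f64 = gradients.iter().sum();
        weight + factor * sum
    }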