🔥 Attempt at backpropagation

My head is tired :/
main
Shad Amethyst 2 years ago
parent 5a20acf595
commit d3d5f57a2b

@@ -7,4 +7,5 @@ edition = "2021"

[dependencies]
ndarray = "^0.15"
# num-traits = "0.2.15"
rand = "^0.8"

@@ -1,6 +1,8 @@
/// An extension of `std::ops::AddAssign` and `std::ops::Default`
pub trait NeuraAddAssign {
    fn add_assign(&mut self, other: &Self);

    fn default() -> Self;
}

impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
@@ -8,14 +10,31 @@ impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Righ
        NeuraAddAssign::add_assign(&mut self.0, &other.0);
        NeuraAddAssign::add_assign(&mut self.1, &other.1);
    }

    fn default() -> Self {
        (Left::default(), Right::default())
    }
}

impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] {
    fn add_assign(&mut self, other: &[T; N]) {
        for i in 0..N {
            NeuraAddAssign::add_assign(&mut self[i], &other[i]);
        }
    }

    fn default() -> Self {
        let mut res: Vec<T> = Vec::with_capacity(N);

        for _ in 0..N {
            res.push(T::default());
        }

        res.try_into().unwrap_or_else(|_| {
            // TODO: check that this panic is optimized away
            unreachable!()
        })
    }
}

macro_rules! base {
@@ -24,6 +43,10 @@ macro_rules! base {
            fn add_assign(&mut self, other: &Self) {
                std::ops::AddAssign::add_assign(self, other);
            }

            fn default() -> Self {
                <Self as Default>::default()
            }
        }
    }
}
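
For reference, a standalone sketch (not part of the crate) of the pattern these impls enable: a zero-valued gradient of any nested shape can be created with `default()`, and per-sample gradients summed into it with `add_assign`. The trait copy and the numbers below are made up purely for illustration.

// Minimal standalone sketch: same pattern as `NeuraAddAssign`, under a different name.
trait AddAssignDefault {
    fn add_assign(&mut self, other: &Self);
    fn default() -> Self;
}

impl AddAssignDefault for f64 {
    fn add_assign(&mut self, other: &Self) { *self += *other; }
    fn default() -> Self { 0.0 }
}

impl<A: AddAssignDefault, B: AddAssignDefault> AddAssignDefault for (A, B) {
    fn add_assign(&mut self, other: &Self) {
        self.0.add_assign(&other.0);
        self.1.add_assign(&other.1);
    }
    fn default() -> Self { (A::default(), B::default()) }
}

fn main() {
    // A "(weight, bias)"-shaped gradient for a 1-in/1-out layer, summed over two samples.
    let mut total: (f64, f64) = AddAssignDefault::default();
    for sample_gradient in [(0.5, -0.125), (0.25, 0.375)] {
        total.add_assign(&sample_gradient);
    }
    assert_eq!(total, (0.75, 0.25));
}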

@@ -1,5 +1,5 @@
use super::NeuraLayer;
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer};
use rand::Rng;

pub struct NeuraDenseLayer<
@@ -65,6 +65,31 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
    }
}

impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraTrainableLayer
    for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
{
    type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]);

    // TODO: double-check the math in this
    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) {
        let evaluated = multiply_matrix_vector(&self.weights, input);

        // Compute delta from epsilon, with `self.activation'(z) * epsilon = delta`
        let mut delta = epsilon.clone();
        for i in 0..OUTPUT_LEN {
            delta[i] *= self.activation.derivate(evaluated[i]);
        }

        let weights_gradient = reverse_dot_product(&delta, input);
        // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation,
        // the gradient of the bias is equal to the delta term of the backpropagation algorithm.
        let bias_gradient = delta;

        let new_epsilon = multiply_matrix_transpose_vector(&self.weights, &delta);

        (new_epsilon, (weights_gradient, bias_gradient))
    }
}
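
A quick scalar sanity check of the formulas used in `backpropagate` above. This is standalone, illustrative code, not crate code; the quadratic activation and the numbers are assumptions chosen only to make the arithmetic visible.

// Scalar (1-in / 1-out) version of the dense-layer backpropagation step.
fn main() {
    let weight = 0.5_f64;
    let input = 2.0_f64;
    let epsilon = 0.25_f64; // backpropagation term handed down by the next layer

    // Hypothetical activation f(z) = z^2, so f'(z) = 2 * z.
    let z = weight * input;
    let delta = 2.0 * z * epsilon;        // delta = f'(z) * epsilon
    let weights_gradient = delta * input; // reverse_dot_product(&delta, input) in 1D
    let bias_gradient = delta;            // gradient of the bias is delta itself
    let new_epsilon = weight * delta;     // multiply_matrix_transpose_vector in 1D

    println!("{weights_gradient} {bias_gradient} {new_epsilon}");
}
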
#[cfg(test)]
mod test {
    use super::*;

@@ -3,6 +3,7 @@
pub mod derivable;
pub mod layer;
pub mod network;
pub mod train;
pub mod algebra;

mod utils;

@ -1,4 +1,4 @@
use crate::{layer::NeuraLayer, train::NeuraTrainable}; use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss};
pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> { pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
layer: Layer, layer: Layer,
@ -23,6 +23,10 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraNetwork<Layer, ChildNetwork> {
pub fn child_network(&self) -> &ChildNetwork { pub fn child_network(&self) -> &ChildNetwork {
&self.child_network &self.child_network
} }
pub fn layer(&self) -> &Layer {
&self.layer
}
} }
impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> { impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> {
@ -55,6 +59,28 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLa
} }
} }
impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
type Delta = Layer::Delta;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
let final_activation = self.layer.eval(input);
let backprop_epsilon = loss.nabla(target, final_activation);
self.layer.backpropagate(&input, backprop_epsilon)
}
}
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> {
type Delta = (Layer::Delta, ChildNetwork::Delta);
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient);
(backprop_gradient, (layer_gradient, weights_gradient))
}
}
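
With these two impls, a network's gradient is a tuple nested the same way as the network itself. As an illustration (not part of this commit; `Act` stands for any activation implementing `NeuraDerivable<f64>` from `crate::derivable`):

// Hypothetical 4 -> 3 -> 2 stack of dense layers.
type Net<Act> = NeuraNetwork<
    NeuraDenseLayer<Act, 4, 3>,
    NeuraNetwork<NeuraDenseLayer<Act, 3, 2>, ()>,
>;
// The two impls above then make the full gradient
//   <Net<Act> as NeuraTrainable>::Delta
//     = (([[f64; 4]; 3], [f64; 3]), ([[f64; 3]; 2], [f64; 2]))
// i.e. one (weights, bias) pair per layer, nested like the network.
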
#[macro_export]
macro_rules! neura_network {
    [] => {

@@ -0,0 +1,62 @@
use crate::{
    derivable::NeuraLoss,
    layer::NeuraLayer,
    network::NeuraNetwork,
    // utils::{assign_add_vector, chunked},
    algebra::NeuraAddAssign,
};

pub trait NeuraTrainableLayer: NeuraLayer {
    type Delta: NeuraAddAssign;

    /// Computes the backpropagation term and the derivative of the internal weights,
    /// using the `input` vector output by the previous layer and the backpropagation term `epsilon` of the next layer.
    ///
    /// Note: we introduce the term `epsilon`, which together with the activation of the current layer can be used to compute `delta_l`:
    /// ```no_rust
    /// f_l'(a_l) * epsilon_l = delta_l
    /// ```
    ///
    /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
    /// with `epsilon_{l-1}` still to be multiplied by `f_{l-1}'(activation)` by the previous layer.
    fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta);
}

pub trait NeuraTrainable: NeuraLayer {
    type Delta: NeuraAddAssign;

    fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta);
}

pub trait NeuraTrainer<F, Loss: NeuraLoss<F>> {
    fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
        &self,
        trainable: &NeuraNetwork<Layer, ChildNetwork>,
        input: &Layer::Input,
        target: Loss::Target,
        loss: Loss,
    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
    where
        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = F>;
}

#[non_exhaustive]
pub struct NeuraBackprop {
    pub epsilon: f64,
    pub batch_size: usize,
}

impl<const N: usize, Loss: NeuraLoss<[f64; N]>> NeuraTrainer<[f64; N], Loss> for NeuraBackprop {
    fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
        &self,
        trainable: &NeuraNetwork<Layer, ChildNetwork>,
        input: &Layer::Input,
        target: Loss::Target,
        loss: Loss,
    ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
    where
        NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
    {
        trainable.backpropagate(input, target, loss).1
    }
}
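
A sketch of where this seems to be headed (not part of this commit): because `Delta` is bounded by `NeuraAddAssign`, the per-sample gradients returned by `get_gradient` can be summed into a batch gradient. The function name and the `Copy` bounds below are assumptions made for the example so that the loss and target can be reused across the batch.

// Hypothetical helper that could live in this module alongside NeuraBackprop.
pub fn batch_gradient<const N: usize, Layer, ChildNetwork, Loss>(
    trainer: &NeuraBackprop,
    network: &NeuraNetwork<Layer, ChildNetwork>,
    batch: &[(Layer::Input, Loss::Target)],
    loss: Loss,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
where
    Layer: NeuraLayer,
    Loss: NeuraLoss<[f64; N]> + Copy,
    Loss::Target: Copy,
    NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{
    // Start from the zero gradient provided by `NeuraAddAssign::default`...
    let mut total: <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta =
        NeuraAddAssign::default();

    // ...and sum the per-sample gradients into it.
    for (input, target) in batch {
        let gradient = <NeuraBackprop as NeuraTrainer<[f64; N], Loss>>::get_gradient(
            trainer, network, input, *target, loss,
        );
        NeuraAddAssign::add_assign(&mut total, &gradient);
    }

    total
}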

@@ -15,6 +15,39 @@ pub(crate) fn multiply_matrix_vector<const WIDTH: usize, const HEIGHT: usize>(
    result
}

/// Equivalent to `multiply_matrix_vector(transpose(matrix), vector)`.
pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT: usize>(
    matrix: &[[f64; WIDTH]; HEIGHT],
    vector: &[f64; HEIGHT],
) -> [f64; WIDTH] {
    let mut result = [0.0; WIDTH];

    for i in 0..WIDTH {
        let mut sum = 0.0;
        for k in 0..HEIGHT {
            sum += matrix[k][i] * vector[k];
        }
        result[i] = sum;
    }

    result
}

pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
    left: &[f64; HEIGHT],
    right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] {
    let mut result = [[0.0; WIDTH]; HEIGHT];

    for i in 0..HEIGHT {
        for j in 0..WIDTH {
            result[i][j] = left[i] * right[j];
        }
    }

    result
}

pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
    for i in 0..N {
        sum[i] += operand[i];
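
A hypothetical test (not in this commit) pinning down what the two new helpers compute: `reverse_dot_product` is the outer product `left * right^T`, and `multiply_matrix_transpose_vector` computes `M^T * v`.

#[cfg(test)]
mod transpose_helpers_test {
    use super::*;

    #[test]
    fn outer_product_and_transpose_multiply() {
        let left = [1.0, 2.0];       // HEIGHT = 2
        let right = [3.0, 4.0, 5.0]; // WIDTH = 3

        // Outer product: result[i][j] = left[i] * right[j]
        assert_eq!(
            reverse_dot_product(&left, &right),
            [[3.0, 4.0, 5.0], [6.0, 8.0, 10.0]]
        );

        // M^T * v, where M is HEIGHT x WIDTH and v has HEIGHT entries
        let matrix = [[1.0, 0.0, 2.0], [0.0, 1.0, 3.0]];
        let vector = [10.0, 100.0];
        assert_eq!(
            multiply_matrix_transpose_vector(&matrix, &vector),
            [10.0, 100.0, 320.0]
        );
    }
}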
