🔥 Semi-working training, although it only seems to want to converge to zero

main
Shad Amethyst 2 years ago
parent d3d5f57a2b
commit 7a6921a1c1

@ -0,0 +1,42 @@
#![feature(generic_arg_infer)]
use neuramethyst::prelude::*;
use neuramethyst::derivable::activation::{Relu, Tanh};
use neuramethyst::derivable::loss::Euclidean;
/// Example binary: builds a small dense network, prints its predictions on the
/// four XOR samples, trains it with `train_batched`, then prints the
/// predictions again so the effect of training can be compared.
fn main() {
    let mut network = neura_network![
        neura_layer!("dense", Tanh, 2, 2),
        neura_layer!("dense", Tanh, 3),
        neura_layer!("dense", Relu, 1)
    ];

    // XOR truth table, stored as (input, expected output) pairs.
    let samples = [
        ([0.0, 0.0], [0.0]),
        ([0.0, 1.0], [1.0]),
        ([1.0, 0.0], [1.0]),
        ([1.0, 1.0], [0.0])
    ];

    // println!("{:#?}", network);

    // Predictions before training.
    for (input, target) in &samples {
        let actual = network.eval(input)[0];
        println!("Input: {:?}, target: {}, actual: {}", input, target[0], actual);
    }

    // The same four samples serve both as the training stream and as the test
    // set; the trailing arguments are learning rate, batch size and epochs.
    train_batched(
        &mut network,
        samples,
        &samples,
        NeuraBackprop::new(Euclidean),
        0.01,
        1,
        25,
    );

    // println!("{:#?}", network);

    // Predictions after training.
    for (input, target) in &samples {
        let actual = network.eval(input)[0];
        println!("Input: {:?}, target: {}, actual: {}", input, target[0], actual);
    }
}

@ -1,33 +1,46 @@
/// An extension of `std::ops::AddAssign` and `std::ops::Default` /// An extension of `std::ops::AddAssign` and `std::ops::Default`
pub trait NeuraAddAssign { pub trait NeuraVectorSpace {
fn add_assign(&mut self, other: &Self); fn add_assign(&mut self, other: &Self);
fn default() -> Self; fn mul_assign(&mut self, by: f64);
fn zero() -> Self;
} }
impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) { impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
fn add_assign(&mut self, other: &Self) { fn add_assign(&mut self, other: &Self) {
NeuraAddAssign::add_assign(&mut self.0, &other.0); NeuraVectorSpace::add_assign(&mut self.0, &other.0);
NeuraAddAssign::add_assign(&mut self.1, &other.1); NeuraVectorSpace::add_assign(&mut self.1, &other.1);
}
fn mul_assign(&mut self, by: f64) {
NeuraVectorSpace::mul_assign(&mut self.0, by);
NeuraVectorSpace::mul_assign(&mut self.1, by);
} }
fn default() -> Self { fn zero() -> Self {
(Left::default(), Right::default()) (Left::zero(), Right::zero())
} }
} }
impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] { impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
fn add_assign(&mut self, other: &[T; N]) { fn add_assign(&mut self, other: &[T; N]) {
for i in 0..N { for i in 0..N {
NeuraAddAssign::add_assign(&mut self[i], &other[i]); NeuraVectorSpace::add_assign(&mut self[i], &other[i]);
}
}
fn mul_assign(&mut self, by: f64) {
for i in 0..N {
NeuraVectorSpace::mul_assign(&mut self[i], by);
} }
} }
fn default() -> Self { fn zero() -> Self {
let mut res: Vec<T> = Vec::with_capacity(N); let mut res: Vec<T> = Vec::with_capacity(N);
for _ in 0..N { for _ in 0..N {
res.push(T::default()); res.push(T::zero());
} }
res.try_into().unwrap_or_else(|_| { res.try_into().unwrap_or_else(|_| {
@ -39,16 +52,20 @@ impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] {
macro_rules! base { macro_rules! base {
( $type:ty ) => { ( $type:ty ) => {
impl NeuraAddAssign for $type { impl NeuraVectorSpace for $type {
fn add_assign(&mut self, other: &Self) { fn add_assign(&mut self, other: &Self) {
std::ops::AddAssign::add_assign(self, other); std::ops::AddAssign::add_assign(self, other);
} }
fn default() -> Self { fn mul_assign(&mut self, other: f64) {
<Self as Default>::default() std::ops::MulAssign::mul_assign(self, other as $type);
} }
fn zero() -> Self {
<Self as Default>::default()
} }
} }
};
} }
base!(f32); base!(f32);

@ -34,3 +34,32 @@ impl NeuraDerivable<f32> for Relu {
} }
} }
} }
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Tanh;
impl NeuraDerivable<f64> for Tanh {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
0.5 * input.tanh() + 0.5
}
#[inline(always)]
fn derivate(&self, at: f64) -> f64 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}
impl NeuraDerivable<f32> for Tanh {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
0.5 * input.tanh() + 0.5
}
#[inline(always)]
fn derivate(&self, at: f32) -> f32 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}

@ -1,12 +1,14 @@
use super::NeuraLoss; use super::NeuraLoss;
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub struct Euclidean; pub struct Euclidean<const N: usize>;
impl<const N: usize> NeuraLoss<[f64; N]> for Euclidean {
type Out = f64; impl<const N: usize> NeuraLoss for Euclidean<N> {
type Input = [f64; N];
type Target = [f64; N]; type Target = [f64; N];
fn eval(&self, target: [f64; N], actual: [f64; N]) -> f64 { #[inline]
fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 {
let mut sum_squared = 0.0; let mut sum_squared = 0.0;
for i in 0..N { for i in 0..N {
@ -16,7 +18,15 @@ impl<const N: usize> NeuraLoss<[f64; N]> for Euclidean {
sum_squared * 0.5 sum_squared * 0.5
} }
fn nabla(&self, target: [f64; N], actual: [f64; N]) -> [f64; N] { #[inline]
todo!() fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] {
let mut res = [0.0; N];
// ∂E(y)/∂yᵢ = yᵢ - yᵢ'
for i in 0..N {
res[i] = actual[i] - target[i];
}
res
} }
} }

@ -8,13 +8,13 @@ pub trait NeuraDerivable<F> {
fn derivate(&self, at: F) -> F; fn derivate(&self, at: F) -> F;
} }
pub trait NeuraLoss<F> { pub trait NeuraLoss {
type Out; type Input;
type Target; type Target;
fn eval(&self, target: Self::Target, actual: F) -> Self::Out; fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64;
/// Should return the gradient of the loss function according to `actual` /// Should return the gradient of the loss function according to `actual`
/// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$). /// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
fn nabla(&self, target: Self::Target, actual: F) -> F; fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
} }

@ -1,7 +1,8 @@
use super::NeuraLayer; use super::NeuraLayer;
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer}; use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
use rand::Rng; use rand::Rng;
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer< pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>, Act: NeuraDerivable<f64>,
const INPUT_LEN: usize, const INPUT_LEN: usize,
@ -34,7 +35,7 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN { for j in 0..INPUT_LEN {
weights[i][j] = rng.gen::<f64>() * multiplier; weights[i][j] = rng.gen_range(-multiplier..multiplier);
} }
} }
@ -88,6 +89,11 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
(new_epsilon, (weights_gradient, bias_gradient)) (new_epsilon, (weights_gradient, bias_gradient))
} }
/// Adds `gradient` to the layer's parameters in place: the first component is
/// added to the weights and the second to the bias. No scaling is applied
/// here, so the caller must already have folded in any learning-rate factor.
fn apply_gradient(&mut self, gradient: &Self::Delta) {
    let (weights_delta, bias_delta) = gradient;
    NeuraVectorSpace::add_assign(&mut self.weights, weights_delta);
    NeuraVectorSpace::add_assign(&mut self.bias, bias_delta);
}
} }
#[cfg(test)] #[cfg(test)]

@ -1,9 +1,19 @@
#![feature(generic_arg_infer)] #![feature(generic_arg_infer)]
pub mod algebra;
pub mod derivable; pub mod derivable;
pub mod layer; pub mod layer;
pub mod network; pub mod network;
pub mod train; pub mod train;
pub mod algebra;
mod utils; mod utils;
/// Convenience re-exports of the macros, traits and types needed to build and
/// train a network, meant to be glob-imported (`use ...::prelude::*`).
pub mod prelude {
    // Macros
    pub use crate::{neura_network, neura_layer};

    // Structs and traits
    pub use super::layer::{NeuraDenseLayer, NeuraLayer};
    pub use super::network::NeuraNetwork;
    pub use super::train::{train_batched, NeuraBackprop};
}

@ -1,5 +1,10 @@
use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss}; use crate::{
derivable::NeuraLoss,
layer::NeuraLayer,
train::{NeuraTrainable, NeuraTrainableLayer},
};
#[derive(Clone, Debug)]
pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> { pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
layer: Layer, layer: Layer,
child_network: ChildNetwork, child_network: ChildNetwork,
@ -62,20 +67,44 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLa
impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> { impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
type Delta = Layer::Delta; type Delta = Layer::Delta;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) { fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(gradient);
}
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
let final_activation = self.layer.eval(input); let final_activation = self.layer.eval(input);
let backprop_epsilon = loss.nabla(target, final_activation); let backprop_epsilon = loss.nabla(target, &final_activation);
self.layer.backpropagate(&input, backprop_epsilon) self.layer.backpropagate(&input, backprop_epsilon)
} }
} }
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> { impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable
for NeuraNetwork<Layer, ChildNetwork>
{
type Delta = (Layer::Delta, ChildNetwork::Delta); type Delta = (Layer::Delta, ChildNetwork::Delta);
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) { fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(&gradient.0);
self.child_network.apply_gradient(&gradient.1);
}
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
let next_activation = self.layer.eval(input); let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss); let (backprop_gradient, weights_gradient) =
let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient); self.child_network
.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) =
self.layer.backpropagate(input, backprop_gradient);
(backprop_gradient, (layer_gradient, weights_gradient)) (backprop_gradient, (layer_gradient, weights_gradient))
} }

@ -1,14 +1,13 @@
use crate::{ use crate::{
// utils::{assign_add_vector, chunked},
algebra::NeuraVectorSpace,
derivable::NeuraLoss, derivable::NeuraLoss,
layer::NeuraLayer, layer::NeuraLayer,
network::NeuraNetwork, network::NeuraNetwork,
// utils::{assign_add_vector, chunked},
algebra::NeuraAddAssign,
}; };
pub trait NeuraTrainableLayer: NeuraLayer { pub trait NeuraTrainableLayer: NeuraLayer {
type Delta: NeuraAddAssign; type Delta: NeuraVectorSpace;
/// Computes the backpropagation term and the derivative of the internal weights, /// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer. /// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
@ -19,44 +18,134 @@ pub trait NeuraTrainableLayer: NeuraLayer {
/// ``` /// ```
/// ///
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`, /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)`. /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta); /// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
} }
pub trait NeuraTrainable: NeuraLayer { pub trait NeuraTrainable: NeuraLayer {
type Delta: NeuraAddAssign; type Delta: NeuraVectorSpace;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta); fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta);
} }
pub trait NeuraTrainer<F, Loss: NeuraLoss<F>> { pub trait NeuraTrainer<Output, Target = Output> {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>( fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self, &self,
trainable: &NeuraNetwork<Layer, ChildNetwork>, trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input, input: &Layer::Input,
target: Loss::Target, target: &Target,
loss: Loss, ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = F> NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
;
/// Returns a scalar score for `trainable` on a single `(input, target)` pair.
///
/// `train_batched` averages this value over its test set after every epoch
/// and prints the result as the loss, so lower is expected to mean better.
fn score<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: &Target,
) -> f64
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
} }
#[non_exhaustive] #[non_exhaustive]
pub struct NeuraBackprop { pub struct NeuraBackprop<Loss: NeuraLoss + Clone> {
pub epsilon: f64, loss: Loss,
pub batch_size: usize,
} }
impl<const N: usize, Loss: NeuraLoss<[f64; N]>> NeuraTrainer<[f64; N], Loss> for NeuraBackprop { impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
pub fn new(loss: Loss) -> Self {
Self { loss }
}
}
impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraTrainer<[f64; N], Loss::Target>
for NeuraBackprop<Loss>
{
fn get_gradient<Layer: NeuraLayer, ChildNetwork>( fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self, &self,
trainable: &NeuraNetwork<Layer, ChildNetwork>, trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input, input: &Layer::Input,
target: Loss::Target, target: &Loss::Target,
loss: Loss, ) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>, NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{ {
trainable.backpropagate(input, target, loss).1 trainable.backpropagate(input, target, self.loss.clone()).1
}
/// Evaluates `trainable` on `input` and returns the stored loss between
/// `target` and the resulting output.
fn score<Layer: NeuraLayer, ChildNetwork>(
    &self,
    trainable: &NeuraNetwork<Layer, ChildNetwork>,
    input: &Layer::Input,
    target: &Loss::Target,
) -> f64
where
    NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{
    let prediction = trainable.eval(input);
    self.loss.eval(target, &prediction)
}
}
pub fn train_batched<
Output,
Target,
Trainer: NeuraTrainer<Output, Target>,
Layer: NeuraLayer,
ChildNetwork,
Inputs: IntoIterator<Item = (Layer::Input, Target)>,
>(
network: &mut NeuraNetwork<Layer, ChildNetwork>,
inputs: Inputs,
test_inputs: &[(Layer::Input, Target)],
trainer: Trainer,
learning_rate: f64,
batch_size: usize,
epochs: usize,
) where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>,
Inputs::IntoIter: Clone,
{
// TODO: apply shuffling?
let mut iter = inputs.into_iter().cycle();
let factor = -learning_rate / (batch_size as f64);
'd: for epoch in 0..epochs {
let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
for _ in 0..batch_size {
if let Some((input, target)) = iter.next() {
let gradient = trainer.get_gradient(&network, &input, &target);
gradient_sum.add_assign(&gradient);
} else {
break 'd;
}
}
gradient_sum.mul_assign(factor);
network.apply_gradient(&gradient_sum);
let mut loss_sum = 0.0;
for (input, target) in test_inputs {
loss_sum += trainer.score(&network, input, target);
}
loss_sum /= test_inputs.len() as f64;
println!("Epoch {epoch}, Loss: {:.3}", loss_sum);
} }
} }

@ -35,7 +35,7 @@ pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT:
pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>( pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
left: &[f64; HEIGHT], left: &[f64; HEIGHT],
right: &[f64; WIDTH] right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] { ) -> [[f64; WIDTH]; HEIGHT] {
let mut result = [[0.0; WIDTH]; HEIGHT]; let mut result = [[0.0; WIDTH]; HEIGHT];

Loading…
Cancel
Save