Working backpropagation :3

main
Shad Amethyst 2 years ago
parent 7a6921a1c1
commit 8ac82e20e2

@ -0,0 +1,71 @@
#![feature(generic_arg_infer)]
use std::io::Write;
use neuramethyst::prelude::*;
use neuramethyst::derivable::activation::{Relu, Tanh, LeakyRelu};
use neuramethyst::derivable::loss::Euclidean;
use rand::Rng;
// Example: trains a small dense network to separate two classes of 2D points and
// writes the network's guess for each test sample to `target/bivariate.csv`.
//
// Class 0 is sampled inside the unit disk, class 1 inside the ring with radius in `1.0..2.0`.
fn main() {
// Three dense layers. The trailing numbers are presumably the output/input sizes
// expected by `neura_layer!` — TODO confirm against the macro definition.
let mut network = neura_network![
neura_layer!("dense", LeakyRelu(0.01), 4, 2),
neura_layer!("dense", Tanh, 3),
neura_layer!("dense", Relu, 2)
];
let mut rng = rand::thread_rng();
// Endless sample generator alternating between category 0 and category 1.
let inputs = (0..=1).cycle().map(move |category| {
let (x, y) = if category == 0 {
// Taking the square root of a uniform radius spreads the points evenly
// over the area of the unit disk instead of clustering them near the center.
let radius: f64 = rng.gen_range(0.0..1.0);
let radius = radius.sqrt();
let angle = rng.gen_range(0.0..std::f64::consts::TAU);
(angle.cos() * radius, angle.sin() * radius)
} else {
// Ring around the disk: radius drawn from `1.0..2.0`, without the area correction above.
let radius: f64 = rng.gen_range(1.0..2.0);
let angle = rng.gen_range(0.0..std::f64::consts::TAU);
(angle.cos() * radius, angle.sin() * radius)
};
// The target is the category encoded as a 2-element one-hot vector.
([x, y], one_hot::<2>(category))
});
// 100 samples drawn from a clone of the generator, used for loss logging and the CSV dump.
let test_inputs: Vec<_> = inputs.clone().take(100).collect();
// Learning rate 0.1, 4000 epochs; progress is logged every 500 epochs.
let mut trainer = NeuraBatchedTrainer::new(0.1, 4000);
trainer.log_epochs = 500;
trainer.train(
NeuraBackprop::new(Euclidean),
&mut network,
inputs,
&test_inputs
);
// Panics if the file cannot be created (for instance when `target/` does not exist).
let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
for (input, _target) in test_inputs {
// The guessed category is the index of the largest network output.
let guess = argmax(&network.eval(&input));
// One CSV row per sample: `x,y,guess`.
writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
}
}
/// Builds a one-hot vector of length `N`: every entry is `0.0` except the one at
/// index `value`, which is `1.0`.
///
/// When `value` is not a valid index (`value >= N`), the result is all zeros.
fn one_hot<const N: usize>(value: usize) -> [f64; N] {
    let mut encoded = [0.0; N];
    // `get_mut` yields `None` for an out-of-range index, leaving the array zeroed.
    if let Some(slot) = encoded.get_mut(value) {
        *slot = 1.0;
    }
    encoded
}
/// Returns the index of the largest element of `array`.
///
/// Ties resolve to the earliest index, since a later element only wins when it is
/// strictly greater. An empty slice yields `0`. Comparisons involving `NaN` are
/// always false, so a `NaN` never displaces the current best.
fn argmax(array: &[f64]) -> usize {
    array
        .iter()
        .enumerate()
        .skip(1)
        .fold(0, |best, (candidate, &value)| {
            if value > array[best] {
                candidate
            } else {
                best
            }
        })
}

@ -1,13 +1,13 @@
#![feature(generic_arg_infer)] #![feature(generic_arg_infer)]
use neuramethyst::prelude::*; use neuramethyst::prelude::*;
use neuramethyst::derivable::activation::{Relu, Tanh}; use neuramethyst::derivable::activation::{Relu};
use neuramethyst::derivable::loss::Euclidean; use neuramethyst::derivable::loss::Euclidean;
fn main() { fn main() {
let mut network = neura_network![ let mut network = neura_network![
neura_layer!("dense", Tanh, 2, 2), neura_layer!("dense", Relu, 4, 2),
neura_layer!("dense", Tanh, 3), neura_layer!("dense", Relu, 3),
neura_layer!("dense", Relu, 1) neura_layer!("dense", Relu, 1)
]; ];
@ -18,25 +18,23 @@ fn main() {
([1.0, 1.0], [0.0]) ([1.0, 1.0], [0.0])
]; ];
// println!("{:#?}", network);
for (input, target) in inputs { for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]); println!("Input: {:?}, target: {}, actual: {:.3}", &input, target[0], network.eval(&input)[0]);
} }
train_batched( let mut trainer = NeuraBatchedTrainer::new(0.05, 1000);
trainer.batch_size = 6;
trainer.log_epochs = 250;
trainer.learning_momentum = 0.01;
trainer.train(
NeuraBackprop::new(Euclidean),
&mut network, &mut network,
inputs.clone(), cycle_shuffling(inputs.iter().cloned(), rand::thread_rng()),
&inputs, &inputs,
NeuraBackprop::new(Euclidean),
0.01,
1,
25
); );
// println!("{:#?}", network);
for (input, target) in inputs { for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]); println!("Input: {:?}, target: {}, actual: {:.3}", &input, target[0], network.eval(&input)[0]);
} }
} }

@ -35,6 +35,50 @@ impl NeuraDerivable<f32> for Relu {
} }
} }
/// Leaky rectified linear unit.
///
/// Strictly positive inputs pass through unchanged; every other input is scaled by
/// the wrapped factor (`self.0`).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct LeakyRelu(pub f64);

impl NeuraDerivable<f64> for LeakyRelu {
    /// Returns `input` when it is strictly positive, `self.0 * input` otherwise.
    #[inline(always)]
    fn eval(&self, input: f64) -> f64 {
        match input {
            positive if positive > 0.0 => positive,
            other => self.0 * other,
        }
    }

    /// Returns `1.0` for strictly positive inputs and the leak factor otherwise.
    #[inline(always)]
    fn derivate(&self, input: f64) -> f64 {
        let on_positive_side = input > 0.0;
        if on_positive_side {
            1.0
        } else {
            self.0
        }
    }
}

impl NeuraDerivable<f32> for LeakyRelu {
    /// Single-precision variant of `eval`; the leak factor is narrowed to `f32` first.
    #[inline(always)]
    fn eval(&self, input: f32) -> f32 {
        let leak = self.0 as f32;
        match input {
            positive if positive > 0.0 => positive,
            other => leak * other,
        }
    }

    /// Single-precision variant of `derivate`; the leak factor is narrowed to `f32`.
    #[inline(always)]
    fn derivate(&self, input: f32) -> f32 {
        let on_positive_side = input > 0.0;
        if on_positive_side {
            1.0
        } else {
            self.0 as f32
        }
    }
}
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub struct Tanh; pub struct Tanh;
@ -63,3 +107,30 @@ impl NeuraDerivable<f32> for Tanh {
0.5 * (1.0 - tanh * tanh) 0.5 * (1.0 - tanh * tanh)
} }
} }
/// Identity activation: the output equals the input.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Linear;

impl NeuraDerivable<f64> for Linear {
    /// Returns `value` unchanged.
    #[inline(always)]
    fn eval(&self, value: f64) -> f64 {
        value
    }

    /// The slope of the identity is `1.0` everywhere, so the argument is ignored.
    #[inline(always)]
    fn derivate(&self, _value: f64) -> f64 {
        1.0
    }
}

impl NeuraDerivable<f32> for Linear {
    /// Returns `value` unchanged.
    #[inline(always)]
    fn eval(&self, value: f32) -> f32 {
        value
    }

    /// The slope of the identity is `1.0` everywhere, so the argument is ignored.
    #[inline(always)]
    fn derivate(&self, _value: f32) -> f32 {
        1.0
    }
}

@ -35,7 +35,7 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN { for j in 0..INPUT_LEN {
weights[i][j] = rng.gen_range(-multiplier..multiplier); weights[i][j] = rng.gen_range(0.0..multiplier);
} }
} }
@ -74,10 +74,10 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
// TODO: double-check the math in this // TODO: double-check the math in this
fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) { fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) {
let evaluated = multiply_matrix_vector(&self.weights, input); let evaluated = multiply_matrix_vector(&self.weights, input);
// Compute delta from epsilon, with `self.activation'(z) * epsilon = delta` // Compute delta from epsilon, with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone(); let mut delta = epsilon.clone();
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
delta[i] = self.activation.derivate(evaluated[i]); delta[i] *= self.activation.derivate(evaluated[i]);
} }
let weights_gradient = reverse_dot_product(&delta, input); let weights_gradient = reverse_dot_product(&delta, input);

@ -13,7 +13,8 @@ pub mod prelude {
pub use crate::{neura_network, neura_layer}; pub use crate::{neura_network, neura_layer};
// Structs and traits // Structs and traits
pub use super::network::{NeuraNetwork}; pub use crate::network::{NeuraNetwork};
pub use super::layer::{NeuraLayer, NeuraDenseLayer}; pub use crate::layer::{NeuraLayer, NeuraDenseLayer};
pub use super::train::{NeuraBackprop, train_batched}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
pub use crate::utils::cycle_shuffling;
} }

@ -3,7 +3,7 @@ use crate::{
algebra::NeuraVectorSpace, algebra::NeuraVectorSpace,
derivable::NeuraLoss, derivable::NeuraLoss,
layer::NeuraLayer, layer::NeuraLayer,
network::NeuraNetwork, network::NeuraNetwork, utils::cycle_shuffling,
}; };
pub trait NeuraTrainableLayer: NeuraLayer { pub trait NeuraTrainableLayer: NeuraLayer {
@ -44,7 +44,7 @@ pub trait NeuraTrainable: NeuraLayer {
) -> (Self::Input, Self::Delta); ) -> (Self::Input, Self::Delta);
} }
pub trait NeuraTrainer<Output, Target = Output> { pub trait NeuraGradientSolver<Output, Target = Output> {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>( fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self, &self,
trainable: &NeuraNetwork<Layer, ChildNetwork>, trainable: &NeuraNetwork<Layer, ChildNetwork>,
@ -75,7 +75,7 @@ impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
} }
} }
impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraTrainer<[f64; N], Loss::Target> impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraGradientSolver<[f64; N], Loss::Target>
for NeuraBackprop<Loss> for NeuraBackprop<Loss>
{ {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>( fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
@ -103,35 +103,86 @@ impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraTrainer<[f6
} }
} }
pub fn train_batched< #[non_exhaustive]
/// Settings for batched gradient-descent training.
pub struct NeuraBatchedTrainer {
    /// Learning rate of the gradient descent; the weights `W` are updated as
    /// `W += -learning_rate * gradient_average`.
    ///
    /// Defaults to `0.1`.
    pub learning_rate: f64,

    /// Momentum of the gradient descent; when non-zero, the weights `W` are updated as
    /// `W += -learning_rate * gradient_average - learning_momentum * previous_gradient`.
    /// This value should be smaller than `learning_rate`.
    ///
    /// Defaults to `0.0`.
    pub learning_momentum: f64,

    /// Number of gradient computations that are averaged before the weights are updated.
    ///
    /// Defaults to `100`.
    pub batch_size: usize,

    /// Number of batches to run. If `epochs * batch_size` exceeds the number of inputs,
    /// training stops once the inputs run out; use `cycle_shuffling` from the `prelude`
    /// module to avoid this.
    ///
    /// Defaults to `100`.
    pub epochs: usize,

    /// Progress is logged at every multiple of `log_epochs` steps; `0` (the default)
    /// disables logging.
    ///
    /// The test inputs are used to measure the score of the network.
    pub log_epochs: usize,
}

impl Default for NeuraBatchedTrainer {
    /// Returns the default settings documented on each field.
    fn default() -> Self {
        NeuraBatchedTrainer {
            log_epochs: 0,
            epochs: 100,
            batch_size: 100,
            learning_momentum: 0.0,
            learning_rate: 0.1,
        }
    }
}
impl NeuraBatchedTrainer {
pub fn new(learning_rate: f64, epochs: usize) -> Self {
Self {
learning_rate,
epochs,
..Default::default()
}
}
pub fn train<
Output, Output,
Target, Target: Clone,
Trainer: NeuraTrainer<Output, Target>, GradientSolver: NeuraGradientSolver<Output, Target>,
Layer: NeuraLayer, Layer: NeuraLayer,
ChildNetwork, ChildNetwork,
Inputs: IntoIterator<Item = (Layer::Input, Target)>, Inputs: IntoIterator<Item = (Layer::Input, Target)>,
>( >(
&self,
gradient_solver: GradientSolver,
network: &mut NeuraNetwork<Layer, ChildNetwork>, network: &mut NeuraNetwork<Layer, ChildNetwork>,
inputs: Inputs, inputs: Inputs,
test_inputs: &[(Layer::Input, Target)], test_inputs: &[(Layer::Input, Target)],
trainer: Trainer, ) where
learning_rate: f64,
batch_size: usize,
epochs: usize,
) where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>, NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>,
Inputs::IntoIter: Clone, Layer::Input: Clone,
{ {
// TODO: apply shuffling? // TODO: apply shuffling?
let mut iter = inputs.into_iter().cycle(); let mut iter = inputs.into_iter();
let factor = -learning_rate / (batch_size as f64); let factor = -self.learning_rate / (self.batch_size as f64);
let momentum_factor = self.learning_momentum / self.learning_rate;
'd: for epoch in 0..epochs { // Contains `momentum_factor * factor * gradient_sum_previous_iter`
let mut previous_gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
'd: for epoch in 0..self.epochs {
let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero(); let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
for _ in 0..batch_size { for _ in 0..self.batch_size {
if let Some((input, target)) = iter.next() { if let Some((input, target)) = iter.next() {
let gradient = trainer.get_gradient(&network, &input, &target); let gradient = gradient_solver.get_gradient(&network, &input, &target);
gradient_sum.add_assign(&gradient); gradient_sum.add_assign(&gradient);
} else { } else {
break 'd; break 'd;
@ -141,11 +192,48 @@ pub fn train_batched<
gradient_sum.mul_assign(factor); gradient_sum.mul_assign(factor);
network.apply_gradient(&gradient_sum); network.apply_gradient(&gradient_sum);
if self.learning_momentum != 0.0 {
network.apply_gradient(&previous_gradient_sum);
previous_gradient_sum = gradient_sum;
previous_gradient_sum.mul_assign(momentum_factor);
}
if self.log_epochs > 0 && (epoch + 1) % self.log_epochs == 0 {
let mut loss_sum = 0.0; let mut loss_sum = 0.0;
for (input, target) in test_inputs { for (input, target) in test_inputs {
loss_sum += trainer.score(&network, input, target); loss_sum += gradient_solver.score(&network, input, target);
} }
loss_sum /= test_inputs.len() as f64; loss_sum /= test_inputs.len() as f64;
println!("Epoch {epoch}, Loss: {:.3}", loss_sum); println!("Epoch {}, Loss: {:.3}", epoch + 1, loss_sum);
}
}
}
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::{
        derivable::{activation::Linear, loss::Euclidean},
        layer::NeuraDenseLayer,
    };

    /// Checks the weight gradient of a single linear dense layer with two inputs and
    /// one output, for a grid of weights `wa`, `wb`.
    ///
    /// With input `[1.0, 1.0]`, zero bias and target `[0.0]`, the layer outputs
    /// `wa + wb`; the test expects both weight gradients to equal that value
    /// (this presumes the `Euclidean` loss derivative is `output - target` — confirm
    /// against its implementation).
    #[test]
    fn test_backpropagation_simple() {
        for wa in [0.0, 0.25, 0.5, 1.0] {
            for wb in [0.0, 0.25, 0.5, 1.0] {
                let network = NeuraNetwork::new(
                    NeuraDenseLayer::new([[wa, wb]], [0.0], Linear),
                    (),
                );

                let gradient = NeuraBackprop::new(Euclidean).get_gradient(
                    &network,
                    &[1.0, 1.0],
                    &[0.0],
                );

                let expected = wa + wb;
                // Compare the absolute error: a signed difference would let any gradient
                // that is too small (even hugely negative) pass the check.
                assert!(
                    (gradient.0[0][0] - expected).abs() < 0.001,
                    "wa={wa}, wb={wb}: gradient[0][0]={}, expected {expected}",
                    gradient.0[0][0]
                );
                assert!(
                    (gradient.0[0][1] - expected).abs() < 0.001,
                    "wa={wa}, wb={wb}: gradient[0][1]={}, expected {expected}",
                    gradient.0[0][1]
                );
            }
        }
    }
}

@ -54,16 +54,12 @@ pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f
} }
} }
pub(crate) fn chunked<I: Iterator>( struct Chunked<J: Iterator> {
iter: I,
chunk_size: usize,
) -> impl Iterator<Item = Vec<I::Item>> {
struct Chunked<J: Iterator> {
iter: J, iter: J,
chunk_size: usize, chunk_size: usize,
} }
impl<J: Iterator> Iterator for Chunked<J> { impl<J: Iterator> Iterator for Chunked<J> {
type Item = Vec<J::Item>; type Item = Vec<J::Item>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
@ -83,7 +79,66 @@ pub(crate) fn chunked<I: Iterator>(
None None
} }
} }
} }
pub(crate) fn chunked<I: Iterator>(
iter: I,
chunk_size: usize,
) -> impl Iterator<Item = Vec<I::Item>> {
Chunked { iter, chunk_size } Chunked { iter, chunk_size }
} }
/// Iterator adaptor that yields the items of `iter` once in their original order,
/// remembering a clone of each, and afterwards replays the remembered items forever,
/// reshuffling them at the start of every pass.
struct ShuffleCycled<I: Iterator, R: rand::Rng> {
    /// Clones of every item produced by `iter` so far.
    buffer: Vec<I::Item>,
    /// Position of the next item to replay from `buffer`; `0` marks the start of a pass.
    index: usize,
    /// The underlying source iterator.
    iter: I,
    /// Random number generator used to shuffle `buffer`.
    rng: R,
}

impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R> where I::Item: Clone {
    type Item = I::Item;

    /// Returns the next item: from the source while it lasts, then from the shuffled
    /// buffer. Only returns `None` when the source never produced any item.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        use rand::prelude::SliceRandom;

        // NOTE(review): the source is polled again on every call, even after it has
        // returned `None`; this assumes it keeps returning `None` once exhausted.
        if let Some(next) = self.iter.next() {
            // Base iterator is not empty yet
            self.buffer.push(next.clone());
            return Some(next);
        }

        if self.buffer.is_empty() {
            return None;
        }

        if self.index == 0 {
            // Start of a new pass: reshuffle before replaying
            self.buffer.shuffle(&mut self.rng);
        }

        let res = self.buffer[self.index].clone();
        // Wrapping with the modulo here (rather than hard-coding `1` after a shuffle)
        // keeps the index in bounds when the buffer holds a single item.
        self.index = (self.index + 1) % self.buffer.len();
        Some(res)
    }
}
/// Wraps `iter` so that its items are first yielded in their original order and then
/// replayed in cycles, using `rng` to shuffle the replayed items.
///
/// Items are cloned into an internal buffer as they are produced, hence the
/// `I::Item: Clone` bound.
pub fn cycle_shuffling<I: Iterator>(
    iter: I,
    rng: impl rand::Rng
) -> impl Iterator<Item=I::Item>
where
    I::Item: Clone
{
    /// Upper limit on the number of items preallocated from the size hint.
    const MAX_PREALLOCATED_ITEMS: usize = 1 << 16;

    let (lower, upper) = iter.size_hint();
    // The size hint is only advisory: unbounded sources report `usize::MAX`, which
    // would make `Vec::with_capacity` panic, and loose upper bounds would waste memory.
    // Clamp it; the buffer still grows on demand past this initial capacity.
    let capacity = upper.unwrap_or(lower).clamp(1, MAX_PREALLOCATED_ITEMS);

    ShuffleCycled {
        buffer: Vec::with_capacity(capacity),
        index: 0,
        iter,
        rng
    }
}

Loading…
Cancel
Save