🎨 Clean things up, add unit tests, add one hot layer

main
Shad Amethyst 2 years ago
parent bca56a5557
commit b4a97694a6

@ -10,3 +10,7 @@ ndarray = "^0.15"
# num-traits = "0.2.15" # num-traits = "0.2.15"
rand = "^0.8" rand = "^0.8"
rand_distr = "0.4.3" rand_distr = "0.4.3"
[dev-dependencies]
image = "0.24.6"
viuer = "0.6.2"

@ -2,85 +2,112 @@
use std::io::Write; use std::io::Write;
use neuramethyst::derivable::activation::Linear;
#[allow(unused_imports)] #[allow(unused_imports)]
use neuramethyst::derivable::activation::{LeakyRelu, Relu, Tanh}; use neuramethyst::derivable::activation::{LeakyRelu, Linear, Relu, Tanh};
use neuramethyst::derivable::loss::Euclidean; use neuramethyst::derivable::loss::CrossEntropy;
use neuramethyst::derivable::regularize::NeuraElastic; use neuramethyst::derivable::regularize::NeuraL1;
use neuramethyst::prelude::*; use neuramethyst::prelude::*;
use rand::Rng; use rand::Rng;
fn main() { fn main() {
let mut network = neura_network![ let mut network = neura_network![
neura_layer!("dense", 2, 8; LeakyRelu(0.01)), neura_layer!("dense", 2, 8; Relu, NeuraL1(0.001)),
neura_layer!("dropout", 0.1), neura_layer!("dropout", 0.25),
neura_layer!("dense", 8; LeakyRelu(0.01), NeuraElastic::new(0.0001, 0.002)), neura_layer!("dense", 2; Linear, NeuraL1(0.001)),
neura_layer!("dropout", 0.3),
neura_layer!("dense", 8; LeakyRelu(0.01), NeuraElastic::new(0.0001, 0.002)),
neura_layer!("dropout", 0.1),
neura_layer!("dense", 4; LeakyRelu(0.1), NeuraElastic::new(0.0001, 0.002)),
neura_layer!("dense", 2; Linear),
neura_layer!("softmax"), neura_layer!("softmax"),
]; ];
// println!("{:#?}", network);
let mut rng = rand::thread_rng(); let inputs = (0..1).cycle().map(move |_| {
let inputs = (0..=1).cycle().map(move |category| { let mut rng = rand::thread_rng(); // TODO: move out
let category = rng.gen_bool(0.5) as usize;
let (x, y) = if category == 0 { let (x, y) = if category == 0 {
let radius: f64 = rng.gen_range(0.0..1.0); let radius: f64 = rng.gen_range(0.0..2.0);
let radius = radius.sqrt();
let angle = rng.gen_range(0.0..std::f64::consts::TAU); let angle = rng.gen_range(0.0..std::f64::consts::TAU);
(angle.cos() * radius, angle.sin() * radius) (angle.cos() * radius, angle.sin() * radius)
} else { } else {
let radius: f64 = rng.gen_range(1.0..2.0); let radius: f64 = rng.gen_range(3.0..5.0);
let angle = rng.gen_range(0.0..std::f64::consts::TAU); let angle = rng.gen_range(0.0..std::f64::consts::TAU);
(angle.cos() * radius, angle.sin() * radius) (angle.cos() * radius, angle.sin() * radius)
}; };
([x, y], one_hot::<2>(category)) ([x, y], neuramethyst::one_hot::<2>(category))
}); });
let test_inputs: Vec<_> = inputs.clone().take(100).collect(); let test_inputs: Vec<_> = inputs.clone().take(10).collect();
let mut trainer = NeuraBatchedTrainer::new(0.25, 1000); if std::env::args().any(|arg| arg == "draw") {
trainer.log_epochs = 50; for epoch in 0..200 {
trainer.learning_momentum = 0.05; let mut trainer = NeuraBatchedTrainer::new(0.03, 10);
trainer.batch_size = 2000; trainer.batch_size = 10;
trainer.train( trainer.train(
NeuraBackprop::new(Euclidean), NeuraBackprop::new(CrossEntropy),
&mut network, &mut network,
inputs, inputs.clone(),
&test_inputs, &test_inputs,
); );
let network = network.clone();
draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0);
println!("{}", epoch);
std::thread::sleep(std::time::Duration::new(0, 50_000_000));
}
} else {
let mut trainer = NeuraBatchedTrainer::new(0.03, 20 * 50);
trainer.batch_size = 10;
trainer.log_iterations = 20;
trainer.train(
NeuraBackprop::new(CrossEntropy),
&mut network,
inputs.clone(),
&test_inputs,
);
// println!("{}", String::from("\n").repeat(64));
// draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0);
}
let mut file = std::fs::File::create("target/bivariate.csv").unwrap(); let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
for (input, _target) in test_inputs { for (input, _target) in test_inputs {
let guess = argmax(&network.eval(&input)); let guess = neuramethyst::argmax(&network.eval(&input));
writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap(); writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
// println!("{:?}", network.eval(&input));
} }
// println!("{:#?}", network);
} }
fn one_hot<const N: usize>(value: usize) -> [f64; N] { // TODO: move this to the library?
let mut res = [0.0; N]; fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
if value < N { use viuer::Config;
res[value] = 1.0;
const WIDTH: u32 = 64;
const HEIGHT: u32 = 64;
let mut image = image::RgbImage::new(WIDTH, HEIGHT);
fn sigmoid(x: f64) -> f64 {
1.0 / (1.0 + (-x * 3.0).exp())
} }
res
}
fn argmax(array: &[f64]) -> usize { for y in 0..HEIGHT {
let mut res = 0; let y2 = 2.0 * y as f64 / HEIGHT as f64 - 1.0;
for x in 0..WIDTH {
let x2 = 2.0 * x as f64 / WIDTH as f64 - 1.0;
let activation = callback([x2 * scale, y2 * scale]);
let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
for n in 1..array.len() { *image.get_pixel_mut(x, y) = image::Rgb([r, g, b]);
if array[n] > array[res] {
res = n;
} }
} }
res let config = Config {
use_kitty: false,
// absolute_offset: false,
..Default::default()
};
viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
} }

@ -29,7 +29,7 @@ fn main() {
let mut trainer = NeuraBatchedTrainer::new(0.05, 1000); let mut trainer = NeuraBatchedTrainer::new(0.05, 1000);
trainer.batch_size = 6; trainer.batch_size = 6;
trainer.log_epochs = 250; trainer.log_iterations = 250;
trainer.learning_momentum = 0.01; trainer.learning_momentum = 0.01;
trainer.train( trainer.train(

@ -5,6 +5,8 @@ pub trait NeuraVectorSpace {
fn mul_assign(&mut self, by: f64); fn mul_assign(&mut self, by: f64);
fn zero() -> Self; fn zero() -> Self;
fn norm_squared(&self) -> f64;
} }
impl NeuraVectorSpace for () { impl NeuraVectorSpace for () {
@ -22,6 +24,10 @@ impl NeuraVectorSpace for () {
fn zero() -> Self { fn zero() -> Self {
() ()
} }
fn norm_squared(&self) -> f64 {
0.0
}
} }
impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) { impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
@ -38,6 +44,10 @@ impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left
fn zero() -> Self { fn zero() -> Self {
(Left::zero(), Right::zero()) (Left::zero(), Right::zero())
} }
fn norm_squared(&self) -> f64 {
self.0.norm_squared() + self.1.norm_squared()
}
} }
impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] { impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
@ -65,6 +75,10 @@ impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
unreachable!() unreachable!()
}) })
} }
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
}
} }
macro_rules! base { macro_rules! base {
@ -81,6 +95,10 @@ macro_rules! base {
fn zero() -> Self { fn zero() -> Self {
<Self as Default>::default() <Self as Default>::default()
} }
fn norm_squared(&self) -> f64 {
(self * self) as f64
}
} }
}; };
} }

@ -1,135 +1,108 @@
use super::NeuraDerivable; #![allow(unused_variables)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Relu;
impl NeuraDerivable<f64> for Relu { use super::NeuraDerivable;
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
input.max(0.0)
}
#[inline(always)] macro_rules! impl_derivable {
fn derivate(&self, input: f64) -> f64 { ( $type_f32:ty, $type_f64:ty, $self:ident, $variable:ident, $eval:expr, $derivate:expr $(; $variance_hint:expr, $bias_hint:expr )? ) => {
if input > 0.0 { impl NeuraDerivable<f32> for $type_f32 {
1.0 #[inline(always)]
} else { fn eval($self: &Self, $variable: f32) -> f32 {
0.0 $eval
}
#[inline(always)]
fn derivate($self: &Self, $variable: f32) -> f32 {
$derivate
}
$(
#[inline(always)]
fn variance_hint($self: &Self) -> f64 {
$variance_hint
}
#[inline(always)]
fn bias_hint($self: &Self) -> f64 {
$bias_hint
}
)?
} }
}
}
impl NeuraDerivable<f32> for Relu { impl NeuraDerivable<f64> for $type_f64 {
#[inline(always)] #[inline(always)]
fn eval(&self, input: f32) -> f32 { fn eval($self: &Self, $variable: f64) -> f64 {
input.max(0.0) $eval
} }
#[inline(always)] #[inline(always)]
fn derivate(&self, input: f32) -> f32 { fn derivate($self: &Self, $variable: f64) -> f64 {
if input > 0.0 { $derivate
1.0 }
} else {
0.0 $(
#[inline(always)]
fn variance_hint($self: &Self) -> f64 {
$variance_hint
}
#[inline(always)]
fn bias_hint($self: &Self) -> f64 {
$bias_hint
}
)?
} }
} };
( $type:ty, $variable:ident, $eval:expr, $derivate:expr $(; $variance_hint:expr, $bias_hint:expr )? ) => {
impl_derivable!($type, $type, self, $variable, $eval, $derivate $(; $variance_hint, $bias_hint)?);
};
} }
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub struct LeakyRelu<F>(pub F); pub struct Relu;
impl NeuraDerivable<f64> for LeakyRelu<f64> { impl_derivable!(Relu, x, x.max(0.0), {
#[inline(always)] if x > 0.0 {
fn eval(&self, input: f64) -> f64 { 1.0
if input > 0.0 { } else {
input 0.0
} else {
self.0 * input
}
} }
}; 2.0, 0.1);
#[inline(always)] #[derive(Clone, Copy, Debug, PartialEq)]
fn derivate(&self, input: f64) -> f64 { pub struct LeakyRelu<F>(pub F);
if input > 0.0 {
1.0
} else {
self.0
}
}
}
impl NeuraDerivable<f32> for LeakyRelu<f32> { impl_derivable!(
#[inline(always)] LeakyRelu<f32>,
fn eval(&self, input: f32) -> f32 { LeakyRelu<f64>,
if input > 0.0 { self,
input x,
{
if x > 0.0 {
x
} else { } else {
self.0 * input self.0 * x
} }
} },
{
#[inline(always)] if x > 0.0 {
fn derivate(&self, input: f32) -> f32 {
if input > 0.0 {
1.0 1.0
} else { } else {
self.0 self.0
} }
} };
} 2.0, 0.1
);
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub struct Tanh; pub struct Tanh;
impl NeuraDerivable<f64> for Tanh { impl_derivable!(Tanh, x, x.tanh(), {
#[inline(always)] let y = x.tanh();
fn eval(&self, input: f64) -> f64 { 1.0 - y * y
0.5 * input.tanh() + 0.5 });
}
#[inline(always)]
fn derivate(&self, at: f64) -> f64 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}
impl NeuraDerivable<f32> for Tanh {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
0.5 * input.tanh() + 0.5
}
#[inline(always)]
fn derivate(&self, at: f32) -> f32 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub struct Linear; pub struct Linear;
impl NeuraDerivable<f64> for Linear { impl_derivable!(Linear, x, x, 1.0);
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
input
}
#[inline(always)]
fn derivate(&self, _at: f64) -> f64 {
1.0
}
}
impl NeuraDerivable<f32> for Linear {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
input
}
#[inline(always)]
fn derivate(&self, _at: f32) -> f32 {
1.0
}
}

@ -30,3 +30,56 @@ impl<const N: usize> NeuraLoss for Euclidean<N> {
res res
} }
} }
/// The cross-entropy loss function, defined as `L(y, ŷ) = -Σᵢ(yᵢ*ln(ŷᵢ))`.
///
/// This version of the cross-entropy function does not make assumptions about the target vector being one-hot encoded.
///
/// This function requires that `ŷ` (the output of the neural network) is in `[0; 1]^n`.
/// This guarantee is notably not given by the `Relu`, `LeakyRelu` and `Swish` activation functions,
/// so you should pick another activation on the last layer, or pass it into a `NeuraSoftmax` layer.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct CrossEntropy<const N: usize>;
const DERIVATIVE_CAP: f64 = 100.0;
const LOG_MIN: f64 = 0.00001;
impl<const N: usize> CrossEntropy<N> {
#[inline(always)]
pub fn eval_single(&self, target: f64, actual: f64) -> f64 {
-target * actual.max(LOG_MIN).log(std::f64::consts::E)
}
#[inline(always)]
pub fn derivate_single(&self, target: f64, actual: f64) -> f64 {
-(target / actual).min(DERIVATIVE_CAP)
}
}
impl<const N: usize> NeuraLoss for CrossEntropy<N> {
type Input = [f64; N];
type Target = [f64; N];
fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 {
let mut result = 0.0;
for i in 0..N {
result += self.eval_single(target[i], actual[i]);
}
result
}
fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input {
let mut result = [0.0; N];
for i in 0..N {
result[i] = self.derivate_single(target[i], actual[i]);
}
result
}
}
// TODO: a one-hot encoded, CrossEntropy + Softmax loss function?
// It would be a lot more efficient than the current method

@ -7,6 +7,18 @@ pub trait NeuraDerivable<F> {
/// Should return the derivative of `self.eval(input)` /// Should return the derivative of `self.eval(input)`
fn derivate(&self, at: F) -> F; fn derivate(&self, at: F) -> F;
/// Should return a hint for how much the variance for a random initialization should be
#[inline(always)]
fn variance_hint(&self) -> f64 {
1.0
}
/// Should return a hint for what the default bias value should be
#[inline(always)]
fn bias_hint(&self) -> f64 {
0.0
}
} }
pub trait NeuraLoss { pub trait NeuraLoss {

@ -73,7 +73,7 @@ pub struct NeuraL2<F>(pub F);
impl NeuraDerivable<f64> for NeuraL2<f64> { impl NeuraDerivable<f64> for NeuraL2<f64> {
#[inline(always)] #[inline(always)]
fn eval(&self, input: f64) -> f64 { fn eval(&self, input: f64) -> f64 {
self.0 * (input * input) (0.5 * self.0) * (input * input)
} }
#[inline(always)] #[inline(always)]
@ -85,7 +85,7 @@ impl NeuraDerivable<f64> for NeuraL2<f64> {
impl NeuraDerivable<f32> for NeuraL2<f32> { impl NeuraDerivable<f32> for NeuraL2<f32> {
#[inline(always)] #[inline(always)]
fn eval(&self, input: f32) -> f32 { fn eval(&self, input: f32) -> f32 {
self.0 * (input * input) (0.5 * self.0) * (input * input)
} }
#[inline(always)] #[inline(always)]

@ -46,8 +46,14 @@ impl<
pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN]; let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN];
let distribution = // Use Xavier (or He) initialisation, using the harmonic mean
rand_distr::Normal::new(0.0, 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64)).unwrap(); // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html
let distribution = rand_distr::Normal::new(
0.0,
activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64),
)
.unwrap();
// let distribution = rand_distr::Uniform::new(-0.5, 0.5);
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN { for j in 0..INPUT_LEN {
@ -57,8 +63,8 @@ impl<
Self { Self {
weights, weights,
// Biases are zero-initialized, as this shouldn't cause any issues during training // Biases are initialized based on the activation's hint
bias: [0.0; OUTPUT_LEN], bias: [activation.bias_hint(); OUTPUT_LEN],
activation, activation,
regularization, regularization,
} }
@ -96,20 +102,22 @@ impl<
{ {
type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]); type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]);
// TODO: double-check the math in this
fn backpropagate( fn backpropagate(
&self, &self,
input: &Self::Input, input: &Self::Input,
epsilon: Self::Output, epsilon: Self::Output,
) -> (Self::Input, Self::Delta) { ) -> (Self::Input, Self::Delta) {
let evaluated = multiply_matrix_vector(&self.weights, input); let evaluated = multiply_matrix_vector(&self.weights, input);
// Compute delta from epsilon, with `self.activation'(input) ° epsilon = delta` // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone(); let mut delta = epsilon.clone();
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
delta[i] *= self.activation.derivate(evaluated[i]); delta[i] *= self.activation.derivate(evaluated[i]);
} }
// Compute the weight gradient
let weights_gradient = reverse_dot_product(&delta, input); let weights_gradient = reverse_dot_product(&delta, input);
// According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
// The gradient of the bias is equal to the delta term of the backpropagation algorithm // The gradient of the bias is equal to the delta term of the backpropagation algorithm
let bias_gradient = delta; let bias_gradient = delta;

@ -7,6 +7,9 @@ pub use dropout::NeuraDropoutLayer;
mod softmax; mod softmax;
pub use softmax::NeuraSoftmaxLayer; pub use softmax::NeuraSoftmaxLayer;
mod one_hot;
pub use one_hot::NeuraOneHotLayer;
pub trait NeuraLayer { pub trait NeuraLayer {
type Input; type Input;
type Output; type Output;
@ -46,4 +49,8 @@ macro_rules! neura_layer {
( "softmax", $length:expr ) => { ( "softmax", $length:expr ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length> $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
}; };
( "one_hot" ) => {
$crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _>
};
} }

@ -0,0 +1,61 @@
use crate::train::NeuraTrainableLayer;
use super::NeuraLayer;
/// A special layer that allows you to split a vector into one-hot vectors
#[derive(Debug, Clone, PartialEq)]
pub struct NeuraOneHotLayer<const CATS: usize, const LENGTH: usize>;
impl<const CATS: usize, const LENGTH: usize> NeuraLayer for NeuraOneHotLayer<CATS, LENGTH>
where
[(); LENGTH * CATS]: Sized,
{
type Input = [f64; LENGTH];
type Output = [f64; LENGTH * CATS];
fn eval(&self, input: &Self::Input) -> Self::Output {
let mut res = [0.0; LENGTH * CATS];
for i in 0..LENGTH {
let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0);
let amount = (input[i] - cat_low).max(0.0).min(1.0);
let cat_low = cat_low as usize;
res[i * LENGTH + cat_low] = 1.0 - amount;
res[i * LENGTH + cat_low + 1] = amount;
}
res
}
}
impl<const CATS: usize, const LENGTH: usize> NeuraTrainableLayer for NeuraOneHotLayer<CATS, LENGTH>
where
[(); LENGTH * CATS]: Sized,
{
type Delta = ();
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
let mut res = [0.0; LENGTH];
for i in 0..LENGTH {
let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0) as usize;
let epsilon = -epsilon[i * LENGTH + cat_low] + epsilon[i * LENGTH + cat_low + 1];
// Scale epsilon by how many entries were ignored
res[i] = epsilon * CATS as f64 / 2.0;
}
(res, ())
}
fn regularize(&self) -> Self::Delta {
()
}
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop
}
}

@ -1,4 +1,6 @@
#![feature(generic_arg_infer)] #![feature(generic_arg_infer)]
#![feature(generic_associated_types)]
#![feature(generic_const_exprs)]
pub mod algebra; pub mod algebra;
pub mod derivable; pub mod derivable;
@ -8,13 +10,16 @@ pub mod train;
mod utils; mod utils;
// TODO: move to a different file
pub use utils::{argmax, one_hot};
pub mod prelude { pub mod prelude {
// Macros // Macros
pub use crate::{neura_layer, neura_network}; pub use crate::{neura_layer, neura_network};
// Structs and traits // Structs and traits
pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer};
pub use crate::network::NeuraNetwork; pub use crate::network::{NeuraNetwork, NeuraNetworkTail};
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
pub use crate::utils::cycle_shuffling; pub use crate::utils::cycle_shuffling;
} }

@ -6,8 +6,8 @@ use crate::{
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> { pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
layer: Layer, pub layer: Layer,
child_network: ChildNetwork, pub child_network: ChildNetwork,
} }
impl<Layer: NeuraLayer, ChildNetwork> NeuraNetwork<Layer, ChildNetwork> { impl<Layer: NeuraLayer, ChildNetwork> NeuraNetwork<Layer, ChildNetwork> {
@ -25,20 +25,66 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraNetwork<Layer, ChildNetwork> {
Self::new(layer, child_network) Self::new(layer, child_network)
} }
pub fn child_network(&self) -> &ChildNetwork { pub fn trim_front(self) -> ChildNetwork {
&self.child_network self.child_network
} }
pub fn layer(&self) -> &Layer { pub fn push_front<T: NeuraLayer>(self, layer: T) -> NeuraNetwork<T, Self> {
&self.layer NeuraNetwork {
layer: layer,
child_network: self,
}
} }
} }
impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> { /// Operations on the tail end of the network
fn from(layer: Layer) -> Self { pub trait NeuraNetworkTail {
Self { type TailTrimmed;
layer, type TailPushed<T: NeuraLayer>;
child_network: (),
fn trim_tail(self) -> Self::TailTrimmed;
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T>;
}
// Trimming the last layer returns an empty network
impl<Layer: NeuraLayer> NeuraNetworkTail for NeuraNetwork<Layer, ()> {
type TailTrimmed = ();
type TailPushed<T: NeuraLayer> = NeuraNetwork<Layer, NeuraNetwork<T, ()>>;
fn trim_tail(self) -> Self::TailTrimmed {
()
}
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> {
NeuraNetwork {
layer: self.layer,
child_network: NeuraNetwork {
layer,
child_network: (),
},
}
}
}
// Trimming another layer returns a network which calls trim recursively
impl<Layer: NeuraLayer, ChildNetwork: NeuraNetworkTail> NeuraNetworkTail
for NeuraNetwork<Layer, ChildNetwork>
{
type TailTrimmed = NeuraNetwork<Layer, <ChildNetwork as NeuraNetworkTail>::TailTrimmed>;
type TailPushed<T: NeuraLayer> =
NeuraNetwork<Layer, <ChildNetwork as NeuraNetworkTail>::TailPushed<T>>;
fn trim_tail(self) -> Self::TailTrimmed {
NeuraNetwork {
layer: self.layer,
child_network: self.child_network.trim_tail(),
}
}
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> {
NeuraNetwork {
layer: self.layer,
child_network: self.child_network.push_tail(layer),
} }
} }
} }
@ -136,6 +182,15 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Out
} }
} }
impl<Layer: NeuraLayer> From<Layer> for NeuraNetwork<Layer, ()> {
fn from(layer: Layer) -> Self {
Self {
layer,
child_network: (),
}
}
}
#[macro_export] #[macro_export]
macro_rules! neura_network { macro_rules! neura_network {
[] => { [] => {
@ -143,11 +198,11 @@ macro_rules! neura_network {
}; };
[ $layer:expr $(,)? ] => { [ $layer:expr $(,)? ] => {
NeuraNetwork::from($layer) $crate::network::NeuraNetwork::from($layer)
}; };
[ $first:expr, $($rest:expr),+ $(,)? ] => { [ $first:expr, $($rest:expr),+ $(,)? ] => {
NeuraNetwork::new_match_output($first, neura_network![$($rest),+]) $crate::network::NeuraNetwork::new_match_output($first, neura_network![$($rest),+])
}; };
} }
@ -159,8 +214,6 @@ mod test {
neura_layer, neura_layer,
}; };
use super::*;
#[test] #[test]
fn test_neura_network_macro() { fn test_neura_network_macro() {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();

@ -2,7 +2,7 @@ use crate::{
algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer, network::NeuraNetwork, algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer, network::NeuraNetwork,
}; };
// TODO: move this to layer/mod.rs // TODO: move this trait to layer/mod.rs
pub trait NeuraTrainableLayer: NeuraLayer { pub trait NeuraTrainableLayer: NeuraLayer {
type Delta: NeuraVectorSpace; type Delta: NeuraVectorSpace;
@ -29,7 +29,7 @@ pub trait NeuraTrainableLayer: NeuraLayer {
/// Applies `δW_l` to the weights of the layer /// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta); fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Called before an epoch begins, to allow the layer to set itself up for training. /// Called before an iteration begins, to allow the layer to set itself up for training.
#[inline(always)] #[inline(always)]
fn prepare_epoch(&mut self) {} fn prepare_epoch(&mut self) {}
@ -54,7 +54,7 @@ pub trait NeuraTrainable: NeuraLayer {
/// Should return the regularization gradient /// Should return the regularization gradient
fn regularize(&self) -> Self::Delta; fn regularize(&self) -> Self::Delta;
/// Called before an epoch begins, to allow the network to set itself up for training. /// Called before an iteration begins, to allow the network to set itself up for training.
fn prepare_epoch(&mut self); fn prepare_epoch(&mut self);
/// Called at the end of training, to allow the network to clean itself up /// Called at the end of training, to allow the network to clean itself up
@ -116,7 +116,8 @@ impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone>
where where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>, NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{ {
self.loss.eval(target, &trainable.eval(&input)) let output = trainable.eval(&input);
self.loss.eval(target, &output)
} }
} }
@ -138,15 +139,17 @@ pub struct NeuraBatchedTrainer {
/// How many gradient computations to average before updating the weights /// How many gradient computations to average before updating the weights
pub batch_size: usize, pub batch_size: usize,
/// How many batches to run for; if `epochs * batch_size` exceeds the input length, then training will stop. /// How many batches to run for; if `iterations * batch_size` exceeds the input length, then training will stop.
/// You should use `cycle_shuffling` from the `prelude` module to avoid this. /// You should use `cycle_shuffling` from the `prelude` module to avoid this.
pub epochs: usize, ///
/// Note that this is different from epochs, which count how many times the dataset has been fully iterated over.
pub iterations: usize,
/// The trainer will log progress at every multiple of `log_epochs` steps. /// The trainer will log progress at every multiple of `log_iterations` iterations.
/// If `log_epochs` is zero (default), then no progress will be logged. /// If `log_iterations` is zero (default), then no progress will be logged.
/// ///
/// The test inputs is used to measure the score of the network. /// The test inputs is used to measure the score of the network.
pub log_epochs: usize, pub log_iterations: usize,
} }
impl Default for NeuraBatchedTrainer { impl Default for NeuraBatchedTrainer {
@ -155,17 +158,17 @@ impl Default for NeuraBatchedTrainer {
learning_rate: 0.1, learning_rate: 0.1,
learning_momentum: 0.0, learning_momentum: 0.0,
batch_size: 100, batch_size: 100,
epochs: 100, iterations: 100,
log_epochs: 0, log_iterations: 0,
} }
} }
} }
impl NeuraBatchedTrainer { impl NeuraBatchedTrainer {
pub fn new(learning_rate: f64, epochs: usize) -> Self { pub fn new(learning_rate: f64, iterations: usize) -> Self {
Self { Self {
learning_rate, learning_rate,
epochs, iterations,
..Default::default() ..Default::default()
} }
} }
@ -195,7 +198,7 @@ impl NeuraBatchedTrainer {
// Contains `momentum_factor * factor * gradient_sum_previous_iter` // Contains `momentum_factor * factor * gradient_sum_previous_iter`
let mut previous_gradient_sum = let mut previous_gradient_sum =
<NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero(); <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
'd: for epoch in 0..self.epochs { 'd: for iteration in 0..self.iterations {
let mut gradient_sum = let mut gradient_sum =
<NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero(); <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
network.prepare_epoch(); network.prepare_epoch();
@ -211,7 +214,7 @@ impl NeuraBatchedTrainer {
gradient_sum.mul_assign(factor); gradient_sum.mul_assign(factor);
// Add regularization gradient (TODO: check if it can be factored out of momentum) // Add regularization gradient
let mut reg_gradient = network.regularize(); let mut reg_gradient = network.regularize();
reg_gradient.mul_assign(reg_factor); reg_gradient.mul_assign(reg_factor);
gradient_sum.add_assign(&reg_gradient); gradient_sum.add_assign(&reg_gradient);
@ -224,14 +227,14 @@ impl NeuraBatchedTrainer {
previous_gradient_sum.mul_assign(momentum_factor); previous_gradient_sum.mul_assign(momentum_factor);
} }
if self.log_epochs > 0 && (epoch + 1) % self.log_epochs == 0 { if self.log_iterations > 0 && (iteration + 1) % self.log_iterations == 0 {
network.cleanup(); network.cleanup();
let mut loss_sum = 0.0; let mut loss_sum = 0.0;
for (input, target) in test_inputs { for (input, target) in test_inputs {
loss_sum += gradient_solver.score(&network, input, target); loss_sum += gradient_solver.score(&network, input, target);
} }
loss_sum /= test_inputs.len() as f64; loss_sum /= test_inputs.len() as f64;
println!("Epoch {}, Loss: {:.3}", epoch + 1, loss_sum); println!("Iteration {}, Loss: {:.3}", iteration + 1, loss_sum);
} }
} }
@ -243,8 +246,11 @@ impl NeuraBatchedTrainer {
mod test { mod test {
use super::*; use super::*;
use crate::{ use crate::{
assert_approx,
derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0}, derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
layer::NeuraDenseLayer, layer::NeuraDenseLayer,
network::NeuraNetworkTail,
neura_network,
}; };
#[test] #[test]
@ -263,4 +269,40 @@ mod test {
} }
} }
} }
#[test]
fn test_backpropagation_complex() {
const EPSILON: f64 = 0.00001;
// Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/
let network = neura_network![
NeuraDenseLayer::new([[0.11, 0.21], [0.12, 0.08]], [0.0; 2], Linear, NeuraL0),
NeuraDenseLayer::new([[0.14, 0.15]], [0.0], Linear, NeuraL0)
];
let input = [2.0, 3.0];
let target = [1.0];
let intermediary = network.clone().trim_tail().eval(&input);
assert_approx!(0.85, intermediary[0], EPSILON);
assert_approx!(0.48, intermediary[1], EPSILON);
assert_approx!(0.191, network.eval(&input)[0], EPSILON);
assert_approx!(0.327, Euclidean.eval(&target, &network.eval(&input)), 0.001);
let delta = network.eval(&input)[0] - target[0];
let (gradient_first, gradient_second) =
NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target);
let gradient_first = gradient_first.0;
let gradient_second = gradient_second.0[0];
assert_approx!(gradient_second[0], intermediary[0] * delta, EPSILON);
assert_approx!(gradient_second[1], intermediary[1] * delta, EPSILON);
assert_approx!(gradient_first[0][0], input[0] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[0][1], input[1] * delta * 0.14, EPSILON);
assert_approx!(gradient_first[1][0], input[0] * delta * 0.15, EPSILON);
assert_approx!(gradient_first[1][1], input[1] * delta * 0.15, EPSILON);
}
} }

@ -175,3 +175,54 @@ pub(crate) fn uniform_vector<const LENGTH: usize>() -> [f64; LENGTH] {
res res
} }
pub fn one_hot<const N: usize>(value: usize) -> [f64; N] {
let mut res = [0.0; N];
if value < N {
res[value] = 1.0;
}
res
}
pub fn argmax(array: &[f64]) -> usize {
let mut res = 0;
for n in 1..array.len() {
if array[n] > array[res] {
res = n;
}
}
res
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_reverse_dot_product() {
let left = [2.0, 3.0, 5.0];
let right = [7.0, 11.0, 13.0, 17.0];
let expected = [
[14.0, 22.0, 26.0, 34.0],
[21.0, 33.0, 39.0, 51.0],
[35.0, 55.0, 65.0, 85.0],
];
assert_eq!(expected, reverse_dot_product(&left, &right));
}
}
#[cfg(test)]
#[macro_export]
macro_rules! assert_approx {
( $left:expr, $right:expr, $epsilon:expr ) => {
let left = $left;
let right = $right;
if (left - right).abs() >= $epsilon {
panic!("Expected {} to be approximately equal to {}", left, right);
}
};
}

Loading…
Cancel
Save