diff --git a/Cargo.toml b/Cargo.toml
index 8bbb664..9d8f4ba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,3 +9,4 @@ edition = "2021"
 ndarray = "^0.15"
 # num-traits = "0.2.15"
 rand = "^0.8"
+rand_distr = "0.4.3"
diff --git a/examples/bivariate.rs b/examples/bivariate.rs
index 4a7c0b0..67443fe 100644
--- a/examples/bivariate.rs
+++ b/examples/bivariate.rs
@@ -10,10 +10,16 @@ use rand::Rng;
 
 fn main() {
     let mut network = neura_network![
-        neura_layer!("dense", LeakyRelu(0.01), 4, 2),
-        neura_layer!("dense", Tanh, 3),
-        neura_layer!("dense", Relu, 2)
+        neura_layer!("dense", LeakyRelu(0.01), 9, 2),
+        neura_layer!("dropout", 0.1),
+        neura_layer!("dense", LeakyRelu(0.01), 9),
+        neura_layer!("dropout", 0.3),
+        neura_layer!("dense", LeakyRelu(0.01), 6),
+        neura_layer!("dropout", 0.1),
+        neura_layer!("dense", LeakyRelu(0.01), 4),
+        neura_layer!("dense", LeakyRelu(0.1), 2)
     ];
+    // println!("{:#?}", network);
 
     let mut rng = rand::thread_rng();
     let inputs = (0..=1).cycle().map(move |category| {
@@ -48,6 +54,8 @@
         let guess = argmax(&network.eval(&input));
         writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
     }
+
+    // println!("{:#?}", network);
 }
 
 fn one_hot<const N: usize>(value: usize) -> [f64; N] {
diff --git a/src/algebra.rs b/src/algebra.rs
index 8ed39ab..d3bcd12 100644
--- a/src/algebra.rs
+++ b/src/algebra.rs
@@ -7,6 +7,23 @@ pub trait NeuraVectorSpace {
     fn zero() -> Self;
 }
 
+impl NeuraVectorSpace for () {
+    #[inline(always)]
+    fn add_assign(&mut self, _other: &Self) {
+        // Noop
+    }
+
+    #[inline(always)]
+    fn mul_assign(&mut self, _by: f64) {
+        // Noop
+    }
+
+    #[inline(always)]
+    fn zero() -> Self {
+        ()
+    }
+}
+
 impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
     fn add_assign(&mut self, other: &Self) {
         NeuraVectorSpace::add_assign(&mut self.0, &other.0);
diff --git a/src/layer/dense.rs b/src/layer/dense.rs
index 1776bc8..bf94b76 100644
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@@ -1,5 +1,7 @@
 use super::NeuraLayer;
 use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
+
+use rand_distr::Distribution;
 use rand::Rng;
 
 #[derive(Clone, Debug)]
@@ -31,11 +33,11 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
     pub fn from_rng(rng: &mut impl Rng, activation: Act) -> Self {
         let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN];
-        let multiplier = std::f64::consts::SQRT_2 / (INPUT_LEN as f64).sqrt();
+        let distribution = rand_distr::Normal::new(0.0, 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64)).unwrap();
 
         for i in 0..OUTPUT_LEN {
             for j in 0..INPUT_LEN {
-                weights[i][j] = rng.gen_range(0.0..multiplier);
+                weights[i][j] = distribution.sample(rng);
             }
         }
diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs
new file mode 100644
index 0000000..d8ff615
--- /dev/null
+++ b/src/layer/dropout.rs
@@ -0,0 +1,87 @@
+use rand::Rng;
+
+use crate::train::NeuraTrainableLayer;
+
+use super::NeuraLayer;
+
+#[derive(Clone, Debug)]
+pub struct NeuraDropoutLayer<R: Rng, const LENGTH: usize> {
+    pub dropout_probability: f64,
+    multiplier: f64,
+    mask: [bool; LENGTH],
+    rng: R,
+}
+
+impl<R: Rng, const LENGTH: usize> NeuraDropoutLayer<R, LENGTH> {
+    pub fn new(dropout_probability: f64, rng: R) -> Self {
+        Self {
+            dropout_probability,
+            multiplier: 1.0,
+            mask: [false; LENGTH],
+            rng,
+        }
+    }
+
+    fn apply_dropout(&self, vector: &mut [f64; LENGTH]) {
+        for (index, &dropout) in self.mask.iter().enumerate() {
+            if dropout {
+                vector[index] = 0.0;
+            } else {
+                vector[index] *= self.multiplier;
+            }
+        }
+    }
+}
+
+impl<R: Rng, const LENGTH: usize> NeuraLayer for NeuraDropoutLayer<R, LENGTH> {
+    type Input = [f64; LENGTH];
+    type Output = [f64; LENGTH];
+
+    fn eval(&self, input: &Self::Input) -> Self::Output {
+        let mut result = input.clone();
+
+        self.apply_dropout(&mut result);
+
+        result
+    }
+}
+
+impl<R: Rng, const LENGTH: usize> NeuraTrainableLayer for NeuraDropoutLayer<R, LENGTH> {
+    type Delta = ();
+
+    fn backpropagate(
+        &self,
+        _input: &Self::Input,
+        mut epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta) {
+        self.apply_dropout(&mut epsilon);
+
+        (epsilon, ())
+    }
+
+    #[inline(always)]
+    fn apply_gradient(&mut self, _gradient: &Self::Delta) {
+        // Noop
+    }
+
+    fn prepare_epoch(&mut self) {
+        // Rejection sampling to prevent all the inputs from being dropped out
+        loop {
+            let mut sum = 0;
+            for i in 0..LENGTH {
+                self.mask[i] = self.rng.gen_bool(self.dropout_probability);
+                sum += (!self.mask[i]) as usize;
+            }
+
+            if sum < LENGTH {
+                self.multiplier = LENGTH as f64 / sum as f64;
+                break;
+            }
+        }
+    }
+
+    fn cleanup(&mut self) {
+        self.mask = [false; LENGTH];
+        self.multiplier = 1.0;
+    }
+}
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index d10964b..a74bd69 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -1,6 +1,9 @@
 mod dense;
 pub use dense::NeuraDenseLayer;
 
+mod dropout;
+pub use dropout::NeuraDropoutLayer;
+
 pub trait NeuraLayer {
     type Input;
     type Output;
@@ -19,4 +22,9 @@ macro_rules! neura_layer {
         NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation)
             as NeuraDenseLayer<_, $input, $output>
     };
+
+    ( "dropout", $probability:expr ) => {
+        NeuraDropoutLayer::new($probability, rand::thread_rng())
+            as NeuraDropoutLayer<_, _>
+    };
 }
diff --git a/src/lib.rs b/src/lib.rs
index eb9b2e8..0c66809 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,7 +14,11 @@ pub mod prelude {
 
     // Structs and traits
     pub use crate::network::{NeuraNetwork};
-    pub use crate::layer::{NeuraLayer, NeuraDenseLayer};
+    pub use crate::layer::{
+        NeuraLayer,
+        NeuraDenseLayer,
+        NeuraDropoutLayer
+    };
     pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
     pub use crate::utils::cycle_shuffling;
 }
diff --git a/src/network.rs b/src/network.rs
index 29ac603..f165e3e 100644
--- a/src/network.rs
+++ b/src/network.rs
@@ -81,6 +81,14 @@ impl NeuraTrainable for NeuraNetwork {
         let backprop_epsilon = loss.nabla(target, &final_activation);
         self.layer.backpropagate(&input, backprop_epsilon)
     }
+
+    fn prepare_epoch(&mut self) {
+        self.layer.prepare_epoch();
+    }
+
+    fn cleanup(&mut self) {
+        self.layer.cleanup();
+    }
 }
 
 impl> NeuraTrainable
@@ -108,6 +116,16 @@ impl
     ) -> (Self::Input, Self::Delta);
+
+    /// Called before an epoch begins, to allow the network to set itself up for training.
+    fn prepare_epoch(&mut self);
+
+    /// Called at the end of training, to allow the network to clean itself up
+    fn cleanup(&mut self);
 }
 
 pub trait NeuraGradientSolver {
@@ -179,6 +193,7 @@ impl NeuraBatchedTrainer {
         let mut previous_gradient_sum = as NeuraTrainable>::Delta::zero();
         'd: for epoch in 0..self.epochs {
             let mut gradient_sum = as NeuraTrainable>::Delta::zero();
+            network.prepare_epoch();
 
             for _ in 0..self.batch_size {
                 if let Some((input, target)) = iter.next() {
@@ -199,6 +214,7 @@
             }
 
             if self.log_epochs > 0 && (epoch + 1) % self.log_epochs == 0 {
+                network.cleanup();
                 let mut loss_sum = 0.0;
                 for (input, target) in test_inputs {
                     loss_sum += gradient_solver.score(&network, input, target);
                 }
                 println!("Epoch {}, Loss: {:.3}", epoch + 1, loss_sum);
             }
         }
+
+        network.cleanup();
     }
 }
diff --git a/src/utils.rs b/src/utils.rs
index 4f8e535..c6db3a4 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -48,6 +48,7 @@ pub(crate) fn reverse_dot_product(
     result
 }
 
+#[allow(dead_code)]
 pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
     for i in 0..N {
         sum[i] += operand[i];
@@ -81,14 +82,6 @@ impl Iterator for Chunked {
     }
 }
 
-pub(crate) fn chunked<I: Iterator>(
-    iter: I,
-    chunk_size: usize,
-) -> impl Iterator<Item = Vec<I::Item>> {
-    Chunked { iter, chunk_size }
-}
-
-
 struct ShuffleCycled {
     buffer: Vec,
     index: usize,
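
For reference, two short standalone sketches (not part of the patch) illustrate the techniques it introduces.

The new NeuraDropoutLayer implements inverted dropout: each unit is dropped with probability p, the mask is redrawn before every epoch, and the units that survive are scaled up so the layer's expected output stays roughly unchanged during training. The helper names dropout_mask and apply_dropout below are made up for this illustration; only the rand 0.8 API (Rng::gen_bool, rand::thread_rng) is assumed, and the rejection loop keeps redrawing until at least one unit survives, in the spirit of prepare_epoch above.

use rand::Rng;

// Draw a dropout mask of length N; `true` means "drop this unit".
// Rejection sampling: redraw until at least one unit is kept, so the
// compensating multiplier N / kept stays finite.
fn dropout_mask<const N: usize>(p: f64, rng: &mut impl Rng) -> ([bool; N], f64) {
    loop {
        let mut mask = [false; N];
        let mut kept = 0usize;
        for slot in mask.iter_mut() {
            *slot = rng.gen_bool(p);
            kept += (!*slot) as usize;
        }
        if kept > 0 {
            return (mask, N as f64 / kept as f64);
        }
    }
}

// Zero out dropped units and rescale the survivors.
fn apply_dropout<const N: usize>(vector: &mut [f64; N], mask: &[bool; N], multiplier: f64) {
    for (value, &dropped) in vector.iter_mut().zip(mask.iter()) {
        *value = if dropped { 0.0 } else { *value * multiplier };
    }
}

fn main() {
    let mut rng = rand::thread_rng();
    let (mask, multiplier) = dropout_mask::<4>(0.3, &mut rng);
    let mut activations = [1.0, 2.0, 3.0, 4.0];
    apply_dropout(&mut activations, &mask, multiplier);
    println!("{:?} (multiplier = {:.2})", activations, multiplier);
}

In the patch itself the same idea is driven by the trainer: prepare_epoch draws a fresh mask at the start of each epoch, eval and backpropagate apply it to activations and gradients alike, and cleanup resets the layer before the test-loss pass and at the end of training.

The other change, in dense.rs, replaces the uniform weight initialization with draws from a zero-mean normal distribution whose spread shrinks as the layer gets wider, in the spirit of Glorot/Xavier initialization. Below is a minimal sketch of that idea, assuming only rand 0.8 and rand_distr 0.4 (rand_distr::Normal::new takes a mean and a standard deviation, and the textbook Glorot normal rule uses sqrt(2 / (fan_in + fan_out)) as the standard deviation); the helper name glorot_normal_weights is hypothetical.

use rand::Rng;
use rand_distr::{Distribution, Normal};

// Fill an OUTPUT x INPUT weight matrix with Glorot-style normal samples.
fn glorot_normal_weights<const INPUT: usize, const OUTPUT: usize>(
    rng: &mut impl Rng,
) -> [[f64; INPUT]; OUTPUT] {
    let std_dev = (2.0 / (INPUT as f64 + OUTPUT as f64)).sqrt();
    let distribution = Normal::new(0.0, std_dev).unwrap();
    let mut weights = [[0.0; INPUT]; OUTPUT];
    for row in weights.iter_mut() {
        for weight in row.iter_mut() {
            *weight = distribution.sample(rng);
        }
    }
    weights
}

fn main() {
    let mut rng = rand::thread_rng();
    let weights = glorot_normal_weights::<3, 2>(&mut rng);
    println!("{:?}", weights);
}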