Dropout layers

branch: main
Shad Amethyst, 2 years ago
parent 8ac82e20e2
commit 220c61ff6b

@@ -9,3 +9,4 @@ edition = "2021"
 ndarray = "^0.15"
 # num-traits = "0.2.15"
 rand = "^0.8"
+rand_distr = "0.4.3"

@@ -10,10 +10,16 @@ use rand::Rng;
 fn main() {
     let mut network = neura_network![
-        neura_layer!("dense", LeakyRelu(0.01), 4, 2),
-        neura_layer!("dense", Tanh, 3),
-        neura_layer!("dense", Relu, 2)
+        neura_layer!("dense", LeakyRelu(0.01), 9, 2),
+        neura_layer!("dropout", 0.1),
+        neura_layer!("dense", LeakyRelu(0.01), 9),
+        neura_layer!("dropout", 0.3),
+        neura_layer!("dense", LeakyRelu(0.01), 6),
+        neura_layer!("dropout", 0.1),
+        neura_layer!("dense", LeakyRelu(0.01), 4),
+        neura_layer!("dense", LeakyRelu(0.1), 2)
     ];
+    // println!("{:#?}", network);

     let mut rng = rand::thread_rng();
     let inputs = (0..=1).cycle().map(move |category| {
@@ -48,6 +54,8 @@ fn main() {
         let guess = argmax(&network.eval(&input));
         writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
     }
+
+    // println!("{:#?}", network);
 }

 fn one_hot<const N: usize>(value: usize) -> [f64; N] {

@@ -7,6 +7,23 @@ pub trait NeuraVectorSpace {
     fn zero() -> Self;
 }

+impl NeuraVectorSpace for () {
+    #[inline(always)]
+    fn add_assign(&mut self, _other: &Self) {
+        // Noop
+    }
+
+    #[inline(always)]
+    fn mul_assign(&mut self, _by: f64) {
+        // Noop
+    }
+
+    #[inline(always)]
+    fn zero() -> Self {
+        ()
+    }
+}
+
 impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
     fn add_assign(&mut self, other: &Self) {
         NeuraVectorSpace::add_assign(&mut self.0, &other.0);
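A side note (my reading, not text from the commit): the unit impl above exists so that layers without trainable weights, such as the dropout layer introduced here, can use Delta = () and still flow through the generic gradient bookkeeping. A minimal sketch under that assumption, assuming it lives inside the crate:

use crate::algebra::NeuraVectorSpace; // path as used elsewhere in this commit

// Minimal sketch (not from the commit): generic gradient accumulation keeps working
// when one of the deltas is the unit type used by parameter-free layers like dropout.
fn accumulate<D: NeuraVectorSpace>(total: &mut D, step: &D) {
    total.add_assign(step);
}

fn main() {
    let mut total: ((), ()) = NeuraVectorSpace::zero();
    accumulate(&mut total, &((), ())); // a no-op, but it type-checks like any other delta
}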

@@ -1,5 +1,7 @@
 use super::NeuraLayer;
 use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
+
+use rand_distr::Distribution;
 use rand::Rng;

 #[derive(Clone, Debug)]
@@ -31,11 +33,11 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
     pub fn from_rng(rng: &mut impl Rng, activation: Act) -> Self {
         let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN];
-        let multiplier = std::f64::consts::SQRT_2 / (INPUT_LEN as f64).sqrt();
+        let distribution = rand_distr::Normal::new(0.0, 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64)).unwrap();

         for i in 0..OUTPUT_LEN {
             for j in 0..INPUT_LEN {
-                weights[i][j] = rng.gen_range(0.0..multiplier);
+                weights[i][j] = distribution.sample(rng);
             }
         }
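One remark on the new initialization (an observation, not something the commit states): rand_distr::Normal::new(mean, std_dev) takes a standard deviation as its second argument, so the weights above are drawn with std 2 / (fan_in + fan_out). A Xavier/Glorot-style initializer would typically use the square root of that quantity instead; a hedged sketch of that variant, with a hypothetical helper name:

use rand::Rng;
use rand_distr::Distribution;

// Hypothetical variant, not what the commit does: Xavier/Glorot initialization
// targets variance 2 / (fan_in + fan_out), i.e. std_dev = sqrt(2 / (fan_in + fan_out)).
fn xavier_weight(rng: &mut impl Rng, fan_in: usize, fan_out: usize) -> f64 {
    let std_dev = (2.0 / (fan_in as f64 + fan_out as f64)).sqrt();
    rand_distr::Normal::new(0.0, std_dev).unwrap().sample(rng)
}

Whether the committed value is an intentionally tighter spread or an oversight is not clear from the diff alone.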

@@ -0,0 +1,87 @@
+use rand::Rng;
+
+use crate::train::NeuraTrainableLayer;
+
+use super::NeuraLayer;
+
+#[derive(Clone, Debug)]
+pub struct NeuraDropoutLayer<const LENGTH: usize, R: Rng> {
+    pub dropout_probability: f64,
+    multiplier: f64,
+    mask: [bool; LENGTH],
+    rng: R,
+}
+
+impl<const LENGTH: usize, R: Rng> NeuraDropoutLayer<LENGTH, R> {
+    pub fn new(dropout_probability: f64, rng: R) -> Self {
+        Self {
+            dropout_probability,
+            multiplier: 1.0,
+            mask: [false; LENGTH],
+            rng,
+        }
+    }
+
+    fn apply_dropout(&self, vector: &mut [f64; LENGTH]) {
+        for (index, &dropout) in self.mask.iter().enumerate() {
+            if dropout {
+                vector[index] = 0.0;
+            } else {
+                vector[index] *= self.multiplier;
+            }
+        }
+    }
+}
+
+impl<const LENGTH: usize, R: Rng> NeuraLayer for NeuraDropoutLayer<LENGTH, R> {
+    type Input = [f64; LENGTH];
+    type Output = [f64; LENGTH];
+
+    fn eval(&self, input: &Self::Input) -> Self::Output {
+        let mut result = input.clone();
+        self.apply_dropout(&mut result);
+        result
+    }
+}
+
+impl<const LENGTH: usize, R: Rng> NeuraTrainableLayer for NeuraDropoutLayer<LENGTH, R> {
+    type Delta = ();
+
+    fn backpropagate(
+        &self,
+        _input: &Self::Input,
+        mut epsilon: Self::Output,
+    ) -> (Self::Input, Self::Delta) {
+        self.apply_dropout(&mut epsilon);
+
+        (epsilon, ())
+    }
+
+    #[inline(always)]
+    fn apply_gradient(&mut self, _gradient: &Self::Delta) {
+        // Noop
+    }
+
+    fn prepare_epoch(&mut self) {
+        // Rejection sampling to prevent all the inputs from being dropped out
+        loop {
+            let mut sum = 0;
+            for i in 0..LENGTH {
+                self.mask[i] = self.rng.gen_bool(self.dropout_probability);
+                sum += (!self.mask[i]) as usize; // count the units that survived
+            }
+
+            if sum > 0 {
+                self.multiplier = LENGTH as f64 / sum as f64;
+                break;
+            }
+        }
+    }
+
+    fn cleanup(&mut self) {
+        self.mask = [false; LENGTH];
+        self.multiplier = 1.0;
+    }
+}
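For intuition (my gloss, not text from the commit): apply_dropout implements the usual "inverted dropout" rescaling, except that the factor is computed from the sampled mask (LENGTH / kept) rather than from the probability (1 / (1 - p)), so the summed activation is preserved exactly for that mask rather than only in expectation. A small worked example with a hypothetical 4-unit layer:

fn main() {
    // Hypothetical 4-unit mask with one unit dropped (true = dropped),
    // mirroring what prepare_epoch() samples and apply_dropout() applies.
    let mask = [false, true, false, false];
    let kept = mask.iter().filter(|&&dropped| !dropped).count(); // 3
    let multiplier = mask.len() as f64 / kept as f64; // 4 / 3

    let mut activations = [1.0_f64; 4];
    for (value, &dropped) in activations.iter_mut().zip(&mask) {
        *value = if dropped { 0.0 } else { *value * multiplier };
    }

    // The 3 surviving units, scaled by 4/3, sum back to 4.0, the pre-dropout total.
    assert!((activations.iter().sum::<f64>() - 4.0).abs() < 1e-12);
}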

@@ -1,6 +1,9 @@
 mod dense;
 pub use dense::NeuraDenseLayer;

+mod dropout;
+pub use dropout::NeuraDropoutLayer;
+
 pub trait NeuraLayer {
     type Input;
     type Output;
@@ -19,4 +22,9 @@ macro_rules! neura_layer {
         NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation)
             as NeuraDenseLayer<_, $input, $output>
     };
+
+    ( "dropout", $probability:expr ) => {
+        NeuraDropoutLayer::new($probability, rand::thread_rng())
+            as NeuraDropoutLayer<_, _>
+    };
 }

@@ -14,7 +14,11 @@ pub mod prelude {
     // Structs and traits
     pub use crate::network::{NeuraNetwork};
-    pub use crate::layer::{NeuraLayer, NeuraDenseLayer};
+    pub use crate::layer::{
+        NeuraLayer,
+        NeuraDenseLayer,
+        NeuraDropoutLayer
+    };
     pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
     pub use crate::utils::cycle_shuffling;
 }

@@ -81,6 +81,14 @@ impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
         let backprop_epsilon = loss.nabla(target, &final_activation);
         self.layer.backpropagate(&input, backprop_epsilon)
     }
+
+    fn prepare_epoch(&mut self) {
+        self.layer.prepare_epoch();
+    }
+
+    fn cleanup(&mut self) {
+        self.layer.cleanup();
+    }
 }

 impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable
@@ -108,6 +116,16 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Out
         (backprop_gradient, (layer_gradient, weights_gradient))
     }
+
+    fn prepare_epoch(&mut self) {
+        self.layer.prepare_epoch();
+        self.child_network.prepare_epoch();
+    }
+
+    fn cleanup(&mut self) {
+        self.layer.cleanup();
+        self.child_network.cleanup();
+    }
 }

 #[macro_export]

@@ -1,11 +1,11 @@
 use crate::{
-    // utils::{assign_add_vector, chunked},
     algebra::NeuraVectorSpace,
     derivable::NeuraLoss,
     layer::NeuraLayer,
-    network::NeuraNetwork, utils::cycle_shuffling,
+    network::NeuraNetwork,
 };

+// TODO: move this to layer/mod.rs
 pub trait NeuraTrainableLayer: NeuraLayer {
     type Delta: NeuraVectorSpace;
@@ -28,6 +28,14 @@ pub trait NeuraTrainableLayer: NeuraLayer {
     /// Applies `δW_l` to the weights of the layer
     fn apply_gradient(&mut self, gradient: &Self::Delta);
+
+    /// Called before an epoch begins, to allow the layer to set itself up for training.
+    #[inline(always)]
+    fn prepare_epoch(&mut self) {}
+
+    /// Called at the end of training, to allow the layer to clean itself up
+    #[inline(always)]
+    fn cleanup(&mut self) {}
 }

 pub trait NeuraTrainable: NeuraLayer {
@@ -42,6 +50,12 @@ pub trait NeuraTrainable: NeuraLayer {
         target: &Loss::Target,
         loss: Loss,
     ) -> (Self::Input, Self::Delta);
+
+    /// Called before an epoch begins, to allow the network to set itself up for training.
+    fn prepare_epoch(&mut self);
+
+    /// Called at the end of training, to allow the network to clean itself up
+    fn cleanup(&mut self);
 }

 pub trait NeuraGradientSolver<Output, Target = Output> {
@@ -179,6 +193,7 @@ impl NeuraBatchedTrainer {
         let mut previous_gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
         'd: for epoch in 0..self.epochs {
             let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
+            network.prepare_epoch();

             for _ in 0..self.batch_size {
                 if let Some((input, target)) = iter.next() {
@@ -199,6 +214,7 @@
             }

             if self.log_epochs > 0 && (epoch + 1) % self.log_epochs == 0 {
+                network.cleanup();
                 let mut loss_sum = 0.0;
                 for (input, target) in test_inputs {
                     loss_sum += gradient_solver.score(&network, input, target);
@@ -207,6 +223,8 @@
                 println!("Epoch {}, Loss: {:.3}", epoch + 1, loss_sum);
             }
         }
+
+        network.cleanup();
     }
 }
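A usage sketch (assumed, not from the commit): after cleanup() the mask is all-false and the multiplier is 1.0, so the dropout layer becomes an identity map and evaluation after training is deterministic. The crate path below is hypothetical, and prepare_epoch/cleanup come from the NeuraTrainableLayer trait in the train module:

use neuramethyst::prelude::*;                  // hypothetical crate name; adjust as needed
use neuramethyst::train::NeuraTrainableLayer;  // provides prepare_epoch() and cleanup()

fn main() {
    // Exercise the dropout lifecycle directly on a 4-unit layer.
    let mut layer = NeuraDropoutLayer::<4, _>::new(0.25, rand::thread_rng());

    layer.prepare_epoch(); // samples a mask and the matching rescaling factor
    let _training_output = layer.eval(&[1.0, 2.0, 3.0, 4.0]); // some entries zeroed, the rest scaled up

    layer.cleanup(); // mask reset to [false; 4], multiplier back to 1.0
    assert_eq!(layer.eval(&[1.0, 2.0, 3.0, 4.0]), [1.0, 2.0, 3.0, 4.0]);
}

This also explains why the trainer calls network.cleanup() before computing the logged test loss: the score is measured with dropout disabled.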

@@ -48,6 +48,7 @@ pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
     result
 }

+#[allow(dead_code)]
 pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
     for i in 0..N {
         sum[i] += operand[i];
@@ -81,14 +82,6 @@ impl<J: Iterator> Iterator for Chunked<J> {
     }
 }

-pub(crate) fn chunked<I: Iterator>(
-    iter: I,
-    chunk_size: usize,
-) -> impl Iterator<Item = Vec<I::Item>> {
-    Chunked { iter, chunk_size }
-}
-
 struct ShuffleCycled<I: Iterator, R: rand::Rng> {
     buffer: Vec<I::Item>,
     index: usize,
