diff --git a/src/algebra/mod.rs b/src/algebra/mod.rs
index 15d1478..e7557c2 100644
--- a/src/algebra/mod.rs
+++ b/src/algebra/mod.rs
@@ -112,14 +112,13 @@ impl<F: Float, Matrix> NeuraVectorSpace for Matrix
 where
     Matrix: std::ops::MulAssign<F>,
     for<'c> Matrix: std::ops::AddAssign<&'c Matrix>,
-    F: From<f64> + Into<f64>,
 {
     fn add_assign(&mut self, other: &Self) {
         *self += other;
     }
 
     fn mul_assign(&mut self, by: f64) {
-        *self *= <F as From<f64>>::from(by);
+        *self *= F::from(by).unwrap();
     }
 
     fn norm_squared(&self) -> f64 {
@@ -127,7 +126,8 @@ where
             .map(|x| *x * *x)
             .reduce(|sum, curr| sum + curr)
             .unwrap_or(F::zero())
-            .into()
+            .to_f64()
+            .unwrap_or(0.0)
     }
 }
 
@@ -142,10 +142,6 @@ macro_rules! base {
             std::ops::MulAssign::mul_assign(self, other as $type);
         }
 
-        // fn zero() -> Self {
-        //     <Self>::default()
-        // }
-
         fn norm_squared(&self) -> f64 {
             (self * self) as f64
         }
diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs
index 0b4d833..b6cb987 100644
--- a/src/derivable/loss.rs
+++ b/src/derivable/loss.rs
@@ -1,4 +1,5 @@
 use nalgebra::DVector;
+use num::Float;
 
 use crate::algebra::NeuraVector;
 
@@ -7,24 +8,24 @@ use super::NeuraLoss;
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub struct Euclidean;
 
-impl NeuraLoss for Euclidean {
-    type Input = DVector<f64>;
-    type Target = DVector<f64>;
+impl<F: Float> NeuraLoss<DVector<F>> for Euclidean {
+    type Target = DVector<F>;
+    type Output = F;
 
     #[inline]
-    fn eval(&self, target: &DVector<f64>, actual: &DVector<f64>) -> f64 {
+    fn eval(&self, target: &DVector<F>, actual: &DVector<F>) -> F {
         assert_eq!(target.shape(), actual.shape());
-        let mut sum_squared = 0.0;
+        let mut sum_squared = F::zero();
 
         for i in 0..target.len() {
-            sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]);
+            sum_squared = sum_squared + (target[i] - actual[i]) * (target[i] - actual[i]);
         }
 
-        sum_squared * 0.5
+        sum_squared * F::from(0.5).unwrap()
     }
 
     #[inline]
-    fn nabla(&self, target: &DVector<f64>, actual: &DVector<f64>) -> DVector<f64> {
+    fn nabla(&self, target: &DVector<F>, actual: &DVector<F>) -> DVector<F> {
         let mut res = DVector::zeros(target.len());
 
         // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
@@ -61,11 +62,11 @@ impl<const N: usize> CrossEntropy<N> {
     }
 }
 
-impl<const N: usize> NeuraLoss for CrossEntropy<N> {
-    type Input = NeuraVector<N, f64>;
+impl<const N: usize> NeuraLoss<NeuraVector<N, f64>> for CrossEntropy<N> {
     type Target = NeuraVector<N, f64>;
+    type Output = f64;
 
-    fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 {
+    fn eval(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> f64 {
         let mut result = 0.0;
 
         for i in 0..N {
@@ -75,7 +76,7 @@ impl<const N: usize> NeuraLoss for CrossEntropy<N> {
         result
     }
 
-    fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input {
+    fn nabla(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> NeuraVector<N, f64> {
         let mut result = NeuraVector::default();
 
         for i in 0..N {
diff --git a/src/derivable/mod.rs b/src/derivable/mod.rs
index 94a7a84..2100224 100644
--- a/src/derivable/mod.rs
+++ b/src/derivable/mod.rs
@@ -24,15 +24,15 @@ pub trait NeuraDerivable<F> {
     }
 }
 
-pub trait NeuraLoss {
-    type Input;
+pub trait NeuraLoss<Input> {
     type Target;
+    type Output;
 
-    fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64;
+    fn eval(&self, target: &Self::Target, actual: &Input) -> Self::Output;
 
     /// Should return the gradient of the loss function according to `actual`
     /// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
-    fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
+    fn nabla(&self, target: &Self::Target, actual: &Input) -> Input;
 }
 
 pub trait NeuraReducer<F> {
diff --git a/src/layer/dense.rs b/src/layer/dense.rs
index ef5a6df..b2e46a7 100644
--- a/src/layer/dense.rs
+++ b/src/layer/dense.rs
@@ -17,8 +17,7 @@ pub struct NeuraDenseLayer<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>> {
 }
 
 #[derive(Clone, Debug)]
-pub struct NeuraDenseLayerPartial<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>, R: Rng>
-{
+pub struct NeuraDenseLayerPartial<F, Act, Reg, R: Rng> {
     activation: Act,
     regularization: Reg,
     output_size: usize,
@@ -26,11 +25,8 @@ pub struct NeuraDenseLayerPartial<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>, R: Rng>
     phantom: PhantomData<F>,
 }
 
-impl<
-    F: Float + From<f64> + std::fmt::Debug + 'static,
-    Act: NeuraDerivable<F>,
-    Reg: NeuraDerivable<F>,
-> NeuraDenseLayer<F, Act, Reg>
+impl<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>>
+    NeuraDenseLayer<F, Act, Reg>
 {
     pub fn new(
         weights: DMatrix<F>,
@@ -58,20 +54,28 @@ impl<
     where
         rand_distr::StandardNormal: rand_distr::Distribution<F>,
     {
-        let distribution = rand_distr::Normal::new(
-            F::zero(),
-            <F as From<f64>>::from(
-                activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64),
-            ),
-        )
-        .unwrap();
+        let stddev = activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64);
+        let stddev = F::from(stddev).unwrap_or_else(|| {
+            panic!(
+                "Couldn't convert stddev ({}) to type {}",
+                stddev,
+                stringify!(F)
+            );
+        });
+        let bias = F::from(activation.bias_hint()).unwrap_or_else(|| {
+            panic!(
+                "Couldn't convert bias ({}) to type {}",
+                activation.bias_hint(),
+                stringify!(F)
+            );
+        });
+
+        let distribution = rand_distr::Normal::new(F::zero(), stddev)
+            .expect("Couldn't create normal distribution");
 
         Self {
             weights: DMatrix::from_distribution(output_size, input_size, &distribution, rng),
-            bias: DVector::from_element(
-                output_size,
-                <F as From<f64>>::from(activation.bias_hint()),
-            ),
+            bias: DVector::from_element(output_size, bias),
             activation,
             regularization,
         }
@@ -94,7 +98,7 @@ impl<
 }
 
 impl<
-    F: Float + From<f64> + std::fmt::Debug + 'static,
+    F: Float + std::fmt::Debug + 'static,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
     R: Rng,
@@ -122,7 +126,7 @@ impl<
 }
 
 impl<
-    F: Float + From<f64> + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
+    F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
 > NeuraLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
@@ -139,13 +143,7 @@ impl<
 }
 
 impl<
-    F: Float
-        + From<f64>
-        + Into<f64>
-        + std::fmt::Debug
-        + 'static
-        + std::ops::AddAssign
-        + std::ops::MulAssign,
+    F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
 > NeuraTrainableLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs
new file mode 100644
index 0000000..bc2e92c
--- /dev/null
+++ b/src/layer/dropout.rs
@@ -0,0 +1,131 @@
+use super::*;
+use nalgebra::DVector;
+use num::Float;
+use rand::Rng;
+
+#[derive(Clone, Debug)]
+pub struct NeuraDropoutLayer<R: Rng> {
+    pub dropout_probability: f64,
+    multiplier: f64,
+    mask: DVector<bool>,
+    rng: R,
+    shape: NeuraShape,
+}
+
+impl<R: Rng> NeuraDropoutLayer<R> {
+    pub fn new(dropout_probability: f64, rng: R) -> Self {
+        Self {
+            dropout_probability,
+            multiplier: 1.0,
+            mask: DVector::from_element(0, false),
+            rng,
+            shape: NeuraShape::Vector(0),
+        }
+    }
+
+    fn apply_dropout<F: Float + From<f64>>(&self, vector: &mut DVector<F>) {
+        let multiplier = <F as From<f64>>::from(self.multiplier);
+        for (index, &dropout) in self.mask.iter().enumerate() {
+            if dropout {
+                vector[index] = F::zero();
+            } else {
+                vector[index] = vector[index] * multiplier;
+            }
+        }
+    }
+}
+
+impl<R: Rng> NeuraPartialLayer for NeuraDropoutLayer<R> {
+    type Constructed = NeuraDropoutLayer<R>;
+
+    type Err = ();
+
+    fn construct(mut self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
+        self.shape = input_shape;
+        self.mask = DVector::from_element(input_shape.size(), false);
+        Ok(self)
+    }
+
+    fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
+        constructed.shape
+    }
+}
+
+impl<R: Rng, F: Float + From<f64>> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
+    type Output = DVector<F>;
+
+    fn eval(&self, input: &DVector<F>) -> Self::Output {
+        let mut output = input.clone();
+        self.apply_dropout(&mut output);
+        output
+    }
+}
+
+impl<R: Rng, F: Float + From<f64>> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
+    type Gradient = ();
+
+    fn default_gradient(&self) -> Self::Gradient {
+        ()
+    }
+
+    fn backprop_layer(
+        &self,
+        _input: &DVector<F>,
+        mut epsilon: Self::Output,
+    ) -> (DVector<F>, Self::Gradient) {
+        self.apply_dropout(&mut epsilon);
+
+        (epsilon, ())
+    }
+
+    fn regularize_layer(&self) -> Self::Gradient {
+        ()
+    }
+
+    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
+        // Noop
+    }
+
+    fn prepare_layer(&mut self, is_training: bool) {
+        let length = self.shape.size();
+        if !is_training {
+            self.mask = DVector::from_element(length, false);
+            self.multiplier = 1.0;
+            return;
+        }
+
+        // Rejection sampling to prevent all the inputs from being dropped out
+        loop {
+            let mut sum = 0;
+            for i in 0..length {
+                self.mask[i] = self.rng.gen_bool(self.dropout_probability);
+                sum += self.mask[i] as usize;
+            }
+
+            if sum < length {
+                self.multiplier = length as f64 / (length - sum) as f64;
+                break;
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_rejection_sampling() {
+        let mut layer = NeuraDropoutLayer::new(0.9, rand::thread_rng())
+            .construct(NeuraShape::Vector(1))
+            .unwrap();
+
+        for _ in 0..100 {
+            <NeuraDropoutLayer<_> as NeuraTrainableLayer<DVector<f64>>>::prepare_layer(
+                &mut layer, true,
+            );
+            assert!(layer.multiplier.is_finite());
+            assert!(!layer.multiplier.is_nan());
+        }
+    }
+}
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 5f61dd0..7e70357 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -1,6 +1,8 @@
 use crate::algebra::NeuraVectorSpace;
 
 pub mod dense;
+pub mod dropout;
+
 pub use dense::NeuraDenseLayer;
 
 #[derive(Clone, Copy, PartialEq, Debug)]
@@ -108,6 +110,6 @@ macro_rules! neura_layer {
             rand::thread_rng(),
             $activation,
             $crate::derivable::regularize::NeuraL0,
-        )
+        ) as $crate::layer::dense::NeuraDenseLayerPartial<f64, _, _, _>
     };
 }
diff --git a/src/network/mod.rs b/src/network/mod.rs
index d08ae3f..5527e21 100644
--- a/src/network/mod.rs
+++ b/src/network/mod.rs
@@ -10,7 +10,7 @@ pub trait NeuraTrainableNetwork<Input>: NeuraLayer<Input> {
     fn apply_gradient(&mut self, gradient: &Self::Gradient);
 
     /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
-    fn backpropagate<Loss: NeuraLoss<Input = Input>>(
+    fn backpropagate<Loss: NeuraLoss<Input>>(
         &self,
         input: &Input,
         target: &Loss::Target,
diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs
index e6920ea..f3bafcc 100644
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@@ -146,7 +146,7 @@ impl<
         self.child_network.apply_gradient(&gradient.1);
     }
 
-    fn backpropagate<Loss: NeuraLoss<Input = Input>>(
+    fn backpropagate<Loss: NeuraLoss<Input>>(
         &self,
         input: &Input,
         target: &Loss::Target,
@@ -193,7 +193,7 @@ impl<Input> NeuraTrainableNetwork<Input> for () {
     }
 
     #[inline(always)]
-    fn backpropagate<Loss: NeuraLoss<Input = Input>>(
+    fn backpropagate<Loss: NeuraLoss<Input>>(
         &self,
         final_activation: &Input,
         target: &Loss::Target,
@@ -282,6 +282,6 @@ mod test {
             .construct(NeuraShape::Vector(2))
             .unwrap();
 
-        network.eval(&dvector![0.0f64, 0.0]);
+        network.eval(&dvector![0.0, 0.0]);
     }
 }
diff --git a/src/train.rs b/src/train.rs
index 4eede29..b9de86e 100644
--- a/src/train.rs
+++ b/src/train.rs
@@ -1,3 +1,5 @@
+use num::ToPrimitive;
+
 use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, network::NeuraTrainableNetwork};
 
 pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<Input>> {
@@ -12,11 +14,11 @@ pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<Input>> {
     fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64;
 }
 
-pub struct NeuraBackprop<Loss: NeuraLoss + Clone> {
+pub struct NeuraBackprop<Loss> {
     loss: Loss,
 }
 
-impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
+impl<Loss> NeuraBackprop<Loss> {
     pub fn new(loss: Loss) -> Self {
         Self { loss }
     }
@@ -26,8 +28,10 @@ impl<
     Input,
     Target,
     Trainable: NeuraTrainableNetwork<Input>,
-    Loss: NeuraLoss<Input = Input, Target = Target> + Clone,
+    Loss: NeuraLoss<Input, Target = Target> + Clone,
 > NeuraGradientSolver<Input, Target, Trainable> for NeuraBackprop<Loss>
+where
+    <Loss as NeuraLoss<Input>>::Output: ToPrimitive,
 {
     fn get_gradient(
         &self,
@@ -40,7 +44,7 @@ impl<
 
     fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64 {
         let output = trainable.eval(&input);
-        self.loss.eval(target, &output)
+        self.loss.eval(target, &output).to_f64().unwrap()
     }
 }
 
@@ -182,8 +186,8 @@ mod test {
     use crate::{
         assert_approx,
        derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
-        layer::{NeuraLayer, NeuraDenseLayer},
-        network::sequential::{NeuraSequentialTail, NeuraSequential},
+        layer::{NeuraDenseLayer, NeuraLayer},
+        network::sequential::{NeuraSequential, NeuraSequentialTail},
         neura_sequential,
     };
diff --git a/src/utils.rs b/src/utils.rs
index 8fc3122..c342c95 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -128,7 +128,7 @@ macro_rules! assert_approx {
     ( $left:expr, $right:expr, $epsilon:expr ) => {
         let left = $left;
        let right = $right;
-        if (left - right).abs() >= $epsilon {
+        if ((left - right) as f64).abs() >= $epsilon as f64 {
             panic!("Expected {} to be approximately equal to {}", left, right);
         }
     };
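
Note (not part of the patch): the sketch below is a minimal, dependency-free illustration of the reworked `NeuraLoss<Input>` shape introduced above, with an `Output` associated type instead of a hard-coded `f64`. `HalfSquaredError` and the `Vec<f64>` input are hypothetical stand-ins for `Euclidean` and `DVector<F>`; the real implementations live in `src/derivable/loss.rs`.

// Standalone reduction of the reworked trait, mirroring src/derivable/mod.rs.
pub trait NeuraLoss<Input> {
    type Target;
    type Output;

    fn eval(&self, target: &Self::Target, actual: &Input) -> Self::Output;
    fn nabla(&self, target: &Self::Target, actual: &Input) -> Input;
}

// Hypothetical loss mirroring Euclidean: E = 0.5 * sum((y - y')^2).
struct HalfSquaredError;

impl NeuraLoss<Vec<f64>> for HalfSquaredError {
    type Target = Vec<f64>;
    type Output = f64;

    fn eval(&self, target: &Self::Target, actual: &Vec<f64>) -> f64 {
        assert_eq!(target.len(), actual.len());
        target
            .iter()
            .zip(actual)
            .map(|(t, a)| (t - a) * (t - a))
            .sum::<f64>()
            * 0.5
    }

    fn nabla(&self, target: &Self::Target, actual: &Vec<f64>) -> Vec<f64> {
        // ∂E/∂yᵢ = yᵢ - yᵢ'
        actual.iter().zip(target).map(|(a, t)| a - t).collect()
    }
}

fn main() {
    let loss = HalfSquaredError;
    let target = vec![1.0, 0.0];
    let actual = vec![0.5, 0.5];
    println!("loss = {}", loss.eval(&target, &actual));    // 0.25
    println!("grad = {:?}", loss.nabla(&target, &actual)); // [-0.5, 0.5]
}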