🎨 Clean up types for NeuraLoss and NeuraDensePartialLayer

main
Shad Amethyst 2 years ago
parent 9b821b92b0
commit 969fa3197a

@@ -112,14 +112,13 @@ impl<F: Float, R: nalgebra::Dim, C: nalgebra::Dim, S: nalgebra::RawStorage<F, R,
 where
     Matrix<F, R, C, S>: std::ops::MulAssign<F>,
     for<'c> Matrix<F, R, C, S>: std::ops::AddAssign<&'c Matrix<F, R, C, S>>,
-    F: From<f64> + Into<f64>,
 {
     fn add_assign(&mut self, other: &Self) {
         *self += other;
     }
 
     fn mul_assign(&mut self, by: f64) {
-        *self *= <F as From<f64>>::from(by);
+        *self *= F::from(by).unwrap();
     }
 
     fn norm_squared(&self) -> f64 {
@@ -127,7 +126,8 @@ where
             .map(|x| *x * *x)
             .reduce(|sum, curr| sum + curr)
             .unwrap_or(F::zero())
-            .into()
+            .to_f64()
+            .unwrap_or(0.0)
     }
 }
@@ -142,10 +142,6 @@ macro_rules! base {
         std::ops::MulAssign::mul_assign(self, other as $type);
     }
 
-    // fn zero() -> Self {
-    //     <Self as Default>::default()
-    // }
-
     fn norm_squared(&self) -> f64 {
         (self * self) as f64
     }
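Aside (not part of the commit): the hunks above drop the `F: From<f64> + Into<f64>` bounds and instead use the fallible casts that `num::Float` already implies through `NumCast`/`ToPrimitive`. A minimal self-contained sketch of that pattern, with a made-up helper name:

```rust
use num::Float;

// Hypothetical helper (not from the repository) mirroring the new pattern:
// no `From<f64>`/`Into<f64>` bounds, only what `num::Float` already implies.
fn scale_and_norm_squared<F: Float>(values: &mut [F], by: f64) -> f64 {
    // `F::from` resolves to `num::NumCast::from` and returns an Option.
    let by = F::from(by).unwrap();
    let mut sum = F::zero();
    for v in values.iter_mut() {
        *v = *v * by;
        sum = sum + *v * *v;
    }
    // `to_f64` comes from `num::ToPrimitive`, a supertrait of `Float`.
    sum.to_f64().unwrap_or(0.0)
}

fn main() {
    let mut xs = [1.0f32, 2.0, 3.0];
    assert_eq!(scale_and_norm_squared(&mut xs, 2.0), 56.0);
}
```

The trade-off is that `F::from` and `to_f64` return `Option`, hence the `unwrap`/`unwrap_or` calls in the diff.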

@@ -1,4 +1,5 @@
 use nalgebra::DVector;
+use num::Float;
 
 use crate::algebra::NeuraVector;
@@ -7,24 +8,24 @@ use super::NeuraLoss;
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub struct Euclidean;
 
-impl NeuraLoss for Euclidean {
-    type Input = DVector<f64>;
-    type Target = DVector<f64>;
+impl<F: Float + std::fmt::Debug + 'static> NeuraLoss<DVector<F>> for Euclidean {
+    type Target = DVector<F>;
+    type Output = F;
 
     #[inline]
-    fn eval(&self, target: &DVector<f64>, actual: &DVector<f64>) -> f64 {
+    fn eval(&self, target: &DVector<F>, actual: &DVector<F>) -> F {
         assert_eq!(target.shape(), actual.shape());
-        let mut sum_squared = 0.0;
+        let mut sum_squared = F::zero();
         for i in 0..target.len() {
-            sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]);
+            sum_squared = sum_squared + (target[i] - actual[i]) * (target[i] - actual[i]);
         }
-        sum_squared * 0.5
+        sum_squared * F::from(0.5).unwrap()
     }
 
     #[inline]
-    fn nabla(&self, target: &DVector<f64>, actual: &DVector<f64>) -> DVector<f64> {
+    fn nabla(&self, target: &DVector<F>, actual: &DVector<F>) -> DVector<F> {
         let mut res = DVector::zeros(target.len());
 
         // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
@@ -61,11 +62,11 @@ impl<const N: usize> CrossEntropy<N> {
     }
 }
 
-impl<const N: usize> NeuraLoss for CrossEntropy<N> {
-    type Input = NeuraVector<N, f64>;
+impl<const N: usize> NeuraLoss<NeuraVector<N, f64>> for CrossEntropy<N> {
     type Target = NeuraVector<N, f64>;
+    type Output = f64;
 
-    fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 {
+    fn eval(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> f64 {
         let mut result = 0.0;
         for i in 0..N {
@@ -75,7 +76,7 @@ impl<const N: usize> NeuraLoss for CrossEntropy<N> {
         result
     }
 
-    fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input {
+    fn nabla(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> NeuraVector<N, f64> {
        let mut result = NeuraVector::default();
        for i in 0..N {
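Aside: a standalone mirror of the now-generic Euclidean loss above, mainly to show that one code path serves both f32 and f64 once the hard-coded `f64` becomes `F: Float`. The free function is illustrative only; in the crate this lives behind the `NeuraLoss` impl.

```rust
use nalgebra::DVector;
use num::Float;

// Illustrative free function, not the crate's API: E = 0.5 * Σᵢ (tᵢ - aᵢ)².
fn euclidean<F: Float + nalgebra::Scalar>(target: &DVector<F>, actual: &DVector<F>) -> F {
    assert_eq!(target.shape(), actual.shape());
    let mut sum_squared = F::zero();
    for i in 0..target.len() {
        sum_squared = sum_squared + (target[i] - actual[i]) * (target[i] - actual[i]);
    }
    sum_squared * F::from(0.5).unwrap()
}

fn main() {
    // Same function, two float widths.
    let t32 = DVector::from_vec(vec![1.0f32, 0.0]);
    let a32 = DVector::from_vec(vec![0.5f32, 0.5]);
    assert_eq!(euclidean(&t32, &a32), 0.25);

    let t64 = DVector::from_vec(vec![1.0f64, 0.0]);
    let a64 = DVector::from_vec(vec![0.5f64, 0.5]);
    assert_eq!(euclidean(&t64, &a64), 0.25);
}
```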

@@ -24,15 +24,15 @@ pub trait NeuraDerivable<F> {
     }
 }
 
-pub trait NeuraLoss {
-    type Input;
+pub trait NeuraLoss<Input> {
     type Target;
+    type Output;
 
-    fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64;
+    fn eval(&self, target: &Self::Target, actual: &Input) -> Self::Output;
 
     /// Should return the gradient of the loss function according to `actual`
     /// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
-    fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
+    fn nabla(&self, target: &Self::Target, actual: &Input) -> Input;
 }
 
 pub trait NeuraReducer<F> {
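Aside: with `Input` moved from an associated type to a type parameter (plus the new associated `Output`), a single loss type can now implement `NeuraLoss` for several input types, which is what lets `Euclidean` above be generic over `DVector<F>`. A self-contained restatement of the new trait shape, with a made-up `AbsoluteError` implementor:

```rust
// The trait is restated locally so the example compiles on its own; it copies
// the shape introduced by this commit. `AbsoluteError` is invented for the demo.
pub trait NeuraLoss<Input> {
    type Target;
    type Output;

    fn eval(&self, target: &Self::Target, actual: &Input) -> Self::Output;
    fn nabla(&self, target: &Self::Target, actual: &Input) -> Input;
}

struct AbsoluteError;

// Two impls of the same loss for different input types: legal now that
// `Input` is a type parameter, impossible when it was an associated type.
impl NeuraLoss<f64> for AbsoluteError {
    type Target = f64;
    type Output = f64;

    fn eval(&self, target: &f64, actual: &f64) -> f64 {
        (target - actual).abs()
    }

    fn nabla(&self, target: &f64, actual: &f64) -> f64 {
        if actual >= target { 1.0 } else { -1.0 }
    }
}

impl NeuraLoss<f32> for AbsoluteError {
    type Target = f32;
    type Output = f32;

    fn eval(&self, target: &f32, actual: &f32) -> f32 {
        (target - actual).abs()
    }

    fn nabla(&self, target: &f32, actual: &f32) -> f32 {
        if actual >= target { 1.0 } else { -1.0 }
    }
}

fn main() {
    // Selecting the impl by naming the input type explicitly:
    assert_eq!(<AbsoluteError as NeuraLoss<f64>>::eval(&AbsoluteError, &3.0, &2.5), 0.5);
    assert_eq!(<AbsoluteError as NeuraLoss<f32>>::eval(&AbsoluteError, &3.0, &2.5), 0.5);
}
```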

@@ -17,8 +17,7 @@ pub struct NeuraDenseLayer<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable
 }
 
 #[derive(Clone, Debug)]
-pub struct NeuraDenseLayerPartial<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>, R: Rng>
-{
+pub struct NeuraDenseLayerPartial<F, Act, Reg, R: Rng> {
     activation: Act,
     regularization: Reg,
     output_size: usize,
@@ -26,11 +25,8 @@ pub struct NeuraDenseLayerPartial<F: Float, Act: NeuraDe
     phantom: PhantomData<F>,
 }
 
-impl<
-        F: Float + From<f64> + std::fmt::Debug + 'static,
-        Act: NeuraDerivable<F>,
-        Reg: NeuraDerivable<F>,
-    > NeuraDenseLayer<F, Act, Reg>
+impl<F: Float + std::fmt::Debug + 'static, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>>
+    NeuraDenseLayer<F, Act, Reg>
 {
     pub fn new(
         weights: DMatrix<F>,
@@ -58,20 +54,28 @@ impl<
     where
         rand_distr::StandardNormal: rand_distr::Distribution<F>,
     {
-        let distribution = rand_distr::Normal::new(
-            F::zero(),
-            <F as From<f64>>::from(
-                activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64),
-            ),
-        )
-        .unwrap();
+        let stddev = activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64);
+        let stddev = F::from(stddev).unwrap_or_else(|| {
+            panic!(
+                "Couldn't convert stddev ({}) to type {}",
+                stddev,
+                stringify!(F)
+            );
+        });
+
+        let bias = F::from(activation.bias_hint()).unwrap_or_else(|| {
+            panic!(
+                "Couldn't convert bias ({}) to type {}",
+                activation.bias_hint(),
+                stringify!(F)
+            );
+        });
+
+        let distribution = rand_distr::Normal::new(F::zero(), stddev)
+            .expect("Couldn't create normal distribution");
 
         Self {
             weights: DMatrix::from_distribution(output_size, input_size, &distribution, rng),
-            bias: DVector::from_element(
-                output_size,
-                <F as From<f64>>::from(activation.bias_hint()),
-            ),
+            bias: DVector::from_element(output_size, bias),
             activation,
             regularization,
         }
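Aside: the rewritten `from_rng` splits the old nested expression into named `stddev` and `bias` steps. The weight stddev is a Glorot-style `2 / (fan_in + fan_out)` factor scaled by the activation's `variance_hint()`; a tiny numeric sketch with made-up sizes and hint:

```rust
use num::Float;

// Made-up numbers, only to show the arithmetic and the fallible cast.
fn init_stddev<F: Float>(variance_hint: f64, input_size: usize, output_size: usize) -> F {
    let stddev = variance_hint * 2.0 / (input_size as f64 + output_size as f64);
    // `F::from` (via `num::NumCast`) can fail, which is why the diff panics
    // with an explicit message instead of relying on a `From<f64>` bound.
    F::from(stddev).expect("Couldn't convert stddev")
}

fn main() {
    // variance_hint = 1.0, 64 inputs, 32 outputs  =>  2 / 96 ≈ 0.0208
    let stddev: f32 = init_stddev(1.0, 64, 32);
    println!("{stddev}");
}
```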
@@ -94,7 +98,7 @@ impl<
 }
 
 impl<
-    F: Float + From<f64> + std::fmt::Debug + 'static,
+    F: Float + std::fmt::Debug + 'static,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
     R: Rng,
@@ -122,7 +126,7 @@ where
 }
 
 impl<
-    F: Float + From<f64> + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
+    F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
 > NeuraLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
@@ -139,13 +143,7 @@ impl<
 }
 
 impl<
-    F: Float
-        + From<f64>
-        + Into<f64>
-        + std::fmt::Debug
-        + 'static
-        + std::ops::AddAssign
-        + std::ops::MulAssign,
+    F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
 > NeuraTrainableLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>

@@ -0,0 +1,131 @@
use super::*;
use nalgebra::DVector;
use num::Float;
use rand::Rng;

#[derive(Clone, Debug)]
pub struct NeuraDropoutLayer<R: Rng> {
    pub dropout_probability: f64,
    multiplier: f64,
    mask: DVector<bool>,
    rng: R,
    shape: NeuraShape,
}

impl<R: Rng> NeuraDropoutLayer<R> {
    pub fn new(dropout_probability: f64, rng: R) -> Self {
        Self {
            dropout_probability,
            multiplier: 1.0,
            mask: DVector::from_element(0, false),
            rng,
            shape: NeuraShape::Vector(0),
        }
    }

    fn apply_dropout<F: Float + From<f64>>(&self, vector: &mut DVector<F>) {
        let multiplier = <F as From<f64>>::from(self.multiplier);
        for (index, &dropout) in self.mask.iter().enumerate() {
            if dropout {
                vector[index] = F::zero();
            } else {
                vector[index] = vector[index] * multiplier;
            }
        }
    }
}

impl<R: Rng> NeuraPartialLayer for NeuraDropoutLayer<R> {
    type Constructed = NeuraDropoutLayer<R>;
    type Err = ();

    fn construct(mut self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        self.shape = input_shape;
        self.mask = DVector::from_element(input_shape.size(), false);
        Ok(self)
    }

    fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
        constructed.shape
    }
}

impl<R: Rng, F: Float + From<f64>> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
    type Output = DVector<F>;

    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let mut output = input.clone();
        self.apply_dropout(&mut output);
        output
    }
}

impl<R: Rng, F: Float + From<f64>> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
    type Gradient = ();

    fn default_gradient(&self) -> Self::Gradient {
        ()
    }

    fn backprop_layer(
        &self,
        _input: &DVector<F>,
        mut epsilon: Self::Output,
    ) -> (DVector<F>, Self::Gradient) {
        self.apply_dropout(&mut epsilon);
        (epsilon, ())
    }

    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }

    fn prepare_layer(&mut self, is_training: bool) {
        let length = self.shape.size();
        if !is_training {
            self.mask = DVector::from_element(length, false);
            self.multiplier = 1.0;
            return;
        }

        // Rejection sampling to prevent all the inputs from being dropped out
        loop {
            let mut sum = 0;
            for i in 0..length {
                self.mask[i] = self.rng.gen_bool(self.dropout_probability);
                sum += self.mask[i] as usize;
            }

            if sum < length {
                self.multiplier = length as f64 / (length - sum) as f64;
                break;
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_rejection_sampling() {
        let mut layer = NeuraDropoutLayer::new(0.9, rand::thread_rng())
            .construct(NeuraShape::Vector(1))
            .unwrap();

        for _ in 0..100 {
            <NeuraDropoutLayer<_> as NeuraTrainableLayer<DVector<f64>>>::prepare_layer(
                &mut layer, true,
            );
            assert!(layer.multiplier.is_finite());
            assert!(!layer.multiplier.is_nan());
        }
    }
}
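Aside: `prepare_layer` implements inverted dropout: surviving units are scaled by `length / (length - dropped)` so the output's expected magnitude roughly matches evaluation mode, and the rejection-sampling loop guarantees `length - dropped > 0`, keeping the division (and the multiplier the test checks) finite. A standalone illustration with a fixed mask, no crate types involved:

```rust
// Standalone illustration of the inverted-dropout scaling used by
// `NeuraDropoutLayer::prepare_layer`/`apply_dropout`. The mask here is fixed
// rather than sampled, purely for illustration.
fn main() {
    let input = vec![1.0f64, 2.0, 3.0, 4.0];
    let mask = vec![false, true, false, false]; // drop the second element
    let dropped = mask.iter().filter(|&&d| d).count();
    let multiplier = input.len() as f64 / (input.len() - dropped) as f64;

    let output: Vec<f64> = input
        .iter()
        .zip(&mask)
        .map(|(&x, &drop)| if drop { 0.0 } else { x * multiplier })
        .collect();

    // Surviving values 1 + 3 + 4 = 8, rescaled by 4/3 to ≈ 10.67,
    // close to the original sum of 10.
    println!("{:?} (sum = {})", output, output.iter().sum::<f64>());
}
```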

@@ -1,6 +1,8 @@
 use crate::algebra::NeuraVectorSpace;
 
 pub mod dense;
+pub mod dropout;
 
 pub use dense::NeuraDenseLayer;
 
 #[derive(Clone, Copy, PartialEq, Debug)]
@@ -108,6 +110,6 @@ macro_rules! neura_layer {
             rand::thread_rng(),
             $activation,
             $crate::derivable::regularize::NeuraL0,
-        )
+        ) as $crate::layer::dense::NeuraDenseLayerPartial<f32, _, _, _>
     };
 }

@@ -10,7 +10,7 @@ pub trait NeuraTrainableNetwork<Input>: NeuraLayer<Input> {
     fn apply_gradient(&mut self, gradient: &Self::Gradient);
 
     /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
-    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(
         &self,
         input: &Input,
         target: &Loss::Target,
@@ -146,7 +146,7 @@ impl<
         self.child_network.apply_gradient(&gradient.1);
     }
 
-    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(
         &self,
         input: &Input,
         target: &Loss::Target,
@@ -193,7 +193,7 @@ impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
     }
 
     #[inline(always)]
-    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(
         &self,
         final_activation: &Input,
         target: &Loss::Target,
@@ -282,6 +282,6 @@ mod test {
             .construct(NeuraShape::Vector(2))
             .unwrap();
 
-        network.eval(&dvector![0.0f64, 0.0]);
+        network.eval(&dvector![0.0, 0.0]);
     }
 }

@@ -1,3 +1,5 @@
+use num::ToPrimitive;
+
 use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, network::NeuraTrainableNetwork};
 
 pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<Input>> {
@@ -12,11 +14,11 @@ pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<In
 }
 
 #[non_exhaustive]
-pub struct NeuraBackprop<Loss: NeuraLoss + Clone> {
+pub struct NeuraBackprop<Loss> {
     loss: Loss,
 }
 
-impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
+impl<Loss> NeuraBackprop<Loss> {
     pub fn new(loss: Loss) -> Self {
         Self { loss }
     }
@@ -26,8 +28,10 @@ impl<
         Input,
         Target,
         Trainable: NeuraTrainableNetwork<Input>,
-        Loss: NeuraLoss<Input = Trainable::Output, Target = Target> + Clone,
+        Loss: NeuraLoss<Trainable::Output, Target = Target> + Clone,
     > NeuraGradientSolver<Input, Target, Trainable> for NeuraBackprop<Loss>
+where
+    <Loss as NeuraLoss<Trainable::Output>>::Output: ToPrimitive,
 {
     fn get_gradient(
         &self,
@@ -40,7 +44,7 @@ impl<
     fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64 {
         let output = trainable.eval(&input);
 
-        self.loss.eval(target, &output)
+        self.loss.eval(target, &output).to_f64().unwrap()
     }
 }
@@ -182,8 +186,8 @@ mod test {
     use crate::{
         assert_approx,
         derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
-        layer::{NeuraLayer, NeuraDenseLayer},
-        network::sequential::{NeuraSequentialTail, NeuraSequential},
+        layer::{NeuraDenseLayer, NeuraLayer},
+        network::sequential::{NeuraSequential, NeuraSequentialTail},
         neura_sequential,
     };
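Aside: `score` used to depend on the loss returning `f64`; it now accepts any `Output: ToPrimitive` and converts at the boundary. A trimmed-down model of that pattern (the `Loss` trait here stands in for `NeuraLoss` with the target type folded into `Input`, and `Squared` is invented for the example):

```rust
use num::ToPrimitive;

// Simplified stand-in for the crate's `NeuraLoss`: the loss may return any
// numeric `Output`, and the solver converts it to f64 only when scoring.
trait Loss<Input> {
    type Output;
    fn eval(&self, target: &Input, actual: &Input) -> Self::Output;
}

struct Squared;

impl Loss<f32> for Squared {
    type Output = f32;
    fn eval(&self, target: &f32, actual: &f32) -> f32 {
        (target - actual) * (target - actual)
    }
}

// Mirrors the new `where <Loss as NeuraLoss<...>>::Output: ToPrimitive` bound.
fn score<I, L: Loss<I>>(loss: &L, target: &I, actual: &I) -> f64
where
    L::Output: ToPrimitive,
{
    loss.eval(target, actual).to_f64().unwrap()
}

fn main() {
    assert_eq!(score(&Squared, &1.0f32, &0.5), 0.25);
}
```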

@@ -128,7 +128,7 @@ macro_rules! assert_approx {
     ( $left:expr, $right:expr, $epsilon:expr ) => {
         let left = $left;
         let right = $right;
-        if (left - right).abs() >= $epsilon {
+        if ((left - right) as f64).abs() >= $epsilon as f64 {
             panic!("Expected {} to be approximately equal to {}", left, right);
         }
     };
