🎨 Clean up types for NeuraLoss and NeuraDensePartialLayer

main
Shad Amethyst 2 years ago
parent 9b821b92b0
commit 969fa3197a

@@ -112,14 +112,13 @@ impl<F: Float, R: nalgebra::Dim, C: nalgebra::Dim, S: nalgebra::RawStorage<F, R,
 where
     Matrix<F, R, C, S>: std::ops::MulAssign<F>,
     for<'c> Matrix<F, R, C, S>: std::ops::AddAssign<&'c Matrix<F, R, C, S>>,
-    F: From<f64> + Into<f64>,
 {
     fn add_assign(&mut self, other: &Self) {
         *self += other;
     }
 
     fn mul_assign(&mut self, by: f64) {
-        *self *= <F as From<f64>>::from(by);
+        *self *= F::from(by).unwrap();
     }
 
     fn norm_squared(&self) -> f64 {
@@ -127,7 +126,8 @@ where
             .map(|x| *x * *x)
             .reduce(|sum, curr| sum + curr)
             .unwrap_or(F::zero())
-            .into()
+            .to_f64()
+            .unwrap_or(0.0)
     }
 }
@@ -142,10 +142,6 @@ macro_rules! base {
         std::ops::MulAssign::mul_assign(self, other as $type);
     }
 
-    // fn zero() -> Self {
-    //     <Self as Default>::default()
-    // }
-
     fn norm_squared(&self) -> f64 {
         (self * self) as f64
     }
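Aside (not part of the commit): the hunks above drop the `F: From<f64> + Into<f64>` bounds and instead use the fallible casts that `num::Float` already implies through `NumCast`/`ToPrimitive`. A minimal self-contained sketch of that pattern, with a made-up helper name:

```rust
use num::Float;

// Hypothetical helper (not from the repository) mirroring the new pattern:
// no `From<f64>`/`Into<f64>` bounds, only what `num::Float` already implies.
fn scale_and_norm_squared<F: Float>(values: &mut [F], by: f64) -> f64 {
    // `F::from` resolves to `num::NumCast::from` and returns an Option.
    let by = F::from(by).unwrap();
    let mut sum = F::zero();
    for v in values.iter_mut() {
        *v = *v * by;
        sum = sum + *v * *v;
    }
    // `to_f64` comes from `num::ToPrimitive`, a supertrait of `Float`.
    sum.to_f64().unwrap_or(0.0)
}

fn main() {
    let mut xs = [1.0f32, 2.0, 3.0];
    assert_eq!(scale_and_norm_squared(&mut xs, 2.0), 56.0);
}
```

The trade-off is that `F::from` and `to_f64` return `Option`, hence the `unwrap`/`unwrap_or` calls in the diff.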

@@ -1,4 +1,5 @@
 use nalgebra::DVector;
+use num::Float;
 
 use crate::algebra::NeuraVector;
@@ -7,24 +8,24 @@ use super::NeuraLoss;
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub struct Euclidean;
 
-impl NeuraLoss for Euclidean {
-    type Input = DVector<f64>;
-    type Target = DVector<f64>;
+impl<F: Float + std::fmt::Debug + 'static> NeuraLoss<DVector<F>> for Euclidean {
+    type Target = DVector<F>;
+    type Output = F;
 
     #[inline]
-    fn eval(&self, target: &DVector<f64>, actual: &DVector<f64>) -> f64 {
+    fn eval(&self, target: &DVector<F>, actual: &DVector<F>) -> F {
         assert_eq!(target.shape(), actual.shape());
-        let mut sum_squared = 0.0;
+        let mut sum_squared = F::zero();
         for i in 0..target.len() {
-            sum_squared += (target[i] - actual[i]) * (target[i] - actual[i]);
+            sum_squared = sum_squared + (target[i] - actual[i]) * (target[i] - actual[i]);
         }
-        sum_squared * 0.5
+        sum_squared * F::from(0.5).unwrap()
     }
 
     #[inline]
-    fn nabla(&self, target: &DVector<f64>, actual: &DVector<f64>) -> DVector<f64> {
+    fn nabla(&self, target: &DVector<F>, actual: &DVector<F>) -> DVector<F> {
         let mut res = DVector::zeros(target.len());
 
         // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
@@ -61,11 +62,11 @@ impl<const N: usize> CrossEntropy<N> {
     }
 }
 
-impl<const N: usize> NeuraLoss for CrossEntropy<N> {
-    type Input = NeuraVector<N, f64>;
+impl<const N: usize> NeuraLoss<NeuraVector<N, f64>> for CrossEntropy<N> {
     type Target = NeuraVector<N, f64>;
+    type Output = f64;
 
-    fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 {
+    fn eval(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> f64 {
         let mut result = 0.0;
         for i in 0..N {
@@ -75,7 +76,7 @@ impl<const N: usize> NeuraLoss for CrossEntropy<N> {
         result
     }
 
-    fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input {
+    fn nabla(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> NeuraVector<N, f64> {
        let mut result = NeuraVector::default();
        for i in 0..N {
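Aside: a standalone mirror of the now-generic Euclidean loss above, mainly to show that one code path serves both f32 and f64 once the hard-coded `f64` becomes `F: Float`. The free function is illustrative only; in the crate this lives behind the `NeuraLoss` impl.

```rust
use nalgebra::DVector;
use num::Float;

// Illustrative free function, not the crate's API: E = 0.5 * Σᵢ (tᵢ - aᵢ)².
fn euclidean<F: Float + nalgebra::Scalar>(target: &DVector<F>, actual: &DVector<F>) -> F {
    assert_eq!(target.shape(), actual.shape());
    let mut sum_squared = F::zero();
    for i in 0..target.len() {
        sum_squared = sum_squared + (target[i] - actual[i]) * (target[i] - actual[i]);
    }
    sum_squared * F::from(0.5).unwrap()
}

fn main() {
    // Same function, two float widths.
    let t32 = DVector::from_vec(vec![1.0f32, 0.0]);
    let a32 = DVector::from_vec(vec![0.5f32, 0.5]);
    assert_eq!(euclidean(&t32, &a32), 0.25);

    let t64 = DVector::from_vec(vec![1.0f64, 0.0]);
    let a64 = DVector::from_vec(vec![0.5f64, 0.5]);
    assert_eq!(euclidean(&t64, &a64), 0.25);
}
```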

@@ -24,15 +24,15 @@ pub trait NeuraDerivable<F> {
     }
 }
 
-pub trait NeuraLoss {
-    type Input;
+pub trait NeuraLoss<Input> {
     type Target;
+    type Output;
 
-    fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64;
+    fn eval(&self, target: &Self::Target, actual: &Input) -> Self::Output;
 
     /// Should return the gradient of the loss function according to `actual`
     /// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
-    fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
+    fn nabla(&self, target: &Self::Target, actual: &Input) -> Input;
 }
 
 pub trait NeuraReducer<F> {
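Aside: with `Input` moved from an associated type to a type parameter (plus the new associated `Output`), a single loss type can now implement `NeuraLoss` for several input types, which is what lets `Euclidean` above be generic over `DVector<F>`. A self-contained restatement of the new trait shape, with a made-up `AbsoluteError` implementor:

```rust
// The trait is restated locally so the example compiles on its own; it copies
// the shape introduced by this commit. `AbsoluteError` is invented for the demo.
pub trait NeuraLoss<Input> {
    type Target;
    type Output;

    fn eval(&self, target: &Self::Target, actual: &Input) -> Self::Output;
    fn nabla(&self, target: &Self::Target, actual: &Input) -> Input;
}

struct AbsoluteError;

// Two impls of the same loss for different input types: legal now that
// `Input` is a type parameter, impossible when it was an associated type.
impl NeuraLoss<f64> for AbsoluteError {
    type Target = f64;
    type Output = f64;

    fn eval(&self, target: &f64, actual: &f64) -> f64 {
        (target - actual).abs()
    }

    fn nabla(&self, target: &f64, actual: &f64) -> f64 {
        if actual >= target { 1.0 } else { -1.0 }
    }
}

impl NeuraLoss<f32> for AbsoluteError {
    type Target = f32;
    type Output = f32;

    fn eval(&self, target: &f32, actual: &f32) -> f32 {
        (target - actual).abs()
    }

    fn nabla(&self, target: &f32, actual: &f32) -> f32 {
        if actual >= target { 1.0 } else { -1.0 }
    }
}

fn main() {
    // Selecting the impl by naming the input type explicitly:
    assert_eq!(<AbsoluteError as NeuraLoss<f64>>::eval(&AbsoluteError, &3.0, &2.5), 0.5);
    assert_eq!(<AbsoluteError as NeuraLoss<f32>>::eval(&AbsoluteError, &3.0, &2.5), 0.5);
}
```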

@@ -17,8 +17,7 @@ pub struct NeuraDenseLayer<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable
 }
 
 #[derive(Clone, Debug)]
-pub struct NeuraDenseLayerPartial<F: Float, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>, R: Rng>
-{
+pub struct NeuraDenseLayerPartial<F, Act, Reg, R: Rng> {
     activation: Act,
     regularization: Reg,
     output_size: usize,
@@ -26,11 +25,8 @@ pub struct NeuraDenseLayerPartial<F: Float, Act: NeuraDe
     phantom: PhantomData<F>,
 }
 
-impl<
-        F: Float + From<f64> + std::fmt::Debug + 'static,
-        Act: NeuraDerivable<F>,
-        Reg: NeuraDerivable<F>,
-    > NeuraDenseLayer<F, Act, Reg>
+impl<F: Float + std::fmt::Debug + 'static, Act: NeuraDerivable<F>, Reg: NeuraDerivable<F>>
+    NeuraDenseLayer<F, Act, Reg>
 {
     pub fn new(
         weights: DMatrix<F>,
@@ -58,20 +54,28 @@ impl<
     where
         rand_distr::StandardNormal: rand_distr::Distribution<F>,
     {
-        let distribution = rand_distr::Normal::new(
-            F::zero(),
-            <F as From<f64>>::from(
-                activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64),
-            ),
-        )
-        .unwrap();
+        let stddev = activation.variance_hint() * 2.0 / (input_size as f64 + output_size as f64);
+        let stddev = F::from(stddev).unwrap_or_else(|| {
+            panic!(
+                "Couldn't convert stddev ({}) to type {}",
+                stddev,
+                stringify!(F)
+            );
+        });
+
+        let bias = F::from(activation.bias_hint()).unwrap_or_else(|| {
+            panic!(
+                "Couldn't convert bias ({}) to type {}",
+                activation.bias_hint(),
+                stringify!(F)
+            );
+        });
+
+        let distribution = rand_distr::Normal::new(F::zero(), stddev)
+            .expect("Couldn't create normal distribution");
 
         Self {
             weights: DMatrix::from_distribution(output_size, input_size, &distribution, rng),
-            bias: DVector::from_element(
-                output_size,
-                <F as From<f64>>::from(activation.bias_hint()),
-            ),
+            bias: DVector::from_element(output_size, bias),
             activation,
             regularization,
         }
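Aside: the rewritten `from_rng` splits the old nested expression into named `stddev` and `bias` steps. The weight stddev is a Glorot-style `2 / (fan_in + fan_out)` factor scaled by the activation's `variance_hint()`; a tiny numeric sketch with made-up sizes and hint:

```rust
use num::Float;

// Made-up numbers, only to show the arithmetic and the fallible cast.
fn init_stddev<F: Float>(variance_hint: f64, input_size: usize, output_size: usize) -> F {
    let stddev = variance_hint * 2.0 / (input_size as f64 + output_size as f64);
    // `F::from` (via `num::NumCast`) can fail, which is why the diff panics
    // with an explicit message instead of relying on a `From<f64>` bound.
    F::from(stddev).expect("Couldn't convert stddev")
}

fn main() {
    // variance_hint = 1.0, 64 inputs, 32 outputs  =>  2 / 96 ≈ 0.0208
    let stddev: f32 = init_stddev(1.0, 64, 32);
    println!("{stddev}");
}
```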
@@ -94,7 +98,7 @@ impl<
 }
 
 impl<
-    F: Float + From<f64> + std::fmt::Debug + 'static,
+    F: Float + std::fmt::Debug + 'static,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
     R: Rng,
@@ -122,7 +126,7 @@ where
 }
 
 impl<
-    F: Float + From<f64> + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
+    F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
 > NeuraLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
@@ -139,13 +143,7 @@ impl<
 }
 
 impl<
-    F: Float
-        + From<f64>
-        + Into<f64>
-        + std::fmt::Debug
-        + 'static
-        + std::ops::AddAssign
-        + std::ops::MulAssign,
+    F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
     Act: NeuraDerivable<F>,
     Reg: NeuraDerivable<F>,
 > NeuraTrainableLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>

@@ -0,0 +1,131 @@
use super::*;
use nalgebra::DVector;
use num::Float;
use rand::Rng;

#[derive(Clone, Debug)]
pub struct NeuraDropoutLayer<R: Rng> {
    pub dropout_probability: f64,
    multiplier: f64,
    mask: DVector<bool>,
    rng: R,
    shape: NeuraShape,
}

impl<R: Rng> NeuraDropoutLayer<R> {
    pub fn new(dropout_probability: f64, rng: R) -> Self {
        Self {
            dropout_probability,
            multiplier: 1.0,
            mask: DVector::from_element(0, false),
            rng,
            shape: NeuraShape::Vector(0),
        }
    }

    fn apply_dropout<F: Float + From<f64>>(&self, vector: &mut DVector<F>) {
        let multiplier = <F as From<f64>>::from(self.multiplier);
        for (index, &dropout) in self.mask.iter().enumerate() {
            if dropout {
                vector[index] = F::zero();
            } else {
                vector[index] = vector[index] * multiplier;
            }
        }
    }
}

impl<R: Rng> NeuraPartialLayer for NeuraDropoutLayer<R> {
    type Constructed = NeuraDropoutLayer<R>;
    type Err = ();

    fn construct(mut self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        self.shape = input_shape;
        self.mask = DVector::from_element(input_shape.size(), false);
        Ok(self)
    }

    fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
        constructed.shape
    }
}

impl<R: Rng, F: Float + From<f64>> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
    type Output = DVector<F>;

    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let mut output = input.clone();
        self.apply_dropout(&mut output);
        output
    }
}

impl<R: Rng, F: Float + From<f64>> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
    type Gradient = ();

    fn default_gradient(&self) -> Self::Gradient {
        ()
    }

    fn backprop_layer(
        &self,
        _input: &DVector<F>,
        mut epsilon: Self::Output,
    ) -> (DVector<F>, Self::Gradient) {
        self.apply_dropout(&mut epsilon);
        (epsilon, ())
    }

    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }

    fn prepare_layer(&mut self, is_training: bool) {
        let length = self.shape.size();
        if !is_training {
            self.mask = DVector::from_element(length, false);
            self.multiplier = 1.0;
            return;
        }

        // Rejection sampling to prevent all the inputs from being dropped out
        loop {
            let mut sum = 0;
            for i in 0..length {
                self.mask[i] = self.rng.gen_bool(self.dropout_probability);
                sum += self.mask[i] as usize;
            }

            if sum < length {
                self.multiplier = length as f64 / (length - sum) as f64;
                break;
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_rejection_sampling() {
        let mut layer = NeuraDropoutLayer::new(0.9, rand::thread_rng())
            .construct(NeuraShape::Vector(1))
            .unwrap();

        for _ in 0..100 {
            <NeuraDropoutLayer<_> as NeuraTrainableLayer<DVector<f64>>>::prepare_layer(
                &mut layer, true,
            );
            assert!(layer.multiplier.is_finite());
            assert!(!layer.multiplier.is_nan());
        }
    }
}
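Aside: `prepare_layer` implements inverted dropout: surviving units are scaled by `length / (length - dropped)` so the output's expected magnitude roughly matches evaluation mode, and the rejection-sampling loop guarantees `length - dropped > 0`, keeping the division (and the multiplier the test checks) finite. A standalone illustration with a fixed mask, no crate types involved:

```rust
// Standalone illustration of the inverted-dropout scaling used by
// `NeuraDropoutLayer::prepare_layer`/`apply_dropout`. The mask here is fixed
// rather than sampled, purely for illustration.
fn main() {
    let input = vec![1.0f64, 2.0, 3.0, 4.0];
    let mask = vec![false, true, false, false]; // drop the second element
    let dropped = mask.iter().filter(|&&d| d).count();
    let multiplier = input.len() as f64 / (input.len() - dropped) as f64;

    let output: Vec<f64> = input
        .iter()
        .zip(&mask)
        .map(|(&x, &drop)| if drop { 0.0 } else { x * multiplier })
        .collect();

    // Surviving values 1 + 3 + 4 = 8, rescaled by 4/3 to ≈ 10.67,
    // close to the original sum of 10.
    println!("{:?} (sum = {})", output, output.iter().sum::<f64>());
}
```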

@@ -1,6 +1,8 @@
 use crate::algebra::NeuraVectorSpace;
 
 pub mod dense;
+pub mod dropout;
 
 pub use dense::NeuraDenseLayer;
 
 #[derive(Clone, Copy, PartialEq, Debug)]
@@ -108,6 +110,6 @@ macro_rules! neura_layer {
             rand::thread_rng(),
             $activation,
             $crate::derivable::regularize::NeuraL0,
-        )
+        ) as $crate::layer::dense::NeuraDenseLayerPartial<f32, _, _, _>
     };
 }

@@ -10,7 +10,7 @@ pub trait NeuraTrainableNetwork<Input>: NeuraLayer<Input> {
     fn apply_gradient(&mut self, gradient: &Self::Gradient);
 
     /// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
-    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(
         &self,
         input: &Input,
         target: &Loss::Target,
@@ -146,7 +146,7 @@ impl<
         self.child_network.apply_gradient(&gradient.1);
     }
 
-    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(
         &self,
         input: &Input,
         target: &Loss::Target,
@@ -193,7 +193,7 @@ impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
     }
 
     #[inline(always)]
-    fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
+    fn backpropagate<Loss: NeuraLoss<Self::Output>>(
         &self,
         final_activation: &Input,
         target: &Loss::Target,
@@ -282,6 +282,6 @@ mod test {
             .construct(NeuraShape::Vector(2))
             .unwrap();
 
-        network.eval(&dvector![0.0f64, 0.0]);
+        network.eval(&dvector![0.0, 0.0]);
     }
 }

@@ -1,3 +1,5 @@
+use num::ToPrimitive;
+
 use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, network::NeuraTrainableNetwork};
 
 pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<Input>> {
@@ -12,11 +14,11 @@ pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<In
 }
 
 #[non_exhaustive]
-pub struct NeuraBackprop<Loss: NeuraLoss + Clone> {
+pub struct NeuraBackprop<Loss> {
     loss: Loss,
 }
 
-impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
+impl<Loss> NeuraBackprop<Loss> {
     pub fn new(loss: Loss) -> Self {
         Self { loss }
     }
@@ -26,8 +28,10 @@ impl<
         Input,
         Target,
         Trainable: NeuraTrainableNetwork<Input>,
-        Loss: NeuraLoss<Input = Trainable::Output, Target = Target> + Clone,
+        Loss: NeuraLoss<Trainable::Output, Target = Target> + Clone,
     > NeuraGradientSolver<Input, Target, Trainable> for NeuraBackprop<Loss>
+where
+    <Loss as NeuraLoss<Trainable::Output>>::Output: ToPrimitive,
 {
     fn get_gradient(
         &self,
@@ -40,7 +44,7 @@ impl<
     fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64 {
         let output = trainable.eval(&input);
 
-        self.loss.eval(target, &output)
+        self.loss.eval(target, &output).to_f64().unwrap()
     }
 }
@@ -182,8 +186,8 @@ mod test {
     use crate::{
         assert_approx,
         derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
-        layer::{NeuraLayer, NeuraDenseLayer},
-        network::sequential::{NeuraSequentialTail, NeuraSequential},
+        layer::{NeuraDenseLayer, NeuraLayer},
+        network::sequential::{NeuraSequential, NeuraSequentialTail},
         neura_sequential,
     };
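Aside: `score` used to depend on the loss returning `f64`; it now accepts any `Output: ToPrimitive` and converts at the boundary. A trimmed-down model of that pattern (the `Loss` trait here stands in for `NeuraLoss` with the target type folded into `Input`, and `Squared` is invented for the example):

```rust
use num::ToPrimitive;

// Simplified stand-in for the crate's `NeuraLoss`: the loss may return any
// numeric `Output`, and the solver converts it to f64 only when scoring.
trait Loss<Input> {
    type Output;
    fn eval(&self, target: &Input, actual: &Input) -> Self::Output;
}

struct Squared;

impl Loss<f32> for Squared {
    type Output = f32;
    fn eval(&self, target: &f32, actual: &f32) -> f32 {
        (target - actual) * (target - actual)
    }
}

// Mirrors the new `where <Loss as NeuraLoss<...>>::Output: ToPrimitive` bound.
fn score<I, L: Loss<I>>(loss: &L, target: &I, actual: &I) -> f64
where
    L::Output: ToPrimitive,
{
    loss.eval(target, actual).to_f64().unwrap()
}

fn main() {
    assert_eq!(score(&Squared, &1.0f32, &0.5), 0.25);
}
```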

@@ -128,7 +128,7 @@ macro_rules! assert_approx {
     ( $left:expr, $right:expr, $epsilon:expr ) => {
         let left = $left;
         let right = $right;
-        if (left - right).abs() >= $epsilon {
+        if ((left - right) as f64).abs() >= $epsilon as f64 {
             panic!("Expected {} to be approximately equal to {}", left, right);
         }
     };
