🎨 Remove From<f64> requirement in dropout, working bivariate layer, add builder pattern

main
Shad Amethyst 2 years ago
parent 969fa3197a
commit 0c97a65013
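
For orientation, a minimal sketch of how a network is declared after this change, pieced together from the examples in the diff below. The prelude import and the exact re-exports are assumptions; the LeakyRelu(0.1)/NeuraL0 defaults come from the reworked neura_layer! macro further down.

```rust
// Sketch only, based on the examples in this diff; import paths are assumptions.
use neuramethyst::derivable::activation::Linear;
use neuramethyst::prelude::*;

fn main() {
    let network = neura_sequential![
        // No activation given: the macro now defaults to LeakyRelu(0.1) and NeuraL0.
        neura_layer!("dense", 8),
        neura_layer!("dropout", 0.25),
        // Builder pattern: override the default activation on the partial layer.
        neura_layer!("dense", 2).activation(Linear),
    ]
    // The input shape is supplied once at construction time instead of per layer.
    .construct(NeuraShape::Vector(2))
    .unwrap();
    let _ = network;
}
```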

@@ -2,6 +2,7 @@
 use std::io::Write;
+use nalgebra::{dvector, DVector};
 #[allow(unused_imports)]
 use neuramethyst::derivable::activation::{LeakyRelu, Linear, Relu, Tanh};
 use neuramethyst::derivable::loss::CrossEntropy;
@@ -12,26 +13,28 @@ use rand::Rng;
 fn main() {
     let mut network = neura_sequential![
-        neura_layer!("dense", 2, 8; Relu, NeuraL1(0.001)),
+        neura_layer!("dense", 8),
         neura_layer!("dropout", 0.25),
-        neura_layer!("dense", 2; Linear, NeuraL1(0.001)),
-        neura_layer!("softmax"),
-    ];
+        neura_layer!("dense", 2).activation(Linear),
+        // neura_layer!("softmax"),
+    ]
+    .construct(NeuraShape::Vector(2))
+    .unwrap();
     let inputs = (0..1).cycle().map(move |_| {
-        let mut rng = rand::thread_rng(); // TODO: move out
+        let mut rng = rand::thread_rng();
         let category = rng.gen_bool(0.5) as usize;
         let (x, y) = if category == 0 {
-            let radius: f64 = rng.gen_range(0.0..2.0);
-            let angle = rng.gen_range(0.0..std::f64::consts::TAU);
+            let radius: f32 = rng.gen_range(0.0..2.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
             (angle.cos() * radius, angle.sin() * radius)
         } else {
-            let radius: f64 = rng.gen_range(3.0..5.0);
-            let angle = rng.gen_range(0.0..std::f64::consts::TAU);
+            let radius: f32 = rng.gen_range(3.0..5.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
             (angle.cos() * radius, angle.sin() * radius)
         };
-        ([x, y].into(), neuramethyst::one_hot::<2>(category))
+        (dvector![x, y], one_hot(category, 2))
     });
     let test_inputs: Vec<_> = inputs.clone().take(10).collect();
@@ -50,7 +53,13 @@ fn main() {
         let network = network.clone();
         draw_neuron_activation(
-            |input| network.eval(&input.into()).into_iter().collect(),
+            |input| {
+                network
+                    .eval(&dvector![input[0] as f32, input[1] as f32])
+                    .into_iter()
+                    .map(|x| *x as f64)
+                    .collect()
+            },
             6.0,
         );
         println!("{}", epoch);
@@ -75,7 +84,7 @@ fn main() {
     let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
     for (input, _target) in test_inputs {
-        let guess = neuramethyst::argmax(network.eval(&input).as_ref());
+        let guess = neuramethyst::argmax(network.eval(&input).as_slice());
         writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
     }
 }
@@ -114,3 +123,11 @@ fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64)
     viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
 }
+fn one_hot(value: usize, categories: usize) -> DVector<f32> {
+    let mut res = DVector::from_element(categories, 0.0);
+    if value < categories {
+        res[value] = 1.0;
+    }
+    res
+}

@@ -71,7 +71,7 @@ fn main() {
         // neura_layer!("pool1d", {14 * 2}, 7; Max),
         neura_layer!("unstable_flatten"),
         neura_layer!("dropout", 0.2),
-        neura_layer!("dense", 10; Linear),
+        neura_layer!("dense", 10).activation(Linear),
         neura_layer!("softmax")
     ];

@@ -9,9 +9,9 @@ use neuramethyst::prelude::*;
 fn main() {
     let mut network = neura_sequential![
-        neura_layer!("dense", 4, Relu),
-        neura_layer!("dense", 3, Relu),
-        neura_layer!("dense", 1, Relu)
+        neura_layer!("dense", 4).activation(Relu),
+        neura_layer!("dense", 3).activation(Relu),
+        neura_layer!("dense", 1).activation(Relu)
     ]
     .construct(NeuraShape::Vector(2))
     .unwrap();

@@ -45,41 +45,44 @@ impl<F: Float + std::fmt::Debug + 'static> NeuraLoss<DVector<F>> for Euclidean {
 /// This guarantee is notably not given by the `Relu`, `LeakyRelu` and `Swish` activation functions,
 /// so you should pick another activation on the last layer, or pass it into a `NeuraSoftmax` layer.
 #[derive(Clone, Copy, Debug, PartialEq)]
-pub struct CrossEntropy<const N: usize>;
+pub struct CrossEntropy;
 const DERIVATIVE_CAP: f64 = 100.0;
 const LOG_MIN: f64 = 0.00001;
-impl<const N: usize> CrossEntropy<N> {
+impl CrossEntropy {
     #[inline(always)]
-    pub fn eval_single(&self, target: f64, actual: f64) -> f64 {
-        -target * actual.max(LOG_MIN).log(std::f64::consts::E)
+    pub fn eval_single<F: Float>(&self, target: F, actual: F) -> F {
+        -target
+            * actual
+                .max(F::from(LOG_MIN).unwrap())
+                .log(F::from(std::f64::consts::E).unwrap())
     }
     #[inline(always)]
-    pub fn derivate_single(&self, target: f64, actual: f64) -> f64 {
-        -(target / actual).min(DERIVATIVE_CAP)
+    pub fn derivate_single<F: Float>(&self, target: F, actual: F) -> F {
+        -(target / actual).min(F::from(DERIVATIVE_CAP).unwrap())
     }
 }
-impl<const N: usize> NeuraLoss<NeuraVector<N, f64>> for CrossEntropy<N> {
-    type Target = NeuraVector<N, f64>;
-    type Output = f64;
+impl<F: Float + std::fmt::Debug + 'static> NeuraLoss<DVector<F>> for CrossEntropy {
+    type Target = DVector<F>;
+    type Output = F;
-    fn eval(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> f64 {
-        let mut result = 0.0;
+    fn eval(&self, target: &Self::Target, actual: &DVector<F>) -> F {
+        let mut result = F::zero();
-        for i in 0..N {
-            result += self.eval_single(target[i], actual[i]);
+        for i in 0..target.len() {
+            result = result + self.eval_single(target[i], actual[i]);
         }
         result
     }
-    fn nabla(&self, target: &Self::Target, actual: &NeuraVector<N, f64>) -> NeuraVector<N, f64> {
-        let mut result = NeuraVector::default();
+    fn nabla(&self, target: &Self::Target, actual: &DVector<F>) -> DVector<F> {
+        let mut result = DVector::from_element(target.len(), F::zero());
-        for i in 0..N {
+        for i in 0..target.len() {
             result[i] = self.derivate_single(target[i], actual[i]);
         }
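
CrossEntropy drops its const-generic length parameter and becomes generic over the scalar through the Float bound, converting its f64 constants with F::from(...).unwrap(). A hedged usage sketch, assuming the NeuraLoss trait is re-exported by the prelude as in the examples above:

```rust
// Sketch only; the prelude re-export of NeuraLoss is an assumption.
use nalgebra::dvector;
use neuramethyst::derivable::loss::CrossEntropy;
use neuramethyst::prelude::*;

fn main() {
    // One CrossEntropy value now works for any Float scalar and any vector length.
    let target = dvector![0.0f32, 1.0];
    let actual = dvector![0.1f32, 0.9];
    let loss: f32 = CrossEntropy.eval(&target, &actual);
    let gradient = CrossEntropy.nabla(&target, &actual);
    println!("loss = {}, gradient = {}", loss, gradient);
}
```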

@@ -97,6 +97,31 @@ impl<F: Float + std::fmt::Debug + 'static, Act: NeuraDerivable<F>, Reg: NeuraDer
     }
 }
+impl<F, Act, Reg, R: Rng> NeuraDenseLayerPartial<F, Act, Reg, R> {
+    pub fn activation<Act2>(self, activation: Act2) -> NeuraDenseLayerPartial<F, Act2, Reg, R> {
+        NeuraDenseLayerPartial {
+            activation,
+            regularization: self.regularization,
+            output_size: self.output_size,
+            rng: self.rng,
+            phantom: PhantomData,
+        }
+    }
+    pub fn regularization<Reg2>(
+        self,
+        regularization: Reg2,
+    ) -> NeuraDenseLayerPartial<F, Act, Reg2, R> {
+        NeuraDenseLayerPartial {
+            activation: self.activation,
+            regularization,
+            output_size: self.output_size,
+            rng: self.rng,
+            phantom: PhantomData,
+        }
+    }
+}
 impl<
     F: Float + std::fmt::Debug + 'static,
     Act: NeuraDerivable<F>,
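
Each builder method consumes the partial layer and returns a NeuraDenseLayerPartial with one type parameter swapped, so the chosen activation and regularization are tracked in the type. A standalone miniature of that type-changing builder pattern; the names here are illustrative, not crate API:

```rust
// Illustrative only: a minimal type-changing builder mirroring the impl above.
use std::marker::PhantomData;

struct PartialLayer<F, Act> {
    output_size: usize,
    activation: Act,
    phantom: PhantomData<F>,
}

impl<F, Act> PartialLayer<F, Act> {
    // Consumes self and returns the same builder with a different Act type,
    // so the activation choice is encoded in the type, as in NeuraDenseLayerPartial.
    fn activation<Act2>(self, activation: Act2) -> PartialLayer<F, Act2> {
        PartialLayer {
            output_size: self.output_size,
            activation,
            phantom: PhantomData,
        }
    }
}

fn main() {
    let p = PartialLayer::<f32, ()> { output_size: 8, activation: (), phantom: PhantomData };
    // () -> &str: the activation type parameter changes across the call.
    let _p = p.activation("relu");
}
```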

@@ -23,8 +23,8 @@ impl<R: Rng> NeuraDropoutLayer<R> {
         }
     }
-    fn apply_dropout<F: Float + From<f64>>(&self, vector: &mut DVector<F>) {
-        let multiplier = <F as From<f64>>::from(self.multiplier);
+    fn apply_dropout<F: Float>(&self, vector: &mut DVector<F>) {
+        let multiplier = F::from(self.multiplier).unwrap();
         for (index, &dropout) in self.mask.iter().enumerate() {
             if dropout {
                 vector[index] = F::zero();
@@ -51,7 +51,7 @@ impl<R: Rng> NeuraPartialLayer for NeuraDropoutLayer<R> {
     }
 }
-impl<R: Rng, F: Float + From<f64>> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
+impl<R: Rng, F: Float> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
     type Output = DVector<F>;
     fn eval(&self, input: &DVector<F>) -> Self::Output {
@@ -61,7 +61,7 @@ impl<R: Rng, F: Float + From<f64>> NeuraLayer<DVector<F>> for NeuraDropoutLayer<
     }
 }
-impl<R: Rng, F: Float + From<f64>> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
+impl<R: Rng, F: Float> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
     type Gradient = ();
     fn default_gradient(&self) -> Self::Gradient {
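
The dropout layer previously needed F: From<f64> only to convert its multiplier, which excluded f32, since f32 does not implement From<f64>. With the Float bound the conversion goes through NumCast, which is what F::from(self.multiplier).unwrap() resolves to. A standalone sketch of that conversion pattern, assuming the num_traits crate is available:

```rust
// Standalone illustration of the F::from conversion used above; not crate code.
use num_traits::Float;

fn scale_all<F: Float>(values: &mut [F], multiplier: f64) {
    // NumCast::from returns Option<F>, hence the unwrap; works for f32 and f64 alike.
    let multiplier = F::from(multiplier).unwrap();
    for v in values.iter_mut() {
        *v = *v * multiplier;
    }
}

fn main() {
    let mut xs = [1.0f32, 2.0, 4.0];
    scale_all(&mut xs, 0.5);
    assert_eq!(xs, [0.5, 1.0, 2.0]);
}
```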

@@ -104,12 +104,21 @@ impl<Input: Clone> NeuraTrainableLayer<Input> for () {
 /// Temporary implementation of neura_layer
 #[macro_export]
 macro_rules! neura_layer {
-    ( "dense", $output:expr, $activation:expr ) => {
+    ( "dense", $output:expr, $type:ty ) => {{
+        let res: $crate::layer::dense::NeuraDenseLayerPartial<$type, _, _, _> =
         $crate::layer::dense::NeuraDenseLayer::new_partial(
             $output,
             rand::thread_rng(),
-            $activation,
+            $crate::derivable::activation::LeakyRelu(0.1),
             $crate::derivable::regularize::NeuraL0,
-        ) as $crate::layer::dense::NeuraDenseLayerPartial<f32, _, _, _>
+        );
+        res
+    }};
+    ( "dense", $output:expr ) => {
+        $crate::neura_layer!("dense", $output, f32)
+    };
     ( "dropout", $probability:expr ) => {
         $crate::layer::dropout::NeuraDropoutLayer::new($probability, rand::thread_rng())
     };
 }
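
After this rework the macro has three arms: dense with an explicit scalar type, dense defaulting to f32, and dropout; activation and regularization are no longer macro arguments but builder calls on the returned partial layer. A hedged sketch of the resulting call forms (import paths assumed):

```rust
// Sketch of the call forms accepted by the macro above; defaults per this diff.
use neuramethyst::derivable::activation::Relu;
use neuramethyst::derivable::regularize::NeuraL1;
use neuramethyst::prelude::*;

fn main() {
    let _a = neura_layer!("dense", 8); // f32 scalar, LeakyRelu(0.1), NeuraL0
    let _b = neura_layer!("dense", 8, f64); // explicit scalar type
    let _c = neura_layer!("dense", 8) // override the defaults via the builder
        .activation(Relu)
        .regularization(NeuraL1(0.001));
    let _d = neura_layer!("dropout", 0.25);
}
```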

@@ -275,9 +275,9 @@ mod test {
         ];
         let network = neura_sequential![
-            neura_layer!("dense", 16, Relu),
-            neura_layer!("dense", 12, Relu),
-            neura_layer!("dense", 2, Relu)
+            neura_layer!("dense", 16).activation(Relu),
+            neura_layer!("dense", 12).activation(Relu),
+            neura_layer!("dense", 2).activation(Relu)
         ]
         .construct(NeuraShape::Vector(2))
         .unwrap();

@@ -110,7 +110,7 @@ pub fn one_hot<const N: usize>(value: usize) -> NeuraVector<N, f64> {
     res
 }
-pub fn argmax(array: &[f64]) -> usize {
+pub fn argmax<F: PartialOrd>(array: &[F]) -> usize {
     let mut res = 0;
     for n in 1..array.len() {
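
argmax now only requires PartialOrd on the element type, so it accepts the f32 slices produced by network.eval(...).as_slice() above as well as f64 slices. A short usage sketch:

```rust
// Sketch only; assumes argmax stays exported at the crate root as used above.
use neuramethyst::argmax;

fn main() {
    assert_eq!(argmax(&[0.1f32, 0.7, 0.2]), 1);
    assert_eq!(argmax(&[3.0f64, 1.0, 2.0]), 0);
}
```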
