From 0c97a65013e57b1879e2f6e5b00f545d9c0b09e5 Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Wed, 19 Apr 2023 19:32:35 +0200
Subject: [PATCH] :art: Remove From requirement in dropout, working bivariate
 layer, add builder pattern

---
 examples/bivariate.rs         | 41 +++++++++++++++++++++++++----------
 examples/convolution.rs       |  2 +-
 examples/xor.rs               |  6 ++---
 src/derivable/loss.rs         | 35 ++++++++++++++++--------------
 src/layer/dense.rs            | 25 +++++++++++++++++++++
 src/layer/dropout.rs          |  8 +++----
 src/layer/mod.rs              | 23 ++++++++++++++------
 src/network/sequential/mod.rs |  6 ++---
 src/utils.rs                  |  2 +-
 9 files changed, 101 insertions(+), 47 deletions(-)

diff --git a/examples/bivariate.rs b/examples/bivariate.rs
index e03744f..cb89a56 100644
--- a/examples/bivariate.rs
+++ b/examples/bivariate.rs
@@ -2,6 +2,7 @@
 
 use std::io::Write;
 
+use nalgebra::{dvector, DVector};
 #[allow(unused_imports)]
 use neuramethyst::derivable::activation::{LeakyRelu, Linear, Relu, Tanh};
 use neuramethyst::derivable::loss::CrossEntropy;
@@ -12,26 +13,28 @@ use rand::Rng;
 
 fn main() {
     let mut network = neura_sequential![
-        neura_layer!("dense", 2, 8; Relu, NeuraL1(0.001)),
+        neura_layer!("dense", 8),
         neura_layer!("dropout", 0.25),
-        neura_layer!("dense", 2; Linear, NeuraL1(0.001)),
-        neura_layer!("softmax"),
-    ];
+        neura_layer!("dense", 2).activation(Linear),
+        // neura_layer!("softmax"),
+    ]
+    .construct(NeuraShape::Vector(2))
+    .unwrap();
 
     let inputs = (0..1).cycle().map(move |_| {
-        let mut rng = rand::thread_rng(); // TODO: move out
+        let mut rng = rand::thread_rng();
         let category = rng.gen_bool(0.5) as usize;
         let (x, y) = if category == 0 {
-            let radius: f64 = rng.gen_range(0.0..2.0);
-            let angle = rng.gen_range(0.0..std::f64::consts::TAU);
+            let radius: f32 = rng.gen_range(0.0..2.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
             (angle.cos() * radius, angle.sin() * radius)
         } else {
-            let radius: f64 = rng.gen_range(3.0..5.0);
-            let angle = rng.gen_range(0.0..std::f64::consts::TAU);
+            let radius: f32 = rng.gen_range(3.0..5.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
             (angle.cos() * radius, angle.sin() * radius)
         };
 
-        ([x, y].into(), neuramethyst::one_hot::<2>(category))
+        (dvector![x, y], one_hot(category, 2))
     });
 
     let test_inputs: Vec<_> = inputs.clone().take(10).collect();
@@ -50,7 +53,13 @@ fn main() {
         let network = network.clone();
 
         draw_neuron_activation(
-            |input| network.eval(&input.into()).into_iter().collect(),
+            |input| {
+                network
+                    .eval(&dvector![input[0] as f32, input[1] as f32])
+                    .into_iter()
+                    .map(|x| *x as f64)
+                    .collect()
+            },
             6.0,
         );
         println!("{}", epoch);
@@ -75,7 +84,7 @@ fn main() {
     let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
 
     for (input, _target) in test_inputs {
-        let guess = neuramethyst::argmax(network.eval(&input).as_ref());
+        let guess = neuramethyst::argmax(network.eval(&input).as_slice());
         writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
     }
 }
@@ -114,3 +123,11 @@ fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64)
 
     viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
 }
+
+fn one_hot(value: usize, categories: usize) -> DVector<f32> {
+    let mut res = DVector::from_element(categories, 0.0);
+    if value < categories {
+        res[value] = 1.0;
+    }
+    res
+}
diff --git a/examples/convolution.rs b/examples/convolution.rs
index e59d38f..325cb1a 100644
--- a/examples/convolution.rs
+++ b/examples/convolution.rs
@@ -71,7 +71,7 @@ fn main() {
         // neura_layer!("pool1d", {14 * 2}, 7; Max),
         neura_layer!("unstable_flatten"),
neura_layer!("dropout", 0.2), - neura_layer!("dense", 10; Linear), + neura_layer!("dense", 10).activation(Linear), neura_layer!("softmax") ]; diff --git a/examples/xor.rs b/examples/xor.rs index b7cde53..6657de1 100644 --- a/examples/xor.rs +++ b/examples/xor.rs @@ -9,9 +9,9 @@ use neuramethyst::prelude::*; fn main() { let mut network = neura_sequential![ - neura_layer!("dense", 4, Relu), - neura_layer!("dense", 3, Relu), - neura_layer!("dense", 1, Relu) + neura_layer!("dense", 4).activation(Relu), + neura_layer!("dense", 3).activation(Relu), + neura_layer!("dense", 1).activation(Relu) ] .construct(NeuraShape::Vector(2)) .unwrap(); diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs index b6cb987..da7c2f5 100644 --- a/src/derivable/loss.rs +++ b/src/derivable/loss.rs @@ -45,41 +45,44 @@ impl NeuraLoss> for Euclidean { /// This guarantee is notably not given by the `Relu`, `LeakyRelu` and `Swish` activation functions, /// so you should pick another activation on the last layer, or pass it into a `NeuraSoftmax` layer. #[derive(Clone, Copy, Debug, PartialEq)] -pub struct CrossEntropy; +pub struct CrossEntropy; const DERIVATIVE_CAP: f64 = 100.0; const LOG_MIN: f64 = 0.00001; -impl CrossEntropy { +impl CrossEntropy { #[inline(always)] - pub fn eval_single(&self, target: f64, actual: f64) -> f64 { - -target * actual.max(LOG_MIN).log(std::f64::consts::E) + pub fn eval_single(&self, target: F, actual: F) -> F { + -target + * actual + .max(F::from(LOG_MIN).unwrap()) + .log(F::from(std::f64::consts::E).unwrap()) } #[inline(always)] - pub fn derivate_single(&self, target: f64, actual: f64) -> f64 { - -(target / actual).min(DERIVATIVE_CAP) + pub fn derivate_single(&self, target: F, actual: F) -> F { + -(target / actual).min(F::from(DERIVATIVE_CAP).unwrap()) } } -impl NeuraLoss> for CrossEntropy { - type Target = NeuraVector; - type Output = f64; +impl NeuraLoss> for CrossEntropy { + type Target = DVector; + type Output = F; - fn eval(&self, target: &Self::Target, actual: &NeuraVector) -> f64 { - let mut result = 0.0; + fn eval(&self, target: &Self::Target, actual: &DVector) -> F { + let mut result = F::zero(); - for i in 0..N { - result += self.eval_single(target[i], actual[i]); + for i in 0..target.len() { + result = result + self.eval_single(target[i], actual[i]); } result } - fn nabla(&self, target: &Self::Target, actual: &NeuraVector) -> NeuraVector { - let mut result = NeuraVector::default(); + fn nabla(&self, target: &Self::Target, actual: &DVector) -> DVector { + let mut result = DVector::from_element(target.len(), F::zero()); - for i in 0..N { + for i in 0..target.len() { result[i] = self.derivate_single(target[i], actual[i]); } diff --git a/src/layer/dense.rs b/src/layer/dense.rs index b2e46a7..71275e4 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -97,6 +97,31 @@ impl, Reg: NeuraDer } } +impl NeuraDenseLayerPartial { + pub fn activation(self, activation: Act2) -> NeuraDenseLayerPartial { + NeuraDenseLayerPartial { + activation, + regularization: self.regularization, + output_size: self.output_size, + rng: self.rng, + phantom: PhantomData, + } + } + + pub fn regularization( + self, + regularization: Reg2, + ) -> NeuraDenseLayerPartial { + NeuraDenseLayerPartial { + activation: self.activation, + regularization, + output_size: self.output_size, + rng: self.rng, + phantom: PhantomData, + } + } +} + impl< F: Float + std::fmt::Debug + 'static, Act: NeuraDerivable, diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs index bc2e92c..afd0511 100644 --- a/src/layer/dropout.rs 
+++ b/src/layer/dropout.rs
@@ -23,8 +23,8 @@ impl<R: Rng> NeuraDropoutLayer<R> {
         }
     }
 
-    fn apply_dropout<F: Float + From<f64>>(&self, vector: &mut DVector<F>) {
-        let multiplier = <F as From<f64>>::from(self.multiplier);
+    fn apply_dropout<F: Float>(&self, vector: &mut DVector<F>) {
+        let multiplier = F::from(self.multiplier).unwrap();
         for (index, &dropout) in self.mask.iter().enumerate() {
             if dropout {
                 vector[index] = F::zero();
@@ -51,7 +51,7 @@ impl<R: Rng> NeuraPartialLayer for NeuraDropoutLayer<R> {
     }
 }
 
-impl<R: Rng, F: Float + From<f64>> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
+impl<R: Rng, F: Float> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
     type Output = DVector<F>;
 
     fn eval(&self, input: &DVector<F>) -> Self::Output {
@@ -61,7 +61,7 @@ impl<R: Rng, F: Float + From<f64>> NeuraLayer<DVector<F>> for NeuraDropoutLayer<
     }
 }
 
-impl<R: Rng, F: Float + From<f64>> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
+impl<R: Rng, F: Float> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
     type Gradient = ();
 
     fn default_gradient(&self) -> Self::Gradient {
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 7e70357..4e4e4e0 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -104,12 +104,21 @@ impl<Input> NeuraTrainableLayer<Input> for () {
 /// Temporary implementation of neura_layer
 #[macro_export]
 macro_rules! neura_layer {
-    ( "dense", $output:expr, $activation:expr ) => {
-        $crate::layer::dense::NeuraDenseLayer::new_partial(
-            $output,
-            rand::thread_rng(),
-            $activation,
-            $crate::derivable::regularize::NeuraL0,
-        ) as $crate::layer::dense::NeuraDenseLayerPartial
+    ( "dense", $output:expr, $type:ty ) => {{
+        let res: $crate::layer::dense::NeuraDenseLayerPartial<$type, _, _, _> =
+            $crate::layer::dense::NeuraDenseLayer::new_partial(
+                $output,
+                rand::thread_rng(),
+                $crate::derivable::activation::LeakyRelu(0.1),
+                $crate::derivable::regularize::NeuraL0,
+            );
+        res
+    }};
+    ( "dense", $output:expr ) => {
+        $crate::neura_layer!("dense", $output, f32)
+    };
+
+    ( "dropout", $probability:expr ) => {
+        $crate::layer::dropout::NeuraDropoutLayer::new($probability, rand::thread_rng())
     };
 }
diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs
index f3bafcc..cbcc54f 100644
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@@ -275,9 +275,9 @@ mod test {
         ];
 
         let network = neura_sequential![
-            neura_layer!("dense", 16, Relu),
-            neura_layer!("dense", 12, Relu),
-            neura_layer!("dense", 2, Relu)
+            neura_layer!("dense", 16).activation(Relu),
+            neura_layer!("dense", 12).activation(Relu),
+            neura_layer!("dense", 2).activation(Relu)
         ]
         .construct(NeuraShape::Vector(2))
         .unwrap();
diff --git a/src/utils.rs b/src/utils.rs
index c342c95..442c5bd 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -110,7 +110,7 @@ pub fn one_hot<const N: usize>(value: usize) -> NeuraVector<N, f64> {
     res
 }
 
-pub fn argmax(array: &[f64]) -> usize {
+pub fn argmax<F: PartialOrd>(array: &[F]) -> usize {
     let mut res = 0;
 
     for n in 1..array.len() {
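
The builder methods added to NeuraDenseLayerPartial above are what the updated examples rely on; `.regularization(...)` is not exercised by any of them. A minimal usage sketch, assuming the prelude re-exports seen in the examples (neura_sequential!, neura_layer!, NeuraShape) and assuming NeuraL1 lives next to NeuraL0 in neuramethyst::derivable::regularize; the snippet is illustrative and not part of the patch:

    use neuramethyst::derivable::activation::{Linear, Relu};
    use neuramethyst::derivable::regularize::NeuraL1; // assumed path, alongside NeuraL0
    use neuramethyst::prelude::*;

    fn build_network() {
        // neura_layer!("dense", n) now defaults to f32, LeakyRelu(0.1) and NeuraL0;
        // both the activation and the regularization can be overridden before construct().
        let _network = neura_sequential![
            neura_layer!("dense", 8)
                .activation(Relu)
                .regularization(NeuraL1(0.001)),
            neura_layer!("dropout", 0.25),
            neura_layer!("dense", 2).activation(Linear),
        ]
        .construct(NeuraShape::Vector(2))
        .unwrap();
    }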