From 520fbcf3173900bc03c19a2634704e32e1663eba Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Sat, 29 Apr 2023 14:50:47 +0200
Subject: [PATCH] :art: Clean up NeuraResidual a bit

---
 examples/bivariate.rs                   |   2 -
 examples/densenet.rs                    | 141 ++++++++++++++
 src/lib.rs                              |   2 +-
 src/network/residual/last.rs            | 176 ++++++++++++++++++
 src/network/residual/mod.rs             | 101 +++-------
 .../residual/{layer_impl.rs => node.rs} |  52 +++++-
 src/network/residual/wrapper.rs         |  70 +++----
 src/network/sequential/construct.rs     |  11 +-
 src/network/sequential/mod.rs           |   3 +-
 src/utils.rs                            |   5 +
 10 files changed, 433 insertions(+), 130 deletions(-)
 create mode 100644 examples/densenet.rs
 create mode 100644 src/network/residual/last.rs
 rename src/network/residual/{layer_impl.rs => node.rs} (84%)

diff --git a/examples/bivariate.rs b/examples/bivariate.rs
index 4e8c90b..115925c 100644
--- a/examples/bivariate.rs
+++ b/examples/bivariate.rs
@@ -1,5 +1,3 @@
-#![feature(generic_arg_infer)]
-
 use std::io::Write;
 
 use nalgebra::{dvector, DVector};
diff --git a/examples/densenet.rs b/examples/densenet.rs
new file mode 100644
index 0000000..9e41e46
--- /dev/null
+++ b/examples/densenet.rs
@@ -0,0 +1,141 @@
+use std::io::Write;
+
+use nalgebra::{dvector, DVector};
+use neuramethyst::derivable::activation::Linear;
+use neuramethyst::derivable::loss::CrossEntropy;
+use neuramethyst::derivable::regularize::NeuraL1;
+use neuramethyst::{plot_losses, prelude::*};
+
+use rand::Rng;
+
+fn main() {
+    let mut network = neura_residual![
+        <= 0, 2;
+        neura_layer!("dense", 4).regularization(NeuraL1(0.001));
+        neura_layer!("dropout", 0.25);
+        neura_layer!("dense", 2)
+            .activation(Linear)
+            .regularization(NeuraL1(0.001));
+        neura_layer!("softmax");
+    ]
+    .construct(NeuraShape::Vector(2))
+    .unwrap();
+
+    let inputs = (0..1).cycle().map(move |_| {
+        let mut rng = rand::thread_rng();
+        let category = rng.gen_bool(0.5) as usize;
+        let (x, y) = if category == 0 {
+            let radius: f32 = rng.gen_range(0.0..2.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
+            (angle.cos() * radius, angle.sin() * radius)
+        } else {
+            let radius: f32 = rng.gen_range(3.0..5.0);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
+            (angle.cos() * radius, angle.sin() * radius)
+        };
+
+        (dvector![x, y], one_hot(category, 2))
+    });
+
+    let test_inputs: Vec<_> = inputs.clone().take(10).collect();
+
+    if std::env::args().any(|arg| arg == "draw") {
+        for epoch in 0..200 {
+            let mut trainer = NeuraBatchedTrainer::new(0.03, 10);
+            trainer.batch_size = 10;
+
+            trainer.train(
+                &NeuraBackprop::new(CrossEntropy),
+                &mut network,
+                inputs.clone(),
+                &test_inputs,
+            );
+
+            draw_neuron_activation(
+                |input| {
+                    let output = network.eval(&dvector![input[0] as f32, input[1] as f32]);
+                    let estimation = output[0] / (output[0] + output[1]);
+
+                    let color = network.eval(&dvector![input[0] as f32, input[1] as f32]);
+
+                    (&color / color.map(|x| x * x).sum() * estimation)
+                        .into_iter()
+                        .map(|x| x.abs() as f64)
+                        .collect::<Vec<_>>()
+                },
+                6.0,
+            );
+            println!("{}", epoch);
+
+            std::thread::sleep(std::time::Duration::new(0, 50_000_000));
+        }
+    } else {
+        let mut trainer = NeuraBatchedTrainer::new(0.03, 20 * 50);
+        trainer.batch_size = 10;
+        trainer.log_iterations = 20;
+
+        plot_losses(
+            trainer.train(
+                &NeuraBackprop::new(CrossEntropy),
+                &mut network,
+                inputs.clone(),
+                &test_inputs,
+            ),
+            128,
+            48,
+        );
+
+        // println!("{}", String::from("\n").repeat(64));
+        // draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0);
+    }
+
+    let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
+    for (input, _target) in test_inputs {
+        let guess = neuramethyst::argmax(network.eval(&input).as_slice());
+        writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
+    }
+}
+
+// TODO: move this to the library?
+fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
+    use viuer::Config;
+
+    const WIDTH: u32 = 64;
+    const HEIGHT: u32 = 64;
+
+    let mut image = image::RgbImage::new(WIDTH, HEIGHT);
+
+    fn sigmoid(x: f64) -> f64 {
+        1.9 / (1.0 + (-x * 3.0).exp()) - 0.9
+    }
+
+    for y in 0..HEIGHT {
+        let y2 = 2.0 * y as f64 / HEIGHT as f64 - 1.0;
+        for x in 0..WIDTH {
+            let x2 = 2.0 * x as f64 / WIDTH as f64 - 1.0;
+            let activation = callback([x2 * scale, y2 * scale]);
+            let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
+            let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
+            let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
+
+            *image.get_pixel_mut(x, y) = image::Rgb([r, g, b]);
+        }
+    }
+
+    let config = Config {
+        use_kitty: false,
+        truecolor: true,
+        // absolute_offset: false,
+        ..Default::default()
+    };
+
+    viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
+}
+
+fn one_hot(value: usize, categories: usize) -> DVector<f32> {
+    let mut res = DVector::from_element(categories, 0.0);
+    if value < categories {
+        res[value] = 1.0;
+    }
+    res
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7edd616..0eeb5b2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -24,7 +24,7 @@ pub mod prelude {
         NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
     };
     pub use crate::network::sequential::{
-        NeuraSequential, NeuraSequentialConstruct, NeuraSequentialLock, NeuraSequentialTail,
+        NeuraSequential, NeuraSequentialLock, NeuraSequentialTail,
     };
     pub use crate::train::NeuraBatchedTrainer;
 }
diff --git a/src/network/residual/last.rs b/src/network/residual/last.rs
new file mode 100644
index 0000000..f3f37eb
--- /dev/null
+++ b/src/network/residual/last.rs
@@ -0,0 +1,176 @@
+use crate::layer::*;
+use crate::network::*;
+use crate::utils::unwrap_or_clone;
+
+use std::borrow::Cow;
+
+use super::construct::*;
+use super::*;
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct NeuraResidualLast {
+    output_shape: Option<NeuraShape>,
+}
+
+impl NeuraResidualLast {
+    #[inline(always)]
+    pub fn new() -> Self {
+        Self { output_shape: None }
+    }
+}
+
+impl Default for NeuraResidualLast {
+    #[inline(always)]
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl NeuraResidualConstruct for NeuraResidualLast {
+    type Constructed = NeuraResidualLast;
+    type Err = NeuraResidualConstructErr<(), ()>;
+
+    fn construct_residual(
+        self,
+        input: NeuraResidualInput<NeuraShape>,
+    ) -> Result<Self::Constructed, Self::Err> {
+        let input = *input
+            .get_first()
+            .ok_or(Self::Err::AxisErr(NeuraAxisErr::NoInput))?;
+
+        Ok(Self {
+            output_shape: Some(input),
+        })
+    }
+}
+
+impl NeuraShapedLayer for NeuraResidualLast {
+    fn output_shape(&self) -> NeuraShape {
+        self.output_shape
+            .expect("Called NeuraResidualLast::output_shape before constructing it")
+    }
+}
+
+impl NeuraNetworkBase for NeuraResidualLast {
+    type Layer = ();
+
+    #[inline(always)]
+    fn get_layer(&self) -> &Self::Layer {
+        &()
+    }
+}
+
+impl NeuraNetworkRec for NeuraResidualLast {
+    type NextNode = ();
+
+    #[inline(always)]
+    fn get_next(&self) -> &Self::NextNode {
+        &()
+    }
+
+    fn merge_gradient(
+        &self,
+        rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
+        layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
+    ) -> Self::Gradient
+    where
+        Self::Layer: NeuraTrainableLayerBase,
+    {
+        todo!()
+    }
+}
+
+impl<Data: Clone> NeuraNetwork<NeuraResidualInput<Data>> for NeuraResidualLast {
+    type LayerInput = Data;
+
+    type NodeOutput = Data;
+
+    fn map_input<'a>(&'_ self, input: &'a NeuraResidualInput<Data>) -> Cow<'a, Self::LayerInput> {
+        Cow::Owned(unwrap_or_clone(input.clone().get_first().unwrap()))
+    }
+
+    fn map_output<'a>(
+        &'_ self,
+        _input: &'_ NeuraResidualInput<Data>,
+        layer_output: &'a Data,
+    ) -> Cow<'a, Self::NodeOutput> {
+        Cow::Borrowed(layer_output)
+    }
+
+    fn map_gradient_in<'a>(
+        &'_ self,
+        _input: &'_ NeuraResidualInput<Data>,
+        gradient_in: &'a Self::NodeOutput,
+    ) -> Cow<'a, <Self::Layer as NeuraLayer<Self::LayerInput>>::Output> {
+        Cow::Borrowed(gradient_in)
+    }
+
+    fn map_gradient_out<'a>(
+        &'_ self,
+        input: &'_ NeuraResidualInput<Data>,
+        gradient_in: &'_ Self::NodeOutput,
+        gradient_out: &'a Self::LayerInput,
+    ) -> Cow<'a, NeuraResidualInput<Data>> {
+        unimplemented!()
+    }
+}
+
+impl NeuraTrainableLayerBase for NeuraResidualLast {
+    type Gradient = ();
+
+    #[inline(always)]
+    fn default_gradient(&self) -> Self::Gradient {
+        ()
+    }
+
+    #[inline(always)]
+    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
+        // Noop
+    }
+}
+
+impl<Data: Clone> NeuraLayer<NeuraResidualInput<Data>> for NeuraResidualLast {
+    type Output = Data;
+
+    fn eval(&self, input: &NeuraResidualInput<Data>) -> Self::Output {
+        let result: Rc<Data> = input.clone().get_first()
+            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
+            .into();
+
+        unwrap_or_clone(result)
+    }
+}
+
+impl<Data: Clone> NeuraTrainableLayerEval<NeuraResidualInput<Data>> for NeuraResidualLast {
+    type IntermediaryRepr = ();
+
+    #[inline(always)]
+    fn eval_training(
+        &self,
+        input: &NeuraResidualInput<Data>,
+    ) -> (Self::Output, Self::IntermediaryRepr) {
+        (self.eval(input), ())
+    }
+}
+
+impl<Data: Clone> NeuraTrainableLayerSelf<NeuraResidualInput<Data>> for NeuraResidualLast {
+    #[inline(always)]
+    fn regularize_layer(&self) -> Self::Gradient {
+        ()
+    }
+
+    #[inline(always)]
+    fn get_gradient(
+        &self,
+        _input: &NeuraResidualInput<Data>,
+        _intermediary: &Self::IntermediaryRepr,
+        _epsilon: &Self::Output,
+    ) -> Self::Gradient {
+        ()
+    }
+}
+
+// let epsilon = Rc::new(epsilon.clone());
+// let mut epsilon_residual = NeuraResidualInput::new();
+
+// epsilon_residual.push(0, epsilon);
diff --git a/src/network/residual/mod.rs b/src/network/residual/mod.rs
index 5e12a81..26bc45d 100644
--- a/src/network/residual/mod.rs
+++ b/src/network/residual/mod.rs
@@ -1,12 +1,7 @@
 use std::rc::Rc;
 
-use nalgebra::{DVector, Scalar};
-use num::Float;
-
 use crate::layer::*;
 
-mod layer_impl;
-
 mod wrapper;
 pub use wrapper::*;
 
@@ -19,78 +14,16 @@ pub use axis::*;
 mod construct;
 pub use construct::NeuraResidualConstructErr;
 
-impl<Layers> NeuraResidual<Layers> {
-    pub fn new(layers: Layers) -> Self {
-        Self {
-            layers,
-            initial_offsets: vec![0],
-        }
-    }
-
-    pub fn offset(mut self, offset: usize) -> Self {
-        self.initial_offsets.push(offset);
-        self
-    }
-
-    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
-        self.initial_offsets = offsets;
-        self
-    }
-}
-
-#[derive(Clone, Debug, PartialEq)]
-pub struct NeuraResidualNode<Layer, ChildNetwork, Axis> {
-    pub layer: Layer,
-    pub child_network: ChildNetwork,
-
-    /// Array of relative layers indices to send the offset of this layer to,
-    /// defaults to `vec![0]`.
-    offsets: Vec<usize>,
-
-    pub axis: Axis,
-
-    output_shape: Option<NeuraShape>,
-}
-
-impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend> {
-    pub fn new(layer: Layer, child_network: ChildNetwork) -> Self {
-        Self {
-            layer,
-            child_network,
-            offsets: vec![0],
-            axis: NeuraAxisAppend,
-            output_shape: None,
-        }
-    }
-}
-
-impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
-    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
-        self.offsets = offsets;
-        self
-    }
-
-    pub fn offset(mut self, offset: usize) -> Self {
-        self.offsets.push(offset);
-        self
-    }
+mod node;
+pub use node::*;
 
-    pub fn axis<Axis2>(self, axis: Axis2) -> NeuraResidualNode<Layer, ChildNetwork, Axis2> {
-        NeuraResidualNode {
-            layer: self.layer,
-            child_network: self.child_network,
-            offsets: self.offsets,
-            axis,
-            // Drop the knowledge of output_shape
-            output_shape: None,
-        }
-    }
-}
+mod last;
+pub use last::*;
 
 #[macro_export]
 macro_rules! neura_residual {
     [ "__combine_layers", ] => {
-        ()
+        $crate::network::residual::NeuraResidualLast::new()
     };
 
     [ "__combine_layers",
@@ -136,7 +69,8 @@ macro_rules! neura_residual {
 mod test {
     use nalgebra::dvector;
 
-    use crate::neura_layer;
+    use crate::gradient_solver::NeuraGradientSolver;
+    use crate::{derivable::loss::Euclidean, neura_layer, prelude::NeuraBackprop};
 
     use super::*;
 
@@ -179,7 +113,10 @@ mod test {
         assert_eq!(network.initial_offsets, vec![0, 2]);
         assert_eq!(network.layers.offsets, vec![0, 1]);
         assert_eq!(network.layers.child_network.offsets, vec![0]);
-        assert_eq!(network.layers.child_network.child_network.child_network, ());
+        assert_eq!(
+            network.layers.child_network.child_network.child_network,
+            NeuraResidualLast::new()
+        );
 
         let network = neura_residual![
             neura_layer!("dense", 4) => 0;
@@ -203,4 +140,20 @@ mod test {
 
         network.eval(&dvector![0.0]);
     }
+
+    #[test]
+    fn test_resnet_backprop() {
+        let network = neura_residual![
+            <= 0, 1;
+            neura_layer!("dense", 2) => 0, 1;
+            neura_layer!("dense", 4);
+            neura_layer!("dense", 8)
+        ]
+        .construct(NeuraShape::Vector(1))
+        .unwrap();
+
+        let backprop = NeuraBackprop::new(Euclidean);
+
+        backprop.get_gradient(&network, &dvector![0.0], &dvector![0.0]);
+    }
 }
diff --git a/src/network/residual/layer_impl.rs b/src/network/residual/node.rs
similarity index 84%
rename from src/network/residual/layer_impl.rs
rename to src/network/residual/node.rs
index c78b87b..c283047 100644
--- a/src/network/residual/layer_impl.rs
+++ b/src/network/residual/node.rs
@@ -1,11 +1,59 @@
-//! Implementations for NeuraLayer*
+use nalgebra::{DVector, Scalar};
+use num::Float;
 
 use std::borrow::Cow;
 
 use crate::network::*;
 use super::*;
 
-impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
+#[derive(Clone, Debug, PartialEq)]
+pub struct NeuraResidualNode<Layer, ChildNetwork, Axis> {
+    pub layer: Layer,
+    pub child_network: ChildNetwork,
+
+    /// Array of relative layers indices to send the offset of this layer to,
+    /// defaults to `vec![0]`.
+    pub(crate) offsets: Vec<usize>,
+
+    pub axis: Axis,
+
+    pub(crate) output_shape: Option<NeuraShape>,
+}
+
+impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend> {
+    pub fn new(layer: Layer, child_network: ChildNetwork) -> Self {
+        Self {
+            layer,
+            child_network,
+            offsets: vec![0],
+            axis: NeuraAxisAppend,
+            output_shape: None,
+        }
+    }
+}
+
+impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
+    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
+        self.offsets = offsets;
+        self
+    }
+
+    pub fn offset(mut self, offset: usize) -> Self {
+        self.offsets.push(offset);
+        self
+    }
+
+    pub fn axis<Axis2>(self, axis: Axis2) -> NeuraResidualNode<Layer, ChildNetwork, Axis2> {
+        NeuraResidualNode {
+            layer: self.layer,
+            child_network: self.child_network,
+            offsets: self.offsets,
+            axis,
+            // Drop the knowledge of output_shape
+            output_shape: None,
+        }
+    }
+
     fn process_input<Data: Clone>(
         &self,
         input: &NeuraResidualInput<Data>,
diff --git a/src/network/residual/wrapper.rs b/src/network/residual/wrapper.rs
index a851da3..a0df715 100644
--- a/src/network/residual/wrapper.rs
+++ b/src/network/residual/wrapper.rs
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 
-use crate::network::*;
+use crate::{network::*, utils::unwrap_or_clone};
 
 use super::*;
 
@@ -14,6 +14,23 @@ pub struct NeuraResidual<Layers> {
 }
 
 impl<Layers> NeuraResidual<Layers> {
+    pub fn new(layers: Layers) -> Self {
+        Self {
+            layers,
+            initial_offsets: vec![0],
+        }
+    }
+
+    pub fn offset(mut self, offset: usize) -> Self {
+        self.initial_offsets.push(offset);
+        self
+    }
+
+    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
+        self.initial_offsets = offsets;
+        self
+    }
+
     fn input_to_residual_input<Input: Clone>(&self, input: &Input) -> NeuraResidualInput<Input> {
         let input: Rc<Input> = Rc::new((*input).clone());
         let mut inputs = NeuraResidualInput::new();
@@ -28,21 +45,14 @@ impl<Layers> NeuraResidual<Layers> {
     }
 }
 
-impl<Input: Clone, Output: Clone, Layers> NeuraLayer<Input> for NeuraResidual<Layers>
+impl<Input: Clone, Layers> NeuraLayer<Input> for NeuraResidual<Layers>
 where
-    Layers: NeuraLayer<NeuraResidualInput<Input>, Output = NeuraResidualInput<Output>>,
+    Layers: NeuraLayer<NeuraResidualInput<Input>>,
 {
-    type Output = Output;
+    type Output = Layers::Output;
 
     fn eval(&self, input: &Input) -> Self::Output {
-        let output = self.layers.eval(&self.input_to_residual_input(input));
-
-        let result: Rc<Output> = output.get_first()
-            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
-            .into();
-
-        // TODO: replace with Rc::unwrap_or_clone once https://github.com/rust-lang/rust/issues/93610 is closed
-        Rc::try_unwrap(result).unwrap_or_else(|result| (*result).clone())
+        self.layers.eval(&self.input_to_residual_input(input))
     }
 }
 
@@ -60,31 +70,19 @@ impl<Layers: NeuraTrainableLayerBase> NeuraTrainableLayerBase for NeuraResidual<
     }
 }
 
-impl<
-        Data: Clone,
-        Layers: NeuraTrainableLayerEval<NeuraResidualInput<Data>, Output = NeuraResidualInput<Data>>,
-    > NeuraTrainableLayerEval<Data> for NeuraResidual<Layers>
+impl<Data: Clone, Layers: NeuraTrainableLayerEval<NeuraResidualInput<Data>>>
+    NeuraTrainableLayerEval<Data> for NeuraResidual<Layers>
 {
     type IntermediaryRepr = Layers::IntermediaryRepr;
 
     fn eval_training(&self, input: &Data) -> (Self::Output, Self::IntermediaryRepr) {
-        let (output, intermediary) = self
-            .layers
-            .eval_training(&self.input_to_residual_input(input));
-
-        let result: Rc<Data> = output.get_first().unwrap().into();
-
-        (
-            Rc::try_unwrap(result).unwrap_or_else(|result| (*result).clone()),
-            intermediary,
-        )
+        self.layers
+            .eval_training(&self.input_to_residual_input(input))
     }
 }
 
-impl<
-        Data: Clone,
-        Layers: NeuraTrainableLayerSelf<NeuraResidualInput<Data>, Output = NeuraResidualInput<Data>>,
-    > NeuraTrainableLayerSelf<Data> for NeuraResidual<Layers>
+impl<Data: Clone, Layers: NeuraTrainableLayerSelf<NeuraResidualInput<Data>>>
+    NeuraTrainableLayerSelf<Data> for NeuraResidual<Layers>
 {
     fn regularize_layer(&self) -> Self::Gradient {
         self.layers.regularize_layer()
     }
@@ -96,16 +94,8 @@ impl<
         intermediary: &Self::IntermediaryRepr,
         epsilon: &Self::Output,
     ) -> Self::Gradient {
-        let epsilon = Rc::new(epsilon.clone());
-        let mut epsilon_residual = NeuraResidualInput::new();
-
-        epsilon_residual.push(0, epsilon);
-
-        self.layers.get_gradient(
-            &self.input_to_residual_input(input),
-            intermediary,
-            &epsilon_residual,
-        )
+        self.layers
+            .get_gradient(&self.input_to_residual_input(input), intermediary, &epsilon)
     }
 }
 
diff --git a/src/network/sequential/construct.rs b/src/network/sequential/construct.rs
index 4c837e3..74977ce 100644
--- a/src/network/sequential/construct.rs
+++ b/src/network/sequential/construct.rs
@@ -2,20 +2,13 @@ use crate::layer::NeuraShapedLayer;
 
 use super::*;
 
-pub trait NeuraSequentialConstruct {
-    type Constructed;
-    type Err;
-
-    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
-}
-
 #[derive(Debug, Clone)]
 pub enum NeuraSequentialConstructErr<Err, ChildErr> {
     Current(Err),
     Child(ChildErr),
 }
 
-impl<Layer: NeuraPartialLayer> NeuraSequentialConstruct for NeuraSequential<Layer, ()> {
+impl<Layer: NeuraPartialLayer> NeuraPartialLayer for NeuraSequential<Layer, ()> {
     type Constructed = NeuraSequential<Layer::Constructed, ()>;
     type Err = Layer::Err;
 
@@ -27,7 +20,7 @@ impl<Layer: NeuraPartialLayer> NeuraSequentialConstruct for NeuraSequential<Layer, ()> {
     }
 }
 
-impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraSequentialConstruct> NeuraSequentialConstruct
+impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraPartialLayer> NeuraPartialLayer
     for NeuraSequential<Layer, ChildNetwork>
 {
     type Constructed = NeuraSequential<Layer::Constructed, ChildNetwork::Constructed>;
diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs
index deedf8c..d6481a9 100644
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@@ -190,10 +190,9 @@
         derivable::{activation::Relu, regularize::NeuraL0},
         layer::{dense::NeuraDenseLayer, NeuraLayer, NeuraShape},
         neura_layer,
+        prelude::NeuraPartialLayer,
     };
 
-    use super::NeuraSequentialConstruct;
-
     #[test]
     fn test_neura_network_macro() {
         let mut rng = rand::thread_rng();
diff --git a/src/utils.rs b/src/utils.rs
index 1b378fd..9b2191d 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -149,3 +149,8 @@ pub fn plot_losses(losses: Vec<(f64, f64)>, width: u32, height: u32) {
         .linecolorplot(&Shape::Lines(&val_losses), (255, 0, 255).into())
         .nice();
 }
+
+pub(crate) fn unwrap_or_clone<T: Clone>(value: std::rc::Rc<T>) -> T {
+    // TODO: replace with Rc::unwrap_or_clone once https://github.com/rust-lang/rust/issues/93610 is closed
+    std::rc::Rc::try_unwrap(value).unwrap_or_else(|value| (*value).clone())
+}