From fa0bc0be9f935d59aa83ab83f80fe2f1bf089e34 Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Sat, 22 Apr 2023 23:34:16 +0200
Subject: [PATCH] :sparkles: Lock layers

---
 examples/forward-progressive.rs        | 144 +++++++++++++++++++++++++
 src/derivable/activation.rs            |  42 ++++++++
 src/gradient_solver/forward_forward.rs |   2 +-
 src/layer/lock.rs                      |  82 ++++++++++++++
 src/layer/mod.rs                       |  10 ++
 src/lib.rs                             |  11 +-
 src/network/sequential/layer_impl.rs   |   2 +-
 src/network/sequential/lock.rs         |  35 ++++++
 src/network/sequential/mod.rs          |  20 +++-
 tests/xor.rs                           |   3 +-
 10 files changed, 344 insertions(+), 7 deletions(-)
 create mode 100644 examples/forward-progressive.rs
 create mode 100644 src/layer/lock.rs
 create mode 100644 src/network/sequential/lock.rs

diff --git a/examples/forward-progressive.rs b/examples/forward-progressive.rs
new file mode 100644
index 0000000..e9ce9c1
--- /dev/null
+++ b/examples/forward-progressive.rs
@@ -0,0 +1,144 @@
+use nalgebra::{dvector, DVector};
+use neuramethyst::derivable::activation::{LeakyRelu, Logistic, Swish, Tanh};
+use neuramethyst::derivable::regularize::*;
+use neuramethyst::gradient_solver::NeuraForwardForward;
+use neuramethyst::prelude::*;
+use rand::Rng;
+
+const EPOCHS: usize = 10;
+const REG_FACTOR: f32 = 0.003;
+
+macro_rules! iteration {
+    ( $network:ident, $width:expr, $trainer:expr, $gradient_solver:expr, $test_inputs:expr ) => {
+        let mut $network = neura_sequential![
+            ..($network.lock()),
+            neura_layer!("normalize")
+                .construct(NeuraShape::Vector($width))
+                .unwrap(),
+            neura_layer!("dense", $width)
+                .activation(Swish(Logistic))
+                .regularization(NeuraL2(REG_FACTOR))
+                .construct(NeuraShape::Vector($width))
+                .unwrap()
+        ];
+        for _epoch in 0..EPOCHS {
+            $trainer.train(&$gradient_solver, &mut $network, generator(), &$test_inputs);
+
+            draw_network(&$network);
+        }
+    };
+}
+
+pub fn main() {
+    let width: usize = 60;
+
+    let mut network = neura_sequential![neura_layer!("dense", width).activation(LeakyRelu(0.1)),]
+        .construct(NeuraShape::Vector(2))
+        .unwrap();
+
+    let test_inputs = generator().filter(|x| x.1).take(50).collect::<Vec<_>>();
+
+    let gradient_solver = NeuraForwardForward::new(Tanh, 0.5);
+    let mut trainer = NeuraBatchedTrainer::new(0.01, 200);
+    trainer.batch_size = 256;
+
+    for _epoch in 0..EPOCHS {
+        trainer.train(&gradient_solver, &mut network, generator(), &test_inputs);
+
+        draw_network(&network);
+    }
+
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    // iteration!(network, width, trainer, gradient_solver, test_inputs);
+    // iteration!(network, width, trainer, gradient_solver, test_inputs);
+}
+
+fn generator() -> impl Iterator<Item = (DVector<f32>, bool)> {
+    let mut rng = rand::thread_rng();
+    std::iter::repeat_with(move || {
+        let good = rng.gen_bool(0.5);
+        // Clifford attractor
+        let (a, b, c, d) = (1.5, -1.8, 1.6, 0.9);
+
+        let noise = 0.0005;
+        let mut x: f32 = rng.gen_range(-noise..noise);
+        let mut y: f32 = rng.gen_range(-noise..noise);
+        for _ in 0..rng.gen_range(150..200) {
+            let nx = (a * y).sin() + c * (a * x).cos();
+            let ny = (b * x).sin() + d * (b * y).cos();
+            x = nx;
+            y = ny;
+        }
+
+        // Bad samples are shifted by a random amount
+        if !good {
+            let radius = rng.gen_range(0.4..0.5);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
+            x += angle.cos() * radius;
+            y += angle.sin() * radius;
+        }
+
+        (dvector![x, y], good)
+    })
+}
good) + }) +} + +// TODO: move this to the library? +fn draw_neuron_activation Vec>(callback: F, scale: f64) { + use viuer::Config; + + const WIDTH: u32 = 64; + const HEIGHT: u32 = 64; + + let mut image = image::RgbImage::new(WIDTH, HEIGHT); + + fn sigmoid(x: f64) -> f64 { + 0.1 + 0.9 * x.abs().powf(0.8) + } + + for y in 0..HEIGHT { + let y2 = 2.0 * y as f64 / HEIGHT as f64 - 1.0; + for x in 0..WIDTH { + let x2 = 2.0 * x as f64 / WIDTH as f64 - 1.0; + let activation = callback([x2 * scale, y2 * scale]); + let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8; + let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8; + let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8; + + *image.get_pixel_mut(x, y) = image::Rgb([r, g, b]); + } + } + + let config = Config { + use_kitty: false, + truecolor: true, + // absolute_offset: false, + ..Default::default() + }; + + viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap(); +} + +fn draw_network, Output = DVector>>(network: &Network) { + draw_neuron_activation( + |input| { + let result = network.eval(&dvector![input[0] as f32, input[1] as f32]); + let result_good = result.map(|x| x * x).sum(); + + let result_norm = result / result_good.sqrt(); + let mut result_rgb = DVector::from_element(3, 0.0); + + for i in 0..result_norm.len() { + result_rgb[i % 3] += result_norm[i].abs(); + } + + (result_rgb * result_good.tanh() * 12.0 / result_norm.len() as f32) + .into_iter() + .map(|x| *x as f64) + .collect() + }, + 2.0, + ); +} diff --git a/src/derivable/activation.rs b/src/derivable/activation.rs index 8ef3a5d..36f20bf 100644 --- a/src/derivable/activation.rs +++ b/src/derivable/activation.rs @@ -106,3 +106,45 @@ impl_derivable!(Tanh, x, x.tanh(), { pub struct Linear; impl_derivable!(Linear, x, x, 1.0); + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Logistic; + +impl_derivable!(Logistic, x, { + if x < 0.0 { + let x2 = x.exp(); + x2 / (1.0 + x2) + } else { + 1.0 / (1.0 + (-x).exp()) + } +}, { + if x.abs() > 50.0 { + 0.0 + } else { + let y = Logistic.eval(x); + y * (1.0 - y) + } +}; 3.2, 0.0); // 3.2 ~= pi^2 / 3 + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Swish(pub F); + +impl> NeuraDerivable for Swish { + fn eval(&self, input: f32) -> f32 { + input * self.0.eval(input) + } + + fn derivate(&self, at: f32) -> f32 { + let result = self.0.eval(at); + let swish_result = at * result; + swish_result + result * (1.0 - swish_result) + } + + fn bias_hint(&self) -> f64 { + self.0.bias_hint() + } + + fn variance_hint(&self) -> f64 { + self.0.variance_hint() + } +} diff --git a/src/gradient_solver/forward_forward.rs b/src/gradient_solver/forward_forward.rs index 2b88a00..b07bf41 100644 --- a/src/gradient_solver/forward_forward.rs +++ b/src/gradient_solver/forward_forward.rs @@ -1,7 +1,7 @@ use nalgebra::{DVector, Scalar}; use num::{traits::NumAssignOps, Float, ToPrimitive}; -use crate::{derivable::NeuraDerivable, prelude::NeuraTrainableLayerSelf}; +use crate::{derivable::NeuraDerivable, layer::NeuraTrainableLayerSelf}; use super::*; diff --git a/src/layer/lock.rs b/src/layer/lock.rs new file mode 100644 index 0000000..5d5026a --- /dev/null +++ b/src/layer/lock.rs @@ -0,0 +1,82 @@ +use super::*; + +/// A layer wrapper that disables any kind of training for the wrappee: +/// traits like NeuraTrainableLayerBackprop will still work as-is, +/// but `apply_gradient` will do nothing, and weights gradient computation is skipped. 
+#[derive(Clone, Debug)]
+pub struct NeuraLockLayer<Layer> {
+    layer: Box<Layer>,
+}
+
+impl<Layer> NeuraLockLayer<Layer> {
+    pub fn new(layer: Layer) -> Self {
+        Self {
+            layer: Box::new(layer),
+        }
+    }
+
+    pub fn unlock_layer(self) -> Layer {
+        *self.layer
+    }
+
+    pub fn get(&self) -> &Layer {
+        &self.layer
+    }
+}
+
+impl<Input, Layer: NeuraLayer<Input>> NeuraLayer<Input> for NeuraLockLayer<Layer> {
+    type Output = Layer::Output;
+
+    fn eval(&self, input: &Input) -> Self::Output {
+        self.layer.eval(input)
+    }
+}
+
+impl<Input, Layer: NeuraTrainableLayerBase<Input>> NeuraTrainableLayerBase<Input>
+    for NeuraLockLayer<Layer>
+{
+    type Gradient = ();
+    type IntermediaryRepr = Layer::IntermediaryRepr;
+
+    fn default_gradient(&self) -> Self::Gradient {
+        ()
+    }
+
+    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
+        // Noop
+    }
+
+    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
+        self.layer.eval_training(input)
+    }
+}
+
+impl<Input, Layer: NeuraTrainableLayerSelf<Input>> NeuraTrainableLayerSelf<Input>
+    for NeuraLockLayer<Layer>
+{
+    fn regularize_layer(&self) -> Self::Gradient {
+        ()
+    }
+
+    fn get_gradient(
+        &self,
+        _input: &Input,
+        _intermediary: &Self::IntermediaryRepr,
+        _epsilon: &Self::Output,
+    ) -> Self::Gradient {
+        ()
+    }
+}
+
+impl<Input, Layer: NeuraTrainableLayerBackprop<Input>> NeuraTrainableLayerBackprop<Input>
+    for NeuraLockLayer<Layer>
+{
+    fn backprop_layer(
+        &self,
+        input: &Input,
+        intermediary: &Self::IntermediaryRepr,
+        epsilon: &Self::Output,
+    ) -> Input {
+        self.layer.backprop_layer(input, intermediary, epsilon)
+    }
+}
diff --git a/src/layer/mod.rs b/src/layer/mod.rs
index 6a80e40..1c30e62 100644
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@@ -1,7 +1,10 @@
 use crate::algebra::NeuraVectorSpace;
 
+use self::lock::NeuraLockLayer;
+
 pub mod dense;
 pub mod dropout;
+pub mod lock;
 pub mod normalize;
 pub mod softmax;
 
@@ -27,6 +30,13 @@ pub trait NeuraLayer<Input> {
     type Output;
 
     fn eval(&self, input: &Input) -> Self::Output;
+
+    fn lock_layer(self) -> NeuraLockLayer<Self>
+    where
+        Self: Sized,
+    {
+        NeuraLockLayer::new(self)
+    }
 }
 
 impl<Input: Clone> NeuraLayer<Input> for () {
diff --git a/src/lib.rs b/src/lib.rs
index 0bdf1cc..f3ee7bb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
 #![feature(generic_arg_infer)]
 // #![feature(generic_const_exprs)]
+#![feature(associated_type_defaults)]
 
 pub mod algebra;
 pub mod derivable;
@@ -13,15 +14,21 @@ mod utils;
 // TODO: move to a different file
 pub use utils::{argmax, cycle_shuffling, one_hot, plot_losses};
 
+/// Common traits and structs that are useful for using this library.
+/// All of these traits are prefixed with "neura" in some way,
+/// so there should not be any conflicts when doing a wildcard import of `prelude`.
 pub mod prelude {
     // Macros
     pub use crate::{neura_layer, neura_sequential};
 
     // Structs and traits
     pub use crate::gradient_solver::NeuraBackprop;
-    pub use crate::layer::*;
+    pub use crate::layer::{
+        NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBackprop,
+        NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
+    };
     pub use crate::network::sequential::{
-        NeuraSequential, NeuraSequentialConstruct, NeuraSequentialTail,
+        NeuraSequential, NeuraSequentialConstruct, NeuraSequentialLock, NeuraSequentialTail,
     };
     pub use crate::train::NeuraBatchedTrainer;
 }
diff --git a/src/network/sequential/layer_impl.rs b/src/network/sequential/layer_impl.rs
index 088b725..3412bef 100644
--- a/src/network/sequential/layer_impl.rs
+++ b/src/network/sequential/layer_impl.rs
@@ -1,5 +1,5 @@
 use super::*;
-use crate::prelude::NeuraTrainableLayerBackprop;
+use crate::layer::NeuraTrainableLayerBackprop;
 
 impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
     for NeuraSequential<Layer, ChildNetwork>
diff --git a/src/network/sequential/lock.rs b/src/network/sequential/lock.rs
new file mode 100644
index 0000000..45e2591
--- /dev/null
+++ b/src/network/sequential/lock.rs
@@ -0,0 +1,35 @@
+use crate::layer::lock::NeuraLockLayer;
+
+use super::*;
+
+pub trait NeuraSequentialLock {
+    type Locked;
+
+    fn lock(self) -> Self::Locked;
+}
+
+impl NeuraSequentialLock for () {
+    type Locked = ();
+
+    fn lock(self) -> Self::Locked {
+        ()
+    }
+}
+
+impl<Layer, ChildNetwork: NeuraSequentialLock> NeuraSequentialLock
+    for NeuraSequential<Layer, ChildNetwork>
+{
+    type Locked = NeuraSequential<NeuraLockLayer<Layer>, ChildNetwork::Locked>;
+
+    fn lock(self) -> Self::Locked {
+        let Self {
+            layer,
+            child_network,
+        } = self;
+
+        NeuraSequential {
+            layer: NeuraLockLayer::new(layer),
+            child_network: Box::new(child_network.lock()),
+        }
+    }
+}
diff --git a/src/network/sequential/mod.rs b/src/network/sequential/mod.rs
index dfeb577..f2da4ca 100644
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@@ -1,15 +1,18 @@
 use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
 use crate::{
     gradient_solver::{NeuraGradientSolverFinal, NeuraGradientSolverTransient},
-    layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase},
-    prelude::NeuraTrainableLayerSelf,
+    layer::{
+        NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
+    },
 };
 
 mod construct;
 mod layer_impl;
+mod lock;
 mod tail;
 
 pub use construct::*;
+pub use lock::*;
 pub use tail::*;
 
 /// Chains a layer with the rest of a neural network, in a fashion similar to a cartesian product,
@@ -193,6 +196,18 @@ macro_rules! neura_sequential {
         ()
     };
 
+    [ .. $network:expr $(,)? ] => {
+        $network
+    };
+
+    [ .. $network:expr, $layer:expr $(, $($rest:expr),+ )? $(,)? ] => {
+        neura_sequential![ .. (($network).push_tail($layer)), $( $( $rest ),+ )? ]
+    };
+
+    // [ $( $lhs:expr, )* $layer:expr, .. $network:expr $(, $($rhs:expr),* )?] => {
+    //     neura_sequential![ $($lhs,)* .. (($network).push_front($layer)) $(, $($rhs),* )? ]
+    // };
+
     [ $layer:expr $(,)? ] => {
         $crate::network::sequential::NeuraSequential::from($layer)
     };
@@ -200,6 +215,7 @@
     [ $first:expr, $($rest:expr),+ $(,)? ] => {
         $crate::network::sequential::NeuraSequential::new($first, neura_sequential![$($rest),+])
     };
+
 }
 
 #[cfg(test)]
diff --git a/tests/xor.rs b/tests/xor.rs
index 1b9edb0..a19f4e1 100644
--- a/tests/xor.rs
+++ b/tests/xor.rs
@@ -8,7 +8,8 @@ use neuramethyst::{
         loss::Euclidean,
         regularize::NeuraL0,
    },
-    prelude::{dense::NeuraDenseLayer, *},
+    layer::dense::NeuraDenseLayer,
+    prelude::*,
 };
 
 fn load_test_data() -> Vec<(DMatrix, DVector, DMatrix, DVector)> {
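
Usage sketch (illustrative, not part of the diff): the locking API added above can be combined with the extended neura_sequential! macro roughly as follows, mirroring examples/forward-progressive.rs. The layer widths (2 -> 8 -> 4), the LeakyRelu activation, and the input vector are arbitrary placeholder choices, and the snippet assumes the prelude exports introduced by this patch.

use nalgebra::dvector;
use neuramethyst::derivable::activation::LeakyRelu;
use neuramethyst::prelude::*;

fn main() {
    // A small network that would normally be trained first (training elided here).
    let trained = neura_sequential![neura_layer!("dense", 8).activation(LeakyRelu(0.1))]
        .construct(NeuraShape::Vector(2))
        .unwrap();

    // `lock()` (NeuraSequentialLock) wraps every layer in a NeuraLockLayer, so
    // `apply_gradient` becomes a no-op for them; the `..` macro arm then appends
    // a fresh, trainable layer via `push_tail`.
    let extended = neura_sequential![
        ..(trained.lock()),
        neura_layer!("dense", 4)
            .activation(LeakyRelu(0.1))
            .construct(NeuraShape::Vector(8))
            .unwrap()
    ];

    // Only the newly appended dense layer would receive weight updates during training.
    let output = extended.eval(&dvector![0.5f32, -0.5]);
    println!("{}", output);
}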