✨ Lock layers

2 years ago · fa0bc0be9f
parent 2ea5502575
commit fa0bc0be9f
10 changed files with 344 additions and 7 deletions
--- a/examples/forward-progressive.rs
+++ b/examples/forward-progressive.rs
@ -0,0 +1,144 @@
+use nalgebra::{dvector, DVector};
+use neuramethyst::derivable::activation::{LeakyRelu, Logistic, Swish, Tanh};
+use neuramethyst::derivable::regularize::*;
+use neuramethyst::gradient_solver::NeuraForwardForward;
+use neuramethyst::prelude::*;
+use rand::Rng;
+
+const EPOCHS: usize = 10;
+const REG_FACTOR: f32 = 0.003;
+
+// Grows `$network` by one stage and trains the result.
+//
+// The current network is locked (`$network.lock()`) and extended with a
+// "normalize" layer followed by a "dense" layer of `$width` neurons that uses a
+// `Swish(Logistic)` activation and `NeuraL2(REG_FACTOR)` regularization.
+// The new network shadows the `$network` binding of the caller, is trained
+// `EPOCHS` times with `$trainer` and `$gradient_solver`, and is drawn after
+// every call to `train`.
+macro_rules! iteration {
+    ( $network:ident, $width:expr, $trainer:expr, $gradient_solver:expr, $test_inputs:expr ) => {
+        // Shadow the previous network: its layers are kept, but wrapped by `lock()`
+        let mut $network = neura_sequential![
+            ..($network.lock()),
+            neura_layer!("normalize")
+                .construct(NeuraShape::Vector($width))
+                .unwrap(),
+            neura_layer!("dense", $width)
+                .activation(Swish(Logistic))
+                .regularization(NeuraL2(REG_FACTOR))
+                .construct(NeuraShape::Vector($width))
+                .unwrap()
+        ];
+        for _epoch in 0..EPOCHS {
+            // Each round draws fresh samples from `generator()`
+            $trainer.train(&$gradient_solver, &mut $network, generator(), &$test_inputs);
+
+            draw_network(&$network);
+        }
+    };
+}
+
+/// Trains a network one stage at a time: a first dense layer is trained on its own,
+/// then `iteration!` is invoked four times, each time locking the current network
+/// and training two additional layers on top of it.
+pub fn main() {
+    // Number of neurons of every dense layer in this example
+    let width: usize = 60;
+
+    // Initial network: a single dense layer fed with 2-dimensional inputs
+    let mut network = neura_sequential![neura_layer!("dense", width).activation(LeakyRelu(0.1)),]
+        .construct(NeuraShape::Vector(2))
+        .unwrap();
+
+    // Keep 50 samples whose flag is `true` (the "good" samples of `generator`)
+    let test_inputs = generator().filter(|x| x.1).take(50).collect::<Vec<_>>();
+
+    // NOTE(review): the meaning of `0.5`, `0.01` and `200` is defined by the library
+    // constructors, which are not visible here; confirm before tuning them.
+    let gradient_solver = NeuraForwardForward::new(Tanh, 0.5);
+    let mut trainer = NeuraBatchedTrainer::new(0.01, 200);
+    trainer.batch_size = 256;
+
+    for _epoch in 0..EPOCHS {
+        trainer.train(&gradient_solver, &mut network, generator(), &test_inputs);
+
+        draw_network(&network);
+    }
+
+    // Every invocation shadows `network` with a larger network
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    iteration!(network, width, trainer, gradient_solver, test_inputs);
+    // iteration!(network, width, trainer, gradient_solver, test_inputs);
+    // iteration!(network, width, trainer, gradient_solver, test_inputs);
+}
+
+/// Returns an endless stream of labelled 2D samples.
+///
+/// Every sample starts from a point close to the origin and is iterated between
+/// 150 and 199 times through a Clifford attractor. About half of the samples are
+/// "good" (flag `true`) and returned as-is; the others (flag `false`) are moved
+/// away from the attractor by a distance in `0.4..0.5`, in a random direction.
+fn generator() -> impl Iterator<Item = (DVector<f32>, bool)> {
+    // Clifford attractor coefficients
+    const A: f32 = 1.5;
+    const B: f32 = -1.8;
+    const C: f32 = 1.6;
+    const D: f32 = 0.9;
+    // Half-width of the interval from which the starting point is drawn
+    const NOISE: f32 = 0.0005;
+
+    let mut rng = rand::thread_rng();
+    std::iter::repeat_with(move || {
+        let is_good = rng.gen_bool(0.5);
+
+        let mut px: f32 = rng.gen_range(-NOISE..NOISE);
+        let mut py: f32 = rng.gen_range(-NOISE..NOISE);
+        let steps = rng.gen_range(150..200);
+        for _ in 0..steps {
+            // Both coordinates are computed from the previous point before being replaced
+            (px, py) = (
+                (A * py).sin() + C * (A * px).cos(),
+                (B * px).sin() + D * (B * py).cos(),
+            );
+        }
+
+        // Bad samples are pushed off the attractor
+        if !is_good {
+            let radius = rng.gen_range(0.4..0.5);
+            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
+            px += angle.cos() * radius;
+            py += angle.sin() * radius;
+        }
+
+        (dvector![px, py], is_good)
+    })
+}
+
+// TODO: move this to the library?
+/// Renders a 64x64 image of `callback` in the terminal.
+///
+/// Each pixel is mapped to a point of the square `[-scale, scale)` on both axes and
+/// passed to `callback`. The first three returned values drive the red, green and
+/// blue channels; a missing value is treated as `-1.0`.
+fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
+    use viuer::Config;
+
+    const WIDTH: u32 = 64;
+    const HEIGHT: u32 = 64;
+
+    /// Turns an activation into a channel intensity, using only its magnitude.
+    fn intensity(value: f64) -> f64 {
+        0.1 + 0.9 * value.abs().powf(0.8)
+    }
+
+    // Maps a pixel index in `0..extent` to a coordinate in `[-1, 1)`
+    let to_unit = |index: u32, extent: u32| 2.0 * index as f64 / extent as f64 - 1.0;
+
+    let mut image = image::RgbImage::new(WIDTH, HEIGHT);
+
+    for y in 0..HEIGHT {
+        let unit_y = to_unit(y, HEIGHT);
+        for x in 0..WIDTH {
+            let unit_x = to_unit(x, WIDTH);
+            let activation = callback([unit_x * scale, unit_y * scale]);
+
+            let channel = |index: usize| {
+                let raw = activation.get(index).copied().unwrap_or(-1.0);
+                (intensity(raw) * 255.0).floor() as u8
+            };
+
+            *image.get_pixel_mut(x, y) = image::Rgb([channel(0), channel(1), channel(2)]);
+        }
+    }
+
+    let config = Config {
+        use_kitty: false,
+        truecolor: true,
+        // absolute_offset: false,
+        ..Default::default()
+    };
+
+    viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
+}
+
+/// Draws the response of `network` over the square `[-2, 2)` on both axes.
+///
+/// For every point, the output vector is normalized and the absolute values of its
+/// components are accumulated into three color channels (component `i` goes to
+/// channel `i % 3`). The channels are then scaled by the `tanh` of the squared norm
+/// of the output, so that weak responses appear darker.
+fn draw_network<Network: NeuraLayer<DVector<f32>, Output = DVector<f32>>>(network: &Network) {
+    let colorize = |point: [f64; 2]| -> Vec<f64> {
+        let output = network.eval(&dvector![point[0] as f32, point[1] as f32]);
+        // Sum of squares of the output, i.e. its squared euclidean norm
+        let squared_norm = output.map(|v| v * v).sum();
+
+        let unit = output / squared_norm.sqrt();
+        let mut rgb = DVector::from_element(3, 0.0);
+
+        for (index, component) in unit.iter().enumerate() {
+            rgb[index % 3] += component.abs();
+        }
+
+        let scaled = rgb * squared_norm.tanh() * 12.0 / unit.len() as f32;
+        scaled.iter().map(|&channel| channel as f64).collect()
+    };
+
+    draw_neuron_activation(colorize, 2.0);
+}
--- a/src/derivable/activation.rs
+++ b/src/derivable/activation.rs
@ -106,3 +106,45 @@ impl_derivable!(Tanh, x, x.tanh(), {
 pub struct Linear;

 impl_derivable!(Linear, x, x, 1.0);
+
+/// The logistic function, `1 / (1 + exp(-x))`.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Logistic;
+
+impl_derivable!(Logistic, x, {
+    // Both branches compute the same value; each one only ever calls `exp`
+    // on a non-positive number, so the exponential cannot overflow.
+    if x < 0.0 {
+        let x2 = x.exp();
+        x2 / (1.0 + x2)
+    } else {
+        1.0 / (1.0 + (-x).exp())
+    }
+}, {
+    // Far from zero the derivative is reported as exactly zero
+    if x.abs() > 50.0 {
+        0.0
+    } else {
+        // logistic'(x) = logistic(x) * (1 - logistic(x))
+        let y = Logistic.eval(x);
+        y * (1.0 - y)
+    }
+}; 3.2, 0.0); // 3.2 ~= pi^2 / 3; presumably the variance and bias hints, TODO confirm against `impl_derivable!`
+
+/// Activation function that multiplies its input by the output of an inner
+/// derivable function `F`: `swish(x) = x * F(x)`.
+///
+/// With `F = Logistic`, this is the usual swish function, `x * logistic(x)`.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Swish<F>(pub F);
+
+impl<F: NeuraDerivable<f32>> NeuraDerivable<f32> for Swish<F> {
+    /// Computes `input * F(input)`.
+    fn eval(&self, input: f32) -> f32 {
+        input * self.0.eval(input)
+    }
+
+    /// Computes the derivative of `x * F(x)` at `at`.
+    ///
+    /// The product rule gives `F(at) + at * F'(at)`, which holds for any inner
+    /// function. The previous formula, `x * F(x) + F(x) * (1 - x * F(x))`, is only
+    /// equivalent when `F' = F * (1 - F)`, that is, when `F` is the logistic function.
+    fn derivate(&self, at: f32) -> f32 {
+        self.0.eval(at) + at * self.0.derivate(at)
+    }
+
+    /// Forwards the bias hint of the inner function.
+    fn bias_hint(&self) -> f64 {
+        self.0.bias_hint()
+    }
+
+    /// Forwards the variance hint of the inner function.
+    fn variance_hint(&self) -> f64 {
+        self.0.variance_hint()
+    }
+}
--- a/src/gradient_solver/forward_forward.rs
+++ b/src/gradient_solver/forward_forward.rs
@ -1,7 +1,7 @@
 use nalgebra::{DVector, Scalar};
 use num::{traits::NumAssignOps, Float, ToPrimitive};

-use crate::{derivable::NeuraDerivable, prelude::NeuraTrainableLayerSelf};
+use crate::{derivable::NeuraDerivable, layer::NeuraTrainableLayerSelf};

 use super::*;

--- a/src/layer/lock.rs
+++ b/src/layer/lock.rs
@ -0,0 +1,82 @@
+use super::*;
+
+/// Wraps a layer so that it can no longer be trained.
+///
+/// Evaluation and traits like NeuraTrainableLayerBackprop are forwarded to the
+/// wrapped layer, but the gradient of the wrapper is `()`: `apply_gradient` does
+/// nothing and the computation of the weights gradient is skipped.
+#[derive(Clone, Debug)]
+pub struct NeuraLockLayer<Layer: ?Sized> {
+    layer: Box<Layer>,
+}
+
+impl<Layer> NeuraLockLayer<Layer> {
+    /// Locks `layer` by moving it into a new wrapper.
+    pub fn new(layer: Layer) -> Self {
+        let layer = Box::new(layer);
+        Self { layer }
+    }
+
+    /// Consumes the wrapper and gives back the layer it was holding.
+    pub fn unlock_layer(self) -> Layer {
+        let Self { layer } = self;
+        *layer
+    }
+
+    /// Borrows the wrapped layer.
+    pub fn get(&self) -> &Layer {
+        &*self.layer
+    }
+}
+
+/// Evaluating a locked layer is the same as evaluating the layer it wraps.
+impl<Input, Layer> NeuraLayer<Input> for NeuraLockLayer<Layer>
+where
+    Layer: NeuraLayer<Input>,
+{
+    type Output = Layer::Output;
+
+    fn eval(&self, input: &Input) -> Self::Output {
+        self.layer.eval(input)
+    }
+}
+
+/// A locked layer has an empty gradient (`()`), so applying a gradient to it
+/// leaves the wrapped layer untouched. Training-time evaluation is forwarded.
+impl<Input, Layer> NeuraTrainableLayerBase<Input> for NeuraLockLayer<Layer>
+where
+    Layer: NeuraTrainableLayerBase<Input>,
+{
+    type Gradient = ();
+    type IntermediaryRepr = Layer::IntermediaryRepr;
+
+    // The gradient is the unit type: there is nothing to build
+    fn default_gradient(&self) -> Self::Gradient {}
+
+    // Intentionally empty: the wrapped layer must not change
+    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {}
+
+    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
+        self.layer.eval_training(input)
+    }
+}
+
+/// Neither regularization nor the gradient computation do any work for a locked
+/// layer: both return the empty gradient `()` without calling the wrapped layer.
+impl<Input, Layer> NeuraTrainableLayerSelf<Input> for NeuraLockLayer<Layer>
+where
+    Layer: NeuraTrainableLayerBase<Input>,
+{
+    fn regularize_layer(&self) -> Self::Gradient {}
+
+    fn get_gradient(
+        &self,
+        _input: &Input,
+        _intermediary: &Self::IntermediaryRepr,
+        _epsilon: &Self::Output,
+    ) -> Self::Gradient {
+    }
+}
+
+/// Backpropagation goes through a locked layer unchanged: the call is forwarded
+/// to the wrapped layer.
+impl<Input, Layer> NeuraTrainableLayerBackprop<Input> for NeuraLockLayer<Layer>
+where
+    Layer: NeuraTrainableLayerBackprop<Input>,
+{
+    fn backprop_layer(
+        &self,
+        input: &Input,
+        intermediary: &Self::IntermediaryRepr,
+        epsilon: &Self::Output,
+    ) -> Input {
+        self.layer.backprop_layer(input, intermediary, epsilon)
+    }
+}
--- a/src/layer/mod.rs
+++ b/src/layer/mod.rs
@ -1,7 +1,10 @@
 use crate::algebra::NeuraVectorSpace;

+use self::lock::NeuraLockLayer;
+
 pub mod dense;
 pub mod dropout;
+pub mod lock;
 pub mod normalize;
 pub mod softmax;

@ -27,6 +30,13 @@ pub trait NeuraLayer<Input> {
    type Output;

    fn eval(&self, input: &Input) -> Self::Output;
+
+    /// Consumes this layer and wraps it in a [`NeuraLockLayer`].
+    ///
+    /// Only available on sized types, as the layer is moved into the wrapper.
+    fn lock_layer(self) -> NeuraLockLayer<Self>
+    where
+        Self: Sized,
+    {
+        NeuraLockLayer::new(self)
+    }
 }

 impl<Input: Clone> NeuraLayer<Input> for () {
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,5 +1,6 @@
 #![feature(generic_arg_infer)]
 // #![feature(generic_const_exprs)]
+#![feature(associated_type_defaults)]

 pub mod algebra;
 pub mod derivable;
@ -13,15 +14,21 @@ mod utils;
 // TODO: move to a different file
 pub use utils::{argmax, cycle_shuffling, one_hot, plot_losses};

+/// Common traits and structs that are useful to use this library.
+/// All of these traits are prefixed with the word "neura" in some way,
+/// so there should not be any conflicts when doing a wildcard import of `prelude`.
 pub mod prelude {
    // Macros
    pub use crate::{neura_layer, neura_sequential};

    // Structs and traits
    pub use crate::gradient_solver::NeuraBackprop;
-    pub use crate::layer::*;
+    pub use crate::layer::{
+        NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBackprop,
+        NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
+    };
    pub use crate::network::sequential::{
-        NeuraSequential, NeuraSequentialConstruct, NeuraSequentialTail,
+        NeuraSequential, NeuraSequentialConstruct, NeuraSequentialLock, NeuraSequentialTail,
    };
    pub use crate::train::NeuraBatchedTrainer;
 }
--- a/src/network/sequential/layer_impl.rs
+++ b/src/network/sequential/layer_impl.rs
@ -1,5 +1,5 @@
 use super::*;
-use crate::prelude::NeuraTrainableLayerBackprop;
+use crate::layer::NeuraTrainableLayerBackprop;

 impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
    for NeuraSequential<Layer, ChildNetwork>
--- a/src/network/sequential/lock.rs
+++ b/src/network/sequential/lock.rs
@ -0,0 +1,35 @@
+use crate::layer::lock::NeuraLockLayer;
+
+use super::*;
+
+/// Conversion of a whole sequential network into its locked counterpart.
+///
+/// Implemented for `()` and for `NeuraSequential`, whose implementation wraps
+/// every layer of the chain in a `NeuraLockLayer`.
+pub trait NeuraSequentialLock {
+    /// Type of the network once locked.
+    type Locked;
+
+    /// Consumes the network and returns its locked version.
+    fn lock(self) -> Self::Locked;
+}
+
+/// The unit type has nothing to lock: locking it yields `()` again.
+impl NeuraSequentialLock for () {
+    type Locked = ();
+
+    fn lock(self) -> Self::Locked {}
+}
+
+/// Locks a sequential network by wrapping its first layer in a `NeuraLockLayer`
+/// and locking the rest of the network recursively.
+impl<Layer, ChildNetwork> NeuraSequentialLock for NeuraSequential<Layer, ChildNetwork>
+where
+    ChildNetwork: NeuraSequentialLock,
+{
+    type Locked = NeuraSequential<NeuraLockLayer<Layer>, ChildNetwork::Locked>;
+
+    fn lock(self) -> Self::Locked {
+        NeuraSequential {
+            layer: NeuraLockLayer::new(self.layer),
+            child_network: Box::new(self.child_network.lock()),
+        }
+    }
+}
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@ -1,15 +1,18 @@
 use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
 use crate::{
    gradient_solver::{NeuraGradientSolverFinal, NeuraGradientSolverTransient},
-    layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase},
-    prelude::NeuraTrainableLayerSelf,
+    layer::{
+        NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
+    },
 };

 mod construct;
 mod layer_impl;
+mod lock;
 mod tail;

 pub use construct::*;
+pub use lock::*;
 pub use tail::*;

 /// Chains a layer with the rest of a neural network, in a fashion similar to a cartesian product,
@ -193,6 +196,18 @@ macro_rules! neura_sequential {
        ()
    };

+    [ .. $network:expr $(,)? ] => {
+        $network
+    };
+
+    [ .. $network:expr, $layer:expr $(, $($rest:expr),+ )? $(,)? ] => {
+        neura_sequential![ .. (($network).push_tail($layer)), $( $( $rest ),+ )? ]
+    };
+
+    // [ $( $lhs:expr, )* $layer:expr, .. $network:expr $(, $($rhs:expr),* )?] => {
+    //     neura_sequential![ $($lhs,)* .. (($network).push_front($layer)) $(, $($rhs),* )? ]
+    // };
+
    [ $layer:expr $(,)? ] => {
        $crate::network::sequential::NeuraSequential::from($layer)
    };
@ -200,6 +215,7 @@ macro_rules! neura_sequential {
    [ $first:expr, $($rest:expr),+ $(,)? ] => {
        $crate::network::sequential::NeuraSequential::new($first, neura_sequential![$($rest),+])
    };
+
 }

 #[cfg(test)]
--- a/tests/xor.rs
+++ b/tests/xor.rs
@ -8,7 +8,8 @@ use neuramethyst::{
        loss::Euclidean,
        regularize::NeuraL0,
    },
-    prelude::{dense::NeuraDenseLayer, *},
+    layer::dense::NeuraDenseLayer,
+    prelude::*,
 };

 fn load_test_data() -> Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> {