parent 2ea5502575
commit fa0bc0be9f
@@ -0,0 +1,144 @@
use nalgebra::{dvector, DVector};
use neuramethyst::derivable::activation::{LeakyRelu, Logistic, Swish, Tanh};
use neuramethyst::derivable::regularize::*;
use neuramethyst::gradient_solver::NeuraForwardForward;
use neuramethyst::prelude::*;
use rand::Rng;

const EPOCHS: usize = 10;
const REG_FACTOR: f32 = 0.003;

// Freezes the layers trained so far, appends a fresh normalization + dense layer pair,
// and trains the resulting network for EPOCHS epochs, drawing it after each epoch.
macro_rules! iteration {
    ( $network:ident, $width:expr, $trainer:expr, $gradient_solver:expr, $test_inputs:expr ) => {
        let mut $network = neura_sequential![
            ..($network.lock()),
            neura_layer!("normalize")
                .construct(NeuraShape::Vector($width))
                .unwrap(),
            neura_layer!("dense", $width)
                .activation(Swish(Logistic))
                .regularization(NeuraL2(REG_FACTOR))
                .construct(NeuraShape::Vector($width))
                .unwrap()
        ];
        for _epoch in 0..EPOCHS {
            $trainer.train(&$gradient_solver, &mut $network, generator(), &$test_inputs);

            draw_network(&$network);
        }
    };
}

pub fn main() {
    let width: usize = 60;

    // Start from a single trainable dense layer on the 2D input.
    let mut network = neura_sequential![neura_layer!("dense", width).activation(LeakyRelu(0.1)),]
        .construct(NeuraShape::Vector(2))
        .unwrap();

    let test_inputs = generator().filter(|x| x.1).take(50).collect::<Vec<_>>();

    let gradient_solver = NeuraForwardForward::new(Tanh, 0.5);
    let mut trainer = NeuraBatchedTrainer::new(0.01, 200);
    trainer.batch_size = 256;

    for _epoch in 0..EPOCHS {
        trainer.train(&gradient_solver, &mut network, generator(), &test_inputs);

        draw_network(&network);
    }

    // Grow the network one layer at a time; only the newly appended layers are trained.
    iteration!(network, width, trainer, gradient_solver, test_inputs);
    iteration!(network, width, trainer, gradient_solver, test_inputs);
    iteration!(network, width, trainer, gradient_solver, test_inputs);
    iteration!(network, width, trainer, gradient_solver, test_inputs);
    // iteration!(network, width, trainer, gradient_solver, test_inputs);
    // iteration!(network, width, trainer, gradient_solver, test_inputs);
}

// Generates labeled samples: good ones land on a Clifford attractor, bad ones are shifted off it.
fn generator() -> impl Iterator<Item = (DVector<f32>, bool)> {
    let mut rng = rand::thread_rng();
    std::iter::repeat_with(move || {
        let good = rng.gen_bool(0.5);
        // Clifford attractor
        let (a, b, c, d) = (1.5, -1.8, 1.6, 0.9);

        let noise = 0.0005;
        let mut x: f32 = rng.gen_range(-noise..noise);
        let mut y: f32 = rng.gen_range(-noise..noise);
        for _ in 0..rng.gen_range(150..200) {
            let nx = (a * y).sin() + c * (a * x).cos();
            let ny = (b * x).sin() + d * (b * y).cos();
            x = nx;
            y = ny;
        }

        // Bad samples are shifted by a random amount
        if !good {
            let radius = rng.gen_range(0.4..0.5);
            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
            x += angle.cos() * radius;
            y += angle.sin() * radius;
        }

        (dvector![x, y], good)
    })
}

// TODO: move this to the library?
// Renders `callback` over [-scale, scale]² as a 64x64 image in the terminal.
fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
    use viuer::Config;

    const WIDTH: u32 = 64;
    const HEIGHT: u32 = 64;

    let mut image = image::RgbImage::new(WIDTH, HEIGHT);

    fn sigmoid(x: f64) -> f64 {
        0.1 + 0.9 * x.abs().powf(0.8)
    }

    for y in 0..HEIGHT {
        let y2 = 2.0 * y as f64 / HEIGHT as f64 - 1.0;
        for x in 0..WIDTH {
            let x2 = 2.0 * x as f64 / WIDTH as f64 - 1.0;
            let activation = callback([x2 * scale, y2 * scale]);
            let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
            let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
            let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;

            *image.get_pixel_mut(x, y) = image::Rgb([r, g, b]);
        }
    }

    let config = Config {
        use_kitty: false,
        truecolor: true,
        // absolute_offset: false,
        ..Default::default()
    };

    viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
}

// Maps the network's activations to RGB and draws them with `draw_neuron_activation`.
fn draw_network<Network: NeuraLayer<DVector<f32>, Output = DVector<f32>>>(network: &Network) {
    draw_neuron_activation(
        |input| {
            let result = network.eval(&dvector![input[0] as f32, input[1] as f32]);
            let result_good = result.map(|x| x * x).sum();

            let result_norm = result / result_good.sqrt();
            let mut result_rgb = DVector::from_element(3, 0.0);

            for i in 0..result_norm.len() {
                result_rgb[i % 3] += result_norm[i].abs();
            }

            (result_rgb * result_good.tanh() * 12.0 / result_norm.len() as f32)
                .into_iter()
                .map(|x| *x as f64)
                .collect()
        },
        2.0,
    );
}
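Note on the training setup above: `NeuraForwardForward::new(Tanh, 0.5)` follows Hinton's Forward-Forward scheme, in which each layer is trained so that a "goodness" score of its activations, the sum of their squares (the same quantity `draw_network` computes as `result_good`), ends up above a threshold for good samples and below it for bad ones. Below is a minimal, library-independent sketch of that scoring rule; the `goodness` helper and the sample vectors are made up for illustration, and only the 0.5 threshold comes from the call above.

use nalgebra::{dvector, DVector};

// Forward-Forward "goodness": sum of squared activations of a layer's output.
fn goodness(activations: &DVector<f32>) -> f32 {
    activations.map(|x| x * x).sum()
}

fn main() {
    let threshold = 0.5; // mirrors NeuraForwardForward::new(Tanh, 0.5)

    let good_sample = dvector![0.8f32, -0.6, 0.3];
    let bad_sample = dvector![0.1f32, 0.05, -0.1];

    // A well-trained layer pushes good samples above the threshold
    // and bad samples below it.
    assert!(goodness(&good_sample) > threshold);
    assert!(goodness(&bad_sample) < threshold);
}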
@@ -0,0 +1,82 @@
use super::*;

/// A layer wrapper that disables any kind of training for the wrappee:
/// traits like NeuraTrainableLayerBackprop will still work as-is,
/// but `apply_gradient` will do nothing, and weights gradient computation is skipped.
#[derive(Clone, Debug)]
pub struct NeuraLockLayer<Layer: ?Sized> {
    layer: Box<Layer>,
}

impl<Layer> NeuraLockLayer<Layer> {
    pub fn new(layer: Layer) -> Self {
        Self {
            layer: Box::new(layer),
        }
    }

    pub fn unlock_layer(self) -> Layer {
        *self.layer
    }

    pub fn get(&self) -> &Layer {
        &self.layer
    }
}

impl<Input, Layer: NeuraLayer<Input>> NeuraLayer<Input> for NeuraLockLayer<Layer> {
    type Output = Layer::Output;

    fn eval(&self, input: &Input) -> Self::Output {
        self.layer.eval(input)
    }
}

impl<Input, Layer: NeuraTrainableLayerBase<Input>> NeuraTrainableLayerBase<Input>
    for NeuraLockLayer<Layer>
{
    type Gradient = ();
    type IntermediaryRepr = Layer::IntermediaryRepr;

    fn default_gradient(&self) -> Self::Gradient {
        ()
    }

    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }

    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
        self.layer.eval_training(input)
    }
}

impl<Input, Layer: NeuraTrainableLayerBase<Input>> NeuraTrainableLayerSelf<Input>
    for NeuraLockLayer<Layer>
{
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    fn get_gradient(
        &self,
        _input: &Input,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
}

impl<Input, Layer: NeuraTrainableLayerBackprop<Input>> NeuraTrainableLayerBackprop<Input>
    for NeuraLockLayer<Layer>
{
    fn backprop_layer(
        &self,
        input: &Input,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Input {
        self.layer.backprop_layer(input, intermediary, epsilon)
    }
}
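A rough usage sketch of the wrapper above. It assumes the module is reachable as `neuramethyst::layer::lock` from outside the crate and that a dense layer built as in the example file implements `NeuraLayer<DVector<f32>>` on its own; neither is confirmed by this diff.

use nalgebra::dvector;
use neuramethyst::derivable::activation::LeakyRelu;
use neuramethyst::layer::lock::NeuraLockLayer; // assumed public path
use neuramethyst::prelude::*;

fn main() {
    // Build a 2 -> 4 dense layer as in the example file, then freeze it.
    let dense = neura_layer!("dense", 4)
        .activation(LeakyRelu(0.1))
        .construct(NeuraShape::Vector(2))
        .unwrap();
    let locked = NeuraLockLayer::new(dense);

    // Evaluation passes straight through to the wrapped layer...
    let output = locked.eval(&dvector![0.5f32, -0.25]);
    println!("{}", output);

    // ...while `Gradient = ()` means training passes leave the weights untouched.
    // The original layer can be recovered with `locked.unlock_layer()`.
}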
@@ -0,0 +1,35 @@
use crate::layer::lock::NeuraLockLayer;

use super::*;

/// Recursively locks every layer of a [`NeuraSequential`] network,
/// wrapping each one in a [`NeuraLockLayer`] so that training no longer updates it.
pub trait NeuraSequentialLock {
    type Locked;

    fn lock(self) -> Self::Locked;
}

impl NeuraSequentialLock for () {
    type Locked = ();

    fn lock(self) -> Self::Locked {
        ()
    }
}

impl<Layer, ChildNetwork: NeuraSequentialLock> NeuraSequentialLock
    for NeuraSequential<Layer, ChildNetwork>
{
    type Locked = NeuraSequential<NeuraLockLayer<Layer>, ChildNetwork::Locked>;

    fn lock(self) -> Self::Locked {
        let Self {
            layer,
            child_network,
        } = self;

        NeuraSequential {
            layer: NeuraLockLayer::new(layer),
            child_network: Box::new(child_network.lock()),
        }
    }
}
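The two impls above recurse down the cons-list shape of `NeuraSequential` (a head layer plus a boxed child network, terminated by `()`), which is what lets the example's `iteration!` macro write `..($network.lock())` to freeze everything trained so far. Here is a stripped-down, standalone re-creation of the same recursion, using toy stand-in types rather than the library's (none of these names exist in neuramethyst).

// Toy stand-ins: a cons-list "network" and a "lock" wrapper, not the real types.
struct Seq<Layer, Child> {
    layer: Layer,
    child: Child,
}
struct Locked<Layer>(Layer);

trait LockAll {
    type Out;
    fn lock_all(self) -> Self::Out;
}

// Base case: the empty tail locks to itself.
impl LockAll for () {
    type Out = ();
    fn lock_all(self) -> Self::Out {}
}

// Recursive case: wrap the head layer and recurse into the child network.
impl<Layer, Child: LockAll> LockAll for Seq<Layer, Child> {
    type Out = Seq<Locked<Layer>, Child::Out>;
    fn lock_all(self) -> Self::Out {
        Seq {
            layer: Locked(self.layer),
            child: self.child.lock_all(),
        }
    }
}

fn main() {
    // A two-"layer" network, locked end to end: every layer ends up wrapped.
    let net = Seq { layer: "dense 1", child: Seq { layer: "dense 2", child: () } };
    let locked: Seq<Locked<&str>, Seq<Locked<&str>, ()>> = net.lock_all();
    println!("{} / {}", locked.layer.0, locked.child.layer.0);
}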