🎨 Clean up NeuraResidual a bit

2 years ago · 520fbcf317
parent b34b1e630b
commit 520fbcf317
10 changed files with 433 additions and 130 deletions
--- a/examples/bivariate.rs
+++ b/examples/bivariate.rs
@ -1,5 +1,3 @@
 #![feature(generic_arg_infer)]
 use std::io::Write;
 use nalgebra::{dvector, DVector};
--- a/examples/densenet.rs
+++ b/examples/densenet.rs
@ -0,0 +1,141 @@
 use std::io::Write;
 use nalgebra::{dvector, DVector};
 use neuramethyst::derivable::activation::Linear;
 use neuramethyst::derivable::loss::CrossEntropy;
 use neuramethyst::derivable::regularize::NeuraL1;
 use neuramethyst::{plot_losses, prelude::*};
 use rand::Rng;
 /// Trains a small residual classifier on a synthetic two-class data set.
 ///
 /// Samples of category 0 are drawn from a disc of radius below 2, samples of
 /// category 1 from a ring with radius in `[3, 5)`. When the program receives a
 /// `draw` argument the network output is rendered to the terminal after every
 /// training round; otherwise a single longer training run is performed and its
 /// losses are plotted. In both cases the predictions for the held-out samples
 /// are written to a CSV file at the end.
 fn main() {
    let mut network = neura_residual![
        <= 0, 2;
        neura_layer!("dense", 4).regularization(NeuraL1(0.001));
        neura_layer!("dropout", 0.25);
        neura_layer!("dense", 2)
            .activation(Linear)
            .regularization(NeuraL1(0.001));
        neura_layer!("softmax");
    ]
    .construct(NeuraShape::Vector(2))
    .unwrap();

    // Endless stream of `(point, one-hot label)` pairs.
    let inputs = (0..1).cycle().map(move |_| {
        let mut rng = rand::thread_rng();
        let category = usize::from(rng.gen_bool(0.5));

        // Only the radius interval depends on the category; the angle is always
        // sampled over a full turn.
        let (min_radius, max_radius): (f32, f32) = if category == 0 {
            (0.0, 2.0)
        } else {
            (3.0, 5.0)
        };
        let radius = rng.gen_range(min_radius..max_radius);
        let angle = rng.gen_range(0.0..std::f32::consts::TAU);

        (
            dvector![angle.cos() * radius, angle.sin() * radius],
            one_hot(category, 2),
        )
    });

    let test_inputs: Vec<_> = inputs.clone().take(10).collect();

    let draw_requested = std::env::args().any(|arg| arg == "draw");
    if draw_requested {
        for epoch in 0..200 {
            let mut trainer = NeuraBatchedTrainer::new(0.03, 10);
            trainer.batch_size = 10;
            trainer.train(
                &NeuraBackprop::new(CrossEntropy),
                &mut network,
                inputs.clone(),
                &test_inputs,
            );

            draw_neuron_activation(
                |input| {
                    let point = dvector![input[0] as f32, input[1] as f32];
                    let output = network.eval(&point);
                    let estimation = output[0] / (output[0] + output[1]);
                    // NOTE(review): the network is evaluated a second time on the same
                    // point, as before; presumably `output` could be reused — confirm
                    // that evaluation is deterministic before merging the two calls.
                    let color = network.eval(&point);

                    (&color / color.map(|x| x * x).sum() * estimation)
                        .into_iter()
                        .map(|x| x.abs() as f64)
                        .collect::<Vec<_>>()
                },
                6.0,
            );

            println!("{}", epoch);
            std::thread::sleep(std::time::Duration::from_millis(50));
        }
    } else {
        let mut trainer = NeuraBatchedTrainer::new(0.03, 20 * 50);
        trainer.batch_size = 10;
        trainer.log_iterations = 20;

        let losses = trainer.train(
            &NeuraBackprop::new(CrossEntropy),
            &mut network,
            inputs.clone(),
            &test_inputs,
        );
        plot_losses(losses, 128, 48);

        // println!("{}", String::from("\n").repeat(64));
        // draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0);
    }

    // One CSV row per held-out sample: both coordinates, then the predicted class.
    // NOTE(review): the output path still says "bivariate" — verify it is intended
    // for this example.
    let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
    for (sample, _target) in test_inputs {
        let prediction = network.eval(&sample);
        let guess = neuramethyst::argmax(prediction.as_slice());
        writeln!(&mut file, "{},{},{}", sample[0], sample[1], guess).unwrap();
    }
 }
 // TODO: move this to the library?
 /// Renders `callback` as a 64x64 RGB image and prints it to the terminal.
 ///
 /// Every pixel is mapped to a point of the square `[-scale, scale)²` and handed
 /// to `callback`. The first three returned values drive the red, green and blue
 /// channels; a channel the callback does not provide is treated as `-1.0`.
 ///
 /// # Panics
 ///
 /// Panics when `viuer` fails to print the image.
 fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
    use viuer::Config;

    const WIDTH: u32 = 64;
    const HEIGHT: u32 = 64;

    // Smoothly squashes an activation into the range (-0.9, 1.0).
    fn sigmoid(x: f64) -> f64 {
        1.9 / (1.0 + (-x * 3.0).exp()) - 0.9
    }

    // Converts one entry of the callback result into an 8-bit channel value;
    // the float-to-`u8` cast saturates, so negative results become 0.
    let channel = |activation: &[f64], index: usize| -> u8 {
        let raw = activation.get(index).copied().unwrap_or(-1.0);
        (sigmoid(raw) * 255.0).floor() as u8
    };

    let mut image = image::RgbImage::new(WIDTH, HEIGHT);
    for row in 0..HEIGHT {
        let v = 2.0 * row as f64 / HEIGHT as f64 - 1.0;
        for col in 0..WIDTH {
            let u = 2.0 * col as f64 / WIDTH as f64 - 1.0;

            let activation = callback([u * scale, v * scale]);
            let rgb = [0, 1, 2].map(|index| channel(&activation, index));

            *image.get_pixel_mut(col, row) = image::Rgb(rgb);
        }
    }

    let config = Config {
        use_kitty: false,
        truecolor: true,
        // absolute_offset: false,
        ..Default::default()
    };
    viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
 }
 /// Builds a vector of length `categories` that is `1.0` at index `value` and
 /// `0.0` everywhere else.
 ///
 /// When `value` is not a valid index (`value >= categories`) the result is
 /// entirely zero.
 fn one_hot(value: usize, categories: usize) -> DVector<f32> {
    DVector::from_fn(categories, |index, _| if index == value { 1.0 } else { 0.0 })
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -24,7 +24,7 @@ pub mod prelude {
        NeuraTrainableLayerBase, NeuraTrainableLayerSelf,
    };
    pub use crate::network::sequential::{
-        NeuraSequential, NeuraSequentialConstruct, NeuraSequentialLock, NeuraSequentialTail,
+        NeuraSequential, NeuraSequentialLock, NeuraSequentialTail,
    };
    pub use crate::train::NeuraBatchedTrainer;
 }
--- a/src/network/residual/last.rs
+++ b/src/network/residual/last.rs
@ -0,0 +1,176 @@
 use crate::layer::*;
 use crate::network::*;
 use crate::utils::unwrap_or_clone;
 use std::borrow::Cow;
 use super::construct::*;
 use super::*;
 /// Terminal node of a residual network chain.
 ///
 /// It holds no layer of its own; the only state is the output shape, which is
 /// unknown (`None`) until the node has been constructed.
 #[derive(Clone, Debug, Default, PartialEq)]
 pub struct NeuraResidualLast {
    /// Shape recorded during construction, `None` before that.
    output_shape: Option<NeuraShape>,
 }

 impl NeuraResidualLast {
    /// Creates a node whose output shape is not known yet.
    #[inline(always)]
    pub fn new() -> Self {
        Self::default()
    }
 }
 impl NeuraResidualConstruct for NeuraResidualLast {
    type Constructed = NeuraResidualLast;
    type Err = NeuraResidualConstructErr<(), ()>;

    /// Records the shape of the first pending input as this node's output shape.
    ///
    /// # Errors
    ///
    /// Returns `AxisErr(NeuraAxisErr::NoInput)` when `input` yields no first entry.
    fn construct_residual(
        self,
        input: NeuraResidualInput<NeuraShape>,
    ) -> Result<Self::Constructed, Self::Err> {
        match input.get_first() {
            Some(shape) => Ok(Self {
                output_shape: Some(*shape),
            }),
            None => Err(Self::Err::AxisErr(NeuraAxisErr::NoInput)),
        }
    }
 }
 impl NeuraShapedLayer for NeuraResidualLast {
    /// Returns the shape stored when the node was constructed.
    ///
    /// # Panics
    ///
    /// Panics if no shape has been stored yet.
    fn output_shape(&self) -> NeuraShape {
        match self.output_shape {
            Some(shape) => shape,
            None => panic!("Called NeuraResidualLast::output_shape before constructing it"),
        }
    }
 }
 impl NeuraNetworkBase for NeuraResidualLast {
    // This node wraps no real layer, so the unit type stands in for it.
    type Layer = ();
    /// Returns a reference to the unit placeholder layer.
    #[inline(always)]
    fn get_layer(&self) -> &Self::Layer {
        &()
    }
 }
 impl NeuraNetworkRec for NeuraResidualLast {
    // Nothing follows this node, so the unit type stands in for the next node.
    type NextNode = ();

    /// Returns a reference to the unit placeholder for the next node.
    #[inline(always)]
    fn get_next(&self) -> &Self::NextNode {
        &()
    }

    /// Merges the gradient of the next node with the gradient of this node's layer.
    ///
    /// The gradient type of this node is `()`, so both inputs are ignored and the
    /// unit value is returned instead of panicking.
    fn merge_gradient(
        &self,
        _rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
        _layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
    ) -> Self::Gradient
    where
        Self::Layer: NeuraTrainableLayerBase,
    {
        // `Self::Gradient` is `()`: there is nothing to merge.
    }
 }
 impl<Data: Clone> NeuraNetwork<NeuraResidualInput<Data>> for NeuraResidualLast {
    type LayerInput = Data;
    type NodeOutput = Data;

    /// Extracts the first pending entry of `input` as an owned value.
    ///
    /// # Panics
    ///
    /// Panics when `input` has no first entry; the message matches the one used
    /// by `eval` for the same failure.
    fn map_input<'a>(&'_ self, input: &'a NeuraResidualInput<Data>) -> Cow<'a, Self::LayerInput> {
        let first = input.clone().get_first().expect(
            "Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?",
        );

        Cow::Owned(unwrap_or_clone(first))
    }

    /// Passes the layer output through unchanged.
    fn map_output<'a>(
        &'_ self,
        _input: &'_ NeuraResidualInput<Data>,
        layer_output: &'a Data,
    ) -> Cow<'a, Self::NodeOutput> {
        Cow::Borrowed(layer_output)
    }

    /// Passes the incoming gradient through unchanged.
    fn map_gradient_in<'a>(
        &'_ self,
        _input: &'_ NeuraResidualInput<Data>,
        gradient_in: &'a Self::NodeOutput,
    ) -> Cow<'a, <Self::Layer as NeuraLayer<Self::LayerInput>>::Output> {
        Cow::Borrowed(gradient_in)
    }

    /// Not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn map_gradient_out<'a>(
        &'_ self,
        _input: &'_ NeuraResidualInput<Data>,
        _gradient_in: &'_ Self::NodeOutput,
        _gradient_out: &'a Self::LayerInput,
    ) -> Cow<'a, NeuraResidualInput<Data>> {
        unimplemented!()
    }
 }
 impl NeuraTrainableLayerBase for NeuraResidualLast {
    // No trainable parameters, hence a unit gradient.
    type Gradient = ();

    /// Returns the unit gradient.
    #[inline(always)]
    fn default_gradient(&self) -> Self::Gradient {}

    /// Does nothing: there is no parameter to update.
    #[inline(always)]
    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {}
 }
 impl<Data: Clone> NeuraLayer<NeuraResidualInput<Data>> for NeuraResidualLast {
    type Output = Data;

    /// Returns the first pending entry of `input` as an owned value.
    ///
    /// # Panics
    ///
    /// Panics when `input` has no first entry.
    fn eval(&self, input: &NeuraResidualInput<Data>) -> Self::Output {
        let first: Rc<Self::Output> = input
            .clone()
            .get_first()
            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
            .into();

        // Avoids a copy when this is the only reference left to the data.
        unwrap_or_clone(first)
    }
 }
 impl<Data: Clone> NeuraTrainableLayerEval<NeuraResidualInput<Data>> for NeuraResidualLast {
    // Nothing needs to be remembered between the forward and backward passes.
    type IntermediaryRepr = ();

    /// Same as `eval`, paired with the unit intermediary representation.
    #[inline(always)]
    fn eval_training(
        &self,
        input: &NeuraResidualInput<Data>,
    ) -> (Self::Output, Self::IntermediaryRepr) {
        let output = self.eval(input);
        (output, ())
    }
 }
 impl<Data: Clone> NeuraTrainableLayerSelf<NeuraResidualInput<Data>> for NeuraResidualLast {
    /// Returns the unit gradient: there is nothing to regularize.
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {}

    /// Returns the unit gradient; every argument is ignored.
    #[inline(always)]
    fn get_gradient(
        &self,
        _input: &NeuraResidualInput<Data>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
    }
 }
 // let epsilon = Rc::new(epsilon.clone());
 // let mut epsilon_residual = NeuraResidualInput::new();
 // epsilon_residual.push(0, epsilon);
--- a/src/network/residual/mod.rs
+++ b/src/network/residual/mod.rs
@ -1,12 +1,7 @@
 use std::rc::Rc;
 use nalgebra::{DVector, Scalar};
 use num::Float;
 use crate::layer::*;
 mod layer_impl;
 mod wrapper;
 pub use wrapper::*;
@ -19,78 +14,16 @@ pub use axis::*;
 mod construct;
 pub use construct::NeuraResidualConstructErr;
-impl<Layers> NeuraResidual<Layers> {
+mod node;
-    pub fn new(layers: Layers) -> Self {
+pub use node::*;
        Self {
            layers,
            initial_offsets: vec![0],
        }
    }
    pub fn offset(mut self, offset: usize) -> Self {
        self.initial_offsets.push(offset);
        self
    }
    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
        self.initial_offsets = offsets;
        self
    }
 }
 #[derive(Clone, Debug, PartialEq)]
 pub struct NeuraResidualNode<Layer, ChildNetwork, Axis> {
    pub layer: Layer,
    pub child_network: ChildNetwork,
-    /// Array of relative layers indices to send the offset of this layer to,
+mod last;
-    /// defaults to `vec![0]`.
+pub use last::*;
    offsets: Vec<usize>,
    pub axis: Axis,
    output_shape: Option<NeuraShape>,
 }
 impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend> {
    pub fn new(layer: Layer, child_network: ChildNetwork) -> Self {
        Self {
            layer,
            child_network,
            offsets: vec![0],
            axis: NeuraAxisAppend,
            output_shape: None,
        }
    }
 }
 impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
        self.offsets = offsets;
        self
    }
    pub fn offset(mut self, offset: usize) -> Self {
        self.offsets.push(offset);
        self
    }
    pub fn axis<Axis2>(self, axis: Axis2) -> NeuraResidualNode<Layer, ChildNetwork, Axis2> {
        NeuraResidualNode {
            layer: self.layer,
            child_network: self.child_network,
            offsets: self.offsets,
            axis,
            // Drop the knowledge of output_shape
            output_shape: None,
        }
    }
 }
 #[macro_export]
 macro_rules! neura_residual {
    [ "__combine_layers", ] => {
-        ()
+        $crate::network::residual::NeuraResidualLast::new()
    };
    [ "__combine_layers",
@ -136,7 +69,8 @@ macro_rules! neura_residual {
 mod test {
    use nalgebra::dvector;
-    use crate::neura_layer;
+    use crate::gradient_solver::NeuraGradientSolver;
    use crate::{derivable::loss::Euclidean, neura_layer, prelude::NeuraBackprop};
    use super::*;
@ -179,7 +113,10 @@ mod test {
        assert_eq!(network.initial_offsets, vec![0, 2]);
        assert_eq!(network.layers.offsets, vec![0, 1]);
        assert_eq!(network.layers.child_network.offsets, vec![0]);
-        assert_eq!(network.layers.child_network.child_network.child_network, ());
+        assert_eq!(
            network.layers.child_network.child_network.child_network,
            NeuraResidualLast::new()
        );
        let network = neura_residual![
            neura_layer!("dense", 4) => 0;
@ -203,4 +140,20 @@ mod test {
        network.eval(&dvector![0.0]);
    }
    // Smoke test: builds a three-layer residual network and requests a gradient
    // from the backpropagation solver. The returned gradient is discarded, so the
    // test only checks that construction succeeds and nothing panics.
    #[test]
    fn test_resnet_backprop() {
        let network = neura_residual![
            <= 0, 1;
            neura_layer!("dense", 2) => 0, 1;
            neura_layer!("dense", 4);
            neura_layer!("dense", 8)
        ]
        .construct(NeuraShape::Vector(1))
        .unwrap();
        let backprop = NeuraBackprop::new(Euclidean);
        backprop.get_gradient(&network, &dvector![0.0], &dvector![0.0]);
    }
 }
--- a/src/network/residual/layer_impl.rs
+++ b/src/network/residual/layer_impl.rs
@ -1,11 +1,59 @@
-//! Implementations for NeuraLayer*
+use nalgebra::{DVector, Scalar};
 use num::Float;
 use std::borrow::Cow;
 use crate::network::*;
 use super::*;
-impl<Axis, Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, Axis> {
+#[derive(Clone, Debug, PartialEq)]
 pub struct NeuraResidualNode<Layer, ChildNetwork, Axis> {
    pub layer: Layer,
    pub child_network: ChildNetwork,
    /// Array of relative layers indices to send the offset of this layer to,
    /// defaults to `vec![0]`.
    pub(crate) offsets: Vec<usize>,
    pub axis: Axis,
    pub(crate) output_shape: Option<NeuraShape>,
 }
 impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend> {
    /// Creates a node wrapping `layer`, followed by `child_network`.
    ///
    /// The node starts with offsets `vec![0]`, the `NeuraAxisAppend` axis and no
    /// known output shape.
    pub fn new(layer: Layer, child_network: ChildNetwork) -> Self {
        Self {
            layer,
            child_network,
            offsets: vec![0],
            axis: NeuraAxisAppend,
            output_shape: None,
        }
    }
 }
 impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
        self.offsets = offsets;
        self
    }
    pub fn offset(mut self, offset: usize) -> Self {
        self.offsets.push(offset);
        self
    }
    pub fn axis<Axis2>(self, axis: Axis2) -> NeuraResidualNode<Layer, ChildNetwork, Axis2> {
        NeuraResidualNode {
            layer: self.layer,
            child_network: self.child_network,
            offsets: self.offsets,
            axis,
            // Drop the knowledge of output_shape
            output_shape: None,
        }
    }
    fn process_input<Data>(
        &self,
        input: &NeuraResidualInput<Data>,
--- a/src/network/residual/wrapper.rs
+++ b/src/network/residual/wrapper.rs
@ -1,6 +1,6 @@
 use std::borrow::Cow;
-use crate::network::*;
+use crate::{network::*, utils::unwrap_or_clone};
 use super::*;
@ -14,6 +14,23 @@ pub struct NeuraResidual<Layers> {
 }
 impl<Layers> NeuraResidual<Layers> {
    pub fn new(layers: Layers) -> Self {
        Self {
            layers,
            initial_offsets: vec![0],
        }
    }
    pub fn offset(mut self, offset: usize) -> Self {
        self.initial_offsets.push(offset);
        self
    }
    pub fn offsets(mut self, offsets: Vec<usize>) -> Self {
        self.initial_offsets = offsets;
        self
    }
    fn input_to_residual_input<Input: Clone>(&self, input: &Input) -> NeuraResidualInput<Input> {
        let input: Rc<Input> = Rc::new((*input).clone());
        let mut inputs = NeuraResidualInput::new();
@ -28,21 +45,14 @@ impl<Layers> NeuraResidual<Layers> {
    }
 }
-impl<Input: Clone, Output: Clone, Layers> NeuraLayer<Input> for NeuraResidual<Layers>
+impl<Input: Clone, Layers> NeuraLayer<Input> for NeuraResidual<Layers>
 where
-    Layers: NeuraLayer<NeuraResidualInput<Input>, Output = NeuraResidualInput<Output>>,
+    Layers: NeuraLayer<NeuraResidualInput<Input>>,
 {
-    type Output = Output;
+    type Output = Layers::Output;
    fn eval(&self, input: &Input) -> Self::Output {
-        let output = self.layers.eval(&self.input_to_residual_input(input));
+        self.layers.eval(&self.input_to_residual_input(input))
        let result: Rc<Self::Output> = output.get_first()
            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
            .into();
        // TODO: replace with Rc::unwrap_or_clone once https://github.com/rust-lang/rust/issues/93610 is closed
        Rc::try_unwrap(result).unwrap_or_else(|result| (*result).clone())
    }
 }
@ -60,31 +70,19 @@ impl<Layers: NeuraTrainableLayerBase> NeuraTrainableLayerBase for NeuraResidual<
    }
 }
-impl<
+impl<Data: Clone, Layers: NeuraTrainableLayerEval<NeuraResidualInput<Data>>>
-        Data: Clone,
+    NeuraTrainableLayerEval<Data> for NeuraResidual<Layers>
        Layers: NeuraTrainableLayerEval<NeuraResidualInput<Data>, Output = NeuraResidualInput<Data>>,
    > NeuraTrainableLayerEval<Data> for NeuraResidual<Layers>
 {
    type IntermediaryRepr = Layers::IntermediaryRepr;
    fn eval_training(&self, input: &Data) -> (Self::Output, Self::IntermediaryRepr) {
-        let (output, intermediary) = self
+        self.layers
-            .layers
+            .eval_training(&self.input_to_residual_input(input))
            .eval_training(&self.input_to_residual_input(input));
        let result: Rc<Self::Output> = output.get_first().unwrap().into();
        (
            Rc::try_unwrap(result).unwrap_or_else(|result| (*result).clone()),
            intermediary,
        )
    }
 }
-impl<
+impl<Data: Clone, Layers: NeuraTrainableLayerSelf<NeuraResidualInput<Data>>>
-        Data: Clone,
+    NeuraTrainableLayerSelf<Data> for NeuraResidual<Layers>
        Layers: NeuraTrainableLayerSelf<NeuraResidualInput<Data>, Output = NeuraResidualInput<Data>>,
    > NeuraTrainableLayerSelf<Data> for NeuraResidual<Layers>
 {
    fn regularize_layer(&self) -> Self::Gradient {
        self.layers.regularize_layer()
@ -96,16 +94,8 @@ impl<
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Self::Gradient {
-        let epsilon = Rc::new(epsilon.clone());
+        self.layers
-        let mut epsilon_residual = NeuraResidualInput::new();
+            .get_gradient(&self.input_to_residual_input(input), intermediary, &epsilon)
        epsilon_residual.push(0, epsilon);
        self.layers.get_gradient(
            &self.input_to_residual_input(input),
            intermediary,
            &epsilon_residual,
        )
    }
 }
--- a/src/network/sequential/construct.rs
+++ b/src/network/sequential/construct.rs
@ -2,20 +2,13 @@ use crate::layer::NeuraShapedLayer;
 use super::*;
 /// Conversion of a not-yet-constructed network into a constructed one, given
 /// the shape of its input.
 pub trait NeuraSequentialConstruct {
    /// Type produced by a successful construction.
    type Constructed;
    /// Error reported when construction fails.
    type Err;
    /// Consumes `self` and attempts to build the network for `input_shape`.
    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err>;
 }
 /// Construction error that wraps either an `Err` value or a `ChildErr` value.
 // NOTE(review): presumably `Current` is raised by the node's own layer and
 // `Child` by the rest of the network — confirm against the impls.
 #[derive(Debug, Clone)]
 pub enum NeuraSequentialConstructErr<Err, ChildErr> {
    Current(Err),
    Child(ChildErr),
 }
-impl<Layer: NeuraPartialLayer> NeuraSequentialConstruct for NeuraSequential<Layer, ()> {
+impl<Layer: NeuraPartialLayer> NeuraPartialLayer for NeuraSequential<Layer, ()> {
    type Constructed = NeuraSequential<Layer::Constructed, ()>;
    type Err = Layer::Err;
@ -27,7 +20,7 @@ impl<Layer: NeuraPartialLayer> NeuraSequentialConstruct for NeuraSequential<Laye
    }
 }
-impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraSequentialConstruct> NeuraSequentialConstruct
+impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraPartialLayer> NeuraPartialLayer
    for NeuraSequential<Layer, ChildNetwork>
 {
    type Constructed = NeuraSequential<Layer::Constructed, ChildNetwork::Constructed>;
--- a/src/network/sequential/mod.rs
+++ b/src/network/sequential/mod.rs
@ -190,10 +190,9 @@ mod test {
        derivable::{activation::Relu, regularize::NeuraL0},
        layer::{dense::NeuraDenseLayer, NeuraLayer, NeuraShape},
        neura_layer,
        prelude::NeuraPartialLayer,
    };
    use super::NeuraSequentialConstruct;
    #[test]
    fn test_neura_network_macro() {
        let mut rng = rand::thread_rng();
--- a/src/utils.rs
+++ b/src/utils.rs
@ -149,3 +149,8 @@ pub fn plot_losses(losses: Vec<(f64, f64)>, width: u32, height: u32) {
        .linecolorplot(&Shape::Lines(&val_losses), (255, 0, 255).into())
        .nice();
 }
 /// Takes the value out of `value` when it is the only strong reference,
 /// otherwise returns a clone of the shared value.
 pub(crate) fn unwrap_or_clone<T: Clone>(value: std::rc::Rc<T>) -> T {
    // TODO: replace with Rc::unwrap_or_clone once https://github.com/rust-lang/rust/issues/93610 is closed
    match std::rc::Rc::try_unwrap(value) {
        Ok(inner) => inner,
        Err(shared) => (*shared).clone(),
    }
 }