🔥 Implement backpropagation for NeuraGraph (untested)

main
Shad Amethyst 2 years ago
parent 251e4d02d2
commit 41711d4668

@@ -1,4 +1,6 @@
 mod matrix;
+use std::any::Any;
+
 pub use matrix::NeuraMatrix;

 mod vector;
@@ -17,6 +19,39 @@ pub trait NeuraVectorSpace {
     fn norm_squared(&self) -> f64;
 }

+pub trait NeuraDynVectorSpace {
+    fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace);
+
+    fn mul_assign(&mut self, by: f64);
+
+    fn norm_squared(&self) -> f64;
+
+    /// Trampoline for allowing NeuraDynVectorSpace to be cast back into a known type for add_assign
+    fn into_any(&self) -> &dyn Any;
+}
+
+impl<T: NeuraVectorSpace + 'static> NeuraDynVectorSpace for T {
+    fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace) {
+        let Some(other) = other.into_any().downcast_ref::<Self>() else {
+            panic!("Incompatible operand: expected other to be equal to self");
+        };
+
+        <Self as NeuraVectorSpace>::add_assign(self, other);
+    }
+
+    fn mul_assign(&mut self, by: f64) {
+        <Self as NeuraVectorSpace>::mul_assign(self, by);
+    }
+
+    fn norm_squared(&self) -> f64 {
+        <Self as NeuraVectorSpace>::norm_squared(self)
+    }
+
+    fn into_any(&self) -> &dyn Any {
+        self
+    }
+}
+
 impl NeuraVectorSpace for () {
     #[inline(always)]
     fn add_assign(&mut self, _other: &Self) {
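
The `into_any` method above exists because a trait object cannot name `Self`: the blanket impl exposes an `&dyn Any` view of the value and downcasts the operand back to the concrete type before delegating to the statically-typed `NeuraVectorSpace::add_assign`. A minimal, self-contained sketch of the same downcast-trampoline pattern, using toy `DynAdd` and `Grad` names rather than the library's types (not part of this commit):

use std::any::Any;

trait DynAdd {
    fn dyn_add_assign(&mut self, other: &dyn DynAdd);
    fn as_any(&self) -> &dyn Any;
}

struct Grad(Vec<f64>);

impl DynAdd for Grad {
    fn dyn_add_assign(&mut self, other: &dyn DynAdd) {
        // Recover the concrete type; panics on a mismatched operand, like the commit's impl does.
        let other = other.as_any().downcast_ref::<Grad>().expect("incompatible operand");
        for (a, b) in self.0.iter_mut().zip(&other.0) {
            *a += b;
        }
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn main() {
    let mut a: Box<dyn DynAdd> = Box::new(Grad(vec![1.0, 2.0]));
    let b: Box<dyn DynAdd> = Box::new(Grad(vec![0.5, 0.5]));
    // Both operands are only known as trait objects, yet the addition still dispatches
    // to the statically-typed element-wise add.
    a.dyn_add_assign(&*b);
}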

@@ -1,11 +1,11 @@
 pub mod algebra;
 pub mod derivable;
+pub mod err;
 pub mod gradient_solver;
 pub mod layer;
 pub mod network;
 pub mod train;
-pub mod err;

 mod utils;

 // TODO: move to a different file

@@ -2,60 +2,63 @@ use crate::network::residual::{NeuraAxisDefault, NeuraSplitInputs};

 use super::*;

-trait FromSequential<Seq, Data> {
-    fn from_sequential(
+pub trait FromSequential<Seq, Data> {
+    fn from_sequential_rec(
         seq: &Seq,
         nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self;
 }

 impl<Data> FromSequential<(), Data> for NeuraGraph<Data> {
-    fn from_sequential(
+    fn from_sequential_rec(
         _seq: &(),
         nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self {
         Self {
             output_index: nodes.len(),
             buffer_size: nodes.len() + 1,
             nodes: nodes,
-            output_shape,
+            output_shape: input_shape,
         }
     }
 }

-impl<
-    Data: Clone,
-    Layer: NeuraLayer<Data, Output = Data> + Clone + std::fmt::Debug + 'static,
-    ChildNetwork,
-> FromSequential<NeuraSequential<Layer, ChildNetwork>, Data> for NeuraGraph<Data>
+impl<Data: Clone + 'static, Layer: NeuraTrainableLayerFull<Data, Output = Data>, ChildNetwork>
+    FromSequential<NeuraSequential<Layer, ChildNetwork>, Data> for NeuraGraph<Data>
 where
     NeuraGraph<Data>: FromSequential<ChildNetwork, Data>,
     NeuraAxisDefault: NeuraSplitInputs<Data, Combined = Data>,
+    Layer::IntermediaryRepr: 'static,
 {
-    fn from_sequential(
+    fn from_sequential_rec(
         seq: &NeuraSequential<Layer, ChildNetwork>,
         mut nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self {
         nodes.push(NeuraGraphNodeConstructed {
-            node: Box::new(NeuraGraphNode::from(seq.layer.clone())),
+            node: Box::new(NeuraGraphNode::from_layer(
+                seq.layer.clone(),
+                vec![input_shape],
+            )),
             inputs: vec![nodes.len()],
             output: nodes.len() + 1,
         });

-        Self::from_sequential(&seq.child_network, nodes, output_shape)
+        Self::from_sequential_rec(&seq.child_network, nodes, seq.layer.output_shape())
     }
 }

-impl<Data, Layer, ChildNetwork> From<NeuraSequential<Layer, ChildNetwork>> for NeuraGraph<Data>
-where
-    NeuraGraph<Data>: FromSequential<NeuraSequential<Layer, ChildNetwork>, Data>,
-    NeuraSequential<Layer, ChildNetwork>: NeuraShapedLayer,
-{
-    fn from(network: NeuraSequential<Layer, ChildNetwork>) -> Self {
-        let output_shape = network.output_shape();
-
-        Self::from_sequential(&network, vec![], output_shape)
+impl<Data> NeuraGraph<Data> {
+    pub fn from_sequential<Layer, ChildNetwork>(
+        network: NeuraSequential<Layer, ChildNetwork>,
+        input_shape: NeuraShape,
+    ) -> Self
+    where
+        NeuraGraph<Data>: FromSequential<NeuraSequential<Layer, ChildNetwork>, Data>,
+        NeuraSequential<Layer, ChildNetwork>: NeuraShapedLayer,
+    {
+        Self::from_sequential_rec(&network, vec![], input_shape)
     }
 }
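
The `From<NeuraSequential<...>>` impl is replaced by an explicit `NeuraGraph::from_sequential` constructor that takes the input shape and threads it through the recursion, so each generated node records the shape of its single input. Usage, adapted from the test updated later in this commit (construction of `network` is elided here):

// `network` is a constructed NeuraSequential, e.g. built via .construct(NeuraShape::Vector(3)).unwrap()
let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(3));

Each sequential layer becomes one graph node whose only input is the previous node's output slot, and `seq.layer.output_shape()` is passed down as the input shape for the next step of the recursion.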

@@ -1,4 +1,11 @@
-use crate::{layer::NeuraShapedLayer, prelude::*};
+use std::any::Any;
+
+use crate::{
+    algebra::NeuraDynVectorSpace,
+    derivable::NeuraLoss,
+    layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
+    prelude::*,
+};

 mod node;
 pub use node::*;
@@ -7,9 +14,36 @@ mod partial;
 pub use partial::NeuraGraphPartial;

 mod from;
+pub use from::FromSequential;
+
+pub trait NeuraTrainableLayerFull<Input>:
+    NeuraTrainableLayerEval<Input>
+    + NeuraTrainableLayerBackprop<Input>
+    + NeuraTrainableLayerSelf<Input>
+    + NeuraShapedLayer
+    + Clone
+    + std::fmt::Debug
+    + 'static
+where
+    Self::IntermediaryRepr: 'static,
+{
+}
+
+impl<Input, T> NeuraTrainableLayerFull<Input> for T
+where
+    T: NeuraTrainableLayerEval<Input>
+        + NeuraTrainableLayerBackprop<Input>
+        + NeuraTrainableLayerSelf<Input>
+        + NeuraShapedLayer
+        + Clone
+        + std::fmt::Debug
+        + 'static,
+    T::IntermediaryRepr: 'static,
+{
+}

 #[derive(Debug)]
-struct NeuraGraphNodeConstructed<Data> {
+pub struct NeuraGraphNodeConstructed<Data> {
     node: Box<dyn NeuraGraphNodeEval<Data>>,
     inputs: Vec<usize>,
     output: usize,
@@ -48,10 +82,12 @@ impl<Data> NeuraGraph<Data> {
         res
     }

-    fn eval_in(&self, input: &Data, buffer: &mut Vec<Option<Data>>)
+    fn eval_in(&self, input: &Data, buffer: &mut [Option<Data>])
     where
         Data: Clone,
     {
+        assert!(buffer.len() >= self.nodes.len());
+
         buffer[0] = Some(input.clone());

         for node in self.nodes.iter() {
@@ -69,6 +105,77 @@ impl<Data> NeuraGraph<Data> {
             buffer[node.output] = Some(result);
         }
     }
+
+    fn backprop_in<Loss, Target>(
+        &self,
+        input: &Data,
+        loss: Loss,
+        target: &Target,
+        output_buffer: &mut Vec<Option<Data>>,
+        backprop_buffer: &mut Vec<Option<Data>>,
+        intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
+        gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
+    ) where
+        Data: Clone + std::ops::Add<Data, Output = Data>,
+        Loss: NeuraLoss<Data, Target = Target>,
+    {
+        assert!(output_buffer.len() >= self.nodes.len());
+        assert!(backprop_buffer.len() >= self.nodes.len());
+        assert!(intermediary_buffer.len() >= self.nodes.len());
+        assert!(gradient_buffer.len() >= self.nodes.len());
+
+        output_buffer[0] = Some(input.clone());
+
+        // Forward pass
+        for node in self.nodes.iter() {
+            // PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
+            let inputs: Vec<_> = node
+                .inputs
+                .iter()
+                .map(|&i| {
+                    output_buffer[i]
+                        .clone()
+                        .expect("Unreachable: output of previous layer was not set")
+                })
+                .collect();
+            let (result, intermediary) = node.node.eval_training(&inputs);
+            output_buffer[node.output] = Some(result);
+            intermediary_buffer[node.output] = Some(intermediary);
+        }
+
+        let loss = loss.nabla(
+            target,
+            output_buffer[self.output_index]
+                .as_ref()
+                .expect("Unreachable: output was not set"),
+        );
+        backprop_buffer[self.output_index] = Some(loss);
+
+        // Backward pass
+        for node in self.nodes.iter().rev() {
+            let Some(epsilon_in) = backprop_buffer[node.output].take() else {
+                continue
+            };
+
+            let epsilon_out = node
+                .node
+                .backprop(&intermediary_buffer[node.output], &epsilon_in);
+            let gradient = node
+                .node
+                .get_gradient(&intermediary_buffer[node.output], &epsilon_in);
+
+            gradient_buffer[node.output].add_assign(&*gradient);
+
+            for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
+                if let Some(existing_gradient) = backprop_buffer[input].take() {
+                    backprop_buffer[input] = Some(existing_gradient + epsilon);
+                } else {
+                    backprop_buffer[input] = Some(epsilon);
+                }
+            }
+        }
+    }
 }

 impl<Data: Clone> NeuraLayer<Data> for NeuraGraph<Data> {
@@ -213,7 +320,7 @@ mod test {
             .construct(NeuraShape::Vector(3))
             .unwrap();

-        let graph = NeuraGraph::from(network.clone());
+        let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(3));

         for _ in 0..10 {
             let input = uniform_vector(3);
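
The heart of the commit is `backprop_in`: one forward sweep over the (topologically ordered) node list that caches every node's output and training intermediary, then one reverse sweep that seeds the output slot with the loss gradient, propagates `epsilon` values back to each node's inputs, and accumulates per-node gradients. A self-contained toy version of the same two-pass buffer scheme, using plain `f64` activations, a hypothetical `ToyNode` and a squared-error loss (none of this is the library's API):

// Toy node: output = weight * sum(inputs). Buffers are indexed like in the commit:
// slot 0 holds the network input, slot `node.output` holds that node's result.
struct ToyNode {
    inputs: Vec<usize>,
    output: usize,
    weight: f64,
}

impl ToyNode {
    fn eval(&self, inputs: &[f64]) -> f64 {
        self.weight * inputs.iter().sum::<f64>()
    }

    // Returns (epsilon for each input, gradient w.r.t. the node's weight).
    fn backprop(&self, inputs: &[f64], epsilon_in: f64) -> (Vec<f64>, f64) {
        let epsilon_out = vec![self.weight * epsilon_in; inputs.len()];
        let gradient = inputs.iter().sum::<f64>() * epsilon_in;
        (epsilon_out, gradient)
    }
}

fn backprop_in(nodes: &[ToyNode], output_index: usize, input: f64, target: f64) -> Vec<f64> {
    let buffer_size = nodes.len() + 1;
    let mut output_buffer: Vec<Option<f64>> = vec![None; buffer_size];
    let mut backprop_buffer: Vec<Option<f64>> = vec![None; buffer_size];
    let mut gradient_buffer = vec![0.0; buffer_size];

    // Forward pass: cache every node's output.
    output_buffer[0] = Some(input);
    for node in nodes {
        let inputs: Vec<f64> = node.inputs.iter().map(|&i| output_buffer[i].unwrap()).collect();
        output_buffer[node.output] = Some(node.eval(&inputs));
    }

    // Seed the backward pass with the loss gradient (here: d/dy of (y - target)^2).
    backprop_buffer[output_index] = Some(2.0 * (output_buffer[output_index].unwrap() - target));

    // Backward pass: visit nodes in reverse, accumulate epsilons into each input slot.
    for node in nodes.iter().rev() {
        let Some(epsilon_in) = backprop_buffer[node.output].take() else { continue };
        let inputs: Vec<f64> = node.inputs.iter().map(|&i| output_buffer[i].unwrap()).collect();
        let (epsilon_out, gradient) = node.backprop(&inputs, epsilon_in);
        gradient_buffer[node.output] += gradient;
        for (&i, epsilon) in node.inputs.iter().zip(epsilon_out) {
            *backprop_buffer[i].get_or_insert(0.0) += epsilon;
        }
    }

    gradient_buffer
}

With a single `ToyNode { inputs: vec![0], output: 1, weight: 2.0 }`, `output_index = 1`, input 3.0 and target 0.0, the forward pass stores 6.0 in slot 1, the loss gradient is 12.0, and the returned weight gradient in slot 1 is 3.0 * 12.0 = 36.0.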

@@ -1,13 +1,16 @@
 use dyn_clone::DynClone;
-use std::fmt::Debug;
+use std::{any::Any, fmt::Debug};

 use crate::{
+    algebra::NeuraDynVectorSpace,
     err::NeuraAxisErr,
-    layer::{NeuraLayer, NeuraShapedLayer},
+    layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
     network::residual::{NeuraAxisDefault, NeuraCombineInputs, NeuraSplitInputs},
     prelude::{NeuraPartialLayer, NeuraShape},
 };
+
+use super::*;

 // TODO: split into two traits
 pub trait NeuraGraphNodePartial<Data>: DynClone + Debug {
     fn inputs<'a>(&'a self) -> &'a [String];
@@ -20,7 +23,17 @@ pub trait NeuraGraphNodePartial<Data>: DynClone + Debug {
 }

 pub trait NeuraGraphNodeEval<Data>: DynClone + Debug {
-    fn eval<'a>(&'a self, inputs: &[Data]) -> Data;
+    fn eval(&self, inputs: &[Data]) -> Data;
+
+    fn eval_training(&self, inputs: &[Data]) -> (Data, Box<dyn Any>);
+    fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data>;
+    fn get_gradient(
+        &self,
+        intermediary: &dyn Any,
+        epsilon_in: &Data,
+    ) -> Box<dyn NeuraDynVectorSpace>;
+
+    fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace);
 }

 #[derive(Clone, Debug)]
@@ -29,6 +42,21 @@ pub struct NeuraGraphNode<Axis, Layer> {
     axis: Axis,
     layer: Layer,
     name: String,
+    input_shapes: Option<Vec<NeuraShape>>,
+}
+
+impl<Layer> NeuraGraphNode<NeuraAxisDefault, Layer> {
+    pub(crate) fn from_layer(layer: Layer, input_shapes: Vec<NeuraShape>) -> Self {
+        Self {
+            inputs: vec![],
+            axis: NeuraAxisDefault,
+            layer,
+            name: random_name(),
+            input_shapes: Some(input_shapes),
+        }
+    }
 }

 impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
@@ -39,6 +67,8 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
             axis,
             layer,
             name,
+            input_shapes: None,
         }
     }
@@ -50,38 +80,101 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
             + Debug
             + 'static,
         Layer: NeuraPartialLayer + Clone + Debug + 'static,
-        Layer::Constructed: NeuraShapedLayer
-            + NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>
-            + Clone
-            + Debug
-            + 'static,
+        Layer::Constructed:
+            NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
         Layer::Err: Debug,
+        <Layer::Constructed as NeuraTrainableLayerEval<
+            <Axis as NeuraCombineInputs<Data>>::Combined,
+        >>::IntermediaryRepr: 'static,
+        <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
     {
         Box::new(self)
     }
+
+    fn downcast_intermediary<'a, Data>(
+        &self,
+        intermediary: &'a dyn Any,
+    ) -> &'a Intermediary<Axis::Combined, Layer>
+    where
+        Axis: NeuraCombineInputs<Data>,
+        Layer: NeuraTrainableLayerFull<Axis::Combined>,
+        Axis::Combined: 'static,
+    {
+        intermediary
+            .downcast_ref::<Intermediary<Axis::Combined, Layer>>()
+            .expect("Incompatible value passed to NeuraGraphNode::backprop")
+    }
 }

+struct Intermediary<Combined, Layer: NeuraTrainableLayerFull<Combined>>
+where
+    Layer::IntermediaryRepr: 'static,
+{
+    combined: Combined,
+    layer_intermediary: Layer::IntermediaryRepr,
+}
+
 impl<
     Data: Clone,
     Axis: NeuraSplitInputs<Data> + Clone + Debug,
-    Layer: NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data> + Clone + Debug,
+    Layer: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
 > NeuraGraphNodeEval<Data> for NeuraGraphNode<Axis, Layer>
+where
+    Layer::IntermediaryRepr: 'static,
+    Axis::Combined: 'static,
 {
     fn eval<'a>(&'a self, inputs: &[Data]) -> Data {
         // TODO: use to_vec_in?
         let combined = self.axis.combine(inputs.to_vec());
         self.layer.eval(&combined)
     }
-}

-impl<Layer: Clone + Debug> From<Layer> for NeuraGraphNode<NeuraAxisDefault, Layer> {
-    fn from(layer: Layer) -> Self {
-        Self {
-            inputs: vec![],
-            axis: NeuraAxisDefault,
-            layer,
-            name: random_name(),
-        }
-    }
+    fn eval_training<'a>(&self, inputs: &[Data]) -> (Data, Box<dyn Any>) {
+        let combined = self.axis.combine(inputs.to_vec());
+        let (result, layer_intermediary) = self.layer.eval_training(&combined);
+
+        let intermediary: Intermediary<Axis::Combined, Layer> = Intermediary {
+            combined,
+            layer_intermediary,
+        };
+
+        (result, Box::new(intermediary))
+    }
+
+    fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data> {
+        let intermediary = self.downcast_intermediary(intermediary);
+
+        let epsilon_out = self.layer.backprop_layer(
+            &intermediary.combined,
+            &intermediary.layer_intermediary,
+            epsilon_in,
+        );
+
+        self.axis
+            .split(&epsilon_out, self.input_shapes.as_ref().unwrap())
+    }
+
+    fn get_gradient(
+        &self,
+        intermediary: &dyn Any,
+        epsilon_in: &Data,
+    ) -> Box<dyn NeuraDynVectorSpace> {
+        let intermediary = self.downcast_intermediary(intermediary);
+
+        Box::new(self.layer.get_gradient(
+            &intermediary.combined,
+            &intermediary.layer_intermediary,
+            epsilon_in,
+        ))
+    }
+
+    fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace) {
+        self.layer.apply_gradient(
+            gradient
+                .into_any()
+                .downcast_ref::<Layer::Gradient>()
+                .expect("Invalid gradient type passed to NeuraGraphNode::apply_gradient"),
+        );
+    }
 }
@@ -95,12 +188,10 @@ impl<
     Layer: NeuraPartialLayer + Clone + Debug,
 > NeuraGraphNodePartial<Data> for NeuraGraphNode<Axis, Layer>
 where
-    Layer::Constructed: NeuraShapedLayer
-        + NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>
-        + Clone
-        + Debug
-        + 'static,
+    Layer::Constructed: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
     Layer::Err: Debug,
+    <Layer::Constructed as NeuraTrainableLayerEval<<Axis as NeuraCombineInputs<Data>>::Combined>>::IntermediaryRepr: 'static,
+    <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
 {
     fn inputs<'a>(&'a self) -> &'a [String] {
         &self.inputs
@@ -116,7 +207,7 @@ where
     ) -> Result<(Box<dyn NeuraGraphNodeEval<Data>>, NeuraShape), String> {
         let combined = self
             .axis
-            .combine(input_shapes)
+            .combine(input_shapes.clone())
             .map_err(|err| format!("{:?}", err))?;

         let constructed_layer = self
@@ -132,6 +223,7 @@ where
                 axis: self.axis.clone(),
                 layer: constructed_layer,
                 name: self.name.clone(),
+                input_shapes: Some(input_shapes)
             }),
             output_shape,
         ))
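
Note how the node-level trait stays object-safe: `eval_training` erases each layer's `IntermediaryRepr` (together with the combined input) behind a `Box<dyn Any>`, and `backprop`/`get_gradient` recover it with `downcast_ref` via `downcast_intermediary`. A standalone sketch of that erase-then-downcast round trip, with toy names rather than the library's types:

use std::any::Any;

struct SquareLayer;

// The caller never sees this type; it travels through the graph as `Box<dyn Any>`.
struct SquareIntermediary {
    input: f64,
}

impl SquareLayer {
    fn eval_training(&self, input: f64) -> (f64, Box<dyn Any>) {
        (input * input, Box::new(SquareIntermediary { input }))
    }

    fn backprop(&self, intermediary: &dyn Any, epsilon_in: f64) -> f64 {
        // Recover the concrete intermediary; panics if the wrong box is handed back.
        let inter = intermediary
            .downcast_ref::<SquareIntermediary>()
            .expect("incompatible intermediary");
        2.0 * inter.input * epsilon_in
    }
}

fn main() {
    let layer = SquareLayer;
    let (output, intermediary) = layer.eval_training(3.0);
    let epsilon_out = layer.backprop(&*intermediary, 1.0);
    assert_eq!(output, 9.0);
    assert_eq!(epsilon_out, 6.0);
}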
