Implement NeuraGraphBackprop

main
Shad Amethyst 2 years ago
parent 99d0cb4408
commit 93fa7e238a

@@ -14,12 +14,10 @@ pub trait NeuraVectorSpace {
fn mul_assign(&mut self, by: f64);
// fn zero() -> Self;
fn norm_squared(&self) -> f64;
}
pub trait NeuraDynVectorSpace {
pub trait NeuraDynVectorSpace: Send {
fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace);
fn mul_assign(&mut self, by: f64);
@@ -30,9 +28,9 @@ pub trait NeuraDynVectorSpace {
fn into_any(&self) -> &dyn Any;
}
impl<T: NeuraVectorSpace + 'static> NeuraDynVectorSpace for T {
impl<T: NeuraVectorSpace + Send + 'static> NeuraDynVectorSpace for T {
fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace) {
let Some(other) = other.into_any().downcast_ref::<Self>() else {
let Some(other) = other.into_any().downcast_ref::<T>() else {
panic!("Incompatible operand: expected other to be equal to self");
};
@@ -63,17 +61,12 @@ impl NeuraVectorSpace for () {
// Noop
}
// #[inline(always)]
// fn zero() -> Self {
// ()
// }
fn norm_squared(&self) -> f64 {
0.0
}
}
impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
impl<T: NeuraVectorSpace + ?Sized> NeuraVectorSpace for Box<T> {
fn add_assign(&mut self, other: &Self) {
self.as_mut().add_assign(other.as_ref());
}
@@ -82,15 +75,25 @@ impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
self.as_mut().mul_assign(by);
}
// fn zero() -> Self {
// Box::new(T::zero())
// }
fn norm_squared(&self) -> f64 {
self.as_ref().norm_squared()
}
}
impl NeuraVectorSpace for dyn NeuraDynVectorSpace {
fn add_assign(&mut self, other: &Self) {
<dyn NeuraDynVectorSpace>::add_assign(self, &*other)
}
fn mul_assign(&mut self, by: f64) {
<dyn NeuraDynVectorSpace>::mul_assign(self, by)
}
fn norm_squared(&self) -> f64 {
<dyn NeuraDynVectorSpace>::norm_squared(self)
}
}
impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
fn add_assign(&mut self, other: &Self) {
NeuraVectorSpace::add_assign(&mut self.0, &other.0);
@@ -124,21 +127,34 @@ impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
}
}
// fn zero() -> Self {
// let mut res: Vec<T> = Vec::with_capacity(N);
// for _ in 0..N {
// res.push(T::zero());
// }
// res.try_into().unwrap_or_else(|_| {
// // TODO: check that this panic is optimized away
// unreachable!()
// })
// }
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
}
}
impl<T: NeuraVectorSpace> NeuraVectorSpace for Vec<T> {
fn add_assign(&mut self, other: &Self) {
assert_eq!(self.len(), other.len());
for (self_item, other_item) in self.iter_mut().zip(other.iter()) {
self_item.add_assign(other_item);
}
}
fn mul_assign(&mut self, by: f64) {
for item in self.iter_mut() {
item.mul_assign(by);
}
}
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
let mut res = 0.0;
for item in self.iter() {
res += item.norm_squared();
}
res
}
}
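
The blanket impl above erases a concrete gradient type behind dyn NeuraDynVectorSpace and recovers it through into_any plus downcast_ref whenever two erased values are combined. A minimal standalone sketch of that pattern, with a simplified DynVector trait and Vec<f64> standing in for a real gradient (illustrative names, not this crate's API):

    use std::any::Any;

    // Object-safe stand-in for NeuraDynVectorSpace, reduced to the two methods used here.
    trait DynVector: Send {
        fn add_assign(&mut self, other: &dyn DynVector);
        fn into_any(&self) -> &dyn Any;
    }

    impl DynVector for Vec<f64> {
        fn add_assign(&mut self, other: &dyn DynVector) {
            // Recover the concrete type behind the trait object; a mismatch panics, as in the impl above.
            let other = other
                .into_any()
                .downcast_ref::<Vec<f64>>()
                .expect("Incompatible operand");
            for (a, b) in self.iter_mut().zip(other.iter()) {
                *a += *b;
            }
        }

        fn into_any(&self) -> &dyn Any {
            self
        }
    }

    fn main() {
        // Two type-erased values of the same concrete type can be summed in place.
        let mut total: Box<dyn DynVector> = Box::new(vec![1.0, 2.0]);
        let update: Box<dyn DynVector> = Box::new(vec![0.5, -0.5]);
        total.add_assign(&*update);
        // `total` now holds [1.5, 1.5].
    }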

@@ -12,6 +12,10 @@ impl<Loss> NeuraBackprop<Loss> {
pub fn new(loss: Loss) -> Self {
Self { loss }
}
pub fn get(&self) -> &Loss {
&self.loss
}
}
impl<

@@ -0,0 +1,169 @@
use crate::gradient_solver::NeuraGradientSolver;
use super::*;
#[derive(Debug, Clone)]
pub struct NeuraGraphBackprop<Loss> {
loss: Loss,
// TODO: store buffers for re-use, do not clone them
}
impl<Loss> NeuraGraphBackprop<Loss> {
pub fn new(loss: Loss) -> Self {
Self { loss }
}
}
impl<Loss: Clone> From<&NeuraBackprop<Loss>> for NeuraGraphBackprop<Loss> {
fn from(value: &NeuraBackprop<Loss>) -> Self {
Self {
loss: value.get().clone(),
}
}
}
impl<
Data: Clone + std::fmt::Debug + std::ops::Add<Data, Output = Data> + 'static,
Target,
Loss: NeuraLoss<Data, Target = Target>,
> NeuraGradientSolver<Data, Target, NeuraGraph<Data>> for NeuraGraphBackprop<Loss>
{
// TODO: make it a &mut method
fn get_gradient(
&self,
trainable: &NeuraGraph<Data>,
input: &Data,
target: &Target,
) -> <NeuraGraph<Data> as NeuraLayerBase>::Gradient {
let mut output_buffer = trainable.create_buffer();
let mut backprop_buffer = trainable.create_buffer();
let mut intermediary_buffer = trainable.create_buffer();
let mut gradient_buffer = trainable.default_gradient();
trainable.backprop_in(
input,
&self.loss,
target,
&mut output_buffer,
&mut backprop_buffer,
&mut intermediary_buffer,
&mut gradient_buffer,
);
gradient_buffer
}
#[allow(unused)]
fn score(&self, trainable: &NeuraGraph<Data>, input: &Data, target: &Target) -> f64 {
todo!()
}
}
impl<Data> NeuraGraph<Data> {
fn backprop_in<Loss, Target>(
&self,
input: &Data,
loss: &Loss,
target: &Target,
output_buffer: &mut Vec<Option<Data>>,
backprop_buffer: &mut Vec<Option<Data>>,
intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
) where
Data: Clone + std::ops::Add<Data, Output = Data>,
Loss: NeuraLoss<Data, Target = Target>,
{
assert!(output_buffer.len() >= self.nodes.len());
assert!(backprop_buffer.len() >= self.nodes.len());
assert!(intermediary_buffer.len() >= self.nodes.len());
assert!(gradient_buffer.len() >= self.nodes.len());
output_buffer[0] = Some(input.clone());
// Forward pass
for node in self.nodes.iter() {
// PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
let inputs: Vec<_> = node
.inputs
.iter()
.map(|&i| {
output_buffer[i]
.clone()
.expect("Unreachable: output of previous layer was not set")
})
.collect();
let (result, intermediary) = node.node.eval_training(&inputs);
output_buffer[node.output] = Some(result);
intermediary_buffer[node.output] = Some(intermediary);
}
let loss = loss.nabla(
target,
output_buffer[self.output_index]
.as_ref()
.expect("Unreachable: output was not set"),
);
backprop_buffer[self.output_index] = Some(loss);
// Backward pass
for node in self.nodes.iter().rev() {
let Some(epsilon_in) = backprop_buffer[node.output].take() else {
continue
};
// TODO: create more wrapper types to avoid this dereferencing mess
let intermediary = &**intermediary_buffer[node.output].as_ref().unwrap();
let epsilon_out = node.node.backprop(intermediary, &epsilon_in);
let gradient = node.node.get_gradient(intermediary, &epsilon_in);
(*gradient_buffer[node.output]).add_assign(&*gradient);
for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
if let Some(existing_gradient) = backprop_buffer[input].take() {
backprop_buffer[input] = Some(existing_gradient + epsilon);
} else {
backprop_buffer[input] = Some(epsilon);
}
}
}
}
}
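
backprop_in above assumes the nodes are stored in topological order: the forward pass fills output_buffer and intermediary_buffer slot by slot, and the reverse pass takes each node's accumulated epsilon, derives per-input epsilons and a per-node gradient, and adds the epsilons into the slots of the node's inputs. A stripped-down, self-contained sketch of that buffer scheme, using scalar data and weight-times-input nodes (the Node struct and all names here are illustrative, not this crate's types):

    // Stand-in for NeuraGraph's node list: each node scales the sum of its inputs by a weight.
    struct Node {
        inputs: Vec<usize>, // buffer slots read during the forward pass
        output: usize,      // buffer slot written during the forward pass
        weight: f64,
    }

    fn backprop(
        nodes: &[Node],
        n_slots: usize,
        input: f64,
        d_loss_d_output: f64,
        output_slot: usize,
    ) -> Vec<f64> {
        let mut outputs = vec![None::<f64>; n_slots];
        let mut epsilons = vec![None::<f64>; n_slots];
        let mut gradients = vec![0.0; nodes.len()];

        // Forward pass: slot 0 holds the graph input, like output_buffer[0] above.
        outputs[0] = Some(input);
        for node in nodes {
            let x: f64 = node.inputs.iter().map(|&i| outputs[i].expect("slot not filled")).sum();
            outputs[node.output] = Some(node.weight * x);
        }

        // Backward pass: seed the output slot, then visit the nodes in reverse topological order.
        epsilons[output_slot] = Some(d_loss_d_output);
        for (idx, node) in nodes.iter().enumerate().rev() {
            let Some(eps) = epsilons[node.output].take() else { continue };
            let x: f64 = node.inputs.iter().map(|&i| outputs[i].unwrap()).sum();
            gradients[idx] += eps * x; // d(loss)/d(weight) for this node
            for &i in &node.inputs {
                // Accumulate into the input slots, mirroring `existing_gradient + epsilon` above.
                *epsilons[i].get_or_insert(0.0) += eps * node.weight;
            }
        }
        gradients
    }

    fn main() {
        // A two-node chain: slot 0 (input) -> node 0 (slot 1) -> node 1 (slot 2).
        let nodes = vec![
            Node { inputs: vec![0], output: 1, weight: 2.0 },
            Node { inputs: vec![1], output: 2, weight: 3.0 },
        ];
        let gradients = backprop(&nodes, 3, 1.5, 1.0, 2);
        assert_eq!(gradients, vec![3.0 * 1.5, 2.0 * 1.5]);
    }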
#[cfg(test)]
mod test {
use crate::{
derivable::{activation::LeakyRelu, loss::Euclidean, regularize::NeuraL0},
layer::dense::NeuraDenseLayer,
utils::uniform_vector,
};
use super::*;
#[test]
fn test_graph_backprop() {
let network =
neura_sequential![neura_layer!("dense", 4, f64), neura_layer!("dense", 2, f64),]
.construct(NeuraShape::Vector(10))
.unwrap();
let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(10));
let trainer = NeuraGraphBackprop::new(Euclidean);
let input = uniform_vector(10);
let target = uniform_vector(2);
let expected = NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target);
let actual = trainer.get_gradient(&graph, &input, &target);
type Gradient = <NeuraDenseLayer<f64, LeakyRelu<f64>, NeuraL0> as NeuraLayerBase>::Gradient;
fn get_gradient(dynamic: &Box<dyn NeuraDynVectorSpace>) -> &Gradient {
(**dynamic).into_any().downcast_ref::<Gradient>().unwrap()
}
assert_eq!(get_gradient(&actual[1]), &expected.0);
assert_eq!(get_gradient(&actual[2]), &expected.1 .0);
}
}

@@ -13,6 +13,9 @@ pub use partial::NeuraGraphPartial;
mod from;
pub use from::FromSequential;
mod backprop;
pub use backprop::NeuraGraphBackprop;
#[derive(Debug)]
pub struct NeuraGraphNodeConstructed<Data> {
node: Box<dyn NeuraGraphNodeEval<Data>>,
@@ -47,19 +50,51 @@ pub struct NeuraGraph<Data> {
}
impl<Data: Clone + std::fmt::Debug + 'static> NeuraLayerBase for NeuraGraph<Data> {
type Gradient = ();
type Gradient = Vec<Box<dyn NeuraDynVectorSpace>>;
fn output_shape(&self) -> NeuraShape {
self.output_shape
}
fn default_gradient(&self) -> Self::Gradient {
unimplemented!("NeuraGraph cannot be used as a layer yet")
let mut res: Self::Gradient = Vec::with_capacity(self.buffer_size);
res.push(Box::new(()));
for node in self.nodes.iter() {
res.push(node.node.default_gradient());
}
res
}
fn apply_gradient(&mut self, gradient: &Self::Gradient) {
// Index 0 of the gradient holds the `()` placeholder for the graph input, so node gradients start at 1
for (node, gradient) in self.nodes.iter_mut().zip(gradient.iter().skip(1)) {
node.node.apply_gradient(gradient);
}
}
fn prepare_layer(&mut self, is_training: bool) {
for node in self.nodes.iter_mut() {
node.node.prepare(is_training);
}
}
fn regularize_layer(&self) -> Self::Gradient {
let mut res: Self::Gradient = Vec::with_capacity(self.buffer_size);
res.push(Box::new(()));
for node in self.nodes.iter() {
res.push(node.node.get_regularization_gradient());
}
res
}
}
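
With the algebra impls added earlier in this commit (Vec<T>, Box<T: ?Sized>, and dyn NeuraDynVectorSpace), the new Gradient type Vec<Box<dyn NeuraDynVectorSpace>> is itself a vector space: the Vec delegates to each Box, the Box to its erased contents, and the dyn impl to the concrete per-layer gradient, so a whole-graph gradient can be scaled and accumulated like any other. A self-contained sketch of that delegation chain with simplified traits (illustrative, not this crate's code):

    use std::any::Any;

    // Simplified stand-ins for the two traits, to show how the whole-graph gradient composes.
    trait VectorSpace {
        fn mul_assign(&mut self, by: f64);
        fn norm_squared(&self) -> f64;
    }

    trait DynVectorSpace: Send {
        fn mul_assign(&mut self, by: f64);
        fn norm_squared(&self) -> f64;
        fn into_any(&self) -> &dyn Any;
    }

    // Blanket impl: any sized vector space can be used behind `dyn DynVectorSpace`.
    impl<T: VectorSpace + Send + 'static> DynVectorSpace for T {
        fn mul_assign(&mut self, by: f64) { VectorSpace::mul_assign(self, by) }
        fn norm_squared(&self) -> f64 { VectorSpace::norm_squared(self) }
        fn into_any(&self) -> &dyn Any { self }
    }

    // The delegation chain: dyn -> concrete, Box -> contents, Vec -> items.
    impl VectorSpace for dyn DynVectorSpace {
        fn mul_assign(&mut self, by: f64) { DynVectorSpace::mul_assign(self, by) }
        fn norm_squared(&self) -> f64 { DynVectorSpace::norm_squared(self) }
    }
    impl<T: VectorSpace + ?Sized> VectorSpace for Box<T> {
        fn mul_assign(&mut self, by: f64) { self.as_mut().mul_assign(by) }
        fn norm_squared(&self) -> f64 { self.as_ref().norm_squared() }
    }
    impl<T: VectorSpace> VectorSpace for Vec<T> {
        fn mul_assign(&mut self, by: f64) { for item in self.iter_mut() { item.mul_assign(by); } }
        fn norm_squared(&self) -> f64 { self.iter().map(T::norm_squared).sum() }
    }
    impl VectorSpace for f64 {
        fn mul_assign(&mut self, by: f64) { *self *= by; }
        fn norm_squared(&self) -> f64 { self * self }
    }

    fn main() {
        // One erased entry per node, with different concrete gradient types behind each Box.
        let mut gradient: Vec<Box<dyn DynVectorSpace>> =
            vec![Box::new(2.0_f64), Box::new(vec![3.0_f64, 4.0])];
        // Fully qualified calls disambiguate, since the blanket impl also applies to the Vec itself.
        VectorSpace::mul_assign(&mut gradient, 0.5);
        assert_eq!(VectorSpace::norm_squared(&gradient), 1.0 + 2.25 + 4.0);
    }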
impl<Data> NeuraGraph<Data> {
fn create_buffer(&self) -> Vec<Option<Data>> {
fn create_buffer<T>(&self) -> Vec<Option<T>> {
let mut res = Vec::with_capacity(self.buffer_size);
for _ in 0..self.buffer_size {
@@ -92,77 +127,6 @@ impl<Data> NeuraGraph<Data> {
buffer[node.output] = Some(result);
}
}
fn backprop_in<Loss, Target>(
&self,
input: &Data,
loss: Loss,
target: &Target,
output_buffer: &mut Vec<Option<Data>>,
backprop_buffer: &mut Vec<Option<Data>>,
intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
) where
Data: Clone + std::ops::Add<Data, Output = Data>,
Loss: NeuraLoss<Data, Target = Target>,
{
assert!(output_buffer.len() >= self.nodes.len());
assert!(backprop_buffer.len() >= self.nodes.len());
assert!(intermediary_buffer.len() >= self.nodes.len());
assert!(gradient_buffer.len() >= self.nodes.len());
output_buffer[0] = Some(input.clone());
// Forward pass
for node in self.nodes.iter() {
// PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
let inputs: Vec<_> = node
.inputs
.iter()
.map(|&i| {
output_buffer[i]
.clone()
.expect("Unreachable: output of previous layer was not set")
})
.collect();
let (result, intermediary) = node.node.eval_training(&inputs);
output_buffer[node.output] = Some(result);
intermediary_buffer[node.output] = Some(intermediary);
}
let loss = loss.nabla(
target,
output_buffer[self.output_index]
.as_ref()
.expect("Unreachable: output was not set"),
);
backprop_buffer[self.output_index] = Some(loss);
// Backward pass
for node in self.nodes.iter().rev() {
let Some(epsilon_in) = backprop_buffer[node.output].take() else {
continue
};
let epsilon_out = node
.node
.backprop(&intermediary_buffer[node.output], &epsilon_in);
let gradient = node
.node
.get_gradient(&intermediary_buffer[node.output], &epsilon_in);
gradient_buffer[node.output].add_assign(&*gradient);
for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
if let Some(existing_gradient) = backprop_buffer[input].take() {
backprop_buffer[input] = Some(existing_gradient + epsilon);
} else {
backprop_buffer[input] = Some(epsilon);
}
}
}
}
}
impl<Data: Clone + std::fmt::Debug + 'static> NeuraLayer<Data> for NeuraGraph<Data> {

@@ -24,13 +24,20 @@ pub trait NeuraGraphNodeEval<Data>: DynClone + Debug {
fn eval_training(&self, inputs: &[Data]) -> (Data, Box<dyn Any>);
fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data>;
fn default_gradient(&self) -> Box<dyn NeuraDynVectorSpace>;
fn get_gradient(
&self,
intermediary: &dyn Any,
epsilon_in: &Data,
) -> Box<dyn NeuraDynVectorSpace>;
fn get_regularization_gradient(&self) -> Box<dyn NeuraDynVectorSpace>;
fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace);
fn prepare(&mut self, is_training: bool);
}
#[derive(Clone, Debug)]
@@ -157,6 +164,18 @@ impl<Data: Clone, Axis: NeuraAxis<Data>, Layer: NeuraLayer<Axis::Combined, Outpu
.expect("Invalid gradient type passed to NeuraGraphNode::apply_gradient"),
);
}
fn default_gradient(&self) -> Box<dyn NeuraDynVectorSpace> {
Box::new(self.layer.default_gradient())
}
fn prepare(&mut self, is_training: bool) {
self.layer.prepare_layer(is_training);
}
fn get_regularization_gradient(&self) -> Box<dyn NeuraDynVectorSpace> {
Box::new(self.layer.regularize_layer())
}
}
impl<Data: Clone, Axis: NeuraAxis<Data>, Layer: NeuraPartialLayer + Clone + Debug>
