diff --git a/src/algebra/mod.rs b/src/algebra/mod.rs
index 90bdc08..cde2b58 100644
--- a/src/algebra/mod.rs
+++ b/src/algebra/mod.rs
@@ -14,12 +14,10 @@ pub trait NeuraVectorSpace {
     fn mul_assign(&mut self, by: f64);
 
-    // fn zero() -> Self;
-
     fn norm_squared(&self) -> f64;
 }
 
-pub trait NeuraDynVectorSpace {
+pub trait NeuraDynVectorSpace: Send {
     fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace);
 
     fn mul_assign(&mut self, by: f64);
@@ -30,9 +28,9 @@ pub trait NeuraDynVectorSpace {
     fn into_any(&self) -> &dyn Any;
 }
 
-impl<T: NeuraVectorSpace + Any> NeuraDynVectorSpace for T {
+impl<T: NeuraVectorSpace + Send + Any> NeuraDynVectorSpace for T {
     fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace) {
-        let Some(other) = other.into_any().downcast_ref::<T>() else {
+        let Some(other) = other.into_any().downcast_ref::<Self>() else {
             panic!("Incompatible operand: expected other to be equal to self");
         };
 
@@ -63,17 +61,12 @@ impl NeuraVectorSpace for () {
         // Noop
     }
 
-    // #[inline(always)]
-    // fn zero() -> Self {
-    //     ()
-    // }
-
     fn norm_squared(&self) -> f64 {
         0.0
     }
 }
 
-impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
+impl<T: NeuraVectorSpace + ?Sized> NeuraVectorSpace for Box<T> {
     fn add_assign(&mut self, other: &Self) {
         self.as_mut().add_assign(other.as_ref());
     }
@@ -82,15 +75,25 @@ impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
         self.as_mut().mul_assign(by);
     }
 
-    // fn zero() -> Self {
-    //     Box::new(T::zero())
-    // }
-
     fn norm_squared(&self) -> f64 {
         self.as_ref().norm_squared()
     }
 }
 
+impl NeuraVectorSpace for dyn NeuraDynVectorSpace {
+    fn add_assign(&mut self, other: &Self) {
+        <Self as NeuraDynVectorSpace>::add_assign(self, &*other)
+    }
+
+    fn mul_assign(&mut self, by: f64) {
+        <Self as NeuraDynVectorSpace>::mul_assign(self, by)
+    }
+
+    fn norm_squared(&self) -> f64 {
+        <Self as NeuraDynVectorSpace>::norm_squared(self)
+    }
+}
+
 impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
     fn add_assign(&mut self, other: &Self) {
         NeuraVectorSpace::add_assign(&mut self.0, &other.0);
@@ -124,21 +127,34 @@ impl<T: NeuraVectorSpace, const N: usize> NeuraVectorSpace for [T; N] {
         }
     }
 
-    // fn zero() -> Self {
-    //     let mut res: Vec<T> = Vec::with_capacity(N);
+    fn norm_squared(&self) -> f64 {
+        self.iter().map(T::norm_squared).sum()
+    }
+}
 
-    //     for _ in 0..N {
-    //         res.push(T::zero());
-    //     }
+impl<T: NeuraVectorSpace> NeuraVectorSpace for Vec<T> {
+    fn add_assign(&mut self, other: &Self) {
+        assert_eq!(self.len(), other.len());
 
-    //     res.try_into().unwrap_or_else(|_| {
-    //         // TODO: check that this panic is optimized away
-    //         unreachable!()
-    //     })
-    // }
+        for (self_item, other_item) in self.iter_mut().zip(other.iter()) {
+            self_item.add_assign(other_item);
+        }
+    }
+
+    fn mul_assign(&mut self, by: f64) {
+        for item in self.iter_mut() {
+            item.mul_assign(by);
+        }
+    }
 
     fn norm_squared(&self) -> f64 {
-        self.iter().map(T::norm_squared).sum()
+        let mut res = 0.0;
+
+        for item in self.iter() {
+            res += item.norm_squared();
+        }
+
+        res
     }
 }
diff --git a/src/gradient_solver/backprop.rs b/src/gradient_solver/backprop.rs
index c2c3d40..269657d 100644
--- a/src/gradient_solver/backprop.rs
+++ b/src/gradient_solver/backprop.rs
@@ -12,6 +12,10 @@ impl<Loss> NeuraBackprop<Loss> {
     pub fn new(loss: Loss) -> Self {
         Self { loss }
     }
+
+    pub fn get(&self) -> &Loss {
+        &self.loss
+    }
 }
 
 impl<
diff --git a/src/network/graph/backprop.rs b/src/network/graph/backprop.rs
new file mode 100644
index 0000000..f6110e9
--- /dev/null
+++ b/src/network/graph/backprop.rs
@@ -0,0 +1,169 @@
+use crate::gradient_solver::NeuraGradientSolver;
+
+use super::*;
+
+#[derive(Debug, Clone)]
+pub struct NeuraGraphBackprop<Loss> {
+    loss: Loss,
+    // TODO: store buffers for re-use, do not clone them
+}
+
+impl<Loss> NeuraGraphBackprop<Loss> {
+    pub fn new(loss: Loss) -> Self {
+        Self { loss }
+    }
+}
+
+impl<Loss: Clone> From<&NeuraBackprop<Loss>> for NeuraGraphBackprop<Loss> {
+    fn from(value: &NeuraBackprop<Loss>) -> Self {
+        Self {
+            loss: value.get().clone(),
+        }
+    }
+}
+
+impl<
+        Data: Clone + std::fmt::Debug + std::ops::Add<Output = Data> + 'static,
+        Target,
+        Loss: NeuraLoss<Data, Target = Target>,
+    > NeuraGradientSolver<Data, Target, NeuraGraph<Data>> for NeuraGraphBackprop<Loss>
+{
+    // TODO: make it a &mut method
+    fn get_gradient(
+        &self,
+        trainable: &NeuraGraph<Data>,
+        input: &Data,
+        target: &Target,
+    ) -> <NeuraGraph<Data> as NeuraLayerBase>::Gradient {
+        let mut output_buffer = trainable.create_buffer();
+        let mut backprop_buffer = trainable.create_buffer();
+        let mut intermediary_buffer = trainable.create_buffer();
+        let mut gradient_buffer = trainable.default_gradient();
+
+        trainable.backprop_in(
+            input,
+            &self.loss,
+            target,
+            &mut output_buffer,
+            &mut backprop_buffer,
+            &mut intermediary_buffer,
+            &mut gradient_buffer,
+        );
+
+        gradient_buffer
+    }
+
+    #[allow(unused)]
+    fn score(&self, trainable: &NeuraGraph<Data>, input: &Data, target: &Target) -> f64 {
+        todo!()
+    }
+}
+
+impl<Data> NeuraGraph<Data> {
+    fn backprop_in<Target, Loss>(
+        &self,
+        input: &Data,
+        loss: &Loss,
+        target: &Target,
+        output_buffer: &mut Vec<Option<Data>>,
+        backprop_buffer: &mut Vec<Option<Data>>,
+        intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
+        gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
+    ) where
+        Data: Clone + std::ops::Add<Output = Data>,
+        Loss: NeuraLoss<Data, Target = Target>,
+    {
+        assert!(output_buffer.len() >= self.nodes.len());
+        assert!(backprop_buffer.len() >= self.nodes.len());
+        assert!(intermediary_buffer.len() >= self.nodes.len());
+        assert!(gradient_buffer.len() >= self.nodes.len());
+
+        output_buffer[0] = Some(input.clone());
+
+        // Forward pass
+        for node in self.nodes.iter() {
+            // PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
+            let inputs: Vec<_> = node
+                .inputs
+                .iter()
+                .map(|&i| {
+                    output_buffer[i]
+                        .clone()
+                        .expect("Unreachable: output of previous layer was not set")
+                })
+                .collect();
+            let (result, intermediary) = node.node.eval_training(&inputs);
+
+            output_buffer[node.output] = Some(result);
+            intermediary_buffer[node.output] = Some(intermediary);
+        }
+
+        let loss = loss.nabla(
+            target,
+            output_buffer[self.output_index]
+                .as_ref()
+                .expect("Unreachable: output was not set"),
+        );
+        backprop_buffer[self.output_index] = Some(loss);
+
+        // Backward pass
+        for node in self.nodes.iter().rev() {
+            let Some(epsilon_in) = backprop_buffer[node.output].take() else {
+                continue
+            };
+
+            // TODO: create more wrapper types to avoid this dereferencing mess
+            let intermediary = &**intermediary_buffer[node.output].as_ref().unwrap();
+
+            let epsilon_out = node.node.backprop(intermediary, &epsilon_in);
+            let gradient = node.node.get_gradient(intermediary, &epsilon_in);
+
+            (*gradient_buffer[node.output]).add_assign(&*gradient);
+
+            for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
+                if let Some(existing_gradient) = backprop_buffer[input].take() {
+                    backprop_buffer[input] = Some(existing_gradient + epsilon);
+                } else {
+                    backprop_buffer[input] = Some(epsilon);
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{
+        derivable::{activation::LeakyRelu, loss::Euclidean, regularize::NeuraL0},
+        layer::dense::NeuraDenseLayer,
+        utils::uniform_vector,
+    };
+
+    use super::*;
+
+    #[test]
+    fn test_graph_backprop() {
+        let network =
+            neura_sequential![neura_layer!("dense", 4, f64), neura_layer!("dense", 2, f64),]
+                .construct(NeuraShape::Vector(10))
+                .unwrap();
+
+        let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(10));
+
+        let trainer = NeuraGraphBackprop::new(Euclidean);
+
+        let input = uniform_vector(10);
+        let target = uniform_vector(2);
+
+        let expected =
+            NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target);
+        let actual = trainer.get_gradient(&graph, &input, &target);
+
+        type Gradient = <NeuraDenseLayer<f64, LeakyRelu<f64>, NeuraL0> as NeuraLayerBase>::Gradient;
+        fn get_gradient(dynamic: &Box<dyn NeuraDynVectorSpace>) -> &Gradient {
+            (**dynamic).into_any().downcast_ref::<Gradient>().unwrap()
+        }
+
+        assert_eq!(get_gradient(&actual[1]), &expected.0);
+        assert_eq!(get_gradient(&actual[2]), &expected.1 .0);
+    }
+}
diff --git a/src/network/graph/mod.rs b/src/network/graph/mod.rs
index f307285..f4a33ae 100644
--- a/src/network/graph/mod.rs
+++ b/src/network/graph/mod.rs
@@ -13,6 +13,9 @@ pub use partial::NeuraGraphPartial;
 mod from;
 pub use from::FromSequential;
 
+mod backprop;
+pub use backprop::NeuraGraphBackprop;
+
 #[derive(Debug)]
 pub struct NeuraGraphNodeConstructed<Data> {
     node: Box<dyn NeuraGraphNodeEval<Data>>,
@@ -47,19 +50,51 @@ pub struct NeuraGraph<Data> {
 }
 
 impl<Data> NeuraLayerBase for NeuraGraph<Data> {
-    type Gradient = ();
+    type Gradient = Vec<Box<dyn NeuraDynVectorSpace>>;
 
     fn output_shape(&self) -> NeuraShape {
         self.output_shape
     }
 
     fn default_gradient(&self) -> Self::Gradient {
-        unimplemented!("NeuraGraph cannot be used as a layer yet")
+        let mut res: Self::Gradient = Vec::with_capacity(self.buffer_size);
+
+        res.push(Box::new(()));
+
+        for node in self.nodes.iter() {
+            res.push(node.node.default_gradient());
+        }
+
+        res
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
+        // `gradient[0]` is the placeholder entry for the graph input, so skip it
+        for (node, gradient) in self.nodes.iter_mut().zip(gradient.iter().skip(1)) {
+            node.node.apply_gradient(gradient);
+        }
+    }
+
+    fn prepare_layer(&mut self, is_training: bool) {
+        for node in self.nodes.iter_mut() {
+            node.node.prepare(is_training);
+        }
+    }
+
+    fn regularize_layer(&self) -> Self::Gradient {
+        let mut res: Self::Gradient = Vec::with_capacity(self.buffer_size);
+
+        res.push(Box::new(()));
+
+        for node in self.nodes.iter() {
+            res.push(node.node.get_regularization_gradient());
+        }
+
+        res
     }
 }
 
 impl<Data> NeuraGraph<Data> {
-    fn create_buffer(&self) -> Vec<Option<Data>> {
+    fn create_buffer<T>(&self) -> Vec<Option<T>> {
         let mut res = Vec::with_capacity(self.buffer_size);
 
         for _ in 0..self.buffer_size {
@@ -92,77 +127,6 @@ impl<Data> NeuraGraph<Data> {
             buffer[node.output] = Some(result);
         }
     }
-
-    fn backprop_in<Target, Loss>(
-        &self,
-        input: &Data,
-        loss: Loss,
-        target: &Target,
-        output_buffer: &mut Vec<Option<Data>>,
-        backprop_buffer: &mut Vec<Option<Data>>,
-        intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
-        gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
-    ) where
-        Data: Clone + std::ops::Add<Output = Data>,
-        Loss: NeuraLoss<Data, Target = Target>,
-    {
-        assert!(output_buffer.len() >= self.nodes.len());
-        assert!(backprop_buffer.len() >= self.nodes.len());
-        assert!(intermediary_buffer.len() >= self.nodes.len());
-        assert!(gradient_buffer.len() >= self.nodes.len());
-
-        output_buffer[0] = Some(input.clone());
-
-        // Forward pass
-        for node in self.nodes.iter() {
-            // PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
-            let inputs: Vec<_> = node
-                .inputs
-                .iter()
-                .map(|&i| {
-                    output_buffer[i]
-                        .clone()
-                        .expect("Unreachable: output of previous layer was not set")
-                })
-                .collect();
-            let (result, intermediary) = node.node.eval_training(&inputs);
-
-            output_buffer[node.output] = Some(result);
-            intermediary_buffer[node.output] = Some(intermediary);
-        }
-
-        let loss = loss.nabla(
-            target,
-            output_buffer[self.output_index]
-                .as_ref()
-                .expect("Unreachable: output was not set"),
-        );
-        backprop_buffer[self.output_index] = Some(loss);
-
-        // Backward pass
-        for node in self.nodes.iter().rev() {
-            let Some(epsilon_in) = backprop_buffer[node.output].take() else {
-                continue
-            };
-
-            let epsilon_out = node
-                .node
-                .backprop(&intermediary_buffer[node.output], &epsilon_in);
-            let gradient = node
-                .node
-                .get_gradient(&intermediary_buffer[node.output], &epsilon_in);
-
-            gradient_buffer[node.output].add_assign(&*gradient);
-
-            for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
-                if let Some(existing_gradient) = backprop_buffer[input].take() {
-                    backprop_buffer[input] = Some(existing_gradient + epsilon);
-                } else {
-                    backprop_buffer[input] = Some(epsilon);
-                }
-            }
-        }
-    }
 }
 
 impl<Data: Clone> NeuraLayer<Data> for NeuraGraph<Data> {
diff --git a/src/network/graph/node.rs b/src/network/graph/node.rs
index c200122..73ba289 100644
--- a/src/network/graph/node.rs
+++ b/src/network/graph/node.rs
@@ -24,13 +24,20 @@ pub trait NeuraGraphNodeEval<Data>: DynClone + Debug {
     fn eval_training(&self, inputs: &[Data]) -> (Data, Box<dyn Any>);
 
     fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data>;
+
+    fn default_gradient(&self) -> Box<dyn NeuraDynVectorSpace>;
+
     fn get_gradient(
         &self,
         intermediary: &dyn Any,
         epsilon_in: &Data,
     ) -> Box<dyn NeuraDynVectorSpace>;
 
+    fn get_regularization_gradient(&self) -> Box<dyn NeuraDynVectorSpace>;
+
     fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace);
+
+    fn prepare(&mut self, is_training: bool);
 }
 
 #[derive(Clone, Debug)]
@@ -157,6 +164,18 @@ impl<Data: Clone + 'static, Axis: NeuraSplitInputs<Data>, Layer: NeuraLayer<Axis::Combined, Output = Data>>
+    fn default_gradient(&self) -> Box<dyn NeuraDynVectorSpace> {
+        Box::new(self.layer.default_gradient())
+    }
+
+    fn prepare(&mut self, is_training: bool) {
+        self.layer.prepare_layer(is_training);
+    }
+
+    fn get_regularization_gradient(&self) -> Box<dyn NeuraDynVectorSpace> {
+        Box::new(self.layer.regularize_layer())
+    }
 }
 
 impl<Data: Clone + 'static, Axis: NeuraSplitInputs<Data>, Layer: NeuraPartialLayer + Clone + Debug>
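Usage sketch (not part of the patch): the snippet below mirrors the new test in src/network/graph/backprop.rs and shows how the added NeuraGraphBackprop solver is meant to be driven. Every name it uses (neura_sequential!, neura_layer!, Euclidean, uniform_vector, NeuraShape, NeuraGraph::from_sequential, the new From<&NeuraBackprop> impl) comes from this diff or the existing crate; the surrounding setup and imports are assumed rather than shown here.

// Sketch only, adapted from the new test_graph_backprop test.
let network = neura_sequential![neura_layer!("dense", 4, f64), neura_layer!("dense", 2, f64)]
    .construct(NeuraShape::Vector(10))
    .unwrap();

// Re-interpret the constructed sequential network as a graph...
let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(10));

// ...and build the graph-aware backprop solver, either directly or from an
// existing NeuraBackprop via the From impl added in this patch.
let trainer = NeuraGraphBackprop::new(Euclidean);
// let trainer = NeuraGraphBackprop::from(&NeuraBackprop::new(Euclidean));

// The returned gradient is a Vec<Box<dyn NeuraDynVectorSpace>>, one entry per
// graph buffer slot (index 0 being the placeholder for the graph input).
let gradient = trainer.get_gradient(&graph, &uniform_vector(10), &uniform_vector(2));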