diff --git a/src/algebra/mod.rs b/src/algebra/mod.rs
index e7557c2..90bdc08 100644
--- a/src/algebra/mod.rs
+++ b/src/algebra/mod.rs
@@ -1,4 +1,6 @@
 mod matrix;
+use std::any::Any;
+
 pub use matrix::NeuraMatrix;
 
 mod vector;
@@ -17,6 +19,39 @@ pub trait NeuraVectorSpace {
     fn norm_squared(&self) -> f64;
 }
 
+pub trait NeuraDynVectorSpace {
+    fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace);
+
+    fn mul_assign(&mut self, by: f64);
+
+    fn norm_squared(&self) -> f64;
+
+    /// Trampoline for allowing NeuraDynVectorSpace to be cast back into a known type for add_assign
+    fn into_any(&self) -> &dyn Any;
+}
+
+impl<T: NeuraVectorSpace + 'static> NeuraDynVectorSpace for T {
+    fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace) {
+        let Some(other) = other.into_any().downcast_ref::<Self>() else {
+            panic!("Incompatible operand: expected other to be equal to self");
+        };
+
+        <Self as NeuraVectorSpace>::add_assign(self, other);
+    }
+
+    fn mul_assign(&mut self, by: f64) {
+        <Self as NeuraVectorSpace>::mul_assign(self, by);
+    }
+
+    fn norm_squared(&self) -> f64 {
+        <Self as NeuraVectorSpace>::norm_squared(self)
+    }
+
+    fn into_any(&self) -> &dyn Any {
+        self
+    }
+}
+
 impl NeuraVectorSpace for () {
     #[inline(always)]
     fn add_assign(&mut self, _other: &Self) {
diff --git a/src/lib.rs b/src/lib.rs
index 0f1e6d3..270bd68 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,11 @@
 pub mod algebra;
 pub mod derivable;
+pub mod err;
 pub mod gradient_solver;
 pub mod layer;
 pub mod network;
 pub mod train;
-pub mod err;
 
 mod utils;
 
 // TODO: move to a different file
diff --git a/src/network/graph/from.rs b/src/network/graph/from.rs
index 166ef57..3336bae 100644
--- a/src/network/graph/from.rs
+++ b/src/network/graph/from.rs
@@ -2,60 +2,63 @@ use crate::network::residual::{NeuraAxisDefault, NeuraSplitInputs};
 
 use super::*;
 
-trait FromSequential<Seq, Data> {
-    fn from_sequential(
+pub trait FromSequential<Seq, Data> {
+    fn from_sequential_rec(
         seq: &Seq,
         nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self;
 }
 
 impl<Data> FromSequential<(), Data> for NeuraGraph<Data> {
-    fn from_sequential(
+    fn from_sequential_rec(
         _seq: &(),
         nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self {
         Self {
             output_index: nodes.len(),
             buffer_size: nodes.len() + 1,
             nodes: nodes,
-            output_shape,
+            output_shape: input_shape,
         }
     }
 }
 
-impl<
-        Data: Clone,
-        Layer: NeuraLayer<Data, Output = Data> + Clone + std::fmt::Debug + 'static,
-        ChildNetwork,
-    > FromSequential<NeuraSequential<Layer, ChildNetwork>, Data> for NeuraGraph<Data>
+impl<Data: Clone, Layer: NeuraTrainableLayerFull<Data, Output = Data>, ChildNetwork>
+    FromSequential<NeuraSequential<Layer, ChildNetwork>, Data> for NeuraGraph<Data>
 where
     NeuraGraph<Data>: FromSequential<ChildNetwork, Data>,
     NeuraAxisDefault: NeuraSplitInputs<Data, Combined = Data>,
+    Layer::IntermediaryRepr: 'static,
 {
-    fn from_sequential(
+    fn from_sequential_rec(
         seq: &NeuraSequential<Layer, ChildNetwork>,
         mut nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self {
         nodes.push(NeuraGraphNodeConstructed {
-            node: Box::new(NeuraGraphNode::from(seq.layer.clone())),
+            node: Box::new(NeuraGraphNode::from_layer(
+                seq.layer.clone(),
+                vec![input_shape],
+            )),
            inputs: vec![nodes.len()],
             output: nodes.len() + 1,
         });
 
-        Self::from_sequential(&seq.child_network, nodes, output_shape)
+        Self::from_sequential_rec(&seq.child_network, nodes, seq.layer.output_shape())
     }
 }
 
-impl<Data, Layer, ChildNetwork> From<NeuraSequential<Layer, ChildNetwork>> for NeuraGraph<Data>
-where
-    NeuraGraph<Data>: FromSequential<NeuraSequential<Layer, ChildNetwork>, Data>,
-    NeuraSequential<Layer, ChildNetwork>: NeuraShapedLayer,
-{
-    fn from(network: NeuraSequential<Layer, ChildNetwork>) -> Self {
-        let output_shape = network.output_shape();
-        Self::from_sequential(&network, vec![], output_shape)
+impl<Data> NeuraGraph<Data> {
+    pub fn from_sequential<Layer, ChildNetwork>(
+        network: NeuraSequential<Layer, ChildNetwork>,
+        input_shape: NeuraShape,
+    ) -> Self
+    where
+        NeuraGraph<Data>: FromSequential<NeuraSequential<Layer, ChildNetwork>, Data>,
+        NeuraSequential<Layer, ChildNetwork>: NeuraShapedLayer,
+    {
+        Self::from_sequential_rec(&network, vec![], input_shape)
     }
 }
diff --git a/src/network/graph/mod.rs b/src/network/graph/mod.rs
index 56dfbdd..1156e2e 100644
--- a/src/network/graph/mod.rs
+++ b/src/network/graph/mod.rs
@@ -1,4 +1,11 @@
-use crate::{layer::NeuraShapedLayer, prelude::*};
+use std::any::Any;
+
+use crate::{
+    algebra::NeuraDynVectorSpace,
+    derivable::NeuraLoss,
+    layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
+    prelude::*,
+};
 
 mod node;
 pub use node::*;
@@ -7,9 +14,36 @@ mod partial;
 pub use partial::NeuraGraphPartial;
 
 mod from;
+pub use from::FromSequential;
+
+pub trait NeuraTrainableLayerFull<Input>:
+    NeuraTrainableLayerEval<Input>
+    + NeuraTrainableLayerBackprop<Input>
+    + NeuraTrainableLayerSelf<Input>
+    + NeuraShapedLayer
+    + Clone
+    + std::fmt::Debug
+    + 'static
+where
+    Self::IntermediaryRepr: 'static,
+{
+}
+
+impl<Input, T> NeuraTrainableLayerFull<Input> for T
+where
+    T: NeuraTrainableLayerEval<Input>
+        + NeuraTrainableLayerBackprop<Input>
+        + NeuraTrainableLayerSelf<Input>
+        + NeuraShapedLayer
+        + Clone
+        + std::fmt::Debug
+        + 'static,
+    T::IntermediaryRepr: 'static,
+{
+}
 
 #[derive(Debug)]
-struct NeuraGraphNodeConstructed<Data> {
+pub struct NeuraGraphNodeConstructed<Data> {
     node: Box<dyn NeuraGraphNodeEval<Data>>,
     inputs: Vec<usize>,
     output: usize,
@@ -48,10 +82,12 @@ impl<Data> NeuraGraph<Data> {
         res
     }
 
-    fn eval_in(&self, input: &Data, buffer: &mut Vec<Option<Data>>)
+    fn eval_in(&self, input: &Data, buffer: &mut [Option<Data>])
     where
         Data: Clone,
     {
+        assert!(buffer.len() >= self.nodes.len());
+
         buffer[0] = Some(input.clone());
 
         for node in self.nodes.iter() {
@@ -69,6 +105,77 @@ impl<Data> NeuraGraph<Data> {
             buffer[node.output] = Some(result);
         }
     }
+
+    fn backprop_in<Loss, Target>(
+        &self,
+        input: &Data,
+        loss: Loss,
+        target: &Target,
+        output_buffer: &mut Vec<Option<Data>>,
+        backprop_buffer: &mut Vec<Option<Data>>,
+        intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
+        gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
+    ) where
+        Data: Clone + std::ops::Add<Output = Data>,
+        Loss: NeuraLoss<Data, Target = Target>,
+    {
+        assert!(output_buffer.len() >= self.nodes.len());
+        assert!(backprop_buffer.len() >= self.nodes.len());
+        assert!(intermediary_buffer.len() >= self.nodes.len());
+        assert!(gradient_buffer.len() >= self.nodes.len());
+
+        output_buffer[0] = Some(input.clone());
+
+        // Forward pass
+        for node in self.nodes.iter() {
+            // PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
+            let inputs: Vec<_> = node
+                .inputs
+                .iter()
+                .map(|&i| {
+                    output_buffer[i]
+                        .clone()
+                        .expect("Unreachable: output of previous layer was not set")
+                })
+                .collect();
+            let (result, intermediary) = node.node.eval_training(&inputs);
+
+            output_buffer[node.output] = Some(result);
+            intermediary_buffer[node.output] = Some(intermediary);
+        }
+
+        let loss = loss.nabla(
+            target,
+            output_buffer[self.output_index]
+                .as_ref()
+                .expect("Unreachable: output was not set"),
+        );
+        backprop_buffer[self.output_index] = Some(loss);
+
+        // Backward pass
+        for node in self.nodes.iter().rev() {
+            let Some(epsilon_in) = backprop_buffer[node.output].take() else {
+                continue
+            };
+
+            let epsilon_out = node
+                .node
+                .backprop(&intermediary_buffer[node.output], &epsilon_in);
+            let gradient = node
+                .node
+                .get_gradient(&intermediary_buffer[node.output], &epsilon_in);
+
+            gradient_buffer[node.output].add_assign(&*gradient);
+
+            for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
+                if let Some(existing_gradient) = backprop_buffer[input].take() {
+                    backprop_buffer[input] = Some(existing_gradient + epsilon);
+                } else {
+                    backprop_buffer[input] = Some(epsilon);
+                }
+            }
+        }
+    }
 }
 
 impl<Data: Clone> NeuraLayer<Data> for NeuraGraph<Data> {
@@ -213,7 +320,7 @@ mod test {
             .construct(NeuraShape::Vector(3))
             .unwrap();
 
-        let graph = NeuraGraph::from(network.clone());
+        let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(3));
 
         for _ in 0..10 {
             let input = uniform_vector(3);
diff --git a/src/network/graph/node.rs b/src/network/graph/node.rs
index 0635739..9d368f2 100644
--- a/src/network/graph/node.rs
+++ b/src/network/graph/node.rs
@@ -1,13 +1,16 @@
 use dyn_clone::DynClone;
-use std::fmt::Debug;
+use std::{any::Any, fmt::Debug};
 
 use crate::{
+    algebra::NeuraDynVectorSpace,
     err::NeuraAxisErr,
-    layer::{NeuraLayer, NeuraShapedLayer},
+    layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
     network::residual::{NeuraAxisDefault, NeuraCombineInputs, NeuraSplitInputs},
     prelude::{NeuraPartialLayer, NeuraShape},
 };
 
+use super::*;
+
 // TODO: split into two traits
 pub trait NeuraGraphNodePartial<Data>: DynClone + Debug {
     fn inputs<'a>(&'a self) -> &'a [String];
@@ -20,7 +23,17 @@
 }
 
 pub trait NeuraGraphNodeEval<Data>: DynClone + Debug {
-    fn eval<'a>(&'a self, inputs: &[Data]) -> Data;
+    fn eval(&self, inputs: &[Data]) -> Data;
+
+    fn eval_training(&self, inputs: &[Data]) -> (Data, Box<dyn Any>);
+    fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data>;
+    fn get_gradient(
+        &self,
+        intermediary: &dyn Any,
+        epsilon_in: &Data,
+    ) -> Box<dyn NeuraDynVectorSpace>;
+
+    fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace);
 }
 
 #[derive(Clone, Debug)]
@@ -29,6 +42,21 @@ pub struct NeuraGraphNode<Axis, Layer> {
     axis: Axis,
     layer: Layer,
     name: String,
+
+    input_shapes: Option<Vec<NeuraShape>>,
+}
+
+impl<Layer> NeuraGraphNode<NeuraAxisDefault, Layer> {
+    pub(crate) fn from_layer(layer: Layer, input_shapes: Vec<NeuraShape>) -> Self {
+        Self {
+            inputs: vec![],
+            axis: NeuraAxisDefault,
+            layer,
+            name: random_name(),
+
+            input_shapes: Some(input_shapes),
+        }
+    }
 }
 
 impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
@@ -39,6 +67,8 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
             axis,
             layer,
             name,
+
+            input_shapes: None,
         }
     }
 
@@ -50,38 +80,101 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
             + Debug
             + 'static,
         Layer: NeuraPartialLayer + Clone + Debug + 'static,
-        Layer::Constructed: NeuraShapedLayer
-            + NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>
-            + Clone
-            + Debug
-            + 'static,
+        Layer::Constructed:
+            NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
         Layer::Err: Debug,
+        <Layer::Constructed as NeuraTrainableLayerEval<
+            <Axis as NeuraCombineInputs<Data>>::Combined,
+        >>::IntermediaryRepr: 'static,
+        <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
     {
         Box::new(self)
     }
+
+    fn downcast_intermediary<'a, Data>(
+        &self,
+        intermediary: &'a dyn Any,
+    ) -> &'a Intermediary<Axis::Combined, Layer>
+    where
+        Axis: NeuraCombineInputs<Data>,
+        Layer: NeuraTrainableLayerFull<Axis::Combined>,
+        Axis::Combined: 'static,
+    {
+        intermediary
+            .downcast_ref::<Intermediary<Axis::Combined, Layer>>()
+            .expect("Incompatible value passed to NeuraGraphNode::backprop")
+    }
+}
+
+struct Intermediary<Combined, Layer: NeuraTrainableLayerEval<Combined>>
+where
+    Layer::IntermediaryRepr: 'static,
+{
+    combined: Combined,
+    layer_intermediary: Layer::IntermediaryRepr,
 }
 
 impl<
         Data: Clone,
         Axis: NeuraSplitInputs<Data> + Clone + Debug,
-        Layer: NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data> + Clone + Debug,
+        Layer: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
     > NeuraGraphNodeEval<Data> for NeuraGraphNode<Axis, Layer>
+where
+    Layer::IntermediaryRepr: 'static,
+    Axis::Combined: 'static,
 {
     fn eval<'a>(&'a self, inputs: &[Data]) -> Data {
         // TODO: use to_vec_in?
         let combined = self.axis.combine(inputs.to_vec());
         self.layer.eval(&combined)
     }
-}
 
-impl<Layer> From<Layer> for NeuraGraphNode<NeuraAxisDefault, Layer> {
-    fn from(layer: Layer) -> Self {
-        Self {
-            inputs: vec![],
-            axis: NeuraAxisDefault,
-            layer,
-            name: random_name(),
-        }
+    fn eval_training(&self, inputs: &[Data]) -> (Data, Box<dyn Any>) {
+        let combined = self.axis.combine(inputs.to_vec());
+        let (result, layer_intermediary) = self.layer.eval_training(&combined);
+
+        let intermediary: Intermediary<Axis::Combined, Layer> = Intermediary {
+            combined,
+            layer_intermediary,
+        };
+
+        (result, Box::new(intermediary))
+    }
+
+    fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data> {
+        let intermediary = self.downcast_intermediary(intermediary);
+
+        let epsilon_out = self.layer.backprop_layer(
+            &intermediary.combined,
+            &intermediary.layer_intermediary,
+            epsilon_in,
+        );
+
+        self.axis
+            .split(&epsilon_out, self.input_shapes.as_ref().unwrap())
+    }
+
+    fn get_gradient(
+        &self,
+        intermediary: &dyn Any,
+        epsilon_in: &Data,
+    ) -> Box<dyn NeuraDynVectorSpace> {
+        let intermediary = self.downcast_intermediary(intermediary);
+
+        Box::new(self.layer.get_gradient(
+            &intermediary.combined,
+            &intermediary.layer_intermediary,
+            epsilon_in,
+        ))
+    }
+
+    fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace) {
+        self.layer.apply_gradient(
+            gradient
+                .into_any()
+                .downcast_ref::<Layer::Gradient>()
+                .expect("Invalid gradient type passed to NeuraGraphNode::apply_gradient"),
+        );
     }
 }
 
@@ -95,12 +188,10 @@ impl<
         Layer: NeuraPartialLayer + Clone + Debug,
     > NeuraGraphNodePartial<Data> for NeuraGraphNode<Axis, Layer>
 where
-    Layer::Constructed: NeuraShapedLayer
-        + NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>
-        + Clone
-        + Debug
-        + 'static,
+    Layer::Constructed: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
     Layer::Err: Debug,
+    <Layer::Constructed as NeuraTrainableLayerEval<<Axis as NeuraCombineInputs<Data>>::Combined>>::IntermediaryRepr: 'static,
+    <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
 {
     fn inputs<'a>(&'a self) -> &'a [String] {
         &self.inputs
@@ -116,7 +207,7 @@ where
     ) -> Result<(Box<dyn NeuraGraphNodeEval<Data>>, NeuraShape), String> {
         let combined = self
             .axis
-            .combine(input_shapes)
+            .combine(input_shapes.clone())
             .map_err(|err| format!("{:?}", err))?;
 
         let constructed_layer = self
@@ -132,6 +223,7 @@ where
                 axis: self.axis.clone(),
                 layer: constructed_layer,
                 name: self.name.clone(),
+                input_shapes: Some(input_shapes)
             }),
             output_shape,
         ))
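Usage note (not part of the patch): the removed `impl From<NeuraSequential<...>> for NeuraGraph<...>` is replaced by an explicit constructor that takes the input shape. A minimal sketch of how a call site changes, mirroring the updated test in src/network/graph/mod.rs; the `network` value and the `uniform_vector` helper are assumed to be set up as in that test, with a 3-element vector input:

    // Before this patch the graph was built through `From`:
    // let graph = NeuraGraph::from(network.clone());

    // After this patch the input shape is passed explicitly:
    let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(3));

    // NeuraGraph still implements NeuraLayer (unchanged context above), so evaluation stays the same:
    let output = graph.eval(&uniform_vector(3));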