🔥 Implement backpropagation for NeuraGraph (untested)

main
Shad Amethyst 2 years ago
parent 251e4d02d2
commit 41711d4668

@@ -1,4 +1,6 @@
 mod matrix;
+use std::any::Any;
+
 pub use matrix::NeuraMatrix;
 mod vector;
@@ -17,6 +19,39 @@ pub trait NeuraVectorSpace {
     fn norm_squared(&self) -> f64;
 }
 
+pub trait NeuraDynVectorSpace {
+    fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace);
+
+    fn mul_assign(&mut self, by: f64);
+
+    fn norm_squared(&self) -> f64;
+
+    /// Trampoline for allowing NeuraDynVectorSpace to be cast back into a known type for add_assign
+    fn into_any(&self) -> &dyn Any;
+}
+
+impl<T: NeuraVectorSpace + 'static> NeuraDynVectorSpace for T {
+    fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace) {
+        let Some(other) = other.into_any().downcast_ref::<Self>() else {
+            panic!("Incompatible operand: expected other to be equal to self");
+        };
+
+        <Self as NeuraVectorSpace>::add_assign(self, other);
+    }
+
+    fn mul_assign(&mut self, by: f64) {
+        <Self as NeuraVectorSpace>::mul_assign(self, by);
+    }
+
+    fn norm_squared(&self) -> f64 {
+        <Self as NeuraVectorSpace>::norm_squared(self)
+    }
+
+    fn into_any(&self) -> &dyn Any {
+        self
+    }
+}
+
 impl NeuraVectorSpace for () {
     #[inline(always)]
     fn add_assign(&mut self, _other: &Self) {

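The `NeuraDynVectorSpace` trait added above type-erases gradient values while still allowing two of them to be added together: `into_any` acts as a trampoline, handing out `&dyn Any` so that `add_assign` can downcast its operand back to the concrete type and reuse the regular `NeuraVectorSpace::add_assign`. A minimal standalone sketch of that pattern, using hypothetical `DynAdd` and `Vec2` types rather than the crate's own:

use std::any::Any;

// Stand-ins for NeuraVectorSpace / NeuraDynVectorSpace, for illustration only.
trait DynAdd {
    fn add_assign(&mut self, other: &dyn DynAdd);
    // Trampoline: expose `self` as `&dyn Any` so the callee can downcast it.
    fn as_any(&self) -> &dyn Any;
}

#[derive(Debug, PartialEq)]
struct Vec2(f64, f64);

impl DynAdd for Vec2 {
    fn add_assign(&mut self, other: &dyn DynAdd) {
        // Recover the concrete type; panic on a mismatch, as the blanket impl above does.
        let other = other
            .as_any()
            .downcast_ref::<Vec2>()
            .expect("Incompatible operand: expected other to be equal to self");
        self.0 += other.0;
        self.1 += other.1;
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn main() {
    let mut a: Box<dyn DynAdd> = Box::new(Vec2(1.0, 2.0));
    let b: Box<dyn DynAdd> = Box::new(Vec2(3.0, 4.0));
    a.add_assign(&*b);
    assert_eq!(a.as_any().downcast_ref::<Vec2>(), Some(&Vec2(4.0, 6.0)));
}

The downcast panics on incompatible operands, which mirrors how the blanket impl above treats a gradient of the wrong concrete type.
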
@@ -1,11 +1,11 @@
 pub mod algebra;
 pub mod derivable;
+pub mod err;
 pub mod gradient_solver;
 pub mod layer;
 pub mod network;
 pub mod train;
-pub mod err;
 mod utils;
 
 // TODO: move to a different file

@@ -2,60 +2,63 @@ use crate::network::residual::{NeuraAxisDefault, NeuraSplitInputs};
 use super::*;
 
-trait FromSequential<Seq, Data> {
-    fn from_sequential(
+pub trait FromSequential<Seq, Data> {
+    fn from_sequential_rec(
         seq: &Seq,
         nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self;
 }
 
 impl<Data> FromSequential<(), Data> for NeuraGraph<Data> {
-    fn from_sequential(
+    fn from_sequential_rec(
         _seq: &(),
         nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self {
         Self {
             output_index: nodes.len(),
             buffer_size: nodes.len() + 1,
             nodes: nodes,
-            output_shape,
+            output_shape: input_shape,
         }
     }
 }
 
-impl<
-    Data: Clone,
-    Layer: NeuraLayer<Data, Output = Data> + Clone + std::fmt::Debug + 'static,
-    ChildNetwork,
-> FromSequential<NeuraSequential<Layer, ChildNetwork>, Data> for NeuraGraph<Data>
+impl<Data: Clone + 'static, Layer: NeuraTrainableLayerFull<Data, Output = Data>, ChildNetwork>
+    FromSequential<NeuraSequential<Layer, ChildNetwork>, Data> for NeuraGraph<Data>
 where
     NeuraGraph<Data>: FromSequential<ChildNetwork, Data>,
     NeuraAxisDefault: NeuraSplitInputs<Data, Combined = Data>,
+    Layer::IntermediaryRepr: 'static,
 {
-    fn from_sequential(
+    fn from_sequential_rec(
         seq: &NeuraSequential<Layer, ChildNetwork>,
         mut nodes: Vec<NeuraGraphNodeConstructed<Data>>,
-        output_shape: NeuraShape,
+        input_shape: NeuraShape,
     ) -> Self {
         nodes.push(NeuraGraphNodeConstructed {
-            node: Box::new(NeuraGraphNode::from(seq.layer.clone())),
+            node: Box::new(NeuraGraphNode::from_layer(
+                seq.layer.clone(),
+                vec![input_shape],
+            )),
             inputs: vec![nodes.len()],
             output: nodes.len() + 1,
         });
 
-        Self::from_sequential(&seq.child_network, nodes, output_shape)
+        Self::from_sequential_rec(&seq.child_network, nodes, seq.layer.output_shape())
     }
 }
 
-impl<Data, Layer, ChildNetwork> From<NeuraSequential<Layer, ChildNetwork>> for NeuraGraph<Data>
-where
-    NeuraGraph<Data>: FromSequential<NeuraSequential<Layer, ChildNetwork>, Data>,
-    NeuraSequential<Layer, ChildNetwork>: NeuraShapedLayer,
-{
-    fn from(network: NeuraSequential<Layer, ChildNetwork>) -> Self {
-        let output_shape = network.output_shape();
-
-        Self::from_sequential(&network, vec![], output_shape)
-    }
-}
+impl<Data> NeuraGraph<Data> {
+    pub fn from_sequential<Layer, ChildNetwork>(
+        network: NeuraSequential<Layer, ChildNetwork>,
+        input_shape: NeuraShape,
+    ) -> Self
+    where
+        NeuraGraph<Data>: FromSequential<NeuraSequential<Layer, ChildNetwork>, Data>,
+        NeuraSequential<Layer, ChildNetwork>: NeuraShapedLayer,
+    {
+        Self::from_sequential_rec(&network, vec![], input_shape)
+    }
+}

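`FromSequential` above flattens the type-level list of layers inside a `NeuraSequential` into a flat `Vec` of graph nodes: the `()` impl is the base case, while the `NeuraSequential<Layer, ChildNetwork>` impl pushes one node and recurses into the child network, threading the running shape through `from_sequential_rec`. A stripped-down sketch of the same recursion over a hypothetical cons list, collecting layer names instead of real layers:

// Illustrative stand-ins, not crate types: Cons/Nil form the sequential
// "network", Flat is the flattened result.
struct Flat {
    names: Vec<String>,
}

trait FromSeq<Seq> {
    fn from_seq_rec(seq: &Seq, acc: Vec<String>) -> Self;
}

struct Nil;
struct Cons<T>(String, T); // (layer name, child network)

// Base case: the tail is empty, so the accumulator is the finished list.
impl FromSeq<Nil> for Flat {
    fn from_seq_rec(_seq: &Nil, acc: Vec<String>) -> Self {
        Flat { names: acc }
    }
}

// Recursive case: push the head, then recurse into the child network.
impl<T> FromSeq<Cons<T>> for Flat
where
    Flat: FromSeq<T>,
{
    fn from_seq_rec(seq: &Cons<T>, mut acc: Vec<String>) -> Self {
        acc.push(seq.0.clone());
        Self::from_seq_rec(&seq.1, acc)
    }
}

fn main() {
    let net = Cons("dense".to_string(), Cons("relu".to_string(), Nil));
    let flat = Flat::from_seq_rec(&net, vec![]);
    assert_eq!(flat.names, vec!["dense", "relu"]);
}
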
@ -1,4 +1,11 @@
use crate::{layer::NeuraShapedLayer, prelude::*}; use std::any::Any;
use crate::{
algebra::NeuraDynVectorSpace,
derivable::NeuraLoss,
layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
prelude::*,
};
mod node; mod node;
pub use node::*; pub use node::*;
@@ -7,9 +14,36 @@ mod partial;
 pub use partial::NeuraGraphPartial;
 
 mod from;
+pub use from::FromSequential;
+
+pub trait NeuraTrainableLayerFull<Input>:
+    NeuraTrainableLayerEval<Input>
+    + NeuraTrainableLayerBackprop<Input>
+    + NeuraTrainableLayerSelf<Input>
+    + NeuraShapedLayer
+    + Clone
+    + std::fmt::Debug
+    + 'static
+where
+    Self::IntermediaryRepr: 'static,
+{
+}
+
+impl<Input, T> NeuraTrainableLayerFull<Input> for T
+where
+    T: NeuraTrainableLayerEval<Input>
+        + NeuraTrainableLayerBackprop<Input>
+        + NeuraTrainableLayerSelf<Input>
+        + NeuraShapedLayer
+        + Clone
+        + std::fmt::Debug
+        + 'static,
+    T::IntermediaryRepr: 'static,
+{
+}
 
 #[derive(Debug)]
-struct NeuraGraphNodeConstructed<Data> {
+pub struct NeuraGraphNodeConstructed<Data> {
     node: Box<dyn NeuraGraphNodeEval<Data>>,
     inputs: Vec<usize>,
     output: usize,
@@ -48,10 +82,12 @@ impl<Data> NeuraGraph<Data> {
         res
     }
 
-    fn eval_in(&self, input: &Data, buffer: &mut Vec<Option<Data>>)
+    fn eval_in(&self, input: &Data, buffer: &mut [Option<Data>])
     where
         Data: Clone,
     {
+        assert!(buffer.len() >= self.nodes.len());
+
         buffer[0] = Some(input.clone());
 
         for node in self.nodes.iter() {
@@ -69,6 +105,77 @@ impl<Data> NeuraGraph<Data> {
             buffer[node.output] = Some(result);
         }
     }
+
+    fn backprop_in<Loss, Target>(
+        &self,
+        input: &Data,
+        loss: Loss,
+        target: &Target,
+        output_buffer: &mut Vec<Option<Data>>,
+        backprop_buffer: &mut Vec<Option<Data>>,
+        intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
+        gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
+    ) where
+        Data: Clone + std::ops::Add<Data, Output = Data>,
+        Loss: NeuraLoss<Data, Target = Target>,
+    {
+        assert!(output_buffer.len() >= self.nodes.len());
+        assert!(backprop_buffer.len() >= self.nodes.len());
+        assert!(intermediary_buffer.len() >= self.nodes.len());
+        assert!(gradient_buffer.len() >= self.nodes.len());
+
+        output_buffer[0] = Some(input.clone());
+
+        // Forward pass
+        for node in self.nodes.iter() {
+            // PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
+            let inputs: Vec<_> = node
+                .inputs
+                .iter()
+                .map(|&i| {
+                    output_buffer[i]
+                        .clone()
+                        .expect("Unreachable: output of previous layer was not set")
+                })
+                .collect();
+            let (result, intermediary) = node.node.eval_training(&inputs);
+
+            output_buffer[node.output] = Some(result);
+            intermediary_buffer[node.output] = Some(intermediary);
+        }
+
+        let loss = loss.nabla(
+            target,
+            output_buffer[self.output_index]
+                .as_ref()
+                .expect("Unreachable: output was not set"),
+        );
+        backprop_buffer[self.output_index] = Some(loss);
+
+        // Backward pass
+        for node in self.nodes.iter().rev() {
+            let Some(epsilon_in) = backprop_buffer[node.output].take() else {
+                continue
+            };
+
+            let epsilon_out = node
+                .node
+                .backprop(&intermediary_buffer[node.output], &epsilon_in);
+            let gradient = node
+                .node
+                .get_gradient(&intermediary_buffer[node.output], &epsilon_in);
+
+            gradient_buffer[node.output].add_assign(&*gradient);
+
+            for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
+                if let Some(existing_gradient) = backprop_buffer[input].take() {
+                    backprop_buffer[input] = Some(existing_gradient + epsilon);
+                } else {
+                    backprop_buffer[input] = Some(epsilon);
+                }
+            }
+        }
+    }
 }
 
 impl<Data: Clone> NeuraLayer<Data> for NeuraGraph<Data> {
@@ -213,7 +320,7 @@ mod test {
             .construct(NeuraShape::Vector(3))
             .unwrap();
 
-        let graph = NeuraGraph::from(network.clone());
+        let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(3));
 
         for _ in 0..10 {
            let input = uniform_vector(3);

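`backprop_in` above first runs a forward pass that fills `output_buffer` and `intermediary_buffer`, seeds `backprop_buffer` at the output slot with the loss gradient from `loss.nabla`, and then walks the nodes in reverse, adding each node's back-propagated epsilon into the slots of its inputs. The accumulation logic in isolation, with plain `f64` gradients and identity "layers" (illustrative types only, not the crate's):

struct Node {
    inputs: Vec<usize>, // buffer slots this node reads from
    output: usize,      // buffer slot this node writes to
}

// Push an output-side gradient back to slot 0, summing contributions when
// several nodes read the same slot (mirrors the backward loop of backprop_in).
fn backward(nodes: &[Node], output_index: usize, loss_gradient: f64) -> f64 {
    let mut backprop_buffer: Vec<Option<f64>> = vec![None; nodes.len() + 1];
    backprop_buffer[output_index] = Some(loss_gradient);

    for node in nodes.iter().rev() {
        // Nodes whose output never received a gradient are skipped.
        let Some(epsilon_in) = backprop_buffer[node.output].take() else {
            continue;
        };
        for &input in &node.inputs {
            // Identity layer: pass the gradient through unchanged, accumulating by addition.
            let accumulated = backprop_buffer[input].take().unwrap_or(0.0);
            backprop_buffer[input] = Some(accumulated + epsilon_in);
        }
    }

    backprop_buffer[0].unwrap_or(0.0)
}

fn main() {
    // Slot 0 is the input; two parallel nodes read it, a third node sums them.
    let nodes = vec![
        Node { inputs: vec![0], output: 1 },
        Node { inputs: vec![0], output: 2 },
        Node { inputs: vec![1, 2], output: 3 },
    ];
    // Both branches contribute, so the input's gradient is doubled.
    assert_eq!(backward(&nodes, 3, 1.0), 2.0);
}

Because slot gradients are summed, a node whose output feeds several later nodes receives the total of their contributions, which is what the `existing_gradient + epsilon` branch above implements.
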
@@ -1,13 +1,16 @@
 use dyn_clone::DynClone;
-use std::fmt::Debug;
+use std::{any::Any, fmt::Debug};
 
 use crate::{
+    algebra::NeuraDynVectorSpace,
     err::NeuraAxisErr,
-    layer::{NeuraLayer, NeuraShapedLayer},
+    layer::{NeuraShapedLayer, NeuraTrainableLayerEval},
     network::residual::{NeuraAxisDefault, NeuraCombineInputs, NeuraSplitInputs},
     prelude::{NeuraPartialLayer, NeuraShape},
 };
 
+use super::*;
+
 // TODO: split into two traits
 pub trait NeuraGraphNodePartial<Data>: DynClone + Debug {
     fn inputs<'a>(&'a self) -> &'a [String];
@@ -20,7 +23,17 @@ pub trait NeuraGraphNodePartial<Data>: DynClone + Debug {
 }
 
 pub trait NeuraGraphNodeEval<Data>: DynClone + Debug {
-    fn eval<'a>(&'a self, inputs: &[Data]) -> Data;
+    fn eval(&self, inputs: &[Data]) -> Data;
+
+    fn eval_training(&self, inputs: &[Data]) -> (Data, Box<dyn Any>);
+    fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data>;
+    fn get_gradient(
+        &self,
+        intermediary: &dyn Any,
+        epsilon_in: &Data,
+    ) -> Box<dyn NeuraDynVectorSpace>;
+    fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace);
 }
 
 #[derive(Clone, Debug)]
@@ -29,6 +42,21 @@ pub struct NeuraGraphNode<Axis, Layer> {
     axis: Axis,
     layer: Layer,
     name: String,
+    input_shapes: Option<Vec<NeuraShape>>,
+}
+
+impl<Layer> NeuraGraphNode<NeuraAxisDefault, Layer> {
+    pub(crate) fn from_layer(layer: Layer, input_shapes: Vec<NeuraShape>) -> Self {
+        Self {
+            inputs: vec![],
+            axis: NeuraAxisDefault,
+            layer,
+            name: random_name(),
+            input_shapes: Some(input_shapes),
+        }
+    }
 }
 
 impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
@@ -39,6 +67,8 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
             axis,
             layer,
             name,
+            input_shapes: None,
         }
     }
@@ -50,38 +80,101 @@ impl<Axis, Layer> NeuraGraphNode<Axis, Layer> {
             + Debug
             + 'static,
         Layer: NeuraPartialLayer + Clone + Debug + 'static,
-        Layer::Constructed: NeuraShapedLayer
-            + NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>
-            + Clone
-            + Debug
-            + 'static,
+        Layer::Constructed:
+            NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
         Layer::Err: Debug,
+        <Layer::Constructed as NeuraTrainableLayerEval<
+            <Axis as NeuraCombineInputs<Data>>::Combined,
+        >>::IntermediaryRepr: 'static,
+        <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
     {
         Box::new(self)
     }
+
+    fn downcast_intermediary<'a, Data>(
+        &self,
+        intermediary: &'a dyn Any,
+    ) -> &'a Intermediary<Axis::Combined, Layer>
+    where
+        Axis: NeuraCombineInputs<Data>,
+        Layer: NeuraTrainableLayerFull<Axis::Combined>,
+        Axis::Combined: 'static,
+    {
+        intermediary
+            .downcast_ref::<Intermediary<Axis::Combined, Layer>>()
+            .expect("Incompatible value passed to NeuraGraphNode::backprop")
+    }
+}
+
+struct Intermediary<Combined, Layer: NeuraTrainableLayerFull<Combined>>
+where
+    Layer::IntermediaryRepr: 'static,
+{
+    combined: Combined,
+    layer_intermediary: Layer::IntermediaryRepr,
 }
 
 impl<
     Data: Clone,
     Axis: NeuraSplitInputs<Data> + Clone + Debug,
-    Layer: NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data> + Clone + Debug,
+    Layer: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
 > NeuraGraphNodeEval<Data> for NeuraGraphNode<Axis, Layer>
+where
+    Layer::IntermediaryRepr: 'static,
+    Axis::Combined: 'static,
 {
     fn eval<'a>(&'a self, inputs: &[Data]) -> Data {
         // TODO: use to_vec_in?
         let combined = self.axis.combine(inputs.to_vec());
         self.layer.eval(&combined)
     }
-}
 
-impl<Layer: Clone + Debug> From<Layer> for NeuraGraphNode<NeuraAxisDefault, Layer> {
-    fn from(layer: Layer) -> Self {
-        Self {
-            inputs: vec![],
-            axis: NeuraAxisDefault,
-            layer,
-            name: random_name(),
-        }
-    }
+    fn eval_training<'a>(&self, inputs: &[Data]) -> (Data, Box<dyn Any>) {
+        let combined = self.axis.combine(inputs.to_vec());
+        let (result, layer_intermediary) = self.layer.eval_training(&combined);
+
+        let intermediary: Intermediary<Axis::Combined, Layer> = Intermediary {
+            combined,
+            layer_intermediary,
+        };
+
+        (result, Box::new(intermediary))
+    }
+
+    fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data> {
+        let intermediary = self.downcast_intermediary(intermediary);
+
+        let epsilon_out = self.layer.backprop_layer(
+            &intermediary.combined,
+            &intermediary.layer_intermediary,
+            epsilon_in,
+        );
+
+        self.axis
+            .split(&epsilon_out, self.input_shapes.as_ref().unwrap())
+    }
+
+    fn get_gradient(
+        &self,
+        intermediary: &dyn Any,
+        epsilon_in: &Data,
+    ) -> Box<dyn NeuraDynVectorSpace> {
+        let intermediary = self.downcast_intermediary(intermediary);
+
+        Box::new(self.layer.get_gradient(
+            &intermediary.combined,
+            &intermediary.layer_intermediary,
+            epsilon_in,
+        ))
+    }
+
+    fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace) {
+        self.layer.apply_gradient(
+            gradient
+                .into_any()
+                .downcast_ref::<Layer::Gradient>()
+                .expect("Invalid gradient type passed to NeuraGraphNode::apply_gradient"),
+        );
+    }
 }
@@ -95,12 +188,10 @@ impl<
     Layer: NeuraPartialLayer + Clone + Debug,
 > NeuraGraphNodePartial<Data> for NeuraGraphNode<Axis, Layer>
 where
-    Layer::Constructed: NeuraShapedLayer
-        + NeuraLayer<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>
-        + Clone
-        + Debug
-        + 'static,
+    Layer::Constructed: NeuraTrainableLayerFull<<Axis as NeuraCombineInputs<Data>>::Combined, Output = Data>,
     Layer::Err: Debug,
+    <Layer::Constructed as NeuraTrainableLayerEval<<Axis as NeuraCombineInputs<Data>>::Combined>>::IntermediaryRepr: 'static,
+    <Axis as NeuraCombineInputs<Data>>::Combined: 'static,
 {
     fn inputs<'a>(&'a self) -> &'a [String] {
         &self.inputs
@@ -116,7 +207,7 @@ where
     ) -> Result<(Box<dyn NeuraGraphNodeEval<Data>>, NeuraShape), String> {
         let combined = self
             .axis
-            .combine(input_shapes)
+            .combine(input_shapes.clone())
            .map_err(|err| format!("{:?}", err))?;
 
         let constructed_layer = self
@@ -132,6 +223,7 @@ where
                 axis: self.axis.clone(),
                 layer: constructed_layer,
                 name: self.name.clone(),
+                input_shapes: Some(input_shapes)
             }),
             output_shape,
         ))

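Each `NeuraGraphNodeEval` implementation above stashes whatever its backward pass needs (the combined input plus the layer's `IntermediaryRepr`) in an `Intermediary` value, returns it type-erased as `Box<dyn Any>` from `eval_training`, and downcasts it back inside `backprop` and `get_gradient`. The same pattern in miniature, with a made-up scalar node computing x² in place of a real layer:

use std::any::Any;

// Forward-pass state stashed for the backward pass; stands in for the
// crate's Intermediary { combined, layer_intermediary }.
struct Intermediary {
    input: f64,
}

struct SquareNode;

impl SquareNode {
    // Forward: compute x^2 and return the state backprop will need, type-erased.
    fn eval_training(&self, input: f64) -> (f64, Box<dyn Any>) {
        (input * input, Box::new(Intermediary { input }))
    }

    // Backward: recover the stashed state, panicking on a type mismatch,
    // like downcast_intermediary in the commit above.
    fn backprop(&self, intermediary: &dyn Any, epsilon: f64) -> f64 {
        let intermediary = intermediary
            .downcast_ref::<Intermediary>()
            .expect("Incompatible value passed to SquareNode::backprop");
        // d(x^2)/dx = 2x, scaled by the incoming epsilon.
        2.0 * intermediary.input * epsilon
    }
}

fn main() {
    let node = SquareNode;
    let (output, intermediary) = node.eval_training(3.0);
    assert_eq!(output, 9.0);
    assert_eq!(node.backprop(&*intermediary, 1.0), 6.0);
}
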