Implement NeuraGraphBackprop

main
Shad Amethyst 2 years ago
parent 99d0cb4408
commit 93fa7e238a

@@ -14,12 +14,10 @@ pub trait NeuraVectorSpace {
fn mul_assign(&mut self, by: f64);
// fn zero() -> Self;
fn norm_squared(&self) -> f64;
}
pub trait NeuraDynVectorSpace {
pub trait NeuraDynVectorSpace: Send {
fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace);
fn mul_assign(&mut self, by: f64);
@@ -30,9 +28,9 @@ pub trait NeuraDynVectorSpace {
fn into_any(&self) -> &dyn Any;
}
impl<T: NeuraVectorSpace + 'static> NeuraDynVectorSpace for T {
impl<T: NeuraVectorSpace + Send + 'static> NeuraDynVectorSpace for T {
fn add_assign(&mut self, other: &dyn NeuraDynVectorSpace) {
let Some(other) = other.into_any().downcast_ref::<Self>() else {
let Some(other) = other.into_any().downcast_ref::<T>() else {
panic!("Incompatible operand: expected other to be equal to self");
};
@@ -63,17 +61,12 @@ impl NeuraVectorSpace for () {
// Noop
}
// #[inline(always)]
// fn zero() -> Self {
// ()
// }
fn norm_squared(&self) -> f64 {
0.0
}
}
impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
impl<T: NeuraVectorSpace + ?Sized> NeuraVectorSpace for Box<T> {
fn add_assign(&mut self, other: &Self) {
self.as_mut().add_assign(other.as_ref());
}
@@ -82,15 +75,25 @@ impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
self.as_mut().mul_assign(by);
}
// fn zero() -> Self {
// Box::new(T::zero())
// }
fn norm_squared(&self) -> f64 {
self.as_ref().norm_squared()
}
}
impl NeuraVectorSpace for dyn NeuraDynVectorSpace {
fn add_assign(&mut self, other: &Self) {
<dyn NeuraDynVectorSpace>::add_assign(self, &*other)
}
fn mul_assign(&mut self, by: f64) {
<dyn NeuraDynVectorSpace>::mul_assign(self, by)
}
fn norm_squared(&self) -> f64 {
<dyn NeuraDynVectorSpace>::norm_squared(self)
}
}
impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
fn add_assign(&mut self, other: &Self) {
NeuraVectorSpace::add_assign(&mut self.0, &other.0);
@@ -124,21 +127,34 @@ impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
}
}
// fn zero() -> Self {
// let mut res: Vec<T> = Vec::with_capacity(N);
// for _ in 0..N {
// res.push(T::zero());
// }
// res.try_into().unwrap_or_else(|_| {
// // TODO: check that this panic is optimized away
// unreachable!()
// })
// }
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
}
}
impl<T: NeuraVectorSpace> NeuraVectorSpace for Vec<T> {
fn add_assign(&mut self, other: &Self) {
assert_eq!(self.len(), other.len());
for (self_item, other_item) in self.iter_mut().zip(other.iter()) {
self_item.add_assign(other_item);
}
}
fn mul_assign(&mut self, by: f64) {
for item in self.iter_mut() {
item.mul_assign(by);
}
}
fn norm_squared(&self) -> f64 {
self.iter().map(T::norm_squared).sum()
let mut res = 0.0;
for item in self.iter() {
res += item.norm_squared();
}
res
}
}
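
The blanket impl above erases a concrete gradient type behind dyn NeuraDynVectorSpace and recovers it through into_any plus downcast_ref whenever two erased values are combined. A minimal standalone sketch of that pattern, with a simplified DynVector trait and Vec<f64> standing in for a real gradient (illustrative names, not this crate's API):

    use std::any::Any;

    // Object-safe stand-in for NeuraDynVectorSpace, reduced to the two methods used here.
    trait DynVector: Send {
        fn add_assign(&mut self, other: &dyn DynVector);
        fn into_any(&self) -> &dyn Any;
    }

    impl DynVector for Vec<f64> {
        fn add_assign(&mut self, other: &dyn DynVector) {
            // Recover the concrete type behind the trait object; a mismatch panics, as in the impl above.
            let other = other
                .into_any()
                .downcast_ref::<Vec<f64>>()
                .expect("Incompatible operand");
            for (a, b) in self.iter_mut().zip(other.iter()) {
                *a += *b;
            }
        }

        fn into_any(&self) -> &dyn Any {
            self
        }
    }

    fn main() {
        // Two type-erased values of the same concrete type can be summed in place.
        let mut total: Box<dyn DynVector> = Box::new(vec![1.0, 2.0]);
        let update: Box<dyn DynVector> = Box::new(vec![0.5, -0.5]);
        total.add_assign(&*update);
        // `total` now holds [1.5, 1.5].
    }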

@@ -12,6 +12,10 @@ impl<Loss> NeuraBackprop<Loss> {
pub fn new(loss: Loss) -> Self {
Self { loss }
}
pub fn get(&self) -> &Loss {
&self.loss
}
}
impl<

@@ -0,0 +1,169 @@
use crate::gradient_solver::NeuraGradientSolver;
use super::*;
#[derive(Debug, Clone)]
pub struct NeuraGraphBackprop<Loss> {
loss: Loss,
// TODO: store buffers for re-use, do not clone them
}
impl<Loss> NeuraGraphBackprop<Loss> {
pub fn new(loss: Loss) -> Self {
Self { loss }
}
}
impl<Loss: Clone> From<&NeuraBackprop<Loss>> for NeuraGraphBackprop<Loss> {
fn from(value: &NeuraBackprop<Loss>) -> Self {
Self {
loss: value.get().clone(),
}
}
}
impl<
Data: Clone + std::fmt::Debug + std::ops::Add<Data, Output = Data> + 'static,
Target,
Loss: NeuraLoss<Data, Target = Target>,
> NeuraGradientSolver<Data, Target, NeuraGraph<Data>> for NeuraGraphBackprop<Loss>
{
// TODO: make it a &mut method
fn get_gradient(
&self,
trainable: &NeuraGraph<Data>,
input: &Data,
target: &Target,
) -> <NeuraGraph<Data> as NeuraLayerBase>::Gradient {
let mut output_buffer = trainable.create_buffer();
let mut backprop_buffer = trainable.create_buffer();
let mut intermediary_buffer = trainable.create_buffer();
let mut gradient_buffer = trainable.default_gradient();
trainable.backprop_in(
input,
&self.loss,
target,
&mut output_buffer,
&mut backprop_buffer,
&mut intermediary_buffer,
&mut gradient_buffer,
);
gradient_buffer
}
#[allow(unused)]
fn score(&self, trainable: &NeuraGraph<Data>, input: &Data, target: &Target) -> f64 {
todo!()
}
}
impl<Data> NeuraGraph<Data> {
fn backprop_in<Loss, Target>(
&self,
input: &Data,
loss: &Loss,
target: &Target,
output_buffer: &mut Vec<Option<Data>>,
backprop_buffer: &mut Vec<Option<Data>>,
intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
) where
Data: Clone + std::ops::Add<Data, Output = Data>,
Loss: NeuraLoss<Data, Target = Target>,
{
assert!(output_buffer.len() >= self.nodes.len());
assert!(backprop_buffer.len() >= self.nodes.len());
assert!(intermediary_buffer.len() >= self.nodes.len());
assert!(gradient_buffer.len() >= self.nodes.len());
output_buffer[0] = Some(input.clone());
// Forward pass
for node in self.nodes.iter() {
// PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
let inputs: Vec<_> = node
.inputs
.iter()
.map(|&i| {
output_buffer[i]
.clone()
.expect("Unreachable: output of previous layer was not set")
})
.collect();
let (result, intermediary) = node.node.eval_training(&inputs);
output_buffer[node.output] = Some(result);
intermediary_buffer[node.output] = Some(intermediary);
}
let loss = loss.nabla(
target,
output_buffer[self.output_index]
.as_ref()
.expect("Unreachable: output was not set"),
);
backprop_buffer[self.output_index] = Some(loss);
// Backward pass
for node in self.nodes.iter().rev() {
let Some(epsilon_in) = backprop_buffer[node.output].take() else {
continue
};
// TODO: create more wrapper types to avoid this dereferencing mess
let intermediary = &**intermediary_buffer[node.output].as_ref().unwrap();
let epsilon_out = node.node.backprop(intermediary, &epsilon_in);
let gradient = node.node.get_gradient(intermediary, &epsilon_in);
(*gradient_buffer[node.output]).add_assign(&*gradient);
for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
if let Some(existing_gradient) = backprop_buffer[input].take() {
backprop_buffer[input] = Some(existing_gradient + epsilon);
} else {
backprop_buffer[input] = Some(epsilon);
}
}
}
}
}
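
backprop_in above assumes the nodes are stored in topological order: the forward pass fills output_buffer and intermediary_buffer slot by slot, and the reverse pass takes each node's accumulated epsilon, derives per-input epsilons and a per-node gradient, and adds the epsilons into the slots of the node's inputs. A stripped-down, self-contained sketch of that buffer scheme, using scalar data and weight-times-input nodes (the Node struct and all names here are illustrative, not this crate's types):

    // Stand-in for NeuraGraph's node list: each node scales the sum of its inputs by a weight.
    struct Node {
        inputs: Vec<usize>, // buffer slots read during the forward pass
        output: usize,      // buffer slot written during the forward pass
        weight: f64,
    }

    fn backprop(
        nodes: &[Node],
        n_slots: usize,
        input: f64,
        d_loss_d_output: f64,
        output_slot: usize,
    ) -> Vec<f64> {
        let mut outputs = vec![None::<f64>; n_slots];
        let mut epsilons = vec![None::<f64>; n_slots];
        let mut gradients = vec![0.0; nodes.len()];

        // Forward pass: slot 0 holds the graph input, like output_buffer[0] above.
        outputs[0] = Some(input);
        for node in nodes {
            let x: f64 = node.inputs.iter().map(|&i| outputs[i].expect("slot not filled")).sum();
            outputs[node.output] = Some(node.weight * x);
        }

        // Backward pass: seed the output slot, then visit the nodes in reverse topological order.
        epsilons[output_slot] = Some(d_loss_d_output);
        for (idx, node) in nodes.iter().enumerate().rev() {
            let Some(eps) = epsilons[node.output].take() else { continue };
            let x: f64 = node.inputs.iter().map(|&i| outputs[i].unwrap()).sum();
            gradients[idx] += eps * x; // d(loss)/d(weight) for this node
            for &i in &node.inputs {
                // Accumulate into the input slots, mirroring `existing_gradient + epsilon` above.
                *epsilons[i].get_or_insert(0.0) += eps * node.weight;
            }
        }
        gradients
    }

    fn main() {
        // A two-node chain: slot 0 (input) -> node 0 (slot 1) -> node 1 (slot 2).
        let nodes = vec![
            Node { inputs: vec![0], output: 1, weight: 2.0 },
            Node { inputs: vec![1], output: 2, weight: 3.0 },
        ];
        let gradients = backprop(&nodes, 3, 1.5, 1.0, 2);
        assert_eq!(gradients, vec![3.0 * 1.5, 2.0 * 1.5]);
    }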
#[cfg(test)]
mod test {
use crate::{
derivable::{activation::LeakyRelu, loss::Euclidean, regularize::NeuraL0},
layer::dense::NeuraDenseLayer,
utils::uniform_vector,
};
use super::*;
#[test]
fn test_graph_backprop() {
let network =
neura_sequential![neura_layer!("dense", 4, f64), neura_layer!("dense", 2, f64),]
.construct(NeuraShape::Vector(10))
.unwrap();
let graph = NeuraGraph::from_sequential(network.clone(), NeuraShape::Vector(10));
let trainer = NeuraGraphBackprop::new(Euclidean);
let input = uniform_vector(10);
let target = uniform_vector(2);
let expected = NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target);
let actual = trainer.get_gradient(&graph, &input, &target);
type Gradient = <NeuraDenseLayer<f64, LeakyRelu<f64>, NeuraL0> as NeuraLayerBase>::Gradient;
fn get_gradient(dynamic: &Box<dyn NeuraDynVectorSpace>) -> &Gradient {
(**dynamic).into_any().downcast_ref::<Gradient>().unwrap()
}
assert_eq!(get_gradient(&actual[1]), &expected.0);
assert_eq!(get_gradient(&actual[2]), &expected.1 .0);
}
}

@@ -13,6 +13,9 @@ pub use partial::NeuraGraphPartial;
mod from;
pub use from::FromSequential;
mod backprop;
pub use backprop::NeuraGraphBackprop;
#[derive(Debug)]
pub struct NeuraGraphNodeConstructed<Data> {
node: Box<dyn NeuraGraphNodeEval<Data>>,
@@ -47,19 +50,51 @@ pub struct NeuraGraph<Data> {
}
impl<Data: Clone + std::fmt::Debug + 'static> NeuraLayerBase for NeuraGraph<Data> {
type Gradient = ();
type Gradient = Vec<Box<dyn NeuraDynVectorSpace>>;
fn output_shape(&self) -> NeuraShape {
self.output_shape
}
fn default_gradient(&self) -> Self::Gradient {
unimplemented!("NeuraGraph cannot be used as a layer yet")
let mut res: Self::Gradient = Vec::with_capacity(self.buffer_size);
res.push(Box::new(()));
for node in self.nodes.iter() {
res.push(node.node.default_gradient());
}
res
}
fn apply_gradient(&mut self, gradient: &Self::Gradient) {
// Index 0 of the gradient holds the `()` placeholder for the graph input, so node gradients start at 1
for (node, gradient) in self.nodes.iter_mut().zip(gradient.iter().skip(1)) {
node.node.apply_gradient(gradient);
}
}
fn prepare_layer(&mut self, is_training: bool) {
for node in self.nodes.iter_mut() {
node.node.prepare(is_training);
}
}
fn regularize_layer(&self) -> Self::Gradient {
let mut res: Self::Gradient = Vec::with_capacity(self.buffer_size);
res.push(Box::new(()));
for node in self.nodes.iter() {
res.push(node.node.get_regularization_gradient());
}
res
}
}
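
With the algebra impls added earlier in this commit (Vec<T>, Box<T: ?Sized>, and dyn NeuraDynVectorSpace), the new Gradient type Vec<Box<dyn NeuraDynVectorSpace>> is itself a vector space: the Vec delegates to each Box, the Box to its erased contents, and the dyn impl to the concrete per-layer gradient, so a whole-graph gradient can be scaled and accumulated like any other. A self-contained sketch of that delegation chain with simplified traits (illustrative, not this crate's code):

    use std::any::Any;

    // Simplified stand-ins for the two traits, to show how the whole-graph gradient composes.
    trait VectorSpace {
        fn mul_assign(&mut self, by: f64);
        fn norm_squared(&self) -> f64;
    }

    trait DynVectorSpace: Send {
        fn mul_assign(&mut self, by: f64);
        fn norm_squared(&self) -> f64;
        fn into_any(&self) -> &dyn Any;
    }

    // Blanket impl: any sized vector space can be used behind `dyn DynVectorSpace`.
    impl<T: VectorSpace + Send + 'static> DynVectorSpace for T {
        fn mul_assign(&mut self, by: f64) { VectorSpace::mul_assign(self, by) }
        fn norm_squared(&self) -> f64 { VectorSpace::norm_squared(self) }
        fn into_any(&self) -> &dyn Any { self }
    }

    // The delegation chain: dyn -> concrete, Box -> contents, Vec -> items.
    impl VectorSpace for dyn DynVectorSpace {
        fn mul_assign(&mut self, by: f64) { DynVectorSpace::mul_assign(self, by) }
        fn norm_squared(&self) -> f64 { DynVectorSpace::norm_squared(self) }
    }
    impl<T: VectorSpace + ?Sized> VectorSpace for Box<T> {
        fn mul_assign(&mut self, by: f64) { self.as_mut().mul_assign(by) }
        fn norm_squared(&self) -> f64 { self.as_ref().norm_squared() }
    }
    impl<T: VectorSpace> VectorSpace for Vec<T> {
        fn mul_assign(&mut self, by: f64) { for item in self.iter_mut() { item.mul_assign(by); } }
        fn norm_squared(&self) -> f64 { self.iter().map(T::norm_squared).sum() }
    }
    impl VectorSpace for f64 {
        fn mul_assign(&mut self, by: f64) { *self *= by; }
        fn norm_squared(&self) -> f64 { self * self }
    }

    fn main() {
        // One erased entry per node, with different concrete gradient types behind each Box.
        let mut gradient: Vec<Box<dyn DynVectorSpace>> =
            vec![Box::new(2.0_f64), Box::new(vec![3.0_f64, 4.0])];
        // Fully qualified calls disambiguate, since the blanket impl also applies to the Vec itself.
        VectorSpace::mul_assign(&mut gradient, 0.5);
        assert_eq!(VectorSpace::norm_squared(&gradient), 1.0 + 2.25 + 4.0);
    }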
impl<Data> NeuraGraph<Data> {
fn create_buffer(&self) -> Vec<Option<Data>> {
fn create_buffer<T>(&self) -> Vec<Option<T>> {
let mut res = Vec::with_capacity(self.buffer_size);
for _ in 0..self.buffer_size {
@@ -92,77 +127,6 @@ impl<Data> NeuraGraph<Data> {
buffer[node.output] = Some(result);
}
}
fn backprop_in<Loss, Target>(
&self,
input: &Data,
loss: Loss,
target: &Target,
output_buffer: &mut Vec<Option<Data>>,
backprop_buffer: &mut Vec<Option<Data>>,
intermediary_buffer: &mut Vec<Option<Box<dyn Any>>>,
gradient_buffer: &mut Vec<Box<dyn NeuraDynVectorSpace>>,
) where
Data: Clone + std::ops::Add<Data, Output = Data>,
Loss: NeuraLoss<Data, Target = Target>,
{
assert!(output_buffer.len() >= self.nodes.len());
assert!(backprop_buffer.len() >= self.nodes.len());
assert!(intermediary_buffer.len() >= self.nodes.len());
assert!(gradient_buffer.len() >= self.nodes.len());
output_buffer[0] = Some(input.clone());
// Forward pass
for node in self.nodes.iter() {
// PERF: re-use the allocation for `inputs`, and `.take()` the elements only needed once?
let inputs: Vec<_> = node
.inputs
.iter()
.map(|&i| {
output_buffer[i]
.clone()
.expect("Unreachable: output of previous layer was not set")
})
.collect();
let (result, intermediary) = node.node.eval_training(&inputs);
output_buffer[node.output] = Some(result);
intermediary_buffer[node.output] = Some(intermediary);
}
let loss = loss.nabla(
target,
output_buffer[self.output_index]
.as_ref()
.expect("Unreachable: output was not set"),
);
backprop_buffer[self.output_index] = Some(loss);
// Backward pass
for node in self.nodes.iter().rev() {
let Some(epsilon_in) = backprop_buffer[node.output].take() else {
continue
};
let epsilon_out = node
.node
.backprop(&intermediary_buffer[node.output], &epsilon_in);
let gradient = node
.node
.get_gradient(&intermediary_buffer[node.output], &epsilon_in);
gradient_buffer[node.output].add_assign(&*gradient);
for (&input, epsilon) in node.inputs.iter().zip(epsilon_out.into_iter()) {
if let Some(existing_gradient) = backprop_buffer[input].take() {
backprop_buffer[input] = Some(existing_gradient + epsilon);
} else {
backprop_buffer[input] = Some(epsilon);
}
}
}
}
}
impl<Data: Clone + std::fmt::Debug + 'static> NeuraLayer<Data> for NeuraGraph<Data> {

@@ -24,13 +24,20 @@ pub trait NeuraGraphNodeEval<Data>: DynClone + Debug {
fn eval_training(&self, inputs: &[Data]) -> (Data, Box<dyn Any>);
fn backprop(&self, intermediary: &dyn Any, epsilon_in: &Data) -> Vec<Data>;
fn default_gradient(&self) -> Box<dyn NeuraDynVectorSpace>;
fn get_gradient(
&self,
intermediary: &dyn Any,
epsilon_in: &Data,
) -> Box<dyn NeuraDynVectorSpace>;
fn get_regularization_gradient(&self) -> Box<dyn NeuraDynVectorSpace>;
fn apply_gradient(&mut self, gradient: &dyn NeuraDynVectorSpace);
fn prepare(&mut self, is_training: bool);
}
#[derive(Clone, Debug)]
@@ -157,6 +164,18 @@ impl<Data: Clone, Axis: NeuraAxis<Data>, Layer: NeuraLayer<Axis::Combined, Outpu
.expect("Invalid gradient type passed to NeuraGraphNode::apply_gradient"),
);
}
fn default_gradient(&self) -> Box<dyn NeuraDynVectorSpace> {
Box::new(self.layer.default_gradient())
}
fn prepare(&mut self, is_training: bool) {
self.layer.prepare_layer(is_training);
}
fn get_regularization_gradient(&self) -> Box<dyn NeuraDynVectorSpace> {
Box::new(self.layer.regularize_layer())
}
}
impl<Data: Clone, Axis: NeuraAxis<Data>, Layer: NeuraPartialLayer + Clone + Debug>
