Allow using NeuraResidual with NeuraBackprop

main
Shad Amethyst 2 years ago
parent 520fbcf317
commit 872cb3a6ce

@@ -25,6 +25,11 @@ impl<F: Float + std::fmt::Debug + 'static> NeuraLoss<DVector<F>> for Euclidean {
     #[inline]
     fn nabla(&self, target: &DVector<F>, actual: &DVector<F>) -> DVector<F> {
         let mut res = DVector::zeros(target.len());
+        assert_eq!(
+            target.shape(),
+            actual.shape(),
+            "target value differs in shape with network output"
+        );
 
         // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
         for i in 0..target.len() {
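
For context (an inference from the derivative, not something stated in the commit): `nabla` above corresponds to the squared-error loss with the conventional ½ factor,

    E(y) = ½ · Σᵢ (yᵢ − yᵢ′)²   ⇒   ∂E(y)/∂yᵢ = yᵢ − yᵢ′

which is only defined element-wise when `target` and `actual` agree in shape — hence the new assert_eq!, which fails fast with a clear message instead of panicking mid-loop (or silently truncating when `actual` is longer).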

@@ -95,6 +95,10 @@ impl<F: Float + std::fmt::Debug + 'static, Act: NeuraDerivable<F>, Reg: NeuraDer
             phantom: PhantomData,
         }
     }
+
+    pub fn input_len(&self) -> usize {
+        self.weights.shape().1
+    }
 }
 
 impl<F, Act, Reg, R: Rng> NeuraDenseLayerPartial<F, Act, Reg, R> {

@@ -1,6 +1,6 @@
-use std::{borrow::Borrow, rc::Rc};
+use std::borrow::Borrow;
 
-use nalgebra::{Const, DVector, Dyn, VecStorage};
+use nalgebra::{Const, DVector, Dyn, Scalar, VecStorage};
 
 use crate::prelude::NeuraShape;
@@ -20,7 +20,7 @@ pub trait NeuraCombineInputs<T> {
 }
 
 pub trait NeuraSplitInputs<T>: NeuraCombineInputs<T> {
-    fn split(&self, combined: Self::Combined, input_shapes: &[NeuraShape]) -> Vec<Rc<T>>;
+    fn split(&self, combined: &Self::Combined, input_shapes: &[NeuraShape]) -> Vec<T>;
 }
 
 impl<F: Clone> NeuraCombineInputs<DVector<F>> for NeuraAxisAppend {
@@ -62,3 +62,27 @@ impl NeuraCombineInputs<NeuraShape> for NeuraAxisAppend {
         }
     }
 }
+
+impl<F: Clone + Scalar + Default> NeuraSplitInputs<DVector<F>> for NeuraAxisAppend {
+    fn split(&self, combined: &Self::Combined, input_shapes: &[NeuraShape]) -> Vec<DVector<F>> {
+        let mut result = Vec::with_capacity(input_shapes.len());
+        let mut offset = 0;
+
+        for &input_shape in input_shapes.iter() {
+            let NeuraShape::Vector(input_shape) = input_shape else {
+                panic!("Expected {:?} to be NeuraShape::Vector", input_shape);
+            };
+
+            let mut subvector = DVector::from_element(input_shape, F::default());
+
+            for i in 0..input_shape {
+                subvector[i] = combined[offset + i].clone();
+            }
+
+            result.push(subvector);
+            offset += input_shape;
+        }
+
+        result
+    }
+}
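
An illustrative round-trip for the new impl (not part of the commit; it assumes `NeuraAxisAppend::combine` concatenates its inputs in order, so `split` is its inverse):

    // With nalgebra::dvector and the crate's prelude in scope:
    let axis = NeuraAxisAppend;
    let combined = dvector![1.0f64, 2.0, 3.0];
    let parts = axis.split(&combined, &[NeuraShape::Vector(1), NeuraShape::Vector(2)]);
    assert_eq!(parts[0], dvector![1.0]);      // first sub-vector recovered
    assert_eq!(parts[1], dvector![2.0, 3.0]); // second sub-vector recovered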

@@ -6,7 +6,9 @@ pub trait NeuraResidualConstruct {
     fn construct_residual(
         self,
-        input: NeuraResidualInput<NeuraShape>,
+        inputs: NeuraResidualInput<NeuraShape>,
+        indices: NeuraResidualInput<usize>,
+        current_index: usize,
     ) -> Result<Self::Constructed, Self::Err>;
 }
@@ -14,49 +16,52 @@ pub trait NeuraResidualConstruct {
 pub enum NeuraResidualConstructErr<LayerErr, ChildErr> {
     LayerErr(LayerErr),
     ChildErr(ChildErr),
-    OOBConnection(usize),
+    WrongConnection(isize),
     AxisErr(NeuraAxisErr),
+    NoOutput,
 }
 
 use NeuraResidualConstructErr::*;
 
-impl<Layer: NeuraPartialLayer, Axis> NeuraResidualConstruct for NeuraResidualNode<Layer, (), Axis>
-where
-    Axis: NeuraCombineInputs<NeuraShape, Combined = Result<NeuraShape, NeuraAxisErr>>,
-{
-    type Constructed = NeuraResidualNode<Layer::Constructed, (), Axis>;
-    type Err = NeuraResidualConstructErr<Layer::Err, ()>;
-
-    fn construct_residual(
-        self,
-        input: NeuraResidualInput<NeuraShape>,
-    ) -> Result<Self::Constructed, Self::Err> {
-        let (layer_input_shape, _rest) = input.shift();
-        let layer_input_shape = self
-            .axis
-            .combine(layer_input_shape)
-            .map_err(|e| AxisErr(e))?;
-
-        let layer = self
-            .layer
-            .construct(layer_input_shape)
-            .map_err(|e| LayerErr(e))?;
-        let layer_shape = layer.output_shape();
-
-        if let Some(oob_offset) = self.offsets.iter().copied().find(|o| *o > 0) {
-            return Err(OOBConnection(oob_offset));
-        }
-        // TODO: check rest for non-zero columns
-
-        Ok(NeuraResidualNode {
-            layer,
-            child_network: (),
-            offsets: self.offsets,
-            axis: self.axis,
-            output_shape: Some(layer_shape),
-        })
-    }
-}
+// impl<Layer: NeuraPartialLayer, Axis> NeuraResidualConstruct for NeuraResidualNode<Layer, NeuraResidualLast, Axis>
+// where
+//     Axis: NeuraCombineInputs<NeuraShape, Combined = Result<NeuraShape, NeuraAxisErr>>,
+// {
+//     type Constructed = NeuraResidualNode<Layer::Constructed, NeuraResidualLast, Axis>;
+//     type Err = NeuraResidualConstructErr<Layer::Err, <NeuraResidualLast as NeuraPartialLayer>::Err>;
+
+//     fn construct_residual(
+//         self,
+//         inputs: NeuraResidualInput<NeuraShape>,
+//         indices: NeuraResidualInput<usize>,
+//         current_index: usize,
+//     ) -> Result<Self::Constructed, Self::Err> {
+//         let (layer_input_shape, _rest) = inputs.shift();
+//         let layer_input_shape = self
+//             .axis
+//             .combine(layer_input_shape)
+//             .map_err(|e| AxisErr(e))?;
+
+//         let layer = self
+//             .layer
+//             .construct(layer_input_shape)
+//             .map_err(|e| LayerErr(e))?;
+//         let layer_shape = layer.output_shape();
+
+//         if let Some(oob_offset) = self.offsets.iter().copied().find(|o| *o > 0) {
+//             return Err(WrongConnection(oob_offset));
+//         }
+//         // TODO: check rest for non-zero columns
+
+//         Ok(NeuraResidualNode {
+//             layer,
+//             child_network: (),
+//             offsets: self.offsets,
+//             axis: self.axis,
+//             output_shape: Some(layer_shape),
+//         })
+//     }
+// }
 
 impl<Layer: NeuraPartialLayer, ChildNetwork: NeuraResidualConstruct, Axis> NeuraResidualConstruct
     for NeuraResidualNode<Layer, ChildNetwork, Axis>
@@ -68,13 +73,16 @@ where
     fn construct_residual(
         self,
-        input: NeuraResidualInput<NeuraShape>,
+        inputs: NeuraResidualInput<NeuraShape>,
+        indices: NeuraResidualInput<usize>,
+        current_index: usize,
     ) -> Result<Self::Constructed, Self::Err> {
-        let (layer_input_shape, mut rest) = input.shift();
-        let layer_input_shape = self
-            .axis
-            .combine(layer_input_shape)
-            .map_err(|e| AxisErr(e))?;
+        let (input_shapes, mut rest_inputs) = inputs.shift();
+        let (this_indices, mut rest_indices) = indices.shift();
+
+        let self_input_shapes = input_shapes.iter().map(|x| **x).collect::<Vec<_>>();
+
+        let layer_input_shape = self.axis.combine(input_shapes).map_err(|e| AxisErr(e))?;
 
         let layer = self
             .layer
@@ -82,14 +90,25 @@ where
             .map_err(|e| LayerErr(e))?;
         let layer_shape = Rc::new(layer.output_shape());
 
+        if self.offsets.len() == 0 {
+            return Err(NoOutput);
+        }
+
         for &offset in &self.offsets {
-            rest.push(offset, Rc::clone(&layer_shape));
+            rest_inputs.push(offset, Rc::clone(&layer_shape));
+            rest_indices.push(offset, Rc::new(current_index));
         }
         let layer_shape = *layer_shape;
 
+        debug_assert!(this_indices.iter().all(|x| **x < current_index));
+        let input_offsets: Vec<usize> = this_indices
+            .into_iter()
+            .map(|x| current_index - *x - 1)
+            .collect();
+
         let child_network = self
             .child_network
-            .construct_residual(rest)
+            .construct_residual(rest_inputs, rest_indices, current_index + 1)
             .map_err(|e| ChildErr(e))?;
 
         Ok(NeuraResidualNode {
@@ -98,6 +117,8 @@ where
             offsets: self.offsets,
             axis: self.axis,
             output_shape: Some(layer_shape),
+            input_shapes: self_input_shapes,
+            input_offsets,
         })
     }
 }
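
Worked through on the test network further down: the third node is constructed with current_index = 3 and consumes the outputs registered at indices 1 (first layer) and 2 (second layer), so its offsets come out as 3 − 1 − 1 = 1 and 3 − 2 − 1 = 0, matching the vec![1, 0] asserted in the test. An offset of n means the value skips n intermediate nodes before being consumed, and the same offsets are reused on the way back to route the split gradients.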
@@ -136,14 +157,16 @@ where
     fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
         let input_shape = Rc::new(input_shape);
         let mut inputs = NeuraResidualInput::new();
+        let mut indices = NeuraResidualInput::new();
 
         for &offset in &self.initial_offsets {
             inputs.push(offset, Rc::clone(&input_shape));
+            indices.push(offset, Rc::new(0usize));
         }
 
         drop(input_shape);
 
-        let layers = self.layers.construct_residual(inputs)?;
+        let layers = self.layers.construct_residual(inputs, indices, 1)?;
 
         Ok(NeuraResidual {
             layers,

@@ -33,11 +33,27 @@ impl NeuraResidualConstruct for NeuraResidualLast {
     fn construct_residual(
         self,
         input: NeuraResidualInput<NeuraShape>,
+        indices: NeuraResidualInput<usize>,
+        current_index: usize,
     ) -> Result<Self::Constructed, Self::Err> {
-        let input = *input
+        let (this_input, _rest) = input.shift();
+        let index = indices
             .get_first()
             .ok_or(Self::Err::AxisErr(NeuraAxisErr::NoInput))?;
+
+        if *index != current_index - 1 {
+            return Err(Self::Err::WrongConnection(
+                current_index as isize - *index as isize - 1,
+            ));
+        }
+        if this_input.len() != 1 {
+            return Err(Self::Err::AxisErr(NeuraAxisErr::NoInput));
+        }
+        // TODO: check that rest contains nothing else
+
+        let input = unwrap_or_clone(this_input.into_iter().next().unwrap());
 
         Ok(Self {
             output_shape: Some(input),
         })
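
The index bookkeeping above relies on the queue semantics of NeuraResidualInput: push(offset, value) schedules a value to be delivered `offset` nodes after the next one, and shift() splits off everything due at the current node. A minimal sketch of that contract (illustrative only — the payload type and exact return shapes are assumed from how this diff uses the API):

    let mut queue = NeuraResidualInput::new();
    queue.push(0, Rc::new(1.0f64)); // due at the very next node
    queue.push(1, Rc::new(2.0f64)); // skips one node
    let (now, rest) = queue.shift();
    assert_eq!(*now[0], 1.0);       // only the offset-0 entry is delivered here
    let (later, _) = rest.shift();
    assert_eq!(*later[0], 2.0);     // the offset-1 entry arrives one shift later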
@@ -68,15 +84,12 @@ impl NeuraNetworkRec for NeuraResidualLast {
         &()
     }
 
-    fn merge_gradient(
-        &self,
-        rec_gradient: <Self::NextNode as NeuraTrainableLayerBase>::Gradient,
-        layer_gradient: <Self::Layer as NeuraTrainableLayerBase>::Gradient,
-    ) -> Self::Gradient
+    #[inline(always)]
+    fn merge_gradient(&self, _rec_gradient: (), _layer_gradient: ()) -> Self::Gradient
     where
         Self::Layer: NeuraTrainableLayerBase,
     {
-        todo!()
+        ()
     }
 }
@@ -107,11 +120,15 @@ impl<Data: Clone> NeuraNetwork<NeuraResidualInput<Data>> for NeuraResidualLast {
     fn map_gradient_out<'a>(
         &'_ self,
-        input: &'_ NeuraResidualInput<Data>,
-        gradient_in: &'_ Self::NodeOutput,
+        _input: &'_ NeuraResidualInput<Data>,
+        _gradient_in: &'_ Self::NodeOutput,
         gradient_out: &'a Self::LayerInput,
     ) -> Cow<'a, NeuraResidualInput<Data>> {
-        unimplemented!()
+        let mut result = NeuraResidualInput::new();
+        result.push(0, Rc::new(gradient_out.clone()));
+        Cow::Owned(result)
     }
 }

@@ -137,6 +137,36 @@ mod test {
             .unwrap();
 
         assert_eq!(network.output_shape(), NeuraShape::Vector(8));
 
+        assert_eq!(network.layers.layer.input_len(), 1);
+        assert_eq!(network.layers.child_network.layer.input_len(), 3); // input (1) + first layer (2)
+        assert_eq!(
+            network.layers.child_network.child_network.layer.input_len(),
+            6
+        ); // first layer (2) + second layer (4)
+
+        assert_eq!(network.layers.input_offsets, vec![0]);
+        assert_eq!(network.layers.child_network.input_offsets, vec![1, 0]); // input, first layer
+        assert_eq!(
+            network.layers.child_network.child_network.input_offsets,
+            vec![1, 0]
+        ); // first layer, second layer
+
+        let map_shape = |shapes: &[NeuraShape]| {
+            shapes
+                .into_iter()
+                .map(|shape| shape.size())
+                .collect::<Vec<_>>()
+        };
+
+        assert_eq!(map_shape(&network.layers.input_shapes), vec![1]);
+        assert_eq!(
+            map_shape(&network.layers.child_network.input_shapes),
+            vec![1, 2]
+        ); // input, first layer
+        assert_eq!(
+            map_shape(&network.layers.child_network.child_network.input_shapes),
+            vec![2, 4]
+        ); // first layer, second layer
+
         network.eval(&dvector![0.0]);
     }
@@ -147,13 +177,13 @@ mod test {
             <= 0, 1;
             neura_layer!("dense", 2) => 0, 1;
             neura_layer!("dense", 4);
-            neura_layer!("dense", 8)
+            neura_layer!("dense", 4)
         ]
         .construct(NeuraShape::Vector(1))
         .unwrap();
 
         let backprop = NeuraBackprop::new(Euclidean);
 
-        backprop.get_gradient(&network, &dvector![0.0], &dvector![0.0]);
+        backprop.get_gradient(&network, &dvector![0.0], &dvector![0.0, 0.0, 0.0, 0.0]);
     }
 }
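
The two changes in this test go together: Euclidean::nabla now asserts that the target matches the network output in shape, and the old test passed a 1-component target against an 8-component output. Shrinking the final layer to 4 neurons and widening the target to dvector![0.0, 0.0, 0.0, 0.0] keeps the shapes consistent.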

@@ -18,6 +18,8 @@ pub struct NeuraResidualNode<Layer, ChildNetwork, Axis> {
     pub axis: Axis,
 
     pub(crate) output_shape: Option<NeuraShape>,
+    pub(crate) input_shapes: Vec<NeuraShape>,
+    pub(crate) input_offsets: Vec<usize>,
 }
 
 impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend> {
@@ -28,6 +30,8 @@ impl<Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, NeuraAxisAppend
             offsets: vec![0],
             axis: NeuraAxisAppend,
             output_shape: None,
+            input_shapes: vec![],
+            input_offsets: vec![],
         }
     }
 }
@@ -44,13 +48,20 @@ impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
     }
 
     pub fn axis<Axis2>(self, axis: Axis2) -> NeuraResidualNode<Layer, ChildNetwork, Axis2> {
+        if self.output_shape.is_some() {
+            unimplemented!(
+                "NeuraResidualNode::axis cannot yet be called after NeuraResidualNode::construct"
+            );
+        }
         NeuraResidualNode {
             layer: self.layer,
             child_network: self.child_network,
             offsets: self.offsets,
             axis,
+            // Drop the knowledge of output_shape
             output_shape: None,
+            input_shapes: self.input_shapes,
+            input_offsets: self.input_offsets,
         }
     }
@@ -195,13 +206,14 @@ where
         )
     }
 
+    #[allow(unused)]
     fn get_gradient(
         &self,
         input: &NeuraResidualInput<Data>,
         intermediary: &Self::IntermediaryRepr,
         epsilon: &Self::Output,
     ) -> Self::Gradient {
-        unimplemented!();
+        unimplemented!("NeuraResidualNode::get_gradient is not yet implemented, sorry");
     }
 }
@@ -232,14 +244,15 @@ impl<Axis, Layer: NeuraTrainableLayerBase, ChildNetwork: NeuraTrainableLayerBase
 }
 
 impl<
-        Data: Clone,
-        Axis: NeuraCombineInputs<Data>,
-        Layer: NeuraLayer<Axis::Combined, Output = Data>,
+        Data: Clone + std::fmt::Debug,
+        Axis: NeuraCombineInputs<Data> + NeuraSplitInputs<Data>,
+        Layer: NeuraLayer<Axis::Combined, Output = Data> + std::fmt::Debug,
         ChildNetwork,
     > NeuraNetwork<NeuraResidualInput<Data>> for NeuraResidualNode<Layer, ChildNetwork, Axis>
 where
     Layer::Output: Clone,
     Axis::Combined: Clone,
+    for<'a> Data: std::iter::Sum<&'a Data>,
 {
     type LayerInput = Axis::Combined;
@@ -260,20 +273,26 @@ where
         Cow::Owned(remaining_inputs)
     }
 
+    // To convert from gradient_in to gradient_out:
+    // - pop the first value from `gradient_in` (map_gradient_in)
+    // - compute its sum (map_gradient_in)
+    // - use it to compute the outgoing epsilon of the current layer (backprop)
+    // - split the outgoing epsilon into its original components (map_gradient_out)
+    // - push those back onto the rest (map_gradient_out)
+    // At this point, the value for `epsilon` in the gradient solver's state should be ready for another iteration,
+    // with the first value containing the unsummed incoming epsilon values from the downstream layers
     #[allow(unused_variables)]
     fn map_gradient_in<'a>(
         &'_ self,
         input: &'_ NeuraResidualInput<Data>,
         gradient_in: &'a Self::NodeOutput,
     ) -> Cow<'a, <Self::Layer as NeuraLayer<Self::LayerInput>>::Output> {
-        // To convert from gradient_in to layer's gradient_in:
-        // Pop the first value from `epsilon`, then:
-        // - compute its sum
-        // - use it to compute the outcoming epsilon of the current layer
-        // - split the oucoming epsilon into its original components, and push those back onto the rest
-        // At this point, the value for `epsilon` in the gradient solver's state should be ready for another iteration,
-        // with the first value containing the unsummed incoming epsilon values from the downstream layers
-        unimplemented!()
+        let (first_gradient, _) = gradient_in.shift();
+
+        let sum = first_gradient.iter().map(|x| x.as_ref()).sum();
+
+        Cow::Owned(sum)
     }
 
     #[allow(unused_variables)]
@@ -283,6 +302,14 @@ where
         gradient_in: &'_ Self::NodeOutput,
         gradient_out: &'a Self::LayerInput,
     ) -> Cow<'a, NeuraResidualInput<Data>> {
-        unimplemented!()
+        let (_, mut rest) = gradient_in.shift();
+
+        let split = self.axis.split(gradient_out, &self.input_shapes);
+
+        for (offset, gradient) in self.input_offsets.iter().copied().zip(split.into_iter()) {
+            rest.push(offset, Rc::new(gradient));
+        }
+
+        Cow::Owned(rest)
     }
 }
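
End to end, the epsilon routing for a node now reads: map_gradient_in pops the incoming gradients (possibly several, one per downstream consumer) and sums them — which is what the new for<'a> Data: std::iter::Sum<&'a Data> bound enables, e.g. for Data = DVector<f64> — the gradient solver backpropagates that sum through the layer, and map_gradient_out splits the resulting epsilon along the axis using input_shapes and re-schedules each piece at its input_offsets entry. For the third node of the test network, a 6-component epsilon splits back into a 2-vector pushed at offset 1 (toward the first layer) and a 4-vector pushed at offset 0 (toward the second layer), mirroring the forward concatenation.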
