WIP implementation for backprop in residual networks

main
Shad Amethyst 2 years ago
parent 83dc763746
commit dd278e7b90

@@ -79,6 +79,16 @@ impl<
        (epsilon_out, combine_gradients(layer_gradient, rec_gradient))
    }
+
+    fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
+        &self,
+        rec_opt_output: Self::Output<From, Gradient>,
+        callback: Cb
+    ) -> Self::Output<To, Gradient> {
+        (
+            callback(rec_opt_output.0), rec_opt_output.1
+        )
+    }
}

#[cfg(test)]

@@ -134,6 +134,14 @@ impl<
        combine_gradients(layer_gradient, rec_gradient)
    }
+
+    fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
+        &self,
+        rec_opt_output: Self::Output<From, Gradient>,
+        callback: Cb
+    ) -> Self::Output<To, Gradient> {
+        rec_opt_output
+    }
}

#[cfg(test)]

@@ -29,6 +29,12 @@ pub trait NeuraGradientSolverTransient<Input, Layer: NeuraTrainableLayerBase<Inp
        rec_opt_output: Self::Output<Layer::Output, RecGradient>,
        combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
    ) -> Self::Output<Input, NetworkGradient>;
+
+    fn map_epsilon<From, To, Gradient, Cb: Fn(From) -> To>(
+        &self,
+        rec_opt_output: Self::Output<From, Gradient>,
+        callback: Cb
+    ) -> Self::Output<To, Gradient>;
}
pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetworkBase<Input>> {
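The `map_epsilon` method added to `NeuraGradientSolverTransient` above lets a network node transform the error signal (epsilon) carried by the solver's running output without touching the gradient it has accumulated. As a rough, self-contained analogy (not the crate's actual types): for a backpropagation-style solver whose output is an `(epsilon, gradient)` pair, mapping rewrites only the epsilon half, which is what the first hunk of this commit does.

// Toy stand-in (not the crate's API) for a solver whose running output is an
// (epsilon, gradient) pair: the callback rewrites the epsilon, the gradient is kept.
fn map_epsilon<From, To, Gradient>(
    rec_opt_output: (From, Gradient),
    callback: impl Fn(From) -> To,
) -> (To, Gradient) {
    (callback(rec_opt_output.0), rec_opt_output.1)
}

fn main() {
    // Epsilon starts as a scalar error signal; the accumulated gradient is untouched.
    let output = (0.5_f64, vec![0.1, -0.2]);
    let mapped = map_epsilon(output, |eps| vec![eps; 3]);
    assert_eq!(mapped, (vec![0.5, 0.5, 0.5], vec![0.1, -0.2]));
}

The solver in the second hunk carries no epsilon in its output, so its `map_epsilon` simply forwards the value unchanged.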

@@ -1,5 +1,5 @@
use crate::{
-    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolverBase, layer::NeuraLayer,
+    algebra::NeuraVectorSpace, gradient_solver::{NeuraGradientSolverBase, NeuraGradientSolverFinal}, layer::NeuraLayer,
};

pub mod residual;
@@ -31,3 +31,15 @@ where
        optimizer: &Optimizer,
    ) -> Optimizer::Output<Input, Self::Gradient>;
}
+
+impl<Input: Clone, Optimizer: NeuraGradientSolverFinal<Input>>
+    NeuraTrainableNetwork<Input, Optimizer> for ()
+{
+    fn traverse(
+        &self,
+        input: &Input,
+        optimizer: &Optimizer,
+    ) -> Optimizer::Output<Input, Self::Gradient> {
+        optimizer.eval_final(input.clone())
+    }
+}

@ -1,4 +1,4 @@
use std::borrow::Borrow; use std::{borrow::Borrow, rc::Rc};
use nalgebra::{Const, DVector, Dyn, VecStorage}; use nalgebra::{Const, DVector, Dyn, VecStorage};
@ -17,9 +17,10 @@ pub trait NeuraCombineInputs<T> {
type Combined; type Combined;
fn combine(&self, inputs: Vec<impl Borrow<T>>) -> Self::Combined; fn combine(&self, inputs: Vec<impl Borrow<T>>) -> Self::Combined;
}
// TODO: pub trait NeuraSplitInputs<T>: NeuraCombineInputs<T> {
// fn shape(&self, input_shapes: Vec<NeuraShape>) -> NeuraShape; fn split(&self, combined: Self::Combined, input_shapes: &[NeuraShape]) -> Vec<Rc<T>>;
} }
impl<F: Clone> NeuraCombineInputs<DVector<F>> for NeuraAxisAppend { impl<F: Clone> NeuraCombineInputs<DVector<F>> for NeuraAxisAppend {

@@ -0,0 +1,189 @@
+//! Implementations for NeuraLayer*
+
+use crate::{gradient_solver::NeuraGradientSolverTransient, network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase}};
+
+use super::*;
+
+impl<Axis, Layer, ChildNetwork> NeuraResidualNode<Layer, ChildNetwork, Axis> {
+    fn process_input<Data>(&self, input: &NeuraResidualInput<Data>) -> (Axis::Combined, NeuraResidualInput<Data>)
+    where
+        Axis: NeuraCombineInputs<Data>,
+        Layer: NeuraLayer<Axis::Combined>
+    {
+        let (inputs, rest) = input.shift();
+        let layer_input = self.axis.combine(inputs);
+
+        (layer_input, rest)
+    }
+
+    fn combine_outputs<Data>(&self, layer_output: Data, output: &mut NeuraResidualInput<Data>) -> Rc<Data> {
+        let layer_output = Rc::new(layer_output);
+
+        for &offset in &self.offsets {
+            output.push(offset, Rc::clone(&layer_output));
+        }
+
+        layer_output
+    }
+}
+impl<F: Float + Scalar, Layer, ChildNetwork, Axis> NeuraLayer<NeuraResidualInput<DVector<F>>>
+    for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    Axis: NeuraCombineInputs<DVector<F>>,
+    Layer: NeuraLayer<Axis::Combined, Output = DVector<F>>,
+    ChildNetwork: NeuraLayer<NeuraResidualInput<DVector<F>>>,
+{
+    type Output = <ChildNetwork as NeuraLayer<NeuraResidualInput<DVector<F>>>>::Output;
+
+    fn eval(&self, input: &NeuraResidualInput<DVector<F>>) -> Self::Output {
+        let (layer_input, mut rest) = self.process_input(input);
+
+        self.combine_outputs(self.layer.eval(&layer_input), &mut rest);
+
+        self.child_network.eval(&rest)
+    }
+}
+
+impl<F: Clone, Output: Clone, Layers> NeuraLayer<DVector<F>> for NeuraResidual<Layers>
+where
+    Layers: NeuraLayer<NeuraResidualInput<DVector<F>>, Output = NeuraResidualInput<Output>>,
+{
+    type Output = Output;
+
+    fn eval(&self, input: &DVector<F>) -> Self::Output {
+        let input: Rc<DVector<F>> = Rc::new((*input).clone());
+        let mut inputs = NeuraResidualInput::new();
+
+        for &offset in &self.initial_offsets {
+            inputs.push(offset, Rc::clone(&input));
+        }
+
+        drop(input);
+
+        let output = self.layers.eval(&inputs);
+
+        let result = output.get_first()
+            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
+            .into();
+
+        Rc::unwrap_or_clone(result)
+    }
+}
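The eval paths above all revolve around the same offset mechanism: `process_input` shifts out the inputs addressed to the current node and combines them along the axis, `combine_outputs` pushes the layer's output to every node listed in `self.offsets`, and `NeuraResidual::eval` seeds the structure by pushing the network input to `self.initial_offsets`. A minimal, self-contained sketch of that idea (a simplified stand-in, not the crate's `NeuraResidualInput`, whose `shift` returns the remaining queue instead of mutating in place):

use std::collections::VecDeque;
use std::rc::Rc;

/// Simplified model of an offset-addressed residual input queue.
struct ToyResidualQueue<T> {
    slots: VecDeque<Vec<Rc<T>>>,
}

impl<T> ToyResidualQueue<T> {
    fn new() -> Self {
        Self { slots: VecDeque::new() }
    }

    /// Register `value` as an input of the node `offset` steps ahead
    /// (offset 0 is the next node to run).
    fn push(&mut self, offset: usize, value: Rc<T>) {
        while self.slots.len() <= offset {
            self.slots.push_back(Vec::new());
        }
        self.slots[offset].push(value);
    }

    /// Pop the inputs addressed to the current node and advance the queue,
    /// so what was offset 1 becomes offset 0 for the next node.
    fn shift(&mut self) -> Vec<Rc<T>> {
        self.slots.pop_front().unwrap_or_default()
    }
}

fn main() {
    let mut queue = ToyResidualQueue::new();
    // The network input is pushed to offsets 0 and 1: the first node sees it,
    // and it is also skipped ahead to the second node (a residual connection).
    queue.push(0, Rc::new("input"));
    queue.push(1, Rc::new("input"));

    let first_node_inputs = queue.shift();
    assert_eq!(first_node_inputs.len(), 1);

    let second_node_inputs = queue.shift();
    assert_eq!(second_node_inputs.len(), 1);
}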
+pub struct NeuraResidualIntermediary<LayerIntermediary, LayerOutput, ChildIntermediary> {
+    layer_intermediary: LayerIntermediary,
+    layer_output: Rc<LayerOutput>,
+    child_intermediary: Box<ChildIntermediary>,
+}
+
+impl<
+    Data,
+    Axis: NeuraCombineInputs<Data>,
+    Layer: NeuraTrainableLayerBase<Axis::Combined, Output = Data>,
+    ChildNetwork: NeuraTrainableLayerBase<NeuraResidualInput<Data>>
+> NeuraTrainableLayerBase<NeuraResidualInput<Data>> for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    NeuraResidualNode<Layer, ChildNetwork, Axis>: NeuraLayer<NeuraResidualInput<Data>, Output = ChildNetwork::Output>
+{
+    type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
+    type IntermediaryRepr = NeuraResidualIntermediary<Layer::IntermediaryRepr, Layer::Output, ChildNetwork::IntermediaryRepr>;
+
+    fn default_gradient(&self) -> Self::Gradient {
+        (self.layer.default_gradient(), Box::new(self.child_network.default_gradient()))
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
+        self.layer.apply_gradient(&gradient.0);
+        self.child_network.apply_gradient(&gradient.1);
+    }
+
+    fn eval_training(&self, input: &NeuraResidualInput<Data>) -> (Self::Output, Self::IntermediaryRepr) {
+        let (layer_input, mut rest) = self.process_input(input);
+        let (layer_output, layer_intermediary) = self.layer.eval_training(&layer_input);
+        let layer_output = self.combine_outputs(layer_output, &mut rest);
+
+        let (output, child_intermediary) = self.child_network.eval_training(&rest);
+
+        let intermediary = NeuraResidualIntermediary {
+            layer_intermediary,
+            layer_output,
+            child_intermediary: Box::new(child_intermediary)
+        };
+
+        (output, intermediary)
+    }
+
+    fn prepare_layer(&mut self, is_training: bool) {
+        self.layer.prepare_layer(is_training);
+        self.child_network.prepare_layer(is_training);
+    }
+}
+
+impl<
+    Data,
+    Axis: NeuraCombineInputs<Data>,
+    Layer: NeuraTrainableLayerSelf<Axis::Combined, Output = Data>,
+    ChildNetwork: NeuraTrainableNetworkBase<NeuraResidualInput<Data>>,
+> NeuraTrainableNetworkBase<NeuraResidualInput<Data>> for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    Self: NeuraTrainableLayerBase<NeuraResidualInput<Data>, Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>)>,
+{
+    type Gradient = <Self as NeuraTrainableLayerBase<NeuraResidualInput<Data>>>::Gradient;
+    type LayerOutput = Layer::Output;
+
+    fn default_gradient(&self) -> Self::Gradient {
+        <Self as NeuraTrainableLayerBase<NeuraResidualInput<Data>>>::default_gradient(self)
+    }
+
+    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
+        <Self as NeuraTrainableLayerBase<NeuraResidualInput<Data>>>::apply_gradient(self, gradient)
+    }
+
+    fn regularize(&self) -> Self::Gradient {
+        (self.layer.regularize_layer(), Box::new(self.child_network.regularize()))
+    }
+
+    fn prepare(&mut self, train_iteration: bool) {
+        self.layer.prepare_layer(train_iteration);
+        self.child_network.prepare(train_iteration);
+    }
+}
+impl<
+    Data,
+    Axis: NeuraSplitInputs<Data>,
+    Layer: NeuraTrainableLayerSelf<Axis::Combined, Output = Data>,
+    Optimizer: NeuraGradientSolverTransient<Axis::Combined, Layer>,
+    ChildNetwork: NeuraTrainableNetwork<NeuraResidualInput<Data>, Optimizer>,
+> NeuraTrainableNetwork<NeuraResidualInput<Data>, Optimizer> for NeuraResidualNode<Layer, ChildNetwork, Axis>
+where
+    Self: NeuraTrainableLayerBase<NeuraResidualInput<Data>, Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>)>,
+{
+    fn traverse(
+        &self,
+        input: &NeuraResidualInput<Data>,
+        optimizer: &Optimizer,
+    ) -> Optimizer::Output<NeuraResidualInput<Data>, Self::Gradient> {
+        let (layer_input, mut rest) = self.process_input(input);
+        let (layer_output, layer_intermediary) = self.layer.eval_training(&layer_input);
+        let layer_output = self.combine_outputs(layer_output, &mut rest);
+
+        let child_result = self.child_network.traverse(&rest, optimizer);
+        // TODO: maybe move this to a custom impl of NeuraGradientSolverTransient for NeuraResidualInput?
+        // Or have a different set of traits for NeuraTrainableNetwork specific to NeuraResidualNodes
+        let child_result = optimizer.map_epsilon(child_result, |epsilon| {
+            // Pop the first value from `epsilon`, then:
+            // - compute its sum
+            // - use it to compute the outgoing epsilon of the current layer
+            // - split the outgoing epsilon into its original components, and push those back onto the rest
+            // At this point, the value for `epsilon` in the gradient solver's state should be ready for another iteration,
+            // with the first value containing the unsummed incoming epsilon values from the downstream layers
+            todo!();
+        });
+
+        todo!();
+    }
+}
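Setting aside the layer's own backpropagation, the steps listed in the TODO above boil down to two vector operations: summing the epsilon contributions that downstream nodes pushed for this node's output, and splitting the epsilon of the combined input back into per-input segments (the inverse of an append-style axis such as `NeuraAxisAppend`). A rough, self-contained illustration under those assumptions, using plain `nalgebra` vectors; the helper names are hypothetical and not part of the crate:

use std::rc::Rc;
use nalgebra::DVector;

/// Sum the epsilon values that downstream nodes pushed for this node's output.
/// Assumes at least one contribution is present.
fn sum_epsilons(epsilons: &[Rc<DVector<f64>>]) -> DVector<f64> {
    let mut sum = DVector::zeros(epsilons[0].len());
    for eps in epsilons {
        sum += &**eps;
    }
    sum
}

/// Split a combined epsilon back into the segments that an append-style axis
/// concatenated together, so each piece can be pushed back for its source node.
fn split_epsilon(combined: &DVector<f64>, input_lengths: &[usize]) -> Vec<DVector<f64>> {
    let mut pieces = Vec::with_capacity(input_lengths.len());
    let mut start = 0;
    for &len in input_lengths {
        pieces.push(combined.rows(start, len).into_owned());
        start += len;
    }
    pieces
}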

@@ -5,6 +5,8 @@ use num::Float;
use crate::layer::*;

+mod layer_impl;
+
mod input;
pub use input::*;

@@ -91,55 +93,6 @@ impl<Layer, ChildNetwork, Axis> NeuraResidualNode<Layer, ChildNetwork, Axis> {
    }
}
-impl<F: Float + Scalar, Layer, ChildNetwork, Axis> NeuraLayer<NeuraResidualInput<DVector<F>>>
-    for NeuraResidualNode<Layer, ChildNetwork, Axis>
-where
-    Axis: NeuraCombineInputs<DVector<F>>,
-    Layer: NeuraLayer<Axis::Combined, Output = DVector<F>>,
-    ChildNetwork: NeuraLayer<NeuraResidualInput<DVector<F>>>,
-{
-    type Output = <ChildNetwork as NeuraLayer<NeuraResidualInput<DVector<F>>>>::Output;
-
-    fn eval(&self, input: &NeuraResidualInput<DVector<F>>) -> Self::Output {
-        let (inputs, mut rest) = input.shift();
-        let layer_input = self.axis.combine(inputs);
-        let layer_output = Rc::new(self.layer.eval(&layer_input));
-
-        for &offset in &self.offsets {
-            rest.push(offset, Rc::clone(&layer_output));
-        }
-
-        self.child_network.eval(&rest)
-    }
-}
-
-impl<F: Clone, Output: Clone, Layers> NeuraLayer<DVector<F>> for NeuraResidual<Layers>
-where
-    Layers: NeuraLayer<NeuraResidualInput<DVector<F>>, Output = NeuraResidualInput<Output>>,
-{
-    type Output = Output;
-
-    fn eval(&self, input: &DVector<F>) -> Self::Output {
-        let input: Rc<DVector<F>> = Rc::new((*input).clone());
-        let mut inputs = NeuraResidualInput::new();
-
-        for &offset in &self.initial_offsets {
-            inputs.push(offset, Rc::clone(&input));
-        }
-
-        drop(input);
-
-        let output = self.layers.eval(&inputs);
-
-        let result = output.get_first()
-            .expect("Invalid NeuraResidual state: network returned no data, did you forget to link the last layer?")
-            .into();
-
-        Rc::unwrap_or_clone(result)
-    }
-}
#[macro_export]
macro_rules! neura_residual {
    [ "__combine_layers", ] => {

@@ -167,18 +167,6 @@ where
    }
}

-impl<Input: Clone, Optimizer: NeuraGradientSolverFinal<Input>>
-    NeuraTrainableNetwork<Input, Optimizer> for ()
-{
-    fn traverse(
-        &self,
-        input: &Input,
-        optimizer: &Optimizer,
-    ) -> Optimizer::Output<Input, Self::Gradient> {
-        optimizer.eval_final(input.clone())
-    }
-}
-
impl<Layer> From<Layer> for NeuraSequential<Layer, ()> {
    fn from(layer: Layer) -> Self {
        Self {
