🔥 Refactor of NeuraTrainableLayer, split it into multiple traits

main
Shad Amethyst 2 years ago
parent f3752bd411
commit d40098d2ef

@ -30,7 +30,7 @@ fn main() {
network.layer.weights.clone(),
network.layer.bias.clone(),
network.child_network.layer.weights.clone(),
network.child_network.layer.bias.clone()
network.child_network.layer.bias.clone(),
)];
for iteration in 0..4 {
@ -45,7 +45,7 @@ fn main() {
network.layer.weights.clone(),
network.layer.bias.clone(),
network.child_network.layer.weights.clone(),
network.child_network.layer.bias.clone()
network.child_network.layer.bias.clone(),
));
}

@ -1,6 +1,9 @@
use num::ToPrimitive;
use crate::{derivable::NeuraLoss, layer::NeuraTrainableLayer, network::NeuraTrainableNetworkBase};
use crate::{
derivable::NeuraLoss, layer::NeuraTrainableLayerBackprop, layer::NeuraTrainableLayerSelf,
network::NeuraTrainableNetworkBase,
};
use super::*;
@ -53,23 +56,26 @@ impl<LayerOutput, Target, Loss: NeuraLoss<LayerOutput, Target = Target>>
}
}
impl<LayerOutput, Target, Loss> NeuraGradientSolverTransient<LayerOutput>
for (&NeuraBackprop<Loss>, &Target)
{
fn eval_layer<
impl<
Input,
NetworkGradient,
RecGradient,
Layer: NeuraTrainableLayer<Input, Output = LayerOutput>,
>(
Target,
Loss,
Layer: NeuraTrainableLayerBackprop<Input> + NeuraTrainableLayerSelf<Input>,
> NeuraGradientSolverTransient<Input, Layer> for (&NeuraBackprop<Loss>, &Target)
{
fn eval_layer<NetworkGradient, RecGradient>(
&self,
layer: &Layer,
input: &Input,
rec_opt_output: Self::Output<LayerOutput, RecGradient>,
_output: &Layer::Output,
intermediary: &Layer::IntermediaryRepr,
rec_opt_output: Self::Output<Layer::Output, RecGradient>,
combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
) -> Self::Output<Input, NetworkGradient> {
let (epsilon_in, rec_gradient) = rec_opt_output;
let (epsilon_out, layer_gradient) = layer.backprop_layer(input, epsilon_in);
let epsilon_out = layer.backprop_layer(input, intermediary, &epsilon_in);
let layer_gradient = layer.get_gradient(input, intermediary, &epsilon_in);
(epsilon_out, combine_gradients(layer_gradient, rec_gradient))
}
@ -80,7 +86,11 @@ mod test {
use approx::assert_relative_eq;
use super::*;
use crate::{prelude::*, derivable::{activation::Tanh, loss::Euclidean, NeuraDerivable}, utils::uniform_vector};
use crate::{
derivable::{activation::Tanh, loss::Euclidean, NeuraDerivable},
prelude::*,
utils::uniform_vector,
};
#[test]
fn test_backprop_epsilon_bias() {
@ -91,16 +101,22 @@ mod test {
let network = neura_sequential![
neura_layer!("dense", 4, f64).activation(Tanh),
neura_layer!("dense", 2, f64).activation(Tanh)
].construct(NeuraShape::Vector(4)).unwrap();
]
.construct(NeuraShape::Vector(4))
.unwrap();
let optimizer = NeuraBackprop::new(Euclidean);
let input = uniform_vector(4);
let target = uniform_vector(2);
let layer1_intermediary = &network.layer.weights * &input;
let layer2_intermediary = &network.child_network.layer.weights * layer1_intermediary.map(|x| x.tanh());
let layer2_intermediary =
&network.child_network.layer.weights * layer1_intermediary.map(|x| x.tanh());
assert_relative_eq!(layer1_intermediary.map(|x| x.tanh()), network.clone().trim_tail().eval(&input));
assert_relative_eq!(
layer1_intermediary.map(|x| x.tanh()),
network.clone().trim_tail().eval(&input)
);
let output = network.eval(&input);
@ -110,25 +126,27 @@ mod test {
for i in 0..2 {
delta2_expected[i] *= Tanh.derivate(layer2_intermediary[i]);
}
let delta2_actual = gradient.1.0.1;
let delta2_actual = gradient.1 .0 .1;
assert_relative_eq!(delta2_actual.as_slice(), delta2_expected.as_slice());
let gradient2_expected = &delta2_expected * layer1_intermediary.map(|x| x.tanh()).transpose();
let gradient2_actual = gradient.1.0.0;
let gradient2_expected =
&delta2_expected * layer1_intermediary.map(|x| x.tanh()).transpose();
let gradient2_actual = gradient.1 .0 .0;
assert_relative_eq!(gradient2_actual.as_slice(), gradient2_expected.as_slice());
let mut delta1_expected = network.child_network.layer.weights.transpose() * delta2_expected;
let mut delta1_expected =
network.child_network.layer.weights.transpose() * delta2_expected;
for i in 0..4 {
delta1_expected[i] *= Tanh.derivate(layer1_intermediary[i]);
}
let delta1_actual = gradient.0.1;
let delta1_actual = gradient.0 .1;
assert_relative_eq!(delta1_actual.as_slice(), delta1_expected.as_slice());
let gradient1_expected = &delta1_expected * input.transpose();
let gradient1_actual = gradient.0.0;
let gradient1_actual = gradient.0 .0;
assert_relative_eq!(gradient1_actual.as_slice(), gradient1_expected.as_slice());
}

@ -1,7 +1,7 @@
use nalgebra::{DVector, Scalar};
use num::{traits::NumAssignOps, Float, ToPrimitive};
use crate::derivable::NeuraDerivable;
use crate::{derivable::NeuraDerivable, prelude::NeuraTrainableLayerSelf};
use super::*;
@ -90,22 +90,23 @@ impl<Act, LayerOutput> NeuraGradientSolverFinal<LayerOutput> for NeuraForwardPai
}
}
impl<F: Float + Scalar + NumAssignOps, Act: NeuraDerivable<F>>
NeuraGradientSolverTransient<DVector<F>> for NeuraForwardPair<Act>
{
fn eval_layer<
impl<
F: Float + Scalar + NumAssignOps,
Act: NeuraDerivable<F>,
Input,
NetworkGradient,
RecGradient,
Layer: NeuraTrainableLayer<Input, Output = DVector<F>>,
>(
Layer: NeuraTrainableLayerSelf<Input, Output = DVector<F>>,
> NeuraGradientSolverTransient<Input, Layer> for NeuraForwardPair<Act>
{
fn eval_layer<NetworkGradient, RecGradient>(
&self,
layer: &Layer,
input: &Input,
output: &Layer::Output,
intermediary: &Layer::IntermediaryRepr,
rec_gradient: RecGradient,
combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
) -> Self::Output<Input, NetworkGradient> {
let output = layer.eval(input);
// let output = layer.eval(input);
let goodness = output
.iter()
.copied()
@ -129,7 +130,7 @@ impl<F: Float + Scalar + NumAssignOps, Act: NeuraDerivable<F>>
}
// TODO: split backprop_layer into eval_training, get_gradient and get_backprop
let (_, layer_gradient) = layer.backprop_layer(input, goodness_derivative);
let layer_gradient = layer.get_gradient(input, intermediary, &goodness_derivative);
combine_gradients(layer_gradient, rec_gradient)
}

@ -5,7 +5,7 @@ mod forward_forward;
pub use forward_forward::NeuraForwardForward;
use crate::{
layer::NeuraTrainableLayer,
layer::NeuraTrainableLayerBase,
network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase},
};
@ -17,17 +17,16 @@ pub trait NeuraGradientSolverFinal<LayerOutput>: NeuraGradientSolverBase {
fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()>;
}
pub trait NeuraGradientSolverTransient<LayerOutput>: NeuraGradientSolverBase {
fn eval_layer<
Input,
NetworkGradient,
RecGradient,
Layer: NeuraTrainableLayer<Input, Output = LayerOutput>,
>(
pub trait NeuraGradientSolverTransient<Input, Layer: NeuraTrainableLayerBase<Input>>:
NeuraGradientSolverBase
{
fn eval_layer<NetworkGradient, RecGradient>(
&self,
layer: &Layer,
input: &Input,
rec_opt_output: Self::Output<LayerOutput, RecGradient>,
output: &Layer::Output,
layer_intermediary: &Layer::IntermediaryRepr,
rec_opt_output: Self::Output<Layer::Output, RecGradient>,
combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
) -> Self::Output<Input, NetworkGradient>;
}

@ -161,9 +161,9 @@ impl<
fn eval(&self, input: &DVector<F>) -> Self::Output {
assert_eq!(input.shape().0, self.weights.shape().1);
let res = &self.weights * input + &self.bias;
let evaluated = &self.weights * input + &self.bias;
res.map(|x| self.activation.eval(x))
evaluated.map(|x| self.activation.eval(x))
}
}
@ -171,9 +171,17 @@ impl<
F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraTrainableLayer<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
> NeuraTrainableLayerBase<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{
type Gradient = (DMatrix<F>, DVector<F>);
type IntermediaryRepr = DVector<F>; // pre-activation values
fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
let evaluated = &self.weights * input + &self.bias;
let output = evaluated.map(|x| self.activation.eval(x));
(output, evaluated)
}
fn default_gradient(&self) -> Self::Gradient {
(
@ -182,41 +190,70 @@ impl<
)
}
fn backprop_layer(
fn apply_gradient(&mut self, gradient: &Self::Gradient) {
self.weights += &gradient.0;
self.bias += &gradient.1;
}
}
impl<
F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraTrainableLayerSelf<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{
fn regularize_layer(&self) -> Self::Gradient {
(
self.weights.map(|x| self.regularization.derivate(x)),
DVector::zeros(self.bias.shape().0),
)
}
fn get_gradient(
&self,
input: &DVector<F>,
epsilon: Self::Output,
) -> (DVector<F>, Self::Gradient) {
let evaluated = &self.weights * input + &self.bias;
evaluated: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> Self::Gradient {
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone();
for i in 0..delta.len() {
// TODO: remove `- self.bias[i]`
delta[i] *= self.activation.derivate(evaluated[i]);
}
// Compute the weight gradient
let weights_gradient = &delta * input.transpose();
let new_epsilon = self.weights.tr_mul(&delta);
// According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
// The gradient of the bias is equal to the delta term of the backpropagation algorithm
let bias_gradient = delta;
(new_epsilon, (weights_gradient, bias_gradient))
(weights_gradient, bias_gradient)
}
}
fn regularize_layer(&self) -> Self::Gradient {
(
self.weights.map(|x| self.regularization.derivate(x)),
DVector::zeros(self.bias.shape().0),
)
impl<
F: Float + std::fmt::Debug + 'static + std::ops::AddAssign + std::ops::MulAssign,
Act: NeuraDerivable<F>,
Reg: NeuraDerivable<F>,
> NeuraTrainableLayerBackprop<DVector<F>> for NeuraDenseLayer<F, Act, Reg>
{
fn backprop_layer(
&self,
input: &DVector<F>,
evaluated: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> DVector<F> {
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone();
for i in 0..delta.len() {
delta[i] *= self.activation.derivate(evaluated[i]);
}
fn apply_gradient(&mut self, gradient: &Self::Gradient) {
self.weights += &gradient.0;
self.bias += &gradient.1;
self.weights.tr_mul(&delta)
}
}

@ -61,24 +61,15 @@ impl<R: Rng, F: Float> NeuraLayer<DVector<F>> for NeuraDropoutLayer<R> {
}
}
impl<R: Rng, F: Float> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R> {
impl<R: Rng, F: Float> NeuraTrainableLayerBase<DVector<F>> for NeuraDropoutLayer<R> {
type Gradient = ();
type IntermediaryRepr = ();
fn default_gradient(&self) -> Self::Gradient {
()
fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
(self.eval(input), ())
}
fn backprop_layer(
&self,
_input: &DVector<F>,
mut epsilon: Self::Output,
) -> (DVector<F>, Self::Gradient) {
self.apply_dropout(&mut epsilon);
(epsilon, ())
}
fn regularize_layer(&self) -> Self::Gradient {
fn default_gradient(&self) -> Self::Gradient {
()
}
@ -110,6 +101,36 @@ impl<R: Rng, F: Float> NeuraTrainableLayer<DVector<F>> for NeuraDropoutLayer<R>
}
}
impl<R: Rng, F: Float> NeuraTrainableLayerSelf<DVector<F>> for NeuraDropoutLayer<R> {
    /// The gradient type of this layer is `()`, so the regularization term is the unit value.
    fn regularize_layer(&self) -> Self::Gradient {}

    /// Returns the unit gradient: none of the arguments are read,
    /// since the gradient type of this layer is `()`.
    fn get_gradient(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
    }
}
impl<R: Rng, F: Float> NeuraTrainableLayerBackprop<DVector<F>> for NeuraDropoutLayer<R> {
    /// Propagates `epsilon` backwards through the dropout layer.
    ///
    /// The incoming `epsilon` is left untouched: a copy of it is passed through
    /// `apply_dropout` and returned. Neither the input nor the intermediary is read.
    // NOTE(review): presumably `apply_dropout` reuses the mask of the forward pass — confirm.
    fn backprop_layer(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> DVector<F> {
        let mut masked_epsilon = epsilon.clone();
        self.apply_dropout(&mut masked_epsilon);

        masked_epsilon
    }
}
#[cfg(test)]
mod test {
use super::*;
@ -121,7 +142,7 @@ mod test {
.unwrap();
for _ in 0..100 {
<NeuraDropoutLayer<_> as NeuraTrainableLayer<DVector<f64>>>::prepare_layer(
<NeuraDropoutLayer<_> as NeuraTrainableLayerBase<DVector<f64>>>::prepare_layer(
&mut layer, true,
);
assert!(layer.multiplier.is_finite());

@ -23,6 +23,7 @@ impl NeuraShape {
}
pub trait NeuraLayer<Input> {
/// What type the layer outputs
type Output;
fn eval(&self, input: &Input) -> Self::Output;
@ -46,12 +47,64 @@ pub trait NeuraPartialLayer {
fn output_shape(constructed: &Self::Constructed) -> NeuraShape;
}
pub trait NeuraTrainableLayer<Input>: NeuraLayer<Input> {
/// Base trait for layers that can take part in training.
///
/// It declares the gradient type and the intermediary representation produced by
/// `eval_training`, together with the methods used to obtain a default gradient
/// and to apply a gradient to the layer.
pub trait NeuraTrainableLayerBase<Input>: NeuraLayer<Input> {
/// The representation of the layer gradient as a vector space
type Gradient: NeuraVectorSpace;
/// An intermediary object type to be passed to the various training methods
type IntermediaryRepr;
/// Returns a default value for the layer's gradient.
// NOTE(review): presumably a zero gradient shaped like the layer's weights — confirm against implementors.
fn default_gradient(&self) -> Self::Gradient;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Gradient);
/// Evaluates the layer on `input`, returning both its output and the intermediary
/// representation that is later handed back to the training methods for that same `input`.
fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr);
/// Arbitrary computation that can be executed at the start of an epoch
///
/// The default implementation does nothing.
#[allow(unused_variables)]
#[inline(always)]
fn prepare_layer(&mut self, is_training: bool) {}
}
/// Contains methods relative to a layer's ability to compute its own weights gradients,
/// given the derivative of the output variables.
pub trait NeuraTrainableLayerSelf<Input>: NeuraTrainableLayerBase<Input> {
/// Computes the regularization term of the layer, expressed as a value of `Self::Gradient`.
fn regularize_layer(&self) -> Self::Gradient;
/// Computes the layer's gradient from `epsilon`, the derivative of the output variables.
///
/// `intermediary` is guaranteed to have been generated by a previous call to `eval_training`,
/// without mutation of `self` in-between, and with the same `input`.
fn get_gradient(
&self,
input: &Input,
intermediary: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> Self::Gradient;
}
// impl<Input, Layer: NeuraTrainableLayerBase<Input, Gradient = ()>> NeuraTrainableLayerSelf<Input>
// for Layer
// {
// #[inline(always)]
// fn regularize_layer(&self) -> Self::Gradient {
// ()
// }
// #[inline(always)]
// fn get_gradient(
// &self,
// input: &Input,
// intermediary: &Self::IntermediaryRepr,
// epsilon: Self::Output,
// ) -> Self::Gradient {
// ()
// }
// }
pub trait NeuraTrainableLayerBackprop<Input>: NeuraTrainableLayerBase<Input> {
/// Computes the backpropagation term (the derivative of the internal weights is computed separately by `NeuraTrainableLayerSelf::get_gradient`),
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
///
@ -63,42 +116,31 @@ pub trait NeuraTrainableLayer<Input>: NeuraLayer<Input> {
/// The function should then return `epsilon_{l-1}` (the weight gradient `δW_l` is computed separately by `get_gradient`),
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backprop_layer(&self, input: &Input, epsilon: Self::Output) -> (Input, Self::Gradient);
/// Computes the regularization
fn regularize_layer(&self) -> Self::Gradient;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Gradient);
/// Arbitrary computation that can be executed at the start of an epoch
#[allow(unused_variables)]
#[inline(always)]
fn prepare_layer(&mut self, is_training: bool) {}
fn backprop_layer(
&self,
input: &Input,
intermediary: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> Input;
}
impl<Input: Clone> NeuraTrainableLayer<Input> for () {
impl<Input: Clone> NeuraTrainableLayerBase<Input> for () {
type Gradient = ();
type IntermediaryRepr = ();
#[inline(always)]
fn default_gradient(&self) -> Self::Gradient {
()
}
#[inline(always)]
fn backprop_layer(&self, _input: &Input, epsilon: Self::Output) -> (Input, Self::Gradient) {
(epsilon, ())
}
#[inline(always)]
fn regularize_layer(&self) -> Self::Gradient {
()
}
#[inline(always)]
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
}
fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
(self.eval(input), ())
}
}
/// Temporary implementation of neura_layer

@ -1,4 +1,4 @@
use nalgebra::{DVector, Scalar};
use nalgebra::{DMatrix, DVector, Scalar};
use num::{traits::NumAssignOps, Float};
use super::*;
@ -54,14 +54,19 @@ impl<F: Float + Scalar> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
}
}
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraNormalizeLayer {
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBase<DVector<F>> for NeuraNormalizeLayer {
type Gradient = ();
type IntermediaryRepr = (DMatrix<F>, F); // Partial jacobian matrix (without the kroenecker term) and stddev
fn backprop_layer(
&self,
input: &DVector<F>,
epsilon: Self::Output,
) -> (DVector<F>, Self::Gradient) {
fn default_gradient(&self) -> Self::Gradient {
()
}
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
}
fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
let (mean, variance, len) = mean_variance(input);
let stddev = F::sqrt(variance);
let input_centered = input.clone().map(|x| x - mean);
@ -73,26 +78,42 @@ impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for Neura
*value += F::one() / (stddev * len);
}
let mut epsilon_out = jacobian_partial * &epsilon;
// Apply the δ_{ik}/σ term
for i in 0..epsilon_out.len() {
epsilon_out[i] += epsilon[i] / stddev;
(input_centered / stddev, (jacobian_partial, stddev))
}
}
(epsilon_out, ())
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerSelf<DVector<F>> for NeuraNormalizeLayer {
fn regularize_layer(&self) -> Self::Gradient {
()
}
fn default_gradient(&self) -> Self::Gradient {
fn get_gradient(
&self,
input: &DVector<F>,
intermediary: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> Self::Gradient {
()
}
}
fn regularize_layer(&self) -> Self::Gradient {
()
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBackprop<DVector<F>>
for NeuraNormalizeLayer
{
fn backprop_layer(
&self,
input: &DVector<F>,
(jacobian_partial, stddev): &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> DVector<F> {
let mut epsilon_out = jacobian_partial * epsilon;
// Apply the δ_{ik}/σ term
for i in 0..epsilon_out.len() {
epsilon_out[i] += epsilon[i] / *stddev;
}
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
epsilon_out
}
}

@ -54,22 +54,53 @@ impl NeuraPartialLayer for NeuraSoftmaxLayer {
}
}
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraSoftmaxLayer {
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBase<DVector<F>> for NeuraSoftmaxLayer {
    type Gradient = ();
    type IntermediaryRepr = Self::Output; // Result of self.eval

    /// The gradient type of this layer is `()`, so the default gradient is the unit value.
    fn default_gradient(&self) -> Self::Gradient {}

    /// Does nothing: the gradient carries no data to apply.
    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }

    /// Evaluates the layer and keeps a copy of the result as the intermediary representation,
    /// so that the training methods receive the result of `eval` without calling it again.
    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
        let output = self.eval(input);
        let intermediary = output.clone();

        (output, intermediary)
    }
}
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerSelf<DVector<F>> for NeuraSoftmaxLayer {
    /// The gradient type of this layer is `()`, so the regularization term is the unit value.
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    /// Returns the unit gradient.
    ///
    /// None of the arguments are read; they are prefixed with `_` so that the compiler
    /// does not report them as unused, matching the other layers whose gradient is `()`.
    #[inline(always)]
    fn get_gradient(
        &self,
        _input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
}
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayerBackprop<DVector<F>>
for NeuraSoftmaxLayer
{
fn backprop_layer(
&self,
input: &DVector<F>,
mut epsilon: Self::Output,
) -> (DVector<F>, Self::Gradient) {
// Note: a constant value can be added to `input` to bring it to increase precision
let evaluated = self.eval(input);
evaluated: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> DVector<F> {
let mut epsilon = epsilon.clone();
// Compute $a_{l-1,i} \epsilon_{l,i}$
// Compute $a_{l-1,i} ° \epsilon_{l,i}$
hadamard_product(&mut epsilon, &evaluated);
// Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$
@ -80,15 +111,7 @@ impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for Neura
epsilon[i] -= evaluated[i] * sum_diagonal_terms;
}
(epsilon, ())
}
fn regularize_layer(&self) -> Self::Gradient {
()
}
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
epsilon
}
}
@ -132,8 +155,9 @@ mod test {
for epsilon1 in [1.7, 1.9, 2.3] {
for epsilon2 in [2.9, 3.1, 3.7] {
let epsilon = dvector![epsilon1, epsilon2];
let evaluated = layer.eval(&input);
let (epsilon, _) = layer.backprop_layer(&input, epsilon);
let epsilon = layer.backprop_layer(&input, &evaluated, &epsilon);
let expected = [
output[0] * (1.0 - output[0]) * epsilon1
- output[1] * output[0] * epsilon2,
@ -165,7 +189,8 @@ mod test {
derivative += DMatrix::from_diagonal(&evaluated);
let expected = derivative * &loss;
let (actual, _) = layer.backprop_layer(&input, loss);
let evaluated = layer.eval(&input);
let actual = layer.backprop_layer(&input, &evaluated, &loss);
for i in 0..4 {
assert!((expected[i] - actual[i]).abs() < EPSILON);

@ -4,6 +4,7 @@ use crate::{
pub mod sequential;
// TODO: extract regularize from this, so that we can drop the trait constraints on NeuraSequential's impl
pub trait NeuraTrainableNetworkBase<Input>: NeuraLayer<Input> {
type Gradient: NeuraVectorSpace;
type LayerOutput;

@ -0,0 +1,96 @@
use super::*;
use crate::prelude::NeuraTrainableLayerBackprop;
impl<Input, Layer, ChildNetwork> NeuraLayer<Input> for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraLayer<Input>,
    ChildNetwork: NeuraLayer<Layer::Output>,
{
    type Output = ChildNetwork::Output;

    /// Evaluates the head layer on `input`, then feeds its output to the child network.
    fn eval(&self, input: &Input) -> Self::Output {
        let layer_output = self.layer.eval(input);

        self.child_network.eval(&layer_output)
    }
}
impl<Input, Layer, ChildNetwork> NeuraTrainableLayerBase<Input>
    for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraTrainableLayerBase<Input>,
    ChildNetwork: NeuraTrainableLayerBase<Layer::Output>,
{
    /// Gradient of the head layer, followed by the (boxed) gradient of the child network.
    type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
    /// Intermediary of the head layer, followed by the (boxed) intermediary of the child network.
    type IntermediaryRepr = (Layer::IntermediaryRepr, Box<ChildNetwork::IntermediaryRepr>);

    /// Pairs the default gradient of the head layer with the one of the child network.
    fn default_gradient(&self) -> Self::Gradient {
        let layer_gradient = self.layer.default_gradient();
        let child_gradient = self.child_network.default_gradient();

        (layer_gradient, Box::new(child_gradient))
    }

    /// Runs `eval_training` on the head layer, then on the child network using the head's output,
    /// and returns the final output together with both intermediary representations.
    fn eval_training(&self, input: &Input) -> (Self::Output, Self::IntermediaryRepr) {
        let (transient_output, layer_repr) = self.layer.eval_training(input);
        let (output, child_repr) = self.child_network.eval_training(&transient_output);

        (output, (layer_repr, Box::new(child_repr)))
    }

    /// Forwards `prepare_layer` to the head layer first, then to the child network.
    fn prepare_layer(&mut self, is_training: bool) {
        self.layer.prepare_layer(is_training);
        self.child_network.prepare_layer(is_training);
    }

    /// Applies each half of `gradient` to the part of the network it belongs to.
    fn apply_gradient(&mut self, gradient: &Self::Gradient) {
        let (layer_gradient, child_gradient) = gradient;

        self.layer.apply_gradient(layer_gradient);
        self.child_network.apply_gradient(child_gradient);
    }
}
impl<
        Input,
        Layer: NeuraTrainableLayerSelf<Input>,
        ChildNetwork: NeuraTrainableLayerSelf<Layer::Output> + NeuraTrainableLayerBackprop<Layer::Output>,
    > NeuraTrainableLayerSelf<Input> for NeuraSequential<Layer, ChildNetwork>
{
    /// Pairs the regularization term of the head layer with the one of the child network.
    fn regularize_layer(&self) -> Self::Gradient {
        (
            self.layer.regularize_layer(),
            Box::new(self.child_network.regularize_layer()),
        )
    }

    /// Computes the gradient of the whole sequence from `epsilon`, the derivative of its output.
    ///
    /// The child network computes its own gradient directly from `epsilon`. The head layer needs
    /// the derivative of *its* output instead, which is obtained by back-propagating `epsilon`
    /// through the child network.
    ///
    /// The output of the head layer is not stored in `intermediary`, so it is recomputed with
    /// `eval`, as `backprop_layer` does for this type.
    fn get_gradient(
        &self,
        input: &Input,
        intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> Self::Gradient {
        let transient_output = self.layer.eval(input);

        let child_gradient =
            self.child_network
                .get_gradient(&transient_output, &intermediary.1, epsilon);

        // Derivative of the head layer's output, as seen through the child network
        let transient_epsilon =
            self.child_network
                .backprop_layer(&transient_output, &intermediary.1, epsilon);

        let layer_gradient = self
            .layer
            .get_gradient(input, &intermediary.0, &transient_epsilon);

        (layer_gradient, Box::new(child_gradient))
    }
}
impl<Input, Layer, ChildNetwork> NeuraTrainableLayerBackprop<Input>
    for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraTrainableLayerBackprop<Input>,
    ChildNetwork: NeuraTrainableLayerBackprop<Layer::Output>,
{
    /// Back-propagates `incoming_epsilon` through the child network, then through the head layer.
    ///
    /// The output of the head layer is recomputed with `eval`, since the child network
    /// expects it as its `input` argument.
    fn backprop_layer(
        &self,
        input: &Input,
        intermediary: &Self::IntermediaryRepr,
        incoming_epsilon: &Self::Output,
    ) -> Input {
        let head_output = self.layer.eval(input);

        // Epsilon at the boundary between the head layer and the child network
        let boundary_epsilon =
            self.child_network
                .backprop_layer(&head_output, &intermediary.1, incoming_epsilon);

        self.layer
            .backprop_layer(input, &intermediary.0, &boundary_epsilon)
    }
}

@ -1,10 +1,12 @@
use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
use crate::{
gradient_solver::{NeuraGradientSolverFinal, NeuraGradientSolverTransient},
layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer},
layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayerBase},
prelude::NeuraTrainableLayerSelf,
};
mod construct;
mod layer_impl;
mod tail;
pub use construct::*;
@ -24,7 +26,7 @@ pub use tail::*;
/// ## Notes on implemented traits
///
/// The different implementations for `NeuraTrainableNetwork`,
/// `NeuraLayer` and `NeuraTrainableLayer` each require that `ChildNetwork` implements those respective traits,
/// `NeuraLayer` and `NeuraTrainableLayerBase` each require that `ChildNetwork` implements those respective traits,
/// and that the output type of `Layer` matches the input type of `ChildNetwork`.
///
/// If a method, like `eval`, is reported as missing,
@ -74,61 +76,9 @@ impl<Layer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
}
}
impl<Input, Layer: NeuraLayer<Input>, ChildNetwork: NeuraLayer<Layer::Output>> NeuraLayer<Input>
for NeuraSequential<Layer, ChildNetwork>
{
type Output = ChildNetwork::Output;
fn eval(&self, input: &Input) -> Self::Output {
self.child_network.eval(&self.layer.eval(input))
}
}
impl<
Input,
Layer: NeuraTrainableLayer<Input>,
ChildNetwork: NeuraTrainableLayer<Layer::Output>,
> NeuraTrainableLayer<Input> for NeuraSequential<Layer, ChildNetwork>
{
type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
fn default_gradient(&self) -> Self::Gradient {
(
self.layer.default_gradient(),
Box::new(self.child_network.default_gradient()),
)
}
fn backprop_layer(
&self,
input: &Input,
incoming_epsilon: Self::Output,
) -> (Input, Self::Gradient) {
let output = self.layer.eval(input);
let (transient_epsilon, child_gradient) =
self.child_network.backprop_layer(&output, incoming_epsilon);
let (outgoing_epsilon, layer_gradient) =
self.layer.backprop_layer(input, transient_epsilon);
(outgoing_epsilon, (layer_gradient, Box::new(child_gradient)))
}
fn regularize_layer(&self) -> Self::Gradient {
(
self.layer.regularize_layer(),
Box::new(self.child_network.regularize_layer()),
)
}
fn apply_gradient(&mut self, gradient: &Self::Gradient) {
self.layer.apply_gradient(&gradient.0);
self.child_network.apply_gradient(&gradient.1);
}
}
impl<
Input,
Layer: NeuraTrainableLayer<Input>,
Layer: NeuraTrainableLayerBase<Input> + NeuraTrainableLayerSelf<Input>,
ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
> NeuraTrainableNetworkBase<Input> for NeuraSequential<Layer, ChildNetwork>
{
@ -188,8 +138,8 @@ impl<Input: Clone> NeuraTrainableNetworkBase<Input> for () {
impl<
Input,
Layer: NeuraTrainableLayer<Input>,
Optimizer: NeuraGradientSolverTransient<Layer::Output>,
Layer: NeuraTrainableLayerBase<Input> + NeuraTrainableLayerSelf<Input>,
Optimizer: NeuraGradientSolverTransient<Input, Layer>,
ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
> NeuraTrainableNetwork<Input, Optimizer> for NeuraSequential<Layer, ChildNetwork>
where
@ -200,12 +150,14 @@ where
input: &Input,
optimizer: &Optimizer,
) -> Optimizer::Output<Input, Self::Gradient> {
let next_activation = self.layer.eval(input);
let (next_activation, intermediary) = self.layer.eval_training(input);
let child_result = self.child_network.traverse(&next_activation, optimizer);
optimizer.eval_layer(
&self.layer,
input,
&next_activation,
&intermediary,
child_result,
|layer_gradient, child_gradient| (layer_gradient, Box::new(child_gradient)),
)

@ -82,7 +82,10 @@ impl NeuraBatchedTrainer {
network: &mut Network,
inputs: Inputs,
test_inputs: &[(Input, Target)],
) -> Vec<(f64, f64)> {
) -> Vec<(f64, f64)>
where
<Network as NeuraTrainableNetworkBase<Input>>::Gradient: std::fmt::Debug,
{
let mut losses = Vec::new();
let mut iter = inputs.into_iter();
let factor = -self.learning_rate / (self.batch_size as f64);

@ -1,12 +1,20 @@
use std::fs::File;
use approx::assert_relative_eq;
use nalgebra::{DMatrix, DVector, dvector};
use neuramethyst::{prelude::{*, dense::NeuraDenseLayer}, derivable::{activation::{Relu, Tanh}, regularize::NeuraL0, loss::Euclidean}};
use nalgebra::{dvector, DMatrix, DVector};
use neuramethyst::{
derivable::{
activation::{Relu, Tanh},
loss::Euclidean,
regularize::NeuraL0,
},
prelude::{dense::NeuraDenseLayer, *},
};
fn load_test_data() -> Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> {
let file = File::open("tests/xor.json").unwrap();
let data: Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> = serde_json::from_reader(&file).unwrap();
let data: Vec<(DMatrix<f64>, DVector<f64>, DMatrix<f64>, DVector<f64>)> =
serde_json::from_reader(&file).unwrap();
data
}
@ -43,7 +51,7 @@ fn test_xor_training() {
network.layer.weights.clone(),
network.layer.bias.clone(),
network.child_network.layer.weights.clone(),
network.child_network.layer.bias.clone()
network.child_network.layer.bias.clone(),
);
assert_relative_eq!(expected.0.as_slice(), actual.0.as_slice());

Loading…
Cancel
Save