🎨 Generic way of computing backpropagation and other gradient solvers

main
Shad Amethyst 2 years ago
parent cb862f12cc
commit 81de6ddbcd

@ -5,6 +5,7 @@ pub mod algebra;
pub mod derivable; pub mod derivable;
pub mod layer; pub mod layer;
pub mod network; pub mod network;
pub mod optimize;
pub mod train; pub mod train;
mod utils; mod utils;
@ -21,5 +22,6 @@ pub mod prelude {
pub use crate::network::sequential::{ pub use crate::network::sequential::{
NeuraSequential, NeuraSequentialConstruct, NeuraSequentialTail, NeuraSequential, NeuraSequentialConstruct, NeuraSequentialTail,
}; };
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; pub use crate::optimize::NeuraBackprop;
pub use crate::train::NeuraBatchedTrainer;
} }

@ -1,25 +1,29 @@
use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer}; use crate::{algebra::NeuraVectorSpace, layer::NeuraLayer, optimize::NeuraOptimizerBase};
pub mod sequential; pub mod sequential;
pub trait NeuraTrainableNetwork<Input>: NeuraLayer<Input> { pub trait NeuraTrainableNetworkBase<Input>: NeuraLayer<Input> {
type Gradient: NeuraVectorSpace; type Gradient: NeuraVectorSpace;
type LayerOutput;
fn default_gradient(&self) -> Self::Gradient; fn default_gradient(&self) -> Self::Gradient;
fn apply_gradient(&mut self, gradient: &Self::Gradient); fn apply_gradient(&mut self, gradient: &Self::Gradient);
/// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
fn backpropagate<Loss: NeuraLoss<Self::Output>>(
&self,
input: &Input,
target: &Loss::Target,
loss: Loss,
) -> (Input, Self::Gradient);
/// Should return the regularization gradient /// Should return the regularization gradient
fn regularize(&self) -> Self::Gradient; fn regularize(&self) -> Self::Gradient;
/// Called before an iteration begins, to allow the network to set itself up for training or not. /// Called before an iteration begins, to allow the network to set itself up for training or not.
fn prepare(&mut self, train_iteration: bool); fn prepare(&mut self, train_iteration: bool);
} }
/// A trainable network that can be walked by a specific `Optimizer`.
///
/// This builds on `NeuraTrainableNetworkBase` and adds a single operation whose
/// result type is selected by the optimizer through `NeuraOptimizerBase::Output`.
pub trait NeuraTrainableNetwork<Input, Optimizer: NeuraOptimizerBase>:
    NeuraTrainableNetworkBase<Input>
{
    /// Runs `optimizer` across the network for the given `input`.
    ///
    /// The returned value is the optimizer's `Output`, instantiated with this
    /// network's input type and its `Gradient` type.
    fn traverse(
        &self,
        input: &Input,
        optimizer: &Optimizer,
    ) -> Optimizer::Output<Input, Self::Gradient>;
}

@ -1,7 +1,7 @@
use super::NeuraTrainableNetwork; use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
use crate::{ use crate::{
derivable::NeuraLoss,
layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer}, layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer},
optimize::{NeuraOptimizerFinal, NeuraOptimizerTransient},
}; };
mod construct; mod construct;
@ -129,10 +129,11 @@ impl<
impl< impl<
Input, Input,
Layer: NeuraTrainableLayer<Input>, Layer: NeuraTrainableLayer<Input>,
ChildNetwork: NeuraTrainableNetwork<Layer::Output>, ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
> NeuraTrainableNetwork<Input> for NeuraSequential<Layer, ChildNetwork> > NeuraTrainableNetworkBase<Input> for NeuraSequential<Layer, ChildNetwork>
{ {
type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>); type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
type LayerOutput = Layer::Output;
fn default_gradient(&self) -> Self::Gradient { fn default_gradient(&self) -> Self::Gradient {
( (
@ -146,25 +147,6 @@ impl<
self.child_network.apply_gradient(&gradient.1); self.child_network.apply_gradient(&gradient.1);
} }
/// Backpropagation through this layer followed by the rest of the network.
///
/// The layer is first evaluated on `input`; its output is handed to the child
/// network's `backpropagate` together with `target` and `loss`. The value that
/// comes back from the child is then pushed through `backprop_layer` of this layer.
///
/// Returns the first element produced by `backprop_layer`, paired with the
/// gradient tuple `(layer gradient, boxed child gradient)`.
fn backpropagate<Loss: NeuraLoss<Self::Output>>(
    &self,
    input: &Input,
    target: &Loss::Target,
    loss: Loss,
) -> (Input, Self::Gradient) {
    // Forward pass: the child network works on this layer's output.
    let layer_output = self.layer.eval(input);

    let (epsilon_from_child, child_gradient) =
        self.child_network
            .backpropagate(&layer_output, target, loss);

    let (epsilon_for_parent, layer_gradient) =
        self.layer.backprop_layer(input, epsilon_from_child);

    let network_gradient = (layer_gradient, Box::new(child_gradient));
    (epsilon_for_parent, network_gradient)
}
fn regularize(&self) -> Self::Gradient { fn regularize(&self) -> Self::Gradient {
( (
self.layer.regularize_layer(), self.layer.regularize_layer(),
@ -179,8 +161,9 @@ impl<
} }
/// A dummy implementation of `NeuraTrainableNetwork`, which simply calls `loss.eval` in `backpropagate`. /// A dummy implementation of `NeuraTrainableNetwork`, which simply calls `loss.eval` in `backpropagate`.
impl<Input: Clone> NeuraTrainableNetwork<Input> for () { impl<Input: Clone> NeuraTrainableNetworkBase<Input> for () {
type Gradient = (); type Gradient = ();
type LayerOutput = Input;
#[inline(always)] #[inline(always)]
fn default_gradient(&self) -> () { fn default_gradient(&self) -> () {
@ -192,18 +175,6 @@ impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
// Noop // Noop
} }
/// Terminal step of backpropagation: there is no layer left, so the result is
/// `loss.nabla` applied to `target` and the final activation, with an empty
/// gradient.
#[inline(always)]
fn backpropagate<Loss: NeuraLoss<Self::Output>>(
    &self,
    final_activation: &Input,
    target: &Loss::Target,
    loss: Loss,
) -> (Input, Self::Gradient) {
    (loss.nabla(target, &final_activation), ())
}
#[inline(always)] #[inline(always)]
fn regularize(&self) -> () { fn regularize(&self) -> () {
() ()
@ -215,6 +186,44 @@ impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
} }
} }
/// Recursive case of the optimizer traversal: handles one layer, then defers to
/// the child network for everything after it.
impl<Input, Layer, Optimizer, ChildNetwork> NeuraTrainableNetwork<Input, Optimizer>
    for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraTrainableLayer<Input>,
    Optimizer: NeuraOptimizerTransient<Layer::Output>,
    ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
    ChildNetwork: NeuraTrainableNetwork<Layer::Output, Optimizer>,
{
    /// Evaluates this layer on `input`, traverses the child network with the
    /// resulting activation, and finally lets the optimizer process this layer
    /// via `eval_layer`.
    ///
    /// The closure passed to `eval_layer` assembles this network's gradient as
    /// `(layer gradient, boxed child gradient)`.
    fn traverse(
        &self,
        input: &Input,
        optimizer: &Optimizer,
    ) -> Optimizer::Output<Input, Self::Gradient> {
        // Forward pass through this layer; its output feeds the rest of the network.
        let layer_output = self.layer.eval(input);

        // The tail of the network is visited before this layer is handed to the optimizer.
        let tail_result = self.child_network.traverse(&layer_output, optimizer);

        optimizer.eval_layer(&self.layer, input, tail_result, |own, tail| {
            (own, Box::new(tail))
        })
    }
}
/// Base case of the optimizer traversal: the empty network `()` has no layer, so
/// it hands a clone of its input straight to the optimizer's `eval_final`.
impl<Input, Optimizer> NeuraTrainableNetwork<Input, Optimizer> for ()
where
    Input: Clone,
    Optimizer: NeuraOptimizerFinal<Input>,
{
    fn traverse(
        &self,
        input: &Input,
        optimizer: &Optimizer,
    ) -> Optimizer::Output<Input, Self::Gradient> {
        // `eval_final` takes its argument by value, hence the clone.
        let final_activation = input.clone();
        optimizer.eval_final(final_activation)
    }
}
impl<Layer> From<Layer> for NeuraSequential<Layer, ()> { impl<Layer> From<Layer> for NeuraSequential<Layer, ()> {
fn from(layer: Layer) -> Self { fn from(layer: Layer) -> Self {
Self { Self {

@ -0,0 +1,112 @@
use num::ToPrimitive;
use crate::{
derivable::NeuraLoss,
layer::NeuraTrainableLayer,
network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase},
};
/// Common base of all optimizers: declares the type an optimizer produces when it
/// is run over a network (see `NeuraTrainableNetwork::traverse`).
pub trait NeuraOptimizerBase {
    /// Result of running the optimizer over a (sub)network, parameterized by that
    /// network's input type and by its gradient type.
    type Output<NetworkInput, NetworkGradient>;
}
/// Optimizer behavior at the end of a network.
///
/// The empty network `()` calls `eval_final` from its `traverse` implementation,
/// passing a clone of the value it received as input.
pub trait NeuraOptimizerFinal<LayerOutput>: NeuraOptimizerBase {
    /// Produces the optimizer output for the end of the network from `output`.
    /// The gradient slot of the result is `()`.
    fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()>;
}
/// Optimizer behavior for a layer that is followed by more network.
///
/// `NeuraSequential` calls `eval_layer` from its `traverse` implementation after it
/// has traversed its child network.
pub trait NeuraOptimizerTransient<LayerOutput>: NeuraOptimizerBase {
    /// Processes one `layer`, given the optimizer output of the network that follows it.
    ///
    /// * `layer` - the layer being processed; its output type is `LayerOutput`.
    /// * `input` - the input that was fed to `layer`.
    /// * `rec_opt_output` - the optimizer output obtained from the rest of the network,
    ///   whose input type is `LayerOutput`.
    /// * `combine_gradients` - merges the gradient of `layer` with the gradient carried by
    ///   `rec_opt_output` into the gradient type of the combined network.
    ///
    /// Returns the optimizer output for the network that starts at `layer`.
    fn eval_layer<
        Input,
        NetworkGradient,
        RecGradient,
        Layer: NeuraTrainableLayer<Input, Output = LayerOutput>,
    >(
        &self,
        layer: &Layer,
        input: &Input,
        rec_opt_output: Self::Output<LayerOutput, RecGradient>,
        combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
    ) -> Self::Output<Input, NetworkGradient>;
}
/// Computes gradients and scores for a trainable network from `(input, target)` pairs.
pub trait NeuraOptimizer<Input, Target, Trainable: NeuraTrainableNetworkBase<Input>> {
    /// Returns the gradient of `trainable` for a single `input` and its `target`.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient;

    /// Returns a scalar score for `trainable` on a single `input` and its `target`.
    // NOTE(review): presumably a loss value where lower is better; confirm against the caller.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64;
}
/// Backpropagation-based optimizer, parameterized by the loss it uses.
pub struct NeuraBackprop<Loss> {
    /// Loss used both to seed backpropagation (`nabla`) and to score outputs (`eval`).
    loss: Loss,
}

impl<Loss> NeuraBackprop<Loss> {
    /// Creates a backpropagation optimizer that uses the given `loss`.
    pub fn new(loss: Loss) -> Self {
        NeuraBackprop { loss }
    }
}
/// Lets `NeuraBackprop` act as a `NeuraOptimizer` for any network that can be
/// traversed with the `(&NeuraBackprop<Loss>, &Target)` optimizer pair.
///
/// `Loss` is not required to be `Clone`: the loss is only ever used through a
/// shared reference.
impl<
        Input,
        Target,
        Trainable: NeuraTrainableNetworkBase<Input>,
        Loss: NeuraLoss<Trainable::Output, Target = Target>,
    > NeuraOptimizer<Input, Target, Trainable> for NeuraBackprop<Loss>
where
    <Loss as NeuraLoss<Trainable::Output>>::Output: ToPrimitive,
    Trainable: for<'a> NeuraTrainableNetwork<Input, (&'a NeuraBackprop<Loss>, &'a Target)>,
{
    /// Traverses `trainable` with the pair `(self, target)` as optimizer and returns
    /// the gradient half of the result; the other half is discarded.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient {
        let (_, gradient) = trainable.traverse(input, &(self, target));
        gradient
    }

    /// Evaluates `trainable` on `input` and returns the loss between `target` and
    /// that output, converted to `f64`.
    ///
    /// # Panics
    ///
    /// Panics if the loss value cannot be converted to `f64`.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64 {
        // `input` is already a reference; pass it through as-is.
        let output = trainable.eval(input);
        self.loss
            .eval(target, &output)
            .to_f64()
            .expect("loss value should be convertible to f64")
    }
}
/// The pair `(&NeuraBackprop<Loss>, &Target)` is the optimizer that is actually
/// passed to `traverse`: it carries the loss together with the target.
impl<Loss, Target> NeuraOptimizerBase for (&NeuraBackprop<Loss>, &Target) {
    /// First element: the epsilon value, typed as the network's input.
    /// Second element: the network's gradient.
    type Output<NetworkInput, NetworkGradient> = (NetworkInput, NetworkGradient); // epsilon, gradient
}
/// End-of-network step of backpropagation: seeds the chain with the loss's
/// `nabla` for the carried target and the network's final output.
impl<LayerOutput, Target, Loss> NeuraOptimizerFinal<LayerOutput>
    for (&NeuraBackprop<Loss>, &Target)
where
    Loss: NeuraLoss<LayerOutput, Target = Target>,
{
    fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()> {
        // Both tuple fields are shared references, so copying them out is free.
        let (backprop, target) = *self;
        let epsilon = backprop.loss.nabla(target, &output);
        (epsilon, ())
    }
}
/// Per-layer step of backpropagation: feeds the epsilon coming from the rest of
/// the network into the layer's `backprop_layer` and merges the gradients.
impl<LayerOutput, Target, Loss> NeuraOptimizerTransient<LayerOutput>
    for (&NeuraBackprop<Loss>, &Target)
{
    fn eval_layer<
        Input,
        NetworkGradient,
        RecGradient,
        Layer: NeuraTrainableLayer<Input, Output = LayerOutput>,
    >(
        &self,
        layer: &Layer,
        input: &Input,
        rec_opt_output: Self::Output<LayerOutput, RecGradient>,
        combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
    ) -> Self::Output<Input, NetworkGradient> {
        // What the rest of the network produced: its epsilon and its gradient.
        let (epsilon_from_child, child_gradient) = rec_opt_output;

        let (epsilon_for_parent, own_gradient) =
            layer.backprop_layer(input, epsilon_from_child);

        let network_gradient = combine_gradients(own_gradient, child_gradient);
        (epsilon_for_parent, network_gradient)
    }
}

@ -1,52 +1,6 @@
use num::ToPrimitive; use crate::{
algebra::NeuraVectorSpace, network::NeuraTrainableNetworkBase, optimize::NeuraOptimizer,
use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, network::NeuraTrainableNetwork}; };
/// Computes gradients and scores for a trainable network from `(input, target)` pairs.
pub trait NeuraGradientSolver<Input, Target, Trainable: NeuraTrainableNetwork<Input>> {
    /// Returns the gradient of `trainable` for a single `input` and its `target`.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient;

    /// Returns a scalar score for `trainable` on a single `input` and its `target`.
    // NOTE(review): presumably a loss value where lower is better; confirm against the caller.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64;
}
/// Backpropagation-based gradient solver, parameterized by the loss it uses.
#[non_exhaustive]
pub struct NeuraBackprop<Loss> {
    /// Loss handed to the network's `backpropagate` and used to score outputs.
    loss: Loss,
}

impl<Loss> NeuraBackprop<Loss> {
    /// Creates a backpropagation solver that uses the given `loss`.
    pub fn new(loss: Loss) -> Self {
        NeuraBackprop { loss }
    }
}
/// Lets `NeuraBackprop` act as a `NeuraGradientSolver` by delegating to the
/// network's own `backpropagate`.
impl<Input, Target, Trainable, Loss> NeuraGradientSolver<Input, Target, Trainable>
    for NeuraBackprop<Loss>
where
    Trainable: NeuraTrainableNetwork<Input>,
    Loss: NeuraLoss<Trainable::Output, Target = Target> + Clone,
    <Loss as NeuraLoss<Trainable::Output>>::Output: ToPrimitive,
{
    /// Backpropagates through `trainable` with a clone of the loss and keeps only
    /// the gradient half of the result.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient {
        // `backpropagate` takes the loss by value, hence the clone.
        let (_, gradient) = trainable.backpropagate(input, target, self.loss.clone());
        gradient
    }

    /// Evaluates `trainable` on `input` and returns the loss between `target` and
    /// that output as an `f64`. Panics if the conversion to `f64` fails.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64 {
        let output = trainable.eval(&input);
        let loss_value = self.loss.eval(target, &output);
        loss_value.to_f64().unwrap()
    }
}
#[non_exhaustive] #[non_exhaustive]
pub struct NeuraBatchedTrainer { pub struct NeuraBatchedTrainer {
@ -118,8 +72,8 @@ impl NeuraBatchedTrainer {
pub fn train< pub fn train<
Input: Clone, Input: Clone,
Target: Clone, Target: Clone,
Network: NeuraTrainableNetwork<Input>, Network: NeuraTrainableNetworkBase<Input>,
GradientSolver: NeuraGradientSolver<Input, Target, Network>, GradientSolver: NeuraOptimizer<Input, Target, Network>,
Inputs: IntoIterator<Item = (Input, Target)>, Inputs: IntoIterator<Item = (Input, Target)>,
>( >(
&self, &self,
@ -185,10 +139,11 @@ mod test {
use super::*; use super::*;
use crate::{ use crate::{
assert_approx, assert_approx,
derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0}, derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0, NeuraLoss},
layer::{dense::NeuraDenseLayer, NeuraLayer}, layer::{dense::NeuraDenseLayer, NeuraLayer},
network::sequential::{NeuraSequential, NeuraSequentialTail}, network::sequential::{NeuraSequential, NeuraSequentialTail},
neura_sequential, neura_sequential,
optimize::NeuraBackprop,
}; };
#[test] #[test]

Loading…
Cancel
Save