🎨 Generic way of computing backpropagation and other gradient solvers

main
Shad Amethyst 2 years ago
parent cb862f12cc
commit 81de6ddbcd

@ -5,6 +5,7 @@ pub mod algebra;
pub mod derivable;
pub mod layer;
pub mod network;
pub mod optimize;
pub mod train;
mod utils;
@ -21,5 +22,6 @@ pub mod prelude {
pub use crate::network::sequential::{
NeuraSequential, NeuraSequentialConstruct, NeuraSequentialTail,
};
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
pub use crate::optimize::NeuraBackprop;
pub use crate::train::NeuraBatchedTrainer;
}

@ -1,25 +1,29 @@
use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer};
use crate::{algebra::NeuraVectorSpace, layer::NeuraLayer, optimize::NeuraOptimizerBase};
pub mod sequential;
pub trait NeuraTrainableNetwork<Input>: NeuraLayer<Input> {
/// Optimizer-independent part of a trainable network: the gradient type and
/// the operations that create, apply and regularize gradients.
pub trait NeuraTrainableNetworkBase<Input>: NeuraLayer<Input> {
/// Type used to hold a gradient for this network; it must be a `NeuraVectorSpace`.
type Gradient: NeuraVectorSpace;
/// The visible implementations set this to `Layer::Output` for `NeuraSequential`
/// and to `Input` for `()` — presumably the output of the network's first layer.
type LayerOutput;
/// Returns a `Self::Gradient` value for this network.
/// NOTE(review): presumably a zero/neutral gradient to accumulate into — confirm against implementors.
fn default_gradient(&self) -> Self::Gradient;
/// Updates the network in place from `gradient`; how the gradient is scaled
/// or signed is up to the implementor and the caller.
fn apply_gradient(&mut self, gradient: &Self::Gradient);
/// Should implement the backpropagation algorithm, see `NeuraTrainableLayer::backpropagate` for more information.
fn backpropagate<Loss: NeuraLoss<Self::Output>>(
&self,
input: &Input,
target: &Loss::Target,
loss: Loss,
) -> (Input, Self::Gradient);
/// Should return the regularization gradient
fn regularize(&self) -> Self::Gradient;
/// Called before an iteration begins, to allow the network to set itself up for training or not.
fn prepare(&mut self, train_iteration: bool);
}
/// A trainable network that a given `Optimizer` can walk through.
///
/// `traverse` takes the network's input and a shared reference to the
/// optimizer, and returns whatever the optimizer defines as its output for
/// this input type and this network's gradient type.
pub trait NeuraTrainableNetwork<Input, Optimizer: NeuraOptimizerBase>:
    NeuraTrainableNetworkBase<Input>
{
    /// Runs `optimizer` over this network for the given `input`.
    fn traverse(
        &self,
        input: &Input,
        optimizer: &Optimizer,
    ) -> Optimizer::Output<Input, Self::Gradient>;
}

@ -1,7 +1,7 @@
use super::NeuraTrainableNetwork;
use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
use crate::{
derivable::NeuraLoss,
layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer},
optimize::{NeuraOptimizerFinal, NeuraOptimizerTransient},
};
mod construct;
@ -129,10 +129,11 @@ impl<
impl<
Input,
Layer: NeuraTrainableLayer<Input>,
ChildNetwork: NeuraTrainableNetwork<Layer::Output>,
> NeuraTrainableNetwork<Input> for NeuraSequential<Layer, ChildNetwork>
ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
> NeuraTrainableNetworkBase<Input> for NeuraSequential<Layer, ChildNetwork>
{
type Gradient = (Layer::Gradient, Box<ChildNetwork::Gradient>);
type LayerOutput = Layer::Output;
fn default_gradient(&self) -> Self::Gradient {
(
@ -146,25 +147,6 @@ impl<
self.child_network.apply_gradient(&gradient.1);
}
fn backpropagate<Loss: NeuraLoss<Self::Output>>(
&self,
input: &Input,
target: &Loss::Target,
loss: Loss,
) -> (Input, Self::Gradient) {
let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) =
self.child_network
.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) =
self.layer.backprop_layer(input, backprop_gradient);
(
backprop_gradient,
(layer_gradient, Box::new(weights_gradient)),
)
}
fn regularize(&self) -> Self::Gradient {
(
self.layer.regularize_layer(),
@ -179,8 +161,9 @@ impl<
}
/// A dummy implementation of `NeuraTrainableNetwork`, which simply calls `loss.nabla` in `backpropagate`.
impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
impl<Input: Clone> NeuraTrainableNetworkBase<Input> for () {
type Gradient = ();
type LayerOutput = Input;
#[inline(always)]
fn default_gradient(&self) -> () {
@ -192,18 +175,6 @@ impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
// Noop
}
#[inline(always)]
fn backpropagate<Loss: NeuraLoss<Self::Output>>(
&self,
final_activation: &Input,
target: &Loss::Target,
loss: Loss,
) -> (Input, Self::Gradient) {
let backprop_epsilon = loss.nabla(target, &final_activation);
(backprop_epsilon, ())
}
#[inline(always)]
fn regularize(&self) -> () {
()
@ -215,6 +186,44 @@ impl<Input: Clone> NeuraTrainableNetwork<Input> for () {
}
}
/// Traversal of a `NeuraSequential` node: evaluate this node's layer, recurse
/// into the child network with the resulting activation, then let the
/// optimizer fold this layer into the child's result.
impl<Input, Layer, Optimizer, ChildNetwork> NeuraTrainableNetwork<Input, Optimizer>
    for NeuraSequential<Layer, ChildNetwork>
where
    Layer: NeuraTrainableLayer<Input>,
    Optimizer: NeuraOptimizerTransient<Layer::Output>,
    ChildNetwork: NeuraTrainableNetworkBase<Layer::Output>,
    ChildNetwork: NeuraTrainableNetwork<Layer::Output, Optimizer>,
{
    fn traverse(
        &self,
        input: &Input,
        optimizer: &Optimizer,
    ) -> Optimizer::Output<Input, Self::Gradient> {
        // Forward pass through this layer only; the child handles the rest.
        let activation = self.layer.eval(input);
        let downstream = self.child_network.traverse(&activation, optimizer);

        // The gradient of this node is `(layer gradient, boxed child gradient)`,
        // matching `Self::Gradient`.
        optimizer.eval_layer(&self.layer, input, downstream, |own, nested| {
            (own, Box::new(nested))
        })
    }
}
/// Traversal of the `()` network: hands a clone of the input to the
/// optimizer's `eval_final`, with `()` as the gradient type.
impl<Input, Optimizer> NeuraTrainableNetwork<Input, Optimizer> for ()
where
    Input: Clone,
    Optimizer: NeuraOptimizerFinal<Input>,
{
    fn traverse(
        &self,
        input: &Input,
        optimizer: &Optimizer,
    ) -> Optimizer::Output<Input, Self::Gradient> {
        // `eval_final` takes its argument by value, hence the clone.
        let final_activation = input.clone();
        optimizer.eval_final(final_activation)
    }
}
impl<Layer> From<Layer> for NeuraSequential<Layer, ()> {
fn from(layer: Layer) -> Self {
Self {

@ -0,0 +1,112 @@
use num::ToPrimitive;
use crate::{
derivable::NeuraLoss,
layer::NeuraTrainableLayer,
network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase},
};
/// Common base of all optimizers: declares the type of value an optimizer
/// produces while traversing a network.
pub trait NeuraOptimizerBase {
    /// Result of traversing a network whose input type is `NetworkInput` and
    /// whose gradient type is `NetworkGradient`.
    type Output<NetworkInput, NetworkGradient>;
}
/// Optimizer step for the end of a network.
pub trait NeuraOptimizerFinal<LayerOutput>: NeuraOptimizerBase {
    /// Consumes the final `output` and produces the optimizer's result for an
    /// empty network, whose gradient type is `()`.
    fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()>;
}
/// Optimizer step for a layer that is followed by the rest of a network.
pub trait NeuraOptimizerTransient<LayerOutput>: NeuraOptimizerBase {
    /// Folds `layer` into the result already computed for the rest of the network.
    ///
    /// - `layer`: the layer being processed, whose output type is `LayerOutput`.
    /// - `input`: the input that was given to `layer`.
    /// - `rec_opt_output`: the optimizer's output for the network that follows `layer`.
    /// - `combine_gradients`: builds the gradient of the whole network from the
    ///   layer's gradient and the gradient of the rest of the network.
    fn eval_layer<Input, NetworkGradient, RecGradient, Layer>(
        &self,
        layer: &Layer,
        input: &Input,
        rec_opt_output: Self::Output<LayerOutput, RecGradient>,
        combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
    ) -> Self::Output<Input, NetworkGradient>
    where
        Layer: NeuraTrainableLayer<Input, Output = LayerOutput>;
}
/// Entry point used to obtain a gradient and a score for a trainable network
/// from a single `(input, target)` pair.
pub trait NeuraOptimizer<Input, Target, Trainable>
where
    Trainable: NeuraTrainableNetworkBase<Input>,
{
    /// Returns the gradient of `trainable` for the given `input` and `target`.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient;

    /// Returns a scalar score of `trainable` on the given `input` and `target`.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64;
}
/// Backpropagation-based optimizer, parameterized by the loss it stores.
pub struct NeuraBackprop<Loss> {
    /// The loss handed to [`NeuraBackprop::new`].
    loss: Loss,
}

impl<Loss> NeuraBackprop<Loss> {
    /// Creates a `NeuraBackprop` that stores the given `loss`.
    pub fn new(loss: Loss) -> Self {
        NeuraBackprop { loss }
    }
}
/// `NeuraOptimizer` implementation for backpropagation.
///
/// The gradient is obtained by traversing the network with the pair
/// `(&NeuraBackprop<Loss>, &Target)` as the optimizer; the score is the loss
/// evaluated on the network's output, converted to `f64`.
///
/// The loss is only ever borrowed here, so no `Clone` bound is required on `Loss`.
impl<
        Input,
        Target,
        Trainable: NeuraTrainableNetworkBase<Input>,
        Loss: NeuraLoss<Trainable::Output, Target = Target>,
    > NeuraOptimizer<Input, Target, Trainable> for NeuraBackprop<Loss>
where
    <Loss as NeuraLoss<Trainable::Output>>::Output: ToPrimitive,
    Trainable: for<'a> NeuraTrainableNetwork<Input, (&'a NeuraBackprop<Loss>, &'a Target)>,
{
    /// Traverses `trainable` and returns the gradient half of the resulting
    /// `(epsilon, gradient)` pair; the epsilon for the network input is discarded.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient {
        let (_, gradient) = trainable.traverse(input, &(self, target));
        gradient
    }

    /// Evaluates the network on `input` and returns the loss against `target`.
    ///
    /// # Panics
    ///
    /// Panics if the loss value cannot be converted to `f64`.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64 {
        // `input` is already a reference; pass it through as-is.
        let output = trainable.eval(input);
        self.loss
            .eval(target, &output)
            .to_f64()
            .expect("loss value should be convertible to f64")
    }
}
/// The optimizer used during a backpropagation traversal is the pair
/// `(&NeuraBackprop<Loss>, &Target)`.
impl<Loss, Target> NeuraOptimizerBase for (&NeuraBackprop<Loss>, &Target) {
    /// The pair `(epsilon, gradient)`.
    type Output<NetworkInput, NetworkGradient> = (NetworkInput, NetworkGradient);
}
/// End of the network: the epsilon is `loss.nabla(target, &output)` and the
/// gradient is `()`.
impl<LayerOutput, Target, Loss> NeuraOptimizerFinal<LayerOutput>
    for (&NeuraBackprop<Loss>, &Target)
where
    Loss: NeuraLoss<LayerOutput, Target = Target>,
{
    fn eval_final(&self, output: LayerOutput) -> Self::Output<LayerOutput, ()> {
        // Both tuple fields are shared references, so they can be copied out.
        let &(solver, target) = self;
        let epsilon = solver.loss.nabla(target, &output);
        (epsilon, ())
    }
}
/// Intermediate layer: feed the epsilon coming from the rest of the network
/// into the layer's `backprop_layer`, then merge the layer's gradient with the
/// gradient of the rest of the network.
impl<LayerOutput, Target, Loss> NeuraOptimizerTransient<LayerOutput>
    for (&NeuraBackprop<Loss>, &Target)
{
    fn eval_layer<Input, NetworkGradient, RecGradient, Layer>(
        &self,
        layer: &Layer,
        input: &Input,
        rec_opt_output: Self::Output<LayerOutput, RecGradient>,
        combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
    ) -> Self::Output<Input, NetworkGradient>
    where
        Layer: NeuraTrainableLayer<Input, Output = LayerOutput>,
    {
        // `rec_opt_output` is the `(epsilon, gradient)` pair of the following network.
        let (incoming_epsilon, downstream_gradient) = rec_opt_output;
        let (outgoing_epsilon, own_gradient) = layer.backprop_layer(input, incoming_epsilon);
        let network_gradient = combine_gradients(own_gradient, downstream_gradient);
        (outgoing_epsilon, network_gradient)
    }
}

@ -1,52 +1,6 @@
use num::ToPrimitive;
use crate::{algebra::NeuraVectorSpace, derivable::NeuraLoss, network::NeuraTrainableNetwork};
/// Computes a gradient and a score for a trainable network from a single
/// `(input, target)` pair.
pub trait NeuraGradientSolver<Input, Target, Trainable>
where
    Trainable: NeuraTrainableNetwork<Input>,
{
    /// Returns the gradient of `trainable` for the given `input` and `target`.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient;

    /// Returns a scalar score of `trainable` on the given `input` and `target`.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64;
}
/// Backpropagation-based gradient solver, parameterized by the loss it stores.
#[non_exhaustive]
pub struct NeuraBackprop<Loss> {
    /// The loss handed to [`NeuraBackprop::new`].
    loss: Loss,
}

impl<Loss> NeuraBackprop<Loss> {
    /// Creates a `NeuraBackprop` that stores the given `loss`.
    pub fn new(loss: Loss) -> Self {
        NeuraBackprop { loss }
    }
}
/// `NeuraGradientSolver` implementation that delegates to the network's own
/// `backpropagate`, passing it a clone of the stored loss.
impl<Input, Target, Trainable, Loss> NeuraGradientSolver<Input, Target, Trainable>
    for NeuraBackprop<Loss>
where
    Trainable: NeuraTrainableNetwork<Input>,
    Loss: NeuraLoss<Trainable::Output, Target = Target> + Clone,
    <Loss as NeuraLoss<Trainable::Output>>::Output: ToPrimitive,
{
    /// Returns the gradient half of the pair produced by `backpropagate`.
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &Target,
    ) -> Trainable::Gradient {
        // `backpropagate` takes the loss by value, so it is cloned for each call.
        let (_, gradient) = trainable.backpropagate(input, target, self.loss.clone());
        gradient
    }

    /// Evaluates the network on `input` and returns the loss against `target`
    /// as an `f64`; panics if the loss value cannot be converted.
    fn score(&self, trainable: &Trainable, input: &Input, target: &Target) -> f64 {
        let prediction = trainable.eval(&input);
        let loss_value = self.loss.eval(target, &prediction);
        loss_value.to_f64().unwrap()
    }
}
use crate::{
algebra::NeuraVectorSpace, network::NeuraTrainableNetworkBase, optimize::NeuraOptimizer,
};
#[non_exhaustive]
pub struct NeuraBatchedTrainer {
@ -118,8 +72,8 @@ impl NeuraBatchedTrainer {
pub fn train<
Input: Clone,
Target: Clone,
Network: NeuraTrainableNetwork<Input>,
GradientSolver: NeuraGradientSolver<Input, Target, Network>,
Network: NeuraTrainableNetworkBase<Input>,
GradientSolver: NeuraOptimizer<Input, Target, Network>,
Inputs: IntoIterator<Item = (Input, Target)>,
>(
&self,
@ -185,10 +139,11 @@ mod test {
use super::*;
use crate::{
assert_approx,
derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0, NeuraLoss},
layer::{dense::NeuraDenseLayer, NeuraLayer},
network::sequential::{NeuraSequential, NeuraSequentialTail},
neura_sequential,
optimize::NeuraBackprop,
};
#[test]

Loading…
Cancel
Save