It was becoming almost impossible to manage the dimensions of the layers, especially with convolution layers. Const generics are nice, but they are a bit too early to rely on for this use-case. We'll probably expand the implementations to accept either const-sized or dynamically-sized layers at some point, for performance-critical applications.
parent cc7686569a
commit 2edbff860c

@@ -0,0 +1,180 @@
use super::{NeuraLayer, NeuraTrainableLayer};
use crate::{
    algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace},
    derivable::NeuraDerivable,
};

use rand::Rng;
use rand_distr::Distribution;

#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
    Act: NeuraDerivable<f64>,
    Reg: NeuraDerivable<f64>,
    const INPUT_LEN: usize,
    const OUTPUT_LEN: usize,
> {
    weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
    bias: NeuraVector<OUTPUT_LEN, f64>,
    activation: Act,
    regularization: Reg,
}

impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    pub fn new(
        weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
        bias: NeuraVector<OUTPUT_LEN, f64>,
        activation: Act,
        regularization: Reg,
    ) -> Self {
        Self {
            weights,
            bias,
            activation,
            regularization,
        }
    }

    pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
        let mut weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64> = NeuraMatrix::from_value(0.0f64);

        // Use Xavier (or He) initialisation, using the harmonic mean
        // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html
        let distribution = rand_distr::Normal::new(
            0.0,
            activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64),
        )
        .unwrap();
        // let distribution = rand_distr::Uniform::new(-0.5, 0.5);

        for i in 0..OUTPUT_LEN {
            for j in 0..INPUT_LEN {
                weights[i][j] = distribution.sample(rng);
            }
        }

        Self {
            weights,
            // Biases are initialized based on the activation's hint
            bias: NeuraVector::from_value(activation.bias_hint()),
            activation,
            regularization,
        }
    }
}

impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    type Input = NeuraVector<INPUT_LEN, f64>;

    type Output = NeuraVector<OUTPUT_LEN, f64>;

    fn eval(&self, input: &Self::Input) -> Self::Output {
        let mut result = self.weights.multiply_vector(input);

        for i in 0..OUTPUT_LEN {
            result[i] = self.activation.eval(result[i] + self.bias[i]);
        }

        result
    }
}

impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    type Delta = (
        NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
        NeuraVector<OUTPUT_LEN, f64>,
    );

    fn backpropagate(
        &self,
        input: &Self::Input,
        epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta) {
        let evaluated = self.weights.multiply_vector(input);
        // Compute delta (the gradient at this layer) from epsilon (the gradient of the next layer),
        // with `self.activation'(a) ° epsilon = delta`, where `a = weights * input + bias`
        let mut delta: NeuraVector<OUTPUT_LEN, f64> = epsilon.clone();
        for i in 0..OUTPUT_LEN {
            // The derivative is taken at the pre-activation value, which includes the bias
            // (the same quantity that `eval` feeds to the activation function).
            delta[i] *= self.activation.derivate(evaluated[i] + self.bias[i]);
        }

        // Compute the weight gradient
        let weights_gradient = delta.reverse_dot(input);

        let new_epsilon = self.weights.transpose_multiply_vector(&delta);

        // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
        // the gradient of the bias is equal to the delta term of the backpropagation algorithm.
        let bias_gradient = delta;

        (new_epsilon, (weights_gradient, bias_gradient))
    }

    fn apply_gradient(&mut self, gradient: &Self::Delta) {
        NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
        NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
    }

    fn regularize(&self) -> Self::Delta {
        let mut res = Self::Delta::default();

        for i in 0..OUTPUT_LEN {
            for j in 0..INPUT_LEN {
                res.0[i][j] = self.regularization.derivate(self.weights[i][j]);
            }
        }

        // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network

        res
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::{
        derivable::{activation::Relu, regularize::NeuraL0},
        utils::uniform_vector,
    };

    #[test]
    fn test_from_rng() {
        let mut rng = rand::thread_rng();
        let layer: NeuraDenseLayer<_, _, 64, 32> =
            NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);
        let mut input = [0.0; 64];
        for x in 0..64 {
            input[x] = rng.gen();
        }
        assert!(layer.eval(&input.into()).len() == 32);
    }

    #[test]
    fn test_stack_overflow_big_layer() {
        let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0)
            as NeuraDenseLayer<Relu, NeuraL0, 1000, 1000>;

        layer.backpropagate(&uniform_vector(), uniform_vector());

        <NeuraDenseLayer<Relu, NeuraL0, 1000, 1000> as NeuraTrainableLayer>::Delta::zero();
    }
}
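For reference, here is a summary of what the dense layer's `backpropagate` above computes, in the notation of the `NeuraTrainableLayer` documentation in the module file below; `a`, `x` and `f` are just notation introduced here for the pre-activation, the layer input and the activation function, and `°` is the element-wise product already used in the code comments:

a = weights * x + bias
delta = f'(a) ° epsilon
δW = delta * x^T                   (outer product, `reverse_dot`)
δb = delta
epsilon_prev = weights^T * delta   (`transpose_multiply_vector`)

`apply_gradient` then adds such a `(δW, δb)` pair to the weights and biases, while `regularize` returns the element-wise derivative of the regularization penalty with respect to the weights (the biases are deliberately left out).
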
@@ -0,0 +1,170 @@
mod dense;
pub use dense::NeuraDenseLayer;

mod convolution;
pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};

mod dropout;
pub use dropout::NeuraDropoutLayer;

mod softmax;
pub use softmax::NeuraSoftmaxLayer;

mod one_hot;
pub use one_hot::NeuraOneHotLayer;

mod lock;
pub use lock::NeuraLockLayer;

mod pool;
pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer};

mod reshape;
pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer};

use crate::algebra::NeuraVectorSpace;

pub trait NeuraLayer {
    type Input;
    type Output;

    fn eval(&self, input: &Self::Input) -> Self::Output;
}

pub trait NeuraTrainableLayer: NeuraLayer {
    /// The representation of the layer gradient as a vector space
    type Delta: NeuraVectorSpace;

    /// Computes the backpropagation term and the derivative of the internal weights,
    /// using the `input` vector output by the previous layer and the backpropagation term `epsilon` of the next layer.
    ///
    /// Note: we introduce the term `epsilon`, which together with the activation of the current layer can be used to compute `delta_l`:
    /// ```no_rust
    /// f_l'(a_l) * epsilon_l = delta_l
    /// ```
    ///
    /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
    /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
    /// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
    fn backpropagate(
        &self,
        input: &Self::Input,
        epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta);

    /// Computes the regularization gradient (the derivative of the regularization term with respect to the weights)
    fn regularize(&self) -> Self::Delta;

    /// Applies `δW_l` to the weights of the layer
    fn apply_gradient(&mut self, gradient: &Self::Delta);

    /// Called before an iteration begins, to allow the layer to set itself up for training.
    #[inline(always)]
    fn prepare_epoch(&mut self) {}

    /// Called at the end of training, to allow the layer to clean itself up
    #[inline(always)]
    fn cleanup(&mut self) {}
}

#[macro_export]
macro_rules! neura_layer {
    ( "dense", $( $shape:expr ),*; $activation:expr ) => {
        $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
            as neura_layer!("_dense_shape", $($shape),*)
    };

    ( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
        $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
            as neura_layer!("_dense_shape", $($shape),*)
    };

    ( "_dense_shape", $output:expr ) => {
        $crate::layer::NeuraDenseLayer<_, _, _, $output>
    };

    ( "_dense_shape", $input:expr, $output:expr ) => {
        $crate::layer::NeuraDenseLayer<_, _, $input, $output>
    };

    ( "dropout", $probability:expr ) => {
        $crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
            as $crate::layer::NeuraDropoutLayer<_, _>
    };

    ( "softmax" ) => {
        $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
    };

    ( "softmax", $length:expr ) => {
        $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
    };

    ( "one_hot" ) => {
        $crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _>
    };

    ( "lock", $layer:expr ) => {
        $crate::layer::NeuraLockLayer($layer)
    };

    ( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
    };

    ( "conv1d_pad"; $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
    };

    ( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
    };

    ( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
    };

    ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
    };

    ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
    };

    ( "pool_global"; $reduce:expr ) => {
        $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
    };

    ( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
        $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
    };

    ( "pool1d", $blocklength:expr; $reduce:expr ) => {
        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
    };

    ( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
    };

    ( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
    };

    ( "unstable_flatten" ) => {
        $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64>
    };

    ( "unstable_flatten", $width:expr, $height:expr ) => {
        $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64>
    };

    ( "unstable_reshape", $height:expr ) => {
        $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64>
    };

    ( "unstable_reshape", $width:expr, $height:expr ) => {
        $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64>
    };
}
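
For illustration, a hypothetical invocation of a few of the arms above; the layer sizes, the 0.5 dropout probability and the `Relu` activation (from `crate::derivable::activation`, as used in the dense layer's tests) are arbitrary examples, not part of this commit. Note that when only an activation is given, the `"dense"` arm falls back to the `NeuraL0` regularization.

// Hypothetical usage sketch of the `neura_layer!` arms defined above.
let hidden = neura_layer!("dense", 784, 100; Relu); // explicit input and output sizes
let narrow = neura_layer!("dense", 10; Relu);       // input size left to type inference
let dropout = neura_layer!("dropout", 0.5);         // drop each activation with probability 0.5
let softmax = neura_layer!("softmax");              // length left to type inference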