It was becoming almost impossible to manage the dimensions of the layers, especially with convolution layers. Const generics are nice, but they are still a bit too immature for this use case. We'll probably expand the implementations to accept either const-sized or dynamically-sized layers at some point, for performance-critical applications.
parent cc7686569a · commit 2edbff860c
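To illustrate the dimension bookkeeping the commit message refers to, here is a minimal sketch (not part of this commit; the concrete sizes are arbitrary) of how every dimension ends up in the type once layers are const-generic, so adjacent layers have to agree on their sizes by hand:

```rust
// Hypothetical sketch: a 64 -> 32 dense layer spelled out in full,
// with Act = Relu, Reg = NeuraL0, INPUT_LEN = 64, OUTPUT_LEN = 32.
type Hidden = NeuraDenseLayer<Relu, NeuraL0, 64, 32>;
// Any layer fed by `Hidden` must hard-code 32 as its INPUT_LEN,
// which is the kind of manual bookkeeping the commit message describes.
type Output = NeuraDenseLayer<Relu, NeuraL0, 32, 10>;
```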
@@ -0,0 +1,180 @@
use super::{NeuraLayer, NeuraTrainableLayer};
use crate::{
    algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace},
    derivable::NeuraDerivable,
};

use rand::Rng;
use rand_distr::Distribution;

#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
    Act: NeuraDerivable<f64>,
    Reg: NeuraDerivable<f64>,
    const INPUT_LEN: usize,
    const OUTPUT_LEN: usize,
> {
    weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
    bias: NeuraVector<OUTPUT_LEN, f64>,
    activation: Act,
    regularization: Reg,
}

impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    pub fn new(
        weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
        bias: NeuraVector<OUTPUT_LEN, f64>,
        activation: Act,
        regularization: Reg,
    ) -> Self {
        Self {
            weights,
            bias,
            activation,
            regularization,
        }
    }

    pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
        let mut weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64> = NeuraMatrix::from_value(0.0f64);

        // Use Xavier (or He) initialisation, using the harmonic mean
        // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html
        let distribution = rand_distr::Normal::new(
            0.0,
            activation.variance_hint() * 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64),
        )
        .unwrap();
        // let distribution = rand_distr::Uniform::new(-0.5, 0.5);

        for i in 0..OUTPUT_LEN {
            for j in 0..INPUT_LEN {
                weights[i][j] = distribution.sample(rng);
            }
        }

        Self {
            weights,
            // Biases are initialized based on the activation's hint
            bias: NeuraVector::from_value(activation.bias_hint()),
            activation,
            regularization,
        }
    }
}

impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    type Input = NeuraVector<INPUT_LEN, f64>;

    type Output = NeuraVector<OUTPUT_LEN, f64>;

    fn eval(&self, input: &Self::Input) -> Self::Output {
        let mut result = self.weights.multiply_vector(input);

        for i in 0..OUTPUT_LEN {
            result[i] = self.activation.eval(result[i] + self.bias[i]);
        }

        result
    }
}

impl<
        Act: NeuraDerivable<f64>,
        Reg: NeuraDerivable<f64>,
        const INPUT_LEN: usize,
        const OUTPUT_LEN: usize,
    > NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
    type Delta = (
        NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
        NeuraVector<OUTPUT_LEN, f64>,
    );

    fn backpropagate(
        &self,
        input: &Self::Input,
        epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta) {
        let evaluated = self.weights.multiply_vector(input);
        // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
        // with `self.activation'(input) ° epsilon = delta`
        let mut delta: NeuraVector<OUTPUT_LEN, f64> = epsilon.clone();
        for i in 0..OUTPUT_LEN {
            delta[i] *= self.activation.derivate(evaluated[i]);
        }

        // Compute the weight gradient
        let weights_gradient = delta.reverse_dot(input);

        let new_epsilon = self.weights.transpose_multiply_vector(&delta);

        // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
        // the gradient of the bias is equal to the delta term of the backpropagation algorithm
        let bias_gradient = delta;

        (new_epsilon, (weights_gradient, bias_gradient))
    }

    fn apply_gradient(&mut self, gradient: &Self::Delta) {
        NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
        NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
    }

    fn regularize(&self) -> Self::Delta {
        let mut res = Self::Delta::default();

        for i in 0..OUTPUT_LEN {
            for j in 0..INPUT_LEN {
                res.0[i][j] = self.regularization.derivate(self.weights[i][j]);
            }
        }

        // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network

        res
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::{
        derivable::{activation::Relu, regularize::NeuraL0},
        utils::uniform_vector,
    };

    #[test]
    fn test_from_rng() {
        let mut rng = rand::thread_rng();
        let layer: NeuraDenseLayer<_, _, 64, 32> =
            NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);
        let mut input = [0.0; 64];
        for x in 0..64 {
            input[x] = rng.gen();
        }
        assert!(layer.eval(&input.into()).len() == 32);
    }

    #[test]
    fn test_stack_overflow_big_layer() {
        let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0)
            as NeuraDenseLayer<Relu, NeuraL0, 1000, 1000>;

        layer.backpropagate(&uniform_vector(), uniform_vector());

        <NeuraDenseLayer<Relu, NeuraL0, 1000, 1000> as NeuraTrainableLayer>::Delta::zero();
    }
}
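For readers skimming the diff, here is a usage sketch of the dense layer's API as defined above. It is a non-authoritative example: the in-crate paths mirror the test module, the 4/2 sizes are arbitrary, and a real training step would scale and negate the gradient before applying it.

```rust
use crate::algebra::NeuraVector;
use crate::derivable::{activation::Relu, regularize::NeuraL0};
use crate::layer::{NeuraDenseLayer, NeuraLayer, NeuraTrainableLayer};

fn dense_layer_sketch() {
    // 4 -> 2 dense layer; weights drawn from the activation-aware normal distribution.
    let mut layer: NeuraDenseLayer<Relu, NeuraL0, 4, 2> =
        NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0);

    // Forward pass: result = activation(weights * input + bias).
    let input: NeuraVector<4, f64> = NeuraVector::from_value(0.5);
    let output = layer.eval(&input);

    // Backward pass: pass the output gradient (`epsilon`) and receive the
    // gradient to forward to the previous layer plus the (weights, bias) delta.
    let epsilon: NeuraVector<2, f64> = output; // placeholder for dLoss/dOutput
    let (_previous_epsilon, delta) = layer.backpropagate(&input, epsilon);

    // `apply_gradient` simply adds the delta to the weights and bias,
    // so the caller is expected to pre-scale it (e.g. by -learning_rate).
    layer.apply_gradient(&delta);
}
```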
@@ -0,0 +1,170 @@
mod dense;
pub use dense::NeuraDenseLayer;

mod convolution;
pub use convolution::{NeuraConv1DPadLayer, NeuraConv2DBlockLayer, NeuraConv2DPadLayer};

mod dropout;
pub use dropout::NeuraDropoutLayer;

mod softmax;
pub use softmax::NeuraSoftmaxLayer;

mod one_hot;
pub use one_hot::NeuraOneHotLayer;

mod lock;
pub use lock::NeuraLockLayer;

mod pool;
pub use pool::{NeuraGlobalPoolLayer, NeuraPool1DLayer};

mod reshape;
pub use reshape::{NeuraFlattenLayer, NeuraReshapeLayer};

use crate::algebra::NeuraVectorSpace;

pub trait NeuraLayer {
    type Input;
    type Output;

    fn eval(&self, input: &Self::Input) -> Self::Output;
}

pub trait NeuraTrainableLayer: NeuraLayer {
    /// The representation of the layer gradient as a vector space
    type Delta: NeuraVectorSpace;

    /// Computes the backpropagation term and the derivative of the internal weights,
    /// using the `input` vector output by the previous layer and the backpropagation term `epsilon` of the next layer.
    ///
    /// Note: we introduce the term `epsilon`, which, together with the activation function of the current layer, can be used to compute `delta_l`:
    /// ```no_rust
    /// f_l'(a_l) * epsilon_l = delta_l
    /// ```
    ///
    /// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
    /// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
    /// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
    fn backpropagate(
        &self,
        input: &Self::Input,
        epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta);

    /// Computes the regularization
    fn regularize(&self) -> Self::Delta;

    /// Applies `δW_l` to the weights of the layer
    fn apply_gradient(&mut self, gradient: &Self::Delta);

    /// Called before an iteration begins, to allow the layer to set itself up for training.
    #[inline(always)]
    fn prepare_epoch(&mut self) {}

    /// Called at the end of training, to allow the layer to clean itself up.
    #[inline(always)]
    fn cleanup(&mut self) {}
}

#[macro_export]
macro_rules! neura_layer {
    ( "dense", $( $shape:expr ),*; $activation:expr ) => {
        $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
            as neura_layer!("_dense_shape", $($shape),*)
    };

    ( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
        $crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
            as neura_layer!("_dense_shape", $($shape),*)
    };

    ( "_dense_shape", $output:expr ) => {
        $crate::layer::NeuraDenseLayer<_, _, _, $output>
    };

    ( "_dense_shape", $input:expr, $output:expr ) => {
        $crate::layer::NeuraDenseLayer<_, _, $input, $output>
    };

    ( "dropout", $probability:expr ) => {
        $crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
            as $crate::layer::NeuraDropoutLayer<_, _>
    };

    ( "softmax" ) => {
        $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
    };

    ( "softmax", $length:expr ) => {
        $crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
    };

    ( "one_hot" ) => {
        $crate::layer::NeuraOneHotLayer as $crate::layer::NeuraOneHotLayer<2, _>
    };

    ( "lock", $layer:expr ) => {
        $crate::layer::NeuraLockLayer($layer)
    };

    ( "conv1d_pad", $length:expr, $feats:expr; $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<$length, $feats, $window, _>
    };

    ( "conv1d_pad"; $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv1DPadLayer::new($layer, Default::default()) as $crate::layer::NeuraConv1DPadLayer<_, _, $window, _>
    };

    ( "conv2d_pad", $feats:expr, $length:expr; $width:expr, $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<$length, $feats, $window, _>
    };

    ( "conv2d_pad"; $width:expr, $window:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DPadLayer::new($layer, Default::default(), $width) as $crate::layer::NeuraConv2DPadLayer<_, _, $window, _>
    };

    ( "conv2d_block", $feats:expr, $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, $feats, $block_size, _>
    };

    ( "conv2d_block", $width:expr, $height:expr; $block_size:expr; $layer:expr ) => {
        $crate::layer::NeuraConv2DBlockLayer::new($layer) as $crate::layer::NeuraConv2DBlockLayer<$width, $height, _, $block_size, _>
    };

    ( "pool_global"; $reduce:expr ) => {
        $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<_, _, _>
    };

    ( "pool_global", $feats:expr, $length:expr; $reduce:expr ) => {
        $crate::layer::NeuraGlobalPoolLayer::new($reduce) as $crate::layer::NeuraGlobalPoolLayer<$length, $feats, _>
    };

    ( "pool1d", $blocklength:expr; $reduce:expr ) => {
        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<_, $blocklength, _, _>
    };

    ( "pool1d", $blocks:expr, $blocklength:expr; $reduce:expr ) => {
        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, _, _>
    };

    ( "pool1d", $feats:expr, $blocks:expr, $blocklength:expr; $reduce:expr ) => {
        $crate::layer::NeuraPool1DLayer::new($reduce) as $crate::layer::NeuraPool1DLayer<$blocks, $blocklength, $feats, _>
    };

    ( "unstable_flatten" ) => {
        $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64>
    };

    ( "unstable_flatten", $width:expr, $height:expr ) => {
        $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64>
    };

    ( "unstable_reshape", $height:expr ) => {
        $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64>
    };

    ( "unstable_reshape", $width:expr, $height:expr ) => {
        $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64>
    };
}
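Based solely on the macro arms above, here is a sketch of how `neura_layer!` is meant to be invoked. The surrounding network-builder these layers plug into is not part of this diff, so the snippet only shows individual layer construction, and the sizes and probability are arbitrary.

```rust
use crate::derivable::activation::Relu;
// `neura_layer!` is #[macro_export]ed, so it is reachable from the crate root.
use crate::neura_layer;

fn layer_macro_sketch() {
    // Dense layer with explicit input and output sizes, ReLU activation,
    // and the default NeuraL0 regularization:
    let dense = neura_layer!("dense", 100, 50; Relu);

    // Dense layer with only the output size; the input size is left as `_`
    // so it can be inferred from the surrounding network:
    let hidden = neura_layer!("dense", 50; Relu);

    // Dropout with a 50% drop probability, and a softmax over an inferred length:
    let dropout = neura_layer!("dropout", 0.5);
    let softmax = neura_layer!("softmax");
}
```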