Re-order arguments of neura_layer, implement softmax and normalization

main
Shad Amethyst 2 years ago
parent 220c61ff6b
commit bca56a5557

@@ -2,22 +2,26 @@
use std::io::Write;
-use neuramethyst::prelude::*;
+use neuramethyst::derivable::activation::Linear;
-use neuramethyst::derivable::activation::{Relu, Tanh, LeakyRelu};
+#[allow(unused_imports)]
+use neuramethyst::derivable::activation::{LeakyRelu, Relu, Tanh};
use neuramethyst::derivable::loss::Euclidean;
+use neuramethyst::derivable::regularize::NeuraElastic;
+use neuramethyst::prelude::*;
use rand::Rng;
fn main() {
let mut network = neura_network![
-neura_layer!("dense", LeakyRelu(0.01), 9, 2),
+neura_layer!("dense", 2, 8; LeakyRelu(0.01)),
neura_layer!("dropout", 0.1),
-neura_layer!("dense", LeakyRelu(0.01), 9),
+neura_layer!("dense", 8; LeakyRelu(0.01), NeuraElastic::new(0.0001, 0.002)),
neura_layer!("dropout", 0.3),
-neura_layer!("dense", LeakyRelu(0.01), 6),
+neura_layer!("dense", 8; LeakyRelu(0.01), NeuraElastic::new(0.0001, 0.002)),
neura_layer!("dropout", 0.1),
-neura_layer!("dense", LeakyRelu(0.01), 4),
+neura_layer!("dense", 4; LeakyRelu(0.1), NeuraElastic::new(0.0001, 0.002)),
-neura_layer!("dense", LeakyRelu(0.1), 2)
+neura_layer!("dense", 2; Linear),
+neura_layer!("softmax"),
];
// println!("{:#?}", network);
@@ -39,20 +43,23 @@ fn main() {
let test_inputs: Vec<_> = inputs.clone().take(100).collect();
-let mut trainer = NeuraBatchedTrainer::new(0.1, 4000);
+let mut trainer = NeuraBatchedTrainer::new(0.25, 1000);
-trainer.log_epochs = 500;
+trainer.log_epochs = 50;
+trainer.learning_momentum = 0.05;
+trainer.batch_size = 2000;
trainer.train(
NeuraBackprop::new(Euclidean),
&mut network,
inputs,
-&test_inputs
+&test_inputs,
);
let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
for (input, _target) in test_inputs {
let guess = argmax(&network.eval(&input));
writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
+// println!("{:?}", network.eval(&input));
}
// println!("{:#?}", network);

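With the final neura_layer!("softmax") above, the bivariate example's network now outputs class probabilities, and argmax turns them into the class index written to the CSV. A minimal standalone sketch of that last step (the argmax below is a hypothetical stand-in for the crate's own helper):

fn argmax(values: &[f64]) -> usize {
    // Index of the largest probability; ties resolve to the first maximum.
    let mut best = 0;
    for (i, &v) in values.iter().enumerate() {
        if v > values[best] {
            best = i;
        }
    }
    best
}

fn main() {
    let probabilities = [0.12, 0.88]; // e.g. the two softmax outputs
    assert_eq!(argmax(&probabilities), 1);
}
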
@@ -1,25 +1,30 @@
#![feature(generic_arg_infer)]
-use neuramethyst::prelude::*;
+use neuramethyst::derivable::activation::Relu;
-use neuramethyst::derivable::activation::{Relu};
use neuramethyst::derivable::loss::Euclidean;
+use neuramethyst::prelude::*;
fn main() {
let mut network = neura_network![
-neura_layer!("dense", Relu, 4, 2),
+neura_layer!("dense", 2, 4; Relu),
-neura_layer!("dense", Relu, 3),
+neura_layer!("dense", 3; Relu),
-neura_layer!("dense", Relu, 1)
+neura_layer!("dense", 1; Relu)
];
let inputs = [
([0.0, 0.0], [0.0]),
([0.0, 1.0], [1.0]),
([1.0, 0.0], [1.0]),
-([1.0, 1.0], [0.0])
+([1.0, 1.0], [0.0]),
];
for (input, target) in inputs {
-println!("Input: {:?}, target: {}, actual: {:.3}", &input, target[0], network.eval(&input)[0]);
+println!(
+"Input: {:?}, target: {}, actual: {:.3}",
+&input,
+target[0],
+network.eval(&input)[0]
+);
}
let mut trainer = NeuraBatchedTrainer::new(0.05, 1000);
@@ -35,6 +40,11 @@ fn main() {
);
for (input, target) in inputs {
-println!("Input: {:?}, target: {}, actual: {:.3}", &input, target[0], network.eval(&input)[0]);
+println!(
+"Input: {:?}, target: {}, actual: {:.3}",
+&input,
+target[0],
+network.eval(&input)[0]
+);
}
}

@@ -36,10 +36,9 @@ impl NeuraDerivable<f32> for Relu {
}
#[derive(Clone, Copy, Debug, PartialEq)]
-pub struct LeakyRelu(pub f64);
+pub struct LeakyRelu<F>(pub F);
-impl NeuraDerivable<f64> for LeakyRelu {
+impl NeuraDerivable<f64> for LeakyRelu<f64> {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
if input > 0.0 {
@@ -59,13 +58,13 @@ impl NeuraDerivable<f64> for LeakyRelu {
}
}
-impl NeuraDerivable<f32> for LeakyRelu {
+impl NeuraDerivable<f32> for LeakyRelu<f32> {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
if input > 0.0 {
input
} else {
-(self.0 as f32) * input
+self.0 * input
}
}
@@ -74,7 +73,7 @@ impl NeuraDerivable<f32> for LeakyRelu {
if input > 0.0 {
1.0
} else {
-self.0 as f32
+self.0
}
}
}

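Making LeakyRelu generic over its slope type is what removes the `as f32` casts above: an f32 implementation can hold an f32 slope directly. A rough standalone illustration of the idea, using a simplified one-method trait rather than the crate's NeuraDerivable:

// Simplified sketch: the slope's type follows the struct's type parameter,
// so the f32 implementation needs no cast.
pub struct LeakyRelu<F>(pub F);

trait Eval<F> {
    fn eval(&self, input: F) -> F;
}

impl Eval<f64> for LeakyRelu<f64> {
    fn eval(&self, input: f64) -> f64 {
        if input > 0.0 { input } else { self.0 * input }
    }
}

impl Eval<f32> for LeakyRelu<f32> {
    fn eval(&self, input: f32) -> f32 {
        if input > 0.0 { input } else { self.0 * input }
    }
}

fn main() {
    assert_eq!(LeakyRelu(0.01_f64).eval(-2.0), -0.02);
    assert_eq!(LeakyRelu(0.1_f32).eval(-2.0_f32), -0.2_f32);
}
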
@@ -1,5 +1,6 @@
pub mod activation;
pub mod loss;
+pub mod regularize;
pub trait NeuraDerivable<F> {
fn eval(&self, input: F) -> F;

@@ -0,0 +1,134 @@
use super::*;
/// Default regularization, which is no regularization
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL0;
impl NeuraDerivable<f64> for NeuraL0 {
#[inline(always)]
fn eval(&self, _input: f64) -> f64 {
0.0
}
#[inline(always)]
fn derivate(&self, _at: f64) -> f64 {
0.0
}
}
impl NeuraDerivable<f32> for NeuraL0 {
#[inline(always)]
fn eval(&self, _input: f32) -> f32 {
0.0
}
#[inline(always)]
fn derivate(&self, _at: f32) -> f32 {
0.0
}
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL1<F>(pub F);
impl NeuraDerivable<f64> for NeuraL1<f64> {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
self.0 * input.abs()
}
#[inline(always)]
fn derivate(&self, at: f64) -> f64 {
if at > 0.0 {
self.0
} else if at < 0.0 {
-self.0
} else {
0.0
}
}
}
impl NeuraDerivable<f32> for NeuraL1<f32> {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
self.0 * input.abs()
}
#[inline(always)]
fn derivate(&self, at: f32) -> f32 {
if at > 0.0 {
self.0
} else if at < 0.0 {
-self.0
} else {
0.0
}
}
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL2<F>(pub F);
impl NeuraDerivable<f64> for NeuraL2<f64> {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
self.0 * (input * input)
}
#[inline(always)]
fn derivate(&self, at: f64) -> f64 {
self.0 * at
}
}
impl NeuraDerivable<f32> for NeuraL2<f32> {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
self.0 * (input * input)
}
#[inline(always)]
fn derivate(&self, at: f32) -> f32 {
self.0 * at
}
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraElastic<F> {
pub l1: F,
pub l2: F,
}
impl<F> NeuraElastic<F> {
pub fn new(l1_factor: F, l2_factor: F) -> Self {
Self {
l1: l1_factor,
l2: l2_factor,
}
}
}
impl NeuraDerivable<f64> for NeuraElastic<f64> {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
NeuraL1(self.l1).eval(input) + NeuraL2(self.l2).eval(input)
}
#[inline(always)]
fn derivate(&self, at: f64) -> f64 {
NeuraL1(self.l1).derivate(at) + NeuraL2(self.l2).derivate(at)
}
}
impl NeuraDerivable<f32> for NeuraElastic<f32> {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
NeuraL1(self.l1).eval(input) + NeuraL2(self.l2).eval(input)
}
#[inline(always)]
fn derivate(&self, at: f32) -> f32 {
NeuraL1(self.l1).derivate(at) + NeuraL2(self.l2).derivate(at)
}
}

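The three regularizers follow the same contract as activations: eval returns the penalty for a single weight and derivate its gradient contribution, with NeuraElastic simply summing the L1 and L2 terms. A standalone sketch mirroring those formulas on one weight (the free functions below are illustrations, not the crate's API):

// Mirrors the definitions above for one weight w, returning (penalty, gradient):
//   L1:      (l1 * |w|,  l1 * sign(w))
//   L2:      (l2 * w^2,  l2 * w)
//   Elastic: the sum of both
fn l1(factor: f64, w: f64) -> (f64, f64) {
    let grad = if w > 0.0 { factor } else if w < 0.0 { -factor } else { 0.0 };
    (factor * w.abs(), grad)
}

fn l2(factor: f64, w: f64) -> (f64, f64) {
    (factor * w * w, factor * w)
}

fn elastic(l1_factor: f64, l2_factor: f64, w: f64) -> (f64, f64) {
    let (p1, g1) = l1(l1_factor, w);
    let (p2, g2) = l2(l2_factor, w);
    (p1 + p2, g1 + g2)
}

fn main() {
    // Same factors as the bivariate example: NeuraElastic::new(0.0001, 0.002)
    let (penalty, gradient) = elastic(0.0001, 0.002, -0.5);
    assert!((penalty - 0.00055).abs() < 1e-12);
    assert!((gradient + 0.0011).abs() < 1e-12);
}
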
@@ -1,39 +1,53 @@
use super::NeuraLayer;
-use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
+use crate::{
+algebra::NeuraVectorSpace,
+derivable::NeuraDerivable,
+train::NeuraTrainableLayer,
+utils::{multiply_matrix_transpose_vector, multiply_matrix_vector, reverse_dot_product},
+};
-use rand_distr::Distribution;
use rand::Rng;
+use rand_distr::Distribution;
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>,
+Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> {
weights: [[f64; INPUT_LEN]; OUTPUT_LEN],
bias: [f64; OUTPUT_LEN],
activation: Act,
+regularization: Reg,
}
-impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
-NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
+impl<
+Act: NeuraDerivable<f64>,
+Reg: NeuraDerivable<f64>,
+const INPUT_LEN: usize,
+const OUTPUT_LEN: usize,
+> NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
pub fn new(
weights: [[f64; INPUT_LEN]; OUTPUT_LEN],
bias: [f64; OUTPUT_LEN],
activation: Act,
+regularization: Reg,
) -> Self {
Self {
weights,
bias,
activation,
+regularization,
}
}
-pub fn from_rng(rng: &mut impl Rng, activation: Act) -> Self {
+pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN];
-let distribution = rand_distr::Normal::new(0.0, 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64)).unwrap();
+let distribution =
+rand_distr::Normal::new(0.0, 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64)).unwrap();
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
@@ -46,12 +60,17 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
// Biases are zero-initialized, as this shouldn't cause any issues during training
bias: [0.0; OUTPUT_LEN],
activation,
+regularization,
}
}
}
-impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraLayer
-for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
+impl<
+Act: NeuraDerivable<f64>,
+Reg: NeuraDerivable<f64>,
+const INPUT_LEN: usize,
+const OUTPUT_LEN: usize,
+> NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
type Input = [f64; INPUT_LEN];
@@ -68,13 +87,21 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
}
}
-impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraTrainableLayer
-for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
+impl<
+Act: NeuraDerivable<f64>,
+Reg: NeuraDerivable<f64>,
+const INPUT_LEN: usize,
+const OUTPUT_LEN: usize,
+> NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]);
// TODO: double-check the math in this
-fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) {
+fn backpropagate(
+&self,
+input: &Self::Input,
+epsilon: Self::Output,
+) -> (Self::Input, Self::Delta) {
let evaluated = multiply_matrix_vector(&self.weights, input);
// Compute delta from epsilon, with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone();
@@ -96,17 +123,32 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
}
+fn regularize(&self) -> Self::Delta {
+let mut res = ([[0.0; INPUT_LEN]; OUTPUT_LEN], [0.0; OUTPUT_LEN]);
+for i in 0..OUTPUT_LEN {
+for j in 0..INPUT_LEN {
+res.0[i][j] = self.regularization.derivate(self.weights[i][j]);
+}
+}
+// Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network
+res
+}
}
#[cfg(test)]
mod test {
use super::*;
-use crate::derivable::activation::Relu;
+use crate::derivable::{activation::Relu, regularize::NeuraL0};
#[test]
fn test_from_rng() {
let mut rng = rand::thread_rng();
-let layer: NeuraDenseLayer<_, 64, 32> = NeuraDenseLayer::from_rng(&mut rng, Relu);
+let layer: NeuraDenseLayer<_, _, 64, 32> =
+NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);
let mut input = [0.0; 64];
for x in 0..64 {
input[x] = rng.gen();

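The new regularize() above returns a delta shaped like a gradient: one regularization derivative per weight and an all-zero bias part, since biases are deliberately left unregularized. A rough standalone sketch of that shape (reg_derivative stands in for self.regularization.derivate and is an assumption of this sketch):

// Builds a (weights, biases) delta in which only the weight part carries the
// regularization derivative; the bias part stays zero.
fn regularize<const IN: usize, const OUT: usize>(
    weights: &[[f64; IN]; OUT],
    reg_derivative: impl Fn(f64) -> f64,
) -> ([[f64; IN]; OUT], [f64; OUT]) {
    let mut delta = ([[0.0; IN]; OUT], [0.0; OUT]);
    for i in 0..OUT {
        for j in 0..IN {
            delta.0[i][j] = reg_derivative(weights[i][j]);
        }
    }
    delta
}

fn main() {
    let weights = [[0.5, -0.25], [1.0, 0.0]];
    let l2 = |w: f64| 0.002 * w; // same shape as NeuraL2(0.002).derivate
    let (dw, db) = regularize(&weights, l2);
    println!("weights: {:?}, biases: {:?}", dw, db);
}
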
@@ -59,6 +59,10 @@ impl<const LENGTH: usize, R: Rng> NeuraTrainableLayer for NeuraDropoutLayer<LENG
(epsilon, ())
}
+fn regularize(&self) -> Self::Delta {
+()
+}
#[inline(always)]
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop

@@ -4,6 +4,9 @@ pub use dense::NeuraDenseLayer;
mod dropout;
pub use dropout::NeuraDropoutLayer;
+mod softmax;
+pub use softmax::NeuraSoftmaxLayer;
pub trait NeuraLayer {
type Input;
type Output;
@@ -13,18 +16,34 @@ pub trait NeuraLayer {
#[macro_export]
macro_rules! neura_layer {
-( "dense", $activation:expr, $output:expr ) => {
-NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation)
-as NeuraDenseLayer<_, _, $output>
+( "dense", $( $shape:expr ),*; $activation:expr ) => {
+$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
+as neura_layer!("_dense_shape", $($shape),*)
+};
+( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
+$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
+as neura_layer!("_dense_shape", $($shape),*)
+};
+( "_dense_shape", $output:expr ) => {
+$crate::layer::NeuraDenseLayer<_, _, _, $output>
};
-( "dense", $activation:expr, $output:expr, $input:expr ) => {
-NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation)
-as NeuraDenseLayer<_, $input, $output>
+( "_dense_shape", $input:expr, $output:expr ) => {
+$crate::layer::NeuraDenseLayer<_, _, $input, $output>
};
( "dropout", $probability:expr ) => {
-NeuraDropoutLayer::new($probability, rand::thread_rng())
-as NeuraDropoutLayer<_, _>
+$crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
+as $crate::layer::NeuraDropoutLayer<_, _>
+};
+( "softmax" ) => {
+$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
+};
+( "softmax", $length:expr ) => {
+$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
};
}

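The macro hunk above is the core of the re-ordering: a dense layer is now written shape first, then a semicolon, then the activation and an optional regularizer, with the "_dense_shape" arms as internal helpers and "softmax" as a new layer kind. A hedged usage sketch, assuming the crate and the nightly feature used by the examples above:

#![feature(generic_arg_infer)]

use neuramethyst::derivable::activation::{LeakyRelu, Linear};
use neuramethyst::derivable::regularize::NeuraElastic;
use neuramethyst::prelude::*;

fn main() {
    let _network = neura_network![
        // input 2, output 8; no regularization (defaults to NeuraL0)
        neura_layer!("dense", 2, 8; LeakyRelu(0.01)),
        // output 4, input inferred; elastic-net regularization
        neura_layer!("dense", 4; LeakyRelu(0.01), NeuraElastic::new(0.0001, 0.002)),
        neura_layer!("dense", 2; Linear),
        // length inferred from the previous layer
        neura_layer!("softmax"),
    ];
}
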
@@ -0,0 +1,155 @@
use crate::{train::NeuraTrainableLayer, utils::multiply_vectors_pointwise};
use super::NeuraLayer;
#[non_exhaustive]
#[derive(Clone, Debug)]
pub struct NeuraSoftmaxLayer<const LENGTH: usize>;
impl<const LENGTH: usize> NeuraSoftmaxLayer<LENGTH> {
pub fn new() -> Self {
Self
}
}
impl<const LENGTH: usize> NeuraLayer for NeuraSoftmaxLayer<LENGTH> {
type Input = [f64; LENGTH];
type Output = [f64; LENGTH];
fn eval(&self, input: &Self::Input) -> Self::Output {
let mut res = input.clone();
let mut max = 0.0;
for item in &res {
if *item > max {
max = *item;
}
}
for item in &mut res {
*item = (*item - max).exp();
}
let mut sum = 0.0;
for item in &res {
sum += item;
}
for item in &mut res {
*item /= sum;
}
res
}
}
impl<const LENGTH: usize> NeuraTrainableLayer for NeuraSoftmaxLayer<LENGTH> {
type Delta = ();
fn backpropagate(
&self,
input: &Self::Input,
mut epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
// Note: a constant value can be added to `input` to increase precision
let evaluated = self.eval(input);
// Compute $a_{l-1,i} \epsilon_{l,i}$
epsilon = multiply_vectors_pointwise(&epsilon, &evaluated);
// Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$
let sum_diagonal_terms: f64 = epsilon.iter().copied().sum();
for i in 0..LENGTH {
// Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and add it to $a_{l-1,i} \epsilon_{l,i}$
epsilon[i] -= evaluated[i] * sum_diagonal_terms;
}
(epsilon, ())
}
fn regularize(&self) -> Self::Delta {
()
}
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop
}
}
#[cfg(test)]
mod test {
use crate::algebra::NeuraVectorSpace;
use crate::utils::{
matrix_from_diagonal, multiply_matrix_vector, reverse_dot_product, uniform_vector,
};
use super::*;
#[test]
fn test_softmax_eval() {
const EPSILON: f64 = 0.000002;
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<3>;
let result = layer.eval(&[1.0, 2.0, 8.0]);
assert!((result[0] - 0.0009088).abs() < EPSILON);
assert!((result[1] - 0.0024704).abs() < EPSILON);
assert!((result[2] - 0.9966208).abs() < EPSILON);
}
// Based on https://stats.stackexchange.com/a/306710
#[test]
fn test_softmax_backpropagation_two() {
const EPSILON: f64 = 0.000001;
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<2>;
for input1 in [0.2, 0.3, 0.5] as [f64; 3] {
for input2 in [0.7, 1.1, 1.3] {
let input = [input1, input2];
let sum = input1.exp() + input2.exp();
let output = [input1.exp() / sum, input2.exp() / sum];
for epsilon1 in [1.7, 1.9, 2.3] {
for epsilon2 in [2.9, 3.1, 3.7] {
let epsilon = [epsilon1, epsilon2];
let (epsilon, _) = layer.backpropagate(&input, epsilon);
let expected = [
output[0] * (1.0 - output[0]) * epsilon1
- output[1] * output[0] * epsilon2,
output[1] * (1.0 - output[1]) * epsilon2
- output[1] * output[0] * epsilon1,
];
assert!((epsilon[0] - expected[0]).abs() < EPSILON);
assert!((epsilon[1] - expected[1]).abs() < EPSILON);
}
}
}
}
}
// Based on https://e2eml.school/softmax.html
#[test]
fn test_softmax_backpropagation() {
const EPSILON: f64 = 0.000001;
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<4>;
for _ in 0..100 {
let input: [f64; 4] = uniform_vector();
let evaluated = layer.eval(&input);
let loss: [f64; 4] = uniform_vector();
let mut derivative = reverse_dot_product(&evaluated, &evaluated);
derivative.mul_assign(-1.0);
derivative.add_assign(&matrix_from_diagonal(&evaluated));
let expected = multiply_matrix_vector(&derivative, &loss);
let (actual, _) = layer.backpropagate(&input, loss);
for i in 0..4 {
assert!((expected[i] - actual[i]).abs() < EPSILON);
}
}
}
}

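The backpropagation above applies the softmax Jacobian J = diag(s) - s sᵀ to the incoming epsilon without building the matrix: each component becomes s_i * eps_i - s_i * sum_k(s_k * eps_k), which is what test_softmax_backpropagation checks against the explicit matrix. A standalone sketch of the same Jacobian-vector product (Vec-based, not the crate's fixed-size arrays):

// Softmax with max subtraction, then the matrix-free product (diag(s) - s sᵀ) · eps.
fn softmax(input: &[f64]) -> Vec<f64> {
    let max = input.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    let exps: Vec<f64> = input.iter().map(|x| (x - max).exp()).collect();
    let sum: f64 = exps.iter().sum();
    exps.into_iter().map(|x| x / sum).collect()
}

fn softmax_backprop(input: &[f64], eps: &[f64]) -> Vec<f64> {
    let s = softmax(input);
    // dot = sum_k s_k * eps_k
    let dot: f64 = s.iter().zip(eps).map(|(si, ei)| si * ei).sum();
    // out_i = s_i * eps_i - s_i * dot
    s.iter().zip(eps).map(|(si, ei)| si * (ei - dot)).collect()
}

fn main() {
    println!("{:?}", softmax_backprop(&[1.0, 2.0, 8.0], &[0.5, -1.0, 0.25]));
}
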
@@ -10,15 +10,11 @@ mod utils;
pub mod prelude {
// Macros
-pub use crate::{neura_network, neura_layer};
+pub use crate::{neura_layer, neura_network};
// Structs and traits
-pub use crate::network::{NeuraNetwork};
-pub use crate::layer::{
-NeuraLayer,
-NeuraDenseLayer,
-NeuraDropoutLayer
-};
+pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer};
+pub use crate::network::NeuraNetwork;
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
pub use crate::utils::cycle_shuffling;
}

@@ -82,6 +82,10 @@ impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
self.layer.backpropagate(&input, backprop_epsilon)
}
+fn regularize(&self) -> Self::Delta {
+self.layer.regularize()
+}
fn prepare_epoch(&mut self) {
self.layer.prepare_epoch();
}
@@ -117,6 +121,10 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Out
(backprop_gradient, (layer_gradient, weights_gradient))
}
+fn regularize(&self) -> Self::Delta {
+(self.layer.regularize(), self.child_network.regularize())
+}
fn prepare_epoch(&mut self) {
self.layer.prepare_epoch();
self.child_network.prepare_epoch();
@@ -145,7 +153,11 @@ macro_rules! neura_network {
#[cfg(test)]
mod test {
-use crate::{derivable::activation::Relu, layer::NeuraDenseLayer, neura_layer};
+use crate::{
+derivable::{activation::Relu, regularize::NeuraL0},
+layer::NeuraDenseLayer,
+neura_layer,
+};
use super::*;
@@ -154,23 +166,24 @@ mod test {
let mut rng = rand::thread_rng();
let _ = neura_network![
-NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, 8, 16>,
+NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
-NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, _, 12>,
+NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>,
-NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, _, 2>
+NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 2>
];
-let _ =
-neura_network![NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, 8, 16>,];
+let _ = neura_network![
+NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
+];
let _ = neura_network![
-NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, 8, 16>,
+NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
-NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, _, 12>,
+NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>,
];
let _ = neura_network![
-neura_layer!("dense", Relu, 16, 8),
+neura_layer!("dense", 8, 16; Relu),
-neura_layer!("dense", Relu, 12),
+neura_layer!("dense", 12; Relu),
-neura_layer!("dense", Relu, 2)
+neura_layer!("dense", 2; Relu)
];
}
}

@@ -1,8 +1,5 @@
use crate::{
-algebra::NeuraVectorSpace,
-derivable::NeuraLoss,
-layer::NeuraLayer,
-network::NeuraNetwork,
+algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer, network::NeuraNetwork,
};
// TODO: move this to layer/mod.rs
@@ -26,6 +23,9 @@ pub trait NeuraTrainableLayer: NeuraLayer {
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
+/// Computes the regularization
+fn regularize(&self) -> Self::Delta;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
@@ -51,6 +51,9 @@ pub trait NeuraTrainable: NeuraLayer {
loss: Loss,
) -> (Self::Input, Self::Delta);
+/// Should return the regularization gradient
+fn regularize(&self) -> Self::Delta;
/// Called before an epoch begins, to allow the network to set itself up for training.
fn prepare_epoch(&mut self);
@@ -89,8 +92,8 @@ impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
}
}
-impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraGradientSolver<[f64; N], Loss::Target>
-for NeuraBackprop<Loss>
+impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone>
+NeuraGradientSolver<[f64; N], Loss::Target> for NeuraBackprop<Loss>
{
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self,
@@ -184,15 +187,17 @@ impl NeuraBatchedTrainer {
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>,
Layer::Input: Clone,
{
-// TODO: apply shuffling?
let mut iter = inputs.into_iter();
let factor = -self.learning_rate / (self.batch_size as f64);
let momentum_factor = self.learning_momentum / self.learning_rate;
+let reg_factor = -self.learning_rate;
// Contains `momentum_factor * factor * gradient_sum_previous_iter`
-let mut previous_gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
+let mut previous_gradient_sum =
+<NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
'd: for epoch in 0..self.epochs {
-let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
+let mut gradient_sum =
+<NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
network.prepare_epoch();
for _ in 0..self.batch_size {
@@ -205,6 +210,12 @@ impl NeuraBatchedTrainer {
}
gradient_sum.mul_assign(factor);
+// Add regularization gradient (TODO: check if it can be factored out of momentum)
+let mut reg_gradient = network.regularize();
+reg_gradient.mul_assign(reg_factor);
+gradient_sum.add_assign(&reg_gradient);
network.apply_gradient(&gradient_sum);
if self.learning_momentum != 0.0 {
@@ -230,23 +241,21 @@ impl NeuraBatchedTrainer {
#[cfg(test)]
mod test {
-use crate::{layer::NeuraDenseLayer, derivable::{activation::Linear, loss::Euclidean}};
use super::*;
+use crate::{
+derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
+layer::NeuraDenseLayer,
+};
#[test]
fn test_backpropagation_simple() {
for wa in [0.0, 0.25, 0.5, 1.0] {
for wb in [0.0, 0.25, 0.5, 1.0] {
-let network = NeuraNetwork::new(
-NeuraDenseLayer::new([[wa, wb]], [0.0], Linear),
-()
-);
+let network =
+NeuraNetwork::new(NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), ());
-let gradient = NeuraBackprop::new(Euclidean).get_gradient(
-&network,
-&[1.0, 1.0],
-&[0.0]
-);
+let gradient =
+NeuraBackprop::new(Euclidean).get_gradient(&network, &[1.0, 1.0], &[0.0]);
let expected = wa + wb;
assert!((gradient.0[0][0] - expected) < 0.001);

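With the regularize() hook in place, each batch update above combines two scaled terms before apply_gradient: the summed loss gradient times -learning_rate / batch_size, and the regularization gradient times -learning_rate (momentum handled separately). For a single scalar weight this reduces to the sketch below, where reg_derivative is a stand-in for network.regularize():

// Scalar sketch of one batched update (momentum omitted):
//   w += factor * sum(grads) + reg_factor * reg'(w)
// with factor = -lr / batch_size and reg_factor = -lr, as in the trainer above.
fn batched_update(
    w: f64,
    batch_gradients: &[f64],
    reg_derivative: impl Fn(f64) -> f64,
    learning_rate: f64,
) -> f64 {
    let factor = -learning_rate / (batch_gradients.len() as f64);
    let reg_factor = -learning_rate;
    let scaled_gradient: f64 = batch_gradients.iter().sum::<f64>() * factor;
    let scaled_reg = reg_derivative(w) * reg_factor;
    w + scaled_gradient + scaled_reg
}

fn main() {
    let l2 = |w: f64| 0.002 * w; // same shape as NeuraL2(0.002).derivate
    let updated = batched_update(0.5, &[0.1, 0.3, -0.2, 0.2], l2, 0.25);
    println!("updated weight: {updated}");
}
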
@@ -33,6 +33,7 @@ pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT:
result
}
+// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$
pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
left: &[f64; HEIGHT],
right: &[f64; WIDTH],
@@ -48,6 +49,32 @@ pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
result
}
+pub(crate) fn multiply_vectors_pointwise<const LENGTH: usize>(
+left: &[f64; LENGTH],
+right: &[f64; LENGTH],
+) -> [f64; LENGTH] {
+let mut result = [0.0; LENGTH];
+for i in 0..LENGTH {
+result[i] = left[i] * right[i];
+}
+result
+}
+#[cfg(test)]
+pub(crate) fn matrix_from_diagonal<const LENGTH: usize>(
+vector: &[f64; LENGTH],
+) -> [[f64; LENGTH]; LENGTH] {
+let mut result = [[0.0; LENGTH]; LENGTH];
+for i in 0..LENGTH {
+result[i][i] = vector[i];
+}
+result
+}
#[allow(dead_code)]
pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
for i in 0..N {
@@ -89,7 +116,10 @@ struct ShuffleCycled<I: Iterator, R: rand::Rng> {
rng: R,
}
-impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R> where I::Item: Clone {
+impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R>
+where
+I::Item: Clone,
+{
type Item = I::Item;
#[inline]
@@ -99,7 +129,7 @@ impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R> where I::Item:
if let Some(next) = self.iter.next() {
// Base iterator is not empty yet
self.buffer.push(next.clone());
-return Some(next)
+return Some(next);
} else if self.buffer.len() > 0 {
if self.index == 0 {
// Shuffle the vector and return the first element, setting the index to 1
@@ -118,12 +148,9 @@ impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R> where I::Item:
}
}
-pub fn cycle_shuffling<I: Iterator>(
-iter: I,
-rng: impl rand::Rng
-) -> impl Iterator<Item=I::Item>
+pub fn cycle_shuffling<I: Iterator>(iter: I, rng: impl rand::Rng) -> impl Iterator<Item = I::Item>
where
-I::Item: Clone
+I::Item: Clone,
{
let size_hint = iter.size_hint();
let size_hint = size_hint.1.unwrap_or(size_hint.0).max(1);
@@ -132,6 +159,19 @@ where
buffer: Vec::with_capacity(size_hint),
index: 0,
iter,
-rng
+rng,
}
}
+#[cfg(test)]
+pub(crate) fn uniform_vector<const LENGTH: usize>() -> [f64; LENGTH] {
+use rand::Rng;
+let mut res = [0.0; LENGTH];
+let mut rng = rand::thread_rng();
+for i in 0..LENGTH {
+res[i] = rng.gen();
+}
+res
+}
