Re-order arguments of neura_layer, implement softmax and normalization

main
Shad Amethyst 2 years ago
parent 220c61ff6b
commit bca56a5557

@ -2,22 +2,26 @@
use std::io::Write;
use neuramethyst::prelude::*;
use neuramethyst::derivable::activation::{Relu, Tanh, LeakyRelu};
use neuramethyst::derivable::activation::Linear;
#[allow(unused_imports)]
use neuramethyst::derivable::activation::{LeakyRelu, Relu, Tanh};
use neuramethyst::derivable::loss::Euclidean;
use neuramethyst::derivable::regularize::NeuraElastic;
use neuramethyst::prelude::*;
use rand::Rng;
fn main() {
let mut network = neura_network![
neura_layer!("dense", LeakyRelu(0.01), 9, 2),
neura_layer!("dense", 2, 8; LeakyRelu(0.01)),
neura_layer!("dropout", 0.1),
neura_layer!("dense", LeakyRelu(0.01), 9),
neura_layer!("dense", 8; LeakyRelu(0.01), NeuraElastic::new(0.0001, 0.002)),
neura_layer!("dropout", 0.3),
neura_layer!("dense", LeakyRelu(0.01), 6),
neura_layer!("dense", 8; LeakyRelu(0.01), NeuraElastic::new(0.0001, 0.002)),
neura_layer!("dropout", 0.1),
neura_layer!("dense", LeakyRelu(0.01), 4),
neura_layer!("dense", LeakyRelu(0.1), 2)
neura_layer!("dense", 4; LeakyRelu(0.1), NeuraElastic::new(0.0001, 0.002)),
neura_layer!("dense", 2; Linear),
neura_layer!("softmax"),
];
// println!("{:#?}", network);
@ -39,20 +43,23 @@ fn main() {
let test_inputs: Vec<_> = inputs.clone().take(100).collect();
let mut trainer = NeuraBatchedTrainer::new(0.1, 4000);
trainer.log_epochs = 500;
let mut trainer = NeuraBatchedTrainer::new(0.25, 1000);
trainer.log_epochs = 50;
trainer.learning_momentum = 0.05;
trainer.batch_size = 2000;
trainer.train(
NeuraBackprop::new(Euclidean),
&mut network,
inputs,
&test_inputs
&test_inputs,
);
let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
for (input, _target) in test_inputs {
let guess = argmax(&network.eval(&input));
writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
// println!("{:?}", network.eval(&input));
}
// println!("{:#?}", network);

@ -1,25 +1,30 @@
#![feature(generic_arg_infer)]
use neuramethyst::prelude::*;
use neuramethyst::derivable::activation::{Relu};
use neuramethyst::derivable::activation::Relu;
use neuramethyst::derivable::loss::Euclidean;
use neuramethyst::prelude::*;
fn main() {
let mut network = neura_network![
neura_layer!("dense", Relu, 4, 2),
neura_layer!("dense", Relu, 3),
neura_layer!("dense", Relu, 1)
neura_layer!("dense", 2, 4; Relu),
neura_layer!("dense", 3; Relu),
neura_layer!("dense", 1; Relu)
];
let inputs = [
([0.0, 0.0], [0.0]),
([0.0, 1.0], [1.0]),
([1.0, 0.0], [1.0]),
([1.0, 1.0], [0.0])
([1.0, 1.0], [0.0]),
];
for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {:.3}", &input, target[0], network.eval(&input)[0]);
println!(
"Input: {:?}, target: {}, actual: {:.3}",
&input,
target[0],
network.eval(&input)[0]
);
}
let mut trainer = NeuraBatchedTrainer::new(0.05, 1000);
@ -35,6 +40,11 @@ fn main() {
);
for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {:.3}", &input, target[0], network.eval(&input)[0]);
println!(
"Input: {:?}, target: {}, actual: {:.3}",
&input,
target[0],
network.eval(&input)[0]
);
}
}

@ -36,10 +36,9 @@ impl NeuraDerivable<f32> for Relu {
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct LeakyRelu(pub f64);
pub struct LeakyRelu<F>(pub F);
impl NeuraDerivable<f64> for LeakyRelu {
impl NeuraDerivable<f64> for LeakyRelu<f64> {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
if input > 0.0 {
@ -59,13 +58,13 @@ impl NeuraDerivable<f64> for LeakyRelu {
}
}
impl NeuraDerivable<f32> for LeakyRelu {
impl NeuraDerivable<f32> for LeakyRelu<f32> {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
if input > 0.0 {
input
} else {
(self.0 as f32) * input
self.0 * input
}
}
@ -74,7 +73,7 @@ impl NeuraDerivable<f32> for LeakyRelu {
if input > 0.0 {
1.0
} else {
self.0 as f32
self.0
}
}
}

@ -1,5 +1,6 @@
pub mod activation;
pub mod loss;
pub mod regularize;
pub trait NeuraDerivable<F> {
fn eval(&self, input: F) -> F;

@ -0,0 +1,134 @@
use super::*;
/// The default regularization: no regularization at all.
///
/// Both the penalty and its derivative are zero for every weight.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL0;

impl NeuraDerivable<f64> for NeuraL0 {
    /// Returns a zero penalty, whatever the weight is.
    #[inline(always)]
    fn eval(&self, _: f64) -> f64 {
        0.0_f64
    }

    /// Returns a zero gradient, whatever the weight is.
    #[inline(always)]
    fn derivate(&self, _: f64) -> f64 {
        0.0_f64
    }
}

impl NeuraDerivable<f32> for NeuraL0 {
    /// Returns a zero penalty, whatever the weight is.
    #[inline(always)]
    fn eval(&self, _: f32) -> f32 {
        0.0_f32
    }

    /// Returns a zero gradient, whatever the weight is.
    #[inline(always)]
    fn derivate(&self, _: f32) -> f32 {
        0.0_f32
    }
}
/// L1 regularization: the penalty of a weight is `factor * |weight|`.
///
/// The wrapped value is the regularization factor.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL1<F>(pub F);

impl NeuraDerivable<f64> for NeuraL1<f64> {
    /// Returns `factor * |input|`.
    #[inline(always)]
    fn eval(&self, input: f64) -> f64 {
        let factor = self.0;
        factor * input.abs()
    }

    /// Returns `factor` for positive weights, `-factor` for negative ones and `0` otherwise.
    #[inline(always)]
    fn derivate(&self, at: f64) -> f64 {
        let factor = self.0;
        match at.partial_cmp(&0.0) {
            Some(std::cmp::Ordering::Greater) => factor,
            Some(std::cmp::Ordering::Less) => -factor,
            // Zero (of either sign) and NaN are neither above nor below zero.
            _ => 0.0,
        }
    }
}

impl NeuraDerivable<f32> for NeuraL1<f32> {
    /// Returns `factor * |input|`.
    #[inline(always)]
    fn eval(&self, input: f32) -> f32 {
        let factor = self.0;
        factor * input.abs()
    }

    /// Returns `factor` for positive weights, `-factor` for negative ones and `0` otherwise.
    #[inline(always)]
    fn derivate(&self, at: f32) -> f32 {
        let factor = self.0;
        match at.partial_cmp(&0.0) {
            Some(std::cmp::Ordering::Greater) => factor,
            Some(std::cmp::Ordering::Less) => -factor,
            // Zero (of either sign) and NaN are neither above nor below zero.
            _ => 0.0,
        }
    }
}
/// L2 regularization: the penalty of a weight is `factor * weight²`.
///
/// The wrapped value is the regularization factor.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL2<F>(pub F);

impl NeuraDerivable<f64> for NeuraL2<f64> {
    /// Returns `factor * input²`.
    #[inline(always)]
    fn eval(&self, input: f64) -> f64 {
        self.0 * (input * input)
    }

    /// Returns `2 * factor * at`, the derivative of `factor * at²`.
    #[inline(always)]
    fn derivate(&self, at: f64) -> f64 {
        // The factor of 2 keeps `derivate` consistent with `eval`.
        2.0 * self.0 * at
    }
}

impl NeuraDerivable<f32> for NeuraL2<f32> {
    /// Returns `factor * input²`.
    #[inline(always)]
    fn eval(&self, input: f32) -> f32 {
        self.0 * (input * input)
    }

    /// Returns `2 * factor * at`, the derivative of `factor * at²`.
    #[inline(always)]
    fn derivate(&self, at: f32) -> f32 {
        // The factor of 2 keeps `derivate` consistent with `eval`.
        2.0 * self.0 * at
    }
}
/// Elastic regularization: the sum of an L1 term and an L2 term, each with its own factor.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraElastic<F> {
    /// Factor handed to the [`NeuraL1`] term.
    pub l1: F,
    /// Factor handed to the [`NeuraL2`] term.
    pub l2: F,
}

impl<F> NeuraElastic<F> {
    /// Creates an elastic regularization from its L1 and L2 factors, in that order.
    pub fn new(l1_factor: F, l2_factor: F) -> Self {
        let (l1, l2) = (l1_factor, l2_factor);
        Self { l1, l2 }
    }
}

impl NeuraDerivable<f64> for NeuraElastic<f64> {
    /// Returns the L1 penalty plus the L2 penalty of `input`.
    #[inline(always)]
    fn eval(&self, input: f64) -> f64 {
        let l1_term = NeuraL1(self.l1).eval(input);
        let l2_term = NeuraL2(self.l2).eval(input);
        l1_term + l2_term
    }

    /// Returns the L1 derivative plus the L2 derivative at `at`.
    #[inline(always)]
    fn derivate(&self, at: f64) -> f64 {
        let l1_term = NeuraL1(self.l1).derivate(at);
        let l2_term = NeuraL2(self.l2).derivate(at);
        l1_term + l2_term
    }
}

impl NeuraDerivable<f32> for NeuraElastic<f32> {
    /// Returns the L1 penalty plus the L2 penalty of `input`.
    #[inline(always)]
    fn eval(&self, input: f32) -> f32 {
        let l1_term = NeuraL1(self.l1).eval(input);
        let l2_term = NeuraL2(self.l2).eval(input);
        l1_term + l2_term
    }

    /// Returns the L1 derivative plus the L2 derivative at `at`.
    #[inline(always)]
    fn derivate(&self, at: f32) -> f32 {
        let l1_term = NeuraL1(self.l1).derivate(at);
        let l2_term = NeuraL2(self.l2).derivate(at);
        l1_term + l2_term
    }
}

@ -1,39 +1,53 @@
use super::NeuraLayer;
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
use crate::{
algebra::NeuraVectorSpace,
derivable::NeuraDerivable,
train::NeuraTrainableLayer,
utils::{multiply_matrix_transpose_vector, multiply_matrix_vector, reverse_dot_product},
};
use rand_distr::Distribution;
use rand::Rng;
use rand_distr::Distribution;
/// Dense layer over `f64` values, parameterized by its input length `INPUT_LEN`
/// and its output length `OUTPUT_LEN`.
///
/// `Act` is the type of the activation function and `Reg` the type of the
/// regularization function.
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> {
/// Weight matrix, stored as `OUTPUT_LEN` rows of `INPUT_LEN` coefficients.
weights: [[f64; INPUT_LEN]; OUTPUT_LEN],
/// Bias vector, one entry per row of `weights`.
bias: [f64; OUTPUT_LEN],
/// Activation function of the layer.
activation: Act,
/// Regularization function; `regularize` evaluates its derivative on every weight.
regularization: Reg,
}
impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
/// Builds a layer from explicit weights, biases, activation and regularization.
///
/// The values are stored as given; nothing is randomized or validated here.
pub fn new(
weights: [[f64; INPUT_LEN]; OUTPUT_LEN],
bias: [f64; OUTPUT_LEN],
activation: Act,
regularization: Reg,
) -> Self {
Self {
weights,
bias,
activation,
regularization,
}
}
pub fn from_rng(rng: &mut impl Rng, activation: Act) -> Self {
pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN];
let distribution = rand_distr::Normal::new(0.0, 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64)).unwrap();
let distribution =
rand_distr::Normal::new(0.0, 2.0 / (INPUT_LEN as f64 + OUTPUT_LEN as f64)).unwrap();
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
@ -46,12 +60,17 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
// Biases are zero-initialized, as this shouldn't cause any issues during training
bias: [0.0; OUTPUT_LEN],
activation,
regularization,
}
}
}
impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraLayer
for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
type Input = [f64; INPUT_LEN];
@ -68,13 +87,21 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
}
}
impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize> NeuraTrainableLayer
for NeuraDenseLayer<Act, INPUT_LEN, OUTPUT_LEN>
impl<
Act: NeuraDerivable<f64>,
Reg: NeuraDerivable<f64>,
const INPUT_LEN: usize,
const OUTPUT_LEN: usize,
> NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{
type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]);
// TODO: double-check the math in this
fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta) {
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
let evaluated = multiply_matrix_vector(&self.weights, input);
// Compute delta from epsilon, with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone();
@ -96,17 +123,32 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
}
/// Computes the regularization gradient of the layer.
///
/// Each weight's entry is the derivative of the regularization function at that weight;
/// the bias part of the gradient is left at zero.
fn regularize(&self) -> Self::Delta {
    let mut weights_gradient = [[0.0; INPUT_LEN]; OUTPUT_LEN];

    for (gradient_row, weight_row) in weights_gradient.iter_mut().zip(self.weights.iter()) {
        for (gradient, &weight) in gradient_row.iter_mut().zip(weight_row.iter()) {
            *gradient = self.regularization.derivate(weight);
        }
    }

    // Note: biases aren't taken into account here, as per https://stats.stackexchange.com/questions/153605/no-regularisation-term-for-bias-unit-in-neural-network
    (weights_gradient, [0.0; OUTPUT_LEN])
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::derivable::activation::Relu;
use crate::derivable::{activation::Relu, regularize::NeuraL0};
#[test]
fn test_from_rng() {
let mut rng = rand::thread_rng();
let layer: NeuraDenseLayer<_, 64, 32> = NeuraDenseLayer::from_rng(&mut rng, Relu);
let layer: NeuraDenseLayer<_, _, 64, 32> =
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0);
let mut input = [0.0; 64];
for x in 0..64 {
input[x] = rng.gen();

@ -59,6 +59,10 @@ impl<const LENGTH: usize, R: Rng> NeuraTrainableLayer for NeuraDropoutLayer<LENG
(epsilon, ())
}
/// Returns the regularization gradient of the layer, which is the unit value:
/// this layer's `Delta` carries no data.
fn regularize(&self) -> Self::Delta {
    // Nothing to regularize: the empty block evaluates to `()`.
}
#[inline(always)]
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop

@ -4,6 +4,9 @@ pub use dense::NeuraDenseLayer;
mod dropout;
pub use dropout::NeuraDropoutLayer;
mod softmax;
pub use softmax::NeuraSoftmaxLayer;
pub trait NeuraLayer {
type Input;
type Output;
@ -13,18 +16,34 @@ pub trait NeuraLayer {
#[macro_export]
macro_rules! neura_layer {
( "dense", $activation:expr, $output:expr ) => {
NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation)
as NeuraDenseLayer<_, _, $output>
( "dense", $( $shape:expr ),*; $activation:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $crate::derivable::regularize::NeuraL0)
as neura_layer!("_dense_shape", $($shape),*)
};
( "dense", $( $shape:expr ),*; $activation:expr, $regularization:expr ) => {
$crate::layer::NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation, $regularization)
as neura_layer!("_dense_shape", $($shape),*)
};
( "_dense_shape", $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, _, $output>
};
( "dense", $activation:expr, $output:expr, $input:expr ) => {
NeuraDenseLayer::from_rng(&mut rand::thread_rng(), $activation)
as NeuraDenseLayer<_, $input, $output>
( "_dense_shape", $input:expr, $output:expr ) => {
$crate::layer::NeuraDenseLayer<_, _, $input, $output>
};
( "dropout", $probability:expr ) => {
NeuraDropoutLayer::new($probability, rand::thread_rng())
as NeuraDropoutLayer<_, _>
$crate::layer::NeuraDropoutLayer::new($probability, rand::thread_rng())
as $crate::layer::NeuraDropoutLayer<_, _>
};
( "softmax" ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<_>
};
( "softmax", $length:expr ) => {
$crate::layer::NeuraSoftmaxLayer::new() as $crate::layer::NeuraSoftmaxLayer<$length>
};
}

@ -0,0 +1,155 @@
use crate::{train::NeuraTrainableLayer, utils::multiply_vectors_pointwise};
use super::NeuraLayer;
/// Softmax layer: maps `LENGTH` real values to `LENGTH` positive values that sum to one.
///
/// The layer holds no state and has no parameters.
#[non_exhaustive]
#[derive(Clone, Debug, Default)]
pub struct NeuraSoftmaxLayer<const LENGTH: usize>;

impl<const LENGTH: usize> NeuraSoftmaxLayer<LENGTH> {
    /// Creates a new softmax layer.
    pub fn new() -> Self {
        Self
    }
}

impl<const LENGTH: usize> NeuraLayer for NeuraSoftmaxLayer<LENGTH> {
    type Input = [f64; LENGTH];
    type Output = [f64; LENGTH];

    /// Returns `exp(input[i]) / sum_k exp(input[k])` for every `i`.
    ///
    /// The largest input is subtracted from every component before exponentiating.
    /// This leaves the result mathematically unchanged, but guarantees that the largest
    /// exponent is exactly `0`, so the sum is at least `1` and cannot underflow to zero.
    fn eval(&self, input: &Self::Input) -> Self::Output {
        // Start from negative infinity rather than 0, so that inputs which are all negative
        // are still shifted by their true maximum (otherwise every `exp` may underflow to 0
        // and the division below yields NaN).
        let max = input.iter().copied().fold(f64::NEG_INFINITY, f64::max);

        let mut res = *input;
        for item in &mut res {
            *item = (*item - max).exp();
        }

        let mut sum = 0.0;
        for item in &res {
            sum += item;
        }

        for item in &mut res {
            *item /= sum;
        }

        res
    }
}
impl<const LENGTH: usize> NeuraTrainableLayer for NeuraSoftmaxLayer<LENGTH> {
    /// The layer has nothing to train, so its gradient is the unit type.
    type Delta = ();

    /// Propagates `epsilon` backwards through the softmax.
    ///
    /// With `a = eval(input)`, component `i` of the result is
    /// $a_i \epsilon_i - a_i \sum_{k}{a_k \epsilon_k}$.
    fn backpropagate(
        &self,
        input: &Self::Input,
        mut epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta) {
        // Note: softmax is unchanged when the same constant is added to every component of
        // `input`, which leaves room for shifting the input to improve precision.
        let activations = self.eval(input);

        // Compute $a_i \epsilon_i$ for every component
        epsilon = multiply_vectors_pointwise(&epsilon, &activations);

        // Compute $\sum_{k}{a_k \epsilon_k}$
        let weighted_sum: f64 = epsilon.iter().copied().sum();

        // Subtract $a_i \sum_{k}{a_k \epsilon_k}$ from $a_i \epsilon_i$
        for (term, activation) in epsilon.iter_mut().zip(activations.iter()) {
            *term -= activation * weighted_sum;
        }

        (epsilon, ())
    }

    /// Returns the (empty) regularization gradient.
    fn regularize(&self) -> Self::Delta {
        // `Self::Delta` is `()`: the empty block is the whole gradient.
    }

    /// Does nothing, as the gradient carries no data.
    fn apply_gradient(&mut self, _gradient: &Self::Delta) {
        // Noop
    }
}
// Unit tests for the softmax layer: one for `eval`, two for `backpropagate`.
#[cfg(test)]
mod test {
use crate::algebra::NeuraVectorSpace;
use crate::utils::{
matrix_from_diagonal, multiply_matrix_vector, reverse_dot_product, uniform_vector,
};
use super::*;
// Checks `eval` on a fixed input against precomputed reference values.
#[test]
fn test_softmax_eval() {
// Tolerance on each output component
const EPSILON: f64 = 0.000002;
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<3>;
let result = layer.eval(&[1.0, 2.0, 8.0]);
assert!((result[0] - 0.0009088).abs() < EPSILON);
assert!((result[1] - 0.0024704).abs() < EPSILON);
assert!((result[2] - 0.9966208).abs() < EPSILON);
}
// Checks `backpropagate` on two inputs against the closed-form expression
// written out below, over a small grid of inputs and incoming gradients.
// Based on https://stats.stackexchange.com/a/306710
#[test]
fn test_softmax_backpropagation_two() {
const EPSILON: f64 = 0.000001;
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<2>;
for input1 in [0.2, 0.3, 0.5] as [f64; 3] {
for input2 in [0.7, 1.1, 1.3] {
let input = [input1, input2];
// Softmax output, computed by hand rather than through the layer
let sum = input1.exp() + input2.exp();
let output = [input1.exp() / sum, input2.exp() / sum];
for epsilon1 in [1.7, 1.9, 2.3] {
for epsilon2 in [2.9, 3.1, 3.7] {
let epsilon = [epsilon1, epsilon2];
let (epsilon, _) = layer.backpropagate(&input, epsilon);
// Diagonal term `a_i (1 - a_i) ε_i` minus cross term `a_0 a_1 ε_j`
let expected = [
output[0] * (1.0 - output[0]) * epsilon1
- output[1] * output[0] * epsilon2,
output[1] * (1.0 - output[1]) * epsilon2
- output[1] * output[0] * epsilon1,
];
assert!((epsilon[0] - expected[0]).abs() < EPSILON);
assert!((epsilon[1] - expected[1]).abs() < EPSILON);
}
}
}
}
}
// Checks `backpropagate` on random inputs against an explicitly built
// derivative matrix applied to the incoming gradient.
// Based on https://e2eml.school/softmax.html
#[test]
fn test_softmax_backpropagation() {
const EPSILON: f64 = 0.000001;
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<4>;
for _ in 0..100 {
let input: [f64; 4] = uniform_vector();
let evaluated = layer.eval(&input);
let loss: [f64; 4] = uniform_vector();
// derivative = diag(evaluated) - reverse_dot_product(evaluated, evaluated)
let mut derivative = reverse_dot_product(&evaluated, &evaluated);
derivative.mul_assign(-1.0);
derivative.add_assign(&matrix_from_diagonal(&evaluated));
let expected = multiply_matrix_vector(&derivative, &loss);
let (actual, _) = layer.backpropagate(&input, loss);
for i in 0..4 {
assert!((expected[i] - actual[i]).abs() < EPSILON);
}
}
}
}

@ -10,15 +10,11 @@ mod utils;
pub mod prelude {
// Macros
pub use crate::{neura_network, neura_layer};
pub use crate::{neura_layer, neura_network};
// Structs and traits
pub use crate::network::{NeuraNetwork};
pub use crate::layer::{
NeuraLayer,
NeuraDenseLayer,
NeuraDropoutLayer
};
pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer};
pub use crate::network::NeuraNetwork;
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
pub use crate::utils::cycle_shuffling;
}

@ -82,6 +82,10 @@ impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
self.layer.backpropagate(&input, backprop_epsilon)
}
fn regularize(&self) -> Self::Delta {
self.layer.regularize()
}
fn prepare_epoch(&mut self) {
self.layer.prepare_epoch();
}
@ -117,6 +121,10 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Out
(backprop_gradient, (layer_gradient, weights_gradient))
}
/// Returns the regularization gradient of the network: the gradient of this
/// layer paired with the gradient of the child network.
fn regularize(&self) -> Self::Delta {
    let layer_gradient = self.layer.regularize();
    let child_gradient = self.child_network.regularize();

    (layer_gradient, child_gradient)
}
fn prepare_epoch(&mut self) {
self.layer.prepare_epoch();
self.child_network.prepare_epoch();
@ -145,7 +153,11 @@ macro_rules! neura_network {
#[cfg(test)]
mod test {
use crate::{derivable::activation::Relu, layer::NeuraDenseLayer, neura_layer};
use crate::{
derivable::{activation::Relu, regularize::NeuraL0},
layer::NeuraDenseLayer,
neura_layer,
};
use super::*;
@ -154,23 +166,24 @@ mod test {
let mut rng = rand::thread_rng();
let _ = neura_network![
NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, 8, 16>,
NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, _, 12>,
NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, _, 2>
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 2>
];
let _ =
neura_network![NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, 8, 16>,];
let _ = neura_network![
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
];
let _ = neura_network![
NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, 8, 16>,
NeuraDenseLayer::from_rng(&mut rng, Relu) as NeuraDenseLayer<_, _, 12>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, 8, 16>,
NeuraDenseLayer::from_rng(&mut rng, Relu, NeuraL0) as NeuraDenseLayer<_, _, _, 12>,
];
let _ = neura_network![
neura_layer!("dense", Relu, 16, 8),
neura_layer!("dense", Relu, 12),
neura_layer!("dense", Relu, 2)
neura_layer!("dense", 8, 16; Relu),
neura_layer!("dense", 12; Relu),
neura_layer!("dense", 2; Relu)
];
}
}

@ -1,8 +1,5 @@
use crate::{
algebra::NeuraVectorSpace,
derivable::NeuraLoss,
layer::NeuraLayer,
network::NeuraNetwork,
algebra::NeuraVectorSpace, derivable::NeuraLoss, layer::NeuraLayer, network::NeuraNetwork,
};
// TODO: move this to layer/mod.rs
@ -26,6 +23,9 @@ pub trait NeuraTrainableLayer: NeuraLayer {
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
/// Computes the regularization
fn regularize(&self) -> Self::Delta;
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
@ -51,6 +51,9 @@ pub trait NeuraTrainable: NeuraLayer {
loss: Loss,
) -> (Self::Input, Self::Delta);
/// Should return the regularization gradient
fn regularize(&self) -> Self::Delta;
/// Called before an epoch begins, to allow the network to set itself up for training.
fn prepare_epoch(&mut self);
@ -89,8 +92,8 @@ impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
}
}
impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraGradientSolver<[f64; N], Loss::Target>
for NeuraBackprop<Loss>
impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone>
NeuraGradientSolver<[f64; N], Loss::Target> for NeuraBackprop<Loss>
{
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self,
@ -184,15 +187,17 @@ impl NeuraBatchedTrainer {
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>,
Layer::Input: Clone,
{
// TODO: apply shuffling?
let mut iter = inputs.into_iter();
let factor = -self.learning_rate / (self.batch_size as f64);
let momentum_factor = self.learning_momentum / self.learning_rate;
let reg_factor = -self.learning_rate;
// Contains `momentum_factor * factor * gradient_sum_previous_iter`
let mut previous_gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
let mut previous_gradient_sum =
<NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
'd: for epoch in 0..self.epochs {
let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
let mut gradient_sum =
<NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
network.prepare_epoch();
for _ in 0..self.batch_size {
@ -205,6 +210,12 @@ impl NeuraBatchedTrainer {
}
gradient_sum.mul_assign(factor);
// Add regularization gradient (TODO: check if it can be factored out of momentum)
let mut reg_gradient = network.regularize();
reg_gradient.mul_assign(reg_factor);
gradient_sum.add_assign(&reg_gradient);
network.apply_gradient(&gradient_sum);
if self.learning_momentum != 0.0 {
@ -230,23 +241,21 @@ impl NeuraBatchedTrainer {
#[cfg(test)]
mod test {
use crate::{layer::NeuraDenseLayer, derivable::{activation::Linear, loss::Euclidean}};
use super::*;
use crate::{
derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
layer::NeuraDenseLayer,
};
#[test]
fn test_backpropagation_simple() {
for wa in [0.0, 0.25, 0.5, 1.0] {
for wb in [0.0, 0.25, 0.5, 1.0] {
let network = NeuraNetwork::new(
NeuraDenseLayer::new([[wa, wb]], [0.0], Linear),
()
);
let gradient = NeuraBackprop::new(Euclidean).get_gradient(
&network,
&[1.0, 1.0],
&[0.0]
);
let network =
NeuraNetwork::new(NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), ());
let gradient =
NeuraBackprop::new(Euclidean).get_gradient(&network, &[1.0, 1.0], &[0.0]);
let expected = wa + wb;
assert!((gradient.0[0][0] - expected) < 0.001);

@ -33,6 +33,7 @@ pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT:
result
}
// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$
pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
left: &[f64; HEIGHT],
right: &[f64; WIDTH],
@ -48,6 +49,32 @@ pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
result
}
/// Returns the element-wise product of `left` and `right`:
/// entry `i` of the result is `left[i] * right[i]`.
pub(crate) fn multiply_vectors_pointwise<const LENGTH: usize>(
    left: &[f64; LENGTH],
    right: &[f64; LENGTH],
) -> [f64; LENGTH] {
    std::array::from_fn(|index| left[index] * right[index])
}
/// Builds the square matrix whose diagonal is `vector` and whose other entries are zero.
#[cfg(test)]
pub(crate) fn matrix_from_diagonal<const LENGTH: usize>(
    vector: &[f64; LENGTH],
) -> [[f64; LENGTH]; LENGTH] {
    let mut matrix = [[0.0; LENGTH]; LENGTH];

    for (index, (row, &value)) in matrix.iter_mut().zip(vector.iter()).enumerate() {
        row[index] = value;
    }

    matrix
}
#[allow(dead_code)]
pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
for i in 0..N {
@ -89,7 +116,10 @@ struct ShuffleCycled<I: Iterator, R: rand::Rng> {
rng: R,
}
impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R> where I::Item: Clone {
impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R>
where
I::Item: Clone,
{
type Item = I::Item;
#[inline]
@ -99,7 +129,7 @@ impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R> where I::Item:
if let Some(next) = self.iter.next() {
// Base iterator is not empty yet
self.buffer.push(next.clone());
return Some(next)
return Some(next);
} else if self.buffer.len() > 0 {
if self.index == 0 {
// Shuffle the vector and return the first element, setting the index to 1
@ -118,12 +148,9 @@ impl<I: Iterator, R: rand::Rng> Iterator for ShuffleCycled<I, R> where I::Item:
}
}
pub fn cycle_shuffling<I: Iterator>(
iter: I,
rng: impl rand::Rng
) -> impl Iterator<Item=I::Item>
pub fn cycle_shuffling<I: Iterator>(iter: I, rng: impl rand::Rng) -> impl Iterator<Item = I::Item>
where
I::Item: Clone
I::Item: Clone,
{
let size_hint = iter.size_hint();
let size_hint = size_hint.1.unwrap_or(size_hint.0).max(1);
@ -132,6 +159,19 @@ where
buffer: Vec::with_capacity(size_hint),
index: 0,
iter,
rng
rng,
}
}
/// Returns an array whose entries are each drawn from the thread-local random generator.
// NOTE(review): presumably uniform in `[0, 1)`, as per rand's default `f64` sampling — confirm.
#[cfg(test)]
pub(crate) fn uniform_vector<const LENGTH: usize>() -> [f64; LENGTH] {
    use rand::Rng;

    let mut rng = rand::thread_rng();

    // `from_fn` fills the entries in increasing index order, one draw per entry.
    std::array::from_fn(|_| rng.gen())
}

Loading…
Cancel
Save