parent 220c61ff6b
commit bca56a5557
@@ -0,0 +1,134 @@
use super::*;

/// Default regularization, which is no regularization
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL0;

impl NeuraDerivable<f64> for NeuraL0 {
    #[inline(always)]
    fn eval(&self, _input: f64) -> f64 {
        0.0
    }

    #[inline(always)]
    fn derivate(&self, _at: f64) -> f64 {
        0.0
    }
}

impl NeuraDerivable<f32> for NeuraL0 {
    #[inline(always)]
    fn eval(&self, _input: f32) -> f32 {
        0.0
    }

    #[inline(always)]
    fn derivate(&self, _at: f32) -> f32 {
        0.0
    }
}

#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL1<F>(pub F);

impl NeuraDerivable<f64> for NeuraL1<f64> {
    #[inline(always)]
    fn eval(&self, input: f64) -> f64 {
        self.0 * input.abs()
    }

    #[inline(always)]
    fn derivate(&self, at: f64) -> f64 {
        if at > 0.0 {
            self.0
        } else if at < 0.0 {
            -self.0
        } else {
            0.0
        }
    }
}

impl NeuraDerivable<f32> for NeuraL1<f32> {
    #[inline(always)]
    fn eval(&self, input: f32) -> f32 {
        self.0 * input.abs()
    }

    #[inline(always)]
    fn derivate(&self, at: f32) -> f32 {
        if at > 0.0 {
            self.0
        } else if at < 0.0 {
            -self.0
        } else {
            0.0
        }
    }
}

#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraL2<F>(pub F);

impl NeuraDerivable<f64> for NeuraL2<f64> {
    #[inline(always)]
    fn eval(&self, input: f64) -> f64 {
        self.0 * (input * input)
    }

    #[inline(always)]
    fn derivate(&self, at: f64) -> f64 {
        self.0 * at
    }
}

impl NeuraDerivable<f32> for NeuraL2<f32> {
    #[inline(always)]
    fn eval(&self, input: f32) -> f32 {
        self.0 * (input * input)
    }

    #[inline(always)]
    fn derivate(&self, at: f32) -> f32 {
        self.0 * at
    }
}

#[derive(Clone, Copy, Debug, PartialEq)]
pub struct NeuraElastic<F> {
    pub l1: F,
    pub l2: F,
}

impl<F> NeuraElastic<F> {
    pub fn new(l1_factor: F, l2_factor: F) -> Self {
        Self {
            l1: l1_factor,
            l2: l2_factor,
        }
    }
}

impl NeuraDerivable<f64> for NeuraElastic<f64> {
    #[inline(always)]
    fn eval(&self, input: f64) -> f64 {
        NeuraL1(self.l1).eval(input) + NeuraL2(self.l2).eval(input)
    }

    #[inline(always)]
    fn derivate(&self, at: f64) -> f64 {
        NeuraL1(self.l1).derivate(at) + NeuraL2(self.l2).derivate(at)
    }
}

impl NeuraDerivable<f32> for NeuraElastic<f32> {
    #[inline(always)]
    fn eval(&self, input: f32) -> f32 {
        NeuraL1(self.l1).eval(input) + NeuraL2(self.l2).eval(input)
    }

    #[inline(always)]
    fn derivate(&self, at: f32) -> f32 {
        NeuraL1(self.l1).derivate(at) + NeuraL2(self.l2).derivate(at)
    }
}
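
Below is a minimal usage sketch of these regularizers (not part of the commit). It assumes the `NeuraDerivable` trait from above is in scope, and the factor and weight values are made up for illustration; it shows that `NeuraElastic` is simply the sum of the L1 and L2 penalties and of their derivatives:

// Hypothetical sketch, not part of this diff.
let l1 = NeuraL1(0.1f64);
let l2 = NeuraL2(0.1f64);
let elastic = NeuraElastic::new(0.1f64, 0.1f64);

// Penalty terms for a single weight w = -2.0:
assert_eq!(l1.eval(-2.0), 0.2); // 0.1 * |w|
assert_eq!(l2.eval(-2.0), 0.4); // 0.1 * w * w
assert_eq!(elastic.eval(-2.0), l1.eval(-2.0) + l2.eval(-2.0));

// Gradient contributions at the same weight:
assert_eq!(l1.derivate(-2.0), -0.1); // -factor, since w < 0
assert_eq!(l2.derivate(-2.0), -0.2); // factor * w
assert_eq!(elastic.derivate(-2.0), l1.derivate(-2.0) + l2.derivate(-2.0));
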
@@ -0,0 +1,155 @@
use crate::{train::NeuraTrainableLayer, utils::multiply_vectors_pointwise};

use super::NeuraLayer;

#[non_exhaustive]
#[derive(Clone, Debug)]
pub struct NeuraSoftmaxLayer<const LENGTH: usize>;

impl<const LENGTH: usize> NeuraSoftmaxLayer<LENGTH> {
    pub fn new() -> Self {
        Self
    }
}

impl<const LENGTH: usize> NeuraLayer for NeuraSoftmaxLayer<LENGTH> {
    type Input = [f64; LENGTH];
    type Output = [f64; LENGTH];

    fn eval(&self, input: &Self::Input) -> Self::Output {
        let mut res = input.clone();

        let mut max = 0.0;
        for item in &res {
            if *item > max {
                max = *item;
            }
        }

        for item in &mut res {
            *item = (*item - max).exp();
        }

        let mut sum = 0.0;
        for item in &res {
            sum += item;
        }

        for item in &mut res {
            *item /= sum;
        }

        res
    }
}
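
For reference (not part of the diff), `eval` above implements the usual max-shifted softmax; the shift does not change the result, since softmax is invariant under adding a constant to every input, but it keeps `exp` from overflowing:

$$ \mathrm{softmax}(x)_i = \frac{e^{x_i - m}}{\sum_{j} e^{x_j - m}}, \qquad m = \max_k x_k $$

Strictly speaking, the code takes $m = \max(0, \max_k x_k)$, since `max` starts at `0.0`; by the same shift-invariance the output is unchanged.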

impl<const LENGTH: usize> NeuraTrainableLayer for NeuraSoftmaxLayer<LENGTH> {
    type Delta = ();

    fn backpropagate(
        &self,
        input: &Self::Input,
        mut epsilon: Self::Output,
    ) -> (Self::Input, Self::Delta) {
        // Note: a constant value can be added to `input` to increase numerical precision
        let evaluated = self.eval(input);

        // Compute $a_{l-1,i} \epsilon_{l,i}$
        epsilon = multiply_vectors_pointwise(&epsilon, &evaluated);

        // Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$
        let sum_diagonal_terms: f64 = epsilon.iter().copied().sum();

        for i in 0..LENGTH {
            // Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and subtract it from $a_{l-1,i} \epsilon_{l,i}$
            epsilon[i] -= evaluated[i] * sum_diagonal_terms;
        }

        (epsilon, ())
    }

    fn regularize(&self) -> Self::Delta {
        ()
    }

    fn apply_gradient(&mut self, _gradient: &Self::Delta) {
        // Noop
    }
}
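
As a reading aid (not part of the diff): with $s = \mathrm{softmax}(\text{input})$ and incoming gradient $\epsilon$, the loop above applies the closed-form softmax backpropagation that the comments describe,

$$ \epsilon'_i = s_i \epsilon_i - s_i \sum_k s_k \epsilon_k, $$

which is exactly $J \epsilon$ for the softmax Jacobian $J_{ij} = s_i(\delta_{ij} - s_j)$ (and $J$ is symmetric, so $J^\top \epsilon = J \epsilon$).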

#[cfg(test)]
mod test {
    use crate::algebra::NeuraVectorSpace;
    use crate::utils::{
        matrix_from_diagonal, multiply_matrix_vector, reverse_dot_product, uniform_vector,
    };

    use super::*;

    #[test]
    fn test_softmax_eval() {
        const EPSILON: f64 = 0.000002;
        let layer: NeuraSoftmaxLayer<3> = NeuraSoftmaxLayer::new();

        let result = layer.eval(&[1.0, 2.0, 8.0]);

        assert!((result[0] - 0.0009088).abs() < EPSILON);
        assert!((result[1] - 0.0024704).abs() < EPSILON);
        assert!((result[2] - 0.9966208).abs() < EPSILON);
    }

    // Based on https://stats.stackexchange.com/a/306710
    #[test]
    fn test_softmax_backpropagation_two() {
        const EPSILON: f64 = 0.000001;
        let layer: NeuraSoftmaxLayer<2> = NeuraSoftmaxLayer::new();

        for input1 in [0.2f64, 0.3, 0.5] {
            for input2 in [0.7, 1.1, 1.3] {
                let input = [input1, input2];
                let sum = input1.exp() + input2.exp();
                let output = [input1.exp() / sum, input2.exp() / sum];
                for epsilon1 in [1.7, 1.9, 2.3] {
                    for epsilon2 in [2.9, 3.1, 3.7] {
                        let epsilon = [epsilon1, epsilon2];

                        let (epsilon, _) = layer.backpropagate(&input, epsilon);
                        let expected = [
                            output[0] * (1.0 - output[0]) * epsilon1
                                - output[1] * output[0] * epsilon2,
                            output[1] * (1.0 - output[1]) * epsilon2
                                - output[1] * output[0] * epsilon1,
                        ];

                        assert!((epsilon[0] - expected[0]).abs() < EPSILON);
                        assert!((epsilon[1] - expected[1]).abs() < EPSILON);
                    }
                }
            }
        }
    }

    // Based on https://e2eml.school/softmax.html
    #[test]
    fn test_softmax_backpropagation() {
        const EPSILON: f64 = 0.000001;
        let layer: NeuraSoftmaxLayer<4> = NeuraSoftmaxLayer::new();

        for _ in 0..100 {
            let input: [f64; 4] = uniform_vector();
            let evaluated = layer.eval(&input);
            let loss: [f64; 4] = uniform_vector();

            let mut derivative = reverse_dot_product(&evaluated, &evaluated);
            derivative.mul_assign(-1.0);
            derivative.add_assign(&matrix_from_diagonal(&evaluated));

            let expected = multiply_matrix_vector(&derivative, &loss);
            let (actual, _) = layer.backpropagate(&input, loss);

            for i in 0..4 {
                assert!((expected[i] - actual[i]).abs() < EPSILON);
            }
        }
    }
}
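
The randomized test above builds the same Jacobian explicitly, assuming `reverse_dot_product(&s, &s)` yields the outer product $s s^\top$ and `matrix_from_diagonal(&s)` yields $\operatorname{diag}(s)$ (those helpers are not shown in this diff): the expected gradient is $(\operatorname{diag}(s) - s s^\top)\,\epsilon$, which should agree with `backpropagate` up to the chosen EPSILON.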