Softmax layer

main
Shad Amethyst 2 years ago
parent 0c97a65013
commit cb862f12cc

@@ -13,10 +13,12 @@ use rand::Rng;
 fn main() {
     let mut network = neura_sequential![
-        neura_layer!("dense", 8),
+        neura_layer!("dense", 8).regularization(NeuraL1(0.001)),
         neura_layer!("dropout", 0.25),
-        neura_layer!("dense", 2).activation(Linear),
-        // neura_layer!("softmax"),
+        neura_layer!("dense", 2)
+            .activation(Linear)
+            .regularization(NeuraL1(0.001)),
+        neura_layer!("softmax"),
     ]
     .construct(NeuraShape::Vector(2))
     .unwrap();

@@ -1,8 +1,6 @@
 use nalgebra::DVector;
 use num::Float;
 
-use crate::algebra::NeuraVector;
-
 use super::NeuraLoss;
 
 #[derive(Clone, Copy, Debug, PartialEq)]

@@ -2,8 +2,7 @@ use crate::algebra::NeuraVectorSpace;
 pub mod dense;
 pub mod dropout;
+pub mod softmax;
 
-pub use dense::NeuraDenseLayer;
-
 #[derive(Clone, Copy, PartialEq, Debug)]
 pub enum NeuraShape {
@@ -121,4 +120,8 @@ macro_rules! neura_layer {
     ( "dropout", $probability:expr ) => {
         $crate::layer::dropout::NeuraDropoutLayer::new($probability, rand::thread_rng())
     };
+
+    ( "softmax" ) => {
+        $crate::layer::softmax::NeuraSoftmaxLayer::new()
+    };
 }
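For reference, a minimal sketch of how the new `"softmax"` arm can be exercised on its own, mirroring the unit tests in the new softmax module below (the prelude imports for the macro, `NeuraShape`, and the layer traits are assumed and not shown here):

// Hypothetical standalone usage; names come from this commit, imports are assumed.
let layer = neura_layer!("softmax")        // expands to NeuraSoftmaxLayer::new()
    .construct(NeuraShape::Vector(3))      // softmax passes the input shape through unchanged
    .unwrap();
let out = layer.eval(&nalgebra::dvector![1.0, 2.0, 8.0]);
assert!((out.sum() - 1.0_f64).abs() < 1e-6); // the outputs form a probability distribution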

@@ -0,0 +1,175 @@
use nalgebra::{DVector, Scalar};
use num::{traits::NumAssignOps, Float};
use super::*;
#[derive(Clone, Debug)]
pub struct NeuraSoftmaxLayer {
shape: NeuraShape,
}
impl NeuraSoftmaxLayer {
pub fn new() -> Self {
Self {
shape: NeuraShape::Vector(0),
}
}
}
impl<F: Float + Scalar + NumAssignOps> NeuraLayer<DVector<F>> for NeuraSoftmaxLayer {
type Output = DVector<F>;
fn eval(&self, input: &DVector<F>) -> Self::Output {
let mut res = input.clone();
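// Softmax is shift-invariant, so the largest entry (floored at zero) is subtracted
// below before exponentiating, keeping `exp` from overflowing for large inputs.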
let mut max = F::zero();
for &item in &res {
if item > max {
max = item;
}
}
let mut sum = F::zero();
for item in &mut res {
*item = (*item - max).exp();
sum += *item;
}
res /= sum;
res
}
}
impl NeuraPartialLayer for NeuraSoftmaxLayer {
type Constructed = Self;
type Err = ();
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
Ok(Self { shape: input_shape })
}
fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
constructed.shape
}
}
impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraSoftmaxLayer {
type Gradient = ();
fn default_gradient(&self) -> Self::Gradient {
()
}
fn backprop_layer(
&self,
input: &DVector<F>,
mut epsilon: Self::Output,
) -> (DVector<F>, Self::Gradient) {
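// With $s = \text{softmax}(x)$, the Jacobian is $\partial s_i / \partial x_j = s_i (\delta_{ij} - s_j)$,
// so the returned gradient is $s_i \epsilon_i - s_i \sum_k{s_k \epsilon_k}$, computed in two steps below.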
// Note: a constant value can be added to `input` to increase numerical precision
let evaluated = self.eval(input);
// Compute $a_{l-1,i} \epsilon_{l,i}$
hadamard_product(&mut epsilon, &evaluated);
// Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$
let sum_diagonal_terms = epsilon.sum();
for i in 0..input.len() {
// Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and subtract it from $a_{l-1,i} \epsilon_{l,i}$
epsilon[i] -= evaluated[i] * sum_diagonal_terms;
}
(epsilon, ())
}
fn regularize_layer(&self) -> Self::Gradient {
()
}
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
}
}
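/// Element-wise (Hadamard) product, accumulated in place into `left`.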
fn hadamard_product<F: Float + std::ops::MulAssign>(left: &mut DVector<F>, right: &DVector<F>) {
for i in 0..left.len() {
left[i] *= right[i];
}
}
#[cfg(test)]
mod test {
use nalgebra::{dvector, DMatrix};
use crate::utils::uniform_vector;
use super::*;
#[test]
fn test_softmax_eval() {
const EPSILON: f64 = 0.000002;
let layer = NeuraSoftmaxLayer::new();
let result = layer.eval(&dvector![1.0, 2.0, 8.0]);
assert!((result[0] - 0.0009088).abs() < EPSILON);
assert!((result[1] - 0.0024704).abs() < EPSILON);
assert!((result[2] - 0.9966208).abs() < EPSILON);
}
// Based on https://stats.stackexchange.com/a/306710
#[test]
fn test_softmax_backpropagation_two() {
const EPSILON: f64 = 0.000001;
let layer = NeuraSoftmaxLayer::new();
for input1 in [0.2, 0.3, 0.5] as [f64; 3] {
for input2 in [0.7, 1.1, 1.3] {
let input = dvector![input1, input2];
let sum = input1.exp() + input2.exp();
let output = dvector![input1.exp() / sum, input2.exp() / sum];
for epsilon1 in [1.7, 1.9, 2.3] {
for epsilon2 in [2.9, 3.1, 3.7] {
let epsilon = dvector![epsilon1, epsilon2];
let (epsilon, _) = layer.backprop_layer(&input, epsilon);
let expected = [
output[0] * (1.0 - output[0]) * epsilon1
- output[1] * output[0] * epsilon2,
output[1] * (1.0 - output[1]) * epsilon2
- output[1] * output[0] * epsilon1,
];
assert!((epsilon[0] - expected[0]).abs() < EPSILON);
assert!((epsilon[1] - expected[1]).abs() < EPSILON);
}
}
}
}
}
// Based on https://e2eml.school/softmax.html
#[test]
fn test_softmax_backpropagation() {
const EPSILON: f64 = 0.000001;
let layer = NeuraSoftmaxLayer::new();
for _ in 0..100 {
let input = uniform_vector(4);
let evaluated = layer.eval(&input);
let loss = uniform_vector(4);
let mut derivative = &evaluated * evaluated.transpose();
derivative *= -1.0;
derivative += DMatrix::from_diagonal(&evaluated);
let expected = derivative * &loss;
let (actual, _) = layer.backprop_layer(&input, loss);
for i in 0..4 {
assert!((expected[i] - actual[i]).abs() < EPSILON);
}
}
}
}
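For reference, the identity that `backprop_layer` implements, writing $s$ for the softmax output and $\epsilon$ for the incoming gradient:

$$
s_i = \frac{e^{x_i}}{\sum_k e^{x_k}}, \qquad
J_{ij} = \frac{\partial s_i}{\partial x_j} = s_i\,(\delta_{ij} - s_j), \qquad
(J\epsilon)_i = s_i \epsilon_i - s_i \sum_k s_k \epsilon_k .
$$

The first term is the Hadamard product computed by `hadamard_product`, the second is the `evaluated[i] * sum_diagonal_terms` correction applied in the loop, and the matrix-based test above builds the same Jacobian explicitly as $\operatorname{diag}(s) - s s^\top$ to check both paths agree.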

@@ -247,7 +247,7 @@ mod test {
     use crate::{
         derivable::{activation::Relu, regularize::NeuraL0},
-        layer::{NeuraDenseLayer, NeuraLayer, NeuraShape},
+        layer::{dense::NeuraDenseLayer, NeuraLayer, NeuraShape},
         neura_layer,
     };

@@ -186,7 +186,7 @@ mod test {
     use crate::{
         assert_approx,
         derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0},
-        layer::{NeuraDenseLayer, NeuraLayer},
+        layer::{dense::NeuraDenseLayer, NeuraLayer},
         network::sequential::{NeuraSequential, NeuraSequentialTail},
         neura_sequential,
     };

@@ -89,17 +89,12 @@ where
 }
 
 #[cfg(test)]
-pub(crate) fn uniform_vector<const LENGTH: usize>() -> NeuraVector<LENGTH, f64> {
+pub(crate) fn uniform_vector(length: usize) -> nalgebra::DVector<f64> {
+    use nalgebra::DVector;
     use rand::Rng;
 
-    let mut res: NeuraVector<LENGTH, f64> = NeuraVector::default();
     let mut rng = rand::thread_rng();
 
-    for i in 0..LENGTH {
-        res[i] = rng.gen();
-    }
-
-    res
+    DVector::from_fn(length, |_, _| -> f64 { rng.gen() })
 }
 
 pub fn one_hot<const N: usize>(value: usize) -> NeuraVector<N, f64> {
