parent 0c97a65013
commit cb862f12cc
@@ -0,0 +1,175 @@
use nalgebra::{DVector, Scalar};
use num::{traits::NumAssignOps, Float};

use super::*;

#[derive(Clone, Debug)]
pub struct NeuraSoftmaxLayer {
    shape: NeuraShape,
}

impl NeuraSoftmaxLayer {
    pub fn new() -> Self {
        Self {
            shape: NeuraShape::Vector(0),
        }
    }
}
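
// Note: the `NeuraShape::Vector(0)` stored by `new()` is only a placeholder;
// `NeuraPartialLayer::construct` further down replaces it with the actual input shape,
// since softmax does not change the dimensionality of its input.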

impl<F: Float + Scalar + NumAssignOps> NeuraLayer<DVector<F>> for NeuraSoftmaxLayer {
    type Output = DVector<F>;

    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let mut res = input.clone();

        let mut max = F::zero();
        for &item in &res {
            if item > max {
                max = item;
            }
        }

        let mut sum = F::zero();
        for item in &mut res {
            *item = (*item - max).exp();
            sum += *item;
        }

        res /= sum;

        res
    }
}
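
// The forward pass above computes the numerically stable softmax
// $a_i = \frac{e^{x_i - m}}{\sum_k e^{x_k - m}}$, where $m$ is the maximum found by the
// loop above (never below `F::zero()`); shifting by $m$ leaves the result unchanged but
// keeps the exponentials from overflowing.
//
// Minimal usage sketch, mirroring `test_softmax_eval` below (`dvector!` is nalgebra's macro):
//
//     let layer = NeuraSoftmaxLayer::new();
//     let result = layer.eval(&dvector![1.0f64, 2.0, 8.0]);
//     assert!((result.sum() - 1.0).abs() < 1e-9); // softmax outputs always sum to one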

impl NeuraPartialLayer for NeuraSoftmaxLayer {
    type Constructed = Self;
    type Err = ();

    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        Ok(Self { shape: input_shape })
    }

    fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
        constructed.shape
    }
}

impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraSoftmaxLayer {
    type Gradient = ();

    fn default_gradient(&self) -> Self::Gradient {
        ()
    }

    fn backprop_layer(
        &self,
        input: &DVector<F>,
        mut epsilon: Self::Output,
    ) -> (DVector<F>, Self::Gradient) {
        // Note: a constant value can be added to `input` to increase precision
        let evaluated = self.eval(input);

        // Compute $a_{l-1,i} \epsilon_{l,i}$
        hadamard_product(&mut epsilon, &evaluated);

        // Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$
        let sum_diagonal_terms = epsilon.sum();

        for i in 0..input.len() {
            // Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and subtract it from $a_{l-1,i} \epsilon_{l,i}$
            epsilon[i] -= evaluated[i] * sum_diagonal_terms;
        }

        (epsilon, ())
    }

    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }
}
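
// Writing $a_i$ for `evaluated[i]`, the backward pass above applies the softmax Jacobian
// $\frac{\partial a_i}{\partial x_j} = a_i (\delta_{ij} - a_j)$ to the incoming epsilon:
// the returned value is $\epsilon'_i = a_i \epsilon_i - a_i \sum_k a_k \epsilon_k$.
// `test_softmax_backpropagation` below checks this against the explicit Jacobian matrix
// $\mathrm{diag}(a) - a a^\top$ built with `DMatrix::from_diagonal`.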

fn hadamard_product<F: Float + std::ops::MulAssign>(left: &mut DVector<F>, right: &DVector<F>) {
    for i in 0..left.len() {
        left[i] *= right[i];
    }
}

#[cfg(test)]
mod test {
    use nalgebra::{dvector, DMatrix};

    use crate::utils::uniform_vector;

    use super::*;

    #[test]
    fn test_softmax_eval() {
        const EPSILON: f64 = 0.000002;
        let layer = NeuraSoftmaxLayer::new();

        let result = layer.eval(&dvector![1.0, 2.0, 8.0]);

        assert!((result[0] - 0.0009088).abs() < EPSILON);
        assert!((result[1] - 0.0024704).abs() < EPSILON);
        assert!((result[2] - 0.9966208).abs() < EPSILON);
    }

    // Based on https://stats.stackexchange.com/a/306710
    #[test]
    fn test_softmax_backpropagation_two() {
        const EPSILON: f64 = 0.000001;
        let layer = NeuraSoftmaxLayer::new();

        for input1 in [0.2f64, 0.3, 0.5] {
            for input2 in [0.7, 1.1, 1.3] {
                let input = dvector![input1, input2];
                let sum = input1.exp() + input2.exp();
                let output = dvector![input1.exp() / sum, input2.exp() / sum];
                for epsilon1 in [1.7, 1.9, 2.3] {
                    for epsilon2 in [2.9, 3.1, 3.7] {
                        let epsilon = dvector![epsilon1, epsilon2];

                        let (epsilon, _) = layer.backprop_layer(&input, epsilon);
                        let expected = [
                            output[0] * (1.0 - output[0]) * epsilon1
                                - output[1] * output[0] * epsilon2,
                            output[1] * (1.0 - output[1]) * epsilon2
                                - output[1] * output[0] * epsilon1,
                        ];

                        assert!((epsilon[0] - expected[0]).abs() < EPSILON);
                        assert!((epsilon[1] - expected[1]).abs() < EPSILON);
                    }
                }
            }
        }
    }

    // Based on https://e2eml.school/softmax.html
    #[test]
    fn test_softmax_backpropagation() {
        const EPSILON: f64 = 0.000001;
        let layer = NeuraSoftmaxLayer::new();

        for _ in 0..100 {
            let input = uniform_vector(4);
            let evaluated = layer.eval(&input);
            let loss = uniform_vector(4);

            let mut derivative = &evaluated * evaluated.transpose();
            derivative *= -1.0;
            derivative += DMatrix::from_diagonal(&evaluated);

            let expected = derivative * &loss;
            let (actual, _) = layer.backprop_layer(&input, loss);

            for i in 0..4 {
                assert!((expected[i] - actual[i]).abs() < EPSILON);
            }
        }
    }
}