🔥 Semi-working training, although it seems to only want to converge to zero

main
Shad Amethyst 2 years ago
parent d3d5f57a2b
commit 7a6921a1c1

@@ -0,0 +1,42 @@
#![feature(generic_arg_infer)]
use neuramethyst::prelude::*;
use neuramethyst::derivable::activation::{Relu, Tanh};
use neuramethyst::derivable::loss::Euclidean;
fn main() {
let mut network = neura_network![
neura_layer!("dense", Tanh, 2, 2),
neura_layer!("dense", Tanh, 3),
neura_layer!("dense", Relu, 1)
];
let inputs = [
([0.0, 0.0], [0.0]),
([0.0, 1.0], [1.0]),
([1.0, 0.0], [1.0]),
([1.0, 1.0], [0.0])
];
// println!("{:#?}", network);
for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]);
}
train_batched(
&mut network,
inputs.clone(),
&inputs,
NeuraBackprop::new(Euclidean),
0.01, // learning rate
1, // batch size
25 // epochs
);
// println!("{:#?}", network);
for (input, target) in inputs {
println!("Input: {:?}, target: {}, actual: {}", &input, target[0], network.eval(&input)[0]);
}
}

@@ -1,33 +1,46 @@
/// An extension of `std::ops::AddAssign` and `std::ops::Default`
pub trait NeuraAddAssign {
pub trait NeuraVectorSpace {
fn add_assign(&mut self, other: &Self);
fn default() -> Self;
fn mul_assign(&mut self, by: f64);
fn zero() -> Self;
}
impl<Left: NeuraAddAssign, Right: NeuraAddAssign> NeuraAddAssign for (Left, Right) {
impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
fn add_assign(&mut self, other: &Self) {
NeuraAddAssign::add_assign(&mut self.0, &other.0);
NeuraAddAssign::add_assign(&mut self.1, &other.1);
NeuraVectorSpace::add_assign(&mut self.0, &other.0);
NeuraVectorSpace::add_assign(&mut self.1, &other.1);
}
fn mul_assign(&mut self, by: f64) {
NeuraVectorSpace::mul_assign(&mut self.0, by);
NeuraVectorSpace::mul_assign(&mut self.1, by);
}
fn default() -> Self {
(Left::default(), Right::default())
fn zero() -> Self {
(Left::zero(), Right::zero())
}
}
impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] {
impl<const N: usize, T: NeuraVectorSpace + Clone> NeuraVectorSpace for [T; N] {
fn add_assign(&mut self, other: &[T; N]) {
for i in 0..N {
NeuraAddAssign::add_assign(&mut self[i], &other[i]);
NeuraVectorSpace::add_assign(&mut self[i], &other[i]);
}
}
fn mul_assign(&mut self, by: f64) {
for i in 0..N {
NeuraVectorSpace::mul_assign(&mut self[i], by);
}
}
fn default() -> Self {
fn zero() -> Self {
let mut res: Vec<T> = Vec::with_capacity(N);
for _ in 0..N {
res.push(T::default());
res.push(T::zero());
}
res.try_into().unwrap_or_else(|_| {
@@ -39,16 +52,20 @@ impl<const N: usize, T: NeuraAddAssign + Clone> NeuraAddAssign for [T; N] {
macro_rules! base {
( $type:ty ) => {
impl NeuraAddAssign for $type {
impl NeuraVectorSpace for $type {
fn add_assign(&mut self, other: &Self) {
std::ops::AddAssign::add_assign(self, other);
}
fn default() -> Self {
<Self as Default>::default()
fn mul_assign(&mut self, other: f64) {
std::ops::MulAssign::mul_assign(self, other as $type);
}
fn zero() -> Self {
<Self as Default>::default()
}
}
};
}
base!(f32);
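The renamed `NeuraVectorSpace` trait gives gradients a small vector-space API (`zero`, `add_assign`, `mul_assign`), and the tuple/array impls let nested gradient types compose. A hedged usage sketch, assuming the crate path `neuramethyst::algebra::NeuraVectorSpace` and that `base!` is also instantiated for `f64` (the hunk is cut off right after `base!(f32);`):

use neuramethyst::algebra::NeuraVectorSpace; // path assumed from lib.rs below

fn main() {
    // A gradient of type ([f64; 2], f64) is itself a vector space, thanks to
    // the tuple, array and scalar impls above (assumes `base!(f64);` exists).
    let mut sum = <([f64; 2], f64)>::zero();
    sum.add_assign(&([1.0, 2.0], 0.5));
    sum.add_assign(&([3.0, -1.0], 1.5));
    sum.mul_assign(-0.1 / 2.0); // -learning_rate / batch_size, as in train_batched
    println!("{:?}", sum);      // ([-0.2, -0.05], -0.1)
}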

@@ -34,3 +34,32 @@ impl NeuraDerivable<f32> for Relu {
}
}
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Tanh;
impl NeuraDerivable<f64> for Tanh {
#[inline(always)]
fn eval(&self, input: f64) -> f64 {
0.5 * input.tanh() + 0.5
}
#[inline(always)]
fn derivate(&self, at: f64) -> f64 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}
impl NeuraDerivable<f32> for Tanh {
#[inline(always)]
fn eval(&self, input: f32) -> f32 {
0.5 * input.tanh() + 0.5
}
#[inline(always)]
fn derivate(&self, at: f32) -> f32 {
let tanh = at.tanh();
0.5 * (1.0 - tanh * tanh)
}
}
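Note that this `Tanh` is a rescaled tanh mapping into (0, 1), which is why its derivative is `0.5 * (1 - tanh²)` rather than `1 - tanh²`. A quick standalone finite-difference check of that relation (plain functions, no crate types):

fn eval(x: f64) -> f64 { 0.5 * x.tanh() + 0.5 }

fn derivate(x: f64) -> f64 {
    let tanh = x.tanh();
    0.5 * (1.0 - tanh * tanh)
}

fn main() {
    let h = 1e-6;
    for x in [-2.0, -0.5, 0.0, 0.5, 2.0] {
        // central difference approximation of d(eval)/dx
        let numeric = (eval(x + h) - eval(x - h)) / (2.0 * h);
        assert!((numeric - derivate(x)).abs() < 1e-6);
    }
    println!("0.5 * tanh(x) + 0.5 has derivative 0.5 * (1 - tanh(x)^2)");
}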

@@ -1,12 +1,14 @@
use super::NeuraLoss;
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Euclidean;
impl<const N: usize> NeuraLoss<[f64; N]> for Euclidean {
type Out = f64;
pub struct Euclidean<const N: usize>;
impl<const N: usize> NeuraLoss for Euclidean<N> {
type Input = [f64; N];
type Target = [f64; N];
fn eval(&self, target: [f64; N], actual: [f64; N]) -> f64 {
#[inline]
fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 {
let mut sum_squared = 0.0;
for i in 0..N {
@@ -16,7 +18,15 @@ impl<const N: usize> NeuraLoss<[f64; N]> for Euclidean {
sum_squared * 0.5
}
fn nabla(&self, target: [f64; N], actual: [f64; N]) -> [f64; N] {
todo!()
#[inline]
fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] {
let mut res = [0.0; N];
// ∂E(y)/∂yᵢ = yᵢ - yᵢ'
for i in 0..N {
res[i] = actual[i] - target[i];
}
res
}
}
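The new `nabla` is the gradient of `E = 0.5 * Σ (actualᵢ - targetᵢ)²` with respect to `actual`, which is simply `actual - target`. A standalone re-derivation with a finite-difference check (the same formulas, not the crate's code):

fn eval(target: &[f64; 3], actual: &[f64; 3]) -> f64 {
    let mut sum_squared = 0.0;
    for i in 0..3 {
        sum_squared += (actual[i] - target[i]) * (actual[i] - target[i]);
    }
    sum_squared * 0.5
}

fn nabla(target: &[f64; 3], actual: &[f64; 3]) -> [f64; 3] {
    let mut res = [0.0; 3];
    for i in 0..3 {
        res[i] = actual[i] - target[i];
    }
    res
}

fn main() {
    let (target, actual) = ([1.0, 0.0, -2.0], [0.5, 0.25, 1.0]);
    let grad = nabla(&target, &actual);
    let h = 1e-6;
    for i in 0..3 {
        let (mut plus, mut minus) = (actual, actual);
        plus[i] += h;
        minus[i] -= h;
        // ∂E/∂actualᵢ estimated by a central difference
        let numeric = (eval(&target, &plus) - eval(&target, &minus)) / (2.0 * h);
        assert!((numeric - grad[i]).abs() < 1e-6);
    }
    println!("nabla matches the gradient of 0.5 * Σ (actualᵢ - targetᵢ)²");
}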

@@ -8,13 +8,13 @@ pub trait NeuraDerivable<F> {
fn derivate(&self, at: F) -> F;
}
pub trait NeuraLoss<F> {
type Out;
pub trait NeuraLoss {
type Input;
type Target;
fn eval(&self, target: Self::Target, actual: F) -> Self::Out;
fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64;
/// Should return the gradient of the loss function according to `actual`
/// ($\nabla_{\texttt{actual}} \texttt{self.eval}(\texttt{target}, \texttt{actual})$).
fn nabla(&self, target: Self::Target, actual: F) -> F;
fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input;
}
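With the reworked trait, a loss now declares its `Input` (the network output) and `Target` types, and returns an `f64` score plus an `Input`-shaped gradient. A hypothetical implementor, purely for illustration; the `AbsoluteError` type below is not part of the crate and the import path is assumed from the modules above:

use neuramethyst::derivable::NeuraLoss; // assumed path

// Hypothetical absolute-error loss, not part of this commit.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct AbsoluteError<const N: usize>;

impl<const N: usize> NeuraLoss for AbsoluteError<N> {
    type Input = [f64; N];
    type Target = [f64; N];

    fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 {
        let mut sum = 0.0;
        for i in 0..N {
            sum += (actual[i] - target[i]).abs();
        }
        sum
    }

    fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] {
        let mut res = [0.0; N];
        for i in 0..N {
            let diff = actual[i] - target[i];
            // subgradient of |diff|; 0 is chosen at the kink
            res[i] = if diff == 0.0 { 0.0 } else { diff.signum() };
        }
        res
    }
}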

@@ -1,7 +1,8 @@
use super::NeuraLayer;
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer};
use crate::{derivable::NeuraDerivable, utils::{multiply_matrix_vector, reverse_dot_product, multiply_matrix_transpose_vector}, train::NeuraTrainableLayer, algebra::NeuraVectorSpace};
use rand::Rng;
#[derive(Clone, Debug)]
pub struct NeuraDenseLayer<
Act: NeuraDerivable<f64>,
const INPUT_LEN: usize,
@@ -34,7 +35,7 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN {
weights[i][j] = rng.gen::<f64>() * multiplier;
weights[i][j] = rng.gen_range(-multiplier..multiplier);
}
}
@@ -88,6 +89,11 @@ impl<Act: NeuraDerivable<f64>, const INPUT_LEN: usize, const OUTPUT_LEN: usize>
(new_epsilon, (weights_gradient, bias_gradient))
}
fn apply_gradient(&mut self, gradient: &Self::Delta) {
NeuraVectorSpace::add_assign(&mut self.weights, &gradient.0);
NeuraVectorSpace::add_assign(&mut self.bias, &gradient.1);
}
}
#[cfg(test)]

@@ -1,9 +1,19 @@
#![feature(generic_arg_infer)]
pub mod algebra;
pub mod derivable;
pub mod layer;
pub mod network;
pub mod train;
pub mod algebra;
mod utils;
pub mod prelude {
// Macros
pub use crate::{neura_network, neura_layer};
// Structs and traits
pub use super::network::{NeuraNetwork};
pub use super::layer::{NeuraLayer, NeuraDenseLayer};
pub use super::train::{NeuraBackprop, train_batched};
}

@@ -1,5 +1,10 @@
use crate::{layer::NeuraLayer, train::{NeuraTrainable, NeuraTrainableLayer}, derivable::NeuraLoss};
use crate::{
derivable::NeuraLoss,
layer::NeuraLayer,
train::{NeuraTrainable, NeuraTrainableLayer},
};
#[derive(Clone, Debug)]
pub struct NeuraNetwork<Layer: NeuraLayer, ChildNetwork> {
layer: Layer,
child_network: ChildNetwork,
@@ -62,20 +67,44 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraLayer<Input = Layer::Output>> NeuraLa
impl<Layer: NeuraTrainableLayer> NeuraTrainable for NeuraNetwork<Layer, ()> {
type Delta = Layer::Delta;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(gradient);
}
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
let final_activation = self.layer.eval(input);
let backprop_epsilon = loss.nabla(target, final_activation);
let backprop_epsilon = loss.nabla(target, &final_activation);
self.layer.backpropagate(&input, backprop_epsilon)
}
}
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable for NeuraNetwork<Layer, ChildNetwork> {
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainable<Input = Layer::Output>> NeuraTrainable
for NeuraNetwork<Layer, ChildNetwork>
{
type Delta = (Layer::Delta, ChildNetwork::Delta);
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta) {
fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(&gradient.0);
self.child_network.apply_gradient(&gradient.1);
}
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta) {
let next_activation = self.layer.eval(input);
let (backprop_gradient, weights_gradient) = self.child_network.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) = self.layer.backpropagate(input, backprop_gradient);
let (backprop_gradient, weights_gradient) =
self.child_network
.backpropagate(&next_activation, target, loss);
let (backprop_gradient, layer_gradient) =
self.layer.backpropagate(input, backprop_gradient);
(backprop_gradient, (layer_gradient, weights_gradient))
}
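The `Delta` of a network is a nested tuple mirroring its cons-list of layers: each level pairs the current layer's gradient with the child network's. A self-contained toy version of that recursion, with scalar "layers" that are not the crate's types, just to show how the `(epsilon, delta)` pair flows back up:

trait ToyLayer {
    type Delta;
    fn forward(&self, x: f64) -> f64;
    // Returns (epsilon for the previous layer, gradient of this layer's weight).
    fn backward(&self, x: f64, epsilon: f64) -> (f64, Self::Delta);
    fn apply(&mut self, delta: &Self::Delta, factor: f64);
}

struct Scale {
    w: f64,
}

impl ToyLayer for Scale {
    type Delta = f64; // d(loss)/dw
    fn forward(&self, x: f64) -> f64 {
        self.w * x
    }
    fn backward(&self, x: f64, epsilon: f64) -> (f64, f64) {
        (self.w * epsilon, x * epsilon)
    }
    fn apply(&mut self, delta: &f64, factor: f64) {
        self.w += factor * delta;
    }
}

// Two levels, so the gradient is (layer delta, child delta), analogous to
// `(Layer::Delta, ChildNetwork::Delta)` above.
struct ToyNetwork {
    layer: Scale,
    child: Scale,
}

impl ToyNetwork {
    fn backpropagate(&self, input: f64, target: f64) -> (f64, f64) {
        let next_activation = self.layer.forward(input);
        let output = self.child.forward(next_activation);
        let epsilon = output - target; // Euclidean loss gradient
        let (epsilon, child_delta) = self.child.backward(next_activation, epsilon);
        let (_epsilon, layer_delta) = self.layer.backward(input, epsilon);
        (layer_delta, child_delta)
    }
}

fn main() {
    let mut net = ToyNetwork {
        layer: Scale { w: 0.5 },
        child: Scale { w: 2.0 },
    };
    for _ in 0..200 {
        let (d0, d1) = net.backpropagate(1.0, 3.0);
        net.layer.apply(&d0, -0.1); // factor = -learning_rate
        net.child.apply(&d1, -0.1);
    }
    // The composed map w_child * w_layer * x should now be close to the target 3.
    println!("{}", net.child.w * net.layer.w);
}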

@@ -1,14 +1,13 @@
use crate::{
// utils::{assign_add_vector, chunked},
algebra::NeuraVectorSpace,
derivable::NeuraLoss,
layer::NeuraLayer,
network::NeuraNetwork,
// utils::{assign_add_vector, chunked},
algebra::NeuraAddAssign,
};
pub trait NeuraTrainableLayer: NeuraLayer {
type Delta: NeuraAddAssign;
type Delta: NeuraVectorSpace;
/// Computes the backpropagation term and the derivative of the internal weights,
/// using the `input` vector outputted by the previous layer and the backpropagation term `epsilon` of the next layer.
@@ -19,44 +18,134 @@ pub trait NeuraTrainableLayer: NeuraLayer {
/// ```
///
/// The function should then return a pair `(epsilon_{l-1}, δW_l)`,
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)`.
fn backpropagate(&self, input: &Self::Input, epsilon: Self::Output) -> (Self::Input, Self::Delta);
/// with `epsilon_{l-1}` being multiplied by `f_{l-1}'(activation)` by the next layer to obtain `delta_{l-1}`.
/// Using this intermediate value for `delta` allows us to isolate its computation to the respective layers.
fn backpropagate(
&self,
input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta);
/// Applies `δW_l` to the weights of the layer
fn apply_gradient(&mut self, gradient: &Self::Delta);
}
pub trait NeuraTrainable: NeuraLayer {
type Delta: NeuraAddAssign;
type Delta: NeuraVectorSpace;
fn backpropagate<Loss: NeuraLoss<Self::Output>>(&self, input: &Self::Input, target: Loss::Target, loss: Loss) -> (Self::Input, Self::Delta);
fn apply_gradient(&mut self, gradient: &Self::Delta);
/// Should implement the backpropagation algorithm; see `NeuraTrainableLayer::backpropagate` for more information.
fn backpropagate<Loss: NeuraLoss<Input = Self::Output>>(
&self,
input: &Self::Input,
target: &Loss::Target,
loss: Loss,
) -> (Self::Input, Self::Delta);
}
pub trait NeuraTrainer<F, Loss: NeuraLoss<F>> {
pub trait NeuraTrainer<Output, Target = Output> {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: Loss::Target,
loss: Loss,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = F>
;
target: &Target,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
fn score<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: &Target,
) -> f64
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>;
}
#[non_exhaustive]
pub struct NeuraBackprop {
pub epsilon: f64,
pub batch_size: usize,
pub struct NeuraBackprop<Loss: NeuraLoss + Clone> {
loss: Loss,
}
impl<const N: usize, Loss: NeuraLoss<[f64; N]>> NeuraTrainer<[f64; N], Loss> for NeuraBackprop {
impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
pub fn new(loss: Loss) -> Self {
Self { loss }
}
}
impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> NeuraTrainer<[f64; N], Loss::Target>
for NeuraBackprop<Loss>
{
fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: Loss::Target,
loss: Loss,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta where
target: &Loss::Target,
) -> <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{
trainable.backpropagate(input, target, loss).1
trainable.backpropagate(input, target, self.loss.clone()).1
}
fn score<Layer: NeuraLayer, ChildNetwork>(
&self,
trainable: &NeuraNetwork<Layer, ChildNetwork>,
input: &Layer::Input,
target: &Loss::Target,
) -> f64
where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = [f64; N]>,
{
self.loss.eval(target, &trainable.eval(&input))
}
}
pub fn train_batched<
Output,
Target,
Trainer: NeuraTrainer<Output, Target>,
Layer: NeuraLayer,
ChildNetwork,
Inputs: IntoIterator<Item = (Layer::Input, Target)>,
>(
network: &mut NeuraNetwork<Layer, ChildNetwork>,
inputs: Inputs,
test_inputs: &[(Layer::Input, Target)],
trainer: Trainer,
learning_rate: f64,
batch_size: usize,
epochs: usize,
) where
NeuraNetwork<Layer, ChildNetwork>: NeuraTrainable<Input = Layer::Input, Output = Output>,
Inputs::IntoIter: Clone,
{
// TODO: apply shuffling?
let mut iter = inputs.into_iter().cycle();
let factor = -learning_rate / (batch_size as f64);
'd: for epoch in 0..epochs {
let mut gradient_sum = <NeuraNetwork<Layer, ChildNetwork> as NeuraTrainable>::Delta::zero();
for _ in 0..batch_size {
if let Some((input, target)) = iter.next() {
let gradient = trainer.get_gradient(&network, &input, &target);
gradient_sum.add_assign(&gradient);
} else {
break 'd;
}
}
gradient_sum.mul_assign(factor);
network.apply_gradient(&gradient_sum);
let mut loss_sum = 0.0;
for (input, target) in test_inputs {
loss_sum += trainer.score(&network, input, target);
}
loss_sum /= test_inputs.len() as f64;
println!("Epoch {epoch}, Loss: {:.3}", loss_sum);
}
}
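Each `epoch` here consumes one batch: `batch_size` gradients are summed, scaled by `-learning_rate / batch_size`, applied in a single step, and then the loss is averaged over `test_inputs`. A minimal standalone sketch of that accumulate-scale-apply pattern on a 1-D quadratic (hypothetical names, not crate code):

fn main() {
    // Minimize f(w) = 0.5 * (w - 4)^2 with the same update rule as train_batched:
    // sum the per-sample gradients, scale by -learning_rate / batch_size, apply.
    let mut w = 0.0_f64;
    let learning_rate = 0.5;
    let batch_size = 4;
    for epoch in 0..10 {
        let mut gradient_sum = 0.0; // NeuraVectorSpace::zero() in the real code
        for _ in 0..batch_size {
            gradient_sum += w - 4.0; // df/dw for one (identical) sample
        }
        gradient_sum *= -learning_rate / batch_size as f64;
        w += gradient_sum; // apply_gradient
        println!("Epoch {epoch}, w = {w:.3}");
    }
}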

@@ -35,7 +35,7 @@ pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT:
pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
left: &[f64; HEIGHT],
right: &[f64; WIDTH]
right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] {
let mut result = [[0.0; WIDTH]; HEIGHT];
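For reference, `reverse_dot_product` appears to be an outer product, `result[i][j] = left[i] * right[j]`, which is the shape needed for the dense layer's weight gradient (`δ ⊗ input`). The hunk above only shows the signature, so the following is a standalone sketch under that assumption:

// Hedged sketch: what an outer product ("reverse dot product") computes.
// This mirrors the assumed behaviour of the truncated function above.
fn outer_product<const WIDTH: usize, const HEIGHT: usize>(
    left: &[f64; HEIGHT],
    right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] {
    let mut result = [[0.0; WIDTH]; HEIGHT];
    for i in 0..HEIGHT {
        for j in 0..WIDTH {
            result[i][j] = left[i] * right[j];
        }
    }
    result
}

fn main() {
    let delta = [1.0, 2.0];      // HEIGHT = 2 (output size)
    let input = [3.0, 4.0, 5.0]; // WIDTH = 3 (input size)
    let grad = outer_product(&delta, &input);
    println!("{:?}", grad); // [[3.0, 4.0, 5.0], [6.0, 8.0, 10.0]]
}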
